Lines Matching +full:fine +full:- +full:granular

2  * Performance events - AMD IBS
6 * For licencing details see kernel-base/COPYING
29 #include <asm/amd-ibs.h>
105 s64 left = local64_read(&hwc->period_left); in perf_event_set_period()
106 s64 period = hwc->sample_period; in perf_event_set_period()
112 if (unlikely(left <= -period)) { in perf_event_set_period()
114 local64_set(&hwc->period_left, left); in perf_event_set_period()
115 hwc->last_period = period; in perf_event_set_period()
121 local64_set(&hwc->period_left, left); in perf_event_set_period()
122 hwc->last_period = period; in perf_event_set_period()
129 * Thus we shorten the next-to-last period and set the last in perf_event_set_period()
133 left -= max; in perf_event_set_period()
148 struct hw_perf_event *hwc = &event->hw; in perf_event_try_update()
149 int shift = 64 - width; in perf_event_try_update()
157 * exchange a new raw count - then add that new-prev delta in perf_event_try_update()
160 prev_raw_count = local64_read(&hwc->prev_count); in perf_event_try_update()
161 if (!local64_try_cmpxchg(&hwc->prev_count, in perf_event_try_update()
168 * (event-)time and add that to the generic event. in perf_event_try_update()
170 * Careful, not all hw sign-extends above the physical width in perf_event_try_update()
173 delta = (new_raw_count << shift) - (prev_raw_count << shift); in perf_event_try_update()
176 local64_add(delta, &event->count); in perf_event_try_update()
177 local64_sub(delta, &hwc->period_left); in perf_event_try_update()
195 * core pmu config -> IBS config
197 * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
198 * perf record -a -e r076:p ... # same as -e cpu-cycles:p
199 * perf record -a -e r0C1:p ... # use ibs op counting micro-ops
202 * MSRC001_1033) is used to select either cycle or micro-ops counting
207 switch (event->attr.type) { in core_pmu_ibs_config()
209 switch (event->attr.config) { in core_pmu_ibs_config()
216 switch (event->attr.config) { in core_pmu_ibs_config()
226 return -ENOENT; in core_pmu_ibs_config()
229 return -EOPNOTSUPP; in core_pmu_ibs_config()
242 if (!event->attr.precise_ip || event->attr.precise_ip > 2) in forward_event_to_ibs()
243 return -EOPNOTSUPP; in forward_event_to_ibs()
246 event->attr.type = perf_ibs_op.pmu.type; in forward_event_to_ibs()
247 event->attr.config = config; in forward_event_to_ibs()
249 return -ENOENT; in forward_event_to_ibs()
260 if (event->group_leader == event) in validate_group()
263 if (event->group_leader->pmu == event->pmu) in validate_group()
264 return -EINVAL; in validate_group()
266 for_each_sibling_event(sibling, event->group_leader) { in validate_group()
267 if (sibling->pmu == event->pmu) in validate_group()
268 return -EINVAL; in validate_group()
275 struct hw_perf_event *hwc = &event->hw; in perf_ibs_init()
280 perf_ibs = get_ibs_pmu(event->attr.type); in perf_ibs_init()
282 return -ENOENT; in perf_ibs_init()
284 config = event->attr.config; in perf_ibs_init()
286 if (event->pmu != &perf_ibs->pmu) in perf_ibs_init()
287 return -ENOENT; in perf_ibs_init()
289 if (config & ~perf_ibs->config_mask) in perf_ibs_init()
290 return -EINVAL; in perf_ibs_init()
293 return -EOPNOTSUPP; in perf_ibs_init()
296 if (event->attr.exclude_host || event->attr.exclude_guest || in perf_ibs_init()
297 event->attr.exclude_idle) in perf_ibs_init()
298 return -EINVAL; in perf_ibs_init()
300 if (!(event->attr.config2 & IBS_SW_FILTER_MASK) && in perf_ibs_init()
301 (event->attr.exclude_kernel || event->attr.exclude_user || in perf_ibs_init()
302 event->attr.exclude_hv)) in perf_ibs_init()
303 return -EINVAL; in perf_ibs_init()
309 if (hwc->sample_period) { in perf_ibs_init()
310 if (config & perf_ibs->cnt_mask) in perf_ibs_init()
312 return -EINVAL; in perf_ibs_init()
313 if (!event->attr.sample_freq && hwc->sample_period & 0x0f) in perf_ibs_init()
319 return -EINVAL; in perf_ibs_init()
320 hwc->sample_period &= ~0x0FULL; in perf_ibs_init()
321 if (!hwc->sample_period) in perf_ibs_init()
322 hwc->sample_period = 0x10; in perf_ibs_init()
324 max_cnt = config & perf_ibs->cnt_mask; in perf_ibs_init()
325 config &= ~perf_ibs->cnt_mask; in perf_ibs_init()
326 event->attr.sample_period = max_cnt << 4; in perf_ibs_init()
327 hwc->sample_period = event->attr.sample_period; in perf_ibs_init()
330 if (!hwc->sample_period) in perf_ibs_init()
331 return -EINVAL; in perf_ibs_init()
334 * If we modify hwc->sample_period, we also need to update in perf_ibs_init()
335 * hwc->last_period and hwc->period_left. in perf_ibs_init()
337 hwc->last_period = hwc->sample_period; in perf_ibs_init()
338 local64_set(&hwc->period_left, hwc->sample_period); in perf_ibs_init()
340 hwc->config_base = perf_ibs->msr; in perf_ibs_init()
341 hwc->config = config; in perf_ibs_init()
352 overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period); in perf_ibs_set_period()
353 local64_set(&hwc->prev_count, 0); in perf_ibs_set_period()
371 * If the internal 27-bit counter rolled over, the count is MaxCnt in get_ibs_op_count()
373 * Otherwise CurCnt has the full 27-bit current counter value. in get_ibs_op_count()
390 u64 count = perf_ibs->get_count(*config); in perf_ibs_event_update()
398 rdmsrl(event->hw.config_base, *config); in perf_ibs_event_update()
399 count = perf_ibs->get_count(*config); in perf_ibs_event_update()
406 u64 tmp = hwc->config | config; in perf_ibs_enable_event()
408 if (perf_ibs->fetch_count_reset_broken) in perf_ibs_enable_event()
409 wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask); in perf_ibs_enable_event()
411 wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask); in perf_ibs_enable_event()
415 * Erratum #420 Instruction-Based Sampling Engine May Generate
424 config &= ~perf_ibs->cnt_mask; in perf_ibs_disable_event()
426 wrmsrl(hwc->config_base, config); in perf_ibs_disable_event()
427 config &= ~perf_ibs->enable_mask; in perf_ibs_disable_event()
428 wrmsrl(hwc->config_base, config); in perf_ibs_disable_event()
439 struct hw_perf_event *hwc = &event->hw; in perf_ibs_start()
440 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); in perf_ibs_start()
441 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); in perf_ibs_start()
444 if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) in perf_ibs_start()
447 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); in perf_ibs_start()
448 hwc->state = 0; in perf_ibs_start()
461 set_bit(IBS_STARTED, pcpu->state); in perf_ibs_start()
462 clear_bit(IBS_STOPPING, pcpu->state); in perf_ibs_start()
470 struct hw_perf_event *hwc = &event->hw; in perf_ibs_stop()
471 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); in perf_ibs_stop()
472 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); in perf_ibs_stop()
476 if (test_and_set_bit(IBS_STOPPING, pcpu->state)) in perf_ibs_stop()
479 stopping = test_bit(IBS_STARTED, pcpu->state); in perf_ibs_stop()
481 if (!stopping && (hwc->state & PERF_HES_UPTODATE)) in perf_ibs_stop()
484 rdmsrl(hwc->config_base, config); in perf_ibs_stop()
493 set_bit(IBS_STOPPED, pcpu->state); in perf_ibs_stop()
504 clear_bit(IBS_STARTED, pcpu->state); in perf_ibs_stop()
505 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); in perf_ibs_stop()
506 hwc->state |= PERF_HES_STOPPED; in perf_ibs_stop()
509 if (hwc->state & PERF_HES_UPTODATE) in perf_ibs_stop()
516 config &= ~perf_ibs->valid_mask; in perf_ibs_stop()
519 hwc->state |= PERF_HES_UPTODATE; in perf_ibs_stop()
524 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); in perf_ibs_add()
525 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); in perf_ibs_add()
527 if (test_and_set_bit(IBS_ENABLED, pcpu->state)) in perf_ibs_add()
528 return -ENOSPC; in perf_ibs_add()
530 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; in perf_ibs_add()
532 pcpu->event = event; in perf_ibs_add()
542 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); in perf_ibs_del()
543 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); in perf_ibs_del()
545 if (!test_and_clear_bit(IBS_ENABLED, pcpu->state)) in perf_ibs_del()
550 pcpu->event = NULL; in perf_ibs_del()
580 return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0; in zen4_ibs_extensions_is_visible()
631 return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0; in cnt_ctl_is_visible()
729 union perf_mem_data_src *data_src = &data->data_src; in perf_ibs_get_mem_op()
731 data_src->mem_op = PERF_MEM_OP_NA; in perf_ibs_get_mem_op()
733 if (op_data3->ld_op) in perf_ibs_get_mem_op()
734 data_src->mem_op = PERF_MEM_OP_LOAD; in perf_ibs_get_mem_op()
735 else if (op_data3->st_op) in perf_ibs_get_mem_op()
736 data_src->mem_op = PERF_MEM_OP_STORE; in perf_ibs_get_mem_op()
741 * more fine granular DataSrc encodings. Others have coarse.
746 return (op_data2->data_src_hi << 3) | op_data2->data_src_lo; in perf_ibs_data_src()
748 return op_data2->data_src_lo; in perf_ibs_data_src()
785 union perf_mem_data_src *data_src = &data->data_src; in perf_ibs_get_mem_lvl()
788 data_src->mem_lvl = 0; in perf_ibs_get_mem_lvl()
789 data_src->mem_lvl_num = 0; in perf_ibs_get_mem_lvl()
795 if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) in perf_ibs_get_mem_lvl()
799 if (op_data3->dc_miss == 0) in perf_ibs_get_mem_lvl()
803 if (op_data3->l2_miss == 0) { in perf_ibs_get_mem_lvl()
806 !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) in perf_ibs_get_mem_lvl()
814 if (data_src->mem_op != PERF_MEM_OP_LOAD) in perf_ibs_get_mem_lvl()
824 if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) { in perf_ibs_get_mem_lvl()
839 if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) { in perf_ibs_get_mem_lvl()
857 if (op_data3->dc_miss_no_mab_alloc) in perf_ibs_get_mem_lvl()
866 /* 0: Uninitialized, 1: Valid, -1: Invalid */ in perf_ibs_cache_hit_st_valid()
874 cache_hit_st_valid = -1; in perf_ibs_cache_hit_st_valid()
886 union perf_mem_data_src *data_src = &data->data_src; in perf_ibs_get_mem_snoop()
889 data_src->mem_snoop = PERF_MEM_SNOOP_NA; in perf_ibs_get_mem_snoop()
892 data_src->mem_op != PERF_MEM_OP_LOAD || in perf_ibs_get_mem_snoop()
893 data_src->mem_lvl & PERF_MEM_LVL_L1 || in perf_ibs_get_mem_snoop()
894 data_src->mem_lvl & PERF_MEM_LVL_L2 || in perf_ibs_get_mem_snoop()
895 op_data2->cache_hit_st) in perf_ibs_get_mem_snoop()
904 data_src->mem_snoop = PERF_MEM_SNOOP_HITM; in perf_ibs_get_mem_snoop()
906 data_src->mem_snoop = PERF_MEM_SNOOP_HITM; in perf_ibs_get_mem_snoop()
913 union perf_mem_data_src *data_src = &data->data_src; in perf_ibs_get_tlb_lvl()
915 data_src->mem_dtlb = PERF_MEM_TLB_NA; in perf_ibs_get_tlb_lvl()
917 if (!op_data3->dc_lin_addr_valid) in perf_ibs_get_tlb_lvl()
920 if (!op_data3->dc_l1tlb_miss) { in perf_ibs_get_tlb_lvl()
921 data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT; in perf_ibs_get_tlb_lvl()
925 if (!op_data3->dc_l2tlb_miss) { in perf_ibs_get_tlb_lvl()
926 data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT; in perf_ibs_get_tlb_lvl()
930 data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS; in perf_ibs_get_tlb_lvl()
936 union perf_mem_data_src *data_src = &data->data_src; in perf_ibs_get_mem_lock()
938 data_src->mem_lock = PERF_MEM_LOCK_NA; in perf_ibs_get_mem_lock()
940 if (op_data3->dc_locked_op) in perf_ibs_get_mem_lock()
941 data_src->mem_lock = PERF_MEM_LOCK_LOCKED; in perf_ibs_get_mem_lock()
945 #define ibs_fetch_msr_idx(msr) (msr - MSR_AMD64_IBSFETCHCTL)
946 #define ibs_op_msr_idx(msr) (msr - MSR_AMD64_IBSOPCTL)
953 union perf_mem_data_src *data_src = &data->data_src; in perf_ibs_get_data_src()
955 data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data); in perf_ibs_get_data_src()
964 __u64 val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA2)]; in perf_ibs_get_op_data2()
968 (op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) { in perf_ibs_get_op_data2()
987 data->data_src.val = PERF_MEM_NA; in perf_ibs_parse_ld_st_data()
988 op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; in perf_ibs_parse_ld_st_data()
991 if (data->data_src.mem_op != PERF_MEM_OP_LOAD && in perf_ibs_parse_ld_st_data()
992 data->data_src.mem_op != PERF_MEM_OP_STORE) in perf_ibs_parse_ld_st_data()
999 data->sample_flags |= PERF_SAMPLE_DATA_SRC; in perf_ibs_parse_ld_st_data()
1003 data->data_src.mem_op == PERF_MEM_OP_LOAD) { in perf_ibs_parse_ld_st_data()
1004 op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)]; in perf_ibs_parse_ld_st_data()
1007 data->weight.var1_dw = op_data3.dc_miss_lat; in perf_ibs_parse_ld_st_data()
1008 data->weight.var2_w = op_data.tag_to_ret_ctr; in perf_ibs_parse_ld_st_data()
1010 data->weight.full = op_data3.dc_miss_lat; in perf_ibs_parse_ld_st_data()
1012 data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; in perf_ibs_parse_ld_st_data()
1016 data->addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)]; in perf_ibs_parse_ld_st_data()
1017 data->sample_flags |= PERF_SAMPLE_ADDR; in perf_ibs_parse_ld_st_data()
1021 data->phys_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)]; in perf_ibs_parse_ld_st_data()
1022 data->sample_flags |= PERF_SAMPLE_PHYS_ADDR; in perf_ibs_parse_ld_st_data()
1035 return perf_ibs->offset_max; in perf_ibs_get_offset_max()
1048 op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; in perf_ibs_is_kernel_data_addr()
1049 dc_lin_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)]; in perf_ibs_is_kernel_data_addr()
1051 return unlikely((event->attr.sample_type & sample_type_mask) && in perf_ibs_is_kernel_data_addr()
1062 op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)]; in perf_ibs_is_kernel_br_target()
1063 br_target = ibs_data->regs[br_target_idx]; in perf_ibs_is_kernel_br_target()
1065 return unlikely((event->attr.sample_type & PERF_SAMPLE_RAW) && in perf_ibs_is_kernel_br_target()
1076 if (perf_ibs != &perf_ibs_op || !event->attr.exclude_kernel) in perf_ibs_swfilt_discard()
1082 if (br_target_idx != -1 && in perf_ibs_swfilt_discard()
1093 ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)] &= ~(1ULL << 18); in perf_ibs_phyaddr_clear()
1094 ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)] = 0; in perf_ibs_phyaddr_clear()
1098 ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHCTL)] &= ~(1ULL << 52); in perf_ibs_phyaddr_clear()
1099 ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHPHYSAD)] = 0; in perf_ibs_phyaddr_clear()
1104 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); in perf_ibs_handle_irq()
1105 struct perf_event *event = pcpu->event; in perf_ibs_handle_irq()
1114 int br_target_idx = -1; in perf_ibs_handle_irq()
1116 if (!test_bit(IBS_STARTED, pcpu->state)) { in perf_ibs_handle_irq()
1124 if (test_and_clear_bit(IBS_STOPPED, pcpu->state)) in perf_ibs_handle_irq()
1133 hwc = &event->hw; in perf_ibs_handle_irq()
1134 msr = hwc->config_base; in perf_ibs_handle_irq()
1137 if (!(*buf++ & perf_ibs->valid_mask)) in perf_ibs_handle_irq()
1142 perf_sample_data_init(&data, 0, hwc->last_period); in perf_ibs_handle_irq()
1151 offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip); in perf_ibs_handle_irq()
1156 offset = find_next_bit(perf_ibs->offset_mask, in perf_ibs_handle_irq()
1157 perf_ibs->offset_max, in perf_ibs_handle_irq()
1165 if (event->attr.sample_type & PERF_SAMPLE_RAW) { in perf_ibs_handle_irq()
1189 if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1])) in perf_ibs_handle_irq()
1196 if ((event->attr.config2 & IBS_SW_FILTER_MASK) && in perf_ibs_handle_irq()
1206 if ((event->attr.sample_type & PERF_SAMPLE_RAW) && in perf_ibs_handle_irq()
1207 perf_allow_kernel(&event->attr)) { in perf_ibs_handle_irq()
1211 if (event->attr.sample_type & PERF_SAMPLE_RAW) { in perf_ibs_handle_irq()
1222 perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data); in perf_ibs_handle_irq()
1266 perf_sample_event_took(sched_clock() - stamp); in perf_ibs_nmi_handler()
1279 return -ENOMEM; in perf_ibs_pmu_init()
1281 perf_ibs->pcpu = pcpu; in perf_ibs_pmu_init()
1283 ret = perf_pmu_register(&perf_ibs->pmu, name, -1); in perf_ibs_pmu_init()
1285 perf_ibs->pcpu = NULL; in perf_ibs_pmu_init()
1296 * they need a 0-1 transition of IbsFetchEn. in perf_ibs_fetch_init()
1373 /* IBS - apic initialization, for perf and oprofile */
1468 return -EINVAL; in setup_ibs_ctl()
1474 return -ENODEV; in setup_ibs_ctl()
1484 * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
1541 return -EINVAL; in get_ibs_lvt_offset()
1618 return -ENODEV; /* ibs not supported by the cpu */ in amd_ibs_init()
1623 return -EINVAL; in amd_ibs_init()