Lines matching full:spe in tools/perf/util/arm-spe.c (perf's Arm SPE trace decoding support)

3  * Arm Statistical Profiling Extensions (SPE) support
33 #include "arm-spe.h"
34 #include "arm-spe-decoder/arm-spe-decoder.h"
35 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
90 struct arm_spe *spe; member
120 static void arm_spe_dump(struct arm_spe *spe __maybe_unused, in arm_spe_dump()
130 ". ... ARM SPE data: size %#zx bytes\n", in arm_spe_dump()
159 static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf, in arm_spe_dump_event() argument
163 arm_spe_dump(spe, buf, len); in arm_spe_dump_event()
173 queue = &speq->spe->queues.queue_array[speq->queue_nr]; in arm_spe_get_trace()
189 int fd = perf_data__fd(speq->spe->session->data); in arm_spe_get_trace()
211 static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, in arm_spe__alloc_queue() argument
225 speq->spe = spe; in arm_spe__alloc_queue()
250 static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip) in arm_spe_cpumode() argument
252 return ip >= spe->kernel_start ? in arm_spe_cpumode()
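arm_spe_cpumode() (line 250 above) attributes each sampled instruction pointer to kernel or user space by comparing it against the cached kernel start address. Below is a minimal standalone sketch of that test; the classify_ip() helper, the enum names and the boundary value are illustrative, not from the perf sources.

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the PERF_RECORD_MISC_{USER,KERNEL} cpumodes. */
enum sketch_cpumode { SKETCH_USER = 0, SKETCH_KERNEL = 1 };

/*
 * Addresses at or above the kernel start are attributed to the kernel,
 * everything below to user space, the same test arm_spe_cpumode() does
 * with spe->kernel_start.
 */
static enum sketch_cpumode classify_ip(uint64_t ip, uint64_t kernel_start)
{
	return ip >= kernel_start ? SKETCH_KERNEL : SKETCH_USER;
}

int main(void)
{
	const uint64_t kernel_start = 0xffff000000000000ULL;	/* illustrative */

	printf("%d\n", classify_ip(0x0000aaaadeadbeefULL, kernel_start));	/* 0: user */
	printf("%d\n", classify_ip(0xffff0000080813a0ULL, kernel_start));	/* 1: kernel */
	return 0;
}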
257 static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, in arm_spe_set_pid_tid_cpu() argument
263 tid = machine__get_current_tid(spe->machine, speq->cpu); in arm_spe_set_pid_tid_cpu()
271 speq->thread = machine__find_thread(spe->machine, -1, in arm_spe_set_pid_tid_cpu()
284 struct arm_spe *spe = speq->spe; in arm_spe_set_tid() local
285 int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid); in arm_spe_set_tid()
290 arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]); in arm_spe_set_tid()
295 static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu) in arm_spe__get_metadata_by_cpu() argument
299 if (!spe->metadata) in arm_spe__get_metadata_by_cpu()
302 for (i = 0; i < spe->metadata_nr_cpu; i++) in arm_spe__get_metadata_by_cpu()
303 if (spe->metadata[i][ARM_SPE_CPU] == cpu) in arm_spe__get_metadata_by_cpu()
304 return spe->metadata[i]; in arm_spe__get_metadata_by_cpu()
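arm_spe__get_metadata_by_cpu() does a linear scan over the per-CPU metadata table, matching the ARM_SPE_CPU field of each row against the requested CPU. Here is a standalone sketch of that lookup; the FIELD_CPU/NR_FIELDS layout and the sample values are made up for illustration.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define FIELD_CPU	0	/* illustrative index, plays the role of ARM_SPE_CPU */
#define NR_FIELDS	4	/* illustrative row width */

/* Return the metadata row for @cpu, or NULL if the table has no entry. */
static uint64_t *metadata_by_cpu(uint64_t (*metadata)[NR_FIELDS],
				 size_t nr_cpu, uint64_t cpu)
{
	if (!metadata)
		return NULL;

	for (size_t i = 0; i < nr_cpu; i++)
		if (metadata[i][FIELD_CPU] == cpu)
			return metadata[i];

	return NULL;
}

int main(void)
{
	uint64_t table[2][NR_FIELDS] = {
		{ 0, 0x410fd0c0, 0, 0 },	/* CPU 0, illustrative fields */
		{ 1, 0x410fd0c0, 0, 0 },	/* CPU 1 */
	};
	uint64_t *row = metadata_by_cpu(table, 2, 1);

	printf("found row for cpu %llu\n",
	       row ? (unsigned long long)row[FIELD_CPU] : ~0ULL);
	return 0;
}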
328 static void arm_spe_prep_sample(struct arm_spe *spe, in arm_spe_prep_sample() argument
335 if (!spe->timeless_decoding) in arm_spe_prep_sample()
336 sample->time = tsc_to_perf_time(record->timestamp, &spe->tc); in arm_spe_prep_sample()
339 sample->cpumode = arm_spe_cpumode(spe, sample->ip); in arm_spe_prep_sample()
358 arm_spe_deliver_synth_event(struct arm_spe *spe, in arm_spe_deliver_synth_event() argument
365 if (spe->synth_opts.inject) { in arm_spe_deliver_synth_event()
366 ret = arm_spe__inject_event(event, sample, spe->sample_type); in arm_spe_deliver_synth_event()
371 ret = perf_session__deliver_synth_event(spe->session, event, sample); in arm_spe_deliver_synth_event()
373 pr_err("ARM SPE: failed to deliver event, error %d\n", ret); in arm_spe_deliver_synth_event()
381 struct arm_spe *spe = speq->spe; in arm_spe__synth_mem_sample() local
386 arm_spe_prep_sample(spe, speq, event, &sample); in arm_spe__synth_mem_sample()
395 return arm_spe_deliver_synth_event(spe, speq, event, &sample); in arm_spe__synth_mem_sample()
401 struct arm_spe *spe = speq->spe; in arm_spe__synth_branch_sample() local
406 arm_spe_prep_sample(spe, speq, event, &sample); in arm_spe__synth_branch_sample()
414 return arm_spe_deliver_synth_event(spe, speq, event, &sample); in arm_spe__synth_branch_sample()
420 struct arm_spe *spe = speq->spe; in arm_spe__synth_instruction_sample() local
429 if (speq->period_instructions < spe->instructions_sample_period) in arm_spe__synth_instruction_sample()
433 arm_spe_prep_sample(spe, speq, event, &sample); in arm_spe__synth_instruction_sample()
440 sample.period = spe->instructions_sample_period; in arm_spe__synth_instruction_sample()
444 return arm_spe_deliver_synth_event(spe, speq, event, &sample); in arm_spe__synth_instruction_sample()
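arm_spe__synth_instruction_sample() rate-limits synthesized instruction samples: a per-queue counter is advanced for every record, and a sample is only emitted, with sample.period charged as the full period, once the counter reaches spe->instructions_sample_period. A standalone sketch of that decimation; struct and function names are illustrative.

#include <stdint.h>
#include <stdio.h>

struct sketch_queue {
	uint64_t period_instructions;	/* records seen since the last sample */
};

/*
 * Returns 1 when a synthesized instructions sample should be emitted,
 * mirroring the period check and counter reset around lines 429-440.
 */
static int maybe_emit_instruction_sample(struct sketch_queue *q,
					 uint64_t sample_period)
{
	q->period_instructions++;
	if (q->period_instructions < sample_period)
		return 0;

	q->period_instructions = 0;
	return 1;	/* caller sets sample.period = sample_period */
}

int main(void)
{
	struct sketch_queue q = { 0 };
	int emitted = 0;

	for (int i = 0; i < 10000; i++)
		emitted += maybe_emit_instruction_sample(&q, 1000);

	printf("emitted %d samples\n", emitted);	/* 10 */
	return 0;
}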
491 * Neoverse SPE records. in arm_spe__synth_data_source_common()
624 struct arm_spe *spe = speq->spe; in arm_spe__synth_ds() local
630 if (spe->metadata_ver == 1) { in arm_spe__synth_ds()
633 pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n"); in arm_spe__synth_ds()
634 cpuid = perf_env__cpuid(spe->session->evlist->env); in arm_spe__synth_ds()
643 if (!spe->is_homogeneous) in arm_spe__synth_ds()
647 if (spe->metadata) in arm_spe__synth_ds()
648 metadata = spe->metadata[0]; in arm_spe__synth_ds()
650 metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu); in arm_spe__synth_ds()
703 struct arm_spe *spe = speq->spe; in arm_spe_sample() local
710 if (spe->sample_flc) { in arm_spe_sample()
712 err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id, in arm_spe_sample()
719 err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id, in arm_spe_sample()
726 if (spe->sample_llc) { in arm_spe_sample()
728 err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id, in arm_spe_sample()
735 err = arm_spe__synth_mem_sample(speq, spe->llc_access_id, in arm_spe_sample()
742 if (spe->sample_tlb) { in arm_spe_sample()
744 err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id, in arm_spe_sample()
751 err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id, in arm_spe_sample()
758 if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) { in arm_spe_sample()
759 err = arm_spe__synth_branch_sample(speq, spe->branch_id); in arm_spe_sample()
764 if (spe->sample_remote_access && in arm_spe_sample()
766 err = arm_spe__synth_mem_sample(speq, spe->remote_access_id, in arm_spe_sample()
776 if (spe->sample_memory && is_ldst_op(record->op)) { in arm_spe_sample()
777 err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src); in arm_spe_sample()
782 if (spe->sample_instructions) { in arm_spe_sample()
783 err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src); in arm_spe_sample()
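arm_spe_sample() synthesizes at most one event per enabled sample class (L1D, LLC, TLB, branch, remote access, memory, instructions), each gated on a user-selected flag and on bits carried by the decoded record. A condensed sketch of that gating pattern follows; the event bits, struct layouts and synth() stub are illustrative, not the perf data structures.

#include <stdint.h>
#include <stdio.h>

/* Illustrative record event bits (the real decoder defines many more). */
#define EV_L1D_MISS	(1u << 0)
#define EV_TLB_MISS	(1u << 1)
#define EV_BRANCH	(1u << 2)

struct sketch_record { uint32_t events; };
struct sketch_opts   { int sample_flc, sample_tlb, sample_branch; };

/* Stand-in for arm_spe__synth_*_sample(): just report what would be emitted. */
static int synth(const char *what)
{
	printf("synthesize %s sample\n", what);
	return 0;
}

static int sample(const struct sketch_opts *opts, const struct sketch_record *rec)
{
	int err;

	if (opts->sample_flc && (rec->events & EV_L1D_MISS)) {
		err = synth("l1d-miss");
		if (err)
			return err;
	}
	if (opts->sample_tlb && (rec->events & EV_TLB_MISS)) {
		err = synth("tlb-miss");
		if (err)
			return err;
	}
	if (opts->sample_branch && (rec->events & EV_BRANCH)) {
		err = synth("branch");
		if (err)
			return err;
	}
	return 0;
}

int main(void)
{
	struct sketch_opts opts = { .sample_flc = 1, .sample_tlb = 1, .sample_branch = 0 };
	struct sketch_record rec = { .events = EV_L1D_MISS | EV_BRANCH };

	return sample(&opts, &rec);
}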
793 struct arm_spe *spe = speq->spe; in arm_spe_run_decoder() local
797 if (!spe->kernel_start) in arm_spe_run_decoder()
798 spe->kernel_start = machine__kernel_start(spe->machine); in arm_spe_run_decoder()
817 * can correlate samples between Arm SPE trace data and other in arm_spe_run_decoder()
825 if (!spe->timeless_decoding && record->context_id != (u64)-1) { in arm_spe_run_decoder()
830 spe->use_ctx_pkt_for_pid = true; in arm_spe_run_decoder()
844 * An error was detected while decoding the SPE trace data; continue to in arm_spe_run_decoder()
861 if (!spe->timeless_decoding && speq->timestamp >= *timestamp) { in arm_spe_run_decoder()
870 static int arm_spe__setup_queue(struct arm_spe *spe, in arm_spe__setup_queue() argument
880 speq = arm_spe__alloc_queue(spe, queue_nr); in arm_spe__setup_queue()
893 if (spe->timeless_decoding) in arm_spe__setup_queue()
908 ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp); in arm_spe__setup_queue()
917 static int arm_spe__setup_queues(struct arm_spe *spe) in arm_spe__setup_queues() argument
922 for (i = 0; i < spe->queues.nr_queues; i++) { in arm_spe__setup_queues()
923 ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i); in arm_spe__setup_queues()
931 static int arm_spe__update_queues(struct arm_spe *spe) in arm_spe__update_queues() argument
933 if (spe->queues.new_data) { in arm_spe__update_queues()
934 spe->queues.new_data = false; in arm_spe__update_queues()
935 return arm_spe__setup_queues(spe); in arm_spe__update_queues()
941 static bool arm_spe__is_timeless_decoding(struct arm_spe *spe) in arm_spe__is_timeless_decoding() argument
944 struct evlist *evlist = spe->session->evlist; in arm_spe__is_timeless_decoding()
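arm_spe__is_timeless_decoding() decides whether time-ordered processing is possible at all: decoding is treated as timeless only when no event in the session's evlist records PERF_SAMPLE_TIME. A sketch of that decision over a simplified event list; the sketch_evsel type and the SKETCH_SAMPLE_TIME bit stand in for perf's evlist API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SKETCH_SAMPLE_TIME	(1ULL << 2)	/* stand-in for PERF_SAMPLE_TIME */

struct sketch_evsel { uint64_t sample_type; };

/*
 * Decoding is "timeless" only when no event carries a timestamp; a single
 * timestamped event is enough to enable timestamp-ordered processing.
 */
static bool is_timeless(const struct sketch_evsel *evsels, int nr)
{
	for (int i = 0; i < nr; i++)
		if (evsels[i].sample_type & SKETCH_SAMPLE_TIME)
			return false;
	return true;
}

int main(void)
{
	struct sketch_evsel with_time[] = { { 0 }, { SKETCH_SAMPLE_TIME } };
	struct sketch_evsel without[]   = { { 0 }, { 0 } };

	printf("%d %d\n", is_timeless(with_time, 2), is_timeless(without, 2));	/* 0 1 */
	return 0;
}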
959 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) in arm_spe_process_queues() argument
969 if (!spe->heap.heap_cnt) in arm_spe_process_queues()
972 if (spe->heap.heap_array[0].ordinal >= timestamp) in arm_spe_process_queues()
975 queue_nr = spe->heap.heap_array[0].queue_nr; in arm_spe_process_queues()
976 queue = &spe->queues.queue_array[queue_nr]; in arm_spe_process_queues()
979 auxtrace_heap__pop(&spe->heap); in arm_spe_process_queues()
981 if (spe->heap.heap_cnt) { in arm_spe_process_queues()
982 ts = spe->heap.heap_array[0].ordinal + 1; in arm_spe_process_queues()
991 * here we need to update the pid/tid in the thread and SPE queue. in arm_spe_process_queues()
993 if (!spe->use_ctx_pkt_for_pid) in arm_spe_process_queues()
994 arm_spe_set_pid_tid_cpu(spe, queue); in arm_spe_process_queues()
998 auxtrace_heap__add(&spe->heap, queue_nr, ts); in arm_spe_process_queues()
1003 ret = auxtrace_heap__add(&spe->heap, queue_nr, ts); in arm_spe_process_queues()
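arm_spe_process_queues() merges all per-CPU queues in global timestamp order: the auxtrace heap hands back the queue with the earliest pending record, that queue is decoded up to the next boundary, then pushed back with its new timestamp. A compact sketch of that merge, with the heap reduced to a linear minimum scan and two hard-coded queues for illustration.

#include <stdint.h>
#include <stdio.h>

#define NR_QUEUES 2

/* Each queue holds pre-sorted timestamps, like one CPU's decoded records. */
struct sketch_queue {
	const uint64_t *ts;
	int nr, pos;
};

/*
 * "Heap" reduced to a scan for the earliest non-empty queue; perf uses
 * auxtrace_heap__add()/auxtrace_heap__pop() in the same role.
 */
static int earliest(struct sketch_queue *q)
{
	int best = -1;

	for (int i = 0; i < NR_QUEUES; i++) {
		if (q[i].pos >= q[i].nr)
			continue;
		if (best < 0 || q[i].ts[q[i].pos] < q[best].ts[q[best].pos])
			best = i;
	}
	return best;
}

int main(void)
{
	const uint64_t a[] = { 10, 40, 70 };
	const uint64_t b[] = { 20, 30, 90 };
	struct sketch_queue q[NR_QUEUES] = { { a, 3, 0 }, { b, 3, 0 } };
	int i;

	/* Emit records from all queues in global timestamp order. */
	while ((i = earliest(q)) >= 0) {
		printf("queue %d ts %llu\n", i, (unsigned long long)q[i].ts[q[i].pos]);
		q[i].pos++;
	}
	return 0;
}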
1014 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, in arm_spe_process_timeless_queues() argument
1017 struct auxtrace_queues *queues = &spe->queues; in arm_spe_process_timeless_queues()
1022 struct auxtrace_queue *queue = &spe->queues.queue_array[i]; in arm_spe_process_timeless_queues()
1027 arm_spe_set_pid_tid_cpu(spe, queue); in arm_spe_process_timeless_queues()
1034 static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event, in arm_spe_context_switch() argument
1050 return machine__set_current_tid(spe->machine, cpu, pid, tid); in arm_spe_context_switch()
1060 struct arm_spe *spe = container_of(session->auxtrace, in arm_spe_process_event() local
1067 pr_err("SPE trace requires ordered events\n"); in arm_spe_process_event()
1072 timestamp = perf_time_to_tsc(sample->time, &spe->tc); in arm_spe_process_event()
1076 if (timestamp || spe->timeless_decoding) { in arm_spe_process_event()
1077 err = arm_spe__update_queues(spe); in arm_spe_process_event()
1082 if (spe->timeless_decoding) { in arm_spe_process_event()
1084 err = arm_spe_process_timeless_queues(spe, in arm_spe_process_event()
1089 err = arm_spe_process_queues(spe, timestamp); in arm_spe_process_event()
1093 if (!spe->use_ctx_pkt_for_pid && in arm_spe_process_event()
1096 err = arm_spe_context_switch(spe, event, sample); in arm_spe_process_event()
1106 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, in arm_spe_process_auxtrace_event() local
1109 if (!spe->data_queued) { in arm_spe_process_auxtrace_event()
1123 err = auxtrace_queues__add_event(&spe->queues, session, event, in arm_spe_process_auxtrace_event()
1131 arm_spe_dump_event(spe, buffer->data, in arm_spe_process_auxtrace_event()
1144 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, in arm_spe_flush() local
1154 ret = arm_spe__update_queues(spe); in arm_spe_flush()
1158 if (spe->timeless_decoding) in arm_spe_flush()
1159 return arm_spe_process_timeless_queues(spe, -1, in arm_spe_flush()
1162 ret = arm_spe_process_queues(spe, MAX_TIMESTAMP); in arm_spe_flush()
1166 if (!spe->use_ctx_pkt_for_pid) in arm_spe_flush()
1167 ui__warning("Arm SPE CONTEXT packets not found in the traces.\n" in arm_spe_flush()
1168 "Matching of TIDs to SPE events could be inaccurate.\n"); in arm_spe_flush()
1254 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, in arm_spe_free_events() local
1256 struct auxtrace_queues *queues = &spe->queues; in arm_spe_free_events()
1268 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, in arm_spe_free() local
1271 auxtrace_heap__free(&spe->heap); in arm_spe_free()
1274 arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu); in arm_spe_free()
1275 free(spe); in arm_spe_free()
1281 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace); in arm_spe_evsel_is_auxtrace() local
1283 return evsel->core.attr.type == spe->pmu_type; in arm_spe_evsel_is_auxtrace()
1307 static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr) in arm_spe_print_info() argument
1315 if (spe->metadata_ver == 1) { in arm_spe_print_info()
1358 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) in arm_spe_synth_events() argument
1368 if (evsel->core.attr.type == spe->pmu_type) { in arm_spe_synth_events()
1375 pr_debug("No selected events with SPE trace data\n"); in arm_spe_synth_events()
1387 if (spe->timeless_decoding) in arm_spe_synth_events()
1392 spe->sample_type = attr.sample_type; in arm_spe_synth_events()
1408 if (spe->synth_opts.flc) { in arm_spe_synth_events()
1409 spe->sample_flc = true; in arm_spe_synth_events()
1415 spe->l1d_miss_id = id; in arm_spe_synth_events()
1423 spe->l1d_access_id = id; in arm_spe_synth_events()
1428 if (spe->synth_opts.llc) { in arm_spe_synth_events()
1429 spe->sample_llc = true; in arm_spe_synth_events()
1435 spe->llc_miss_id = id; in arm_spe_synth_events()
1443 spe->llc_access_id = id; in arm_spe_synth_events()
1448 if (spe->synth_opts.tlb) { in arm_spe_synth_events()
1449 spe->sample_tlb = true; in arm_spe_synth_events()
1455 spe->tlb_miss_id = id; in arm_spe_synth_events()
1463 spe->tlb_access_id = id; in arm_spe_synth_events()
1468 if (spe->synth_opts.branches) { in arm_spe_synth_events()
1469 spe->sample_branch = true; in arm_spe_synth_events()
1475 spe->branch_id = id; in arm_spe_synth_events()
1480 if (spe->synth_opts.remote_access) { in arm_spe_synth_events()
1481 spe->sample_remote_access = true; in arm_spe_synth_events()
1487 spe->remote_access_id = id; in arm_spe_synth_events()
1492 if (spe->synth_opts.mem) { in arm_spe_synth_events()
1493 spe->sample_memory = true; in arm_spe_synth_events()
1498 spe->memory_id = id; in arm_spe_synth_events()
1503 if (spe->synth_opts.instructions) { in arm_spe_synth_events()
1504 if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) { in arm_spe_synth_events()
1505 pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n"); in arm_spe_synth_events()
1508 if (spe->synth_opts.period > 1) in arm_spe_synth_events()
1509 pr_warning("Arm SPE has a hardware-based sample period.\n" in arm_spe_synth_events()
1512 spe->sample_instructions = true; in arm_spe_synth_events()
1514 attr.sample_period = spe->synth_opts.period; in arm_spe_synth_events()
1515 spe->instructions_sample_period = attr.sample_period; in arm_spe_synth_events()
1519 spe->instructions_id = id; in arm_spe_synth_events()
1557 struct arm_spe *spe; in arm_spe_process_auxtrace_info() local
1569 pr_err("Failed to parse Arm SPE metadata.\n"); in arm_spe_process_auxtrace_info()
1573 spe = zalloc(sizeof(struct arm_spe)); in arm_spe_process_auxtrace_info()
1574 if (!spe) { in arm_spe_process_auxtrace_info()
1579 err = auxtrace_queues__init(&spe->queues); in arm_spe_process_auxtrace_info()
1583 spe->session = session; in arm_spe_process_auxtrace_info()
1584 spe->machine = &session->machines.host; /* No kvm support */ in arm_spe_process_auxtrace_info()
1585 spe->auxtrace_type = auxtrace_info->type; in arm_spe_process_auxtrace_info()
1587 spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; in arm_spe_process_auxtrace_info()
1589 spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2]; in arm_spe_process_auxtrace_info()
1590 spe->metadata = metadata; in arm_spe_process_auxtrace_info()
1591 spe->metadata_ver = metadata_ver; in arm_spe_process_auxtrace_info()
1592 spe->metadata_nr_cpu = nr_cpu; in arm_spe_process_auxtrace_info()
1593 spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu); in arm_spe_process_auxtrace_info()
1595 spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); in arm_spe_process_auxtrace_info()
1601 * in "spe->tc", which is used for later conversion between clock in arm_spe_process_auxtrace_info()
1607 spe->tc.time_shift = tc->time_shift; in arm_spe_process_auxtrace_info()
1608 spe->tc.time_mult = tc->time_mult; in arm_spe_process_auxtrace_info()
1609 spe->tc.time_zero = tc->time_zero; in arm_spe_process_auxtrace_info()
1612 spe->tc.time_cycles = tc->time_cycles; in arm_spe_process_auxtrace_info()
1613 spe->tc.time_mask = tc->time_mask; in arm_spe_process_auxtrace_info()
1614 spe->tc.cap_user_time_zero = tc->cap_user_time_zero; in arm_spe_process_auxtrace_info()
1615 spe->tc.cap_user_time_short = tc->cap_user_time_short; in arm_spe_process_auxtrace_info()
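The assignments above copy the session's time conversion parameters into spe->tc so tsc_to_perf_time() can map SPE timestamps onto perf time. The conversion follows the scheme documented for perf_event_mmap_page: optionally rebuild the full counter from the short format, then scale by time_mult, shift by time_shift and add time_zero. A standalone sketch of that arithmetic with made-up parameter values.

#include <stdint.h>
#include <stdio.h>

struct sketch_tc {
	uint16_t time_shift;
	uint32_t time_mult;
	uint64_t time_zero;
	uint64_t time_cycles;
	uint64_t time_mask;
	int	 cap_user_time_short;
};

/*
 * Same arithmetic perf documents for perf_event_mmap_page: split the
 * counter into quotient and remainder so the multiply cannot overflow,
 * then scale and rebase onto time_zero.
 */
static uint64_t tsc_to_time(uint64_t cyc, const struct sketch_tc *tc)
{
	uint64_t quot, rem;

	if (tc->cap_user_time_short)
		cyc = tc->time_cycles + ((cyc - tc->time_cycles) & tc->time_mask);

	quot = cyc >> tc->time_shift;
	rem  = cyc & (((uint64_t)1 << tc->time_shift) - 1);
	return tc->time_zero + quot * tc->time_mult +
	       ((rem * tc->time_mult) >> tc->time_shift);
}

int main(void)
{
	struct sketch_tc tc = {
		.time_shift = 21,
		.time_mult  = 1u << 21,		/* mult == 1 << shift: 1 ns per cycle */
		.time_zero  = 1000000000ULL,
		.cap_user_time_short = 0,
	};

	printf("%llu\n", (unsigned long long)tsc_to_time(1 << 22, &tc));
	return 0;
}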
1618 spe->auxtrace.process_event = arm_spe_process_event; in arm_spe_process_auxtrace_info()
1619 spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event; in arm_spe_process_auxtrace_info()
1620 spe->auxtrace.flush_events = arm_spe_flush; in arm_spe_process_auxtrace_info()
1621 spe->auxtrace.free_events = arm_spe_free_events; in arm_spe_process_auxtrace_info()
1622 spe->auxtrace.free = arm_spe_free; in arm_spe_process_auxtrace_info()
1623 spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace; in arm_spe_process_auxtrace_info()
1624 session->auxtrace = &spe->auxtrace; in arm_spe_process_auxtrace_info()
1626 arm_spe_print_info(spe, &auxtrace_info->priv[0]); in arm_spe_process_auxtrace_info()
1632 spe->synth_opts = *session->itrace_synth_opts; in arm_spe_process_auxtrace_info()
1634 itrace_synth_opts__set_default(&spe->synth_opts, false); in arm_spe_process_auxtrace_info()
1636 err = arm_spe_synth_events(spe, session); in arm_spe_process_auxtrace_info()
1640 err = auxtrace_queues__process_index(&spe->queues, session); in arm_spe_process_auxtrace_info()
1644 if (spe->queues.populated) in arm_spe_process_auxtrace_info()
1645 spe->data_queued = true; in arm_spe_process_auxtrace_info()
1650 auxtrace_queues__free(&spe->queues); in arm_spe_process_auxtrace_info()
1653 free(spe); in arm_spe_process_auxtrace_info()