/*
 * Copyright 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "perf/xe/intel_perf.h"

#include <fcntl.h>
#include <sys/stat.h>

#include "perf/intel_perf.h"
#include "intel_perf_common.h"
#include "intel/common/intel_gem.h"

#include "drm-uapi/xe_drm.h"

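/* Pack _val into the bit-field selected by _mask, where the field starts at
 * the lowest set bit of _mask (same idea as the kernel's FIELD_PREP()).
 * For example, FIELD_PREP_ULL(0xf0, 3) evaluates to 0x30.
 */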
#define FIELD_PREP_ULL(_mask, _val) (((_val) << (ffsll(_mask) - 1)) & (_mask))

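/* Build the packed DRM_XE_OA_FORMAT_* property value selecting the OA report
 * format for this device: PEC64u64 on gfx ver 20+ (verx10 >= 200), the OAG
 * A/B/C counter format otherwise.
 */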
uint64_t xe_perf_get_oa_format(struct intel_perf_config *perf)
{
   uint64_t fmt;

   if (perf->devinfo->verx10 >= 200) {
      /* BSpec: 60942
       * PEC64u64
       */
      fmt = FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, DRM_XE_OA_FMT_TYPE_PEC);
      fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, 1);
      fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, 1);
      fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_BC_REPORT, 0);
   } else {
      /* BSpec: 52198
       * same as I915_OA_FORMAT_A24u40_A14u32_B8_C8 and
       * I915_OA_FORMAT_A32u40_A4u32_B8_C8 returned for gfx 125+ and gfx 120
       * respectively.
       */
      fmt = FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, DRM_XE_OA_FMT_TYPE_OAG);
      fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, 5);
      fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, 0);
      fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_BC_REPORT, 0);
   }

   return fmt;
}

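/* Check that this Xe KMD exposes the observation interface and that the
 * caller may use it (observation_paranoid == 0 or running as root). When
 * available, also flags INTEL_PERF_FEATURE_HOLD_PREEMPTION as supported.
 */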
bool
xe_oa_metrics_available(struct intel_perf_config *perf, int fd, bool use_register_snapshots)
{
   bool perf_oa_available = false;
   struct stat sb;

   /* The existence of this file implies that this Xe KMD version supports
    * the observation interface.
    */
   if (stat("/proc/sys/dev/xe/observation_paranoid", &sb) == 0) {
      uint64_t paranoid = 1;

      /* Now we need to check if the application has privileges to access the
       * observation interface.
       *
       * TODO: this approach does not take into account applications running
       * with CAP_PERFMON privileges.
       */
      read_file_uint64("/proc/sys/dev/xe/observation_paranoid", &paranoid);
      if (paranoid == 0 || geteuid() == 0)
         perf_oa_available = true;
   }

   if (!perf_oa_available)
      return perf_oa_available;

   perf->features_supported |= INTEL_PERF_FEATURE_HOLD_PREEMPTION;

   return perf_oa_available;
}

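/* Register a metric configuration (mux, boolean-counter and flex registers)
 * with the Xe KMD under the given GUID. Returns the KMD-assigned config id,
 * or 0 on failure.
 */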
uint64_t
xe_add_config(struct intel_perf_config *perf, int fd,
              const struct intel_perf_registers *config,
              const char *guid)
{
   struct drm_xe_oa_config xe_config = {};
   struct drm_xe_observation_param observation_param = {
      .observation_type = DRM_XE_OBSERVATION_TYPE_OA,
      .observation_op = DRM_XE_OBSERVATION_OP_ADD_CONFIG,
      .param = (uintptr_t)&xe_config,
   };
   uint32_t *regs;
   int ret;

   memcpy(xe_config.uuid, guid, sizeof(xe_config.uuid));

   xe_config.n_regs = config->n_mux_regs + config->n_b_counter_regs + config->n_flex_regs;
   assert(xe_config.n_regs > 0);

   regs = malloc(sizeof(uint64_t) * xe_config.n_regs);
   xe_config.regs_ptr = (uintptr_t)regs;

   /* Each register programming entry occupies 8 bytes (an address/value
    * pair); regs is a uint32_t pointer, so advance it by two per entry.
    */
   memcpy(regs, config->mux_regs, config->n_mux_regs * sizeof(uint64_t));
   regs += 2 * config->n_mux_regs;
   memcpy(regs, config->b_counter_regs, config->n_b_counter_regs * sizeof(uint64_t));
   regs += 2 * config->n_b_counter_regs;
   memcpy(regs, config->flex_regs, config->n_flex_regs * sizeof(uint64_t));

   ret = intel_ioctl(fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
   free((void*)(uintptr_t)xe_config.regs_ptr);
   return ret > 0 ? ret : 0;
}

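/* Remove a previously added metric configuration, identified by the id
 * returned from xe_add_config().
 */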
void
xe_remove_config(struct intel_perf_config *perf, int fd, uint64_t config_id)
{
   struct drm_xe_observation_param observation_param = {
      .observation_type = DRM_XE_OBSERVATION_TYPE_OA,
      .observation_op = DRM_XE_OBSERVATION_OP_REMOVE_CONFIG,
      .param = (uintptr_t)&config_id,
   };

   intel_ioctl(fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
}

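/* Append one DRM_XE_OA_EXTENSION_SET_PROPERTY entry to the props array and
 * chain it to the previous entry through base.next_extension.
 */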
static void
oa_prop_set(struct drm_xe_ext_set_property *props, uint32_t *index,
            enum drm_xe_oa_property_id prop_id, uint64_t value)
{
   if (*index > 0)
      props[*index - 1].base.next_extension = (uintptr_t)&props[*index];

   props[*index].base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY;
   props[*index].property = prop_id;
   props[*index].value = value;
   *index = *index + 1;
}

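/* Open an OA stream with DRM_XE_OBSERVATION_OP_STREAM_OPEN, describing it
 * through a chain of set-property extensions (exec queue, metric set, report
 * format, sampling period exponent, ...). Returns the stream fd switched to
 * non-blocking mode, or a negative value on failure.
 */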
int
xe_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
                    uint32_t exec_id, uint64_t metrics_set_id,
                    uint64_t report_format, uint64_t period_exponent,
                    bool hold_preemption, bool enable)
{
   struct drm_xe_ext_set_property props[DRM_XE_OA_PROPERTY_NO_PREEMPT + 1] = {};
   struct drm_xe_observation_param observation_param = {
      .observation_type = DRM_XE_OBSERVATION_TYPE_OA,
      .observation_op = DRM_XE_OBSERVATION_OP_STREAM_OPEN,
      .param = (uintptr_t)&props,
   };
   uint32_t i = 0;
   int fd, flags;

   if (exec_id)
      oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, exec_id);
   oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_OA_DISABLED, !enable);
   oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_SAMPLE_OA, true);
   oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_OA_METRIC_SET, metrics_set_id);
   oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_OA_FORMAT, report_format);
   oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, period_exponent);
   if (hold_preemption)
      oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_NO_PREEMPT, hold_preemption);

   fd = intel_ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
   if (fd < 0)
      return fd;

   flags = fcntl(fd, F_GETFL, 0);
   flags |= O_CLOEXEC | O_NONBLOCK;
   if (fcntl(fd, F_SETFL, flags)) {
      close(fd);
      return -1;
   }

   return fd;
}

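/* Enable or disable report collection on an already opened OA stream. */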
int
xe_perf_stream_set_state(int perf_stream_fd, bool enable)
{
   unsigned long uapi = enable ? DRM_XE_OBSERVATION_IOCTL_ENABLE :
                                 DRM_XE_OBSERVATION_IOCTL_DISABLE;

   return intel_ioctl(perf_stream_fd, uapi, 0);
}

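/* Switch an open OA stream to a different metric set without reopening it. */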
int
xe_perf_stream_set_metrics_id(int perf_stream_fd, uint64_t metrics_set_id)
{
   struct drm_xe_ext_set_property prop = {};
   uint32_t index = 0;

   oa_prop_set(&prop, &index, DRM_XE_OA_PROPERTY_OA_METRIC_SET,
               metrics_set_id);
   return intel_ioctl(perf_stream_fd, DRM_XE_OBSERVATION_IOCTL_CONFIG,
                      (void *)(uintptr_t)&prop);
}

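/* Called when read() on the stream fd fails with EIO: query the stream
 * status and convert the first reported condition (buffer lost, report lost,
 * counter overflow, MMIO trigger queue full) into a header-only record for
 * the caller.
 */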
static int
xe_perf_stream_read_error(int perf_stream_fd, uint8_t *buffer, size_t buffer_len)
{
   struct drm_xe_oa_stream_status status = {};
   struct intel_perf_record_header *header;
   int ret;

   ret = intel_ioctl(perf_stream_fd, DRM_XE_OBSERVATION_IOCTL_STATUS, &status);
   if (ret)
      return -errno;

   header = (struct intel_perf_record_header *)buffer;
   header->pad = 0;
   header->type = 0;
   header->size = sizeof(*header);
   ret = header->size;

   if (status.oa_status & DRM_XE_OASTATUS_BUFFER_OVERFLOW)
      header->type = INTEL_PERF_RECORD_TYPE_OA_BUFFER_LOST;
   else if (status.oa_status & DRM_XE_OASTATUS_REPORT_LOST)
      header->type = INTEL_PERF_RECORD_TYPE_OA_REPORT_LOST;
   else if (status.oa_status & DRM_XE_OASTATUS_COUNTER_OVERFLOW)
      header->type = INTEL_PERF_RECORD_TYPE_COUNTER_OVERFLOW;
   else if (status.oa_status & DRM_XE_OASTATUS_MMIO_TRG_Q_FULL)
      header->type = INTEL_PERF_RECORD_TYPE_MMIO_TRG_Q_FULL;
   else
      unreachable("missing");

   return header->type ? header->size : -1;
}

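/* Read raw OA reports from the stream fd and repack them in place as
 * intel_perf_record_header + sample records. Returns the number of bytes of
 * records written to buffer, 0 if no data was available, or a negative value
 * on error.
 */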
int
xe_perf_stream_read_samples(struct intel_perf_config *perf_config, int perf_stream_fd,
                            uint8_t *buffer, size_t buffer_len)
{
   const size_t sample_size = perf_config->oa_sample_size;
   const size_t sample_header_size = sample_size + sizeof(struct intel_perf_record_header);
   uint32_t num_samples = buffer_len / sample_header_size;
   const size_t max_bytes_read = num_samples * sample_size;
   uint8_t *offset, *offset_samples;
   int len, i;

   if (buffer_len < sample_header_size)
      return -ENOSPC;

   do {
      len = read(perf_stream_fd, buffer, max_bytes_read);
   } while (len < 0 && errno == EINTR);

   if (len <= 0) {
      if (errno == EIO)
         return xe_perf_stream_read_error(perf_stream_fd, buffer, buffer_len);

      return len < 0 ? -errno : 0;
   }

   num_samples = len / sample_size;
   offset = buffer;
   offset_samples = buffer + (buffer_len - len);
   /* Move all samples to the end of the buffer. */
   memmove(offset_samples, buffer, len);

   /* Set up each header, then copy the sample back from the end of the buffer. */
   for (i = 0; i < num_samples; i++) {
      struct intel_perf_record_header *header = (struct intel_perf_record_header *)offset;

      /* TODO: also append REPORT_LOST and BUFFER_LOST */
      header->type = INTEL_PERF_RECORD_TYPE_SAMPLE;
      header->pad = 0;
      header->size = sample_header_size;
      offset += sizeof(*header);

      memmove(offset, offset_samples, sample_size);
      offset += sample_size;
      offset_samples += sample_size;
   }

   return offset - buffer;
}
274