xref: /aosp_15_r20/external/mesa3d/src/intel/perf/intel_perf_mdapi.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "intel_perf.h"
25 #include "intel_perf_mdapi.h"
26 #include "intel_perf_private.h"
27 #include "intel_perf_regs.h"
28 
29 #include "dev/intel_device_info.h"
30 
31 int
intel_perf_query_result_write_mdapi(void * data,uint32_t data_size,const struct intel_device_info * devinfo,const struct intel_perf_query_info * query,const struct intel_perf_query_result * result)32 intel_perf_query_result_write_mdapi(void *data, uint32_t data_size,
33                                     const struct intel_device_info *devinfo,
34                                     const struct intel_perf_query_info *query,
35                                     const struct intel_perf_query_result *result)
36 {
37    switch (devinfo->ver) {
38    case 7: {
39       struct gfx7_mdapi_metrics *mdapi_data = (struct gfx7_mdapi_metrics *) data;
40 
41       if (data_size < sizeof(*mdapi_data))
42          return 0;
43 
44       assert(devinfo->platform == INTEL_PLATFORM_HSW);
45 
46       for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++)
47          mdapi_data->ACounters[i] = result->accumulator[1 + i];
48 
49       for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) {
50          mdapi_data->NOACounters[i] =
51             result->accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i];
52       }
53 
54       mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];
55       mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];
56 
57       mdapi_data->ReportsCount = result->reports_accumulated;
58       mdapi_data->TotalTime =
59          intel_device_info_timebase_scale(devinfo, result->accumulator[0]);
60       mdapi_data->CoreFrequency = result->gt_frequency[1];
61       mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
62       mdapi_data->SplitOccured = result->query_disjoint;
63       return sizeof(*mdapi_data);
64    }
65    case 8: {
66       struct gfx8_mdapi_metrics *mdapi_data = (struct gfx8_mdapi_metrics *) data;
67 
68       if (data_size < sizeof(*mdapi_data))
69          return 0;
70 
71       for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
72          mdapi_data->OaCntr[i] = result->accumulator[2 + i];
73       for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
74          mdapi_data->NoaCntr[i] =
75             result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
76       }
77 
78       mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];
79       mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];
80 
81       mdapi_data->ReportId = result->hw_id;
82       mdapi_data->ReportsCount = result->reports_accumulated;
83       mdapi_data->TotalTime =
84          intel_device_info_timebase_scale(devinfo, result->accumulator[0]);
85       mdapi_data->BeginTimestamp =
86          intel_device_info_timebase_scale(devinfo, result->begin_timestamp);
87       mdapi_data->GPUTicks = result->accumulator[1];
88       mdapi_data->CoreFrequency = result->gt_frequency[1];
89       mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
90       mdapi_data->SliceFrequency =
91          (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
92       mdapi_data->UnsliceFrequency =
93          (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
94       mdapi_data->SplitOccured = result->query_disjoint;
95       return sizeof(*mdapi_data);
96    }
97    case 9:
98    case 11:
99    case 12:{
100       struct gfx9_mdapi_metrics *mdapi_data = (struct gfx9_mdapi_metrics *) data;
101 
102       if (data_size < sizeof(*mdapi_data))
103          return 0;
104 
105       for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
106          mdapi_data->OaCntr[i] = result->accumulator[2 + i];
107       for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
108          mdapi_data->NoaCntr[i] =
109             result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
110       }
111 
112       mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];
113       mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];
114 
115       mdapi_data->ReportId = result->hw_id;
116       mdapi_data->ReportsCount = result->reports_accumulated;
117       mdapi_data->TotalTime =
118          intel_device_info_timebase_scale(devinfo, result->accumulator[0]);
119       mdapi_data->BeginTimestamp =
120          intel_device_info_timebase_scale(devinfo, result->begin_timestamp);
121       mdapi_data->GPUTicks = result->accumulator[1];
122       mdapi_data->CoreFrequency = result->gt_frequency[1];
123       mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
124       mdapi_data->SliceFrequency =
125          (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
126       mdapi_data->UnsliceFrequency =
127          (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
128       mdapi_data->SplitOccured = result->query_disjoint;
129       return sizeof(*mdapi_data);
130    }
131    default:
132       unreachable("unexpected gen");
133    }
134 }
135 
136 void
intel_perf_register_mdapi_statistic_query(struct intel_perf_config * perf_cfg,const struct intel_device_info * devinfo)137 intel_perf_register_mdapi_statistic_query(struct intel_perf_config *perf_cfg,
138                                           const struct intel_device_info *devinfo)
139 {
140    if (!(devinfo->ver >= 7 && devinfo->ver <= 12))
141       return;
142 
143    struct intel_perf_query_info *query =
144       intel_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
145 
146    query->kind = INTEL_PERF_QUERY_TYPE_PIPELINE;
147    query->name = "Intel_Raw_Pipeline_Statistics_Query";
148 
149    /* The order has to match mdapi_pipeline_metrics. */
150    intel_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
151                                      "N vertices submitted");
152    intel_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
153                                      "N primitives submitted");
154    intel_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
155                                      "N vertex shader invocations");
156    intel_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
157                                      "N geometry shader invocations");
158    intel_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
159                                      "N geometry shader primitives emitted");
160    intel_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
161                                      "N primitives entering clipping");
162    intel_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
163                                      "N primitives leaving clipping");
164    if (devinfo->verx10 == 75 || devinfo->ver == 8) {
165       intel_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
166                                   "N fragment shader invocations",
167                                   "N fragment shader invocations");
168    } else {
169       intel_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
170                                         "N fragment shader invocations");
171    }
172    intel_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
173                                      "N TCS shader invocations");
174    intel_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
175                                      "N TES shader invocations");
176    if (devinfo->ver >= 7) {
177       intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
178                                         "N compute shader invocations");
179    }
180 
181    if (devinfo->ver >= 10) {
182       /* Reuse existing CS invocation register until we can expose this new
183        * one.
184        */
185       intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
186                                         "Reserved1");
187    }
188 
189    query->data_size = sizeof(uint64_t) * query->n_counters;
190 }
191 
192 static void
fill_mdapi_perf_query_counter(struct intel_perf_query_info * query,const char * name,uint32_t data_offset,uint32_t data_size,enum intel_perf_counter_data_type data_type)193 fill_mdapi_perf_query_counter(struct intel_perf_query_info *query,
194                               const char *name,
195                               uint32_t data_offset,
196                               uint32_t data_size,
197                               enum intel_perf_counter_data_type data_type)
198 {
199    struct intel_perf_query_counter *counter = &query->counters[query->n_counters];
200 
201    assert(query->n_counters <= query->max_counters);
202 
203    counter->name = name;
204    counter->desc = "Raw counter value";
205    counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
206    counter->data_type = data_type;
207    counter->offset = data_offset;
208 
209    query->n_counters++;
210 
211    assert(counter->offset + intel_perf_query_counter_get_size(counter) <= query->data_size);
212 }
213 
/*
 * Register the scalar member `field_name` of the object `struct_name` as a
 * counter of `query`. The counter is named after the member, its offset is
 * the member's byte distance from the start of the object, and `type_name`
 * is pasted onto INTEL_PERF_COUNTER_DATA_TYPE_ to form the data type.
 *
 * `struct_name` is parenthesized wherever it is used as an operand so that
 * an expression argument (for example `*ptr`) binds as intended.
 */
#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \
   fill_mdapi_perf_query_counter(query, #field_name,                    \
                                 (uint8_t *) &(struct_name).field_name - \
                                 (uint8_t *) &(struct_name),            \
                                 sizeof((struct_name).field_name),      \
                                 INTEL_PERF_COUNTER_DATA_TYPE_##type_name)
/*
 * Same as MDAPI_QUERY_ADD_COUNTER for element `idx` of the array member
 * `field_name`. The counter name is the member name followed by the index,
 * formatted with "%i" (so `idx` has to be an int) and allocated through
 * ralloc_asprintf() with `ctx` as its first argument.
 */
#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
   fill_mdapi_perf_query_counter(query,                                 \
                                 ralloc_asprintf(ctx, "%s%i", #field_name, idx), \
                                 (uint8_t *) &(struct_name).field_name[idx] - \
                                 (uint8_t *) &(struct_name),            \
                                 sizeof((struct_name).field_name[0]),   \
                                 INTEL_PERF_COUNTER_DATA_TYPE_##type_name)
227 
228 void
intel_perf_register_mdapi_oa_query(struct intel_perf_config * perf,const struct intel_device_info * devinfo)229 intel_perf_register_mdapi_oa_query(struct intel_perf_config *perf,
230                                    const struct intel_device_info *devinfo)
231 {
232    struct intel_perf_query_info *query = NULL;
233 
234    /* MDAPI requires different structures for pretty much every generation
235     * (right now we have definitions for gen 7 to 12).
236     */
237    if (!(devinfo->ver >= 7 && devinfo->ver <= 12))
238       return;
239 
240    switch (devinfo->ver) {
241    case 7: {
242       query = intel_perf_append_query_info(perf, 1 + 45 + 16 + 7);
243 
244       struct gfx7_mdapi_metrics metric_data;
245       query->data_size = sizeof(metric_data);
246 
247       MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
248       for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
249          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
250                                        metric_data, ACounters, i, UINT64);
251       }
252       for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
253          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
254                                        metric_data, NOACounters, i, UINT64);
255       }
256       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
257       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
258       MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
259       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
260       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
261       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
262       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
263       break;
264    }
265    case 8: {
266       query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16);
267 
268       struct gfx8_mdapi_metrics metric_data;
269       query->data_size = sizeof(metric_data);
270 
271       MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
272       MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
273       for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
274          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
275                                        metric_data, OaCntr, i, UINT64);
276       }
277       for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
278          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
279                                        metric_data, NoaCntr, i, UINT64);
280       }
281       MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
282       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
283       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
284       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
285       MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
286       MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
287       MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
288       MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
289       MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
290       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
291       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
292       MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
293       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
294       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
295       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
296       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
297       break;
298    }
299    case 9:
300    case 11:
301    case 12: {
302       query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
303 
304       struct gfx9_mdapi_metrics metric_data;
305       query->data_size = sizeof(metric_data);
306 
307       MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
308       MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
309       for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
310          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
311                                        metric_data, OaCntr, i, UINT64);
312       }
313       for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
314          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
315                                        metric_data, NoaCntr, i, UINT64);
316       }
317       MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
318       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
319       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
320       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
321       MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
322       MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
323       MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
324       MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
325       MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
326       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
327       MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
328       MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
329       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
330       MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
331       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
332       MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
333       for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
334          MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
335                                        metric_data, UserCntr, i, UINT64);
336       }
337       MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
338       MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
339       break;
340    }
341    default:
342       unreachable("Unsupported gen");
343       break;
344    }
345 
346    query->oa_format = intel_perf_get_oa_format(perf);
347    query->kind = INTEL_PERF_QUERY_TYPE_RAW;
348    query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
349    query->guid = INTEL_PERF_QUERY_GUID_MDAPI;
350 
351    {
352       /* Accumulation buffer offsets copied from an actual query... */
353       const struct intel_perf_query_info *copy_query =
354          &perf->queries[0];
355 
356       query->gpu_time_offset = copy_query->gpu_time_offset;
357       query->gpu_clock_offset = copy_query->gpu_clock_offset;
358       query->a_offset = copy_query->a_offset;
359       query->b_offset = copy_query->b_offset;
360       query->c_offset = copy_query->c_offset;
361       query->perfcnt_offset = copy_query->perfcnt_offset;
362    }
363 }
364