xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/anv_perf.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdint.h>
27 
28 #include "anv_private.h"
29 #include "vk_util.h"
30 
31 #include "perf/intel_perf.h"
32 #include "perf/intel_perf_mdapi.h"
33 
34 #include "util/mesa-sha1.h"
35 
36 void
anv_physical_device_init_perf(struct anv_physical_device * device,int fd)37 anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
38 {
39    struct intel_perf_config *perf = intel_perf_new(NULL);
40 
41    intel_perf_init_metrics(perf, &device->info, fd,
42                            false /* pipeline statistics */,
43                            true /* register snapshots */);
44 
45    if (!perf->n_queries)
46       goto err;
47 
48    /* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
49     * perf revision 2.
50     */
51    if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
52       if (!intel_perf_has_hold_preemption(perf))
53          goto err;
54    }
55 
56    device->perf = perf;
57 
58    /* Compute the number of commands we need to implement a performance
59     * query.
60     */
61    const struct intel_perf_query_field_layout *layout = &perf->query_layout;
62    device->n_perf_query_commands = 0;
63    for (uint32_t f = 0; f < layout->n_fields; f++) {
64       struct intel_perf_query_field *field = &layout->fields[f];
65 
66       switch (field->type) {
67       case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
68          device->n_perf_query_commands++;
69          break;
70       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
71       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
72       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
73       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
74       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
75       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC:
76          device->n_perf_query_commands += field->size / 4;
77          break;
78       default:
79          unreachable("Unhandled register type");
80       }
81    }
82    device->n_perf_query_commands *= 2; /* Begin & End */
83    device->n_perf_query_commands += 1; /* availability */
84 
85    return;
86 
87 err:
88    intel_perf_free(perf);
89 }
90 
91 void
anv_device_perf_init(struct anv_device * device)92 anv_device_perf_init(struct anv_device *device)
93 {
94    device->perf_fd = -1;
95    device->perf_queue = NULL;
96 }
97 
98 void
anv_device_perf_close(struct anv_device * device)99 anv_device_perf_close(struct anv_device *device)
100 {
101    if (device->perf_fd == -1)
102       return;
103 
104    close(device->perf_fd);
105    device->perf_fd = -1;
106 }
107 
108 static int
anv_device_perf_open(struct anv_device * device,struct anv_queue * queue,uint64_t metric_id)109 anv_device_perf_open(struct anv_device *device, struct anv_queue *queue, uint64_t metric_id)
110 {
111    uint32_t context_or_exec_queue_id;
112    uint64_t period_exponent = 31; /* slowest sampling period */
113    int ret;
114 
115    switch (device->physical->info.kmd_type) {
116    case INTEL_KMD_TYPE_I915:
117       context_or_exec_queue_id = device->physical->has_vm_control ?
118                                  queue->context_id : device->context_id;
119       break;
120    case INTEL_KMD_TYPE_XE:
121       context_or_exec_queue_id = queue->exec_queue_id;
122       break;
123    default:
124       unreachable("missing");
125       context_or_exec_queue_id = 0;
126    }
127 
128    ret = intel_perf_stream_open(device->physical->perf, device->fd,
129                                 context_or_exec_queue_id, metric_id,
130                                 period_exponent, true, true);
131    if (ret >= 0)
132       device->perf_queue = queue;
133 
134    return ret;
135 }
136 
137 /* VK_INTEL_performance_query */
anv_InitializePerformanceApiINTEL(VkDevice _device,const VkInitializePerformanceApiInfoINTEL * pInitializeInfo)138 VkResult anv_InitializePerformanceApiINTEL(
139     VkDevice                                    _device,
140     const VkInitializePerformanceApiInfoINTEL*  pInitializeInfo)
141 {
142    ANV_FROM_HANDLE(anv_device, device, _device);
143 
144    if (!device->physical->perf)
145       return VK_ERROR_EXTENSION_NOT_PRESENT;
146 
147    /* Not much to do here */
148    return VK_SUCCESS;
149 }
150 
anv_GetPerformanceParameterINTEL(VkDevice _device,VkPerformanceParameterTypeINTEL parameter,VkPerformanceValueINTEL * pValue)151 VkResult anv_GetPerformanceParameterINTEL(
152     VkDevice                                    _device,
153     VkPerformanceParameterTypeINTEL             parameter,
154     VkPerformanceValueINTEL*                    pValue)
155 {
156       ANV_FROM_HANDLE(anv_device, device, _device);
157 
158       if (!device->physical->perf)
159          return VK_ERROR_EXTENSION_NOT_PRESENT;
160 
161       VkResult result = VK_SUCCESS;
162       switch (parameter) {
163       case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
164          pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
165          pValue->data.valueBool = VK_TRUE;
166          break;
167 
168       case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
169          pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
170          pValue->data.value32 = 25;
171          break;
172 
173       default:
174          result = VK_ERROR_FEATURE_NOT_PRESENT;
175          break;
176       }
177 
178       return result;
179 }
180 
anv_CmdSetPerformanceMarkerINTEL(VkCommandBuffer commandBuffer,const VkPerformanceMarkerInfoINTEL * pMarkerInfo)181 VkResult anv_CmdSetPerformanceMarkerINTEL(
182     VkCommandBuffer                             commandBuffer,
183     const VkPerformanceMarkerInfoINTEL*         pMarkerInfo)
184 {
185    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
186 
187    cmd_buffer->intel_perf_marker = pMarkerInfo->marker;
188 
189    return VK_SUCCESS;
190 }
191 
anv_AcquirePerformanceConfigurationINTEL(VkDevice _device,const VkPerformanceConfigurationAcquireInfoINTEL * pAcquireInfo,VkPerformanceConfigurationINTEL * pConfiguration)192 VkResult anv_AcquirePerformanceConfigurationINTEL(
193     VkDevice                                    _device,
194     const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
195     VkPerformanceConfigurationINTEL*            pConfiguration)
196 {
197    ANV_FROM_HANDLE(anv_device, device, _device);
198    struct anv_performance_configuration_intel *config;
199 
200    config = vk_object_alloc(&device->vk, NULL, sizeof(*config),
201                             VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);
202    if (!config)
203       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
204 
205    if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
206       config->register_config =
207          intel_perf_load_configuration(device->physical->perf, device->fd,
208                                      INTEL_PERF_QUERY_GUID_MDAPI);
209       if (!config->register_config) {
210          vk_object_free(&device->vk, NULL, config);
211          return VK_INCOMPLETE;
212       }
213 
214       uint64_t ret =
215          intel_perf_store_configuration(device->physical->perf, device->fd,
216                                       config->register_config, NULL /* guid */);
217       if (ret == 0) {
218          ralloc_free(config->register_config);
219          vk_object_free(&device->vk, NULL, config);
220          return VK_INCOMPLETE;
221       }
222 
223       config->config_id = ret;
224    }
225 
226    *pConfiguration = anv_performance_configuration_intel_to_handle(config);
227 
228    return VK_SUCCESS;
229 }
230 
anv_ReleasePerformanceConfigurationINTEL(VkDevice _device,VkPerformanceConfigurationINTEL _configuration)231 VkResult anv_ReleasePerformanceConfigurationINTEL(
232     VkDevice                                    _device,
233     VkPerformanceConfigurationINTEL             _configuration)
234 {
235    ANV_FROM_HANDLE(anv_device, device, _device);
236    ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
237 
238    if (!INTEL_DEBUG(DEBUG_NO_OACONFIG))
239       intel_perf_remove_configuration(device->physical->perf, device->fd, config->config_id);
240 
241    ralloc_free(config->register_config);
242 
243    vk_object_free(&device->vk, NULL, config);
244 
245    return VK_SUCCESS;
246 }
247 
248 static struct anv_queue *
anv_device_get_perf_queue(struct anv_device * device)249 anv_device_get_perf_queue(struct anv_device *device)
250 {
251    for (uint32_t i = 0; i < device->queue_count; i++) {
252       struct anv_queue *queue = &device->queues[i];
253       const struct anv_queue_family *family = queue->family;
254 
255       if (family->supports_perf)
256          return queue;
257    }
258 
259    return NULL;
260 }
261 
anv_QueueSetPerformanceConfigurationINTEL(VkQueue _queue,VkPerformanceConfigurationINTEL _configuration)262 VkResult anv_QueueSetPerformanceConfigurationINTEL(
263     VkQueue                                     _queue,
264     VkPerformanceConfigurationINTEL             _configuration)
265 {
266    ANV_FROM_HANDLE(anv_queue, queue, _queue);
267    ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
268    struct anv_device *device = queue->device;
269 
270    if (queue != anv_device_get_perf_queue(device))
271       return VK_ERROR_UNKNOWN;
272 
273    if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
274       if (device->perf_fd < 0) {
275          device->perf_fd = anv_device_perf_open(device, queue, config->config_id);
276          if (device->perf_fd < 0)
277             return VK_ERROR_INITIALIZATION_FAILED;
278       } else {
279          int ret = intel_perf_stream_set_metrics_id(device->physical->perf,
280                                                     device->perf_fd,
281                                                     config->config_id);
282          if (ret < 0)
283             return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
284       }
285    }
286 
287    return VK_SUCCESS;
288 }
289 
/**
 * Entry point for vkUninitializePerformanceApiINTEL: closes the device's
 * perf stream through anv_device_perf_close().
 */
void anv_UninitializePerformanceApiINTEL(VkDevice _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   anv_device_perf_close(device);
}
297 
298 /* VK_KHR_performance_query */
299 static const VkPerformanceCounterUnitKHR
300 intel_perf_counter_unit_to_vk_unit[] = {
301    [INTEL_PERF_COUNTER_UNITS_BYTES]                                = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
302    [INTEL_PERF_COUNTER_UNITS_HZ]                                   = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
303    [INTEL_PERF_COUNTER_UNITS_NS]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
304    [INTEL_PERF_COUNTER_UNITS_US]                                   = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
305    [INTEL_PERF_COUNTER_UNITS_PIXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
306    [INTEL_PERF_COUNTER_UNITS_TEXELS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
307    [INTEL_PERF_COUNTER_UNITS_THREADS]                              = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
308    [INTEL_PERF_COUNTER_UNITS_PERCENT]                              = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
309    [INTEL_PERF_COUNTER_UNITS_MESSAGES]                             = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
310    [INTEL_PERF_COUNTER_UNITS_NUMBER]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
311    [INTEL_PERF_COUNTER_UNITS_CYCLES]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
312    [INTEL_PERF_COUNTER_UNITS_EVENTS]                               = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
313    [INTEL_PERF_COUNTER_UNITS_UTILIZATION]                          = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
314    [INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
315    [INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
316    [INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES]        = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
317    [INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE]           = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
318 };
319 
320 static const VkPerformanceCounterStorageKHR
321 intel_perf_counter_data_type_to_vk_storage[] = {
322    [INTEL_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
323    [INTEL_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
324    [INTEL_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
325    [INTEL_PERF_COUNTER_DATA_TYPE_FLOAT]  = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
326    [INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
327 };
328 
anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(VkPhysicalDevice physicalDevice,uint32_t queueFamilyIndex,uint32_t * pCounterCount,VkPerformanceCounterKHR * pCounters,VkPerformanceCounterDescriptionKHR * pCounterDescriptions)329 VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
330     VkPhysicalDevice                            physicalDevice,
331     uint32_t                                    queueFamilyIndex,
332     uint32_t*                                   pCounterCount,
333     VkPerformanceCounterKHR*                    pCounters,
334     VkPerformanceCounterDescriptionKHR*         pCounterDescriptions)
335 {
336    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
337    struct intel_perf_config *perf = pdevice->perf;
338 
339    uint32_t desc_count = *pCounterCount;
340 
341    VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterKHR, out, pCounters, pCounterCount);
342    VK_OUTARRAY_MAKE_TYPED(VkPerformanceCounterDescriptionKHR, out_desc,
343                           pCounterDescriptions, &desc_count);
344 
345    /* We cannot support performance queries on anything other than RCS,
346     * because the MI_REPORT_PERF_COUNT command is not available on other
347     * engines.
348     */
349    struct anv_queue_family *queue_family =
350       &pdevice->queue.families[queueFamilyIndex];
351    if (queue_family->engine_class != INTEL_ENGINE_CLASS_RENDER)
352       return vk_outarray_status(&out);
353 
354    for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
355       const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter;
356 
357       vk_outarray_append_typed(VkPerformanceCounterKHR, &out, counter) {
358          counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units];
359          counter->scope = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR;
360          counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type];
361 
362          unsigned char sha1_result[20];
363          _mesa_sha1_compute(intel_counter->symbol_name,
364                             strlen(intel_counter->symbol_name),
365                             sha1_result);
366          memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
367       }
368 
369       vk_outarray_append_typed(VkPerformanceCounterDescriptionKHR, &out_desc, desc) {
370          desc->flags = 0; /* None so far. */
371          snprintf(desc->name, sizeof(desc->name), "%s",
372                   INTEL_DEBUG(DEBUG_PERF_SYMBOL_NAMES) ?
373                   intel_counter->symbol_name :
374                   intel_counter->name);
375          snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);
376          snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);
377       }
378    }
379 
380    return vk_outarray_status(&out);
381 }
382 
/**
 * Entry point for vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR.
 *
 * Writes to *pNumPasses the value intel_perf_get_n_passes() returns for the
 * requested counter indices, or 0 when the physical device has no perf
 * configuration.
 */
void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
    VkPhysicalDevice                            physicalDevice,
    const VkQueryPoolPerformanceCreateInfoKHR*  pPerformanceQueryCreateInfo,
    uint32_t*                                   pNumPasses)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   struct intel_perf_config *perf = pdevice->perf;

   if (perf != NULL) {
      *pNumPasses =
         intel_perf_get_n_passes(perf,
                                 pPerformanceQueryCreateInfo->pCounterIndices,
                                 pPerformanceQueryCreateInfo->counterIndexCount,
                                 NULL);
   } else {
      *pNumPasses = 0;
   }
}
401 
anv_AcquireProfilingLockKHR(VkDevice _device,const VkAcquireProfilingLockInfoKHR * pInfo)402 VkResult anv_AcquireProfilingLockKHR(
403     VkDevice                                    _device,
404     const VkAcquireProfilingLockInfoKHR*        pInfo)
405 {
406    ANV_FROM_HANDLE(anv_device, device, _device);
407    struct intel_perf_config *perf = device->physical->perf;
408    struct intel_perf_query_info *first_metric_set = &perf->queries[0];
409    int fd = -1;
410 
411    assert(device->perf_fd == -1);
412 
413    if (!INTEL_DEBUG(DEBUG_NO_OACONFIG)) {
414       struct anv_queue *queue = anv_device_get_perf_queue(device);
415 
416       if (queue == NULL)
417          return VK_ERROR_UNKNOWN;
418       fd = anv_device_perf_open(device, queue, first_metric_set->oa_metrics_set_id);
419       if (fd < 0)
420          return VK_TIMEOUT;
421    }
422 
423    device->perf_fd = fd;
424    return VK_SUCCESS;
425 }
426 
/**
 * Entry point for vkReleaseProfilingLockKHR: closes the device's perf
 * stream through anv_device_perf_close().
 */
void anv_ReleaseProfilingLockKHR(VkDevice _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   anv_device_perf_close(device);
}
434 
435 void
anv_perf_write_pass_results(struct intel_perf_config * perf,struct anv_query_pool * pool,uint32_t pass,const struct intel_perf_query_result * accumulated_results,union VkPerformanceCounterResultKHR * results)436 anv_perf_write_pass_results(struct intel_perf_config *perf,
437                             struct anv_query_pool *pool, uint32_t pass,
438                             const struct intel_perf_query_result *accumulated_results,
439                             union VkPerformanceCounterResultKHR *results)
440 {
441    const struct intel_perf_query_info *query = pool->pass_query[pass];
442 
443    for (uint32_t c = 0; c < pool->n_counters; c++) {
444       const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];
445 
446       if (counter_pass->query != query)
447          continue;
448 
449       switch (pool->pass_query[pass]->kind) {
450       case INTEL_PERF_QUERY_TYPE_PIPELINE: {
451          assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
452          uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
453          results[c].uint64 = accumulated_results->accumulator[accu_offset];
454          break;
455       }
456 
457       case INTEL_PERF_QUERY_TYPE_OA:
458       case INTEL_PERF_QUERY_TYPE_RAW:
459          switch (counter_pass->counter->data_type) {
460          case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
461             results[c].uint64 =
462                counter_pass->counter->oa_counter_read_uint64(perf,
463                                                              counter_pass->query,
464                                                              accumulated_results);
465             break;
466          case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
467             results[c].float32 =
468                counter_pass->counter->oa_counter_read_float(perf,
469                                                             counter_pass->query,
470                                                             accumulated_results);
471             break;
472          default:
473             /* So far we aren't using uint32, double or bool32... */
474             unreachable("unexpected counter data type");
475          }
476          break;
477 
478       default:
479          unreachable("invalid query type");
480       }
481 
482       /* The Vulkan extension only has nanoseconds as a unit */
483       if (counter_pass->counter->units == INTEL_PERF_COUNTER_UNITS_US) {
484          assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
485          results[c].uint64 *= 1000;
486       }
487    }
488 }
489