1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "intel_decoder.h"
25 #include "intel_decoder_private.h"
26
27 #include "util/macros.h"
28 #include "util/u_debug.h"
29 #include "util/u_dynarray.h"
30 #include "util/u_math.h" /* Needed for ROUND_DOWN_TO */
31
32 #include <string.h>
33
/* Runtime-togglable decode options.  Parsed from the comma-separated
 * INTEL_DECODE environment variable in intel_batch_decode_ctx_init() via
 * parse_enable_string(); each keyword ORs the matching flag into ctx->flags.
 */
static const struct debug_control debug_control[] = {
   { "color", INTEL_BATCH_DECODE_IN_COLOR },
   { "full", INTEL_BATCH_DECODE_FULL },
   { "offsets", INTEL_BATCH_DECODE_OFFSETS },
   { "floats", INTEL_BATCH_DECODE_FLOATS },
   { "surfaces", INTEL_BATCH_DECODE_SURFACES },
   { "accumulate", INTEL_BATCH_DECODE_ACCUMULATE },
   { "vb-data", INTEL_BATCH_DECODE_VB_DATA },
   { NULL, 0 }
};
44
/* Initialize a batch decoder context.
 *
 * get_bo:         callback resolving a (ppgtt, address) pair to a mapped BO.
 * get_state_size: optional callback returning the size of the state object
 *                 at an address (may be NULL; see update_count()).
 * xml_path:       optional genxml path; NULL uses the built-in spec for
 *                 devinfo.
 * user_data:      opaque pointer passed back to both callbacks.
 *
 * The effective flags are 'flags' possibly modified by the INTEL_DECODE
 * environment variable (see debug_control above).
 */
void
intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx,
                            const struct intel_device_info *devinfo,
                            FILE *fp, enum intel_batch_decode_flags flags,
                            const char *xml_path,
                            struct intel_batch_decode_bo (*get_bo)(void *,
                                                                   bool,
                                                                   uint64_t),
                            unsigned (*get_state_size)(void *, uint64_t,
                                                       uint64_t),
                            void *user_data)
{
   memset(ctx, 0, sizeof(*ctx));

   ctx->devinfo = *devinfo;
   ctx->get_bo = get_bo;
   ctx->get_state_size = get_state_size;
   ctx->user_data = user_data;
   ctx->fp = fp;
   ctx->flags = parse_enable_string(getenv("INTEL_DECODE"), flags, debug_control);
   ctx->max_vbo_decoded_lines = -1; /* No limit! */
   ctx->engine = INTEL_ENGINE_CLASS_RENDER;

   if (xml_path == NULL)
      ctx->spec = intel_spec_load(devinfo);
   else
      ctx->spec = intel_spec_load_from_path(devinfo, xml_path);

   ctx->commands =
      _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->stats =
      _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
}
78
79 void
intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx * ctx)80 intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx *ctx)
81 {
82 _mesa_hash_table_destroy(ctx->commands, NULL);
83 _mesa_hash_table_destroy(ctx->stats, NULL);
84 intel_spec_destroy(ctx->spec);
85 }
86
/* ANSI terminal escape sequences used when INTEL_BATCH_DECODE_IN_COLOR is
 * set.  Note "\e" (ESC) is a GNU C extension.
 */
#define CSI "\e["
#define RED_COLOR CSI "31m"
#define BLUE_HEADER CSI "0;44m" CSI "1;37m"
#define GREEN_HEADER CSI "1;42m"
#define NORMAL CSI "0m"
92
93 static void
ctx_print_group(struct intel_batch_decode_ctx * ctx,struct intel_group * group,uint64_t address,const void * map)94 ctx_print_group(struct intel_batch_decode_ctx *ctx,
95 struct intel_group *group,
96 uint64_t address, const void *map)
97 {
98 intel_print_group(ctx->fp, group, address, map, 0,
99 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) != 0);
100 }
101
102 struct intel_batch_decode_bo
ctx_get_bo(struct intel_batch_decode_ctx * ctx,bool ppgtt,uint64_t addr)103 ctx_get_bo(struct intel_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr)
104 {
105 if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0)) {
106 /* On Broadwell and above, we have 48-bit addresses which consume two
107 * dwords. Some packets require that these get stored in a "canonical
108 * form" which means that bit 47 is sign-extended through the upper
109 * bits. In order to correctly handle those aub dumps, we need to mask
110 * off the top 16 bits.
111 */
112 addr &= (~0ull >> 16);
113 }
114
115 struct intel_batch_decode_bo bo = ctx->get_bo(ctx->user_data, ppgtt, addr);
116
117 if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0))
118 bo.addr &= (~0ull >> 16);
119
120 /* We may actually have an offset into the bo */
121 if (bo.map != NULL) {
122 assert(bo.addr <= addr);
123 uint64_t offset = addr - bo.addr;
124 bo.map += offset;
125 bo.addr += offset;
126 bo.size -= offset;
127 }
128
129 return bo;
130 }
131
132 static int
update_count(struct intel_batch_decode_ctx * ctx,uint64_t address,uint64_t base_address,unsigned element_dwords,unsigned guess)133 update_count(struct intel_batch_decode_ctx *ctx,
134 uint64_t address,
135 uint64_t base_address,
136 unsigned element_dwords,
137 unsigned guess)
138 {
139 unsigned size = 0;
140
141 if (ctx->get_state_size)
142 size = ctx->get_state_size(ctx->user_data, address, base_address);
143
144 if (size > 0)
145 return size / (sizeof(uint32_t) * element_dwords);
146
147 /* In the absence of any information, just guess arbitrarily. */
148 return guess;
149 }
150
/* Forward a kernel start pointer (ksp) to the user-provided shader
 * disassembly callback.  ctx->disassemble_program is invoked
 * unconditionally, so it must be set before any shader-decoding path runs.
 */
static inline void
ctx_disassemble_program(struct intel_batch_decode_ctx *ctx,
                        uint32_t ksp,
                        const char *short_name,
                        const char *name)
{
   ctx->disassemble_program(ctx, ksp, short_name, name);
}
159
/* Heuristic to determine whether a uint32_t is probably actually a float
 * (http://stackoverflow.com/a/2953466)
 */
static bool
probably_float(uint32_t bits)
{
   /* Unbiased IEEE 754 binary32 exponent and mantissa. */
   const int exponent = (int)((bits >> 23) & 0xff) - 127;
   const uint32_t mantissa = bits & 0x007fffff;

   /* +- 0.0 */
   if (exponent == -127 && mantissa == 0)
      return true;

   /* +- 1 billionth to 1 billion */
   if (exponent >= -30 && exponent <= 30)
      return true;

   /* some value with only a few binary digits */
   return (mantissa & 0x0000ffff) == 0;
}
184
/* Hex/float-dump the first read_length bytes of a BO, 8 dwords per row.
 * A non-zero 'pitch' (in bytes) also forces a line break at each element
 * boundary; max_lines >= 0 caps the number of printed rows (-1 = no cap).
 * With INTEL_BATCH_DECODE_FLOATS set, dwords that look like floats (see
 * probably_float()) are printed as floats.
 */
static void
ctx_print_buffer(struct intel_batch_decode_ctx *ctx,
                 struct intel_batch_decode_bo bo,
                 uint32_t read_length,
                 uint32_t pitch,
                 int max_lines)
{
   /* Clamp to the BO size and round down to whole dwords. */
   const uint32_t *dw_end =
      bo.map + ROUND_DOWN_TO(MIN2(bo.size, read_length), 4);

   /* line_count starts at -1 so the break before the first printed row
    * does not count against max_lines.
    */
   int column_count = 0, pitch_col_count = 0, line_count = -1;
   for (const uint32_t *dw = bo.map; dw < dw_end; dw++) {
      /* Break the line at the element pitch or after 8 columns. */
      if (pitch_col_count * 4 == pitch || column_count == 8) {
         fprintf(ctx->fp, "\n");
         column_count = 0;
         if (pitch_col_count * 4 == pitch)
            pitch_col_count = 0;
         line_count++;

         if (max_lines >= 0 && line_count >= max_lines)
            break;
      }
      /* NOTE(review): both branches of this ternary print the same string
       * here — the wider first-column indent was likely lost in formatting;
       * confirm against the original file.
       */
      fprintf(ctx->fp, column_count == 0 ? " " : " ");

      if ((ctx->flags & INTEL_BATCH_DECODE_FLOATS) && probably_float(*dw))
         fprintf(ctx->fp, " %8.2f", *(float *) dw);
      else
         fprintf(ctx->fp, " 0x%08x", *dw);

      column_count++;
      pitch_col_count++;
   }
   fprintf(ctx->fp, "\n");
}
219
/* Look up the genxml description of the instruction at 'p' for the
 * context's currently selected engine class.
 */
static struct intel_group *
intel_ctx_find_instruction(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   return intel_spec_find_instruction(ctx->spec, ctx->engine, p);
}
225
226 static void
handle_state_base_address(struct intel_batch_decode_ctx * ctx,const uint32_t * p)227 handle_state_base_address(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
228 {
229 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
230
231 struct intel_field_iterator iter;
232 intel_field_iterator_init(&iter, inst, p, 0, false);
233
234 uint64_t surface_base = 0, dynamic_base = 0, instruction_base = 0;
235 bool surface_modify = 0, dynamic_modify = 0, instruction_modify = 0;
236
237 while (intel_field_iterator_next(&iter)) {
238 if (strcmp(iter.name, "Surface State Base Address") == 0) {
239 surface_base = iter.raw_value;
240 } else if (strcmp(iter.name, "Dynamic State Base Address") == 0) {
241 dynamic_base = iter.raw_value;
242 } else if (strcmp(iter.name, "Instruction Base Address") == 0) {
243 instruction_base = iter.raw_value;
244 } else if (strcmp(iter.name, "Surface State Base Address Modify Enable") == 0) {
245 surface_modify = iter.raw_value;
246 } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) {
247 dynamic_modify = iter.raw_value;
248 } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) {
249 instruction_modify = iter.raw_value;
250 }
251 }
252
253 if (dynamic_modify)
254 ctx->dynamic_base = dynamic_base;
255
256 if (surface_modify)
257 ctx->surface_base = surface_base;
258
259 if (instruction_modify)
260 ctx->instruction_base = instruction_base;
261 }
262
263 static void
handle_binding_table_pool_alloc(struct intel_batch_decode_ctx * ctx,const uint32_t * p)264 handle_binding_table_pool_alloc(struct intel_batch_decode_ctx *ctx,
265 const uint32_t *p)
266 {
267 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
268
269 struct intel_field_iterator iter;
270 intel_field_iterator_init(&iter, inst, p, 0, false);
271
272 uint64_t bt_pool_base = 0;
273 bool bt_pool_enable = false;
274
275 while (intel_field_iterator_next(&iter)) {
276 if (strcmp(iter.name, "Binding Table Pool Base Address") == 0) {
277 bt_pool_base = iter.raw_value;
278 } else if (strcmp(iter.name, "Binding Table Pool Enable") == 0) {
279 bt_pool_enable = iter.raw_value;
280 }
281 }
282
283 if (bt_pool_enable || ctx->devinfo.verx10 >= 125) {
284 ctx->bt_pool_base = bt_pool_base;
285 } else {
286 ctx->bt_pool_base = 0;
287 }
288 }
289
/* Decode and print a binding table: an array of 32-bit offsets to
 * RENDER_SURFACE_STATE entries, located at 'offset' from the binding table
 * pool base (or the surface state base when no pool is bound).  A negative
 * 'count' means the length is unknown and is estimated via update_count().
 */
static void
dump_binding_table(struct intel_batch_decode_ctx *ctx,
                   uint32_t offset, int count)
{
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "RENDER_SURFACE_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find RENDER_SURFACE_STATE info\n");
      return;
   }

   /* Most platforms use a 16-bit pointer with 32B alignment in bits 15:5. */
   uint32_t btp_alignment = 32;
   uint32_t btp_pointer_bits = 16;

   if (ctx->devinfo.verx10 >= 125) {
      /* The pointer is now 21-bit with 32B alignment in bits 20:5. */
      btp_pointer_bits = 21;
   } else if (ctx->use_256B_binding_tables) {
      /* When 256B binding tables are enabled, we have to shift the offset
       * which is stored in bits 15:5 but interpreted as bits 18:8 of the
       * actual offset. The effective pointer is 19-bit with 256B alignment.
       */
      offset <<= 3;
      btp_pointer_bits = 19;
      btp_alignment = 256;
   }

   /* Entries are relative to the BT pool when one is set, otherwise to the
    * surface state base (see handle_binding_table_pool_alloc()).
    */
   const uint64_t bt_pool_base = ctx->bt_pool_base ? ctx->bt_pool_base :
                                 ctx->surface_base;

   if (count < 0) {
      count = update_count(ctx, bt_pool_base + offset,
                           bt_pool_base, 1, 32);
   }

   if (offset % btp_alignment != 0 || offset >= (1u << btp_pointer_bits)) {
      fprintf(ctx->fp, " invalid binding table pointer\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, bt_pool_base + offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, " binding table unavailable\n");
      return;
   }

   const uint32_t *pointers = bind_bo.map;
   for (int i = 0; i < count; i++) {
      /* Stop once the next entry would read past the end of the BO. */
      if (((uintptr_t)&pointers[i] >= ((uintptr_t)bind_bo.map + bind_bo.size)))
         break;

      uint64_t addr = ctx->surface_base + pointers[i];
      struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
      uint32_t size = strct->dw_length * 4;

      /* Surface state must be 32B-aligned and fit entirely inside the
       * mapped BO.
       */
      if (pointers[i] % 32 != 0 ||
          addr < bo.addr || addr + size > bo.addr + bo.size) {
         fprintf(ctx->fp, "pointer %u: 0x%08x <not valid>\n", i, pointers[i]);
         continue;
      }

      fprintf(ctx->fp, "pointer %u: 0x%08x\n", i, pointers[i]);
      if (ctx->flags & INTEL_BATCH_DECODE_SURFACES)
         ctx_print_group(ctx, strct, addr, bo.map + (addr - bo.addr));
   }
}
359
360 static void
dump_samplers(struct intel_batch_decode_ctx * ctx,uint32_t offset,int count)361 dump_samplers(struct intel_batch_decode_ctx *ctx, uint32_t offset, int count)
362 {
363 struct intel_group *strct = intel_spec_find_struct(ctx->spec, "SAMPLER_STATE");
364 uint64_t state_addr = ctx->dynamic_base + offset;
365
366 assert(count > 0);
367
368 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
369 const void *state_map = bo.map;
370
371 if (state_map == NULL) {
372 fprintf(ctx->fp, " samplers unavailable\n");
373 return;
374 }
375
376 if (offset % 32 != 0) {
377 fprintf(ctx->fp, " invalid sampler state pointer\n");
378 return;
379 }
380
381 const unsigned sampler_state_size = strct->dw_length * 4;
382
383 if (count * sampler_state_size >= bo.size) {
384 fprintf(ctx->fp, " sampler state ends after bo ends\n");
385 assert(!"sampler state ends after bo ends");
386 return;
387 }
388
389 for (int i = 0; i < count; i++) {
390 fprintf(ctx->fp, "sampler state %d\n", i);
391 if (ctx->flags & INTEL_BATCH_DECODE_SAMPLERS)
392 ctx_print_group(ctx, strct, state_addr, state_map);
393 state_addr += sampler_state_size;
394 state_map += sampler_state_size;
395 }
396 }
397
398 static void
handle_interface_descriptor_data(struct intel_batch_decode_ctx * ctx,struct intel_group * desc,const uint32_t * p)399 handle_interface_descriptor_data(struct intel_batch_decode_ctx *ctx,
400 struct intel_group *desc, const uint32_t *p)
401 {
402 uint64_t ksp = 0;
403 uint32_t sampler_offset = 0, sampler_count = 0;
404 uint32_t binding_table_offset = 0, binding_entry_count = 0;
405
406 struct intel_field_iterator iter;
407 intel_field_iterator_init(&iter, desc, p, 0, false);
408 while (intel_field_iterator_next(&iter)) {
409 if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
410 ksp = strtoll(iter.value, NULL, 16);
411 } else if (strcmp(iter.name, "Sampler State Pointer") == 0) {
412 sampler_offset = strtol(iter.value, NULL, 16);
413 } else if (strcmp(iter.name, "Sampler Count") == 0) {
414 sampler_count = strtol(iter.value, NULL, 10);
415 } else if (strcmp(iter.name, "Binding Table Pointer") == 0) {
416 binding_table_offset = strtol(iter.value, NULL, 16);
417 } else if (strcmp(iter.name, "Binding Table Entry Count") == 0) {
418 binding_entry_count = strtol(iter.value, NULL, 10);
419 }
420 }
421
422 ctx_disassemble_program(ctx, ksp, "CS", "compute shader");
423 fprintf(ctx->fp, "\n");
424
425 if (sampler_count)
426 dump_samplers(ctx, sampler_offset, sampler_count);
427 if (binding_entry_count)
428 dump_binding_table(ctx, binding_table_offset, binding_entry_count);
429 }
430
431 static void
handle_media_interface_descriptor_load(struct intel_batch_decode_ctx * ctx,const uint32_t * p)432 handle_media_interface_descriptor_load(struct intel_batch_decode_ctx *ctx,
433 const uint32_t *p)
434 {
435 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
436 struct intel_group *desc =
437 intel_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
438
439 struct intel_field_iterator iter;
440 intel_field_iterator_init(&iter, inst, p, 0, false);
441 uint32_t descriptor_offset = 0;
442 int descriptor_count = 0;
443 while (intel_field_iterator_next(&iter)) {
444 if (strcmp(iter.name, "Interface Descriptor Data Start Address") == 0) {
445 descriptor_offset = strtol(iter.value, NULL, 16);
446 } else if (strcmp(iter.name, "Interface Descriptor Total Length") == 0) {
447 descriptor_count =
448 strtol(iter.value, NULL, 16) / (desc->dw_length * 4);
449 }
450 }
451
452 uint64_t desc_addr = ctx->dynamic_base + descriptor_offset;
453 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, desc_addr);
454 const void *desc_map = bo.map;
455
456 if (desc_map == NULL) {
457 fprintf(ctx->fp, " interface descriptors unavailable\n");
458 return;
459 }
460
461 for (int i = 0; i < descriptor_count; i++) {
462 fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset);
463
464 ctx_print_group(ctx, desc, desc_addr, desc_map);
465
466 handle_interface_descriptor_data(ctx, desc, desc_map);
467
468 desc_map += desc->dw_length;
469 desc_addr += desc->dw_length * 4;
470 }
471 }
472
473 static void
handle_compute_walker(struct intel_batch_decode_ctx * ctx,const uint32_t * p)474 handle_compute_walker(struct intel_batch_decode_ctx *ctx,
475 const uint32_t *p)
476 {
477 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
478
479 struct intel_field_iterator iter;
480 intel_field_iterator_init(&iter, inst, p, 0, false);
481 while (intel_field_iterator_next(&iter)) {
482 if (strcmp(iter.name, "Interface Descriptor") == 0) {
483 handle_interface_descriptor_data(ctx, iter.struct_desc,
484 &iter.p[iter.start_bit / 32]);
485 }
486 }
487 }
488
489 static void
handle_media_curbe_load(struct intel_batch_decode_ctx * ctx,const uint32_t * p)490 handle_media_curbe_load(struct intel_batch_decode_ctx *ctx,
491 const uint32_t *p)
492 {
493 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
494
495 struct intel_field_iterator iter;
496 intel_field_iterator_init(&iter, inst, p, 0, false);
497
498 uint32_t dynamic_state_offset = 0;
499 uint32_t dynamic_state_length = 0;
500
501 while (intel_field_iterator_next(&iter)) {
502 if (strcmp(iter.name, "CURBE Data Start Address") == 0) {
503 dynamic_state_offset = iter.raw_value;
504 } else if (strcmp(iter.name, "CURBE Total Data Length") == 0) {
505 dynamic_state_length = iter.raw_value;
506 }
507 }
508
509 if (dynamic_state_length > 0) {
510 struct intel_batch_decode_bo buffer =
511 ctx_get_bo(ctx, true, ctx->dynamic_base + dynamic_state_offset);
512 if (buffer.map != NULL)
513 ctx_print_buffer(ctx, buffer, dynamic_state_length, 0, -1);
514 }
515 }
516
/* Decode 3DSTATE_VERTEX_BUFFERS and optionally dump each buffer's data.
 *
 * The packet embeds a list of VERTEX_BUFFER_STATE structures.  Fields are
 * accumulated per-struct; a buffer is flushed once a size-determining field
 * is seen ("Buffer Size" on newer gens, "End Address" on older ones).
 */
static void
handle_3dstate_vertex_buffers(struct intel_batch_decode_ctx *ctx,
                              const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
   struct intel_group *vbs = intel_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE");

   struct intel_batch_decode_bo vb = {};
   uint32_t vb_size = 0;
   int index = -1;
   int pitch = -1;
   bool ready = false;   /* true once the current buffer's size is known */

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (iter.struct_desc != vbs)
         continue;

      struct intel_field_iterator vbs_iter;
      intel_field_iterator_init(&vbs_iter, vbs, &iter.p[iter.start_bit / 32], 0, false);
      while (intel_field_iterator_next(&vbs_iter)) {
         if (strcmp(vbs_iter.name, "Vertex Buffer Index") == 0) {
            index = vbs_iter.raw_value;
         } else if (strcmp(vbs_iter.name, "Buffer Pitch") == 0) {
            pitch = vbs_iter.raw_value;
         } else if (strcmp(vbs_iter.name, "Buffer Starting Address") == 0) {
            vb = ctx_get_bo(ctx, true, vbs_iter.raw_value);
         } else if (strcmp(vbs_iter.name, "Buffer Size") == 0) {
            vb_size = vbs_iter.raw_value;
            ready = true;
         } else if (strcmp(vbs_iter.name, "End Address") == 0) {
            /* Older gens give an inclusive end address instead of a size. */
            if (vb.map && vbs_iter.raw_value >= vb.addr)
               vb_size = (vbs_iter.raw_value + 1) - vb.addr;
            else
               vb_size = 0;
            ready = true;
         }

         if (!ready)
            continue;

         fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size);

         /* NOTE(review): this 'continue' skips the per-buffer reset at the
          * bottom of the loop, and the following map/size check partially
          * repeats the NULL test — verify whether the state should also be
          * reset on the unavailable-buffer path.
          */
         if (vb.map == NULL) {
            fprintf(ctx->fp, " buffer contents unavailable\n");
            continue;
         }

         if (vb.map == 0 || vb_size == 0)
            continue;

         if (ctx->flags & INTEL_BATCH_DECODE_VB_DATA)
            ctx_print_buffer(ctx, vb, vb_size, pitch, ctx->max_vbo_decoded_lines);

         /* Reset accumulated state for the next VERTEX_BUFFER_STATE. */
         vb.map = NULL;
         vb_size = 0;
         index = -1;
         pitch = -1;
         ready = false;
      }
   }
}
580
581 static void
handle_3dstate_index_buffer(struct intel_batch_decode_ctx * ctx,const uint32_t * p)582 handle_3dstate_index_buffer(struct intel_batch_decode_ctx *ctx,
583 const uint32_t *p)
584 {
585 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
586
587 struct intel_batch_decode_bo ib = {};
588 uint32_t ib_size = 0;
589 uint32_t format = 0;
590
591 struct intel_field_iterator iter;
592 intel_field_iterator_init(&iter, inst, p, 0, false);
593 while (intel_field_iterator_next(&iter)) {
594 if (strcmp(iter.name, "Index Format") == 0) {
595 format = iter.raw_value;
596 } else if (strcmp(iter.name, "Buffer Starting Address") == 0) {
597 ib = ctx_get_bo(ctx, true, iter.raw_value);
598 } else if (strcmp(iter.name, "Buffer Size") == 0) {
599 ib_size = iter.raw_value;
600 }
601 }
602
603 if (ib.map == NULL) {
604 fprintf(ctx->fp, " buffer contents unavailable\n");
605 return;
606 }
607
608 const void *m = ib.map;
609 const void *ib_end = ib.map + MIN2(ib.size, ib_size);
610 for (int i = 0; m < ib_end && i < 10; i++) {
611 switch (format) {
612 case 0:
613 fprintf(ctx->fp, "%3d ", *(uint8_t *)m);
614 m += 1;
615 break;
616 case 1:
617 fprintf(ctx->fp, "%3d ", *(uint16_t *)m);
618 m += 2;
619 break;
620 case 2:
621 fprintf(ctx->fp, "%3d ", *(uint32_t *)m);
622 m += 4;
623 break;
624 }
625 }
626
627 if (m < ib_end)
628 fprintf(ctx->fp, "...");
629 fprintf(ctx->fp, "\n");
630 }
631
/* Decode packets with a single Kernel Start Pointer (VS/GS/HS/DS and the
 * legacy fixed-function state packets) and hand the kernel to the
 * disassembly callback, labelled by shader stage and dispatch mode.
 */
static void
decode_single_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);

   uint64_t ksp = 0;
   /* NOTE(review): condition checks ver >= 11 while the comment says
    * Gfx8+ — confirm which is intended.
    */
   bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gfx8+ only */
   bool is_enabled = true;

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
         ksp = iter.raw_value;
      } else if (strcmp(iter.name, "SIMD8 Dispatch Enable") == 0) {
         is_simd8 = iter.raw_value;
      } else if (strcmp(iter.name, "Dispatch Mode") == 0) {
         is_simd8 = strcmp(iter.value, "SIMD8") == 0;
      } else if (strcmp(iter.name, "Dispatch Enable") == 0) {
         is_simd8 = strcmp(iter.value, "SIMD8") == 0;
      } else if (strcmp(iter.name, "Enable") == 0) {
         is_enabled = iter.raw_value;
      }
   }

   /* Map the packet name to a human-readable stage label. */
   const char *type =
      strcmp(inst->name, "VS_STATE") == 0 ? "vertex shader" :
      strcmp(inst->name, "GS_STATE") == 0 ? "geometry shader" :
      strcmp(inst->name, "SF_STATE") == 0 ? "strips and fans shader" :
      strcmp(inst->name, "CLIP_STATE") == 0 ? "clip shader" :
      strcmp(inst->name, "3DSTATE_DS") == 0 ? "tessellation evaluation shader" :
      strcmp(inst->name, "3DSTATE_HS") == 0 ? "tessellation control shader" :
      strcmp(inst->name, "3DSTATE_VS") == 0 ? (is_simd8 ? "SIMD8 vertex shader" : "vec4 vertex shader") :
      strcmp(inst->name, "3DSTATE_GS") == 0 ? (is_simd8 ? "SIMD8 geometry shader" : "vec4 geometry shader") :
      NULL;
   const char *short_name =
      strcmp(inst->name, "VS_STATE") == 0 ? "VS" :
      strcmp(inst->name, "GS_STATE") == 0 ? "GS" :
      strcmp(inst->name, "SF_STATE") == 0 ? "SF" :
      strcmp(inst->name, "CLIP_STATE") == 0 ? "CL" :
      strcmp(inst->name, "3DSTATE_DS") == 0 ? "DS" :
      strcmp(inst->name, "3DSTATE_HS") == 0 ? "HS" :
      strcmp(inst->name, "3DSTATE_VS") == 0 ? "VS" :
      strcmp(inst->name, "3DSTATE_GS") == 0 ? "GS" :
      NULL;

   if (is_enabled) {
      ctx_disassemble_program(ctx, ksp, short_name, type);
      fprintf(ctx->fp, "\n");
   }
}
683
684 static void
decode_mesh_task_ksp(struct intel_batch_decode_ctx * ctx,const uint32_t * p)685 decode_mesh_task_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
686 {
687 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
688
689 uint64_t ksp = 0;
690 uint64_t local_x_maximum = 0;
691 uint64_t threads = 0;
692
693 struct intel_field_iterator iter;
694 intel_field_iterator_init(&iter, inst, p, 0, false);
695 while (intel_field_iterator_next(&iter)) {
696 if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
697 ksp = iter.raw_value;
698 } else if (strcmp(iter.name, "Local X Maximum") == 0) {
699 local_x_maximum = iter.raw_value;
700 } else if (strcmp(iter.name, "Number of Threads in GPGPU Thread Group") == 0) {
701 threads = iter.raw_value;
702 }
703 }
704
705 const char *type =
706 strcmp(inst->name, "3DSTATE_MESH_SHADER") == 0 ? "mesh shader" :
707 strcmp(inst->name, "3DSTATE_TASK_SHADER") == 0 ? "task shader" :
708 NULL;
709 const char *short_name =
710 strcmp(inst->name, "3DSTATE_MESH_SHADER") == 0 ? "MS" :
711 strcmp(inst->name, "3DSTATE_TASK_SHADER") == 0 ? "TS" :
712 NULL;
713
714 if (threads && local_x_maximum) {
715 ctx_disassemble_program(ctx, ksp, short_name, type);
716 fprintf(ctx->fp, "\n");
717 }
718 }
719
/* Disassemble the pixel shader kernels of a WM/PS packet (pre-Xe2).
 *
 * The hardware exposes up to three kernel start pointers whose meaning
 * depends on which of the 8/16/32-pixel dispatch modes are enabled; this
 * reorders them into a fixed [SIMD8, SIMD16, SIMD32] layout before
 * disassembling each enabled kernel.
 */
static void
decode_ps_kern(struct intel_batch_decode_ctx *ctx,
               struct intel_group *inst, const uint32_t *p)
{
   /* Gfx4 shares a single kernel start pointer across all modes. */
   bool single_ksp = ctx->devinfo.ver == 4;
   uint64_t ksp[3] = {0, 0, 0};
   bool enabled[3] = {false, false, false};

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (strncmp(iter.name, "Kernel Start Pointer ",
                  strlen("Kernel Start Pointer ")) == 0) {
         /* Field names end in a digit: "Kernel Start Pointer 0/1/2". */
         int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
         ksp[idx] = strtol(iter.value, NULL, 16);
      } else if (strcmp(iter.name, "8 Pixel Dispatch Enable") == 0) {
         enabled[0] = strcmp(iter.value, "true") == 0;
      } else if (strcmp(iter.name, "16 Pixel Dispatch Enable") == 0) {
         enabled[1] = strcmp(iter.value, "true") == 0;
      } else if (strcmp(iter.name, "32 Pixel Dispatch Enable") == 0) {
         enabled[2] = strcmp(iter.value, "true") == 0;
      }
   }

   if (single_ksp)
      ksp[1] = ksp[2] = ksp[0];

   /* Reorder KSPs to be [8, 16, 32] instead of the hardware order: with
    * exactly one mode enabled the kernel lives in KSP[0]; otherwise
    * KSP[1]/KSP[2] are swapped relative to the desired order.
    */
   if (enabled[0] + enabled[1] + enabled[2] == 1) {
      if (enabled[1]) {
         ksp[1] = ksp[0];
         ksp[0] = 0;
      } else if (enabled[2]) {
         ksp[2] = ksp[0];
         ksp[0] = 0;
      }
   } else {
      uint64_t tmp = ksp[1];
      ksp[1] = ksp[2];
      ksp[2] = tmp;
   }

   if (enabled[0])
      ctx_disassemble_program(ctx, ksp[0], "FS8", "SIMD8 fragment shader");
   if (enabled[1])
      ctx_disassemble_program(ctx, ksp[1], "FS16", "SIMD16 fragment shader");
   if (enabled[2])
      ctx_disassemble_program(ctx, ksp[2], "FS32", "SIMD32 fragment shader");

   if (enabled[0] || enabled[1] || enabled[2])
      fprintf(ctx->fp, "\n");
}
772
773 static void
decode_ps_kern_xe2(struct intel_batch_decode_ctx * ctx,struct intel_group * inst,const uint32_t * p)774 decode_ps_kern_xe2(struct intel_batch_decode_ctx *ctx,
775 struct intel_group *inst, const uint32_t *p)
776 {
777 uint64_t ksp[2] = {0, 0};
778 bool enabled[2] = {false, false};
779 int width[2] = {0, 0};
780
781 struct intel_field_iterator iter;
782 intel_field_iterator_init(&iter, inst, p, 0, false);
783 while (intel_field_iterator_next(&iter)) {
784 if (strncmp(iter.name, "Kernel Start Pointer ",
785 strlen("Kernel Start Pointer ")) == 0) {
786 int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
787 ksp[idx] = strtol(iter.value, NULL, 16);
788 } else if (strcmp(iter.name, "Kernel 0 Enable") == 0) {
789 enabled[0] = strcmp(iter.value, "true") == 0;
790 } else if (strcmp(iter.name, "Kernel 1 Enable") == 0) {
791 enabled[1] = strcmp(iter.value, "true") == 0;
792 } else if (strcmp(iter.name, "Kernel[0] : SIMD Width") == 0) {
793 width[0] = strncmp(iter.value, "0 ", 2) == 0 ? 16 : 32;
794 } else if (strcmp(iter.name, "Kernel[1] : SIMD Width") == 0) {
795 width[1] = strncmp(iter.value, "0 ", 2) == 0 ? 16 : 32;
796 }
797 }
798
799 for (int i = 0; i < 2; i++) {
800 if (enabled[i])
801 ctx_disassemble_program(ctx, ksp[i], "FS",
802 width[i] == 16 ?
803 "SIMD16 fragment shader" :
804 "SIMD32 fragment shader");
805 }
806
807 if (enabled[0] || enabled[1])
808 fprintf(ctx->fp, "\n");
809 }
810
811 static void
decode_ps_kernels(struct intel_batch_decode_ctx * ctx,const uint32_t * p)812 decode_ps_kernels(struct intel_batch_decode_ctx *ctx,
813 const uint32_t *p)
814 {
815 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
816 if (ctx->devinfo.ver >= 20)
817 decode_ps_kern_xe2(ctx, inst, p);
818 else
819 decode_ps_kern(ctx, inst, p);
820 }
821
822 static void
decode_3dstate_constant_all(struct intel_batch_decode_ctx * ctx,const uint32_t * p)823 decode_3dstate_constant_all(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
824 {
825 struct intel_group *inst =
826 intel_spec_find_instruction(ctx->spec, ctx->engine, p);
827 struct intel_group *body =
828 intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_ALL_DATA");
829
830 uint32_t read_length[4] = {0};
831 struct intel_batch_decode_bo buffer[4];
832 memset(buffer, 0, sizeof(buffer));
833
834 struct intel_field_iterator outer;
835 intel_field_iterator_init(&outer, inst, p, 0, false);
836 int idx = 0;
837 while (intel_field_iterator_next(&outer)) {
838 if (outer.struct_desc != body)
839 continue;
840
841 struct intel_field_iterator iter;
842 intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
843 0, false);
844 while (intel_field_iterator_next(&iter)) {
845 if (!strcmp(iter.name, "Pointer To Constant Buffer")) {
846 buffer[idx] = ctx_get_bo(ctx, true, iter.raw_value);
847 } else if (!strcmp(iter.name, "Constant Buffer Read Length")) {
848 read_length[idx] = iter.raw_value;
849 }
850 }
851 idx++;
852 }
853
854 for (int i = 0; i < 4; i++) {
855 if (read_length[i] == 0 || buffer[i].map == NULL)
856 continue;
857
858 unsigned size = read_length[i] * 32;
859 fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
860
861 ctx_print_buffer(ctx, buffer[i], size, 0, -1);
862 }
863 }
864
/* Decode the legacy 3DSTATE_CONSTANT_* form: the packet embeds a
 * 3DSTATE_CONSTANT_BODY with four (read length, buffer address) pairs,
 * each describing a push-constant range (read length in 32-byte units).
 */
static void
decode_3dstate_constant(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
   struct intel_group *body =
      intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");

   uint32_t read_length[4] = {0};
   uint64_t read_addr[4] = {0};

   struct intel_field_iterator outer;
   intel_field_iterator_init(&outer, inst, p, 0, false);
   while (intel_field_iterator_next(&outer)) {
      if (outer.struct_desc != body)
         continue;

      struct intel_field_iterator iter;
      intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
                                0, false);

      /* Field names are indexed, e.g. "Read Length[2]" / "Buffer[2]". */
      while (intel_field_iterator_next(&iter)) {
         int idx;
         if (sscanf(iter.name, "Read Length[%d]", &idx) == 1) {
            read_length[idx] = iter.raw_value;
         } else if (sscanf(iter.name, "Buffer[%d]", &idx) == 1) {
            read_addr[idx] = iter.raw_value;
         }
      }

      for (int i = 0; i < 4; i++) {
         if (read_length[i] == 0)
            continue;

         struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr[i]);
         if (!buffer.map) {
            fprintf(ctx->fp, "constant buffer %d unavailable\n", i);
            continue;
         }

         unsigned size = read_length[i] * 32;
         fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);

         ctx_print_buffer(ctx, buffer, size, 0, -1);
      }
   }
}
911
912 static void
decode_gfx4_constant_buffer(struct intel_batch_decode_ctx * ctx,const uint32_t * p)913 decode_gfx4_constant_buffer(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
914 {
915 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
916 uint64_t read_length = 0, read_addr = 0, valid = 0;
917 struct intel_field_iterator iter;
918 intel_field_iterator_init(&iter, inst, p, 0, false);
919
920 while (intel_field_iterator_next(&iter)) {
921 if (!strcmp(iter.name, "Buffer Length")) {
922 read_length = iter.raw_value;
923 } else if (!strcmp(iter.name, "Valid")) {
924 valid = iter.raw_value;
925 } else if (!strcmp(iter.name, "Buffer Starting Address")) {
926 read_addr = iter.raw_value;
927 }
928 }
929
930 if (!valid)
931 return;
932
933 struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr);
934 if (!buffer.map) {
935 fprintf(ctx->fp, "constant buffer unavailable\n");
936 return;
937 }
938 unsigned size = (read_length + 1) * 16 * sizeof(float);
939 fprintf(ctx->fp, "constant buffer size %u\n", size);
940
941 ctx_print_buffer(ctx, buffer, size, 0, -1);
942 }
943
944
945 static void
decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx * ctx,const uint32_t * p)946 decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
947 const uint32_t *p)
948 {
949 fprintf(ctx->fp, "VS Binding Table:\n");
950 dump_binding_table(ctx, p[1], -1);
951
952 fprintf(ctx->fp, "GS Binding Table:\n");
953 dump_binding_table(ctx, p[2], -1);
954
955 if (ctx->devinfo.ver < 6) {
956 fprintf(ctx->fp, "CLIP Binding Table:\n");
957 dump_binding_table(ctx, p[3], -1);
958 fprintf(ctx->fp, "SF Binding Table:\n");
959 dump_binding_table(ctx, p[4], -1);
960 fprintf(ctx->fp, "PS Binding Table:\n");
961 dump_binding_table(ctx, p[5], -1);
962 } else {
963 fprintf(ctx->fp, "PS Binding Table:\n");
964 dump_binding_table(ctx, p[3], -1);
965 }
966 }
967
/* Per-stage 3DSTATE_BINDING_TABLE_POINTERS_xS: a single binding-table
 * pointer in DWord 1.
 */
static void
decode_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   dump_binding_table(ctx, p[1], -1);
}
974
/* Per-stage 3DSTATE_SAMPLER_STATE_POINTERS_xS: dump one sampler at the
 * state pointer in DWord 1.
 */
static void
decode_3dstate_sampler_state_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   dump_samplers(ctx, p[1], 1);
}
981
/* gfx6 packs the VS, GS and PS sampler-state pointers into a single
 * packet, one pointer per DWord after the header; dump one sampler from
 * each.
 */
static void
decode_3dstate_sampler_state_pointers_gfx6(struct intel_batch_decode_ctx *ctx,
                                           const uint32_t *p)
{
   for (int dw = 1; dw <= 3; dw++)
      dump_samplers(ctx, p[dw], 1);
}
990
/* Return true when 'str' ends with the suffix 'end'.
 *
 * Fix: the previous version computed strlen(str) - strlen(end) in an
 * 'int'; the subtraction of two size_t values wraps (rather than going
 * negative) and the narrowing conversion to int is implementation
 * defined.  Compare the lengths explicitly instead.
 */
static bool
str_ends_with(const char *str, const char *end)
{
   size_t str_len = strlen(str);
   size_t end_len = strlen(end);

   if (end_len > str_len)
      return false;

   return strcmp(str + (str_len - end_len), end) == 0;
}
1000
1001 static void
decode_dynamic_state(struct intel_batch_decode_ctx * ctx,const char * struct_type,uint32_t state_offset,int count)1002 decode_dynamic_state(struct intel_batch_decode_ctx *ctx,
1003 const char *struct_type, uint32_t state_offset,
1004 int count)
1005 {
1006 uint64_t state_addr = ctx->dynamic_base + state_offset;
1007 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
1008 const void *state_map = bo.map;
1009
1010 if (state_map == NULL) {
1011 fprintf(ctx->fp, " dynamic %s state unavailable\n", struct_type);
1012 return;
1013 }
1014
1015 struct intel_group *state = intel_spec_find_struct(ctx->spec, struct_type);
1016 if (strcmp(struct_type, "BLEND_STATE") == 0) {
1017 /* Blend states are different from the others because they have a header
1018 * struct called BLEND_STATE which is followed by a variable number of
1019 * BLEND_STATE_ENTRY structs.
1020 */
1021 fprintf(ctx->fp, "%s\n", struct_type);
1022 ctx_print_group(ctx, state, state_addr, state_map);
1023
1024 state_addr += state->dw_length * 4;
1025 state_map += state->dw_length * 4;
1026
1027 struct_type = "BLEND_STATE_ENTRY";
1028 state = intel_spec_find_struct(ctx->spec, struct_type);
1029 }
1030
1031 count = update_count(ctx, ctx->dynamic_base + state_offset,
1032 ctx->dynamic_base, state->dw_length, count);
1033
1034 for (int i = 0; i < count; i++) {
1035 fprintf(ctx->fp, "%s %d\n", struct_type, i);
1036 ctx_print_group(ctx, state, state_addr, state_map);
1037
1038 state_addr += state->dw_length * 4;
1039 state_map += state->dw_length * 4;
1040 }
1041 }
1042
/* Common helper for *_STATE_POINTERS packets: find the first field whose
 * name ends with, or begins with, "Pointer", treat its value as an offset
 * from the dynamic state base, and decode 'count' structs of
 * 'struct_type' there.
 */
static void
decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx,
                              const char *struct_type, const uint32_t *p,
                              int count)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);

   uint32_t state_offset = 0;

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      /* Matches both "... Pointer" and "Pointer to ..." field names. */
      if (str_ends_with(iter.name, "Pointer") || !strncmp(iter.name, "Pointer", 7)) {
         state_offset = iter.raw_value;
         break;
      }
   }
   decode_dynamic_state(ctx, struct_type, state_offset, count);
}
1062
/* Decode 3DSTATE_VIEWPORT_STATE_POINTERS: each viewport pointer is only
 * decoded when the corresponding "... Viewport State Change" bit was seen
 * earlier in the packet.
 */
static void
decode_3dstate_viewport_state_pointers(struct intel_batch_decode_ctx *ctx,
                                       const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
   uint32_t state_offset = 0;
   bool clip = false, sf = false, cc = false;
   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      /* NOTE(review): the CLIP and SF checks are plain "if"s, so only the
       * CC check participates in the else-if chain below.  The field names
       * are mutually exclusive strings, so behavior is still correct, but
       * the structure is fragile to edits.
       */
      if (!strcmp(iter.name, "CLIP Viewport State Change"))
         clip = iter.raw_value;
      if (!strcmp(iter.name, "SF Viewport State Change"))
         sf = iter.raw_value;
      if (!strcmp(iter.name, "CC Viewport State Change"))
         cc = iter.raw_value;
      else if (!strcmp(iter.name, "Pointer to CLIP_VIEWPORT") && clip) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "CLIP_VIEWPORT", state_offset, 1);
      }
      else if (!strcmp(iter.name, "Pointer to SF_VIEWPORT") && sf) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "SF_VIEWPORT", state_offset, 1);
      }
      else if (!strcmp(iter.name, "Pointer to CC_VIEWPORT") && cc) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "CC_VIEWPORT", state_offset, 1);
      }
   }
}
1093
/* 3DSTATE_VIEWPORT_STATE_POINTERS_CC: decode up to 4 CC_VIEWPORT structs
 * at the packet's pointer.
 */
static void
decode_3dstate_viewport_state_pointers_cc(struct intel_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "CC_VIEWPORT", p, 4);
}
1100
/* 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP: decode up to 4 SF_CLIP_VIEWPORT
 * structs at the packet's pointer.
 */
static void
decode_3dstate_viewport_state_pointers_sf_clip(struct intel_batch_decode_ctx *ctx,
                                               const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "SF_CLIP_VIEWPORT", p, 4);
}
1107
/* 3DSTATE_BLEND_STATE_POINTERS: decode the BLEND_STATE (header plus
 * entries) at the packet's pointer.
 */
static void
decode_3dstate_blend_state_pointers(struct intel_batch_decode_ctx *ctx,
                                    const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "BLEND_STATE", p, 1);
}
1114
/* Decode 3DSTATE_CC_STATE_POINTERS.  On ver 6 the packet carries three
 * separately gated pointers (BLEND_STATE, DEPTH_STENCIL_STATE,
 * COLOR_CALC_STATE); on every other version it is a single
 * COLOR_CALC_STATE pointer.
 */
static void
decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx,
                                 const uint32_t *p)
{
   if (ctx->devinfo.ver != 6) {
      decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1);
      return;
   }

   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);

   uint32_t state_offset = 0;
   bool blend_change = false, ds_change = false, cc_change = false;
   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      /* Each pointer field is only honored when its matching change/valid
       * bit (seen earlier in the packet) is set.
       */
      if (!strcmp(iter.name, "BLEND_STATE Change"))
         blend_change = iter.raw_value;
      else if (!strcmp(iter.name, "DEPTH_STENCIL_STATE Change"))
         ds_change = iter.raw_value;
      else if (!strcmp(iter.name, "Color Calc State Pointer Valid"))
         cc_change = iter.raw_value;
      else if (!strcmp(iter.name, "Pointer to DEPTH_STENCIL_STATE") && ds_change) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "DEPTH_STENCIL_STATE", state_offset, 1);
      }
      else if (!strcmp(iter.name, "Pointer to BLEND_STATE") && blend_change) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "BLEND_STATE", state_offset, 1);
      }
      else if (!strcmp(iter.name, "Color Calc State Pointer") && cc_change) {
         state_offset = iter.raw_value;
         decode_dynamic_state(ctx, "COLOR_CALC_STATE", state_offset, 1);
      }
   }
}
1151
/* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS: decode the DEPTH_STENCIL_STATE at
 * the packet's pointer.
 */
static void
decode_3dstate_ds_state_pointers(struct intel_batch_decode_ctx *ctx,
                                 const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "DEPTH_STENCIL_STATE", p, 1);
}
1158
/* 3DSTATE_SCISSOR_STATE_POINTERS: decode the SCISSOR_RECT at the packet's
 * pointer.
 */
static void
decode_3dstate_scissor_state_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "SCISSOR_RECT", p, 1);
}
1165
/* 3DSTATE_SLICE_TABLE_STATE_POINTERS: decode the SLICE_HASH_TABLE at the
 * packet's pointer.
 */
static void
decode_3dstate_slice_table_state_pointers(struct intel_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "SLICE_HASH_TABLE", p, 1);
}
1172
1173 static void
handle_gt_mode(struct intel_batch_decode_ctx * ctx,uint32_t reg_addr,uint32_t val)1174 handle_gt_mode(struct intel_batch_decode_ctx *ctx,
1175 uint32_t reg_addr, uint32_t val)
1176 {
1177 struct intel_group *reg = intel_spec_find_register(ctx->spec, reg_addr);
1178
1179 assert(intel_group_get_length(reg, &val) == 1);
1180
1181 struct intel_field_iterator iter;
1182 intel_field_iterator_init(&iter, reg, &val, 0, false);
1183
1184 uint32_t bt_alignment;
1185 bool bt_alignment_mask = 0;
1186
1187 while (intel_field_iterator_next(&iter)) {
1188 if (strcmp(iter.name, "Binding Table Alignment") == 0) {
1189 bt_alignment = iter.raw_value;
1190 } else if (strcmp(iter.name, "Binding Table Alignment Mask") == 0) {
1191 bt_alignment_mask = iter.raw_value;
1192 }
1193 }
1194
1195 if (bt_alignment_mask)
1196 ctx->use_256B_binding_tables = bt_alignment;
1197 }
1198
/* Registers with decode-time side effects: when MI_LOAD_REGISTER_IMM
 * writes one of these, the matching handler updates decoder state.
 */
struct reg_handler {
   const char *name;
   void (*handler)(struct intel_batch_decode_ctx *ctx,
                   uint32_t reg_addr, uint32_t val);
} reg_handlers[] = {
   { "GT_MODE", handle_gt_mode }
};
1206
1207 static void
decode_load_register_imm(struct intel_batch_decode_ctx * ctx,const uint32_t * p)1208 decode_load_register_imm(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1209 {
1210 struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1211 const unsigned length = intel_group_get_length(inst, p);
1212 assert(length & 1);
1213 const unsigned nr_regs = (length - 1) / 2;
1214
1215 for (unsigned i = 0; i < nr_regs; i++) {
1216 struct intel_group *reg = intel_spec_find_register(ctx->spec, p[i * 2 + 1]);
1217 if (reg != NULL) {
1218 fprintf(ctx->fp, "register %s (0x%x): 0x%x\n",
1219 reg->name, reg->register_offset, p[2]);
1220 ctx_print_group(ctx, reg, reg->register_offset, &p[2]);
1221
1222 for (unsigned i = 0; i < ARRAY_SIZE(reg_handlers); i++) {
1223 if (strcmp(reg->name, reg_handlers[i].name) == 0)
1224 reg_handlers[i].handler(ctx, p[1], p[2]);
1225 }
1226 }
1227 }
1228 }
1229
/* Disassemble the shader program referenced by a state struct: read the
 * "Kernel Start Pointer" field (and the "Enable" bit, if present —
 * defaulting to enabled) from 'map' as described by 'strct', then
 * disassemble the program at that KSP.
 */
static void
disasm_program_from_group(struct intel_batch_decode_ctx *ctx,
                          struct intel_group *strct, const void *map,
                          const char *short_name, const char *type)
{
   uint64_t ksp = 0;
   bool is_enabled = true;
   struct intel_field_iterator iter;

   intel_field_iterator_init(&iter, strct, map, 0, false);

   while (intel_field_iterator_next(&iter)) {
      if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
         ksp = iter.raw_value;
      } else if (strcmp(iter.name, "Enable") == 0) {
         is_enabled = iter.raw_value;
      }
   }

   if (is_enabled) {
      ctx_disassemble_program(ctx, ksp, short_name, type);
      fprintf(ctx->fp, "\n");
   }
}
1254
1255 static void
decode_vs_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1256 decode_vs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1257 {
1258 struct intel_group *strct =
1259 intel_spec_find_struct(ctx->spec, "VS_STATE");
1260 if (strct == NULL) {
1261 fprintf(ctx->fp, "did not find VS_STATE info\n");
1262 return;
1263 }
1264
1265 struct intel_batch_decode_bo bind_bo =
1266 ctx_get_bo(ctx, true, offset);
1267
1268 if (bind_bo.map == NULL) {
1269 fprintf(ctx->fp, " vs state unavailable\n");
1270 return;
1271 }
1272
1273 ctx_print_group(ctx, strct, offset, bind_bo.map);
1274 disasm_program_from_group(ctx, strct, bind_bo.map, "VS", "vertex shader");
1275 }
1276
1277 static void
decode_gs_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1278 decode_gs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1279 {
1280 struct intel_group *strct =
1281 intel_spec_find_struct(ctx->spec, "GS_STATE");
1282 if (strct == NULL) {
1283 fprintf(ctx->fp, "did not find GS_STATE info\n");
1284 return;
1285 }
1286
1287 struct intel_batch_decode_bo bind_bo =
1288 ctx_get_bo(ctx, true, offset);
1289
1290 if (bind_bo.map == NULL) {
1291 fprintf(ctx->fp, " gs state unavailable\n");
1292 return;
1293 }
1294
1295 ctx_print_group(ctx, strct, offset, bind_bo.map);
1296 disasm_program_from_group(ctx, strct, bind_bo.map, "GS", "geometry shader");
1297 }
1298
/* Print the CLIP_STATE structure at 'offset', disassemble the clip shader
 * it references, then decode the CLIP_VIEWPORT pointed at by DWord 6 of
 * the struct.
 */
static void
decode_clip_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
{
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "CLIP_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find CLIP_STATE info\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, " clip state unavailable\n");
      return;
   }

   ctx_print_group(ctx, strct, offset, bind_bo.map);
   disasm_program_from_group(ctx, strct, bind_bo.map, "CL", "clip shader");

   struct intel_group *vp_strct =
      intel_spec_find_struct(ctx->spec, "CLIP_VIEWPORT");
   if (vp_strct == NULL) {
      fprintf(ctx->fp, "did not find CLIP_VIEWPORT info\n");
      return;
   }
   /* Mask off the low bits of the viewport pointer in DWord 6. */
   uint32_t clip_vp_offset = ((uint32_t *)bind_bo.map)[6] & ~0x3;
   struct intel_batch_decode_bo vp_bo =
      ctx_get_bo(ctx, true, clip_vp_offset);
   if (vp_bo.map == NULL) {
      fprintf(ctx->fp, " clip vp state unavailable\n");
      return;
   }
   ctx_print_group(ctx, vp_strct, clip_vp_offset, vp_bo.map);
}
1335
/* Print the SF_STATE structure at 'offset', disassemble the
 * strips-and-fans shader it references, then decode the SF_VIEWPORT
 * pointed at by DWord 5 of the struct.
 */
static void
decode_sf_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
{
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "SF_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find SF_STATE info\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, " sf state unavailable\n");
      return;
   }

   ctx_print_group(ctx, strct, offset, bind_bo.map);
   disasm_program_from_group(ctx, strct, bind_bo.map, "SF", "strips and fans shader");

   struct intel_group *vp_strct =
      intel_spec_find_struct(ctx->spec, "SF_VIEWPORT");
   if (vp_strct == NULL) {
      fprintf(ctx->fp, "did not find SF_VIEWPORT info\n");
      return;
   }

   /* Mask off the low bits of the viewport pointer in DWord 5. */
   uint32_t sf_vp_offset = ((uint32_t *)bind_bo.map)[5] & ~0x3;
   struct intel_batch_decode_bo vp_bo =
      ctx_get_bo(ctx, true, sf_vp_offset);
   if (vp_bo.map == NULL) {
      fprintf(ctx->fp, " sf vp state unavailable\n");
      return;
   }
   ctx_print_group(ctx, vp_strct, sf_vp_offset, vp_bo.map);
}
1373
/* Print the WM_STATE structure at 'offset' and decode the pixel-shader
 * kernels it references.
 */
static void
decode_wm_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
{
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "WM_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find WM_STATE info\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, " wm state unavailable\n");
      return;
   }

   ctx_print_group(ctx, strct, offset, bind_bo.map);

   decode_ps_kern(ctx, strct, bind_bo.map);
}
1396
/* Print the COLOR_CALC_STATE structure at 'offset', then decode the
 * CC_VIEWPORT pointed at by DWord 4 of the struct.
 */
static void
decode_cc_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
{
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "COLOR_CALC_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find COLOR_CALC_STATE info\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, " cc state unavailable\n");
      return;
   }

   ctx_print_group(ctx, strct, offset, bind_bo.map);

   struct intel_group *vp_strct =
      intel_spec_find_struct(ctx->spec, "CC_VIEWPORT");
   if (vp_strct == NULL) {
      fprintf(ctx->fp, "did not find CC_VIEWPORT info\n");
      return;
   }
   /* Mask off the low bits of the viewport pointer in DWord 4. */
   uint32_t cc_vp_offset = ((uint32_t *)bind_bo.map)[4] & ~0x3;
   struct intel_batch_decode_bo vp_bo =
      ctx_get_bo(ctx, true, cc_vp_offset);
   if (vp_bo.map == NULL) {
      fprintf(ctx->fp, " cc vp state unavailable\n");
      return;
   }
   ctx_print_group(ctx, vp_strct, cc_vp_offset, vp_bo.map);
}
/* Decode 3DSTATE_PIPELINED_POINTERS: one state-table pointer per fixed-
 * function stage.  Bit 0 of the GS pointer gates whether GS state is
 * decoded at all; bit 0 of the clip pointer is masked off before use.
 */
static void
decode_pipelined_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   fprintf(ctx->fp, "VS State Table:\n");
   decode_vs_state(ctx, p[1]);
   if (p[2] & 1) {
      fprintf(ctx->fp, "GS State Table:\n");
      decode_gs_state(ctx, p[2] & ~1);
   }
   fprintf(ctx->fp, "Clip State Table:\n");
   decode_clip_state(ctx, p[3] & ~1);
   fprintf(ctx->fp, "SF State Table:\n");
   decode_sf_state(ctx, p[4]);
   fprintf(ctx->fp, "WM State Table:\n");
   decode_wm_state(ctx, p[5]);
   fprintf(ctx->fp, "CC State Table:\n");
   decode_cc_state(ctx, p[6]);
}
1450
/* 3DSTATE_CPS_POINTERS: decode the CPS_STATE at the packet's pointer. */
static void
decode_cps_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "CPS_STATE", p, 1);
}
1456
/* Table of per-command custom decoders.  After the generic field decode
 * of a packet, the batch printers look the command name up here and run
 * the matching decoder to print referenced indirect state, programs and
 * buffers.
 */
struct custom_decoder {
   const char *cmd_name;
   void (*decode)(struct intel_batch_decode_ctx *ctx, const uint32_t *p);
} custom_decoders[] = {
   { "STATE_BASE_ADDRESS", handle_state_base_address },
   { "3DSTATE_BINDING_TABLE_POOL_ALLOC", handle_binding_table_pool_alloc },
   { "MEDIA_INTERFACE_DESCRIPTOR_LOAD", handle_media_interface_descriptor_load },
   { "COMPUTE_WALKER", handle_compute_walker },
   { "MEDIA_CURBE_LOAD", handle_media_curbe_load },
   { "3DSTATE_VERTEX_BUFFERS", handle_3dstate_vertex_buffers },
   { "3DSTATE_INDEX_BUFFER", handle_3dstate_index_buffer },
   { "3DSTATE_VS", decode_single_ksp },
   { "3DSTATE_GS", decode_single_ksp },
   { "3DSTATE_DS", decode_single_ksp },
   { "3DSTATE_HS", decode_single_ksp },
   { "3DSTATE_PS", decode_ps_kernels },
   { "3DSTATE_WM", decode_ps_kernels },
   { "3DSTATE_MESH_SHADER", decode_mesh_task_ksp },
   { "3DSTATE_TASK_SHADER", decode_mesh_task_ksp },
   { "3DSTATE_CONSTANT_VS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_GS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_PS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_HS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_DS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_ALL", decode_3dstate_constant_all },

   { "3DSTATE_BINDING_TABLE_POINTERS", decode_gfx4_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_GS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_PS", decode_3dstate_binding_table_pointers },

   { "3DSTATE_SAMPLER_STATE_POINTERS_VS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gfx6 },

   { "3DSTATE_VIEWPORT_STATE_POINTERS", decode_3dstate_viewport_state_pointers },
   { "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc },
   { "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
   { "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers },
   { "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers },
   { "3DSTATE_DEPTH_STENCIL_STATE_POINTERS", decode_3dstate_ds_state_pointers },
   { "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers },
   { "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers },
   { "MI_LOAD_REGISTER_IMM", decode_load_register_imm },
   { "3DSTATE_PIPELINED_POINTERS", decode_pipelined_pointers },
   { "3DSTATE_CPS_POINTERS", decode_cps_pointers },
   { "CONSTANT_BUFFER", decode_gfx4_constant_buffer },
};
1510
1511 static void
get_inst_color(const struct intel_batch_decode_ctx * ctx,const struct intel_group * inst,char ** const out_color,char ** const out_reset_color)1512 get_inst_color(const struct intel_batch_decode_ctx *ctx,
1513 const struct intel_group *inst,
1514 char **const out_color,
1515 char **const out_reset_color)
1516 {
1517 const char *inst_name = intel_group_get_name(inst);
1518 if (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) {
1519 *out_reset_color = NORMAL;
1520 if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
1521 if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 ||
1522 strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0)
1523 *out_color = GREEN_HEADER;
1524 else
1525 *out_color = BLUE_HEADER;
1526 } else {
1527 *out_color = NORMAL;
1528 }
1529 } else {
1530 *out_color = "";
1531 *out_reset_color = "";
1532 }
1533 }
1534
/* Pairing of a command description with the last seen instance of that
 * command in the batch (used by accumulate mode).
 */
struct inst_ptr {
   struct intel_group *inst;
   uint32_t *ptr;
};

/* qsort comparator: order accumulated commands alphabetically by name. */
static int
compare_inst_ptr(const void *v1, const void *v2)
{
   const struct inst_ptr *i1 = v1, *i2 = v2;
   return strcmp(i1->inst->name, i2->inst->name);
}
1546
/* Accumulate mode: print the most recent instance of every command seen
 * since the last dump, sorted by command name, running the matching
 * custom decoder for each when full decode is enabled.
 */
static void
intel_print_accumulated_instrs(struct intel_batch_decode_ctx *ctx)
{
   struct util_dynarray arr;
   util_dynarray_init(&arr, NULL);

   /* Flatten the command hash table into an array so it can be sorted. */
   hash_table_foreach(ctx->commands, entry) {
      struct inst_ptr inst = {
         .inst = (struct intel_group *)entry->key,
         .ptr = entry->data,
      };
      util_dynarray_append(&arr, struct inst_ptr, inst);
   }
   qsort(util_dynarray_begin(&arr),
         util_dynarray_num_elements(&arr, struct inst_ptr),
         sizeof(struct inst_ptr),
         compare_inst_ptr);

   fprintf(ctx->fp, "----\n");
   util_dynarray_foreach(&arr, struct inst_ptr, i) {
      char *begin_color;
      char *end_color;
      get_inst_color(ctx, i->inst, &begin_color, &end_color);

      /* Accumulated commands have no meaningful batch offset; print 0. */
      uint64_t offset = 0;
      fprintf(ctx->fp, "%s0x%08"PRIx64": 0x%08x: %-80s%s\n",
              begin_color, offset, i->ptr[0], i->inst->name, end_color);
      if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
         ctx_print_group(ctx, i->inst, 0, i->ptr);
         for (int d = 0; d < ARRAY_SIZE(custom_decoders); d++) {
            if (strcmp(i->inst->name, custom_decoders[d].cmd_name) == 0) {
               custom_decoders[d].decode(ctx, i->ptr);
               break;
            }
         }
      }
   }
   util_dynarray_fini(&arr);
}
1586
/* Decode and print an entire batch buffer.
 *
 * Walks 'batch' (batch_size bytes, mapped at GPU address batch_addr)
 * packet by packet.  Unknown opcodes are printed raw in red.  Known
 * packets are printed (or accumulated, in accumulate mode) and their
 * custom decoder is run in full-decode mode.  MI_BATCH_BUFFER_START is
 * followed recursively; 'from_ring' indicates the commands come from a
 * ring buffer, in which case decoding continues past a first-level
 * batch-buffer start.
 */
void
intel_print_batch(struct intel_batch_decode_ctx *ctx,
                  const uint32_t *batch, uint32_t batch_size,
                  uint64_t batch_addr, bool from_ring)
{
   const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
   int length;
   struct intel_group *inst;
   const char *reset_color = ctx->flags & INTEL_BATCH_DECODE_IN_COLOR ? NORMAL : "";

   /* Bound the batch-chain depth to guard against loops in the batch. */
   if (ctx->n_batch_buffer_start >= 100) {
      fprintf(ctx->fp, "%s0x%08"PRIx64": Max batch buffer jumps exceeded%s\n",
              (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
              (ctx->flags & INTEL_BATCH_DECODE_OFFSETS) ? batch_addr : 0,
              reset_color);
      return;
   }

   ctx->n_batch_buffer_start++;

   for (p = batch; p < end; p += length) {
      inst = intel_ctx_find_instruction(ctx, p);
      length = intel_group_get_length(inst, p);
      assert(inst == NULL || length > 0);
      /* Always advance at least one DWord so unknown data cannot stall
       * the loop.
       */
      length = MAX2(1, length);

      uint64_t offset;
      if (ctx->flags & INTEL_BATCH_DECODE_OFFSETS)
         offset = batch_addr + ((char *)p - (char *)batch);
      else
         offset = 0;

      if (inst == NULL) {
         /* Unknown opcode: print the raw DWords in red and keep going. */
         fprintf(ctx->fp, "%s0x%08"PRIx64": unknown instruction %08x%s\n",
                 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
                 offset, p[0], reset_color);

         for (int i=1; i < length; i++) {
            fprintf(ctx->fp, "%s0x%08"PRIx64": -- %08x%s\n",
                    (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
                    offset + i * 4, p[i], reset_color);
         }

         continue;
      }

      if (ctx->flags & INTEL_BATCH_DECODE_ACCUMULATE) {
         /* Accumulate mode: remember only the latest instance of each
          * command, and dump the accumulated set at every draw/dispatch.
          */
         struct hash_entry *entry = _mesa_hash_table_search(ctx->commands, inst);
         if (entry != NULL) {
            entry->data = (void *)p;
         } else {
            _mesa_hash_table_insert(ctx->commands, inst, (void *)p);
         }

         if (!strcmp(inst->name, "3DPRIMITIVE") ||
             !strcmp(inst->name, "3DPRIMITIVE_EXTENDED") ||
             !strcmp(inst->name, "GPGPU_WALKER") ||
             !strcmp(inst->name, "3DSTATE_WM_HZ_OP") ||
             !strcmp(inst->name, "COMPUTE_WALKER")) {
            intel_print_accumulated_instrs(ctx);
         }
      } else {
         char *begin_color;
         char *end_color;
         get_inst_color(ctx, inst, &begin_color, &end_color);

         /* Mark the instruction at the hardware's active head pointer. */
         fprintf(ctx->fp, "%s0x%08"PRIx64"%s: 0x%08x: %-80s%s\n",
                 begin_color, offset,
                 ctx->acthd && offset == ctx->acthd ? " (ACTHD)" : "", p[0],
                 inst->name, end_color);

         if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
            ctx_print_group(ctx, inst, offset, p);

            for (int i = 0; i < ARRAY_SIZE(custom_decoders); i++) {
               if (strcmp(inst->name, custom_decoders[i].cmd_name) == 0) {
                  custom_decoders[i].decode(ctx, p);
                  break;
               }
            }
         }
      }

      if (strcmp(inst->name, "MI_BATCH_BUFFER_START") == 0) {
         uint64_t next_batch_addr = 0;
         bool ppgtt = false;
         bool second_level = false;
         bool predicate = false;
         struct intel_field_iterator iter;
         intel_field_iterator_init(&iter, inst, p, 0, false);
         while (intel_field_iterator_next(&iter)) {
            if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
               next_batch_addr = iter.raw_value;
            } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
               second_level = iter.raw_value;
            } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
               ppgtt = iter.raw_value;
            } else if (strcmp(iter.name, "Predication Enable") == 0) {
               predicate = iter.raw_value;
            }
         }

         if (!predicate) {
            struct intel_batch_decode_bo next_batch = ctx_get_bo(ctx, ppgtt, next_batch_addr);

            if (next_batch.map == NULL) {
               fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
                       next_batch_addr);
            } else {
               intel_print_batch(ctx, next_batch.map, next_batch.size,
                                 next_batch.addr, false);
            }
            if (second_level) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
                * like a subroutine call. Commands that come afterwards get
                * processed once the 2nd level batch buffer returns with
                * MI_BATCH_BUFFER_END.
                */
               continue;
            } else if (!from_ring) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
                * like a goto. Nothing after it will ever get processed. In
                * order to prevent the recursion from growing, we just reset the
                * loop and continue;
                */
               break;
            }
         }
      } else if (strcmp(inst->name, "MI_BATCH_BUFFER_END") == 0) {
         break;
      }
   }

   ctx->n_batch_buffer_start--;
}
1722
/* Clear all per-command counts accumulated by intel_batch_stats(). */
void
intel_batch_stats_reset(struct intel_batch_decode_ctx *ctx)
{
   _mesa_hash_table_clear(ctx->stats, NULL);
}
1728
/* Count command occurrences in a batch buffer.
 *
 * Mirrors intel_print_batch()'s walk (including following
 * MI_BATCH_BUFFER_START recursively), but instead of printing each
 * packet it bumps a per-command-name counter in ctx->stats.  Unknown
 * opcodes are counted under "unknown".
 */
void
intel_batch_stats(struct intel_batch_decode_ctx *ctx,
                  const uint32_t *batch, uint32_t batch_size,
                  uint64_t batch_addr, bool from_ring)
{
   const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
   int length;
   struct intel_group *inst;

   /* Bound the batch-chain depth to guard against loops in the batch. */
   if (ctx->n_batch_buffer_start >= 100) {
      fprintf(stderr, "Max batch buffer jumps exceeded\n");
      return;
   }

   ctx->n_batch_buffer_start++;

   for (p = batch; p < end; p += length) {
      inst = intel_ctx_find_instruction(ctx, p);
      length = intel_group_get_length(inst, p);
      assert(inst == NULL || length > 0);
      /* Always advance at least one DWord so unknown data cannot stall
       * the loop.
       */
      length = MAX2(1, length);

      const char *name =
         inst != NULL ? inst->name : "unknown";

      /* Counters are stored directly in the hash entry's data pointer. */
      struct hash_entry *entry = _mesa_hash_table_search(ctx->stats, name);
      if (entry != NULL) {
         entry->data = (void *)((uintptr_t)entry->data + 1);
      } else {
         _mesa_hash_table_insert(ctx->stats, name, (void *)(uintptr_t)1);
      }

      if (inst == NULL)
         continue;

      if (strcmp(inst->name, "MI_BATCH_BUFFER_START") == 0) {
         uint64_t next_batch_addr = 0;
         bool ppgtt = false;
         bool second_level = false;
         bool predicate = false;
         struct intel_field_iterator iter;
         intel_field_iterator_init(&iter, inst, p, 0, false);
         while (intel_field_iterator_next(&iter)) {
            if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
               next_batch_addr = iter.raw_value;
            } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
               second_level = iter.raw_value;
            } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
               ppgtt = iter.raw_value;
            } else if (strcmp(iter.name, "Predication Enable") == 0) {
               predicate = iter.raw_value;
            }
         }

         if (!predicate) {
            struct intel_batch_decode_bo next_batch =
               ctx_get_bo(ctx, ppgtt, next_batch_addr);

            if (next_batch.map == NULL) {
               fprintf(stderr, "Secondary batch at 0x%08"PRIx64" unavailable\n",
                       next_batch_addr);
            } else {
               intel_batch_stats(ctx, next_batch.map, next_batch.size,
                                 next_batch.addr, false);
            }
            if (second_level) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
                * like a subroutine call. Commands that come afterwards get
                * processed once the 2nd level batch buffer returns with
                * MI_BATCH_BUFFER_END.
                */
               continue;
            } else if (!from_ring) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
                * like a goto. Nothing after it will ever get processed. In
                * order to prevent the recursion from growing, we just reset the
                * loop and continue;
                */
               break;
            }
         }
      } else if (strcmp(inst->name, "MI_BATCH_BUFFER_END") == 0) {
         break;
      }
   }

   ctx->n_batch_buffer_start--;
}
1817
/* Name/count pair produced when flattening the stats hash table
 * so the entries can be sorted for printing.
 */
struct inst_stat {
   const char *name;    /* instruction name (hash table key) */
   uint32_t count;      /* number of times the instruction appeared */
};

/* qsort() comparator: orders instruction stats alphabetically by name. */
static int
compare_inst_stat(const void *v1, const void *v2)
{
   const struct inst_stat *lhs = (const struct inst_stat *)v1;
   const struct inst_stat *rhs = (const struct inst_stat *)v2;

   return strcmp(lhs->name, rhs->name);
}
1829
1830 void
intel_batch_print_stats(struct intel_batch_decode_ctx * ctx)1831 intel_batch_print_stats(struct intel_batch_decode_ctx *ctx)
1832 {
1833 struct util_dynarray arr;
1834 util_dynarray_init(&arr, NULL);
1835
1836 hash_table_foreach(ctx->stats, entry) {
1837 struct inst_stat inst = {
1838 .name = (const char *)entry->key,
1839 .count = (uintptr_t)entry->data,
1840 };
1841 util_dynarray_append(&arr, struct inst_stat, inst);
1842 }
1843 qsort(util_dynarray_begin(&arr),
1844 util_dynarray_num_elements(&arr, struct inst_stat),
1845 sizeof(struct inst_stat),
1846 compare_inst_stat);
1847 util_dynarray_foreach(&arr, struct inst_stat, i)
1848 fprintf(ctx->fp, "%-40s: %u\n", i->name, i->count);
1849
1850 util_dynarray_fini(&arr);
1851 }
1852