xref: /aosp_15_r20/external/mesa3d/src/intel/decoder/intel_batch_decoder.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "intel_decoder.h"
25 #include "intel_decoder_private.h"
26 
27 #include "util/macros.h"
28 #include "util/u_debug.h"
29 #include "util/u_dynarray.h"
30 #include "util/u_math.h" /* Needed for ROUND_DOWN_TO */
31 
32 #include <string.h>
33 
34 static const struct debug_control debug_control[] = {
35    { "color",      INTEL_BATCH_DECODE_IN_COLOR },
36    { "full",       INTEL_BATCH_DECODE_FULL },
37    { "offsets",    INTEL_BATCH_DECODE_OFFSETS },
38    { "floats",     INTEL_BATCH_DECODE_FLOATS },
39    { "surfaces",   INTEL_BATCH_DECODE_SURFACES },
40    { "accumulate", INTEL_BATCH_DECODE_ACCUMULATE },
41    { "vb-data",    INTEL_BATCH_DECODE_VB_DATA },
42    { NULL,    0 }
43 };
44 
45 void
intel_batch_decode_ctx_init(struct intel_batch_decode_ctx * ctx,const struct intel_device_info * devinfo,FILE * fp,enum intel_batch_decode_flags flags,const char * xml_path,struct intel_batch_decode_bo (* get_bo)(void *,bool,uint64_t),unsigned (* get_state_size)(void *,uint64_t,uint64_t),void * user_data)46 intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx,
47                             const struct intel_device_info *devinfo,
48                             FILE *fp, enum intel_batch_decode_flags flags,
49                             const char *xml_path,
50                             struct intel_batch_decode_bo (*get_bo)(void *,
51                                                                    bool,
52                                                                    uint64_t),
53                             unsigned (*get_state_size)(void *, uint64_t,
54                                                        uint64_t),
55                             void *user_data)
56 {
57    memset(ctx, 0, sizeof(*ctx));
58 
59    ctx->devinfo = *devinfo;
60    ctx->get_bo = get_bo;
61    ctx->get_state_size = get_state_size;
62    ctx->user_data = user_data;
63    ctx->fp = fp;
64    ctx->flags = parse_enable_string(getenv("INTEL_DECODE"), flags, debug_control);
65    ctx->max_vbo_decoded_lines = -1; /* No limit! */
66    ctx->engine = INTEL_ENGINE_CLASS_RENDER;
67 
68    if (xml_path == NULL)
69       ctx->spec = intel_spec_load(devinfo);
70    else
71       ctx->spec = intel_spec_load_from_path(devinfo, xml_path);
72 
73    ctx->commands =
74       _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
75    ctx->stats =
76       _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
77 }
78 
79 void
intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx * ctx)80 intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx *ctx)
81 {
82    _mesa_hash_table_destroy(ctx->commands, NULL);
83    _mesa_hash_table_destroy(ctx->stats, NULL);
84    intel_spec_destroy(ctx->spec);
85 }
86 
/* Terminal escape sequences used to colorize decoder output.
 *
 * The ESC byte is spelled "\033" (octal) rather than "\e": "\e" is a compiler
 * extension and not an ISO C escape sequence, whereas "\033" is portable and
 * produces the same byte.
 */
#define CSI "\033["
#define RED_COLOR    CSI "31m"
#define BLUE_HEADER  CSI "0;44m" CSI "1;37m"
#define GREEN_HEADER CSI "1;42m"
#define NORMAL       CSI "0m"
92 
93 static void
ctx_print_group(struct intel_batch_decode_ctx * ctx,struct intel_group * group,uint64_t address,const void * map)94 ctx_print_group(struct intel_batch_decode_ctx *ctx,
95                 struct intel_group *group,
96                 uint64_t address, const void *map)
97 {
98    intel_print_group(ctx->fp, group, address, map, 0,
99                    (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) != 0);
100 }
101 
102 struct intel_batch_decode_bo
ctx_get_bo(struct intel_batch_decode_ctx * ctx,bool ppgtt,uint64_t addr)103 ctx_get_bo(struct intel_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr)
104 {
105    if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0)) {
106       /* On Broadwell and above, we have 48-bit addresses which consume two
107        * dwords.  Some packets require that these get stored in a "canonical
108        * form" which means that bit 47 is sign-extended through the upper
109        * bits. In order to correctly handle those aub dumps, we need to mask
110        * off the top 16 bits.
111        */
112       addr &= (~0ull >> 16);
113    }
114 
115    struct intel_batch_decode_bo bo = ctx->get_bo(ctx->user_data, ppgtt, addr);
116 
117    if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0))
118       bo.addr &= (~0ull >> 16);
119 
120    /* We may actually have an offset into the bo */
121    if (bo.map != NULL) {
122       assert(bo.addr <= addr);
123       uint64_t offset = addr - bo.addr;
124       bo.map += offset;
125       bo.addr += offset;
126       bo.size -= offset;
127    }
128 
129    return bo;
130 }
131 
132 static int
update_count(struct intel_batch_decode_ctx * ctx,uint64_t address,uint64_t base_address,unsigned element_dwords,unsigned guess)133 update_count(struct intel_batch_decode_ctx *ctx,
134              uint64_t address,
135              uint64_t base_address,
136              unsigned element_dwords,
137              unsigned guess)
138 {
139    unsigned size = 0;
140 
141    if (ctx->get_state_size)
142       size = ctx->get_state_size(ctx->user_data, address, base_address);
143 
144    if (size > 0)
145       return size / (sizeof(uint32_t) * element_dwords);
146 
147    /* In the absence of any information, just guess arbitrarily. */
148    return guess;
149 }
150 
151 static inline void
ctx_disassemble_program(struct intel_batch_decode_ctx * ctx,uint32_t ksp,const char * short_name,const char * name)152 ctx_disassemble_program(struct intel_batch_decode_ctx *ctx,
153                         uint32_t ksp,
154                         const char *short_name,
155                         const char *name)
156 {
157    ctx->disassemble_program(ctx, ksp, short_name, name);
158 }
159 
/* Heuristic to determine whether a uint32_t is probably actually a float
 * (http://stackoverflow.com/a/2953466)
 *
 * The bit pattern is split into its IEEE-754 single-precision exponent and
 * mantissa fields; the value is considered a plausible float when any of
 * the three tests below holds.
 */
static bool
probably_float(uint32_t bits)
{
   const uint32_t mantissa = bits & 0x007fffff;
   /* Unbiased exponent: 8-bit field at bits 30:23 minus the bias of 127. */
   const int exponent = (int)((bits >> 23) & 0xff) - 127;

   /* +- 0.0 */
   const bool is_zero = exponent == -127 && mantissa == 0;

   /* +- 1 billionth to 1 billion */
   const bool moderate_magnitude = exponent >= -30 && exponent <= 30;

   /* some value with only a few binary digits */
   const bool short_mantissa = (mantissa & 0x0000ffff) == 0;

   return is_zero || moderate_magnitude || short_mantissa;
}
184 
185 static void
ctx_print_buffer(struct intel_batch_decode_ctx * ctx,struct intel_batch_decode_bo bo,uint32_t read_length,uint32_t pitch,int max_lines)186 ctx_print_buffer(struct intel_batch_decode_ctx *ctx,
187                  struct intel_batch_decode_bo bo,
188                  uint32_t read_length,
189                  uint32_t pitch,
190                  int max_lines)
191 {
192    const uint32_t *dw_end =
193          bo.map + ROUND_DOWN_TO(MIN2(bo.size, read_length), 4);
194 
195    int column_count = 0, pitch_col_count = 0, line_count = -1;
196    for (const uint32_t *dw = bo.map; dw < dw_end; dw++) {
197       if (pitch_col_count * 4 == pitch || column_count == 8) {
198          fprintf(ctx->fp, "\n");
199          column_count = 0;
200          if (pitch_col_count * 4 == pitch)
201             pitch_col_count = 0;
202          line_count++;
203 
204          if (max_lines >= 0 && line_count >= max_lines)
205             break;
206       }
207       fprintf(ctx->fp, column_count == 0 ? "  " : " ");
208 
209       if ((ctx->flags & INTEL_BATCH_DECODE_FLOATS) && probably_float(*dw))
210          fprintf(ctx->fp, "  %8.2f", *(float *) dw);
211       else
212          fprintf(ctx->fp, "  0x%08x", *dw);
213 
214       column_count++;
215       pitch_col_count++;
216    }
217    fprintf(ctx->fp, "\n");
218 }
219 
220 static struct intel_group *
intel_ctx_find_instruction(struct intel_batch_decode_ctx * ctx,const uint32_t * p)221 intel_ctx_find_instruction(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
222 {
223    return intel_spec_find_instruction(ctx->spec, ctx->engine, p);
224 }
225 
226 static void
handle_state_base_address(struct intel_batch_decode_ctx * ctx,const uint32_t * p)227 handle_state_base_address(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
228 {
229    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
230 
231    struct intel_field_iterator iter;
232    intel_field_iterator_init(&iter, inst, p, 0, false);
233 
234    uint64_t surface_base = 0, dynamic_base = 0, instruction_base = 0;
235    bool surface_modify = 0, dynamic_modify = 0, instruction_modify = 0;
236 
237    while (intel_field_iterator_next(&iter)) {
238       if (strcmp(iter.name, "Surface State Base Address") == 0) {
239          surface_base = iter.raw_value;
240       } else if (strcmp(iter.name, "Dynamic State Base Address") == 0) {
241          dynamic_base = iter.raw_value;
242       } else if (strcmp(iter.name, "Instruction Base Address") == 0) {
243          instruction_base = iter.raw_value;
244       } else if (strcmp(iter.name, "Surface State Base Address Modify Enable") == 0) {
245          surface_modify = iter.raw_value;
246       } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) {
247          dynamic_modify = iter.raw_value;
248       } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) {
249          instruction_modify = iter.raw_value;
250       }
251    }
252 
253    if (dynamic_modify)
254       ctx->dynamic_base = dynamic_base;
255 
256    if (surface_modify)
257       ctx->surface_base = surface_base;
258 
259    if (instruction_modify)
260       ctx->instruction_base = instruction_base;
261 }
262 
263 static void
handle_binding_table_pool_alloc(struct intel_batch_decode_ctx * ctx,const uint32_t * p)264 handle_binding_table_pool_alloc(struct intel_batch_decode_ctx *ctx,
265                                 const uint32_t *p)
266 {
267    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
268 
269    struct intel_field_iterator iter;
270    intel_field_iterator_init(&iter, inst, p, 0, false);
271 
272    uint64_t bt_pool_base = 0;
273    bool bt_pool_enable = false;
274 
275    while (intel_field_iterator_next(&iter)) {
276       if (strcmp(iter.name, "Binding Table Pool Base Address") == 0) {
277          bt_pool_base = iter.raw_value;
278       } else if (strcmp(iter.name, "Binding Table Pool Enable") == 0) {
279          bt_pool_enable = iter.raw_value;
280       }
281    }
282 
283    if (bt_pool_enable || ctx->devinfo.verx10 >= 125) {
284       ctx->bt_pool_base = bt_pool_base;
285    } else {
286       ctx->bt_pool_base = 0;
287    }
288 }
289 
290 static void
dump_binding_table(struct intel_batch_decode_ctx * ctx,uint32_t offset,int count)291 dump_binding_table(struct intel_batch_decode_ctx *ctx,
292                    uint32_t offset, int count)
293 {
294    struct intel_group *strct =
295       intel_spec_find_struct(ctx->spec, "RENDER_SURFACE_STATE");
296    if (strct == NULL) {
297       fprintf(ctx->fp, "did not find RENDER_SURFACE_STATE info\n");
298       return;
299    }
300 
301    /* Most platforms use a 16-bit pointer with 32B alignment in bits 15:5. */
302    uint32_t btp_alignment = 32;
303    uint32_t btp_pointer_bits = 16;
304 
305    if (ctx->devinfo.verx10 >= 125) {
306       /* The pointer is now 21-bit with 32B alignment in bits 20:5. */
307       btp_pointer_bits = 21;
308    } else if (ctx->use_256B_binding_tables) {
309       /* When 256B binding tables are enabled, we have to shift the offset
310        * which is stored in bits 15:5 but interpreted as bits 18:8 of the
311        * actual offset.  The effective pointer is 19-bit with 256B alignment.
312        */
313       offset <<= 3;
314       btp_pointer_bits = 19;
315       btp_alignment = 256;
316    }
317 
318    const uint64_t bt_pool_base = ctx->bt_pool_base ? ctx->bt_pool_base :
319                                                      ctx->surface_base;
320 
321    if (count < 0) {
322       count = update_count(ctx, bt_pool_base + offset,
323                            bt_pool_base, 1, 32);
324    }
325 
326    if (offset % btp_alignment != 0 || offset >= (1u << btp_pointer_bits)) {
327       fprintf(ctx->fp, "  invalid binding table pointer\n");
328       return;
329    }
330 
331    struct intel_batch_decode_bo bind_bo =
332       ctx_get_bo(ctx, true, bt_pool_base + offset);
333 
334    if (bind_bo.map == NULL) {
335       fprintf(ctx->fp, "  binding table unavailable\n");
336       return;
337    }
338 
339    const uint32_t *pointers = bind_bo.map;
340    for (int i = 0; i < count; i++) {
341       if (((uintptr_t)&pointers[i] >= ((uintptr_t)bind_bo.map + bind_bo.size)))
342          break;
343 
344       uint64_t addr = ctx->surface_base + pointers[i];
345       struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
346       uint32_t size = strct->dw_length * 4;
347 
348       if (pointers[i] % 32 != 0 ||
349           addr < bo.addr || addr + size > bo.addr + bo.size) {
350          fprintf(ctx->fp, "pointer %u: 0x%08x <not valid>\n", i, pointers[i]);
351          continue;
352       }
353 
354       fprintf(ctx->fp, "pointer %u: 0x%08x\n", i, pointers[i]);
355       if (ctx->flags & INTEL_BATCH_DECODE_SURFACES)
356          ctx_print_group(ctx, strct, addr, bo.map + (addr - bo.addr));
357    }
358 }
359 
360 static void
dump_samplers(struct intel_batch_decode_ctx * ctx,uint32_t offset,int count)361 dump_samplers(struct intel_batch_decode_ctx *ctx, uint32_t offset, int count)
362 {
363    struct intel_group *strct = intel_spec_find_struct(ctx->spec, "SAMPLER_STATE");
364    uint64_t state_addr = ctx->dynamic_base + offset;
365 
366    assert(count > 0);
367 
368    struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
369    const void *state_map = bo.map;
370 
371    if (state_map == NULL) {
372       fprintf(ctx->fp, "  samplers unavailable\n");
373       return;
374    }
375 
376    if (offset % 32 != 0) {
377       fprintf(ctx->fp, "  invalid sampler state pointer\n");
378       return;
379    }
380 
381    const unsigned sampler_state_size = strct->dw_length * 4;
382 
383    if (count * sampler_state_size >= bo.size) {
384       fprintf(ctx->fp, "  sampler state ends after bo ends\n");
385       assert(!"sampler state ends after bo ends");
386       return;
387    }
388 
389    for (int i = 0; i < count; i++) {
390       fprintf(ctx->fp, "sampler state %d\n", i);
391       if (ctx->flags & INTEL_BATCH_DECODE_SAMPLERS)
392          ctx_print_group(ctx, strct, state_addr, state_map);
393       state_addr += sampler_state_size;
394       state_map += sampler_state_size;
395    }
396 }
397 
398 static void
handle_interface_descriptor_data(struct intel_batch_decode_ctx * ctx,struct intel_group * desc,const uint32_t * p)399 handle_interface_descriptor_data(struct intel_batch_decode_ctx *ctx,
400                                  struct intel_group *desc, const uint32_t *p)
401 {
402    uint64_t ksp = 0;
403    uint32_t sampler_offset = 0, sampler_count = 0;
404    uint32_t binding_table_offset = 0, binding_entry_count = 0;
405 
406    struct intel_field_iterator iter;
407    intel_field_iterator_init(&iter, desc, p, 0, false);
408    while (intel_field_iterator_next(&iter)) {
409       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
410          ksp = strtoll(iter.value, NULL, 16);
411       } else if (strcmp(iter.name, "Sampler State Pointer") == 0) {
412          sampler_offset = strtol(iter.value, NULL, 16);
413       } else if (strcmp(iter.name, "Sampler Count") == 0) {
414          sampler_count = strtol(iter.value, NULL, 10);
415       } else if (strcmp(iter.name, "Binding Table Pointer") == 0) {
416          binding_table_offset = strtol(iter.value, NULL, 16);
417       } else if (strcmp(iter.name, "Binding Table Entry Count") == 0) {
418          binding_entry_count = strtol(iter.value, NULL, 10);
419       }
420    }
421 
422    ctx_disassemble_program(ctx, ksp, "CS", "compute shader");
423    fprintf(ctx->fp, "\n");
424 
425    if (sampler_count)
426       dump_samplers(ctx, sampler_offset, sampler_count);
427    if (binding_entry_count)
428       dump_binding_table(ctx, binding_table_offset, binding_entry_count);
429 }
430 
431 static void
handle_media_interface_descriptor_load(struct intel_batch_decode_ctx * ctx,const uint32_t * p)432 handle_media_interface_descriptor_load(struct intel_batch_decode_ctx *ctx,
433                                        const uint32_t *p)
434 {
435    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
436    struct intel_group *desc =
437       intel_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
438 
439    struct intel_field_iterator iter;
440    intel_field_iterator_init(&iter, inst, p, 0, false);
441    uint32_t descriptor_offset = 0;
442    int descriptor_count = 0;
443    while (intel_field_iterator_next(&iter)) {
444       if (strcmp(iter.name, "Interface Descriptor Data Start Address") == 0) {
445          descriptor_offset = strtol(iter.value, NULL, 16);
446       } else if (strcmp(iter.name, "Interface Descriptor Total Length") == 0) {
447          descriptor_count =
448             strtol(iter.value, NULL, 16) / (desc->dw_length * 4);
449       }
450    }
451 
452    uint64_t desc_addr = ctx->dynamic_base + descriptor_offset;
453    struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, desc_addr);
454    const void *desc_map = bo.map;
455 
456    if (desc_map == NULL) {
457       fprintf(ctx->fp, "  interface descriptors unavailable\n");
458       return;
459    }
460 
461    for (int i = 0; i < descriptor_count; i++) {
462       fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset);
463 
464       ctx_print_group(ctx, desc, desc_addr, desc_map);
465 
466       handle_interface_descriptor_data(ctx, desc, desc_map);
467 
468       desc_map += desc->dw_length;
469       desc_addr += desc->dw_length * 4;
470    }
471 }
472 
473 static void
handle_compute_walker(struct intel_batch_decode_ctx * ctx,const uint32_t * p)474 handle_compute_walker(struct intel_batch_decode_ctx *ctx,
475                       const uint32_t *p)
476 {
477    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
478 
479    struct intel_field_iterator iter;
480    intel_field_iterator_init(&iter, inst, p, 0, false);
481    while (intel_field_iterator_next(&iter)) {
482       if (strcmp(iter.name, "Interface Descriptor") == 0) {
483          handle_interface_descriptor_data(ctx, iter.struct_desc,
484                                           &iter.p[iter.start_bit / 32]);
485       }
486    }
487 }
488 
489 static void
handle_media_curbe_load(struct intel_batch_decode_ctx * ctx,const uint32_t * p)490 handle_media_curbe_load(struct intel_batch_decode_ctx *ctx,
491                         const uint32_t *p)
492 {
493    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
494 
495    struct intel_field_iterator iter;
496    intel_field_iterator_init(&iter, inst, p, 0, false);
497 
498    uint32_t dynamic_state_offset = 0;
499    uint32_t dynamic_state_length = 0;
500 
501    while (intel_field_iterator_next(&iter)) {
502       if (strcmp(iter.name, "CURBE Data Start Address") == 0) {
503          dynamic_state_offset = iter.raw_value;
504       } else if (strcmp(iter.name, "CURBE Total Data Length") == 0) {
505          dynamic_state_length = iter.raw_value;
506       }
507    }
508 
509    if (dynamic_state_length > 0) {
510       struct intel_batch_decode_bo buffer =
511          ctx_get_bo(ctx, true, ctx->dynamic_base + dynamic_state_offset);
512       if (buffer.map != NULL)
513          ctx_print_buffer(ctx, buffer, dynamic_state_length, 0, -1);
514    }
515 }
516 
517 static void
handle_3dstate_vertex_buffers(struct intel_batch_decode_ctx * ctx,const uint32_t * p)518 handle_3dstate_vertex_buffers(struct intel_batch_decode_ctx *ctx,
519                               const uint32_t *p)
520 {
521    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
522    struct intel_group *vbs = intel_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE");
523 
524    struct intel_batch_decode_bo vb = {};
525    uint32_t vb_size = 0;
526    int index = -1;
527    int pitch = -1;
528    bool ready = false;
529 
530    struct intel_field_iterator iter;
531    intel_field_iterator_init(&iter, inst, p, 0, false);
532    while (intel_field_iterator_next(&iter)) {
533       if (iter.struct_desc != vbs)
534          continue;
535 
536       struct intel_field_iterator vbs_iter;
537       intel_field_iterator_init(&vbs_iter, vbs, &iter.p[iter.start_bit / 32], 0, false);
538       while (intel_field_iterator_next(&vbs_iter)) {
539          if (strcmp(vbs_iter.name, "Vertex Buffer Index") == 0) {
540             index = vbs_iter.raw_value;
541          } else if (strcmp(vbs_iter.name, "Buffer Pitch") == 0) {
542             pitch = vbs_iter.raw_value;
543          } else if (strcmp(vbs_iter.name, "Buffer Starting Address") == 0) {
544             vb = ctx_get_bo(ctx, true, vbs_iter.raw_value);
545          } else if (strcmp(vbs_iter.name, "Buffer Size") == 0) {
546             vb_size = vbs_iter.raw_value;
547             ready = true;
548          } else if (strcmp(vbs_iter.name, "End Address") == 0) {
549             if (vb.map && vbs_iter.raw_value >= vb.addr)
550                vb_size = (vbs_iter.raw_value + 1) - vb.addr;
551             else
552                vb_size = 0;
553             ready = true;
554          }
555 
556          if (!ready)
557             continue;
558 
559          fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size);
560 
561          if (vb.map == NULL) {
562             fprintf(ctx->fp, "  buffer contents unavailable\n");
563             continue;
564          }
565 
566          if (vb.map == 0 || vb_size == 0)
567             continue;
568 
569          if (ctx->flags & INTEL_BATCH_DECODE_VB_DATA)
570             ctx_print_buffer(ctx, vb, vb_size, pitch, ctx->max_vbo_decoded_lines);
571 
572          vb.map = NULL;
573          vb_size = 0;
574          index = -1;
575          pitch = -1;
576          ready = false;
577       }
578    }
579 }
580 
581 static void
handle_3dstate_index_buffer(struct intel_batch_decode_ctx * ctx,const uint32_t * p)582 handle_3dstate_index_buffer(struct intel_batch_decode_ctx *ctx,
583                             const uint32_t *p)
584 {
585    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
586 
587    struct intel_batch_decode_bo ib = {};
588    uint32_t ib_size = 0;
589    uint32_t format = 0;
590 
591    struct intel_field_iterator iter;
592    intel_field_iterator_init(&iter, inst, p, 0, false);
593    while (intel_field_iterator_next(&iter)) {
594       if (strcmp(iter.name, "Index Format") == 0) {
595          format = iter.raw_value;
596       } else if (strcmp(iter.name, "Buffer Starting Address") == 0) {
597          ib = ctx_get_bo(ctx, true, iter.raw_value);
598       } else if (strcmp(iter.name, "Buffer Size") == 0) {
599          ib_size = iter.raw_value;
600       }
601    }
602 
603    if (ib.map == NULL) {
604       fprintf(ctx->fp, "  buffer contents unavailable\n");
605       return;
606    }
607 
608    const void *m = ib.map;
609    const void *ib_end = ib.map + MIN2(ib.size, ib_size);
610    for (int i = 0; m < ib_end && i < 10; i++) {
611       switch (format) {
612       case 0:
613          fprintf(ctx->fp, "%3d ", *(uint8_t *)m);
614          m += 1;
615          break;
616       case 1:
617          fprintf(ctx->fp, "%3d ", *(uint16_t *)m);
618          m += 2;
619          break;
620       case 2:
621          fprintf(ctx->fp, "%3d ", *(uint32_t *)m);
622          m += 4;
623          break;
624       }
625    }
626 
627    if (m < ib_end)
628       fprintf(ctx->fp, "...");
629    fprintf(ctx->fp, "\n");
630 }
631 
632 static void
decode_single_ksp(struct intel_batch_decode_ctx * ctx,const uint32_t * p)633 decode_single_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
634 {
635    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
636 
637    uint64_t ksp = 0;
638    bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gfx8+ only */
639    bool is_enabled = true;
640 
641    struct intel_field_iterator iter;
642    intel_field_iterator_init(&iter, inst, p, 0, false);
643    while (intel_field_iterator_next(&iter)) {
644       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
645          ksp = iter.raw_value;
646       } else if (strcmp(iter.name, "SIMD8 Dispatch Enable") == 0) {
647          is_simd8 = iter.raw_value;
648       } else if (strcmp(iter.name, "Dispatch Mode") == 0) {
649          is_simd8 = strcmp(iter.value, "SIMD8") == 0;
650       } else if (strcmp(iter.name, "Dispatch Enable") == 0) {
651          is_simd8 = strcmp(iter.value, "SIMD8") == 0;
652       } else if (strcmp(iter.name, "Enable") == 0) {
653          is_enabled = iter.raw_value;
654       }
655    }
656 
657    const char *type =
658       strcmp(inst->name,   "VS_STATE") == 0 ? "vertex shader" :
659       strcmp(inst->name,   "GS_STATE") == 0 ? "geometry shader" :
660       strcmp(inst->name,   "SF_STATE") == 0 ? "strips and fans shader" :
661       strcmp(inst->name, "CLIP_STATE") == 0 ? "clip shader" :
662       strcmp(inst->name, "3DSTATE_DS") == 0 ? "tessellation evaluation shader" :
663       strcmp(inst->name, "3DSTATE_HS") == 0 ? "tessellation control shader" :
664       strcmp(inst->name, "3DSTATE_VS") == 0 ? (is_simd8 ? "SIMD8 vertex shader" : "vec4 vertex shader") :
665       strcmp(inst->name, "3DSTATE_GS") == 0 ? (is_simd8 ? "SIMD8 geometry shader" : "vec4 geometry shader") :
666       NULL;
667    const char *short_name =
668       strcmp(inst->name,   "VS_STATE") == 0 ? "VS" :
669       strcmp(inst->name,   "GS_STATE") == 0 ? "GS" :
670       strcmp(inst->name,   "SF_STATE") == 0 ? "SF" :
671       strcmp(inst->name, "CLIP_STATE") == 0 ? "CL" :
672       strcmp(inst->name, "3DSTATE_DS") == 0 ? "DS" :
673       strcmp(inst->name, "3DSTATE_HS") == 0 ? "HS" :
674       strcmp(inst->name, "3DSTATE_VS") == 0 ? "VS" :
675       strcmp(inst->name, "3DSTATE_GS") == 0 ? "GS" :
676       NULL;
677 
678    if (is_enabled) {
679       ctx_disassemble_program(ctx, ksp, short_name, type);
680       fprintf(ctx->fp, "\n");
681    }
682 }
683 
684 static void
decode_mesh_task_ksp(struct intel_batch_decode_ctx * ctx,const uint32_t * p)685 decode_mesh_task_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
686 {
687    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
688 
689    uint64_t ksp = 0;
690    uint64_t local_x_maximum = 0;
691    uint64_t threads = 0;
692 
693    struct intel_field_iterator iter;
694    intel_field_iterator_init(&iter, inst, p, 0, false);
695    while (intel_field_iterator_next(&iter)) {
696       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
697          ksp = iter.raw_value;
698       } else if (strcmp(iter.name, "Local X Maximum") == 0) {
699          local_x_maximum = iter.raw_value;
700       } else if (strcmp(iter.name, "Number of Threads in GPGPU Thread Group") == 0) {
701          threads = iter.raw_value;
702       }
703    }
704 
705    const char *type =
706       strcmp(inst->name,   "3DSTATE_MESH_SHADER") == 0 ? "mesh shader" :
707       strcmp(inst->name,   "3DSTATE_TASK_SHADER") == 0 ? "task shader" :
708       NULL;
709    const char *short_name =
710       strcmp(inst->name,   "3DSTATE_MESH_SHADER") == 0 ? "MS" :
711       strcmp(inst->name,   "3DSTATE_TASK_SHADER") == 0 ? "TS" :
712       NULL;
713 
714    if (threads && local_x_maximum) {
715       ctx_disassemble_program(ctx, ksp, short_name, type);
716       fprintf(ctx->fp, "\n");
717    }
718 }
719 
720 static void
decode_ps_kern(struct intel_batch_decode_ctx * ctx,struct intel_group * inst,const uint32_t * p)721 decode_ps_kern(struct intel_batch_decode_ctx *ctx,
722                struct intel_group *inst, const uint32_t *p)
723 {
724    bool single_ksp = ctx->devinfo.ver == 4;
725    uint64_t ksp[3] = {0, 0, 0};
726    bool enabled[3] = {false, false, false};
727 
728    struct intel_field_iterator iter;
729    intel_field_iterator_init(&iter, inst, p, 0, false);
730    while (intel_field_iterator_next(&iter)) {
731       if (strncmp(iter.name, "Kernel Start Pointer ",
732                   strlen("Kernel Start Pointer ")) == 0) {
733          int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
734          ksp[idx] = strtol(iter.value, NULL, 16);
735       } else if (strcmp(iter.name, "8 Pixel Dispatch Enable") == 0) {
736          enabled[0] = strcmp(iter.value, "true") == 0;
737       } else if (strcmp(iter.name, "16 Pixel Dispatch Enable") == 0) {
738          enabled[1] = strcmp(iter.value, "true") == 0;
739       } else if (strcmp(iter.name, "32 Pixel Dispatch Enable") == 0) {
740          enabled[2] = strcmp(iter.value, "true") == 0;
741       }
742    }
743 
744    if (single_ksp)
745       ksp[1] = ksp[2] = ksp[0];
746 
747    /* Reorder KSPs to be [8, 16, 32] instead of the hardware order. */
748    if (enabled[0] + enabled[1] + enabled[2] == 1) {
749       if (enabled[1]) {
750          ksp[1] = ksp[0];
751          ksp[0] = 0;
752       } else if (enabled[2]) {
753          ksp[2] = ksp[0];
754          ksp[0] = 0;
755       }
756    } else {
757       uint64_t tmp = ksp[1];
758       ksp[1] = ksp[2];
759       ksp[2] = tmp;
760    }
761 
762    if (enabled[0])
763       ctx_disassemble_program(ctx, ksp[0], "FS8", "SIMD8 fragment shader");
764    if (enabled[1])
765       ctx_disassemble_program(ctx, ksp[1], "FS16", "SIMD16 fragment shader");
766    if (enabled[2])
767       ctx_disassemble_program(ctx, ksp[2], "FS32", "SIMD32 fragment shader");
768 
769    if (enabled[0] || enabled[1] || enabled[2])
770       fprintf(ctx->fp, "\n");
771 }
772 
773 static void
decode_ps_kern_xe2(struct intel_batch_decode_ctx * ctx,struct intel_group * inst,const uint32_t * p)774 decode_ps_kern_xe2(struct intel_batch_decode_ctx *ctx,
775                      struct intel_group *inst, const uint32_t *p)
776 {
777    uint64_t ksp[2] = {0, 0};
778    bool enabled[2] = {false, false};
779    int width[2] = {0, 0};
780 
781    struct intel_field_iterator iter;
782    intel_field_iterator_init(&iter, inst, p, 0, false);
783    while (intel_field_iterator_next(&iter)) {
784       if (strncmp(iter.name, "Kernel Start Pointer ",
785                   strlen("Kernel Start Pointer ")) == 0) {
786          int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
787          ksp[idx] = strtol(iter.value, NULL, 16);
788       } else if (strcmp(iter.name, "Kernel 0 Enable") == 0) {
789          enabled[0] = strcmp(iter.value, "true") == 0;
790       } else if (strcmp(iter.name, "Kernel 1 Enable") == 0) {
791          enabled[1] = strcmp(iter.value, "true") == 0;
792       } else if (strcmp(iter.name, "Kernel[0] : SIMD Width") == 0) {
793          width[0] = strncmp(iter.value, "0 ", 2) == 0 ? 16 : 32;
794       } else if (strcmp(iter.name, "Kernel[1] : SIMD Width") == 0) {
795          width[1] = strncmp(iter.value, "0 ", 2) == 0 ? 16 : 32;
796       }
797    }
798 
799    for (int i = 0; i < 2; i++) {
800       if (enabled[i])
801          ctx_disassemble_program(ctx, ksp[i], "FS",
802                                  width[i] == 16 ?
803                                  "SIMD16 fragment shader" :
804                                  "SIMD32 fragment shader");
805    }
806 
807    if (enabled[0] || enabled[1])
808       fprintf(ctx->fp, "\n");
809 }
810 
811 static void
decode_ps_kernels(struct intel_batch_decode_ctx * ctx,const uint32_t * p)812 decode_ps_kernels(struct intel_batch_decode_ctx *ctx,
813                   const uint32_t *p)
814 {
815    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
816    if (ctx->devinfo.ver >= 20)
817       decode_ps_kern_xe2(ctx, inst, p);
818    else
819       decode_ps_kern(ctx, inst, p);
820 }
821 
822 static void
decode_3dstate_constant_all(struct intel_batch_decode_ctx * ctx,const uint32_t * p)823 decode_3dstate_constant_all(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
824 {
825    struct intel_group *inst =
826       intel_spec_find_instruction(ctx->spec, ctx->engine, p);
827    struct intel_group *body =
828       intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_ALL_DATA");
829 
830    uint32_t read_length[4] = {0};
831    struct intel_batch_decode_bo buffer[4];
832    memset(buffer, 0, sizeof(buffer));
833 
834    struct intel_field_iterator outer;
835    intel_field_iterator_init(&outer, inst, p, 0, false);
836    int idx = 0;
837    while (intel_field_iterator_next(&outer)) {
838       if (outer.struct_desc != body)
839          continue;
840 
841       struct intel_field_iterator iter;
842       intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
843                               0, false);
844       while (intel_field_iterator_next(&iter)) {
845          if (!strcmp(iter.name, "Pointer To Constant Buffer")) {
846             buffer[idx] = ctx_get_bo(ctx, true, iter.raw_value);
847          } else if (!strcmp(iter.name, "Constant Buffer Read Length")) {
848             read_length[idx] = iter.raw_value;
849          }
850       }
851       idx++;
852    }
853 
854    for (int i = 0; i < 4; i++) {
855       if (read_length[i] == 0 || buffer[i].map == NULL)
856          continue;
857 
858       unsigned size = read_length[i] * 32;
859       fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
860 
861       ctx_print_buffer(ctx, buffer[i], size, 0, -1);
862    }
863 }
864 
865 static void
decode_3dstate_constant(struct intel_batch_decode_ctx * ctx,const uint32_t * p)866 decode_3dstate_constant(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
867 {
868    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
869    struct intel_group *body =
870       intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");
871 
872    uint32_t read_length[4] = {0};
873    uint64_t read_addr[4] = {0};
874 
875    struct intel_field_iterator outer;
876    intel_field_iterator_init(&outer, inst, p, 0, false);
877    while (intel_field_iterator_next(&outer)) {
878       if (outer.struct_desc != body)
879          continue;
880 
881       struct intel_field_iterator iter;
882       intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
883                               0, false);
884 
885       while (intel_field_iterator_next(&iter)) {
886          int idx;
887          if (sscanf(iter.name, "Read Length[%d]", &idx) == 1) {
888             read_length[idx] = iter.raw_value;
889          } else if (sscanf(iter.name, "Buffer[%d]", &idx) == 1) {
890             read_addr[idx] = iter.raw_value;
891          }
892       }
893 
894       for (int i = 0; i < 4; i++) {
895          if (read_length[i] == 0)
896             continue;
897 
898          struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr[i]);
899          if (!buffer.map) {
900             fprintf(ctx->fp, "constant buffer %d unavailable\n", i);
901             continue;
902          }
903 
904          unsigned size = read_length[i] * 32;
905          fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
906 
907          ctx_print_buffer(ctx, buffer, size, 0, -1);
908       }
909    }
910 }
911 
912 static void
decode_gfx4_constant_buffer(struct intel_batch_decode_ctx * ctx,const uint32_t * p)913 decode_gfx4_constant_buffer(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
914 {
915    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
916    uint64_t read_length = 0, read_addr = 0, valid = 0;
917    struct intel_field_iterator iter;
918    intel_field_iterator_init(&iter, inst, p, 0, false);
919 
920    while (intel_field_iterator_next(&iter)) {
921       if (!strcmp(iter.name, "Buffer Length")) {
922          read_length = iter.raw_value;
923       } else if (!strcmp(iter.name, "Valid")) {
924          valid = iter.raw_value;
925       } else if (!strcmp(iter.name, "Buffer Starting Address")) {
926          read_addr = iter.raw_value;
927       }
928    }
929 
930    if (!valid)
931       return;
932 
933    struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr);
934    if (!buffer.map) {
935       fprintf(ctx->fp, "constant buffer unavailable\n");
936       return;
937    }
938    unsigned size = (read_length + 1) * 16 * sizeof(float);
939    fprintf(ctx->fp, "constant buffer size %u\n", size);
940 
941    ctx_print_buffer(ctx, buffer, size, 0, -1);
942 }
943 
944 
945 static void
decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx * ctx,const uint32_t * p)946 decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
947                                            const uint32_t *p)
948 {
949    fprintf(ctx->fp, "VS Binding Table:\n");
950    dump_binding_table(ctx, p[1], -1);
951 
952    fprintf(ctx->fp, "GS Binding Table:\n");
953    dump_binding_table(ctx, p[2], -1);
954 
955    if (ctx->devinfo.ver < 6) {
956       fprintf(ctx->fp, "CLIP Binding Table:\n");
957       dump_binding_table(ctx, p[3], -1);
958       fprintf(ctx->fp, "SF Binding Table:\n");
959       dump_binding_table(ctx, p[4], -1);
960       fprintf(ctx->fp, "PS Binding Table:\n");
961       dump_binding_table(ctx, p[5], -1);
962    } else {
963       fprintf(ctx->fp, "PS Binding Table:\n");
964       dump_binding_table(ctx, p[3], -1);
965    }
966 }
967 
/*
 * Dump the binding table whose offset is stored in dword 1 of a per-stage
 * 3DSTATE_BINDING_TABLE_POINTERS_* packet.  The entry count is passed as -1.
 */
static void
decode_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   const uint32_t table_offset = p[1];

   dump_binding_table(ctx, table_offset, -1);
}
974 
/*
 * Dump one sampler state located at the offset stored in dword 1 of a
 * per-stage 3DSTATE_SAMPLER_STATE_POINTERS_* packet.
 */
static void
decode_3dstate_sampler_state_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   const uint32_t sampler_offset = p[1];

   dump_samplers(ctx, sampler_offset, 1);
}
981 
/*
 * Dump one sampler state for each of the three offsets stored in dwords
 * 1..3 of the combined 3DSTATE_SAMPLER_STATE_POINTERS packet.
 */
static void
decode_3dstate_sampler_state_pointers_gfx6(struct intel_batch_decode_ctx *ctx,
                                           const uint32_t *p)
{
   for (unsigned dw = 1; dw <= 3; dw++)
      dump_samplers(ctx, p[dw], 1);
}
990 
/*
 * Return true when the string str ends with the string end.
 *
 * An empty end matches every string.  Lengths are compared as size_t so no
 * narrowing of a size_t difference to int is involved.
 */
static bool
str_ends_with(const char *str, const char *end)
{
   const size_t str_len = strlen(str);
   const size_t end_len = strlen(end);

   /* A suffix longer than the string itself can never match. */
   if (end_len > str_len)
      return false;

   return strcmp(str + (str_len - end_len), end) == 0;
}
1000 
1001 static void
decode_dynamic_state(struct intel_batch_decode_ctx * ctx,const char * struct_type,uint32_t state_offset,int count)1002 decode_dynamic_state(struct intel_batch_decode_ctx *ctx,
1003                        const char *struct_type, uint32_t state_offset,
1004                        int count)
1005 {
1006    uint64_t state_addr = ctx->dynamic_base + state_offset;
1007    struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
1008    const void *state_map = bo.map;
1009 
1010    if (state_map == NULL) {
1011       fprintf(ctx->fp, "  dynamic %s state unavailable\n", struct_type);
1012       return;
1013    }
1014 
1015    struct intel_group *state = intel_spec_find_struct(ctx->spec, struct_type);
1016    if (strcmp(struct_type, "BLEND_STATE") == 0) {
1017       /* Blend states are different from the others because they have a header
1018        * struct called BLEND_STATE which is followed by a variable number of
1019        * BLEND_STATE_ENTRY structs.
1020        */
1021       fprintf(ctx->fp, "%s\n", struct_type);
1022       ctx_print_group(ctx, state, state_addr, state_map);
1023 
1024       state_addr += state->dw_length * 4;
1025       state_map += state->dw_length * 4;
1026 
1027       struct_type = "BLEND_STATE_ENTRY";
1028       state = intel_spec_find_struct(ctx->spec, struct_type);
1029    }
1030 
1031    count = update_count(ctx, ctx->dynamic_base + state_offset,
1032                         ctx->dynamic_base, state->dw_length, count);
1033 
1034    for (int i = 0; i < count; i++) {
1035       fprintf(ctx->fp, "%s %d\n", struct_type, i);
1036       ctx_print_group(ctx, state, state_addr, state_map);
1037 
1038       state_addr += state->dw_length * 4;
1039       state_map += state->dw_length * 4;
1040    }
1041 }
1042 
1043 static void
decode_dynamic_state_pointers(struct intel_batch_decode_ctx * ctx,const char * struct_type,const uint32_t * p,int count)1044 decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx,
1045                               const char *struct_type, const uint32_t *p,
1046                               int count)
1047 {
1048    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1049 
1050    uint32_t state_offset = 0;
1051 
1052    struct intel_field_iterator iter;
1053    intel_field_iterator_init(&iter, inst, p, 0, false);
1054    while (intel_field_iterator_next(&iter)) {
1055       if (str_ends_with(iter.name, "Pointer") || !strncmp(iter.name, "Pointer", 7)) {
1056          state_offset = iter.raw_value;
1057          break;
1058       }
1059    }
1060    decode_dynamic_state(ctx, struct_type, state_offset, count);
1061 }
1062 
1063 static void
decode_3dstate_viewport_state_pointers(struct intel_batch_decode_ctx * ctx,const uint32_t * p)1064 decode_3dstate_viewport_state_pointers(struct intel_batch_decode_ctx *ctx,
1065                                        const uint32_t *p)
1066 {
1067    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1068    uint32_t state_offset = 0;
1069    bool clip = false, sf = false, cc = false;
1070    struct intel_field_iterator iter;
1071    intel_field_iterator_init(&iter, inst, p, 0, false);
1072    while (intel_field_iterator_next(&iter)) {
1073       if (!strcmp(iter.name, "CLIP Viewport State Change"))
1074          clip = iter.raw_value;
1075       if (!strcmp(iter.name, "SF Viewport State Change"))
1076          sf = iter.raw_value;
1077       if (!strcmp(iter.name, "CC Viewport State Change"))
1078          cc = iter.raw_value;
1079       else if (!strcmp(iter.name, "Pointer to CLIP_VIEWPORT") && clip) {
1080          state_offset = iter.raw_value;
1081          decode_dynamic_state(ctx, "CLIP_VIEWPORT", state_offset, 1);
1082       }
1083       else if (!strcmp(iter.name, "Pointer to SF_VIEWPORT") && sf) {
1084          state_offset = iter.raw_value;
1085          decode_dynamic_state(ctx, "SF_VIEWPORT", state_offset, 1);
1086       }
1087       else if (!strcmp(iter.name, "Pointer to CC_VIEWPORT") && cc) {
1088          state_offset = iter.raw_value;
1089          decode_dynamic_state(ctx, "CC_VIEWPORT", state_offset, 1);
1090       }
1091    }
1092 }
1093 
/*
 * Decode four CC_VIEWPORT entries through the pointer field of the packet
 * at p.
 */
static void
decode_3dstate_viewport_state_pointers_cc(struct intel_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   const int viewport_count = 4;

   decode_dynamic_state_pointers(ctx, "CC_VIEWPORT", p, viewport_count);
}
1100 
/*
 * Decode four SF_CLIP_VIEWPORT entries through the pointer field of the
 * packet at p.
 */
static void
decode_3dstate_viewport_state_pointers_sf_clip(struct intel_batch_decode_ctx *ctx,
                                               const uint32_t *p)
{
   const int viewport_count = 4;

   decode_dynamic_state_pointers(ctx, "SF_CLIP_VIEWPORT", p, viewport_count);
}
1107 
/*
 * Decode the BLEND_STATE referenced by the pointer field of the packet at
 * p, requesting a single entry.
 */
static void
decode_3dstate_blend_state_pointers(struct intel_batch_decode_ctx *ctx,
                                    const uint32_t *p)
{
   const int entry_count = 1;

   decode_dynamic_state_pointers(ctx, "BLEND_STATE", p, entry_count);
}
1114 
1115 static void
decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx * ctx,const uint32_t * p)1116 decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx,
1117                                  const uint32_t *p)
1118 {
1119    if (ctx->devinfo.ver != 6) {
1120       decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1);
1121       return;
1122    }
1123 
1124    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1125 
1126    uint32_t state_offset = 0;
1127    bool blend_change = false, ds_change = false, cc_change = false;
1128    struct intel_field_iterator iter;
1129    intel_field_iterator_init(&iter, inst, p, 0, false);
1130    while (intel_field_iterator_next(&iter)) {
1131       if (!strcmp(iter.name, "BLEND_STATE Change"))
1132          blend_change = iter.raw_value;
1133       else if (!strcmp(iter.name, "DEPTH_STENCIL_STATE Change"))
1134          ds_change = iter.raw_value;
1135       else if (!strcmp(iter.name, "Color Calc State Pointer Valid"))
1136          cc_change = iter.raw_value;
1137       else if (!strcmp(iter.name, "Pointer to DEPTH_STENCIL_STATE") && ds_change) {
1138          state_offset = iter.raw_value;
1139          decode_dynamic_state(ctx, "DEPTH_STENCIL_STATE", state_offset, 1);
1140       }
1141       else if (!strcmp(iter.name, "Pointer to BLEND_STATE") && blend_change) {
1142          state_offset = iter.raw_value;
1143          decode_dynamic_state(ctx, "BLEND_STATE", state_offset, 1);
1144       }
1145       else if (!strcmp(iter.name, "Color Calc State Pointer") && cc_change) {
1146          state_offset = iter.raw_value;
1147          decode_dynamic_state(ctx, "COLOR_CALC_STATE", state_offset, 1);
1148       }
1149    }
1150 }
1151 
/*
 * Decode the DEPTH_STENCIL_STATE referenced by the pointer field of the
 * packet at p, requesting a single entry.
 */
static void
decode_3dstate_ds_state_pointers(struct intel_batch_decode_ctx *ctx,
                                 const uint32_t *p)
{
   const int entry_count = 1;

   decode_dynamic_state_pointers(ctx, "DEPTH_STENCIL_STATE", p, entry_count);
}
1158 
/*
 * Decode the SCISSOR_RECT referenced by the pointer field of the packet at
 * p, requesting a single entry.
 */
static void
decode_3dstate_scissor_state_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   const int entry_count = 1;

   decode_dynamic_state_pointers(ctx, "SCISSOR_RECT", p, entry_count);
}
1165 
/*
 * Decode the SLICE_HASH_TABLE referenced by the pointer field of the packet
 * at p, requesting a single entry.
 */
static void
decode_3dstate_slice_table_state_pointers(struct intel_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   const int entry_count = 1;

   decode_dynamic_state_pointers(ctx, "SLICE_HASH_TABLE", p, entry_count);
}
1172 
1173 static void
handle_gt_mode(struct intel_batch_decode_ctx * ctx,uint32_t reg_addr,uint32_t val)1174 handle_gt_mode(struct intel_batch_decode_ctx *ctx,
1175                uint32_t reg_addr, uint32_t val)
1176 {
1177    struct intel_group *reg = intel_spec_find_register(ctx->spec, reg_addr);
1178 
1179    assert(intel_group_get_length(reg, &val) == 1);
1180 
1181    struct intel_field_iterator iter;
1182    intel_field_iterator_init(&iter, reg, &val, 0, false);
1183 
1184    uint32_t bt_alignment;
1185    bool bt_alignment_mask = 0;
1186 
1187    while (intel_field_iterator_next(&iter)) {
1188       if (strcmp(iter.name, "Binding Table Alignment") == 0) {
1189          bt_alignment = iter.raw_value;
1190       } else if (strcmp(iter.name, "Binding Table Alignment Mask") == 0) {
1191          bt_alignment_mask = iter.raw_value;
1192       }
1193    }
1194 
1195    if (bt_alignment_mask)
1196       ctx->use_256B_binding_tables = bt_alignment;
1197 }
1198 
1199 struct reg_handler {
1200    const char *name;
1201    void (*handler)(struct intel_batch_decode_ctx *ctx,
1202                    uint32_t reg_addr, uint32_t val);
1203 } reg_handlers[] = {
1204    { "GT_MODE", handle_gt_mode }
1205 };
1206 
1207 static void
decode_load_register_imm(struct intel_batch_decode_ctx * ctx,const uint32_t * p)1208 decode_load_register_imm(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1209 {
1210    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1211    const unsigned length = intel_group_get_length(inst, p);
1212    assert(length & 1);
1213    const unsigned nr_regs = (length - 1) / 2;
1214 
1215    for (unsigned i = 0; i < nr_regs; i++) {
1216       struct intel_group *reg = intel_spec_find_register(ctx->spec, p[i * 2 + 1]);
1217       if (reg != NULL) {
1218          fprintf(ctx->fp, "register %s (0x%x): 0x%x\n",
1219                  reg->name, reg->register_offset, p[2]);
1220          ctx_print_group(ctx, reg, reg->register_offset, &p[2]);
1221 
1222          for (unsigned i = 0; i < ARRAY_SIZE(reg_handlers); i++) {
1223             if (strcmp(reg->name, reg_handlers[i].name) == 0)
1224                reg_handlers[i].handler(ctx, p[1], p[2]);
1225          }
1226       }
1227    }
1228 }
1229 
1230 static void
disasm_program_from_group(struct intel_batch_decode_ctx * ctx,struct intel_group * strct,const void * map,const char * short_name,const char * type)1231 disasm_program_from_group(struct intel_batch_decode_ctx *ctx,
1232                           struct intel_group *strct, const void *map,
1233                           const char *short_name, const char *type)
1234 {
1235    uint64_t ksp = 0;
1236    bool is_enabled = true;
1237    struct intel_field_iterator iter;
1238 
1239    intel_field_iterator_init(&iter, strct, map, 0, false);
1240 
1241    while (intel_field_iterator_next(&iter)) {
1242       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
1243          ksp = iter.raw_value;
1244       } else if (strcmp(iter.name, "Enable") == 0) {
1245          is_enabled = iter.raw_value;
1246       }
1247    }
1248 
1249    if (is_enabled) {
1250       ctx_disassemble_program(ctx, ksp, short_name, type);
1251       fprintf(ctx->fp, "\n");
1252    }
1253 }
1254 
1255 static void
decode_vs_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1256 decode_vs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1257 {
1258    struct intel_group *strct =
1259       intel_spec_find_struct(ctx->spec, "VS_STATE");
1260    if (strct == NULL) {
1261       fprintf(ctx->fp, "did not find VS_STATE info\n");
1262       return;
1263    }
1264 
1265    struct intel_batch_decode_bo bind_bo =
1266       ctx_get_bo(ctx, true, offset);
1267 
1268    if (bind_bo.map == NULL) {
1269       fprintf(ctx->fp, " vs state unavailable\n");
1270       return;
1271    }
1272 
1273    ctx_print_group(ctx, strct, offset, bind_bo.map);
1274    disasm_program_from_group(ctx, strct, bind_bo.map, "VS", "vertex shader");
1275 }
1276 
1277 static void
decode_gs_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1278 decode_gs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1279 {
1280    struct intel_group *strct =
1281       intel_spec_find_struct(ctx->spec, "GS_STATE");
1282    if (strct == NULL) {
1283       fprintf(ctx->fp, "did not find GS_STATE info\n");
1284       return;
1285    }
1286 
1287    struct intel_batch_decode_bo bind_bo =
1288       ctx_get_bo(ctx, true, offset);
1289 
1290    if (bind_bo.map == NULL) {
1291       fprintf(ctx->fp, " gs state unavailable\n");
1292       return;
1293    }
1294 
1295    ctx_print_group(ctx, strct, offset, bind_bo.map);
1296    disasm_program_from_group(ctx, strct, bind_bo.map, "GS", "geometry shader");
1297 }
1298 
1299 static void
decode_clip_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1300 decode_clip_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1301 {
1302    struct intel_group *strct =
1303       intel_spec_find_struct(ctx->spec, "CLIP_STATE");
1304    if (strct == NULL) {
1305       fprintf(ctx->fp, "did not find CLIP_STATE info\n");
1306       return;
1307    }
1308 
1309    struct intel_batch_decode_bo bind_bo =
1310       ctx_get_bo(ctx, true, offset);
1311 
1312    if (bind_bo.map == NULL) {
1313       fprintf(ctx->fp, " clip state unavailable\n");
1314       return;
1315    }
1316 
1317    ctx_print_group(ctx, strct, offset, bind_bo.map);
1318    disasm_program_from_group(ctx, strct, bind_bo.map, "CL", "clip shader");
1319 
1320    struct intel_group *vp_strct =
1321       intel_spec_find_struct(ctx->spec, "CLIP_VIEWPORT");
1322    if (vp_strct == NULL) {
1323       fprintf(ctx->fp, "did not find CLIP_VIEWPORT info\n");
1324       return;
1325    }
1326    uint32_t clip_vp_offset = ((uint32_t *)bind_bo.map)[6] & ~0x3;
1327    struct intel_batch_decode_bo vp_bo =
1328       ctx_get_bo(ctx, true, clip_vp_offset);
1329    if (vp_bo.map == NULL) {
1330       fprintf(ctx->fp, " clip vp state unavailable\n");
1331       return;
1332    }
1333    ctx_print_group(ctx, vp_strct, clip_vp_offset, vp_bo.map);
1334 }
1335 
1336 static void
decode_sf_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1337 decode_sf_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1338 {
1339    struct intel_group *strct =
1340       intel_spec_find_struct(ctx->spec, "SF_STATE");
1341    if (strct == NULL) {
1342       fprintf(ctx->fp, "did not find SF_STATE info\n");
1343       return;
1344    }
1345 
1346    struct intel_batch_decode_bo bind_bo =
1347       ctx_get_bo(ctx, true, offset);
1348 
1349    if (bind_bo.map == NULL) {
1350       fprintf(ctx->fp, " sf state unavailable\n");
1351       return;
1352    }
1353 
1354    ctx_print_group(ctx, strct, offset, bind_bo.map);
1355    disasm_program_from_group(ctx, strct, bind_bo.map, "SF", "strips and fans shader");
1356 
1357    struct intel_group *vp_strct =
1358       intel_spec_find_struct(ctx->spec, "SF_VIEWPORT");
1359    if (vp_strct == NULL) {
1360       fprintf(ctx->fp, "did not find SF_VIEWPORT info\n");
1361       return;
1362    }
1363 
1364    uint32_t sf_vp_offset = ((uint32_t *)bind_bo.map)[5] & ~0x3;
1365    struct intel_batch_decode_bo vp_bo =
1366       ctx_get_bo(ctx, true, sf_vp_offset);
1367    if (vp_bo.map == NULL) {
1368       fprintf(ctx->fp, " sf vp state unavailable\n");
1369       return;
1370    }
1371    ctx_print_group(ctx, vp_strct, sf_vp_offset, vp_bo.map);
1372 }
1373 
1374 static void
decode_wm_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1375 decode_wm_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1376 {
1377    struct intel_group *strct =
1378       intel_spec_find_struct(ctx->spec, "WM_STATE");
1379    if (strct == NULL) {
1380       fprintf(ctx->fp, "did not find WM_STATE info\n");
1381       return;
1382    }
1383 
1384    struct intel_batch_decode_bo bind_bo =
1385       ctx_get_bo(ctx, true, offset);
1386 
1387    if (bind_bo.map == NULL) {
1388       fprintf(ctx->fp, " wm state unavailable\n");
1389       return;
1390    }
1391 
1392    ctx_print_group(ctx, strct, offset, bind_bo.map);
1393 
1394    decode_ps_kern(ctx, strct, bind_bo.map);
1395 }
1396 
1397 static void
decode_cc_state(struct intel_batch_decode_ctx * ctx,uint32_t offset)1398 decode_cc_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1399 {
1400    struct intel_group *strct =
1401       intel_spec_find_struct(ctx->spec, "COLOR_CALC_STATE");
1402    if (strct == NULL) {
1403       fprintf(ctx->fp, "did not find COLOR_CALC_STATE info\n");
1404       return;
1405    }
1406 
1407    struct intel_batch_decode_bo bind_bo =
1408       ctx_get_bo(ctx, true, offset);
1409 
1410    if (bind_bo.map == NULL) {
1411       fprintf(ctx->fp, " cc state unavailable\n");
1412       return;
1413    }
1414 
1415    ctx_print_group(ctx, strct, offset, bind_bo.map);
1416 
1417    struct intel_group *vp_strct =
1418       intel_spec_find_struct(ctx->spec, "CC_VIEWPORT");
1419    if (vp_strct == NULL) {
1420       fprintf(ctx->fp, "did not find CC_VIEWPORT info\n");
1421       return;
1422    }
1423    uint32_t cc_vp_offset = ((uint32_t *)bind_bo.map)[4] & ~0x3;
1424    struct intel_batch_decode_bo vp_bo =
1425       ctx_get_bo(ctx, true, cc_vp_offset);
1426    if (vp_bo.map == NULL) {
1427       fprintf(ctx->fp, " cc vp state unavailable\n");
1428       return;
1429    }
1430    ctx_print_group(ctx, vp_strct, cc_vp_offset, vp_bo.map);
1431 }
1432 static void
decode_pipelined_pointers(struct intel_batch_decode_ctx * ctx,const uint32_t * p)1433 decode_pipelined_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1434 {
1435    fprintf(ctx->fp, "VS State Table:\n");
1436    decode_vs_state(ctx, p[1]);
1437    if (p[2] & 1) {
1438       fprintf(ctx->fp, "GS State Table:\n");
1439       decode_gs_state(ctx, p[2] & ~1);
1440    }
1441    fprintf(ctx->fp, "Clip State Table:\n");
1442    decode_clip_state(ctx, p[3] & ~1);
1443    fprintf(ctx->fp, "SF State Table:\n");
1444    decode_sf_state(ctx, p[4]);
1445    fprintf(ctx->fp, "WM State Table:\n");
1446    decode_wm_state(ctx, p[5]);
1447    fprintf(ctx->fp, "CC State Table:\n");
1448    decode_cc_state(ctx, p[6]);
1449 }
1450 
/*
 * Decode the CPS_STATE referenced by the pointer field of the packet at p,
 * requesting a single entry.
 */
static void
decode_cps_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   const int entry_count = 1;

   decode_dynamic_state_pointers(ctx, "CPS_STATE", p, entry_count);
}
1456 
/*
 * Table mapping command names to extra decoding routines.
 *
 * cmd_name is compared with strcmp() against the instruction name from the
 * spec; decode is then called with the decode context and a pointer to the
 * first dword of the command.  Lookups in this file stop at the first
 * matching entry.
 */
struct custom_decoder {
   const char *cmd_name;
   void (*decode)(struct intel_batch_decode_ctx *ctx, const uint32_t *p);
} custom_decoders[] = {
   /* Base addresses, compute/media dispatch and vertex/index buffers. */
   { "STATE_BASE_ADDRESS", handle_state_base_address },
   { "3DSTATE_BINDING_TABLE_POOL_ALLOC", handle_binding_table_pool_alloc },
   { "MEDIA_INTERFACE_DESCRIPTOR_LOAD", handle_media_interface_descriptor_load },
   { "COMPUTE_WALKER", handle_compute_walker },
   { "MEDIA_CURBE_LOAD", handle_media_curbe_load },
   { "3DSTATE_VERTEX_BUFFERS", handle_3dstate_vertex_buffers },
   { "3DSTATE_INDEX_BUFFER", handle_3dstate_index_buffer },
   /* Shader stage packets: disassemble the referenced kernels. */
   { "3DSTATE_VS", decode_single_ksp },
   { "3DSTATE_GS", decode_single_ksp },
   { "3DSTATE_DS", decode_single_ksp },
   { "3DSTATE_HS", decode_single_ksp },
   { "3DSTATE_PS", decode_ps_kernels },
   { "3DSTATE_WM", decode_ps_kernels },
   { "3DSTATE_MESH_SHADER", decode_mesh_task_ksp },
   { "3DSTATE_TASK_SHADER", decode_mesh_task_ksp },
   /* Constant buffer packets. */
   { "3DSTATE_CONSTANT_VS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_GS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_PS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_HS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_DS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_ALL", decode_3dstate_constant_all },

   /* Binding table pointers (combined packet first, then per stage). */
   { "3DSTATE_BINDING_TABLE_POINTERS", decode_gfx4_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_GS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_PS", decode_3dstate_binding_table_pointers },

   /* Sampler state pointers (per stage, then the combined packet). */
   { "3DSTATE_SAMPLER_STATE_POINTERS_VS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gfx6 },

   /* Dynamic state pointers and miscellaneous commands. */
   { "3DSTATE_VIEWPORT_STATE_POINTERS", decode_3dstate_viewport_state_pointers },
   { "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc },
   { "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
   { "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers },
   { "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers },
   { "3DSTATE_DEPTH_STENCIL_STATE_POINTERS", decode_3dstate_ds_state_pointers },
   { "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers },
   { "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers },
   { "MI_LOAD_REGISTER_IMM", decode_load_register_imm },
   { "3DSTATE_PIPELINED_POINTERS", decode_pipelined_pointers },
   { "3DSTATE_CPS_POINTERS", decode_cps_pointers },
   { "CONSTANT_BUFFER", decode_gfx4_constant_buffer },
};
1510 
1511 static void
get_inst_color(const struct intel_batch_decode_ctx * ctx,const struct intel_group * inst,char ** const out_color,char ** const out_reset_color)1512 get_inst_color(const struct intel_batch_decode_ctx *ctx,
1513                const struct intel_group *inst,
1514                char **const out_color,
1515                char **const out_reset_color)
1516 {
1517    const char *inst_name = intel_group_get_name(inst);
1518    if (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) {
1519       *out_reset_color = NORMAL;
1520       if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
1521          if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 ||
1522              strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0)
1523             *out_color = GREEN_HEADER;
1524          else
1525             *out_color = BLUE_HEADER;
1526       } else {
1527          *out_color = NORMAL;
1528       }
1529    } else {
1530       *out_color = "";
1531       *out_reset_color = "";
1532    }
1533 }
1534 
/* Pairs an instruction description with a pointer to its command dwords;
 * used to sort and print the accumulated commands.
 */
struct inst_ptr {
   struct intel_group *inst;  /* instruction description (sort key: its name) */
   uint32_t           *ptr;   /* dwords recorded for that instruction */
};
1539 
1540 static int
compare_inst_ptr(const void * v1,const void * v2)1541 compare_inst_ptr(const void *v1, const void *v2)
1542 {
1543    const struct inst_ptr *i1 = v1, *i2 = v2;
1544    return strcmp(i1->inst->name, i2->inst->name);
1545 }
1546 
1547 static void
intel_print_accumulated_instrs(struct intel_batch_decode_ctx * ctx)1548 intel_print_accumulated_instrs(struct intel_batch_decode_ctx *ctx)
1549 {
1550    struct util_dynarray arr;
1551    util_dynarray_init(&arr, NULL);
1552 
1553    hash_table_foreach(ctx->commands, entry) {
1554       struct inst_ptr inst = {
1555          .inst = (struct intel_group *)entry->key,
1556          .ptr  = entry->data,
1557       };
1558       util_dynarray_append(&arr, struct inst_ptr, inst);
1559    }
1560    qsort(util_dynarray_begin(&arr),
1561          util_dynarray_num_elements(&arr, struct inst_ptr),
1562          sizeof(struct inst_ptr),
1563          compare_inst_ptr);
1564 
1565    fprintf(ctx->fp, "----\n");
1566    util_dynarray_foreach(&arr, struct inst_ptr, i) {
1567       char *begin_color;
1568       char *end_color;
1569       get_inst_color(ctx, i->inst, &begin_color, &end_color);
1570 
1571       uint64_t offset = 0;
1572       fprintf(ctx->fp, "%s0x%08"PRIx64":  0x%08x:  %-80s%s\n",
1573               begin_color, offset, i->ptr[0], i->inst->name, end_color);
1574       if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
1575          ctx_print_group(ctx, i->inst, 0, i->ptr);
1576          for (int d = 0; d < ARRAY_SIZE(custom_decoders); d++) {
1577             if (strcmp(i->inst->name, custom_decoders[d].cmd_name) == 0) {
1578                custom_decoders[d].decode(ctx, i->ptr);
1579                break;
1580             }
1581          }
1582       }
1583    }
1584    util_dynarray_fini(&arr);
1585 }
1586 
1587 void
intel_print_batch(struct intel_batch_decode_ctx * ctx,const uint32_t * batch,uint32_t batch_size,uint64_t batch_addr,bool from_ring)1588 intel_print_batch(struct intel_batch_decode_ctx *ctx,
1589                   const uint32_t *batch, uint32_t batch_size,
1590                   uint64_t batch_addr, bool from_ring)
1591 {
1592    const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
1593    int length;
1594    struct intel_group *inst;
1595    const char *reset_color = ctx->flags & INTEL_BATCH_DECODE_IN_COLOR ? NORMAL : "";
1596 
1597    if (ctx->n_batch_buffer_start >= 100) {
1598       fprintf(ctx->fp, "%s0x%08"PRIx64": Max batch buffer jumps exceeded%s\n",
1599               (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
1600               (ctx->flags & INTEL_BATCH_DECODE_OFFSETS) ? batch_addr : 0,
1601               reset_color);
1602       return;
1603    }
1604 
1605    ctx->n_batch_buffer_start++;
1606 
1607    for (p = batch; p < end; p += length) {
1608       inst = intel_ctx_find_instruction(ctx, p);
1609       length = intel_group_get_length(inst, p);
1610       assert(inst == NULL || length > 0);
1611       length = MAX2(1, length);
1612 
1613       uint64_t offset;
1614       if (ctx->flags & INTEL_BATCH_DECODE_OFFSETS)
1615          offset = batch_addr + ((char *)p - (char *)batch);
1616       else
1617          offset = 0;
1618 
1619       if (inst == NULL) {
1620          fprintf(ctx->fp, "%s0x%08"PRIx64": unknown instruction %08x%s\n",
1621                  (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
1622                  offset, p[0], reset_color);
1623 
1624          for (int i=1; i < length; i++) {
1625             fprintf(ctx->fp, "%s0x%08"PRIx64": -- %08x%s\n",
1626                  (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
1627                  offset + i * 4, p[i], reset_color);
1628          }
1629 
1630          continue;
1631       }
1632 
1633       if (ctx->flags & INTEL_BATCH_DECODE_ACCUMULATE) {
1634          struct hash_entry *entry = _mesa_hash_table_search(ctx->commands, inst);
1635          if (entry != NULL) {
1636             entry->data = (void *)p;
1637          } else {
1638             _mesa_hash_table_insert(ctx->commands, inst, (void *)p);
1639          }
1640 
1641          if (!strcmp(inst->name, "3DPRIMITIVE") ||
1642              !strcmp(inst->name, "3DPRIMITIVE_EXTENDED") ||
1643              !strcmp(inst->name, "GPGPU_WALKER") ||
1644              !strcmp(inst->name, "3DSTATE_WM_HZ_OP") ||
1645              !strcmp(inst->name, "COMPUTE_WALKER")) {
1646             intel_print_accumulated_instrs(ctx);
1647          }
1648       } else {
1649          char *begin_color;
1650          char *end_color;
1651          get_inst_color(ctx, inst, &begin_color, &end_color);
1652 
1653          fprintf(ctx->fp, "%s0x%08"PRIx64"%s:  0x%08x:  %-80s%s\n",
1654                  begin_color, offset,
1655                  ctx->acthd && offset == ctx->acthd ? " (ACTHD)" : "", p[0],
1656                  inst->name, end_color);
1657 
1658          if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
1659             ctx_print_group(ctx, inst, offset, p);
1660 
1661             for (int i = 0; i < ARRAY_SIZE(custom_decoders); i++) {
1662                if (strcmp(inst->name, custom_decoders[i].cmd_name) == 0) {
1663                   custom_decoders[i].decode(ctx, p);
1664                   break;
1665                }
1666             }
1667          }
1668       }
1669 
1670       if (strcmp(inst->name, "MI_BATCH_BUFFER_START") == 0) {
1671          uint64_t next_batch_addr = 0;
1672          bool ppgtt = false;
1673          bool second_level = false;
1674          bool predicate = false;
1675          struct intel_field_iterator iter;
1676          intel_field_iterator_init(&iter, inst, p, 0, false);
1677          while (intel_field_iterator_next(&iter)) {
1678             if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
1679                next_batch_addr = iter.raw_value;
1680             } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
1681                second_level = iter.raw_value;
1682             } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
1683                ppgtt = iter.raw_value;
1684             } else if (strcmp(iter.name, "Predication Enable") == 0) {
1685                predicate = iter.raw_value;
1686             }
1687          }
1688 
1689          if (!predicate) {
1690             struct intel_batch_decode_bo next_batch = ctx_get_bo(ctx, ppgtt, next_batch_addr);
1691 
1692             if (next_batch.map == NULL) {
1693                fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
1694                        next_batch_addr);
1695             } else {
1696                intel_print_batch(ctx, next_batch.map, next_batch.size,
1697                                  next_batch.addr, false);
1698             }
1699             if (second_level) {
1700                /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
1701                 * like a subroutine call.  Commands that come afterwards get
1702                 * processed once the 2nd level batch buffer returns with
1703                 * MI_BATCH_BUFFER_END.
1704                 */
1705                continue;
1706             } else if (!from_ring) {
1707                /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
1708                 * like a goto.  Nothing after it will ever get processed.  In
1709                 * order to prevent the recursion from growing, we just reset the
1710                 * loop and continue;
1711                 */
1712                break;
1713             }
1714          }
1715       } else if (strcmp(inst->name, "MI_BATCH_BUFFER_END") == 0) {
1716          break;
1717       }
1718    }
1719 
1720    ctx->n_batch_buffer_start--;
1721 }
1722 
1723 void
intel_batch_stats_reset(struct intel_batch_decode_ctx * ctx)1724 intel_batch_stats_reset(struct intel_batch_decode_ctx *ctx)
1725 {
1726    _mesa_hash_table_clear(ctx->stats, NULL);
1727 }
1728 
1729 void
intel_batch_stats(struct intel_batch_decode_ctx * ctx,const uint32_t * batch,uint32_t batch_size,uint64_t batch_addr,bool from_ring)1730 intel_batch_stats(struct intel_batch_decode_ctx *ctx,
1731                   const uint32_t *batch, uint32_t batch_size,
1732                   uint64_t batch_addr, bool from_ring)
1733 {
1734    const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
1735    int length;
1736    struct intel_group *inst;
1737 
1738    if (ctx->n_batch_buffer_start >= 100) {
1739       fprintf(stderr, "Max batch buffer jumps exceeded\n");
1740       return;
1741    }
1742 
1743    ctx->n_batch_buffer_start++;
1744 
1745    for (p = batch; p < end; p += length) {
1746       inst = intel_ctx_find_instruction(ctx, p);
1747       length = intel_group_get_length(inst, p);
1748       assert(inst == NULL || length > 0);
1749       length = MAX2(1, length);
1750 
1751       const char *name =
1752          inst != NULL ? inst->name : "unknown";
1753 
1754       struct hash_entry *entry = _mesa_hash_table_search(ctx->stats, name);
1755       if (entry != NULL) {
1756          entry->data = (void *)((uintptr_t)entry->data + 1);
1757       } else {
1758          _mesa_hash_table_insert(ctx->stats, name, (void *)(uintptr_t)1);
1759       }
1760 
1761       if (inst == NULL)
1762          continue;
1763 
1764       if (strcmp(inst->name, "MI_BATCH_BUFFER_START") == 0) {
1765          uint64_t next_batch_addr = 0;
1766          bool ppgtt = false;
1767          bool second_level = false;
1768          bool predicate = false;
1769          struct intel_field_iterator iter;
1770          intel_field_iterator_init(&iter, inst, p, 0, false);
1771          while (intel_field_iterator_next(&iter)) {
1772             if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
1773                next_batch_addr = iter.raw_value;
1774             } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
1775                second_level = iter.raw_value;
1776             } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
1777                ppgtt = iter.raw_value;
1778             } else if (strcmp(iter.name, "Predication Enable") == 0) {
1779                predicate = iter.raw_value;
1780             }
1781          }
1782 
1783          if (!predicate) {
1784             struct intel_batch_decode_bo next_batch =
1785                ctx_get_bo(ctx, ppgtt, next_batch_addr);
1786 
1787             if (next_batch.map == NULL) {
1788                fprintf(stderr, "Secondary batch at 0x%08"PRIx64" unavailable\n",
1789                        next_batch_addr);
1790             } else {
1791                intel_batch_stats(ctx, next_batch.map, next_batch.size,
1792                                  next_batch.addr, false);
1793             }
1794             if (second_level) {
1795                /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
1796                 * like a subroutine call.  Commands that come afterwards get
1797                 * processed once the 2nd level batch buffer returns with
1798                 * MI_BATCH_BUFFER_END.
1799                 */
1800                continue;
1801             } else if (!from_ring) {
1802                /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
1803                 * like a goto.  Nothing after it will ever get processed.  In
1804                 * order to prevent the recursion from growing, we just reset the
1805                 * loop and continue;
1806                 */
1807                break;
1808             }
1809          }
1810       } else if (strcmp(inst->name, "MI_BATCH_BUFFER_END") == 0) {
1811          break;
1812       }
1813    }
1814 
1815    ctx->n_batch_buffer_start--;
1816 }
1817 
/* One entry of the ctx->stats table flattened into an array element so that
 * the entries can be sorted: the hash-table key becomes `name` and the
 * hash-table data (an integer stored in the pointer) becomes `count`.
 */
struct inst_stat {
   const char *name;    /* instruction name (table key) */
   uint32_t    count;   /* number of occurrences (table data) */
};
1822 
1823 static int
compare_inst_stat(const void * v1,const void * v2)1824 compare_inst_stat(const void *v1, const void *v2)
1825 {
1826    const struct inst_stat *i1 = v1, *i2 = v2;
1827    return strcmp(i1->name, i2->name);
1828 }
1829 
1830 void
intel_batch_print_stats(struct intel_batch_decode_ctx * ctx)1831 intel_batch_print_stats(struct intel_batch_decode_ctx *ctx)
1832 {
1833    struct util_dynarray arr;
1834    util_dynarray_init(&arr, NULL);
1835 
1836    hash_table_foreach(ctx->stats, entry) {
1837       struct inst_stat inst = {
1838          .name = (const char *)entry->key,
1839          .count = (uintptr_t)entry->data,
1840       };
1841       util_dynarray_append(&arr, struct inst_stat, inst);
1842    }
1843    qsort(util_dynarray_begin(&arr),
1844          util_dynarray_num_elements(&arr, struct inst_stat),
1845          sizeof(struct inst_stat),
1846          compare_inst_stat);
1847    util_dynarray_foreach(&arr, struct inst_stat, i)
1848       fprintf(ctx->fp, "%-40s: %u\n", i->name, i->count);
1849 
1850    util_dynarray_fini(&arr);
1851 }
1852