1 /*
2 * Copyright © 2018 Rob Clark <[email protected]>
3 * SPDX-License-Identifier: MIT
4 */
5
6 /*
7 * Decoder for "new" GL_OES_get_program_binary format.
8 *
9 * Overall structure is:
10 *
11 * - header at top, contains, amongst other things, offsets of
12 * per shader stage sections.
13 * - per shader stage section (shader_info) starts with a header,
14 * followed by a variably length list of descriptors. Each
15 * descriptor has a type/count/size plus offset from the start
16 * of shader_info section where the data is found
17 */
18
19 #include <assert.h>
20 #include <ctype.h>
21 #include <fcntl.h>
22 #include <stddef.h>
23 #include <stdint.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <sys/stat.h>
29 #include <sys/types.h>
30
31 #include "disasm.h"
32 #include "io.h"
33 #include "redump.h"
34 #include "util.h"
35
/* Path of the input file; assigned from the command line in main(). */
const char *infile;

/* Command line switches (statics start out as zero, i.e. disabled): */
static int dump_full;    /* --full: also hex dump the whole program blob */
static int dump_offsets; /* --dump-offsets: prefix fields with buffer offset */
static int shaderdb;     /* --shaderdb: output shaderdb style traces to stderr */

/* GPU id handed to the disassembler; replaced when an RD_GPU_ID section
 * is encountered in the input.
 */
static int gpu_id = 320;
41
/* Decoder context handed to every decode_*() and dump_*() helper. */
struct state {
   char *buf; /* start of the program blob being decoded */
   int sz;    /* size of the program data, as read from the input */
   int lvl;   /* nesting depth, passed to tab() to indent the output */

   /* Start of the shader_info section currently being decoded.  Descriptor
    * block offsets are relative to this, not to the start of the buffer.
    */
   void *shader;

   /* Size in bytes of each entry within the current
    * shader_descriptor_block:
    */
   int desc_size;

   /* Stage label and register counts collected while decoding, reported
    * in the --shaderdb summary line:
    */
   const char *shader_type;
   int full_regs;
   int half_regs;
};
59
#define PACKED __attribute__((__packed__))

/*
 * Field printing helpers used by the decode_*() functions.  They expand in
 * place and rely on a 'struct state *state' being in scope at the call
 * site.  's' is a pointer to the struct being decoded and 'field' the bare
 * member name, which is also stringified to label the output line.
 */

/* with --dump-offsets, print the offset of the field within the buffer */
#define OFF(field) \
   do { \
      if (dump_offsets) \
         printf("%08x: ", (uint32_t)((char *)&field - state->buf)); \
   } while (0)

/* print field as hex */
#define X(s, field) \
   do { \
      OFF((s)->field); \
      printf("%s%12s:\t0x%x\n", tab(state->lvl), #field, (s)->field); \
   } while (0)

/* print field as unsigned decimal */
#define D(s, field) \
   do { \
      OFF((s)->field); \
      printf("%s%12s:\t%u\n", tab(state->lvl), #field, (s)->field); \
   } while (0)

/* print field both as float and as raw hex */
#define F(s, field) \
   do { \
      OFF((s)->field); \
      printf("%s%12s:\t%f (0x%0x)\n", tab(state->lvl), #field, \
             uif((s)->field), (s)->field); \
   } while (0)

/* print field as a register; 'type' is the prefix char ('r' or 'c'), the
 * low two bits of the value select the .xyzw component
 */
#define R(s, field, type) \
   do { \
      OFF((s)->field); \
      printf("%s%12s:\t%c%u.%c\n", tab(state->lvl), #field, (type), \
             ((s)->field >> 2), "xyzw"[(s)->field & 0x3]); \
   } while (0)

/* print inline string (presumably null terminated?) */
#define S(s, field) \
   do { \
      OFF((s)->field); \
      printf("%s%12s:\t%s\n", tab(state->lvl), #field, (s)->field); \
   } while (0)

/* print string-table string (not implemented yet) */
#define T(s, field) TODO

/* hex dump the member named unk_<start>_<end>; start/end are the hex byte
 * offsets of its first and last dword, so the dword count is derived from
 * them
 */
#define U(s, start, end) \
   dump_unknown(state, (s)->unk_##start##_##end, 0x##start, \
                (4 + 0x##end - 0x##start) / 4)

/* print field as hex, then treat it as an offset from the start of the
 * buffer and decode the section found there with decode_<type>(), one
 * indent level deeper
 */
#define O(s, field, type) \
   do { \
      X(s, field); \
      assert((s)->field < state->sz); \
      void *_sect = &state->buf[(s)->field]; \
      state->lvl++; \
      decode_##type(state, _sect); \
      state->lvl--; \
   } while (0)
123
124 struct shader_info;
125 static void decode_shader_info(struct state *state, struct shader_info *info);
126
127 static void
dump_unknown(struct state * state,void * buf,unsigned start,unsigned n)128 dump_unknown(struct state *state, void *buf, unsigned start, unsigned n)
129 {
130 uint32_t *ptr = buf;
131 uint8_t *ascii = buf;
132
133 for (unsigned i = 0; i < n; i++) {
134 uint32_t d = ptr[i];
135
136 if (dump_offsets)
137 printf("%08x:", (uint32_t)((char *)&ptr[i] - state->buf));
138
139 printf("%s %04x:\t%08x", tab(state->lvl), start + i * 4, d);
140
141 printf("\t|");
142 for (unsigned j = 0; j < 4; j++) {
143 uint8_t c = *(ascii++);
144 printf("%c", (isascii(c) && !iscntrl(c)) ? c : '.');
145 }
146 printf("|\t%f", uif(d));
147
148 /* TODO maybe scan for first non-null and non-ascii char starting from
149 * end of shader binary to (roughly) establish the start of the string
150 * table.. that would be a bit better filter for deciding if something
151 * might be a pointer into the string table. Also, the previous char
152 * to what it points to should probably be null.
153 */
154 if ((d < state->sz) && isascii(state->buf[d]) &&
155 (strlen(&state->buf[d]) > 2) && isascii(state->buf[d + 1]))
156 printf("\t<== %s", &state->buf[d]);
157
158 printf("\n");
159 }
160 }
161
162 struct PACKED header {
163 uint32_t version; /* I guess, always b10bcace ? */
164 uint32_t unk_0004_0014[5];
165 uint32_t size;
166 uint32_t size2; /* just to be sure? */
167 uint32_t unk_0020_0020[1];
168 uint32_t
169 chksum; /* I guess? Small changes seem to result in big diffs here */
170 uint32_t unk_0028_0050[11];
171 uint32_t fs_info; /* offset of FS shader_info section */
172 uint32_t unk_0058_0090[15];
173 uint32_t vs_info; /* offset of VS shader_info section */
174 uint32_t unk_0098_00b0[7];
175 uint32_t vs_info2; /* offset of VS shader_info section (again?) */
176 uint32_t unk_00b8_0110[23];
177 uint32_t bs_info; /* offset of binning shader_info section */
178 };
179
180 static void
decode_header(struct state * state,struct header * hdr)181 decode_header(struct state *state, struct header *hdr)
182 {
183 X(hdr, version);
184 U(hdr, 0004, 0014);
185 X(hdr, size);
186 X(hdr, size2);
187 U(hdr, 0020, 0020);
188 X(hdr, chksum);
189 U(hdr, 0028, 0050);
190 state->shader_type = "FRAG";
191 O(hdr, fs_info, shader_info);
192 U(hdr, 0058, 0090);
193 state->shader_type = "VERT";
194 O(hdr, vs_info, shader_info);
195 U(hdr, 0098, 00b0);
196 assert(hdr->vs_info ==
197 hdr->vs_info2); /* not sure what this if it is ever different */
198 X(hdr, vs_info2);
199 U(hdr, 00b8, 0110);
200 state->shader_type = "BVERT";
201 O(hdr, bs_info, shader_info);
202
203 /* not sure how much of the rest of contents before start of fs_info
204 * is the header, vs other things.. just dump it all as unknown for
205 * now:
206 */
207 dump_unknown(state, (void *)hdr + sizeof(*hdr), sizeof(*hdr),
208 (hdr->fs_info - sizeof(*hdr)) / 4);
209 }
210
211 struct PACKED shader_entry_point {
212 /* entry point name, ie. "main" of TBD length, followed by unknown */
213 char name[8];
214 };
215
216 static void
decode_shader_entry_point(struct state * state,struct shader_entry_point * e)217 decode_shader_entry_point(struct state *state, struct shader_entry_point *e)
218 {
219 S(e, name);
220 }
221
222 struct PACKED shader_config {
223 uint32_t unk_0000_0008[3];
224 uint32_t full_regs;
225 uint32_t half_regs;
226 };
227
228 static void
decode_shader_config(struct state * state,struct shader_config * cfg)229 decode_shader_config(struct state *state, struct shader_config *cfg)
230 {
231 U(cfg, 0000, 0008);
232 D(cfg, full_regs);
233 D(cfg, half_regs);
234
235 state->full_regs = cfg->full_regs;
236 state->half_regs = cfg->half_regs;
237
238 /* dump reset of unknown (size differs btwn versions) */
239 dump_unknown(state, (void *)cfg + sizeof(*cfg), sizeof(*cfg),
240 (state->desc_size - sizeof(*cfg)) / 4);
241 }
242
243 struct PACKED shader_io_block {
244 /* name of TBD length followed by unknown.. 42 dwords total */
245 char name[20];
246 uint32_t unk_0014_00a4[37];
247 };
248
249 static void
decode_shader_io_block(struct state * state,struct shader_io_block * io)250 decode_shader_io_block(struct state *state, struct shader_io_block *io)
251 {
252 S(io, name);
253 U(io, 0014, 00a4);
254 }
255
256 struct PACKED shader_constant_block {
257 uint32_t value;
258 uint32_t unk_0004_000c[3];
259 uint32_t regid;
260 uint32_t unk_0014_0024[5];
261 };
262
263 static void
decode_shader_constant_block(struct state * state,struct shader_constant_block * c)264 decode_shader_constant_block(struct state *state,
265 struct shader_constant_block *c)
266 {
267 F(c, value);
268 U(c, 0004, 000c);
269 R(c, regid, 'c');
270 U(c, 0014, 0024);
271 }
272
/* Values of shader_descriptor_block::type, with the record type each one
 * refers to.  This is an enum tag; the trailing identifier used to follow
 * the closing brace, which defined an unused global variable instead of
 * naming the type.
 */
enum shader_info_block_type {
   ENTRY_POINT = 0,   /* shader_entry_point */
   SHADER_CONFIG = 1, /* XXX placeholder name */
   SHADER_INPUT = 2,  /* shader_io_block */
   SHADER_OUTPUT = 3, /* shader_io_block */
   CONSTANTS = 6,     /* shader_constant_block */
   INTERNAL = 8,      /* internal input, like bary.f coord */
   SHADER = 10,
};
282
283 /* Refers to location of some type of records, with an offset relative to
284 * start of shader_info block.
285 */
286 struct PACKED shader_descriptor_block {
287 uint32_t type; /* block type */
288 uint32_t offset; /* offset (relative to start of shader_info block) */
289 uint32_t size; /* size in bytes */
290 uint32_t count; /* number of records */
291 uint32_t unk_0010_0010[1];
292 };
293
294 static void
decode_shader_descriptor_block(struct state * state,struct shader_descriptor_block * blk)295 decode_shader_descriptor_block(struct state *state,
296 struct shader_descriptor_block *blk)
297 {
298 D(blk, type);
299 X(blk, offset);
300 D(blk, size);
301 D(blk, count);
302 U(blk, 0010, 0010);
303
304 /* offset relative to current shader block: */
305 void *ptr = state->shader + blk->offset;
306
307 if (blk->count == 0) {
308 assert(blk->size == 0);
309 } else {
310 assert((blk->size % blk->count) == 0);
311 }
312
313 state->desc_size = blk->size / blk->count;
314 state->lvl++;
315 for (unsigned i = 0; i < blk->count; i++) {
316 switch (blk->type) {
317 case ENTRY_POINT:
318 printf("%sentry point %u:\n", tab(state->lvl - 1), i);
319 decode_shader_entry_point(state, ptr);
320 break;
321 case SHADER_CONFIG:
322 printf("%sconfig %u:\n", tab(state->lvl - 1), i);
323 decode_shader_config(state, ptr);
324 break;
325 case SHADER_INPUT:
326 printf("%sinput %u:\n", tab(state->lvl - 1), i);
327 decode_shader_io_block(state, ptr);
328 break;
329 case SHADER_OUTPUT:
330 printf("%soutput %u:\n", tab(state->lvl - 1), i);
331 decode_shader_io_block(state, ptr);
332 break;
333 case INTERNAL:
334 printf("%sinternal input %u:\n", tab(state->lvl - 1), i);
335 decode_shader_io_block(state, ptr);
336 break;
337 case CONSTANTS:
338 printf("%sconstant %u:\n", tab(state->lvl - 1), i);
339 decode_shader_constant_block(state, ptr);
340 break;
341 case SHADER: {
342 struct shader_stats stats;
343 printf("%sshader %u:\n", tab(state->lvl - 1), i);
344 disasm_a3xx_stat(ptr, blk->size / 4, state->lvl, stdout, gpu_id,
345 &stats);
346 if (shaderdb) {
347 unsigned dwords = 2 * stats.instlen;
348
349 if (gpu_id >= 400) {
350 dwords = ALIGN(dwords, 16 * 2);
351 } else {
352 dwords = ALIGN(dwords, 4 * 2);
353 }
354
355 unsigned half_regs = state->half_regs;
356 unsigned full_regs = state->full_regs;
357
358 /* On a6xx w/ merged/conflicting half and full regs, the
359 * full_regs footprint will be max of full_regs and half
360 * of half_regs.. we only care about which value is higher.
361 */
362 if (gpu_id >= 600) {
363 /* footprint of half_regs in units of full_regs: */
364 unsigned half_full = (half_regs + 1) / 2;
365 if (half_full > full_regs)
366 full_regs = half_full;
367 half_regs = 0;
368 }
369
370 fprintf(stderr,
371 "%s shader: %u inst, %u nops, %u non-nops, %u dwords, "
372 "%u half, %u full, %u constlen, "
373 "%u (ss), %u (sy), %d max_sun, %d loops\n",
374 state->shader_type, stats.instructions, stats.nops,
375 stats.instructions - stats.nops, dwords, half_regs,
376 full_regs, stats.constlen, stats.ss, stats.sy, 0,
377 0); /* max_sun or loops not possible */
378 }
379 /* this is a special case in a way, blk->count is # of
380 * instructions but disasm_a3xx() decodes all instructions,
381 * so just bail.
382 */
383 i = blk->count;
384 break;
385 }
386 default:
387 dump_unknown(state, ptr, 0, state->desc_size / 4);
388 break;
389 }
390 ptr += state->desc_size;
391 }
392 state->lvl--;
393 }
394
395 /* there looks like one of these per shader, followed by "main" and
396 * some more info, and then the shader itself.
397 */
398 struct PACKED shader_info {
399 uint32_t unk_0000_0010[5];
400 uint32_t desc_off; /* offset to first descriptor block */
401 uint32_t num_blocks;
402 };
403
404 static void
decode_shader_info(struct state * state,struct shader_info * info)405 decode_shader_info(struct state *state, struct shader_info *info)
406 {
407 assert((info->desc_off % 4) == 0);
408
409 U(info, 0000, 0010);
410 X(info, desc_off);
411 D(info, num_blocks);
412
413 dump_unknown(state, &info[1], 0, (info->desc_off - sizeof(*info)) / 4);
414
415 state->shader = info;
416
417 struct shader_descriptor_block *blocks = ((void *)info) + info->desc_off;
418 for (unsigned i = 0; i < info->num_blocks; i++) {
419 printf("%sdescriptor %u:\n", tab(state->lvl), i);
420 state->lvl++;
421 decode_shader_descriptor_block(state, &blocks[i]);
422 state->lvl--;
423 }
424 }
425
426 static void
dump_program(struct state * state)427 dump_program(struct state *state)
428 {
429 struct header *hdr = (void *)state->buf;
430
431 if (dump_full)
432 dump_unknown(state, state->buf, 0, state->sz / 4);
433
434 decode_header(state, hdr);
435 }
436
437 int
main(int argc,char ** argv)438 main(int argc, char **argv)
439 {
440 enum rd_sect_type type = RD_NONE;
441 enum debug_t debug = PRINT_RAW | PRINT_STATS;
442 void *buf = NULL;
443 int sz;
444 struct io *io;
445 int raw_program = 0;
446
447 /* lame argument parsing: */
448
449 while (1) {
450 if ((argc > 1) && !strcmp(argv[1], "--verbose")) {
451 debug |= PRINT_RAW | PRINT_VERBOSE;
452 argv++;
453 argc--;
454 continue;
455 }
456 if ((argc > 1) && !strcmp(argv[1], "--expand")) {
457 debug |= EXPAND_REPEAT;
458 argv++;
459 argc--;
460 continue;
461 }
462 if ((argc > 1) && !strcmp(argv[1], "--full")) {
463 /* only short dump, original shader, symbol table, and disassembly */
464 dump_full = 1;
465 argv++;
466 argc--;
467 continue;
468 }
469 if ((argc > 1) && !strcmp(argv[1], "--dump-offsets")) {
470 dump_offsets = 1;
471 argv++;
472 argc--;
473 continue;
474 }
475 if ((argc > 1) && !strcmp(argv[1], "--raw")) {
476 raw_program = 1;
477 argv++;
478 argc--;
479 continue;
480 }
481 if ((argc > 1) && !strcmp(argv[1], "--shaderdb")) {
482 shaderdb = 1;
483 argv++;
484 argc--;
485 continue;
486 }
487 break;
488 }
489
490 if (argc != 2) {
491 fprintf(stderr, "usage: pgmdump2 [--verbose] [--expand] [--full] "
492 "[--dump-offsets] [--raw] [--shaderdb] testlog.rd\n");
493 return -1;
494 }
495
496 disasm_a3xx_set_debug(debug);
497
498 infile = argv[1];
499
500 io = io_open(infile);
501 if (!io) {
502 fprintf(stderr, "could not open: %s\n", infile);
503 return -1;
504 }
505
506 if (raw_program) {
507 io_readn(io, &sz, 4);
508 free(buf);
509
510 /* note: allow hex dumps to go a bit past the end of the buffer..
511 * might see some garbage, but better than missing the last few bytes..
512 */
513 buf = calloc(1, sz + 3);
514 io_readn(io, buf + 4, sz);
515 (*(int *)buf) = sz;
516
517 struct state state = {
518 .buf = buf,
519 .sz = sz,
520 };
521 printf("############################################################\n");
522 printf("program:\n");
523 dump_program(&state);
524 printf("############################################################\n");
525 return 0;
526 }
527
528 /* figure out what sort of input we are dealing with: */
529 if (!(check_extension(infile, ".rd") || check_extension(infile, ".rd.gz"))) {
530 int ret;
531 buf = calloc(1, 100 * 1024);
532 ret = io_readn(io, buf, 100 * 1024);
533 if (ret < 0) {
534 fprintf(stderr, "error: %m");
535 return -1;
536 }
537 return disasm_a3xx(buf, ret / 4, 0, stdout, gpu_id);
538 }
539
540 while ((io_readn(io, &type, sizeof(type)) > 0) &&
541 (io_readn(io, &sz, 4) > 0)) {
542 free(buf);
543
544 /* note: allow hex dumps to go a bit past the end of the buffer..
545 * might see some garbage, but better than missing the last few bytes..
546 */
547 buf = calloc(1, sz + 3);
548 io_readn(io, buf, sz);
549
550 switch (type) {
551 case RD_TEST:
552 if (dump_full)
553 printf("test: %s\n", (char *)buf);
554 break;
555 case RD_VERT_SHADER:
556 printf("vertex shader:\n%s\n", (char *)buf);
557 break;
558 case RD_FRAG_SHADER:
559 printf("fragment shader:\n%s\n", (char *)buf);
560 break;
561 case RD_PROGRAM: {
562 struct state state = {
563 .buf = buf,
564 .sz = sz,
565 };
566 printf(
567 "############################################################\n");
568 printf("program:\n");
569 dump_program(&state);
570 printf(
571 "############################################################\n");
572 break;
573 }
574 case RD_GPU_ID:
575 gpu_id = *((unsigned int *)buf);
576 printf("gpu_id: %d\n", gpu_id);
577 break;
578 default:
579 break;
580 }
581 }
582
583 io_close(io);
584
585 return 0;
586 }
587