/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_serialize.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "nir_control_flow.h"
#include "nir_xfb_info.h"

#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
#define MAX_OBJECT_IDS (1 << 20)
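
/* Object IDs are capped at 2^20 so that a serialized reference always fits
 * in the 20-bit object_idx field of union packed_src below; write_add_object
 * asserts that the counter never reaches this limit.
 */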

typedef struct {
   size_t blob_offset;
   nir_def *src;
   nir_block *block;
} write_phi_fixup;

typedef struct {
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* The last serialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;

   /* For skipping equal ALU headers (typical after scalarization). */
   nir_instr_type last_instr_type;
   uintptr_t last_alu_header_offset;
   uint32_t last_alu_header;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;

typedef struct {
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

   /* The last deserialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;
} read_ctx;

static void
write_add_object(write_ctx *ctx, const void *obj)
{
   uint32_t index = ctx->next_idx++;
   assert(index != MAX_OBJECT_IDS);
   _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t)index);
}

static uint32_t
write_lookup_object(write_ctx *ctx, const void *obj)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
   assert(entry);
   return (uint32_t)(uintptr_t)entry->data;
}

static void
read_add_object(read_ctx *ctx, void *obj)
{
   assert(ctx->next_idx < ctx->idx_table_len);
   ctx->idx_table[ctx->next_idx++] = obj;
}

static void *
read_lookup_object(read_ctx *ctx, uint32_t idx)
{
   assert(idx < ctx->idx_table_len);
   return ctx->idx_table[idx];
}

static void *
read_object(read_ctx *ctx)
{
   return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
}

static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
   if (bit_size)
      return util_logbase2(bit_size) + 1;
   return 0;
}

static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size)
      return 1 << (bit_size - 1);
   return 0;
}
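
/* Example round trip: encode_bit_size_3bits(32) == util_logbase2(32) + 1 == 6,
 * and decode_bit_size_3bits(6) == 1 << 5 == 32, so the two functions are
 * exact inverses over {0, 1, 2, 4, 8, 16, 32, 64}.
 */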

#define NUM_COMPONENTS_IS_SEPARATE_7 7

static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   if (num_components <= 4)
      return num_components;
   if (num_components == 8)
      return 5;
   if (num_components == 16)
      return 6;

   /* special value indicating that num_components is in the next uint32 */
   return NUM_COMPONENTS_IS_SEPARATE_7;
}

static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   if (value <= 4)
      return value;
   if (value == 5)
      return 8;
   if (value == 6)
      return 16;

   unreachable("invalid num_components encoding");
   return 0;
}
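
/* Only 0-4, 8 and 16 components round-trip through the 3-bit encoding; any
 * other count is encoded as NUM_COMPONENTS_IS_SEPARATE_7 and written as a
 * separate uint32 by write_def/read_def, which is why the decoder treats 7
 * as unreachable.
 */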

static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}

static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   static const nir_const_value zero_vals[ARRAY_SIZE(c->values)] = { 0 };
   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->is_null_constant = memcmp(c->values, zero_vals, sizeof(c->values)) == 0;
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++) {
      c->elements[i] = read_constant(ctx, nvar);
      c->is_null_constant &= c->elements[i]->is_null_constant;
   }

   return c;
}

enum var_data_encoding {
   var_encode_full,
   var_encode_location_diff,
};

union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name : 1;
      unsigned has_constant_initializer : 1;
      unsigned has_pointer_initializer : 1;
      unsigned has_interface_type : 1;
      unsigned num_state_slots : 7;
      unsigned data_encoding : 2;
      unsigned type_same_as_last : 1;
      unsigned interface_type_same_as_last : 1;
      unsigned ray_query : 1;
      unsigned num_members : 16;
   } u;
};

union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location : 13;
      int location_frac : 3;
      int driver_location : 16;
   } u;
};
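
/* Example: when two consecutive variables differ only in their locations
 * (say, back-to-back shader inputs), write_variable emits one uint32 of
 * packed_var_data_diff holding the signed location deltas instead of the
 * full nir_variable_data struct.
 */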

static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);

   assert(var->num_state_slots < (1 << 7));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_pointer_initializer = !!(var->pointer_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.type_same_as_last = var->type == ctx->last_type;
   flags.u.interface_type_same_as_last =
      var->interface_type && var->interface_type == ctx->last_interface_type;
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   struct nir_variable_data data = var->data;

   /* When stripping, we expect that the location is no longer needed,
    * which is typically after shaders are linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_system_value &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   struct nir_variable_data tmp = data;

   tmp.location = ctx->last_var_data.location;
   tmp.location_frac = ctx->last_var_data.location_frac;
   tmp.driver_location = ctx->last_var_data.driver_location;

   /* See if we can encode only the difference in locations from the last
    * variable.
    */
   if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
       abs((int)data.location -
           (int)ctx->last_var_data.location) < (1 << 12) &&
       abs((int)data.driver_location -
           (int)ctx->last_var_data.driver_location) < (1 << 15))
      flags.u.data_encoding = var_encode_location_diff;
   else
      flags.u.data_encoding = var_encode_full;

   flags.u.ray_query = var->data.ray_query;

   blob_write_uint32(ctx->blob, flags.u32);

   if (!flags.u.type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->type);
      ctx->last_type = var->type;
   }

   if (var->interface_type && !flags.u.interface_type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->interface_type);
      ctx->last_interface_type = var->interface_type;
   }

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   if (flags.u.data_encoding == var_encode_full) {
      blob_write_bytes(ctx->blob, &data, sizeof(data));
   } else {
      /* Serialize only the difference in locations from the last variable.
       */
      union packed_var_data_diff diff;

      diff.u.location = data.location - ctx->last_var_data.location;
      diff.u.location_frac = data.location_frac -
                             ctx->last_var_data.location_frac;
      diff.u.driver_location = data.driver_location -
                               ctx->last_var_data.driver_location;

      blob_write_uint32(ctx->blob, diff.u32);
   }

   ctx->last_var_data = data;

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->pointer_initializer)
      blob_write_uint32(ctx->blob,
                        write_lookup_object(ctx, var->pointer_initializer));
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *)var->members,
                       var->num_members * sizeof(*var->members));
   }
}

static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.type_same_as_last) {
      var->type = ctx->last_type;
   } else {
      var->type = decode_type_from_blob(ctx->blob);
      ctx->last_type = var->type;
   }

   if (flags.u.has_interface_type) {
      if (flags.u.interface_type_same_as_last) {
         var->interface_type = ctx->last_interface_type;
      } else {
         var->interface_type = decode_type_from_blob(ctx->blob);
         ctx->last_interface_type = var->interface_type;
      }
   }

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }

   if (flags.u.data_encoding == var_encode_full) {
      blob_copy_bytes(ctx->blob, (uint8_t *)&var->data, sizeof(var->data));
      ctx->last_var_data = var->data;
   } else { /* var_encode_location_diff */
      union packed_var_data_diff diff;
      diff.u32 = blob_read_uint32(ctx->blob);

      var->data = ctx->last_var_data;
      var->data.location += diff.u.location;
      var->data.location_frac += diff.u.location_frac;
      var->data.driver_location += diff.u.driver_location;

      ctx->last_var_data = var->data;
   }

   var->data.ray_query = flags.u.ray_query;

   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;

   if (flags.u.has_pointer_initializer)
      var->pointer_initializer = read_object(ctx);
   else
      var->pointer_initializer = NULL;

   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *)var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}

static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_variable, var, node, src) {
      write_variable(ctx, var);
   }
}

static void
read_var_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_vars = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_vars; i++) {
      nir_variable *var = read_variable(ctx);
      exec_list_push_tail(dst, &var->node);
   }
}

union packed_src {
   uint32_t u32;
   struct {
      unsigned _pad : 2; /* <-- Header */
      unsigned object_idx : 20;
      unsigned _footer : 10; /* <-- Footer */
   } any;
   struct {
      unsigned _header : 22; /* <-- Header */
      unsigned _pad : 2;     /* <-- Footer */
      unsigned swizzle_x : 2;
      unsigned swizzle_y : 2;
      unsigned swizzle_z : 2;
      unsigned swizzle_w : 2;
   } alu;
   struct {
      unsigned _header : 22; /* <-- Header */
      unsigned src_type : 5; /* <-- Footer */
      unsigned _pad : 5;
   } tex;
};

static void
write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
{
   header.any.object_idx = write_lookup_object(ctx, src->ssa);
   blob_write_uint32(ctx->blob, header.u32);
}

static void
write_src(write_ctx *ctx, const nir_src *src)
{
   union packed_src header = { 0 };
   write_src_full(ctx, src, header);
}

static union packed_src
read_src(read_ctx *ctx, nir_src *src)
{
   STATIC_ASSERT(sizeof(union packed_src) == 4);
   union packed_src header;
   header.u32 = blob_read_uint32(ctx->blob);

   src->ssa = read_lookup_object(ctx, header.any.object_idx);
   return header;
}

union packed_def {
   uint8_t u8;
   struct {
      uint8_t _pad : 1;
      uint8_t num_components : 3;
      uint8_t bit_size : 3;
      uint8_t divergent : 1;
   };
};

enum intrinsic_const_indices_encoding {
   /* Use packed_const_indices to store tightly packed indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_all_combined,

   const_indices_8bit,  /* 8 bits per element */
   const_indices_16bit, /* 16 bits per element */
   const_indices_32bit, /* 32 bits per element */
};
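
/* Worked example: load_interpolated_input with const indices {7, 3} uses
 * const_indices_all_combined, since max_bits == 3 and num_indices == 2 give
 * 3 * 2 <= 8. Each index gets 8 / 2 = 4 bits, so
 * packed_const_indices == 7 | (3 << 4) == 0x37.
 */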

enum load_const_packing {
   /* Constants are not packed and are stored in following dwords. */
   load_const_full,

   /* packed_value contains high 19 bits, low bits are 0,
    * good for floating-point decimals
    */
   load_const_scalar_hi_19bits,

   /* packed_value contains low 19 bits, high bits are sign-extended */
   load_const_scalar_lo_19bits_sext,
};
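
/* Example: 1.0f is 0x3f800000, whose low 13 bits are zero, so a scalar
 * 32-bit load_const of 1.0f packs as load_const_scalar_hi_19bits with
 * packed_value == 0x3f800000 >> 13 and needs no extra dwords.
 */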

union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type : 4; /* always present */
      unsigned _pad : 20;
      unsigned def : 8; /* always last */
   } any;
   struct {
      unsigned instr_type : 4;
      unsigned exact : 1;
      unsigned no_signed_wrap : 1;
      unsigned no_unsigned_wrap : 1;
      unsigned padding : 1;
      /* Reg: writemask; SSA: swizzles for 2 srcs */
      unsigned writemask_or_two_swizzles : 4;
      unsigned op : 9;
      unsigned packed_src_ssa_16bit : 1;
      /* Scalarized ALUs always have the same header. */
      unsigned num_followup_alu_sharing_header : 2;
      unsigned def : 8;
   } alu;
   struct {
      unsigned instr_type : 4;
      unsigned deref_type : 3;
      unsigned cast_type_same_as_last : 1;
      unsigned modes : 5; /* See (de|en)code_deref_modes() */
      unsigned _pad : 9;
      unsigned in_bounds : 1;
      unsigned packed_src_ssa_16bit : 1; /* deref_var redefines this */
      unsigned def : 8;
   } deref;
   struct {
      unsigned instr_type : 4;
      unsigned deref_type : 3;
      unsigned _pad : 1;
      unsigned object_idx : 16; /* if 0, the object ID is a separate uint32 */
      unsigned def : 8;
   } deref_var;
   struct {
      unsigned instr_type : 4;
      unsigned intrinsic : 10;
      unsigned const_indices_encoding : 2;
      unsigned packed_const_indices : 8;
      unsigned def : 8;
   } intrinsic;
   struct {
      unsigned instr_type : 4;
      unsigned last_component : 4;
      unsigned bit_size : 3;
      unsigned packing : 2;       /* enum load_const_packing */
      unsigned packed_value : 19; /* meaning determined by packing */
   } load_const;
   struct {
      unsigned instr_type : 4;
      unsigned last_component : 4;
      unsigned bit_size : 3;
      unsigned _pad : 21;
   } undef;
   struct {
      unsigned instr_type : 4;
      unsigned num_srcs : 4;
      unsigned op : 5;
      unsigned _pad : 11;
      unsigned def : 8;
   } tex;
   struct {
      unsigned instr_type : 4;
      unsigned num_srcs : 20;
      unsigned def : 8;
   } phi;
   struct {
      unsigned instr_type : 4;
      unsigned type : 2;
      unsigned _pad : 26;
   } jump;
   struct {
      unsigned instr_type : 4;
      unsigned type : 4;
      unsigned string_length : 16;
      unsigned def : 8;
   } debug_info;
};
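
/* Every variant keeps instr_type in the low 4 bits and, where present, the
 * packed_def byte in the top 8 bits, so read_instr can dispatch on
 * header.any.instr_type before knowing the exact layout and write_def can
 * store the def byte through header.any without knowing the variant.
 */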

/* Write "lo24" as low 24 bits in the first uint32. */
static void
write_def(write_ctx *ctx, const nir_def *def, union packed_instr header,
          nir_instr_type instr_type)
{
   STATIC_ASSERT(sizeof(union packed_def) == 1);
   union packed_def pdef;
   pdef.u8 = 0;

   pdef.num_components =
      encode_num_components_in_3bits(def->num_components);
   pdef.bit_size = encode_bit_size_3bits(def->bit_size);
   pdef.divergent = def->divergent;
   header.any.def = pdef.u8;

   /* Check if the current ALU instruction has the same header as the previous
    * instruction that is also ALU. If it does, we don't have to write
    * the current header. This is a typical occurrence after scalarization.
    */
   if (instr_type == nir_instr_type_alu) {
      bool equal_header = false;

      if (ctx->last_instr_type == nir_instr_type_alu) {
         assert(ctx->last_alu_header_offset);
         union packed_instr last_header;
         last_header.u32 = ctx->last_alu_header;

         /* Clear the field that counts ALUs with equal headers. */
         union packed_instr clean_header;
         clean_header.u32 = last_header.u32;
         clean_header.alu.num_followup_alu_sharing_header = 0;

         /* There can be at most 4 consecutive ALU instructions
          * sharing the same header.
          */
         if (last_header.alu.num_followup_alu_sharing_header < 3 &&
             header.u32 == clean_header.u32) {
            last_header.alu.num_followup_alu_sharing_header++;
            blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset,
                                  last_header.u32);
            ctx->last_alu_header = last_header.u32;
            equal_header = true;
         }
      }

      if (!equal_header) {
         ctx->last_alu_header_offset = blob_reserve_uint32(ctx->blob);
         blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, header.u32);
         ctx->last_alu_header = header.u32;
      }
   } else {
      blob_write_uint32(ctx->blob, header.u32);
   }

   if (pdef.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
      blob_write_uint32(ctx->blob, def->num_components);

   write_add_object(ctx, def);
}

static void
read_def(read_ctx *ctx, nir_def *def, nir_instr *instr,
         union packed_instr header)
{
   union packed_def pdef;
   pdef.u8 = header.any.def;

   unsigned bit_size = decode_bit_size_3bits(pdef.bit_size);
   unsigned num_components;
   if (pdef.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
      num_components = blob_read_uint32(ctx->blob);
   else
      num_components = decode_num_components_in_3bits(pdef.num_components);
   nir_def_init(instr, def, num_components, bit_size);
   def->divergent = pdef.divergent;
   read_add_object(ctx, def);
}

static bool
are_object_ids_16bit(write_ctx *ctx)
{
   /* Check the highest object ID, because they are monotonic. */
   return ctx->next_idx < (1 << 16);
}

static bool
is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;

   for (unsigned i = 0; i < num_srcs; i++) {
      unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

      for (unsigned chan = 0; chan < src_components; chan++) {
         /* The swizzles for src0.x and src1.x are stored
          * in writemask_or_two_swizzles for SSA ALUs.
          */
         if (i < 2 && chan == 0 && alu->src[i].swizzle[chan] < 4)
            continue;

         if (alu->src[i].swizzle[chan] != chan)
            return false;
      }
   }

   return are_object_ids_16bit(ctx);
}
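
/* When is_alu_src_ssa_16bit returns true, write_alu emits each source as a
 * single uint16 object ID; the x swizzles of src0/src1 travel in the header's
 * writemask_or_two_swizzles field and every other channel is an identity
 * swizzle, so no per-source swizzle data is written.
 */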

static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;

   /* 9 bits for nir_op */
   STATIC_ASSERT(nir_num_opcodes <= 512);
   union packed_instr header;
   header.u32 = 0;

   header.alu.instr_type = alu->instr.type;
   header.alu.exact = alu->exact;
   header.alu.no_signed_wrap = alu->no_signed_wrap;
   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
   header.alu.op = alu->op;
   header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);

   if (header.alu.packed_src_ssa_16bit) {
      /* For packed srcs of SSA ALUs, this field stores the swizzles. */
      header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
      if (num_srcs > 1)
         header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
   }

   write_def(ctx, &alu->def, header, alu->instr.type);
   blob_write_uint32(ctx->blob, alu->fp_fast_math);

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
         assert(idx < (1 << 16));
         blob_write_uint16(ctx->blob, idx);
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         union packed_src src;
         bool packed = src_components <= 4 && src_channels <= 4;
         src.u32 = 0;

         if (packed) {
            src.alu.swizzle_x = alu->src[i].swizzle[0];
            src.alu.swizzle_y = alu->src[i].swizzle[1];
            src.alu.swizzle_z = alu->src[i].swizzle[2];
            src.alu.swizzle_w = alu->src[i].swizzle[3];
         }

         write_src_full(ctx, &alu->src[i].src, src);

         /* Store swizzles for vec8 and vec16. */
         if (!packed) {
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = 0;

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  value |= (uint32_t)alu->src[i].swizzle[o + j] << (4 * j); /* 4 bits per swizzle */
               }

               blob_write_uint32(ctx->blob, value);
            }
         }
      }
   }
}

static nir_alu_instr *
read_alu(read_ctx *ctx, union packed_instr header)
{
   unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);

   alu->exact = header.alu.exact;
   alu->no_signed_wrap = header.alu.no_signed_wrap;
   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;

   read_def(ctx, &alu->def, &alu->instr, header);
   alu->fp_fast_math = blob_read_uint32(ctx->blob);

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         nir_alu_src *src = &alu->src[i];
         src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));

         memset(&src->swizzle, 0, sizeof(src->swizzle));

         unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

         for (unsigned chan = 0; chan < src_components; chan++)
            src->swizzle[chan] = chan;
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         union packed_src src = read_src(ctx, &alu->src[i].src);
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         bool packed = src_components <= 4 && src_channels <= 4;

         memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));

         if (packed) {
            alu->src[i].swizzle[0] = src.alu.swizzle_x;
            alu->src[i].swizzle[1] = src.alu.swizzle_y;
            alu->src[i].swizzle[2] = src.alu.swizzle_z;
            alu->src[i].swizzle[3] = src.alu.swizzle_w;
         } else {
            /* Load swizzles for vec8 and vec16. */
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = blob_read_uint32(ctx->blob);

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  alu->src[i].swizzle[o + j] =
                     (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
               }
            }
         }
      }
   }

   if (header.alu.packed_src_ssa_16bit) {
      alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
      if (num_srcs > 1)
         alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
   }

   return alu;
}

#define NUM_GENERIC_MODES 4
#define MODE_ENC_GENERIC_BIT (1 << 4)

static nir_variable_mode
decode_deref_modes(unsigned modes)
{
   if (modes & MODE_ENC_GENERIC_BIT) {
      modes &= ~MODE_ENC_GENERIC_BIT;
      return modes << (ffs(nir_var_mem_generic) - 1);
   } else {
      return 1 << modes;
   }
}

static unsigned
encode_deref_modes(nir_variable_mode modes)
{
   /* Mode sets on derefs generally come in two forms. For certain OpenCL
    * cases, we can have more than one of the generic modes set. In this
    * case, we need the full bitfield. Fortunately, there are only 4 of
    * these. For all other modes, we can only have one mode at a time so we
    * can compress them by only storing the bit position. This, plus one bit
    * to select encoding, lets us pack the entire bitfield in 5 bits.
    */

   /* Assert that the modes we are compressing fit along with the generic bit
    */
   STATIC_ASSERT((nir_num_variable_modes - NUM_GENERIC_MODES) <
                 MODE_ENC_GENERIC_BIT);

   /* Assert that the generic modes are defined at the end of the modes enum
    */
   STATIC_ASSERT((nir_var_all & ~nir_var_mem_generic) <
                 (1 << (nir_num_variable_modes - NUM_GENERIC_MODES)));

   unsigned enc;
   if (modes == 0 || (modes & nir_var_mem_generic)) {
      assert(!(modes & ~nir_var_mem_generic));
      enc = modes >> (ffs(nir_var_mem_generic) - 1);
      assert(enc < MODE_ENC_GENERIC_BIT);
      enc |= MODE_ENC_GENERIC_BIT;
   } else {
      assert(util_is_power_of_two_nonzero(modes));
      enc = ffs(modes) - 1;
      assert(enc < MODE_ENC_GENERIC_BIT);
   }
   assert(modes == decode_deref_modes(enc));
   return enc;
}
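
/* Example: a deref cast with a single non-generic mode encodes as that
 * mode's bit position (ffs(modes) - 1), while a generic-memory mode set
 * encodes with MODE_ENC_GENERIC_BIT set and the generic bits shifted down
 * into the low four bits.
 */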

static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   assert(deref->deref_type < 8);

   union packed_instr header;
   header.u32 = 0;

   header.deref.instr_type = deref->instr.type;
   header.deref.deref_type = deref->deref_type;

   if (deref->deref_type == nir_deref_type_cast) {
      header.deref.modes = encode_deref_modes(deref->modes);
      header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
   }

   unsigned var_idx = 0;
   if (deref->deref_type == nir_deref_type_var) {
      var_idx = write_lookup_object(ctx, deref->var);
      if (var_idx && var_idx < (1 << 16))
         header.deref_var.object_idx = var_idx;
   }

   if (deref->deref_type == nir_deref_type_array ||
       deref->deref_type == nir_deref_type_ptr_as_array) {
      header.deref.packed_src_ssa_16bit = are_object_ids_16bit(ctx);

      header.deref.in_bounds = deref->arr.in_bounds;
   }

   write_def(ctx, &deref->def, header, deref->instr.type);

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (!header.deref_var.object_idx)
         blob_write_uint32(ctx->blob, var_idx);
      break;

   case nir_deref_type_struct:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->parent.ssa));
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->arr.index.ssa));
      } else {
         write_src(ctx, &deref->parent);
         write_src(ctx, &deref->arr.index);
      }
      break;

   case nir_deref_type_cast:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      blob_write_uint32(ctx->blob, deref->cast.align_mul);
      blob_write_uint32(ctx->blob, deref->cast.align_offset);
      if (!header.deref.cast_type_same_as_last) {
         encode_type_to_blob(ctx->blob, deref->type);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      write_src(ctx, &deref->parent);
      break;

   default:
      unreachable("Invalid deref type");
   }
}

static nir_deref_instr *
read_deref(read_ctx *ctx, union packed_instr header)
{
   nir_deref_type deref_type = header.deref.deref_type;
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   read_def(ctx, &deref->def, &deref->instr, header);

   nir_deref_instr *parent;

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (header.deref_var.object_idx)
         deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
      else
         deref->var = read_object(ctx);

      deref->type = deref->var->type;
      break;

   case nir_deref_type_struct:
      read_src(ctx, &deref->parent);
      parent = nir_src_as_deref(deref->parent);
      deref->strct.index = blob_read_uint32(ctx->blob);
      deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
         deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
      } else {
         read_src(ctx, &deref->parent);
         read_src(ctx, &deref->arr.index);
      }

      deref->arr.in_bounds = header.deref.in_bounds;

      parent = nir_src_as_deref(deref->parent);
      if (deref->deref_type == nir_deref_type_array)
         deref->type = glsl_get_array_element(parent->type);
      else
         deref->type = parent->type;
      break;

   case nir_deref_type_cast:
      read_src(ctx, &deref->parent);
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      deref->cast.align_mul = blob_read_uint32(ctx->blob);
      deref->cast.align_offset = blob_read_uint32(ctx->blob);
      if (header.deref.cast_type_same_as_last) {
         deref->type = ctx->last_type;
      } else {
         deref->type = decode_type_from_blob(ctx->blob);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      read_src(ctx, &deref->parent);
      parent = nir_src_as_deref(deref->parent);
      deref->type = glsl_get_array_element(parent->type);
      break;

   default:
      unreachable("Invalid deref type");
   }

   if (deref_type == nir_deref_type_var) {
      deref->modes = deref->var->data.mode;
   } else if (deref->deref_type == nir_deref_type_cast) {
      deref->modes = decode_deref_modes(header.deref.modes);
   } else {
      deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes;
   }

   return deref;
}

static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   /* 10 bits for nir_intrinsic_op */
   STATIC_ASSERT(nir_num_intrinsics <= 1024);
   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
   assert(intrin->intrinsic < 1024);

   union packed_instr header;
   header.u32 = 0;

   header.intrinsic.instr_type = intrin->instr.type;
   header.intrinsic.intrinsic = intrin->intrinsic;

   /* Analyze constant indices to decide how to encode them. */
   if (num_indices) {
      unsigned max_bits = 0;
      for (unsigned i = 0; i < num_indices; i++) {
         unsigned max = util_last_bit(intrin->const_index[i]);
         max_bits = MAX2(max_bits, max);
      }

      if (max_bits * num_indices <= 8) {
         header.intrinsic.const_indices_encoding = const_indices_all_combined;

         /* Pack all const indices into 8 bits. */
         unsigned bit_size = 8 / num_indices;
         for (unsigned i = 0; i < num_indices; i++) {
            header.intrinsic.packed_const_indices |=
               intrin->const_index[i] << (i * bit_size);
         }
      } else if (max_bits <= 8)
         header.intrinsic.const_indices_encoding = const_indices_8bit;
      else if (max_bits <= 16)
         header.intrinsic.const_indices_encoding = const_indices_16bit;
      else
         header.intrinsic.const_indices_encoding = const_indices_32bit;
   }

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_def(ctx, &intrin->def, header, intrin->instr.type);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint8(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint16(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint32(ctx->blob, intrin->const_index[i]);
         break;
      }
   }
}

static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx, union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   if (nir_intrinsic_infos[op].has_dest)
      read_def(ctx, &intrin->def, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i]);

   /* Vectorized intrinsics have the same num_components as the dest or src
    * that has 0 components in the info. Find it.
    */
   if (nir_intrinsic_infos[op].has_dest &&
       nir_intrinsic_infos[op].dest_components == 0) {
      intrin->num_components = intrin->def.num_components;
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         if (nir_intrinsic_infos[op].src_components[i] == 0) {
            intrin->num_components = nir_src_num_components(intrin->src[i]);
            break;
         }
      }
   }

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_all_combined: {
         unsigned bit_size = 8 / num_indices;
         unsigned bit_mask = u_bit_consecutive(0, bit_size);
         for (unsigned i = 0; i < num_indices; i++) {
            intrin->const_index[i] =
               (header.intrinsic.packed_const_indices >> (i * bit_size)) &
               bit_mask;
         }
         break;
      }
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint8(ctx->blob);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint16(ctx->blob);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint32(ctx->blob);
         break;
      }
   }

   return intrin;
}

static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
   union packed_instr header;
   header.u32 = 0;

   header.load_const.instr_type = lc->instr.type;
   header.load_const.last_component = lc->def.num_components - 1;
   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
   header.load_const.packing = load_const_full;

   /* Try to pack 1-component constants into the 19 free bits in the header. */
   if (lc->def.num_components == 1) {
      switch (lc->def.bit_size) {
      case 64:
         if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
            /* packed_value contains high 19 bits, low bits are 0 */
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u64 >> 45;
         } else if (util_mask_sign_extend(lc->value[0].i64, 19) == lc->value[0].i64) {
            /* packed_value contains low 19 bits, high bits are sign-extended */
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u64;
         }
         break;

      case 32:
         if ((lc->value[0].u32 & 0x1fff) == 0) {
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u32 >> 13;
         } else if (util_mask_sign_extend(lc->value[0].i32, 19) == lc->value[0].i32) {
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u32;
         }
         break;

      case 16:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u16;
         break;
      case 8:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u8;
         break;
      case 1:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].b;
         break;
      default:
         unreachable("invalid bit_size");
      }
   }

   blob_write_uint32(ctx->blob, header.u32);

   if (header.load_const.packing == load_const_full) {
      switch (lc->def.bit_size) {
      case 64:
         blob_write_bytes(ctx->blob, lc->value,
                          sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint32(ctx->blob, lc->value[i].u32);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint16(ctx->blob, lc->value[i].u16);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint8(ctx->blob, lc->value[i].u8);
         break;
      }
   }

   write_add_object(ctx, &lc->def);
}

static nir_load_const_instr *
read_load_const(read_ctx *ctx, union packed_instr header)
{
   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
                                  decode_bit_size_3bits(header.load_const.bit_size));
   lc->def.divergent = false;

   switch (header.load_const.packing) {
   case load_const_scalar_hi_19bits:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
         break;
      case 32:
         lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_scalar_lo_19bits_sext:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].u64 = header.load_const.packed_value;
         if (lc->value[0].u64 >> 18)
            lc->value[0].u64 |= UINT64_C(0xfffffffffff80000);
         break;
      case 32:
         lc->value[0].u32 = header.load_const.packed_value;
         if (lc->value[0].u32 >> 18)
            lc->value[0].u32 |= 0xfff80000;
         break;
      case 16:
         lc->value[0].u16 = header.load_const.packed_value;
         break;
      case 8:
         lc->value[0].u8 = header.load_const.packed_value;
         break;
      case 1:
         lc->value[0].b = header.load_const.packed_value;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_full:
      switch (lc->def.bit_size) {
      case 64:
         blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u32 = blob_read_uint32(ctx->blob);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u16 = blob_read_uint16(ctx->blob);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u8 = blob_read_uint8(ctx->blob);
         break;
      }
      break;
   }

   read_add_object(ctx, &lc->def);
   return lc;
}

static void
write_ssa_undef(write_ctx *ctx, const nir_undef_instr *undef)
{
   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);

   union packed_instr header;
   header.u32 = 0;

   header.undef.instr_type = undef->instr.type;
   header.undef.last_component = undef->def.num_components - 1;
   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   write_add_object(ctx, &undef->def);
}

static nir_undef_instr *
read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
   nir_undef_instr *undef =
      nir_undef_instr_create(ctx->nir, header.undef.last_component + 1,
                             decode_bit_size_3bits(header.undef.bit_size));

   undef->def.divergent = false;

   read_add_object(ctx, &undef->def);
   return undef;
}

union packed_tex_data {
   uint32_t u32;
   struct {
      unsigned sampler_dim : 4;
      unsigned dest_type : 8;
      unsigned coord_components : 3;
      unsigned is_array : 1;
      unsigned is_shadow : 1;
      unsigned is_new_style_shadow : 1;
      unsigned is_sparse : 1;
      unsigned component : 2;
      unsigned texture_non_uniform : 1;
      unsigned sampler_non_uniform : 1;
      unsigned array_is_lowered_cube : 1;
      unsigned is_gather_implicit_lod : 1;
      unsigned unused : 5; /* Mark unused for valgrind. */
   } u;
};

static void
write_tex(write_ctx *ctx, const nir_tex_instr *tex)
{
   assert(tex->num_srcs < 16);
   assert(tex->op < 32);

   union packed_instr header;
   header.u32 = 0;

   header.tex.instr_type = tex->instr.type;
   header.tex.num_srcs = tex->num_srcs;
   header.tex.op = tex->op;

   write_def(ctx, &tex->def, header, tex->instr.type);

   blob_write_uint32(ctx->blob, tex->texture_index);
   blob_write_uint32(ctx->blob, tex->sampler_index);
   blob_write_uint32(ctx->blob, tex->backend_flags);
   if (tex->op == nir_texop_tg4)
      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
   union packed_tex_data packed = {
      .u.sampler_dim = tex->sampler_dim,
      .u.dest_type = tex->dest_type,
      .u.coord_components = tex->coord_components,
      .u.is_array = tex->is_array,
      .u.is_shadow = tex->is_shadow,
      .u.is_new_style_shadow = tex->is_new_style_shadow,
      .u.is_sparse = tex->is_sparse,
      .u.component = tex->component,
      .u.texture_non_uniform = tex->texture_non_uniform,
      .u.sampler_non_uniform = tex->sampler_non_uniform,
      .u.array_is_lowered_cube = tex->array_is_lowered_cube,
      .u.is_gather_implicit_lod = tex->is_gather_implicit_lod,
   };
   blob_write_uint32(ctx->blob, packed.u32);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src;
      src.u32 = 0;
      src.tex.src_type = tex->src[i].src_type;
      write_src_full(ctx, &tex->src[i].src, src);
   }
}

static nir_tex_instr *
read_tex(read_ctx *ctx, union packed_instr header)
{
   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);

   read_def(ctx, &tex->def, &tex->instr, header);

   tex->op = header.tex.op;
   tex->texture_index = blob_read_uint32(ctx->blob);
   tex->sampler_index = blob_read_uint32(ctx->blob);
   tex->backend_flags = blob_read_uint32(ctx->blob);
   if (tex->op == nir_texop_tg4)
      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));

   union packed_tex_data packed;
   packed.u32 = blob_read_uint32(ctx->blob);
   tex->sampler_dim = packed.u.sampler_dim;
   tex->dest_type = packed.u.dest_type;
   tex->coord_components = packed.u.coord_components;
   tex->is_array = packed.u.is_array;
   tex->is_shadow = packed.u.is_shadow;
   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
   tex->is_sparse = packed.u.is_sparse;
   tex->component = packed.u.component;
   tex->texture_non_uniform = packed.u.texture_non_uniform;
   tex->sampler_non_uniform = packed.u.sampler_non_uniform;
   tex->array_is_lowered_cube = packed.u.array_is_lowered_cube;
   tex->is_gather_implicit_lod = packed.u.is_gather_implicit_lod;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      union packed_src src = read_src(ctx, &tex->src[i].src);
      tex->src[i].src_type = src.tex.src_type;
   }

   return tex;
}

static void
write_phi(write_ctx *ctx, const nir_phi_instr *phi)
{
   union packed_instr header;
   header.u32 = 0;

   header.phi.instr_type = phi->instr.type;
   header.phi.num_srcs = exec_list_length(&phi->srcs);

   /* Phi nodes are special, since they may reference SSA definitions and
    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
    * and then store enough information so that a later fixup pass can fill
    * them in correctly.
    */
   write_def(ctx, &phi->def, header, phi->instr.type);

   nir_foreach_phi_src(src, phi) {
      size_t blob_offset = blob_reserve_uint32(ctx->blob);
      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
      write_phi_fixup fixup = {
         .blob_offset = blob_offset,
         .src = src->src.ssa,
         .block = src->pred,
      };
      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
   }
}

static void
write_fixup_phis(write_ctx *ctx)
{
   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
      blob_overwrite_uint32(ctx->blob, fixup->blob_offset,
                            write_lookup_object(ctx, fixup->src));
      blob_overwrite_uint32(ctx->blob, fixup->blob_offset + sizeof(uint32_t),
                            write_lookup_object(ctx, fixup->block));
   }

   util_dynarray_clear(&ctx->phi_fixups);
}

static nir_phi_instr *
read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
{
   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);

   read_def(ctx, &phi->def, &phi->instr, header);

   /* For similar reasons as before, we just store the index directly into the
    * pointer, and let a later pass resolve the phi sources.
    *
    * In order to ensure that the copied sources (which are just the indices
    * from the blob for now) don't get inserted into the old shader's use-def
    * lists, we have to add the phi instruction *before* we set up its
    * sources.
    */
   nir_instr_insert_after_block(blk, &phi->instr);

   for (unsigned i = 0; i < header.phi.num_srcs; i++) {
      nir_def *def = (nir_def *)(uintptr_t)blob_read_uint32(ctx->blob);
      nir_block *pred = (nir_block *)(uintptr_t)blob_read_uint32(ctx->blob);
      nir_phi_src *src = nir_phi_instr_add_src(phi, pred, def);

      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
       * we have to set the parent_instr manually. It doesn't really matter
       * when we do it, so we might as well do it here.
       */
      nir_src_set_parent_instr(&src->src, &phi->instr);

      /* Stash it in the list of phi sources. We'll walk this list and fix up
       * sources at the very end of read_function_impl.
       */
      list_add(&src->src.use_link, &ctx->phi_srcs);
   }

   return phi;
}

static void
read_fixup_phis(read_ctx *ctx)
{
   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);

      /* Remove from this list */
      list_del(&src->src.use_link);

      list_addtail(&src->src.use_link, &src->src.ssa->uses);
   }
   assert(list_is_empty(&ctx->phi_srcs));
}
1551
1552 static void
write_jump(write_ctx * ctx,const nir_jump_instr * jmp)1553 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
1554 {
1555 /* These aren't handled because they require special block linking */
1556 assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);
1557
1558 assert(jmp->type < 4);
1559
1560 union packed_instr header;
1561 header.u32 = 0;
1562
1563 header.jump.instr_type = jmp->instr.type;
1564 header.jump.type = jmp->type;
1565
1566 blob_write_uint32(ctx->blob, header.u32);
1567 }
1568
1569 static nir_jump_instr *
read_jump(read_ctx * ctx,union packed_instr header)1570 read_jump(read_ctx *ctx, union packed_instr header)
1571 {
1572 /* These aren't handled because they require special block linking */
1573 assert(header.jump.type != nir_jump_goto &&
1574 header.jump.type != nir_jump_goto_if);
1575
1576 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1577 return jmp;
1578 }
1579
1580 static void
write_call(write_ctx * ctx,const nir_call_instr * call)1581 write_call(write_ctx *ctx, const nir_call_instr *call)
1582 {
1583 blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
1584
1585 for (unsigned i = 0; i < call->num_params; i++)
1586 write_src(ctx, &call->params[i]);
1587 }
1588
1589 static nir_call_instr *
read_call(read_ctx * ctx)1590 read_call(read_ctx *ctx)
1591 {
1592 nir_function *callee = read_object(ctx);
1593 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
1594
1595 for (unsigned i = 0; i < call->num_params; i++)
1596 read_src(ctx, &call->params[i]);
1597
1598 return call;
1599 }
1600
1601 static void
write_debug_info(write_ctx * ctx,const nir_debug_info_instr * di)1602 write_debug_info(write_ctx *ctx, const nir_debug_info_instr *di)
1603 {
1604 union packed_instr header;
1605 header.u32 = 0;
1606
1607 header.debug_info.instr_type = nir_instr_type_debug_info;
1608 header.debug_info.type = di->type;
1609 header.debug_info.string_length = di->string_length;
1610
1611 switch (di->type) {
1612 case nir_debug_info_src_loc:
1613 blob_write_uint32(ctx->blob, header.u32);
1614 blob_write_uint32(ctx->blob, di->src_loc.line);
1615 blob_write_uint32(ctx->blob, di->src_loc.column);
1616 blob_write_uint32(ctx->blob, di->src_loc.spirv_offset);
1617 blob_write_uint8(ctx->blob, di->src_loc.source);
1618 if (di->src_loc.line)
1619 write_src(ctx, &di->src_loc.filename);
1620 return;
1621 case nir_debug_info_string:
1622 write_def(ctx, &di->def, header, di->instr.type);
1623 blob_write_bytes(ctx->blob, di->string, di->string_length);
1624 return;
1625 }
1626
1627 unreachable("Unimplemented nir_debug_info_type");
1628 }
1629
1630 static nir_debug_info_instr *
read_debug_info(read_ctx * ctx,union packed_instr header)1631 read_debug_info(read_ctx *ctx, union packed_instr header)
1632 {
1633 nir_debug_info_type type = header.debug_info.type;
1634
1635 switch (type) {
1636 case nir_debug_info_src_loc: {
1637 nir_debug_info_instr *di = nir_debug_info_instr_create(ctx->nir, type, 0);
1638 di->src_loc.line = blob_read_uint32(ctx->blob);
1639 di->src_loc.column = blob_read_uint32(ctx->blob);
1640 di->src_loc.spirv_offset = blob_read_uint32(ctx->blob);
1641 di->src_loc.source = blob_read_uint8(ctx->blob);
1642 if (di->src_loc.line)
1643 read_src(ctx, &di->src_loc.filename);
1644 return di;
1645 }
1646 case nir_debug_info_string: {
1647 nir_debug_info_instr *di =
1648 nir_debug_info_instr_create(ctx->nir, type, header.debug_info.string_length);
1649 read_def(ctx, &di->def, &di->instr, header);
1650 memcpy(di->string, blob_read_bytes(ctx->blob, di->string_length), di->string_length);
1651 return di;
1652 }
1653 }
1654
1655 unreachable("Unimplemented nir_debug_info_type");
1656 }
1657
1658 static void
write_instr(write_ctx * ctx,const nir_instr * instr)1659 write_instr(write_ctx *ctx, const nir_instr *instr)
1660 {
1661 /* We have only 4 bits for the instruction type. */
1662 assert(instr->type < 16);
1663
1664 switch (instr->type) {
1665 case nir_instr_type_alu:
1666 write_alu(ctx, nir_instr_as_alu(instr));
1667 break;
1668 case nir_instr_type_deref:
1669 write_deref(ctx, nir_instr_as_deref(instr));
1670 break;
1671 case nir_instr_type_intrinsic:
1672 write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
1673 break;
1674 case nir_instr_type_load_const:
1675 write_load_const(ctx, nir_instr_as_load_const(instr));
1676 break;
1677 case nir_instr_type_undef:
1678 write_ssa_undef(ctx, nir_instr_as_undef(instr));
1679 break;
1680 case nir_instr_type_tex:
1681 write_tex(ctx, nir_instr_as_tex(instr));
1682 break;
1683 case nir_instr_type_phi:
1684 write_phi(ctx, nir_instr_as_phi(instr));
1685 break;
1686 case nir_instr_type_jump:
1687 write_jump(ctx, nir_instr_as_jump(instr));
1688 break;
1689 case nir_instr_type_call:
1690 blob_write_uint32(ctx->blob, instr->type);
1691 write_call(ctx, nir_instr_as_call(instr));
1692 break;
1693 case nir_instr_type_debug_info:
1694 write_debug_info(ctx, nir_instr_as_debug_info(instr));
1695 break;
1696 case nir_instr_type_parallel_copy:
1697 unreachable("Cannot write parallel copies");
1698 default:
1699 unreachable("bad instr type");
1700 }
1701 }
1702
1703 /* Return the number of instructions read. */
1704 static unsigned
read_instr(read_ctx * ctx,nir_block * block)1705 read_instr(read_ctx *ctx, nir_block *block)
1706 {
1707 STATIC_ASSERT(sizeof(union packed_instr) == 4);
1708 union packed_instr header;
1709 header.u32 = blob_read_uint32(ctx->blob);
1710 nir_instr *instr;
1711
1712 switch (header.any.instr_type) {
1713 case nir_instr_type_alu:
      for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
         nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
      return header.alu.num_followup_alu_sharing_header + 1;
   case nir_instr_type_deref:
      instr = &read_deref(ctx, header)->instr;
      break;
   case nir_instr_type_intrinsic:
      instr = &read_intrinsic(ctx, header)->instr;
      break;
   case nir_instr_type_load_const:
      instr = &read_load_const(ctx, header)->instr;
      break;
   case nir_instr_type_undef:
      instr = &read_ssa_undef(ctx, header)->instr;
      break;
   case nir_instr_type_tex:
      instr = &read_tex(ctx, header)->instr;
      break;
   case nir_instr_type_phi:
      /* Phi instructions are a bit of a special case when reading because we
       * don't want inserting the instruction to automatically handle use/defs
       * for us. Instead, we need to wait until all the blocks/instructions
       * are read so that we can set their sources up.
       */
      read_phi(ctx, block, header);
      return 1;
   case nir_instr_type_jump:
      instr = &read_jump(ctx, header)->instr;
      break;
   case nir_instr_type_call:
      instr = &read_call(ctx)->instr;
      break;
   case nir_instr_type_debug_info:
      instr = &read_debug_info(ctx, header)->instr;
      break;
   case nir_instr_type_parallel_copy:
      unreachable("Cannot read parallel copies");
   default:
      unreachable("bad instr type");
   }

   nir_instr_insert_after_block(block, instr);
   return 1;
}

static void
write_block(write_ctx *ctx, const nir_block *block)
{
   write_add_object(ctx, block);
   blob_write_uint8(ctx->blob, block->divergent);
   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));

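   /* Reset the ALU-header sharing state so a header written in a previous
    * block is never reused across block boundaries.
    */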
   ctx->last_instr_type = ~0;
   ctx->last_alu_header_offset = 0;

   nir_foreach_instr(instr, block) {
      write_instr(ctx, instr);
      ctx->last_instr_type = instr->type;
   }
}

static void
read_block(read_ctx *ctx, struct exec_list *cf_list)
{
   /* Don't actually create a new block. Just use the one from the tail of
    * the list. NIR guarantees that the tail of the list is a block and that
    * no two blocks are side-by-side in the IR; it should still be empty.
    */
   nir_block *block =
      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);

   read_add_object(ctx, block);
   block->divergent = blob_read_uint8(ctx->blob);
   unsigned num_instrs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_instrs;) {
      i += read_instr(ctx, block);
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list);

static void
write_if(write_ctx *ctx, nir_if *nif)
{
   write_src(ctx, &nif->condition);
   blob_write_uint8(ctx->blob, nif->control);

   write_cf_list(ctx, &nif->then_list);
   write_cf_list(ctx, &nif->else_list);
}

static void
read_if(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_if *nif = nir_if_create(ctx->nir);

   read_src(ctx, &nif->condition);
   nif->control = blob_read_uint8(ctx->blob);

   nir_cf_node_insert_end(cf_list, &nif->cf_node);

   read_cf_list(ctx, &nif->then_list);
   read_cf_list(ctx, &nif->else_list);
}

static void
write_loop(write_ctx *ctx, nir_loop *loop)
{
   blob_write_uint8(ctx->blob, loop->control);
   blob_write_uint8(ctx->blob, loop->divergent);
   bool has_continue_construct = nir_loop_has_continue_construct(loop);
   blob_write_uint8(ctx->blob, has_continue_construct);

   write_cf_list(ctx, &loop->body);
   if (has_continue_construct) {
      write_cf_list(ctx, &loop->continue_list);
   }
}

static void
read_loop(read_ctx *ctx, struct exec_list *cf_list)
{
   nir_loop *loop = nir_loop_create(ctx->nir);

   nir_cf_node_insert_end(cf_list, &loop->cf_node);

   loop->control = blob_read_uint8(ctx->blob);
   loop->divergent = blob_read_uint8(ctx->blob);
   bool has_continue_construct = blob_read_uint8(ctx->blob);

   read_cf_list(ctx, &loop->body);
   if (has_continue_construct) {
      nir_loop_add_continue_construct(loop);
      read_cf_list(ctx, &loop->continue_list);
   }
}

static void
write_cf_node(write_ctx *ctx, nir_cf_node *cf)
{
   blob_write_uint32(ctx->blob, cf->type);

   switch (cf->type) {
   case nir_cf_node_block:
      write_block(ctx, nir_cf_node_as_block(cf));
      break;
   case nir_cf_node_if:
      write_if(ctx, nir_cf_node_as_if(cf));
      break;
   case nir_cf_node_loop:
      write_loop(ctx, nir_cf_node_as_loop(cf));
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
read_cf_node(read_ctx *ctx, struct exec_list *list)
{
   nir_cf_node_type type = blob_read_uint32(ctx->blob);

   switch (type) {
   case nir_cf_node_block:
      read_block(ctx, list);
      break;
   case nir_cf_node_if:
      read_if(ctx, list);
      break;
   case nir_cf_node_loop:
      read_loop(ctx, list);
      break;
   default:
      unreachable("bad cf type");
   }
}

static void
write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
{
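   /* CF lists are length-prefixed so the reader knows how many nodes to
    * deserialize before the list ends.
    */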
   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
      write_cf_node(ctx, cf);
   }
}

static void
read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
{
   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_cf_nodes; i++)
      read_cf_node(ctx, cf_list);
}

static void
write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
{
   blob_write_uint8(ctx->blob, fi->structured);
   blob_write_uint8(ctx->blob, !!fi->preamble);

   if (fi->preamble)
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, fi->preamble));

   write_var_list(ctx, &fi->locals);

   write_cf_list(ctx, &fi->body);
   write_fixup_phis(ctx);
}

static nir_function_impl *
read_function_impl(read_ctx *ctx)
{
   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);

   fi->structured = blob_read_uint8(ctx->blob);
   bool preamble = blob_read_uint8(ctx->blob);

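   /* The preamble is a nir_function that was already registered in the
    * index table when read_function() ran, so it can be looked up by index.
    */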
   if (preamble)
      fi->preamble = read_object(ctx);

   read_var_list(ctx, &fi->locals);

   read_cf_list(ctx, &fi->body);
   read_fixup_phis(ctx);

   fi->valid_metadata = 0;

   return fi;
}

static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
   uint32_t flags = 0;
   if (fxn->is_entrypoint)
      flags |= 0x1;
   if (fxn->is_preamble)
      flags |= 0x2;
   if (fxn->name)
      flags |= 0x4;
   if (fxn->impl)
      flags |= 0x8;
   if (fxn->should_inline)
      flags |= 0x10;
   if (fxn->dont_inline)
      flags |= 0x20;
   if (fxn->is_subroutine)
      flags |= 0x40;
   blob_write_uint32(ctx->blob, flags);
   if (fxn->name)
      blob_write_string(ctx->blob, fxn->name);

   blob_write_uint32(ctx->blob, fxn->subroutine_index);
   blob_write_uint32(ctx->blob, fxn->num_subroutine_types);
   for (unsigned i = 0; i < fxn->num_subroutine_types; i++) {
      encode_type_to_blob(ctx->blob, fxn->subroutine_types[i]);
   }

   write_add_object(ctx, fxn);

   blob_write_uint32(ctx->blob, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
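      /* Pack num_components into bits 0-7 and bit_size into bits 8-15. */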
      uint32_t val =
         ((uint32_t)fxn->params[i].num_components) |
         ((uint32_t)fxn->params[i].bit_size) << 8;
      blob_write_uint32(ctx->blob, val);
   }

   /* At first glance, it looks like we should write the function_impl here.
    * However, call instructions need to be able to reference at least the
    * function and those will get processed as we write the function_impls.
    * We stop here and write function_impls as a second pass.
    */
}

static void
read_function(read_ctx *ctx)
{
   uint32_t flags = blob_read_uint32(ctx->blob);

   bool has_name = flags & 0x4;
   char *name = has_name ? blob_read_string(ctx->blob) : NULL;

   nir_function *fxn = nir_function_create(ctx->nir, name);

   fxn->subroutine_index = blob_read_uint32(ctx->blob);
   fxn->num_subroutine_types = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < fxn->num_subroutine_types; i++) {
      fxn->subroutine_types[i] = decode_type_from_blob(ctx->blob);
   }

   read_add_object(ctx, fxn);

   fxn->num_params = blob_read_uint32(ctx->blob);
   fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val = blob_read_uint32(ctx->blob);
      fxn->params[i].num_components = val & 0xff;
      fxn->params[i].bit_size = (val >> 8) & 0xff;
   }

   fxn->is_entrypoint = flags & 0x1;
   fxn->is_preamble = flags & 0x2;
   if (flags & 0x8)
      fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
   fxn->should_inline = flags & 0x10;
   fxn->dont_inline = flags & 0x20;
   fxn->is_subroutine = flags & 0x40;
}

static void
write_xfb_info(write_ctx *ctx, const nir_xfb_info *xfb)
{
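   /* nir_xfb_info is a flat, variable-length struct (sized from
    * output_count), so it round-trips as raw bytes; a zero size marks a
    * missing xfb_info on the read side.
    */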
   if (xfb == NULL) {
      blob_write_uint32(ctx->blob, 0);
   } else {
      size_t size = nir_xfb_info_size(xfb->output_count);
      assert(size <= UINT32_MAX);
      blob_write_uint32(ctx->blob, size);
      blob_write_bytes(ctx->blob, xfb, size);
   }
}

static nir_xfb_info *
read_xfb_info(read_ctx *ctx)
{
   uint32_t size = blob_read_uint32(ctx->blob);
   if (size == 0)
      return NULL;

   struct nir_xfb_info *xfb = ralloc_size(ctx->nir, size);
   blob_copy_bytes(ctx->blob, (void *)xfb, size);

   return xfb;
}

/**
 * Serialize NIR into a binary blob.
 *
 * \param strip  Don't serialize information only useful for debugging,
 *               such as variable names, making cache hits from similar
 *               shaders more likely.
 */
void
nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
   write_ctx ctx = { 0 };
   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
   ctx.blob = blob;
   ctx.nir = nir;
   ctx.strip = strip;
   util_dynarray_init(&ctx.phi_fixups, NULL);

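   /* Reserve room for the object-index count up front; it is patched in at
    * the very end, once every serialized object has been assigned an index.
    */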
   size_t idx_size_offset = blob_reserve_uint32(blob);

   struct shader_info info = nir->info;
   uint32_t strings = 0;
   if (!strip && info.name)
      strings |= 0x1;
   if (!strip && info.label)
      strings |= 0x2;
   blob_write_uint32(blob, strings);
   if (!strip && info.name)
      blob_write_string(blob, info.name);
   if (!strip && info.label)
      blob_write_string(blob, info.label);
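   /* The name/label pointers are meaningless in the blob (the strings were
    * written above), so clear them before dumping the struct wholesale.
    */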
   info.name = info.label = NULL;
   blob_write_bytes(blob, (uint8_t *)&info, sizeof(info));

   write_var_list(&ctx, &nir->variables);

   blob_write_uint32(blob, nir->num_inputs);
   blob_write_uint32(blob, nir->num_uniforms);
   blob_write_uint32(blob, nir->num_outputs);
   blob_write_uint32(blob, nir->scratch_size);

   blob_write_uint32(blob, exec_list_length(&nir->functions));
   nir_foreach_function(fxn, nir) {
      write_function(&ctx, fxn);
   }

   nir_foreach_function_impl(impl, nir) {
      write_function_impl(&ctx, impl);
   }

   blob_write_uint32(blob, nir->constant_data_size);
   if (nir->constant_data_size > 0)
      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);

   write_xfb_info(&ctx, nir->xfb_info);

   if (nir->info.uses_printf)
      nir_serialize_printf_info(blob, nir->printf_info, nir->printf_info_count);

   blob_overwrite_uint32(blob, idx_size_offset, ctx.next_idx);

   _mesa_hash_table_destroy(ctx.remap_table, NULL);
   util_dynarray_fini(&ctx.phi_fixups);
}

nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx = { 0 };
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   ctx.idx_table_len = blob_read_uint32(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));

   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *)&info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->variables);

   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->scratch_size = blob_read_uint32(blob);

   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

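   /* Second pass: impls were deliberately not read alongside their
    * functions (call instructions only need the nir_function), mirroring
    * the two-pass write above.
    */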
   nir_foreach_function(fxn, ctx.nir) {
      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
         nir_function_set_impl(fxn, read_function_impl(&ctx));
   }

   ctx.nir->constant_data_size = blob_read_uint32(blob);
   if (ctx.nir->constant_data_size > 0) {
      ctx.nir->constant_data =
         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
      blob_copy_bytes(blob, ctx.nir->constant_data,
                      ctx.nir->constant_data_size);
   }

   ctx.nir->xfb_info = read_xfb_info(&ctx);

   if (ctx.nir->info.uses_printf) {
      ctx.nir->printf_info =
         nir_deserialize_printf_info(ctx.nir, blob,
                                     &ctx.nir->printf_info_count);
   }

   free(ctx.idx_table);

   nir_validate_shader(ctx.nir, "after deserialize");

   return ctx.nir;
}

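/* Round-trip a shader through serialization and replace its contents with
 * the deserialized copy, e.g. to exercise the serializer in testing.
 */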
void
nir_shader_serialize_deserialize(nir_shader *shader)
{
   const struct nir_shader_compiler_options *options = shader->options;

   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, shader, false);

   /* Delete all of the shader's ralloc children but leave the shader
    * itself alone.
    */
   void *dead_ctx = ralloc_context(NULL);
   ralloc_adopt(dead_ctx, shader);
   ralloc_free(dead_ctx);

   dead_ctx = ralloc_context(NULL);

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);

   blob_finish(&writer);

   nir_shader_replace(shader, copy);
   ralloc_free(dead_ctx);
}

void
nir_serialize_printf_info(struct blob *blob,
                          const u_printf_info *printf_info,
                          unsigned printf_info_count)
{
   blob_write_uint32(blob, printf_info_count);
   for (unsigned i = 0; i < printf_info_count; i++) {
      const u_printf_info *info = &printf_info[i];
      blob_write_uint32(blob, info->num_args);
      blob_write_uint32(blob, info->string_size);
      blob_write_bytes(blob, info->arg_sizes,
                       info->num_args * sizeof(info->arg_sizes[0]));
      /* We can't use blob_write_string() here because the buffer holds
       * multiple NUL-terminated strings back to back.
       */
      blob_write_bytes(blob, info->strings, info->string_size);
   }
}

u_printf_info *
nir_deserialize_printf_info(void *mem_ctx,
                            struct blob_reader *blob,
                            unsigned *printf_info_count)
{
   *printf_info_count = blob_read_uint32(blob);

   u_printf_info *printf_info =
      ralloc_array(mem_ctx, u_printf_info, *printf_info_count);

   for (unsigned i = 0; i < *printf_info_count; i++) {
      u_printf_info *info = &printf_info[i];
      info->num_args = blob_read_uint32(blob);
      info->string_size = blob_read_uint32(blob);
      info->arg_sizes = ralloc_array(mem_ctx, unsigned, info->num_args);
      blob_copy_bytes(blob, info->arg_sizes,
                      info->num_args * sizeof(info->arg_sizes[0]));
      info->strings = ralloc_array(mem_ctx, char, info->string_size);
      blob_copy_bytes(blob, info->strings, info->string_size);
   }

   return printf_info;
}