xref: /aosp_15_r20/external/mesa3d/src/compiler/nir/nir_serialize.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2017 Connor Abbott
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir_serialize.h"
25 #include "util/u_dynarray.h"
26 #include "util/u_math.h"
27 #include "nir_control_flow.h"
28 #include "nir_xfb_info.h"
29 
/* Sentinel pointer value (the integer 1 cast to void *).  Its users are
 * outside this part of the file; judging by the name it marks a function
 * that has an implementation -- confirm at the use sites.
 */
#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
/* Object indices must stay below this limit: write_add_object() asserts that
 * a newly assigned index is never equal to it.  1 << 20 matches the 20-bit
 * object_idx field of union packed_src.
 */
#define MAX_OBJECT_IDS              (1 << 20)
32 
/* One pending phi source on the writer side.  write_ctx::phi_fixups is an
 * array of these; they describe phi sources that are resolved in a second
 * pass.
 */
typedef struct {
   /* Offset into the blob; presumably the position of the placeholder that
    * the second pass patches -- confirm in the phi writer.
    */
   size_t blob_offset;
   /* Presumably the SSA def feeding the phi and the block it comes from;
    * neither field is used in the code visible here.
    */
   nir_def *src;
   nir_block *block;
} write_phi_fixup;
38 
/* Writer-side state threaded through all write_*() helpers. */
typedef struct {
   const nir_shader *nir;

   /* Destination of every blob_write_*() call made by the writers. */
   struct blob *blob;

   /* maps pointer to index; filled by write_add_object() and queried by
    * write_lookup_object()
    */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* The last serialized type.  write_variable() compares against these to
    * avoid re-encoding a type (or the whole nir_variable_data) that equals
    * the one written just before.
    */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;

   /* For skipping equal ALU headers (typical after scalarization).
    * write_def() reads last_instr_type and keeps the offset and value of the
    * most recently written ALU header in the other two fields.
    */
   nir_instr_type last_instr_type;
   uintptr_t last_alu_header_offset;
   uint32_t last_alu_header;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;
68 
/* Reader-side state threaded through all read_*() helpers. */
typedef struct {
   /* Shader being rebuilt; read_variable() uses it as the allocation parent
    * of new variables and read_alu() passes it to nir_alu_instr_create().
    */
   nir_shader *nir;

   /* Source of every blob_read_*() / blob_copy_bytes() call. */
   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer; appended to by
    * read_add_object() and indexed by read_lookup_object()
    */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

   /* The last deserialized type.  These mirror the writer's last_* fields so
    * that "same as last" flags and location diffs can be resolved.
    */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;
} read_ctx;
91 
92 static void
write_add_object(write_ctx * ctx,const void * obj)93 write_add_object(write_ctx *ctx, const void *obj)
94 {
95    uint32_t index = ctx->next_idx++;
96    assert(index != MAX_OBJECT_IDS);
97    _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t)index);
98 }
99 
100 static uint32_t
write_lookup_object(write_ctx * ctx,const void * obj)101 write_lookup_object(write_ctx *ctx, const void *obj)
102 {
103    struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
104    assert(entry);
105    return (uint32_t)(uintptr_t)entry->data;
106 }
107 
108 static void
read_add_object(read_ctx * ctx,void * obj)109 read_add_object(read_ctx *ctx, void *obj)
110 {
111    assert(ctx->next_idx < ctx->idx_table_len);
112    ctx->idx_table[ctx->next_idx++] = obj;
113 }
114 
115 static void *
read_lookup_object(read_ctx * ctx,uint32_t idx)116 read_lookup_object(read_ctx *ctx, uint32_t idx)
117 {
118    assert(idx < ctx->idx_table_len);
119    return ctx->idx_table[idx];
120 }
121 
122 static void *
read_object(read_ctx * ctx)123 read_object(read_ctx *ctx)
124 {
125    return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
126 }
127 
/* Map a bit size of 0, 1, 2, 4, 8, 16, 32 or 64 onto a 3-bit code:
 * 0 stays 0, any other size becomes log2(size) + 1.
 */
static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));

   return bit_size == 0 ? 0 : util_logbase2(bit_size) + 1;
}
137 
/* Inverse of encode_bit_size_3bits(): code 0 means bit size 0, any other
 * code k means a bit size of 2^(k - 1).
 */
static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size == 0)
      return 0;

   return 1 << (bit_size - 1);
}
145 
#define NUM_COMPONENTS_IS_SEPARATE_7 7

/* Squeeze a component count into 3 bits.  Counts 0..4 are stored as-is,
 * 8 and 16 get the codes 5 and 6, and every other count yields the escape
 * code NUM_COMPONENTS_IS_SEPARATE_7, which tells the reader that the real
 * count follows in a separate uint32.
 */
static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   switch (num_components) {
   case 8:
      return 5;
   case 16:
      return 6;
   default:
      /* small counts are literal, the rest need the escape code */
      return num_components <= 4 ? num_components
                                 : NUM_COMPONENTS_IS_SEPARATE_7;
   }
}
161 
/* Inverse of encode_num_components_in_3bits() for the directly encoded
 * values: codes 0..4 are literal counts, 5 means 8 and 6 means 16.  The
 * escape code must be handled by the caller before getting here; any other
 * value is treated as unreachable.
 */
static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   switch (value) {
   case 0:
   case 1:
   case 2:
   case 3:
   case 4:
      return value;
   case 5:
      return 8;
   case 6:
      return 16;
   default:
      unreachable("invalid num_components encoding");
      return 0;
   }
}
175 
176 static void
write_constant(write_ctx * ctx,const nir_constant * c)177 write_constant(write_ctx *ctx, const nir_constant *c)
178 {
179    blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
180    blob_write_uint32(ctx->blob, c->num_elements);
181    for (unsigned i = 0; i < c->num_elements; i++)
182       write_constant(ctx, c->elements[i]);
183 }
184 
185 static nir_constant *
read_constant(read_ctx * ctx,nir_variable * nvar)186 read_constant(read_ctx *ctx, nir_variable *nvar)
187 {
188    nir_constant *c = ralloc(nvar, nir_constant);
189 
190    static const nir_const_value zero_vals[ARRAY_SIZE(c->values)] = { 0 };
191    blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
192    c->is_null_constant = memcmp(c->values, zero_vals, sizeof(c->values)) == 0;
193    c->num_elements = blob_read_uint32(ctx->blob);
194    c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
195    for (unsigned i = 0; i < c->num_elements; i++) {
196       c->elements[i] = read_constant(ctx, nvar);
197       c->is_null_constant &= c->elements[i]->is_null_constant;
198    }
199 
200    return c;
201 }
202 
/* How the nir_variable_data of a variable is stored in the blob; the value
 * goes into packed_var::data_encoding.
 */
enum var_data_encoding {
   /* The whole nir_variable_data struct is written as raw bytes. */
   var_encode_full,
   /* Only a packed_var_data_diff is written, holding the location deltas
    * relative to the previously serialized variable.
    */
   var_encode_location_diff,
};
207 
/* 32-bit header written at the start of every serialized variable.  The
 * has_* bits tell the reader which optional parts follow; the field order
 * and widths are part of the serialized format.
 */
union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name : 1;
      unsigned has_constant_initializer : 1;
      unsigned has_pointer_initializer : 1;
      unsigned has_interface_type : 1;
      /* write_variable() asserts that the count fits in these 7 bits */
      unsigned num_state_slots : 7;
      /* enum var_data_encoding */
      unsigned data_encoding : 2;
      /* type / interface type equals the one serialized just before, so it
       * is not encoded again
       */
      unsigned type_same_as_last : 1;
      unsigned interface_type_same_as_last : 1;
      /* copy of nir_variable_data::ray_query; the reader restores the field
       * from here after decoding the variable data
       */
      unsigned ray_query : 1;
      unsigned num_members : 16;
   } u;
};
223 
/* Compact replacement for a full nir_variable_data when a variable differs
 * from the previously serialized one only in its location fields
 * (var_encode_location_diff).  Each member holds "current - previous" and
 * may therefore be negative.
 *
 * The members are explicitly `signed int`: C leaves it implementation-defined
 * whether a plain `int` bit-field is signed, and an unsigned interpretation
 * would turn negative deltas into large positive ones on read-back.
 */
union packed_var_data_diff {
   uint32_t u32;
   struct {
      signed int location : 13;
      signed int location_frac : 3;
      signed int driver_location : 16;
   } u;
};
232 
/* Serialize one nir_variable.
 *
 * The variable is first registered as an object so later references can use
 * its index.  Output order: packed_var header, type (unless identical to the
 * last one written), interface type (if present and not identical to the
 * last one), name (if kept), variable data (full struct or location diff),
 * state slots, constant initializer, pointer initializer index, members.
 * read_variable() consumes the same sequence.
 */
static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);

   /* num_state_slots is stored in a 7-bit header field. */
   assert(var->num_state_slots < (1 << 7));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_pointer_initializer = !!(var->pointer_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.type_same_as_last = var->type == ctx->last_type;
   flags.u.interface_type_same_as_last =
      var->interface_type && var->interface_type == ctx->last_interface_type;
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   /* Work on a copy so the location can be cleared without touching var. */
   struct nir_variable_data data = var->data;

   /* When stripping, we expect that the location is no longer needed,
    * which is typically after shaders are linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_system_value &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   /* tmp is data with the three location fields replaced by the previous
    * variable's values; if it then equals the previous data byte for byte,
    * the two variables differ in nothing but those fields.
    */
   struct nir_variable_data tmp = data;

   tmp.location = ctx->last_var_data.location;
   tmp.location_frac = ctx->last_var_data.location_frac;
   tmp.driver_location = ctx->last_var_data.driver_location;

   /* See if we can encode only the difference in locations from the last
    * variable.  The bounds keep the deltas inside the 13-bit and 16-bit
    * signed fields of packed_var_data_diff.
    */
   if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
       abs((int)data.location -
           (int)ctx->last_var_data.location) < (1 << 12) &&
       abs((int)data.driver_location -
           (int)ctx->last_var_data.driver_location) < (1 << 15))
      flags.u.data_encoding = var_encode_location_diff;
   else
      flags.u.data_encoding = var_encode_full;

   flags.u.ray_query = var->data.ray_query;

   blob_write_uint32(ctx->blob, flags.u32);

   if (!flags.u.type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->type);
      ctx->last_type = var->type;
   }

   if (var->interface_type && !flags.u.interface_type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->interface_type);
      ctx->last_interface_type = var->interface_type;
   }

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   if (flags.u.data_encoding == var_encode_full) {
      blob_write_bytes(ctx->blob, &data, sizeof(data));
   } else {
      /* Serialize only the difference in locations from the last variable.
       */
      union packed_var_data_diff diff;

      diff.u.location = data.location - ctx->last_var_data.location;
      diff.u.location_frac = data.location_frac -
                             ctx->last_var_data.location_frac;
      diff.u.driver_location = data.driver_location -
                               ctx->last_var_data.driver_location;

      blob_write_uint32(ctx->blob, diff.u32);
   }

   /* The (possibly stripped) copy becomes the reference for the next
    * variable.
    */
   ctx->last_var_data = data;

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   /* The pointer initializer is written as an object index, so the variable
    * it points to must already have been registered.
    */
   if (var->pointer_initializer)
      blob_write_uint32(ctx->blob,
                        write_lookup_object(ctx, var->pointer_initializer));
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *)var->members,
                       var->num_members * sizeof(*var->members));
   }
}
332 
/* Deserialize one nir_variable, consuming the parts in the order in which
 * write_variable() emits them.  The variable is zero-allocated with the
 * shader as ralloc parent and registered as an object before anything else
 * is read; its name, state slots, constant initializer and members are
 * allocated with the variable as parent.
 */
static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.type_same_as_last) {
      var->type = ctx->last_type;
   } else {
      var->type = decode_type_from_blob(ctx->blob);
      ctx->last_type = var->type;
   }

   if (flags.u.has_interface_type) {
      if (flags.u.interface_type_same_as_last) {
         var->interface_type = ctx->last_interface_type;
      } else {
         var->interface_type = decode_type_from_blob(ctx->blob);
         ctx->last_interface_type = var->interface_type;
      }
   }

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }

   if (flags.u.data_encoding == var_encode_full) {
      blob_copy_bytes(ctx->blob, (uint8_t *)&var->data, sizeof(var->data));
      ctx->last_var_data = var->data;
   } else { /* var_encode_location_diff */
      union packed_var_data_diff diff;
      diff.u32 = blob_read_uint32(ctx->blob);

      /* Start from the previous variable's data and apply the deltas. */
      var->data = ctx->last_var_data;
      var->data.location += diff.u.location;
      var->data.location_frac += diff.u.location_frac;
      var->data.driver_location += diff.u.driver_location;

      ctx->last_var_data = var->data;
   }

   /* ray_query travels in the header; set it after last_var_data has been
    * recorded.
    */
   var->data.ray_query = flags.u.ray_query;

   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;

   /* Stored as an object index; resolved through the index table. */
   if (flags.u.has_pointer_initializer)
      var->pointer_initializer = read_object(ctx);
   else
      var->pointer_initializer = NULL;

   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *)var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}
411 
412 static void
write_var_list(write_ctx * ctx,const struct exec_list * src)413 write_var_list(write_ctx *ctx, const struct exec_list *src)
414 {
415    blob_write_uint32(ctx->blob, exec_list_length(src));
416    foreach_list_typed(nir_variable, var, node, src) {
417       write_variable(ctx, var);
418    }
419 }
420 
421 static void
read_var_list(read_ctx * ctx,struct exec_list * dst)422 read_var_list(read_ctx *ctx, struct exec_list *dst)
423 {
424    exec_list_make_empty(dst);
425    unsigned num_vars = blob_read_uint32(ctx->blob);
426    for (unsigned i = 0; i < num_vars; i++) {
427       nir_variable *var = read_variable(ctx);
428       exec_list_push_tail(dst, &var->node);
429    }
430 }
431 
/* 32-bit encoding of one source.  All views share the same word: `any`
 * exposes the 20-bit object index that write_src_full() fills in and
 * read_src() resolves, while `alu` and `tex` give instruction-specific
 * meaning to the bits above the 22-bit header.
 */
union packed_src {
   uint32_t u32;
   struct {
      unsigned _pad : 2; /* <-- Header */
      unsigned object_idx : 20;
      unsigned _footer : 10; /* <-- Footer */
   } any;
   struct {
      unsigned _header : 22; /* <-- Header */
      unsigned _pad : 2;     /* <-- Footer */
      /* 2 bits per channel: used by the ALU writer only when both the
       * source and the channels consumed have at most 4 components
       */
      unsigned swizzle_x : 2;
      unsigned swizzle_y : 2;
      unsigned swizzle_z : 2;
      unsigned swizzle_w : 2;
   } alu;
   struct {
      unsigned _header : 22; /* <-- Header */
      unsigned src_type : 5; /* <-- Footer */
      unsigned _pad : 5;
   } tex;
};
453 
454 static void
write_src_full(write_ctx * ctx,const nir_src * src,union packed_src header)455 write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
456 {
457    header.any.object_idx = write_lookup_object(ctx, src->ssa);
458    blob_write_uint32(ctx->blob, header.u32);
459 }
460 
461 static void
write_src(write_ctx * ctx,const nir_src * src)462 write_src(write_ctx *ctx, const nir_src *src)
463 {
464    union packed_src header = { 0 };
465    write_src_full(ctx, src, header);
466 }
467 
468 static union packed_src
read_src(read_ctx * ctx,nir_src * src)469 read_src(read_ctx *ctx, nir_src *src)
470 {
471    STATIC_ASSERT(sizeof(union packed_src) == 4);
472    union packed_src header;
473    header.u32 = blob_read_uint32(ctx->blob);
474 
475    src->ssa = read_lookup_object(ctx, header.any.object_idx);
476    return header;
477 }
478 
/* One-byte description of an SSA def.  write_def() stores it in the `def`
 * field (top 8 bits) of the instruction header and read_def() unpacks it
 * from there.
 */
union packed_def {
   uint8_t u8;
   struct {
      uint8_t _pad : 1;
      /* see encode_num_components_in_3bits(); the value 7 means the count
       * is stored in a separate uint32
       */
      uint8_t num_components : 3;
      /* see encode_bit_size_3bits() */
      uint8_t bit_size : 3;
      uint8_t divergent : 1;
   };
};
488 
/* Storage format of an intrinsic's constant indices; the value fits the
 * 2-bit const_indices_encoding field of the intrinsic instruction header.
 */
enum intrinsic_const_indices_encoding {
   /* Use packed_const_indices to store tightly packed indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_all_combined,

   const_indices_8bit,  /* 8 bits per element */
   const_indices_16bit, /* 16 bits per element */
   const_indices_32bit, /* 32 bits per element */
};
501 
/* Storage format of a load_const value; the value fits the 2-bit `packing`
 * field of the load_const instruction header and determines how that
 * header's 19-bit packed_value is interpreted.
 */
enum load_const_packing {
   /* Constants are not packed and are stored in following dwords. */
   load_const_full,

   /* packed_value contains high 19 bits, low bits are 0,
    * good for floating-point decimals
    */
   load_const_scalar_hi_19bits,

   /* packed_value contains low 19 bits, high bits are sign-extended */
   load_const_scalar_lo_19bits_sext,
};
514 
/* 32-bit header word of a serialized instruction.  Every view starts with
 * the 4-bit instr_type.  Views for instructions that produce a def keep the
 * packed_def byte in the top 8 bits, which is where write_def() and
 * read_def() access it through `any.def`; load_const, undef and jump have no
 * such field.  The bit layout is part of the serialized format.
 */
union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type : 4; /* always present */
      unsigned _pad : 20;
      unsigned def : 8; /* always last */
   } any;
   struct {
      unsigned instr_type : 4;
      unsigned exact : 1;
      unsigned no_signed_wrap : 1;
      unsigned no_unsigned_wrap : 1;
      unsigned padding : 1;
      /* Reg: writemask; SSA: swizzles for 2 srcs */
      unsigned writemask_or_two_swizzles : 4;
      unsigned op : 9;
      unsigned packed_src_ssa_16bit : 1;
      /* Scalarized ALUs always have the same header. */
      unsigned num_followup_alu_sharing_header : 2;
      unsigned def : 8;
   } alu;
   struct {
      unsigned instr_type : 4;
      unsigned deref_type : 3;
      unsigned cast_type_same_as_last : 1;
      unsigned modes : 5; /* See (de|en)code_deref_modes() */
      unsigned _pad : 9;
      unsigned in_bounds : 1;
      unsigned packed_src_ssa_16bit : 1; /* deref_var redefines this */
      unsigned def : 8;
   } deref;
   struct {
      unsigned instr_type : 4;
      unsigned deref_type : 3;
      unsigned _pad : 1;
      unsigned object_idx : 16; /* if 0, the object ID is a separate uint32 */
      unsigned def : 8;
   } deref_var;
   struct {
      unsigned instr_type : 4;
      unsigned intrinsic : 10;
      unsigned const_indices_encoding : 2;
      unsigned packed_const_indices : 8;
      unsigned def : 8;
   } intrinsic;
   struct {
      unsigned instr_type : 4;
      unsigned last_component : 4;
      unsigned bit_size : 3;
      unsigned packing : 2;       /* enum load_const_packing */
      unsigned packed_value : 19; /* meaning determined by packing */
   } load_const;
   struct {
      unsigned instr_type : 4;
      unsigned last_component : 4;
      unsigned bit_size : 3;
      unsigned _pad : 21;
   } undef;
   struct {
      unsigned instr_type : 4;
      unsigned num_srcs : 4;
      unsigned op : 5;
      unsigned _pad : 11;
      unsigned def : 8;
   } tex;
   struct {
      unsigned instr_type : 4;
      unsigned num_srcs : 20;
      unsigned def : 8;
   } phi;
   struct {
      unsigned instr_type : 4;
      unsigned type : 2;
      unsigned _pad : 26;
   } jump;
   struct {
      unsigned instr_type : 4;
      unsigned type : 4;
      unsigned string_length : 16;
      unsigned def : 8;
   } debug_info;
};
597 
/* Pack the def's description into the top 8 bits of the instruction header
 * (the low 24 bits are supplied by the caller in `header`) and emit the
 * header, then register the def as an object.
 *
 * For ALU instructions the header may not be written at all: if the previous
 * instruction was also an ALU with an identical header, a counter inside
 * that earlier header is bumped instead.  If the component count does not
 * fit the 3-bit encoding it follows as a separate uint32.
 *
 * This function only reads ctx->last_instr_type; it does not update it.
 */
static void
write_def(write_ctx *ctx, const nir_def *def, union packed_instr header,
          nir_instr_type instr_type)
{
   STATIC_ASSERT(sizeof(union packed_def) == 1);
   union packed_def pdef;
   pdef.u8 = 0;

   pdef.num_components =
      encode_num_components_in_3bits(def->num_components);
   pdef.bit_size = encode_bit_size_3bits(def->bit_size);
   pdef.divergent = def->divergent;
   header.any.def = pdef.u8;

   /* Check if the current ALU instruction has the same header as the previous
    * instruction that is also ALU. If it is, we don't have to write
    * the current header. This is a typical occurrence after scalarization.
    */
   if (instr_type == nir_instr_type_alu) {
      bool equal_header = false;

      if (ctx->last_instr_type == nir_instr_type_alu) {
         assert(ctx->last_alu_header_offset);
         union packed_instr last_header;
         last_header.u32 = ctx->last_alu_header;

         /* Clear the field that counts ALUs with equal headers. */
         union packed_instr clean_header;
         clean_header.u32 = last_header.u32;
         clean_header.alu.num_followup_alu_sharing_header = 0;

         /* There can be at most 4 consecutive ALU instructions
          * sharing the same header.
          */
         if (last_header.alu.num_followup_alu_sharing_header < 3 &&
             header.u32 == clean_header.u32) {
            /* Patch the already written header in place with the new count. */
            last_header.alu.num_followup_alu_sharing_header++;
            blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset,
                                  last_header.u32);
            ctx->last_alu_header = last_header.u32;
            equal_header = true;
         }
      }

      if (!equal_header) {
         /* Remember where this header lives so that following ALUs can
          * patch its counter.
          */
         ctx->last_alu_header_offset = blob_reserve_uint32(ctx->blob);
         blob_overwrite_uint32(ctx->blob, ctx->last_alu_header_offset, header.u32);
         ctx->last_alu_header = header.u32;
      }
   } else {
      blob_write_uint32(ctx->blob, header.u32);
   }

   if (pdef.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
      blob_write_uint32(ctx->blob, def->num_components);

   write_add_object(ctx, def);
}
657 
658 static void
read_def(read_ctx * ctx,nir_def * def,nir_instr * instr,union packed_instr header)659 read_def(read_ctx *ctx, nir_def *def, nir_instr *instr,
660          union packed_instr header)
661 {
662    union packed_def pdef;
663    pdef.u8 = header.any.def;
664 
665    unsigned bit_size = decode_bit_size_3bits(pdef.bit_size);
666    unsigned num_components;
667    if (pdef.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
668       num_components = blob_read_uint32(ctx->blob);
669    else
670       num_components = decode_num_components_in_3bits(pdef.num_components);
671    nir_def_init(instr, def, num_components, bit_size);
672    def->divergent = pdef.divergent;
673    read_add_object(ctx, def);
674 }
675 
676 static bool
are_object_ids_16bit(write_ctx * ctx)677 are_object_ids_16bit(write_ctx *ctx)
678 {
679    /* Check the highest object ID, because they are monotonic. */
680    return ctx->next_idx < (1 << 16);
681 }
682 
683 static bool
is_alu_src_ssa_16bit(write_ctx * ctx,const nir_alu_instr * alu)684 is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
685 {
686    unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
687 
688    for (unsigned i = 0; i < num_srcs; i++) {
689       unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
690 
691       for (unsigned chan = 0; chan < src_components; chan++) {
692          /* The swizzles for src0.x and src1.x are stored
693           * in writemask_or_two_swizzles for SSA ALUs.
694           */
695          if (i < 2 && chan == 0 && alu->src[i].swizzle[chan] < 4)
696             continue;
697 
698          if (alu->src[i].swizzle[chan] != chan)
699             return false;
700       }
701    }
702 
703    return are_object_ids_16bit(ctx);
704 }
705 
/* Serialize an ALU instruction: header and def (via write_def), the
 * fp_fast_math word, then the sources.
 *
 * Two source encodings exist.  The compact one (packed_src_ssa_16bit) writes
 * one 16-bit object index per source and keeps the .x swizzles of the first
 * two sources in the header.  The general one writes a packed_src word per
 * source, with swizzles either inside that word (at most 4 components and
 * 4 channels) or in extra uint32s holding 4 bits per channel.
 */
static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;

   /* 9 bits for nir_op */
   STATIC_ASSERT(nir_num_opcodes <= 512);
   union packed_instr header;
   header.u32 = 0;

   header.alu.instr_type = alu->instr.type;
   header.alu.exact = alu->exact;
   header.alu.no_signed_wrap = alu->no_signed_wrap;
   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
   header.alu.op = alu->op;
   header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);

   if (header.alu.packed_src_ssa_16bit) {
      /* For packed srcs of SSA ALUs, this field stores the swizzles. */
      header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
      if (num_srcs > 1)
         header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
   }

   write_def(ctx, &alu->def, header, alu->instr.type);
   blob_write_uint32(ctx->blob, alu->fp_fast_math);

   if (header.alu.packed_src_ssa_16bit) {
      /* Compact form: one 16-bit object index per source. */
      for (unsigned i = 0; i < num_srcs; i++) {
         unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
         assert(idx < (1 << 16));
         blob_write_uint16(ctx->blob, idx);
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         /* src_channels: channels this instruction reads from the source;
          * src_components: components of the source value itself.
          */
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         union packed_src src;
         bool packed = src_components <= 4 && src_channels <= 4;
         src.u32 = 0;

         if (packed) {
            src.alu.swizzle_x = alu->src[i].swizzle[0];
            src.alu.swizzle_y = alu->src[i].swizzle[1];
            src.alu.swizzle_z = alu->src[i].swizzle[2];
            src.alu.swizzle_w = alu->src[i].swizzle[3];
         }

         write_src_full(ctx, &alu->src[i].src, src);

         /* Store swizzles for vec8 and vec16. */
         if (!packed) {
            /* One uint32 per group of up to 8 channels. */
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = 0;

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  value |= (uint32_t)alu->src[i].swizzle[o + j] << (4 * j); /* 4 bits per swizzle */
               }

               blob_write_uint32(ctx->blob, value);
            }
         }
      }
   }
}
771 
/* Deserialize an ALU instruction whose header word has already been read by
 * the caller.  Creates the instruction, restores its flags, def and
 * fp_fast_math, then decodes the sources using whichever of the two source
 * encodings the header selects (see packed_src_ssa_16bit).
 */
static nir_alu_instr *
read_alu(read_ctx *ctx, union packed_instr header)
{
   unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);

   alu->exact = header.alu.exact;
   alu->no_signed_wrap = header.alu.no_signed_wrap;
   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;

   read_def(ctx, &alu->def, &alu->instr, header);
   alu->fp_fast_math = blob_read_uint32(ctx->blob);

   if (header.alu.packed_src_ssa_16bit) {
      /* Compact form: 16-bit object indices, identity swizzles.  Channel 0
       * of the first two sources is overridden from the header below.
       */
      for (unsigned i = 0; i < num_srcs; i++) {
         nir_alu_src *src = &alu->src[i];
         src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));

         memset(&src->swizzle, 0, sizeof(src->swizzle));

         unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

         for (unsigned chan = 0; chan < src_components; chan++)
            src->swizzle[chan] = chan;
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         union packed_src src = read_src(ctx, &alu->src[i].src);
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         /* Same criterion the writer used to choose the swizzle storage. */
         bool packed = src_components <= 4 && src_channels <= 4;

         memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));

         if (packed) {
            alu->src[i].swizzle[0] = src.alu.swizzle_x;
            alu->src[i].swizzle[1] = src.alu.swizzle_y;
            alu->src[i].swizzle[2] = src.alu.swizzle_z;
            alu->src[i].swizzle[3] = src.alu.swizzle_w;
         } else {
            /* Load swizzles for vec8 and vec16. */
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = blob_read_uint32(ctx->blob);

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  alu->src[i].swizzle[o + j] =
                     (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
               }
            }
         }
      }
   }

   if (header.alu.packed_src_ssa_16bit) {
      /* Restore the .x swizzles of src0/src1 that the writer stored in the
       * header: low 2 bits for src0, the next 2 bits for src1.
       */
      alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
      if (num_srcs > 1)
         alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
   }

   return alu;
}
833 
834 #define NUM_GENERIC_MODES 4
835 #define MODE_ENC_GENERIC_BIT (1 << 4)
836 
837 static nir_variable_mode
decode_deref_modes(unsigned modes)838 decode_deref_modes(unsigned modes)
839 {
840    if (modes & MODE_ENC_GENERIC_BIT) {
841       modes &= ~MODE_ENC_GENERIC_BIT;
842       return modes << (ffs(nir_var_mem_generic) - 1);
843    } else {
844       return 1 << modes;
845    }
846 }
847 
848 static unsigned
encode_deref_modes(nir_variable_mode modes)849 encode_deref_modes(nir_variable_mode modes)
850 {
851    /* Mode sets on derefs generally come in two forms.  For certain OpenCL
852     * cases, we can have more than one of the generic modes set.  In this
853     * case, we need the full bitfield.  Fortunately, there are only 4 of
854     * these.  For all other modes, we can only have one mode at a time so we
855     * can compress them by only storing the bit position.  This, plus one bit
856     * to select encoding, lets us pack the entire bitfield in 5 bits.
857     */
858 
859    /* Assert that the modes we are compressing fit along with the generic bit
860     */
861    STATIC_ASSERT((nir_num_variable_modes - NUM_GENERIC_MODES) <
862                  MODE_ENC_GENERIC_BIT);
863 
864    /* Assert that the generic modes are defined at the end of the modes enum
865     */
866    STATIC_ASSERT((nir_var_all & ~nir_var_mem_generic) <
867                  (1 << (nir_num_variable_modes - NUM_GENERIC_MODES)));
868 
869    unsigned enc;
870    if (modes == 0 || (modes & nir_var_mem_generic)) {
871       assert(!(modes & ~nir_var_mem_generic));
872       enc = modes >> (ffs(nir_var_mem_generic) - 1);
873       assert(enc < MODE_ENC_GENERIC_BIT);
874       enc |= MODE_ENC_GENERIC_BIT;
875    } else {
876       assert(util_is_power_of_two_nonzero(modes));
877       enc = ffs(modes) - 1;
878       assert(enc < MODE_ENC_GENERIC_BIT);
879    }
880    assert(modes == decode_deref_modes(enc));
881    return enc;
882 }
883 
884 static void
write_deref(write_ctx * ctx,const nir_deref_instr * deref)885 write_deref(write_ctx *ctx, const nir_deref_instr *deref)
886 {
887    assert(deref->deref_type < 8);
888 
889    union packed_instr header;
890    header.u32 = 0;
891 
892    header.deref.instr_type = deref->instr.type;
893    header.deref.deref_type = deref->deref_type;
894 
895    if (deref->deref_type == nir_deref_type_cast) {
896       header.deref.modes = encode_deref_modes(deref->modes);
897       header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
898    }
899 
900    unsigned var_idx = 0;
901    if (deref->deref_type == nir_deref_type_var) {
902       var_idx = write_lookup_object(ctx, deref->var);
903       if (var_idx && var_idx < (1 << 16))
904          header.deref_var.object_idx = var_idx;
905    }
906 
907    if (deref->deref_type == nir_deref_type_array ||
908        deref->deref_type == nir_deref_type_ptr_as_array) {
909       header.deref.packed_src_ssa_16bit = are_object_ids_16bit(ctx);
910 
911       header.deref.in_bounds = deref->arr.in_bounds;
912    }
913 
914    write_def(ctx, &deref->def, header, deref->instr.type);
915 
916    switch (deref->deref_type) {
917    case nir_deref_type_var:
918       if (!header.deref_var.object_idx)
919          blob_write_uint32(ctx->blob, var_idx);
920       break;
921 
922    case nir_deref_type_struct:
923       write_src(ctx, &deref->parent);
924       blob_write_uint32(ctx->blob, deref->strct.index);
925       break;
926 
927    case nir_deref_type_array:
928    case nir_deref_type_ptr_as_array:
929       if (header.deref.packed_src_ssa_16bit) {
930          blob_write_uint16(ctx->blob,
931                            write_lookup_object(ctx, deref->parent.ssa));
932          blob_write_uint16(ctx->blob,
933                            write_lookup_object(ctx, deref->arr.index.ssa));
934       } else {
935          write_src(ctx, &deref->parent);
936          write_src(ctx, &deref->arr.index);
937       }
938       break;
939 
940    case nir_deref_type_cast:
941       write_src(ctx, &deref->parent);
942       blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
943       blob_write_uint32(ctx->blob, deref->cast.align_mul);
944       blob_write_uint32(ctx->blob, deref->cast.align_offset);
945       if (!header.deref.cast_type_same_as_last) {
946          encode_type_to_blob(ctx->blob, deref->type);
947          ctx->last_type = deref->type;
948       }
949       break;
950 
951    case nir_deref_type_array_wildcard:
952       write_src(ctx, &deref->parent);
953       break;
954 
955    default:
956       unreachable("Invalid deref type");
957    }
958 }
959 
960 static nir_deref_instr *
read_deref(read_ctx * ctx,union packed_instr header)961 read_deref(read_ctx *ctx, union packed_instr header)
962 {
963    nir_deref_type deref_type = header.deref.deref_type;
964    nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);
965 
966    read_def(ctx, &deref->def, &deref->instr, header);
967 
968    nir_deref_instr *parent;
969 
970    switch (deref->deref_type) {
971    case nir_deref_type_var:
972       if (header.deref_var.object_idx)
973          deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
974       else
975          deref->var = read_object(ctx);
976 
977       deref->type = deref->var->type;
978       break;
979 
980    case nir_deref_type_struct:
981       read_src(ctx, &deref->parent);
982       parent = nir_src_as_deref(deref->parent);
983       deref->strct.index = blob_read_uint32(ctx->blob);
984       deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
985       break;
986 
987    case nir_deref_type_array:
988    case nir_deref_type_ptr_as_array:
989       if (header.deref.packed_src_ssa_16bit) {
990          deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
991          deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
992       } else {
993          read_src(ctx, &deref->parent);
994          read_src(ctx, &deref->arr.index);
995       }
996 
997       deref->arr.in_bounds = header.deref.in_bounds;
998 
999       parent = nir_src_as_deref(deref->parent);
1000       if (deref->deref_type == nir_deref_type_array)
1001          deref->type = glsl_get_array_element(parent->type);
1002       else
1003          deref->type = parent->type;
1004       break;
1005 
1006    case nir_deref_type_cast:
1007       read_src(ctx, &deref->parent);
1008       deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
1009       deref->cast.align_mul = blob_read_uint32(ctx->blob);
1010       deref->cast.align_offset = blob_read_uint32(ctx->blob);
1011       if (header.deref.cast_type_same_as_last) {
1012          deref->type = ctx->last_type;
1013       } else {
1014          deref->type = decode_type_from_blob(ctx->blob);
1015          ctx->last_type = deref->type;
1016       }
1017       break;
1018 
1019    case nir_deref_type_array_wildcard:
1020       read_src(ctx, &deref->parent);
1021       parent = nir_src_as_deref(deref->parent);
1022       deref->type = glsl_get_array_element(parent->type);
1023       break;
1024 
1025    default:
1026       unreachable("Invalid deref type");
1027    }
1028 
1029    if (deref_type == nir_deref_type_var) {
1030       deref->modes = deref->var->data.mode;
1031    } else if (deref->deref_type == nir_deref_type_cast) {
1032       deref->modes = decode_deref_modes(header.deref.modes);
1033    } else {
1034       deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes;
1035    }
1036 
1037    return deref;
1038 }
1039 
1040 static void
write_intrinsic(write_ctx * ctx,const nir_intrinsic_instr * intrin)1041 write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
1042 {
1043    /* 10 bits for nir_intrinsic_op */
1044    STATIC_ASSERT(nir_num_intrinsics <= 1024);
1045    unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
1046    unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
1047    assert(intrin->intrinsic < 1024);
1048 
1049    union packed_instr header;
1050    header.u32 = 0;
1051 
1052    header.intrinsic.instr_type = intrin->instr.type;
1053    header.intrinsic.intrinsic = intrin->intrinsic;
1054 
1055    /* Analyze constant indices to decide how to encode them. */
1056    if (num_indices) {
1057       unsigned max_bits = 0;
1058       for (unsigned i = 0; i < num_indices; i++) {
1059          unsigned max = util_last_bit(intrin->const_index[i]);
1060          max_bits = MAX2(max_bits, max);
1061       }
1062 
1063       if (max_bits * num_indices <= 8) {
1064          header.intrinsic.const_indices_encoding = const_indices_all_combined;
1065 
1066          /* Pack all const indices into 8 bits. */
1067          unsigned bit_size = 8 / num_indices;
1068          for (unsigned i = 0; i < num_indices; i++) {
1069             header.intrinsic.packed_const_indices |=
1070                intrin->const_index[i] << (i * bit_size);
1071          }
1072       } else if (max_bits <= 8)
1073          header.intrinsic.const_indices_encoding = const_indices_8bit;
1074       else if (max_bits <= 16)
1075          header.intrinsic.const_indices_encoding = const_indices_16bit;
1076       else
1077          header.intrinsic.const_indices_encoding = const_indices_32bit;
1078    }
1079 
1080    if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
1081       write_def(ctx, &intrin->def, header, intrin->instr.type);
1082    else
1083       blob_write_uint32(ctx->blob, header.u32);
1084 
1085    for (unsigned i = 0; i < num_srcs; i++)
1086       write_src(ctx, &intrin->src[i]);
1087 
1088    if (num_indices) {
1089       switch (header.intrinsic.const_indices_encoding) {
1090       case const_indices_8bit:
1091          for (unsigned i = 0; i < num_indices; i++)
1092             blob_write_uint8(ctx->blob, intrin->const_index[i]);
1093          break;
1094       case const_indices_16bit:
1095          for (unsigned i = 0; i < num_indices; i++)
1096             blob_write_uint16(ctx->blob, intrin->const_index[i]);
1097          break;
1098       case const_indices_32bit:
1099          for (unsigned i = 0; i < num_indices; i++)
1100             blob_write_uint32(ctx->blob, intrin->const_index[i]);
1101          break;
1102       }
1103    }
1104 }
1105 
1106 static nir_intrinsic_instr *
read_intrinsic(read_ctx * ctx,union packed_instr header)1107 read_intrinsic(read_ctx *ctx, union packed_instr header)
1108 {
1109    nir_intrinsic_op op = header.intrinsic.intrinsic;
1110    nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
1111 
1112    unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
1113    unsigned num_indices = nir_intrinsic_infos[op].num_indices;
1114 
1115    if (nir_intrinsic_infos[op].has_dest)
1116       read_def(ctx, &intrin->def, &intrin->instr, header);
1117 
1118    for (unsigned i = 0; i < num_srcs; i++)
1119       read_src(ctx, &intrin->src[i]);
1120 
1121    /* Vectorized instrinsics have num_components same as dst or src that has
1122     * 0 components in the info. Find it.
1123     */
1124    if (nir_intrinsic_infos[op].has_dest &&
1125        nir_intrinsic_infos[op].dest_components == 0) {
1126       intrin->num_components = intrin->def.num_components;
1127    } else {
1128       for (unsigned i = 0; i < num_srcs; i++) {
1129          if (nir_intrinsic_infos[op].src_components[i] == 0) {
1130             intrin->num_components = nir_src_num_components(intrin->src[i]);
1131             break;
1132          }
1133       }
1134    }
1135 
1136    if (num_indices) {
1137       switch (header.intrinsic.const_indices_encoding) {
1138       case const_indices_all_combined: {
1139          unsigned bit_size = 8 / num_indices;
1140          unsigned bit_mask = u_bit_consecutive(0, bit_size);
1141          for (unsigned i = 0; i < num_indices; i++) {
1142             intrin->const_index[i] =
1143                (header.intrinsic.packed_const_indices >> (i * bit_size)) &
1144                bit_mask;
1145          }
1146          break;
1147       }
1148       case const_indices_8bit:
1149          for (unsigned i = 0; i < num_indices; i++)
1150             intrin->const_index[i] = blob_read_uint8(ctx->blob);
1151          break;
1152       case const_indices_16bit:
1153          for (unsigned i = 0; i < num_indices; i++)
1154             intrin->const_index[i] = blob_read_uint16(ctx->blob);
1155          break;
1156       case const_indices_32bit:
1157          for (unsigned i = 0; i < num_indices; i++)
1158             intrin->const_index[i] = blob_read_uint32(ctx->blob);
1159          break;
1160       }
1161    }
1162 
1163    return intrin;
1164 }
1165 
1166 static void
write_load_const(write_ctx * ctx,const nir_load_const_instr * lc)1167 write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
1168 {
1169    assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
1170    union packed_instr header;
1171    header.u32 = 0;
1172 
1173    header.load_const.instr_type = lc->instr.type;
1174    header.load_const.last_component = lc->def.num_components - 1;
1175    header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
1176    header.load_const.packing = load_const_full;
1177 
1178    /* Try to pack 1-component constants into the 19 free bits in the header. */
1179    if (lc->def.num_components == 1) {
1180       switch (lc->def.bit_size) {
1181       case 64:
1182          if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
1183             /* packed_value contains high 19 bits, low bits are 0 */
1184             header.load_const.packing = load_const_scalar_hi_19bits;
1185             header.load_const.packed_value = lc->value[0].u64 >> 45;
1186          } else if (util_mask_sign_extend(lc->value[0].i64, 19) == lc->value[0].i64) {
1187             /* packed_value contains low 19 bits, high bits are sign-extended */
1188             header.load_const.packing = load_const_scalar_lo_19bits_sext;
1189             header.load_const.packed_value = lc->value[0].u64;
1190          }
1191          break;
1192 
1193       case 32:
1194          if ((lc->value[0].u32 & 0x1fff) == 0) {
1195             header.load_const.packing = load_const_scalar_hi_19bits;
1196             header.load_const.packed_value = lc->value[0].u32 >> 13;
1197          } else if (util_mask_sign_extend(lc->value[0].i32, 19) == lc->value[0].i32) {
1198             header.load_const.packing = load_const_scalar_lo_19bits_sext;
1199             header.load_const.packed_value = lc->value[0].u32;
1200          }
1201          break;
1202 
1203       case 16:
1204          header.load_const.packing = load_const_scalar_lo_19bits_sext;
1205          header.load_const.packed_value = lc->value[0].u16;
1206          break;
1207       case 8:
1208          header.load_const.packing = load_const_scalar_lo_19bits_sext;
1209          header.load_const.packed_value = lc->value[0].u8;
1210          break;
1211       case 1:
1212          header.load_const.packing = load_const_scalar_lo_19bits_sext;
1213          header.load_const.packed_value = lc->value[0].b;
1214          break;
1215       default:
1216          unreachable("invalid bit_size");
1217       }
1218    }
1219 
1220    blob_write_uint32(ctx->blob, header.u32);
1221 
1222    if (header.load_const.packing == load_const_full) {
1223       switch (lc->def.bit_size) {
1224       case 64:
1225          blob_write_bytes(ctx->blob, lc->value,
1226                           sizeof(*lc->value) * lc->def.num_components);
1227          break;
1228 
1229       case 32:
1230          for (unsigned i = 0; i < lc->def.num_components; i++)
1231             blob_write_uint32(ctx->blob, lc->value[i].u32);
1232          break;
1233 
1234       case 16:
1235          for (unsigned i = 0; i < lc->def.num_components; i++)
1236             blob_write_uint16(ctx->blob, lc->value[i].u16);
1237          break;
1238 
1239       default:
1240          assert(lc->def.bit_size <= 8);
1241          for (unsigned i = 0; i < lc->def.num_components; i++)
1242             blob_write_uint8(ctx->blob, lc->value[i].u8);
1243          break;
1244       }
1245    }
1246 
1247    write_add_object(ctx, &lc->def);
1248 }
1249 
1250 static nir_load_const_instr *
read_load_const(read_ctx * ctx,union packed_instr header)1251 read_load_const(read_ctx *ctx, union packed_instr header)
1252 {
1253    nir_load_const_instr *lc =
1254       nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
1255                                   decode_bit_size_3bits(header.load_const.bit_size));
1256    lc->def.divergent = false;
1257 
1258    switch (header.load_const.packing) {
1259    case load_const_scalar_hi_19bits:
1260       switch (lc->def.bit_size) {
1261       case 64:
1262          lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
1263          break;
1264       case 32:
1265          lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
1266          break;
1267       default:
1268          unreachable("invalid bit_size");
1269       }
1270       break;
1271 
1272    case load_const_scalar_lo_19bits_sext:
1273       switch (lc->def.bit_size) {
1274       case 64:
1275          lc->value[0].u64 = header.load_const.packed_value;
1276          if (lc->value[0].u64 >> 18)
1277             lc->value[0].u64 |= UINT64_C(0xfffffffffff80000);
1278          break;
1279       case 32:
1280          lc->value[0].u32 = header.load_const.packed_value;
1281          if (lc->value[0].u32 >> 18)
1282             lc->value[0].u32 |= 0xfff80000;
1283          break;
1284       case 16:
1285          lc->value[0].u16 = header.load_const.packed_value;
1286          break;
1287       case 8:
1288          lc->value[0].u8 = header.load_const.packed_value;
1289          break;
1290       case 1:
1291          lc->value[0].b = header.load_const.packed_value;
1292          break;
1293       default:
1294          unreachable("invalid bit_size");
1295       }
1296       break;
1297 
1298    case load_const_full:
1299       switch (lc->def.bit_size) {
1300       case 64:
1301          blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
1302          break;
1303 
1304       case 32:
1305          for (unsigned i = 0; i < lc->def.num_components; i++)
1306             lc->value[i].u32 = blob_read_uint32(ctx->blob);
1307          break;
1308 
1309       case 16:
1310          for (unsigned i = 0; i < lc->def.num_components; i++)
1311             lc->value[i].u16 = blob_read_uint16(ctx->blob);
1312          break;
1313 
1314       default:
1315          assert(lc->def.bit_size <= 8);
1316          for (unsigned i = 0; i < lc->def.num_components; i++)
1317             lc->value[i].u8 = blob_read_uint8(ctx->blob);
1318          break;
1319       }
1320       break;
1321    }
1322 
1323    read_add_object(ctx, &lc->def);
1324    return lc;
1325 }
1326 
1327 static void
write_ssa_undef(write_ctx * ctx,const nir_undef_instr * undef)1328 write_ssa_undef(write_ctx *ctx, const nir_undef_instr *undef)
1329 {
1330    assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);
1331 
1332    union packed_instr header;
1333    header.u32 = 0;
1334 
1335    header.undef.instr_type = undef->instr.type;
1336    header.undef.last_component = undef->def.num_components - 1;
1337    header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);
1338 
1339    blob_write_uint32(ctx->blob, header.u32);
1340    write_add_object(ctx, &undef->def);
1341 }
1342 
1343 static nir_undef_instr *
read_ssa_undef(read_ctx * ctx,union packed_instr header)1344 read_ssa_undef(read_ctx *ctx, union packed_instr header)
1345 {
1346    nir_undef_instr *undef =
1347       nir_undef_instr_create(ctx->nir, header.undef.last_component + 1,
1348                              decode_bit_size_3bits(header.undef.bit_size));
1349 
1350    undef->def.divergent = false;
1351 
1352    read_add_object(ctx, &undef->def);
1353    return undef;
1354 }
1355 
/* Texture instruction state packed into a single 32-bit word.
 *
 * The bit-fields below must add up to exactly 32 bits so that every bit of
 * u32 belongs to a named field.  The 12 used fields take 25 bits, so the
 * filler needs 7 bits; with a narrower filler the remaining bits would be
 * anonymous padding that a field-wise initializer is not required to clear.
 */
union packed_tex_data {
   uint32_t u32;
   struct {
      unsigned sampler_dim : 4;
      unsigned dest_type : 8;
      unsigned coord_components : 3;
      unsigned is_array : 1;
      unsigned is_shadow : 1;
      unsigned is_new_style_shadow : 1;
      unsigned is_sparse : 1;
      unsigned component : 2;
      unsigned texture_non_uniform : 1;
      unsigned sampler_non_uniform : 1;
      unsigned array_is_lowered_cube : 1;
      unsigned is_gather_implicit_lod : 1;
      unsigned unused : 7; /* Mark unused for valgrind. */
   } u;
};
1374 
1375 static void
write_tex(write_ctx * ctx,const nir_tex_instr * tex)1376 write_tex(write_ctx *ctx, const nir_tex_instr *tex)
1377 {
1378    assert(tex->num_srcs < 16);
1379    assert(tex->op < 32);
1380 
1381    union packed_instr header;
1382    header.u32 = 0;
1383 
1384    header.tex.instr_type = tex->instr.type;
1385    header.tex.num_srcs = tex->num_srcs;
1386    header.tex.op = tex->op;
1387 
1388    write_def(ctx, &tex->def, header, tex->instr.type);
1389 
1390    blob_write_uint32(ctx->blob, tex->texture_index);
1391    blob_write_uint32(ctx->blob, tex->sampler_index);
1392    blob_write_uint32(ctx->blob, tex->backend_flags);
1393    if (tex->op == nir_texop_tg4)
1394       blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1395 
1396    STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
1397    union packed_tex_data packed = {
1398       .u.sampler_dim = tex->sampler_dim,
1399       .u.dest_type = tex->dest_type,
1400       .u.coord_components = tex->coord_components,
1401       .u.is_array = tex->is_array,
1402       .u.is_shadow = tex->is_shadow,
1403       .u.is_new_style_shadow = tex->is_new_style_shadow,
1404       .u.is_sparse = tex->is_sparse,
1405       .u.component = tex->component,
1406       .u.texture_non_uniform = tex->texture_non_uniform,
1407       .u.sampler_non_uniform = tex->sampler_non_uniform,
1408       .u.array_is_lowered_cube = tex->array_is_lowered_cube,
1409       .u.is_gather_implicit_lod = tex->is_gather_implicit_lod,
1410    };
1411    blob_write_uint32(ctx->blob, packed.u32);
1412 
1413    for (unsigned i = 0; i < tex->num_srcs; i++) {
1414       union packed_src src;
1415       src.u32 = 0;
1416       src.tex.src_type = tex->src[i].src_type;
1417       write_src_full(ctx, &tex->src[i].src, src);
1418    }
1419 }
1420 
1421 static nir_tex_instr *
read_tex(read_ctx * ctx,union packed_instr header)1422 read_tex(read_ctx *ctx, union packed_instr header)
1423 {
1424    nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);
1425 
1426    read_def(ctx, &tex->def, &tex->instr, header);
1427 
1428    tex->op = header.tex.op;
1429    tex->texture_index = blob_read_uint32(ctx->blob);
1430    tex->sampler_index = blob_read_uint32(ctx->blob);
1431    tex->backend_flags = blob_read_uint32(ctx->blob);
1432    if (tex->op == nir_texop_tg4)
1433       blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1434 
1435    union packed_tex_data packed;
1436    packed.u32 = blob_read_uint32(ctx->blob);
1437    tex->sampler_dim = packed.u.sampler_dim;
1438    tex->dest_type = packed.u.dest_type;
1439    tex->coord_components = packed.u.coord_components;
1440    tex->is_array = packed.u.is_array;
1441    tex->is_shadow = packed.u.is_shadow;
1442    tex->is_new_style_shadow = packed.u.is_new_style_shadow;
1443    tex->is_sparse = packed.u.is_sparse;
1444    tex->component = packed.u.component;
1445    tex->texture_non_uniform = packed.u.texture_non_uniform;
1446    tex->sampler_non_uniform = packed.u.sampler_non_uniform;
1447    tex->array_is_lowered_cube = packed.u.array_is_lowered_cube;
1448    tex->is_gather_implicit_lod = packed.u.is_gather_implicit_lod;
1449 
1450    for (unsigned i = 0; i < tex->num_srcs; i++) {
1451       union packed_src src = read_src(ctx, &tex->src[i].src);
1452       tex->src[i].src_type = src.tex.src_type;
1453    }
1454 
1455    return tex;
1456 }
1457 
1458 static void
write_phi(write_ctx * ctx,const nir_phi_instr * phi)1459 write_phi(write_ctx *ctx, const nir_phi_instr *phi)
1460 {
1461    union packed_instr header;
1462    header.u32 = 0;
1463 
1464    header.phi.instr_type = phi->instr.type;
1465    header.phi.num_srcs = exec_list_length(&phi->srcs);
1466 
1467    /* Phi nodes are special, since they may reference SSA definitions and
1468     * basic blocks that don't exist yet. We leave two empty uint32_t's here,
1469     * and then store enough information so that a later fixup pass can fill
1470     * them in correctly.
1471     */
1472    write_def(ctx, &phi->def, header, phi->instr.type);
1473 
1474    nir_foreach_phi_src(src, phi) {
1475       size_t blob_offset = blob_reserve_uint32(ctx->blob);
1476       ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
1477       assert(blob_offset + sizeof(uint32_t) == blob_offset2);
1478       write_phi_fixup fixup = {
1479          .blob_offset = blob_offset,
1480          .src = src->src.ssa,
1481          .block = src->pred,
1482       };
1483       util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
1484    }
1485 }
1486 
1487 static void
write_fixup_phis(write_ctx * ctx)1488 write_fixup_phis(write_ctx *ctx)
1489 {
1490    util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
1491       blob_overwrite_uint32(ctx->blob, fixup->blob_offset,
1492                             write_lookup_object(ctx, fixup->src));
1493       blob_overwrite_uint32(ctx->blob, fixup->blob_offset + sizeof(uint32_t),
1494                             write_lookup_object(ctx, fixup->block));
1495    }
1496 
1497    util_dynarray_clear(&ctx->phi_fixups);
1498 }
1499 
1500 static nir_phi_instr *
read_phi(read_ctx * ctx,nir_block * blk,union packed_instr header)1501 read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
1502 {
1503    nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
1504 
1505    read_def(ctx, &phi->def, &phi->instr, header);
1506 
1507    /* For similar reasons as before, we just store the index directly into the
1508     * pointer, and let a later pass resolve the phi sources.
1509     *
1510     * In order to ensure that the copied sources (which are just the indices
1511     * from the blob for now) don't get inserted into the old shader's use-def
1512     * lists, we have to add the phi instruction *before* we set up its
1513     * sources.
1514     */
1515    nir_instr_insert_after_block(blk, &phi->instr);
1516 
1517    for (unsigned i = 0; i < header.phi.num_srcs; i++) {
1518       nir_def *def = (nir_def *)(uintptr_t)blob_read_uint32(ctx->blob);
1519       nir_block *pred = (nir_block *)(uintptr_t)blob_read_uint32(ctx->blob);
1520       nir_phi_src *src = nir_phi_instr_add_src(phi, pred, def);
1521 
1522       /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1523        * we have to set the parent_instr manually.  It doesn't really matter
1524        * when we do it, so we might as well do it here.
1525        */
1526       nir_src_set_parent_instr(&src->src, &phi->instr);
1527 
1528       /* Stash it in the list of phi sources.  We'll walk this list and fix up
1529        * sources at the very end of read_function_impl.
1530        */
1531       list_add(&src->src.use_link, &ctx->phi_srcs);
1532    }
1533 
1534    return phi;
1535 }
1536 
1537 static void
read_fixup_phis(read_ctx * ctx)1538 read_fixup_phis(read_ctx *ctx)
1539 {
1540    list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
1541       src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
1542       src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
1543 
1544       /* Remove from this list */
1545       list_del(&src->src.use_link);
1546 
1547       list_addtail(&src->src.use_link, &src->src.ssa->uses);
1548    }
1549    assert(list_is_empty(&ctx->phi_srcs));
1550 }
1551 
1552 static void
write_jump(write_ctx * ctx,const nir_jump_instr * jmp)1553 write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
1554 {
1555    /* These aren't handled because they require special block linking */
1556    assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);
1557 
1558    assert(jmp->type < 4);
1559 
1560    union packed_instr header;
1561    header.u32 = 0;
1562 
1563    header.jump.instr_type = jmp->instr.type;
1564    header.jump.type = jmp->type;
1565 
1566    blob_write_uint32(ctx->blob, header.u32);
1567 }
1568 
1569 static nir_jump_instr *
read_jump(read_ctx * ctx,union packed_instr header)1570 read_jump(read_ctx *ctx, union packed_instr header)
1571 {
1572    /* These aren't handled because they require special block linking */
1573    assert(header.jump.type != nir_jump_goto &&
1574           header.jump.type != nir_jump_goto_if);
1575 
1576    nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1577    return jmp;
1578 }
1579 
1580 static void
write_call(write_ctx * ctx,const nir_call_instr * call)1581 write_call(write_ctx *ctx, const nir_call_instr *call)
1582 {
1583    blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
1584 
1585    for (unsigned i = 0; i < call->num_params; i++)
1586       write_src(ctx, &call->params[i]);
1587 }
1588 
1589 static nir_call_instr *
read_call(read_ctx * ctx)1590 read_call(read_ctx *ctx)
1591 {
1592    nir_function *callee = read_object(ctx);
1593    nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
1594 
1595    for (unsigned i = 0; i < call->num_params; i++)
1596       read_src(ctx, &call->params[i]);
1597 
1598    return call;
1599 }
1600 
1601 static void
write_debug_info(write_ctx * ctx,const nir_debug_info_instr * di)1602 write_debug_info(write_ctx *ctx, const nir_debug_info_instr *di)
1603 {
1604    union packed_instr header;
1605    header.u32 = 0;
1606 
1607    header.debug_info.instr_type = nir_instr_type_debug_info;
1608    header.debug_info.type = di->type;
1609    header.debug_info.string_length = di->string_length;
1610 
1611    switch (di->type) {
1612    case nir_debug_info_src_loc:
1613       blob_write_uint32(ctx->blob, header.u32);
1614       blob_write_uint32(ctx->blob, di->src_loc.line);
1615       blob_write_uint32(ctx->blob, di->src_loc.column);
1616       blob_write_uint32(ctx->blob, di->src_loc.spirv_offset);
1617       blob_write_uint8(ctx->blob, di->src_loc.source);
1618       if (di->src_loc.line)
1619          write_src(ctx, &di->src_loc.filename);
1620       return;
1621    case nir_debug_info_string:
1622       write_def(ctx, &di->def, header, di->instr.type);
1623       blob_write_bytes(ctx->blob, di->string, di->string_length);
1624       return;
1625    }
1626 
1627    unreachable("Unimplemented nir_debug_info_type");
1628 }
1629 
1630 static nir_debug_info_instr *
read_debug_info(read_ctx * ctx,union packed_instr header)1631 read_debug_info(read_ctx *ctx, union packed_instr header)
1632 {
1633    nir_debug_info_type type = header.debug_info.type;
1634 
1635    switch (type) {
1636    case nir_debug_info_src_loc: {
1637       nir_debug_info_instr *di = nir_debug_info_instr_create(ctx->nir, type, 0);
1638       di->src_loc.line = blob_read_uint32(ctx->blob);
1639       di->src_loc.column = blob_read_uint32(ctx->blob);
1640       di->src_loc.spirv_offset = blob_read_uint32(ctx->blob);
1641       di->src_loc.source = blob_read_uint8(ctx->blob);
1642       if (di->src_loc.line)
1643          read_src(ctx, &di->src_loc.filename);
1644       return di;
1645    }
1646    case nir_debug_info_string: {
1647       nir_debug_info_instr *di =
1648          nir_debug_info_instr_create(ctx->nir, type, header.debug_info.string_length);
1649       read_def(ctx, &di->def, &di->instr, header);
1650       memcpy(di->string, blob_read_bytes(ctx->blob, di->string_length), di->string_length);
1651       return di;
1652    }
1653    }
1654 
1655    unreachable("Unimplemented nir_debug_info_type");
1656 }
1657 
1658 static void
write_instr(write_ctx * ctx,const nir_instr * instr)1659 write_instr(write_ctx *ctx, const nir_instr *instr)
1660 {
1661    /* We have only 4 bits for the instruction type. */
1662    assert(instr->type < 16);
1663 
1664    switch (instr->type) {
1665    case nir_instr_type_alu:
1666       write_alu(ctx, nir_instr_as_alu(instr));
1667       break;
1668    case nir_instr_type_deref:
1669       write_deref(ctx, nir_instr_as_deref(instr));
1670       break;
1671    case nir_instr_type_intrinsic:
1672       write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
1673       break;
1674    case nir_instr_type_load_const:
1675       write_load_const(ctx, nir_instr_as_load_const(instr));
1676       break;
1677    case nir_instr_type_undef:
1678       write_ssa_undef(ctx, nir_instr_as_undef(instr));
1679       break;
1680    case nir_instr_type_tex:
1681       write_tex(ctx, nir_instr_as_tex(instr));
1682       break;
1683    case nir_instr_type_phi:
1684       write_phi(ctx, nir_instr_as_phi(instr));
1685       break;
1686    case nir_instr_type_jump:
1687       write_jump(ctx, nir_instr_as_jump(instr));
1688       break;
1689    case nir_instr_type_call:
1690       blob_write_uint32(ctx->blob, instr->type);
1691       write_call(ctx, nir_instr_as_call(instr));
1692       break;
1693    case nir_instr_type_debug_info:
1694       write_debug_info(ctx, nir_instr_as_debug_info(instr));
1695       break;
1696    case nir_instr_type_parallel_copy:
1697       unreachable("Cannot write parallel copies");
1698    default:
1699       unreachable("bad instr type");
1700    }
1701 }
1702 
1703 /* Return the number of instructions read. */
1704 static unsigned
read_instr(read_ctx * ctx,nir_block * block)1705 read_instr(read_ctx *ctx, nir_block *block)
1706 {
1707    STATIC_ASSERT(sizeof(union packed_instr) == 4);
1708    union packed_instr header;
1709    header.u32 = blob_read_uint32(ctx->blob);
1710    nir_instr *instr;
1711 
1712    switch (header.any.instr_type) {
1713    case nir_instr_type_alu:
1714       for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
1715          nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
1716       return header.alu.num_followup_alu_sharing_header + 1;
1717    case nir_instr_type_deref:
1718       instr = &read_deref(ctx, header)->instr;
1719       break;
1720    case nir_instr_type_intrinsic:
1721       instr = &read_intrinsic(ctx, header)->instr;
1722       break;
1723    case nir_instr_type_load_const:
1724       instr = &read_load_const(ctx, header)->instr;
1725       break;
1726    case nir_instr_type_undef:
1727       instr = &read_ssa_undef(ctx, header)->instr;
1728       break;
1729    case nir_instr_type_tex:
1730       instr = &read_tex(ctx, header)->instr;
1731       break;
1732    case nir_instr_type_phi:
1733       /* Phi instructions are a bit of a special case when reading because we
1734        * don't want inserting the instruction to automatically handle use/defs
1735        * for us.  Instead, we need to wait until all the blocks/instructions
1736        * are read so that we can set their sources up.
1737        */
1738       read_phi(ctx, block, header);
1739       return 1;
1740    case nir_instr_type_jump:
1741       instr = &read_jump(ctx, header)->instr;
1742       break;
1743    case nir_instr_type_call:
1744       instr = &read_call(ctx)->instr;
1745       break;
1746    case nir_instr_type_debug_info:
1747       instr = &read_debug_info(ctx, header)->instr;
1748       break;
1749    case nir_instr_type_parallel_copy:
1750       unreachable("Cannot read parallel copies");
1751    default:
1752       unreachable("bad instr type");
1753    }
1754 
1755    nir_instr_insert_after_block(block, instr);
1756    return 1;
1757 }
1758 
1759 static void
write_block(write_ctx * ctx,const nir_block * block)1760 write_block(write_ctx *ctx, const nir_block *block)
1761 {
1762    write_add_object(ctx, block);
1763    blob_write_uint8(ctx->blob, block->divergent);
1764    blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
1765 
1766    ctx->last_instr_type = ~0;
1767    ctx->last_alu_header_offset = 0;
1768 
1769    nir_foreach_instr(instr, block) {
1770       write_instr(ctx, instr);
1771       ctx->last_instr_type = instr->type;
1772    }
1773 }
1774 
1775 static void
read_block(read_ctx * ctx,struct exec_list * cf_list)1776 read_block(read_ctx *ctx, struct exec_list *cf_list)
1777 {
1778    /* Don't actually create a new block.  Just use the one from the tail of
1779     * the list.  NIR guarantees that the tail of the list is a block and that
1780     * no two blocks are side-by-side in the IR;  It should be empty.
1781     */
1782    nir_block *block =
1783       exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
1784 
1785    read_add_object(ctx, block);
1786    block->divergent = blob_read_uint8(ctx->blob);
1787    unsigned num_instrs = blob_read_uint32(ctx->blob);
1788    for (unsigned i = 0; i < num_instrs;) {
1789       i += read_instr(ctx, block);
1790    }
1791 }
1792 
1793 static void
1794 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
1795 
1796 static void
1797 read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
1798 
1799 static void
write_if(write_ctx * ctx,nir_if * nif)1800 write_if(write_ctx *ctx, nir_if *nif)
1801 {
1802    write_src(ctx, &nif->condition);
1803    blob_write_uint8(ctx->blob, nif->control);
1804 
1805    write_cf_list(ctx, &nif->then_list);
1806    write_cf_list(ctx, &nif->else_list);
1807 }
1808 
1809 static void
read_if(read_ctx * ctx,struct exec_list * cf_list)1810 read_if(read_ctx *ctx, struct exec_list *cf_list)
1811 {
1812    nir_if *nif = nir_if_create(ctx->nir);
1813 
1814    read_src(ctx, &nif->condition);
1815    nif->control = blob_read_uint8(ctx->blob);
1816 
1817    nir_cf_node_insert_end(cf_list, &nif->cf_node);
1818 
1819    read_cf_list(ctx, &nif->then_list);
1820    read_cf_list(ctx, &nif->else_list);
1821 }
1822 
1823 static void
write_loop(write_ctx * ctx,nir_loop * loop)1824 write_loop(write_ctx *ctx, nir_loop *loop)
1825 {
1826    blob_write_uint8(ctx->blob, loop->control);
1827    blob_write_uint8(ctx->blob, loop->divergent);
1828    bool has_continue_construct = nir_loop_has_continue_construct(loop);
1829    blob_write_uint8(ctx->blob, has_continue_construct);
1830 
1831    write_cf_list(ctx, &loop->body);
1832    if (has_continue_construct) {
1833       write_cf_list(ctx, &loop->continue_list);
1834    }
1835 }
1836 
1837 static void
read_loop(read_ctx * ctx,struct exec_list * cf_list)1838 read_loop(read_ctx *ctx, struct exec_list *cf_list)
1839 {
1840    nir_loop *loop = nir_loop_create(ctx->nir);
1841 
1842    nir_cf_node_insert_end(cf_list, &loop->cf_node);
1843 
1844    loop->control = blob_read_uint8(ctx->blob);
1845    loop->divergent = blob_read_uint8(ctx->blob);
1846    bool has_continue_construct = blob_read_uint8(ctx->blob);
1847 
1848    read_cf_list(ctx, &loop->body);
1849    if (has_continue_construct) {
1850       nir_loop_add_continue_construct(loop);
1851       read_cf_list(ctx, &loop->continue_list);
1852    }
1853 }
1854 
1855 static void
write_cf_node(write_ctx * ctx,nir_cf_node * cf)1856 write_cf_node(write_ctx *ctx, nir_cf_node *cf)
1857 {
1858    blob_write_uint32(ctx->blob, cf->type);
1859 
1860    switch (cf->type) {
1861    case nir_cf_node_block:
1862       write_block(ctx, nir_cf_node_as_block(cf));
1863       break;
1864    case nir_cf_node_if:
1865       write_if(ctx, nir_cf_node_as_if(cf));
1866       break;
1867    case nir_cf_node_loop:
1868       write_loop(ctx, nir_cf_node_as_loop(cf));
1869       break;
1870    default:
1871       unreachable("bad cf type");
1872    }
1873 }
1874 
1875 static void
read_cf_node(read_ctx * ctx,struct exec_list * list)1876 read_cf_node(read_ctx *ctx, struct exec_list *list)
1877 {
1878    nir_cf_node_type type = blob_read_uint32(ctx->blob);
1879 
1880    switch (type) {
1881    case nir_cf_node_block:
1882       read_block(ctx, list);
1883       break;
1884    case nir_cf_node_if:
1885       read_if(ctx, list);
1886       break;
1887    case nir_cf_node_loop:
1888       read_loop(ctx, list);
1889       break;
1890    default:
1891       unreachable("bad cf type");
1892    }
1893 }
1894 
1895 static void
write_cf_list(write_ctx * ctx,const struct exec_list * cf_list)1896 write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1897 {
1898    blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1899    foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1900       write_cf_node(ctx, cf);
1901    }
1902 }
1903 
1904 static void
read_cf_list(read_ctx * ctx,struct exec_list * cf_list)1905 read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1906 {
1907    uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1908    for (unsigned i = 0; i < num_cf_nodes; i++)
1909       read_cf_node(ctx, cf_list);
1910 }
1911 
1912 static void
write_function_impl(write_ctx * ctx,const nir_function_impl * fi)1913 write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1914 {
1915    blob_write_uint8(ctx->blob, fi->structured);
1916    blob_write_uint8(ctx->blob, !!fi->preamble);
1917 
1918    if (fi->preamble)
1919       blob_write_uint32(ctx->blob, write_lookup_object(ctx, fi->preamble));
1920 
1921    write_var_list(ctx, &fi->locals);
1922 
1923    write_cf_list(ctx, &fi->body);
1924    write_fixup_phis(ctx);
1925 }
1926 
1927 static nir_function_impl *
read_function_impl(read_ctx * ctx)1928 read_function_impl(read_ctx *ctx)
1929 {
1930    nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1931 
1932    fi->structured = blob_read_uint8(ctx->blob);
1933    bool preamble = blob_read_uint8(ctx->blob);
1934 
1935    if (preamble)
1936       fi->preamble = read_object(ctx);
1937 
1938    read_var_list(ctx, &fi->locals);
1939 
1940    read_cf_list(ctx, &fi->body);
1941    read_fixup_phis(ctx);
1942 
1943    fi->valid_metadata = 0;
1944 
1945    return fi;
1946 }
1947 
1948 static void
write_function(write_ctx * ctx,const nir_function * fxn)1949 write_function(write_ctx *ctx, const nir_function *fxn)
1950 {
1951    uint32_t flags = 0;
1952    if (fxn->is_entrypoint)
1953       flags |= 0x1;
1954    if (fxn->is_preamble)
1955       flags |= 0x2;
1956    if (fxn->name)
1957       flags |= 0x4;
1958    if (fxn->impl)
1959       flags |= 0x8;
1960    if (fxn->should_inline)
1961       flags |= 0x10;
1962    if (fxn->dont_inline)
1963       flags |= 0x20;
1964    if (fxn->is_subroutine)
1965       flags |= 0x40;
1966    blob_write_uint32(ctx->blob, flags);
1967    if (fxn->name)
1968       blob_write_string(ctx->blob, fxn->name);
1969 
1970    blob_write_uint32(ctx->blob, fxn->subroutine_index);
1971    blob_write_uint32(ctx->blob, fxn->num_subroutine_types);
1972    for (unsigned i = 0; i < fxn->num_subroutine_types; i++) {
1973       encode_type_to_blob(ctx->blob, fxn->subroutine_types[i]);
1974    }
1975 
1976    write_add_object(ctx, fxn);
1977 
1978    blob_write_uint32(ctx->blob, fxn->num_params);
1979    for (unsigned i = 0; i < fxn->num_params; i++) {
1980       uint32_t val =
1981          ((uint32_t)fxn->params[i].num_components) |
1982          ((uint32_t)fxn->params[i].bit_size) << 8;
1983       blob_write_uint32(ctx->blob, val);
1984    }
1985 
1986    /* At first glance, it looks like we should write the function_impl here.
1987     * However, call instructions need to be able to reference at least the
1988     * function and those will get processed as we write the function_impls.
1989     * We stop here and write function_impls as a second pass.
1990     */
1991 }
1992 
1993 static void
read_function(read_ctx * ctx)1994 read_function(read_ctx *ctx)
1995 {
1996    uint32_t flags = blob_read_uint32(ctx->blob);
1997 
1998    bool has_name = flags & 0x4;
1999    char *name = has_name ? blob_read_string(ctx->blob) : NULL;
2000 
2001    nir_function *fxn = nir_function_create(ctx->nir, name);
2002 
2003    fxn->subroutine_index = blob_read_uint32(ctx->blob);
2004    fxn->num_subroutine_types = blob_read_uint32(ctx->blob);
2005    for (unsigned i = 0; i < fxn->num_subroutine_types; i++) {
2006       fxn->subroutine_types[i] = decode_type_from_blob(ctx->blob);
2007    }
2008 
2009    read_add_object(ctx, fxn);
2010 
2011    fxn->num_params = blob_read_uint32(ctx->blob);
2012    fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
2013    for (unsigned i = 0; i < fxn->num_params; i++) {
2014       uint32_t val = blob_read_uint32(ctx->blob);
2015       fxn->params[i].num_components = val & 0xff;
2016       fxn->params[i].bit_size = (val >> 8) & 0xff;
2017    }
2018 
2019    fxn->is_entrypoint = flags & 0x1;
2020    fxn->is_preamble = flags & 0x2;
2021    if (flags & 0x8)
2022       fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
2023    fxn->should_inline = flags & 0x10;
2024    fxn->dont_inline = flags & 0x20;
2025    fxn->is_subroutine = flags & 0x40;
2026 }
2027 
2028 static void
write_xfb_info(write_ctx * ctx,const nir_xfb_info * xfb)2029 write_xfb_info(write_ctx *ctx, const nir_xfb_info *xfb)
2030 {
2031    if (xfb == NULL) {
2032       blob_write_uint32(ctx->blob, 0);
2033    } else {
2034       size_t size = nir_xfb_info_size(xfb->output_count);
2035       assert(size <= UINT32_MAX);
2036       blob_write_uint32(ctx->blob, size);
2037       blob_write_bytes(ctx->blob, xfb, size);
2038    }
2039 }
2040 
2041 static nir_xfb_info *
read_xfb_info(read_ctx * ctx)2042 read_xfb_info(read_ctx *ctx)
2043 {
2044    uint32_t size = blob_read_uint32(ctx->blob);
2045    if (size == 0)
2046       return NULL;
2047 
2048    struct nir_xfb_info *xfb = ralloc_size(ctx->nir, size);
2049    blob_copy_bytes(ctx->blob, (void *)xfb, size);
2050 
2051    return xfb;
2052 }
2053 
2054 /**
2055  * Serialize NIR into a binary blob.
2056  *
2057  * \param strip  Don't serialize information only useful for debugging,
2058  *               such as variable names, making cache hits from similar
2059  *               shaders more likely.
2060  */
2061 void
nir_serialize(struct blob * blob,const nir_shader * nir,bool strip)2062 nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
2063 {
2064    write_ctx ctx = { 0 };
2065    ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
2066    ctx.blob = blob;
2067    ctx.nir = nir;
2068    ctx.strip = strip;
2069    util_dynarray_init(&ctx.phi_fixups, NULL);
2070 
2071    size_t idx_size_offset = blob_reserve_uint32(blob);
2072 
2073    struct shader_info info = nir->info;
2074    uint32_t strings = 0;
2075    if (!strip && info.name)
2076       strings |= 0x1;
2077    if (!strip && info.label)
2078       strings |= 0x2;
2079    blob_write_uint32(blob, strings);
2080    if (!strip && info.name)
2081       blob_write_string(blob, info.name);
2082    if (!strip && info.label)
2083       blob_write_string(blob, info.label);
2084    info.name = info.label = NULL;
2085    blob_write_bytes(blob, (uint8_t *)&info, sizeof(info));
2086 
2087    write_var_list(&ctx, &nir->variables);
2088 
2089    blob_write_uint32(blob, nir->num_inputs);
2090    blob_write_uint32(blob, nir->num_uniforms);
2091    blob_write_uint32(blob, nir->num_outputs);
2092    blob_write_uint32(blob, nir->scratch_size);
2093 
2094    blob_write_uint32(blob, exec_list_length(&nir->functions));
2095    nir_foreach_function(fxn, nir) {
2096       write_function(&ctx, fxn);
2097    }
2098 
2099    nir_foreach_function_impl(impl, nir) {
2100       write_function_impl(&ctx, impl);
2101    }
2102 
2103    blob_write_uint32(blob, nir->constant_data_size);
2104    if (nir->constant_data_size > 0)
2105       blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
2106 
2107    write_xfb_info(&ctx, nir->xfb_info);
2108 
2109    if (nir->info.uses_printf)
2110       nir_serialize_printf_info(blob, nir->printf_info, nir->printf_info_count);
2111 
2112    blob_overwrite_uint32(blob, idx_size_offset, ctx.next_idx);
2113 
2114    _mesa_hash_table_destroy(ctx.remap_table, NULL);
2115    util_dynarray_fini(&ctx.phi_fixups);
2116 }
2117 
2118 nir_shader *
nir_deserialize(void * mem_ctx,const struct nir_shader_compiler_options * options,struct blob_reader * blob)2119 nir_deserialize(void *mem_ctx,
2120                 const struct nir_shader_compiler_options *options,
2121                 struct blob_reader *blob)
2122 {
2123    read_ctx ctx = { 0 };
2124    ctx.blob = blob;
2125    list_inithead(&ctx.phi_srcs);
2126    ctx.idx_table_len = blob_read_uint32(blob);
2127    ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
2128 
2129    uint32_t strings = blob_read_uint32(blob);
2130    char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
2131    char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
2132 
2133    struct shader_info info;
2134    blob_copy_bytes(blob, (uint8_t *)&info, sizeof(info));
2135 
2136    ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
2137 
2138    info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
2139    info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
2140 
2141    ctx.nir->info = info;
2142 
2143    read_var_list(&ctx, &ctx.nir->variables);
2144 
2145    ctx.nir->num_inputs = blob_read_uint32(blob);
2146    ctx.nir->num_uniforms = blob_read_uint32(blob);
2147    ctx.nir->num_outputs = blob_read_uint32(blob);
2148    ctx.nir->scratch_size = blob_read_uint32(blob);
2149 
2150    unsigned num_functions = blob_read_uint32(blob);
2151    for (unsigned i = 0; i < num_functions; i++)
2152       read_function(&ctx);
2153 
2154    nir_foreach_function(fxn, ctx.nir) {
2155       if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
2156          nir_function_set_impl(fxn, read_function_impl(&ctx));
2157    }
2158 
2159    ctx.nir->constant_data_size = blob_read_uint32(blob);
2160    if (ctx.nir->constant_data_size > 0) {
2161       ctx.nir->constant_data =
2162          ralloc_size(ctx.nir, ctx.nir->constant_data_size);
2163       blob_copy_bytes(blob, ctx.nir->constant_data,
2164                       ctx.nir->constant_data_size);
2165    }
2166 
2167    ctx.nir->xfb_info = read_xfb_info(&ctx);
2168 
2169    if (ctx.nir->info.uses_printf) {
2170       ctx.nir->printf_info =
2171          nir_deserialize_printf_info(ctx.nir, blob,
2172                                      &ctx.nir->printf_info_count);
2173    }
2174 
2175    free(ctx.idx_table);
2176 
2177    nir_validate_shader(ctx.nir, "after deserialize");
2178 
2179    return ctx.nir;
2180 }
2181 
2182 void
nir_shader_serialize_deserialize(nir_shader * shader)2183 nir_shader_serialize_deserialize(nir_shader *shader)
2184 {
2185    const struct nir_shader_compiler_options *options = shader->options;
2186 
2187    struct blob writer;
2188    blob_init(&writer);
2189    nir_serialize(&writer, shader, false);
2190 
2191    /* Delete all of dest's ralloc children but leave dest alone */
2192    void *dead_ctx = ralloc_context(NULL);
2193    ralloc_adopt(dead_ctx, shader);
2194    ralloc_free(dead_ctx);
2195 
2196    dead_ctx = ralloc_context(NULL);
2197 
2198    struct blob_reader reader;
2199    blob_reader_init(&reader, writer.data, writer.size);
2200    nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
2201 
2202    blob_finish(&writer);
2203 
2204    nir_shader_replace(shader, copy);
2205    ralloc_free(dead_ctx);
2206 }
2207 
2208 void
nir_serialize_printf_info(struct blob * blob,const u_printf_info * printf_info,unsigned printf_info_count)2209 nir_serialize_printf_info(struct blob *blob,
2210                           const u_printf_info *printf_info,
2211                           unsigned printf_info_count)
2212 {
2213    blob_write_uint32(blob, printf_info_count);
2214    for (int i = 0; i < printf_info_count; i++) {
2215       const u_printf_info *info = &printf_info[i];
2216       blob_write_uint32(blob, info->num_args);
2217       blob_write_uint32(blob, info->string_size);
2218       blob_write_bytes(blob, info->arg_sizes,
2219                        info->num_args * sizeof(info->arg_sizes[0]));
2220       /* we can't use blob_write_string, because it contains multiple NULL
2221        * terminated strings */
2222       blob_write_bytes(blob, info->strings, info->string_size);
2223    }
2224 }
2225 
2226 u_printf_info *
nir_deserialize_printf_info(void * mem_ctx,struct blob_reader * blob,unsigned * printf_info_count)2227 nir_deserialize_printf_info(void *mem_ctx,
2228                             struct blob_reader *blob,
2229                             unsigned *printf_info_count)
2230 {
2231    *printf_info_count = blob_read_uint32(blob);
2232 
2233    u_printf_info *printf_info =
2234       ralloc_array(mem_ctx, u_printf_info, *printf_info_count);
2235 
2236    for (int i = 0; i < *printf_info_count; i++) {
2237       u_printf_info *info = &printf_info[i];
2238       info->num_args = blob_read_uint32(blob);
2239       info->string_size = blob_read_uint32(blob);
2240       info->arg_sizes = ralloc_array(mem_ctx, unsigned, info->num_args);
2241       blob_copy_bytes(blob, info->arg_sizes,
2242                       info->num_args * sizeof(info->arg_sizes[0]));
2243       info->strings = ralloc_array(mem_ctx, char, info->string_size);
2244       blob_copy_bytes(blob, info->strings, info->string_size);
2245    }
2246 
2247    return printf_info;
2248 }
2249