/*
 * Copyright © 2020 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */
#include "helpers.h"

#include "common/amd_family.h"
#include "vk_format.h"

#include <llvm-c/Target.h>

#include <mutex>
#include <sstream>
#include <stdio.h>

using namespace aco;

extern "C" {
PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(VkInstance instance, const char* pName);
}

ac_shader_config config;
aco_shader_info info;
std::unique_ptr<Program> program;
Builder bld(NULL);
Temp inputs[16];

static radeon_info rad_info;
static nir_shader_compiler_options nir_options;
static nir_builder _nb;
nir_builder *nb;

static VkInstance instance_cache[CHIP_LAST] = {VK_NULL_HANDLE};
static VkDevice device_cache[CHIP_LAST] = {VK_NULL_HANDLE};
static std::mutex create_device_mutex;

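/* X-macro list of the Vulkan entry points these tests call directly. ITEM is
 * expanded right below to declare a function pointer for each entry point and
 * again in get_vk_device() to load them via vk_icdGetInstanceProcAddr. */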
#define FUNCTION_LIST                                                                              \
   ITEM(CreateInstance)                                                                            \
   ITEM(DestroyInstance)                                                                           \
   ITEM(EnumeratePhysicalDevices)                                                                  \
   ITEM(GetPhysicalDeviceProperties2)                                                              \
   ITEM(CreateDevice)                                                                              \
   ITEM(DestroyDevice)                                                                             \
   ITEM(CreateShaderModule)                                                                        \
   ITEM(DestroyShaderModule)                                                                       \
   ITEM(CreateGraphicsPipelines)                                                                   \
   ITEM(CreateComputePipelines)                                                                    \
   ITEM(DestroyPipeline)                                                                           \
   ITEM(CreateDescriptorSetLayout)                                                                 \
   ITEM(DestroyDescriptorSetLayout)                                                                \
   ITEM(CreatePipelineLayout)                                                                      \
   ITEM(DestroyPipelineLayout)                                                                     \
   ITEM(CreateRenderPass)                                                                          \
   ITEM(DestroyRenderPass)                                                                         \
   ITEM(GetPipelineExecutablePropertiesKHR)                                                        \
   ITEM(GetPipelineExecutableInternalRepresentationsKHR)

#define ITEM(n) PFN_vk##n n;
FUNCTION_LIST
#undef ITEM

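/* Reset the global config/program state and create a fresh ACO Program for the
 * given stage, wave size and chip, with a single top-level block that the
 * global Builder 'bld' points at. setup_cs() below builds on this. */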
void
create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size,
               enum radeon_family family)
{
   memset(&config, 0, sizeof(config));
   info.wave_size = wave_size;

   program.reset(new Program);
   aco::init_program(program.get(), stage, &info, gfx_level, family, false, &config);
   program->workgroup_size = UINT_MAX;
   calc_min_waves(program.get());

   program->debug.output = output;
   program->debug.shorten_messages = true;
   program->debug.func = nullptr;
   program->debug.private_data = nullptr;

   Block* block = program->create_and_insert_block();
   block->kind = block_kind_top_level;

   bld = Builder(program.get(), &program->blocks[0]);

   config.float_mode = program->blocks[0].fp_mode.val;
}

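/* Set up a compute-shader test program. input_spec is a space-separated list
 * such as "v1 s2 v3b": 'v'/'s' selects VGPR/SGPR, the digit is the size in
 * dwords (or in bytes when followed by 'b'). One temporary per entry is
 * defined by a p_startpgm instruction and stored in inputs[].
 *
 * A minimal usage sketch (assuming the trailing parameters of setup_cs have
 * defaults declared in helpers.h):
 *
 *    if (!setup_cs("v1 v1", GFX10))
 *       return;
 *    writeout(0, bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]));
 *    finish_opt_test();
 */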
bool
setup_cs(const char* input_spec, enum amd_gfx_level gfx_level, enum radeon_family family,
         const char* subvariant, unsigned wave_size)
{
   if (!set_variant(gfx_level, subvariant))
      return false;

   memset(&info, 0, sizeof(info));
   create_program(gfx_level, compute_cs, wave_size, family);

   if (input_spec) {
      std::vector<RegClass> input_classes;
      while (input_spec[0]) {
         RegType type = input_spec[0] == 'v' ? RegType::vgpr : RegType::sgpr;
         unsigned size = input_spec[1] - '0';
         bool in_bytes = input_spec[2] == 'b';
         input_classes.push_back(RegClass::get(type, size * (in_bytes ? 1 : 4)));

         input_spec += 2 + in_bytes;
         while (input_spec[0] == ' ')
            input_spec++;
      }

      aco_ptr<Instruction> startpgm{
         create_instruction(aco_opcode::p_startpgm, Format::PSEUDO, 0, input_classes.size())};
      for (unsigned i = 0; i < input_classes.size(); i++) {
         inputs[i] = bld.tmp(input_classes[i]);
         startpgm->definitions[i] = Definition(inputs[i]);
      }
      bld.insert(std::move(startpgm));
   }

   return true;
}

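/* Set up a NIR-based test: unlike setup_cs(), this creates a nir_builder
 * (exposed through the global 'nb') instead of ACO IR, so tests can build a
 * NIR shader and then run instruction selection via finish_isel_test(). When
 * no family is given, a representative chip is picked for the gfx level. */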
bool
setup_nir_cs(enum amd_gfx_level gfx_level, gl_shader_stage stage, enum radeon_family family, const char* subvariant)
{
   if (!set_variant(gfx_level, subvariant))
      return false;

   if (family == CHIP_UNKNOWN) {
      switch (gfx_level) {
      case GFX6: family = CHIP_TAHITI; break;
      case GFX7: family = CHIP_BONAIRE; break;
      case GFX8: family = CHIP_POLARIS10; break;
      case GFX9: family = CHIP_VEGA10; break;
      case GFX10: family = CHIP_NAVI10; break;
      case GFX10_3: family = CHIP_NAVI21; break;
      case GFX11: family = CHIP_NAVI31; break;
      default: family = CHIP_UNKNOWN; break;
      }
   }

   memset(&rad_info, 0, sizeof(rad_info));
   rad_info.gfx_level = gfx_level;
   rad_info.family = family;

   memset(&nir_options, 0, sizeof(nir_options));
   ac_set_nir_options(&rad_info, false, &nir_options);

   glsl_type_singleton_init_or_ref();

   _nb = nir_builder_init_simple_shader(stage, &nir_options, "aco_test");
   nb = &_nb;

   return true;
}

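/* Finalize a test program: derive the linear/logical successor lists from the
 * predecessor lists the test filled in, mark blocks without linear successors
 * as uniform (optionally appending s_endpgm), and optionally build the
 * dominator tree used by some later passes. */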
void
finish_program(Program* prog, bool endpgm, bool dominance)
{
   for (Block& BB : prog->blocks) {
      for (unsigned idx : BB.linear_preds)
         prog->blocks[idx].linear_succs.emplace_back(BB.index);
      for (unsigned idx : BB.logical_preds)
         prog->blocks[idx].logical_succs.emplace_back(BB.index);
   }

   for (Block& block : prog->blocks) {
      if (block.linear_succs.size() == 0) {
         block.kind |= block_kind_uniform;
         if (endpgm)
            Builder(prog, &block).sopp(aco_opcode::s_endpgm);
      }
   }

   if (dominance)
      dominator_tree(prog);
}

void
finish_validator_test()
{
   finish_program(program.get(), true, true);
   aco_print_program(program.get(), output);
   fprintf(output, "Validation results:\n");
   if (aco::validate_ir(program.get()))
      fprintf(output, "Validation passed\n");
   else
      fprintf(output, "Validation failed\n");
}

void
finish_opt_test()
{
   finish_program(program.get(), true, true);
   if (!aco::validate_ir(program.get())) {
      fail_test("Validation before optimization failed");
      return;
   }
   aco::optimize(program.get());
   if (!aco::validate_ir(program.get())) {
      fail_test("Validation after optimization failed");
      return;
   }
   aco_print_program(program.get(), output);
}

void
finish_setup_reduce_temp_test()
{
   finish_program(program.get(), true, true);
   if (!aco::validate_ir(program.get())) {
      fail_test("Validation before setup_reduce_temp failed");
      return;
   }
   aco::setup_reduce_temp(program.get());
   if (!aco::validate_ir(program.get())) {
      fail_test("Validation after setup_reduce_temp failed");
      return;
   }
   aco_print_program(program.get(), output);
}

void
finish_lower_subdword_test()
{
   finish_program(program.get(), true, true);
   if (!aco::validate_ir(program.get())) {
      fail_test("Validation before lower_subdword failed");
      return;
   }
   aco::lower_subdword(program.get());
   if (!aco::validate_ir(program.get())) {
      fail_test("Validation after lower_subdword failed");
      return;
   }
   aco_print_program(program.get(), output);
}

void
finish_ra_test(ra_test_policy policy)
{
   finish_program(program.get(), true, true);
   if (!aco::validate_ir(program.get())) {
      fail_test("Validation before register allocation failed");
      return;
   }

   program->workgroup_size = program->wave_size;
   aco::live_var_analysis(program.get());
   aco::register_allocation(program.get(), policy);

   if (aco::validate_ra(program.get())) {
      fail_test("Validation after register allocation failed");
      return;
   }

   aco_print_program(program.get(), output);
}

void
finish_optimizer_postRA_test()
{
   finish_program(program.get(), true, true);

   if (!aco::validate_ir(program.get())) {
      fail_test("Validation before optimize_postRA failed");
      return;
   }

   aco::optimize_postRA(program.get());

   if (!aco::validate_ir(program.get())) {
      fail_test("Validation after optimize_postRA failed");
      return;
   }

   aco_print_program(program.get(), output);
}

void
finish_to_hw_instr_test()
{
   finish_program(program.get(), true, true);

   if (!aco::validate_ir(program.get())) {
      fail_test("Validation before lower_to_hw_instr failed");
      return;
   }

   aco::lower_to_hw_instr(program.get());

   if (!aco::validate_ir(program.get())) {
      fail_test("Validation after lower_to_hw_instr failed");
      return;
   }

   aco_print_program(program.get(), output);
}

void
finish_schedule_vopd_test()
{
   finish_program(program.get());
   aco::schedule_vopd(program.get());
   aco_print_program(program.get(), output);
}

void
finish_waitcnt_test()
{
   finish_program(program.get());
   aco::insert_waitcnt(program.get());
   aco_print_program(program.get(), output);
}

void
finish_insert_nops_test(bool endpgm)
{
   finish_program(program.get(), endpgm);
   aco::insert_NOPs(program.get());
   aco_print_program(program.get(), output);
}

void
finish_form_hard_clause_test()
{
   finish_program(program.get());
   aco::form_hard_clauses(program.get());
   aco_print_program(program.get(), output);
}

void
finish_assembler_test()
{
   finish_program(program.get());
   std::vector<uint32_t> binary;
   unsigned exec_size = emit_program(program.get(), binary);

   /* we could use CLRX for disassembly but that would require it to be
    * installed */
   if (program->gfx_level >= GFX8) {
      print_asm(program.get(), binary, exec_size / 4u, output);
   } else {
      // TODO: maybe we should use CLRX and skip this test if it's not available?
      for (uint32_t dword : binary)
         fprintf(output, "%.8x\n", dword);
   }
}

void
live_var_analysis_debug_func(void* private_data, enum aco_compiler_debug_level level, const char* message)
{
   if (level == ACO_COMPILER_DEBUG_LEVEL_ERROR)
      *(bool *)private_data = true;
}

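/* Run instruction selection on the NIR shader built via setup_nir_cs()/nb:
 * validates the NIR, selects ACO IR for the given HW stage and wave size,
 * prints the program, and then checks it with the IR/CFG validators and a
 * live-var analysis whose debug callback records any reported error. */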
void
finish_isel_test(enum ac_hw_stage hw_stage, unsigned wave_size)
{
   nir_validate_shader(nb->shader, "in finish_isel_test");
   nir_validate_ssa_dominance(nb->shader, "in finish_isel_test");

   program.reset(new Program);
   program->debug.func = nullptr;
   program->debug.private_data = nullptr;

   ac_shader_args args = {};

   aco_compiler_options options = {};
   options.family = rad_info.family;
   options.gfx_level = rad_info.gfx_level;

   memset(&info, 0, sizeof(info));
   info.hw_stage = hw_stage;
   info.wave_size = wave_size;
   info.workgroup_size = nb->shader->info.workgroup_size[0] * nb->shader->info.workgroup_size[1] * nb->shader->info.workgroup_size[2];

   memset(&config, 0, sizeof(config));

   select_program(program.get(), 1, &nb->shader, &config, &options, &info, &args);
   dominator_tree(program.get());
   lower_phis(program.get());

   ralloc_free(nb->shader);
   glsl_type_singleton_decref();

   aco_print_program(program.get(), output);

   if (!aco::validate_ir(program.get())) {
      fail_test("Validation after instruction selection failed");
      return;
   }
   if (!aco::validate_cfg(program.get())) {
      fail_test("CFG validation failed");
      return;
   }

   bool live_var_fail = false;
   program->debug.func = &live_var_analysis_debug_func;
   program->debug.private_data = &live_var_fail;
   aco::live_var_analysis(program.get());
   if (live_var_fail) {
      fail_test("Live var analysis failed");
      return;
   }
}

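/* Emit a p_unit_test pseudo-instruction tagged with index i. Tests use these
 * markers to pin values of interest in the printed program so they can be
 * matched against the expected-output patterns in the test sources. */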
void
writeout(unsigned i, Temp tmp)
{
   if (tmp.id())
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp);
   else
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i));
}

void
writeout(unsigned i, aco::Builder::Result res)
{
   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res);
}

void
writeout(unsigned i, Operand op)
{
   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op);
}

void
writeout(unsigned i, Operand op0, Operand op1)
{
   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1);
}

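/* Small IR-emission helpers for tests. Each picks the 16-bit or 32-bit opcode
 * based on the operand size; fneg/fabs are expressed as multiplies by -1.0/1.0
 * (with the abs modifier), and fsat as a med3 with 0.0 and 1.0. */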
Temp
fneg(Temp src, Builder b)
{
   if (src.bytes() == 2)
      return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0xbc00u), src);
   else
      return b.vop2(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0xbf800000u), src);
}

Temp
fabs(Temp src, Builder b)
{
   if (src.bytes() == 2) {
      Builder::Result res =
         b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
      res->valu().abs[1] = true;
      return res;
   } else {
      Builder::Result res =
         b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
      res->valu().abs[1] = true;
      return res;
   }
}

Temp
f2f32(Temp src, Builder b)
{
   return b.vop1(aco_opcode::v_cvt_f32_f16, b.def(v1), src);
}

Temp
f2f16(Temp src, Builder b)
{
   return b.vop1(aco_opcode::v_cvt_f16_f32, b.def(v2b), src);
}

Temp
u2u16(Temp src, Builder b)
{
   return b.pseudo(aco_opcode::p_extract_vector, b.def(v2b), src, Operand::zero());
}

Temp
fadd(Temp src0, Temp src1, Builder b)
{
   if (src0.bytes() == 2)
      return b.vop2(aco_opcode::v_add_f16, b.def(v2b), src0, src1);
   else
      return b.vop2(aco_opcode::v_add_f32, b.def(v1), src0, src1);
}

Temp
fmul(Temp src0, Temp src1, Builder b)
{
   if (src0.bytes() == 2)
      return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), src0, src1);
   else
      return b.vop2(aco_opcode::v_mul_f32, b.def(v1), src0, src1);
}

Temp
fma(Temp src0, Temp src1, Temp src2, Builder b)
{
   if (src0.bytes() == 2)
      return b.vop3(aco_opcode::v_fma_f16, b.def(v2b), src0, src1, src2);
   else
      return b.vop3(aco_opcode::v_fma_f32, b.def(v1), src0, src1, src2);
}

Temp
fsat(Temp src, Builder b)
{
   if (src.bytes() == 2)
      return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u), Operand::c16(0x3c00u),
                    src);
   else
      return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(), Operand::c32(0x3f800000u),
                    src);
}

Temp
fmin(Temp src0, Temp src1, Builder b)
{
   return b.vop2(aco_opcode::v_min_f32, b.def(v1), src0, src1);
}

Temp
fmax(Temp src0, Temp src1, Builder b)
{
   return b.vop2(aco_opcode::v_max_f32, b.def(v1), src0, src1);
}

Temp
ext_ushort(Temp src, unsigned idx, Builder b)
{
   return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
                   Operand::c32(16u), Operand::c32(false));
}

Temp
ext_ubyte(Temp src, unsigned idx, Builder b)
{
   return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
                   Operand::c32(8u), Operand::c32(false));
}

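/* Build the six-block CFG ACO uses for a divergent if/else: the current block
 * becomes the branch block, followed by then_logical/then_linear, an invert
 * block, else_logical/else_linear and a merge block. exec is saved to a fixed
 * SGPR (PhysReg 84 here), inverted between the two sides, and restored at the
 * merge. 'then' and 'els' are callbacks that emit the bodies of each side. */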
void
emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::function<void()> then,
                       std::function<void()> els)
{
   prog->blocks.reserve(prog->blocks.size() + 6);

   Block* if_block = &prog->blocks.back();
   Block* then_logical = prog->create_and_insert_block();
   Block* then_linear = prog->create_and_insert_block();
   Block* invert = prog->create_and_insert_block();
   Block* else_logical = prog->create_and_insert_block();
   Block* else_linear = prog->create_and_insert_block();
   Block* endif_block = prog->create_and_insert_block();

   if_block->kind |= block_kind_branch;
   invert->kind |= block_kind_invert;
   endif_block->kind |= block_kind_merge | (if_block->kind & block_kind_top_level);

   /* Set up logical CF */
   then_logical->logical_preds.push_back(if_block->index);
   else_logical->logical_preds.push_back(if_block->index);
   endif_block->logical_preds.push_back(then_logical->index);
   endif_block->logical_preds.push_back(else_logical->index);

   /* Set up linear CF */
   then_logical->linear_preds.push_back(if_block->index);
   then_linear->linear_preds.push_back(if_block->index);
   invert->linear_preds.push_back(then_logical->index);
   invert->linear_preds.push_back(then_linear->index);
   else_logical->linear_preds.push_back(invert->index);
   else_linear->linear_preds.push_back(invert->index);
   endif_block->linear_preds.push_back(else_logical->index);
   endif_block->linear_preds.push_back(else_linear->index);

   PhysReg saved_exec_reg(84);

   b.reset(if_block);
   Temp saved_exec = b.sop1(Builder::s_and_saveexec, b.def(b.lm, saved_exec_reg),
                            Definition(scc, s1), Definition(exec, b.lm), cond, Operand(exec, b.lm));
   b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), then_logical->index,
            then_linear->index);

   b.reset(then_logical);
   b.pseudo(aco_opcode::p_logical_start);
   then();
   b.pseudo(aco_opcode::p_logical_end);
   b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), invert->index);

   b.reset(then_linear);
   b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), invert->index);

   b.reset(invert);
   b.sop2(Builder::s_andn2, Definition(exec, bld.lm), Definition(scc, s1),
          Operand(saved_exec, saved_exec_reg), Operand(exec, bld.lm));
   b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), else_logical->index,
            else_linear->index);

   b.reset(else_logical);
   b.pseudo(aco_opcode::p_logical_start);
   els();
   b.pseudo(aco_opcode::p_logical_end);
   b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), endif_block->index);

   b.reset(else_linear);
   b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), endif_block->index);

   b.reset(endif_block);
   b.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
            Operand(saved_exec, saved_exec_reg));
}

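/* Lazily create (and cache per family) a Vulkan device through the RADV ICD.
 * RADV_FORCE_FAMILY is set so any supported chip can be targeted regardless of
 * the host GPU; the cached devices and instances are destroyed by the
 * destroy_devices destructor at process exit. */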
VkDevice
get_vk_device(enum amd_gfx_level gfx_level)
{
   enum radeon_family family;
   switch (gfx_level) {
   case GFX6: family = CHIP_TAHITI; break;
   case GFX7: family = CHIP_BONAIRE; break;
   case GFX8: family = CHIP_POLARIS10; break;
   case GFX9: family = CHIP_VEGA10; break;
   case GFX10: family = CHIP_NAVI10; break;
   case GFX10_3: family = CHIP_NAVI21; break;
   case GFX11: family = CHIP_NAVI31; break;
   case GFX12: family = CHIP_GFX1200; break;
   default: family = CHIP_UNKNOWN; break;
   }
   return get_vk_device(family);
}

VkDevice
get_vk_device(enum radeon_family family)
{
   assert(family != CHIP_UNKNOWN);

   std::lock_guard<std::mutex> guard(create_device_mutex);

   if (device_cache[family])
      return device_cache[family];

   setenv("RADV_FORCE_FAMILY", ac_get_family_name(family), 1);

   VkApplicationInfo app_info = {};
   app_info.pApplicationName = "aco_tests";
   app_info.apiVersion = VK_API_VERSION_1_2;
   VkInstanceCreateInfo instance_create_info = {};
   instance_create_info.pApplicationInfo = &app_info;
   instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
   ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(
      NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]);
   assert(result == VK_SUCCESS);

#define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n);
   FUNCTION_LIST
#undef ITEM

   uint32_t device_count = 1;
   VkPhysicalDevice device = VK_NULL_HANDLE;
   result = EnumeratePhysicalDevices(instance_cache[family], &device_count, &device);
   assert(result == VK_SUCCESS);
   assert(device != VK_NULL_HANDLE);

   VkDeviceCreateInfo device_create_info = {};
   device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
   static const char* extensions[] = {"VK_KHR_pipeline_executable_properties"};
   device_create_info.enabledExtensionCount = sizeof(extensions) / sizeof(extensions[0]);
   device_create_info.ppEnabledExtensionNames = extensions;
   result = CreateDevice(device, &device_create_info, NULL, &device_cache[family]);

   return device_cache[family];
}

static struct DestroyDevices {
   ~DestroyDevices()
   {
      for (unsigned i = 0; i < CHIP_LAST; i++) {
         if (!device_cache[i])
            continue;
         DestroyDevice(device_cache[i], NULL);
         DestroyInstance(instance_cache[i], NULL);
      }
   }
} destroy_devices;

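/* Print one of a pipeline's internal representations using
 * VK_KHR_pipeline_executable_properties: find the executable matching the
 * requested stages, locate the IR whose name matches 'name', fetch its data,
 * and write it to 'output'. With remove_encoding, everything from ';' to the
 * end of each line is blanked out so raw instruction encodings don't leak
 * into the test's expected output. */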
void
print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
                  const char* name, bool remove_encoding)
{
   uint32_t executable_count = 16;
   VkPipelineExecutablePropertiesKHR executables[16];
   VkPipelineInfoKHR pipeline_info;
   pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
   pipeline_info.pNext = NULL;
   pipeline_info.pipeline = pipeline;
   ASSERTED VkResult result =
      GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables);
   assert(result == VK_SUCCESS);

   uint32_t executable = 0;
   for (; executable < executable_count; executable++) {
      if (executables[executable].stages == stages)
         break;
   }
   assert(executable != executable_count);

   VkPipelineExecutableInfoKHR exec_info;
   exec_info.sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR;
   exec_info.pNext = NULL;
   exec_info.pipeline = pipeline;
   exec_info.executableIndex = executable;

   uint32_t ir_count = 16;
   VkPipelineExecutableInternalRepresentationKHR ir[16];
   memset(ir, 0, sizeof(ir));
   result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
   assert(result == VK_SUCCESS);

   VkPipelineExecutableInternalRepresentationKHR* requested_ir = nullptr;
   for (unsigned i = 0; i < ir_count; ++i) {
      if (strcmp(ir[i].name, name) == 0) {
         requested_ir = &ir[i];
         break;
      }
   }
   assert(requested_ir && "Could not find requested IR");

   char* data = (char*)malloc(requested_ir->dataSize);
   requested_ir->pData = data;
   result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
   assert(result == VK_SUCCESS);

   if (remove_encoding) {
      for (char* c = data; *c; c++) {
         if (*c == ';') {
            for (; *c && *c != '\n'; c++)
               *c = ' ';
         }
      }
   }

   fprintf(output, "%s", data);
   free(data);
}

VkShaderModule
__qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo* module_info)
{
   VkShaderModuleCreateInfo vk_module_info;
   vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
   vk_module_info.pNext = NULL;
   vk_module_info.flags = 0;
   vk_module_info.codeSize = module_info->spirvSize;
   vk_module_info.pCode = (const uint32_t*)module_info->pSpirv;

   VkShaderModule module;
   ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module);
   assert(result == VK_SUCCESS);

   return module;
}

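/* PipelineBuilder zero-initializes itself and is then configured through the
 * add_*() methods; pipelines are created with
 * VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR so print_ir()
 * can query their IR. Shader modules added through add_stage() are destroyed
 * together with the builder. */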
PipelineBuilder::PipelineBuilder(VkDevice dev)
{
   memset(this, 0, sizeof(*this));
   topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
   device = dev;
}

PipelineBuilder::~PipelineBuilder()
{
   DestroyPipeline(device, pipeline, NULL);

   for (unsigned i = 0; i < (is_compute() ? 1 : gfx_pipeline_info.stageCount); i++) {
      VkPipelineShaderStageCreateInfo* stage_info = &stages[i];
      if (owned_stages & stage_info->stage)
         DestroyShaderModule(device, stage_info->module, NULL);
   }

   DestroyPipelineLayout(device, pipeline_layout, NULL);

   for (unsigned i = 0; i < util_bitcount64(desc_layouts_used); i++)
      DestroyDescriptorSetLayout(device, desc_layouts[i], NULL);

   DestroyRenderPass(device, render_pass, NULL);
}

void
PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout, uint32_t binding,
                                  VkDescriptorType type, uint32_t count)
{
   desc_layouts_used |= 1ull << layout;
   desc_bindings[layout][num_desc_bindings[layout]++] = {binding, type, count, stage_flags, NULL};
}

void
PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
{
   vs_bindings[vs_input.vertexBindingDescriptionCount++] = {binding, stride, rate};
}

void
PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format,
                                      uint32_t offset)
{
   vs_attributes[vs_input.vertexAttributeDescriptionCount++] = {location, binding, format, offset};
}

void
PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo* module)
{
   for (unsigned i = 0; i < module->declarationCount; i++) {
      const QoShaderDecl* decl = &module->pDeclarations[i];
      switch (decl->decl_type) {
      case QoShaderDeclType_ubo:
         add_desc_binding(module->stage, decl->set, decl->binding,
                          VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
         break;
      case QoShaderDeclType_ssbo:
         add_desc_binding(module->stage, decl->set, decl->binding,
                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
         break;
      case QoShaderDeclType_img_buf:
         add_desc_binding(module->stage, decl->set, decl->binding,
                          VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
         break;
      case QoShaderDeclType_img:
         add_desc_binding(module->stage, decl->set, decl->binding,
                          VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
         break;
      case QoShaderDeclType_tex_buf:
         add_desc_binding(module->stage, decl->set, decl->binding,
                          VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
         break;
      case QoShaderDeclType_combined:
         add_desc_binding(module->stage, decl->set, decl->binding,
                          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
         break;
      case QoShaderDeclType_tex:
         add_desc_binding(module->stage, decl->set, decl->binding,
                          VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
         break;
      case QoShaderDeclType_samp:
         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLER);
         break;
      default: break;
      }
   }
}

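/* Translate the shader's declared inputs/outputs into pipeline state: vertex
 * stage inputs become vec4 attributes tightly packed into vertex binding 0
 * (16 bytes each, format chosen from the declared type), and fragment stage
 * outputs become color attachment formats. */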
void
PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo* module)
{
   unsigned next_vtx_offset = 0;
   for (unsigned i = 0; i < module->declarationCount; i++) {
      const QoShaderDecl* decl = &module->pDeclarations[i];
      switch (decl->decl_type) {
      case QoShaderDeclType_in:
         if (module->stage == VK_SHADER_STAGE_VERTEX_BIT) {
            if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
               add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT,
                                    next_vtx_offset);
            else if (decl->type[0] == 'u')
               add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT,
                                    next_vtx_offset);
            else if (decl->type[0] == 'i')
               add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT,
                                    next_vtx_offset);
            next_vtx_offset += 16;
         }
         break;
      case QoShaderDeclType_out:
         if (module->stage == VK_SHADER_STAGE_FRAGMENT_BIT) {
            if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
               color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SFLOAT;
            else if (decl->type[0] == 'u')
               color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_UINT;
            else if (decl->type[0] == 'i')
               color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SINT;
         }
         break;
      default: break;
      }
   }
   if (next_vtx_offset)
      add_vertex_binding(0, next_vtx_offset);
}

void
PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char* name)
{
   VkPipelineShaderStageCreateInfo* stage_info;
   if (stage == VK_SHADER_STAGE_COMPUTE_BIT)
      stage_info = &stages[0];
   else
      stage_info = &stages[gfx_pipeline_info.stageCount++];
   stage_info->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
   stage_info->pNext = NULL;
   stage_info->flags = 0;
   stage_info->stage = stage;
   stage_info->module = module;
   stage_info->pName = name;
   stage_info->pSpecializationInfo = NULL;
   owned_stages |= stage;
}

void
PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module,
                           const char* name)
{
   add_stage(stage, __qoCreateShaderModule(device, &module), name);
   add_resource_decls(&module);
   add_io_decls(&module);
}

void
PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
{
   add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
   add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
}

void
PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
{
   add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
   add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
}

void
PipelineBuilder::add_cs(VkShaderModule cs)
{
   add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
}

void
PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
{
   add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
}

bool
PipelineBuilder::is_compute()
{
   return gfx_pipeline_info.stageCount == 0;
}

void
PipelineBuilder::create_compute_pipeline()
{
   VkComputePipelineCreateInfo create_info;
   create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
   create_info.pNext = NULL;
   create_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
   create_info.stage = stages[0];
   create_info.layout = pipeline_layout;
   create_info.basePipelineHandle = VK_NULL_HANDLE;
   create_info.basePipelineIndex = 0;

   ASSERTED VkResult result =
      CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline);
   assert(result == VK_SUCCESS);
}

void
PipelineBuilder::create_graphics_pipeline()
{
   /* create the create infos */
   if (!samples)
      samples = VK_SAMPLE_COUNT_1_BIT;

   unsigned num_color_attachments = 0;
   VkPipelineColorBlendAttachmentState blend_attachment_states[16];
   VkAttachmentReference color_attachments[16];
   VkAttachmentDescription attachment_descs[17];
   for (unsigned i = 0; i < 16; i++) {
      if (color_outputs[i] == VK_FORMAT_UNDEFINED)
         continue;

      VkAttachmentDescription* desc = &attachment_descs[num_color_attachments];
      desc->flags = 0;
      desc->format = color_outputs[i];
      desc->samples = samples;
      desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
      desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
      desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
      desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
      desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
      desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;

      VkAttachmentReference* ref = &color_attachments[num_color_attachments];
      ref->attachment = num_color_attachments;
      ref->layout = VK_IMAGE_LAYOUT_GENERAL;

      VkPipelineColorBlendAttachmentState* blend = &blend_attachment_states[num_color_attachments];
      blend->blendEnable = false;
      blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
                              VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;

      num_color_attachments++;
   }

   unsigned num_attachments = num_color_attachments;
   VkAttachmentReference ds_attachment;
   if (ds_output != VK_FORMAT_UNDEFINED) {
      VkAttachmentDescription* desc = &attachment_descs[num_attachments];
      desc->flags = 0;
      desc->format = ds_output;
      desc->samples = samples;
      desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
      desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
      desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
      desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
      desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
      desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;

      ds_attachment.attachment = num_color_attachments;
      ds_attachment.layout = VK_IMAGE_LAYOUT_GENERAL;

      num_attachments++;
   }

   vs_input.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
   vs_input.pNext = NULL;
   vs_input.flags = 0;
   vs_input.pVertexBindingDescriptions = vs_bindings;
   vs_input.pVertexAttributeDescriptions = vs_attributes;

   VkPipelineInputAssemblyStateCreateInfo assembly_state;
   assembly_state.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
   assembly_state.pNext = NULL;
   assembly_state.flags = 0;
   assembly_state.topology = topology;
   assembly_state.primitiveRestartEnable = false;

   VkPipelineTessellationStateCreateInfo tess_state;
   tess_state.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
   tess_state.pNext = NULL;
   tess_state.flags = 0;
   tess_state.patchControlPoints = patch_size;

   VkPipelineViewportStateCreateInfo viewport_state;
   viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
   viewport_state.pNext = NULL;
   viewport_state.flags = 0;
   viewport_state.viewportCount = 1;
   viewport_state.pViewports = NULL;
   viewport_state.scissorCount = 1;
   viewport_state.pScissors = NULL;

   VkPipelineRasterizationStateCreateInfo rasterization_state;
   rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
   rasterization_state.pNext = NULL;
   rasterization_state.flags = 0;
   rasterization_state.depthClampEnable = false;
   rasterization_state.rasterizerDiscardEnable = false;
   rasterization_state.polygonMode = VK_POLYGON_MODE_FILL;
   rasterization_state.cullMode = VK_CULL_MODE_NONE;
   rasterization_state.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
   rasterization_state.depthBiasEnable = false;
   rasterization_state.lineWidth = 1.0;

   VkPipelineMultisampleStateCreateInfo ms_state;
   ms_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
   ms_state.pNext = NULL;
   ms_state.flags = 0;
   ms_state.rasterizationSamples = samples;
   ms_state.sampleShadingEnable = sample_shading_enable;
   ms_state.minSampleShading = min_sample_shading;
   VkSampleMask sample_mask = 0xffffffff;
   ms_state.pSampleMask = &sample_mask;
   ms_state.alphaToCoverageEnable = false;
   ms_state.alphaToOneEnable = false;

   VkPipelineDepthStencilStateCreateInfo ds_state;
   ds_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
   ds_state.pNext = NULL;
   ds_state.flags = 0;
   ds_state.depthTestEnable = ds_output != VK_FORMAT_UNDEFINED;
   ds_state.depthWriteEnable = true;
   ds_state.depthCompareOp = VK_COMPARE_OP_ALWAYS;
   ds_state.depthBoundsTestEnable = false;
   ds_state.stencilTestEnable = true;
   ds_state.front.failOp = VK_STENCIL_OP_KEEP;
   ds_state.front.passOp = VK_STENCIL_OP_REPLACE;
   ds_state.front.depthFailOp = VK_STENCIL_OP_REPLACE;
   ds_state.front.compareOp = VK_COMPARE_OP_ALWAYS;
   ds_state.front.compareMask = 0xffffffff;
   ds_state.front.writeMask = 0;
   ds_state.front.reference = 0;
   ds_state.back = ds_state.front;

   VkPipelineColorBlendStateCreateInfo color_blend_state;
   color_blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
   color_blend_state.pNext = NULL;
   color_blend_state.flags = 0;
   color_blend_state.logicOpEnable = false;
   color_blend_state.attachmentCount = num_color_attachments;
   color_blend_state.pAttachments = blend_attachment_states;

   VkDynamicState dynamic_states[9] = {VK_DYNAMIC_STATE_VIEWPORT,
                                       VK_DYNAMIC_STATE_SCISSOR,
                                       VK_DYNAMIC_STATE_LINE_WIDTH,
                                       VK_DYNAMIC_STATE_DEPTH_BIAS,
                                       VK_DYNAMIC_STATE_BLEND_CONSTANTS,
                                       VK_DYNAMIC_STATE_DEPTH_BOUNDS,
                                       VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
                                       VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
                                       VK_DYNAMIC_STATE_STENCIL_REFERENCE};

   VkPipelineDynamicStateCreateInfo dynamic_state;
   dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
   dynamic_state.pNext = NULL;
   dynamic_state.flags = 0;
   dynamic_state.dynamicStateCount = sizeof(dynamic_states) / sizeof(VkDynamicState);
   dynamic_state.pDynamicStates = dynamic_states;

   gfx_pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
   gfx_pipeline_info.pNext = NULL;
   gfx_pipeline_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
   gfx_pipeline_info.pVertexInputState = &vs_input;
   gfx_pipeline_info.pInputAssemblyState = &assembly_state;
   gfx_pipeline_info.pTessellationState = &tess_state;
   gfx_pipeline_info.pViewportState = &viewport_state;
   gfx_pipeline_info.pRasterizationState = &rasterization_state;
   gfx_pipeline_info.pMultisampleState = &ms_state;
   gfx_pipeline_info.pDepthStencilState = &ds_state;
   gfx_pipeline_info.pColorBlendState = &color_blend_state;
   gfx_pipeline_info.pDynamicState = &dynamic_state;
   gfx_pipeline_info.subpass = 0;

   /* create the objects used to create the pipeline */
   VkSubpassDescription subpass;
   subpass.flags = 0;
   subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
   subpass.inputAttachmentCount = 0;
   subpass.pInputAttachments = NULL;
   subpass.colorAttachmentCount = num_color_attachments;
   subpass.pColorAttachments = color_attachments;
   subpass.pResolveAttachments = NULL;
   subpass.pDepthStencilAttachment = ds_output == VK_FORMAT_UNDEFINED ? NULL : &ds_attachment;
   subpass.preserveAttachmentCount = 0;
   subpass.pPreserveAttachments = NULL;

   VkRenderPassCreateInfo renderpass_info;
   renderpass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
   renderpass_info.pNext = NULL;
   renderpass_info.flags = 0;
   renderpass_info.attachmentCount = num_attachments;
   renderpass_info.pAttachments = attachment_descs;
   renderpass_info.subpassCount = 1;
   renderpass_info.pSubpasses = &subpass;
   renderpass_info.dependencyCount = 0;
   renderpass_info.pDependencies = NULL;

   ASSERTED VkResult result = CreateRenderPass(device, &renderpass_info, NULL, &render_pass);
   assert(result == VK_SUCCESS);

   gfx_pipeline_info.layout = pipeline_layout;
   gfx_pipeline_info.renderPass = render_pass;

   /* create the pipeline */
   gfx_pipeline_info.pStages = stages;

   result = CreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &gfx_pipeline_info, NULL, &pipeline);
   assert(result == VK_SUCCESS);
}

void
PipelineBuilder::create_pipeline()
{
   unsigned num_desc_layouts = 0;
   for (unsigned i = 0; i < 64; i++) {
      if (!(desc_layouts_used & (1ull << i)))
         continue;

      VkDescriptorSetLayoutCreateInfo desc_layout_info;
      desc_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
      desc_layout_info.pNext = NULL;
      desc_layout_info.flags = 0;
      desc_layout_info.bindingCount = num_desc_bindings[i];
      desc_layout_info.pBindings = desc_bindings[i];

      ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL,
                                                           &desc_layouts[num_desc_layouts]);
      assert(result == VK_SUCCESS);
      num_desc_layouts++;
   }

   VkPipelineLayoutCreateInfo pipeline_layout_info;
   pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
   pipeline_layout_info.pNext = NULL;
   pipeline_layout_info.flags = 0;
   pipeline_layout_info.pushConstantRangeCount = 1;
   pipeline_layout_info.pPushConstantRanges = &push_constant_range;
   pipeline_layout_info.setLayoutCount = num_desc_layouts;
   pipeline_layout_info.pSetLayouts = desc_layouts;

   ASSERTED VkResult result =
      CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout);
   assert(result == VK_SUCCESS);

   if (is_compute())
      create_compute_pipeline();
   else
      create_graphics_pipeline();
}

void
PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char* name, bool remove_encoding)
{
   if (!pipeline)
      create_pipeline();
   print_pipeline_ir(device, pipeline, stage_flags, name, remove_encoding);
}
1217