xref: /aosp_15_r20/external/mesa3d/src/vulkan/runtime/vk_texcompress_etc2.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2023 Google LLC
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "vk_texcompress_etc2.h"
7 
8 #include "compiler/nir/nir_builder.h"
9 #include "vk_shader_module.h"
10 
11 /* Based on
12  * https://github.com/Themaister/Granite/blob/master/assets/shaders/decode/etc2.comp
13  * https://github.com/Themaister/Granite/blob/master/assets/shaders/decode/eac.comp
14  *
15  * With some differences:
16  *  - Use the vk format to do all the settings.
17  *  - Combine the ETC2 and EAC shaders.
18  *  - Since we combined the above, reuse the function for the ETC2 A8 component.
19  *  - the EAC shader doesn't do SNORM correctly, so this has that fixed.
20  */
21 
22 static nir_def *
flip_endian(nir_builder * b,nir_def * src,unsigned cnt)23 flip_endian(nir_builder *b, nir_def *src, unsigned cnt)
24 {
25    nir_def *v[2];
26    for (unsigned i = 0; i < cnt; ++i) {
27       nir_def *intermediate[4];
28       nir_def *chan = cnt == 1 ? src : nir_channel(b, src, i);
29       for (unsigned j = 0; j < 4; ++j)
30          intermediate[j] = nir_ubfe_imm(b, chan, 8 * j, 8);
31       v[i] = nir_ior(b, nir_ior(b, nir_ishl_imm(b, intermediate[0], 24), nir_ishl_imm(b, intermediate[1], 16)),
32                      nir_ior(b, nir_ishl_imm(b, intermediate[2], 8), nir_ishl_imm(b, intermediate[3], 0)));
33    }
34    return cnt == 1 ? v[0] : nir_vec(b, v, cnt);
35 }
36 
37 static nir_def *
etc1_color_modifier_lookup(nir_builder * b,nir_def * x,nir_def * y)38 etc1_color_modifier_lookup(nir_builder *b, nir_def *x, nir_def *y)
39 {
40    const unsigned table[8][2] = {{2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}};
41    nir_def *upper = nir_ieq_imm(b, y, 1);
42    nir_def *result = NULL;
43    for (unsigned i = 0; i < 8; ++i) {
44       nir_def *tmp = nir_bcsel(b, upper, nir_imm_int(b, table[i][1]), nir_imm_int(b, table[i][0]));
45       if (result)
46          result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result);
47       else
48          result = tmp;
49    }
50    return result;
51 }
52 
53 static nir_def *
etc2_distance_lookup(nir_builder * b,nir_def * x)54 etc2_distance_lookup(nir_builder *b, nir_def *x)
55 {
56    const unsigned table[8] = {3, 6, 11, 16, 23, 32, 41, 64};
57    nir_def *result = NULL;
58    for (unsigned i = 0; i < 8; ++i) {
59       if (result)
60          result = nir_bcsel(b, nir_ieq_imm(b, x, i), nir_imm_int(b, table[i]), result);
61       else
62          result = nir_imm_int(b, table[i]);
63    }
64    return result;
65 }
66 
67 static nir_def *
etc1_alpha_modifier_lookup(nir_builder * b,nir_def * x,nir_def * y)68 etc1_alpha_modifier_lookup(nir_builder *b, nir_def *x, nir_def *y)
69 {
70    const unsigned table[16] = {0xe852, 0xc962, 0xc741, 0xc531, 0xb752, 0xa862, 0xa763, 0xa742,
71                                0x9751, 0x9741, 0x9731, 0x9641, 0x9632, 0x9210, 0x8753, 0x8642};
72    nir_def *result = NULL;
73    for (unsigned i = 0; i < 16; ++i) {
74       nir_def *tmp = nir_imm_int(b, table[i]);
75       if (result)
76          result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result);
77       else
78          result = tmp;
79    }
80    return nir_ubfe(b, result, nir_imul_imm(b, y, 4), nir_imm_int(b, 4));
81 }
82 
83 static nir_def *
etc_extend(nir_builder * b,nir_def * v,int bits)84 etc_extend(nir_builder *b, nir_def *v, int bits)
85 {
86    if (bits == 4)
87       return nir_imul_imm(b, v, 0x11);
88    return nir_ior(b, nir_ishl_imm(b, v, 8 - bits), nir_ushr_imm(b, v, bits - (8 - bits)));
89 }
90 
91 static nir_def *
decode_etc2_alpha(struct nir_builder * b,nir_def * alpha_payload,nir_def * linear_pixel,bool eac,nir_def * is_signed)92 decode_etc2_alpha(struct nir_builder *b, nir_def *alpha_payload, nir_def *linear_pixel, bool eac, nir_def *is_signed)
93 {
94    alpha_payload = flip_endian(b, alpha_payload, 2);
95    nir_def *alpha_x = nir_channel(b, alpha_payload, 1);
96    nir_def *alpha_y = nir_channel(b, alpha_payload, 0);
97    nir_def *bit_offset = nir_isub_imm(b, 45, nir_imul_imm(b, linear_pixel, 3));
98    nir_def *base = nir_ubfe_imm(b, alpha_y, 24, 8);
99    nir_def *multiplier = nir_ubfe_imm(b, alpha_y, 20, 4);
100    nir_def *table = nir_ubfe_imm(b, alpha_y, 16, 4);
101 
102    if (eac) {
103       nir_def *signed_base = nir_ibfe_imm(b, alpha_y, 24, 8);
104       signed_base = nir_imul_imm(b, signed_base, 8);
105       base = nir_iadd_imm(b, nir_imul_imm(b, base, 8), 4);
106       base = nir_bcsel(b, is_signed, signed_base, base);
107       multiplier = nir_imax(b, nir_imul_imm(b, multiplier, 8), nir_imm_int(b, 1));
108    }
109 
110    nir_def *lsb_index = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x),
111                                  nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 2));
112    bit_offset = nir_iadd_imm(b, bit_offset, 2);
113    nir_def *msb = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x),
114                            nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 1));
115    nir_def *mod = nir_ixor(b, etc1_alpha_modifier_lookup(b, table, lsb_index), nir_iadd_imm(b, msb, -1));
116    nir_def *a = nir_iadd(b, base, nir_imul(b, mod, multiplier));
117 
118    nir_def *low_bound = nir_imm_int(b, 0);
119    nir_def *high_bound = nir_imm_int(b, 255);
120    nir_def *final_mult = nir_imm_float(b, 1 / 255.0);
121    if (eac) {
122       low_bound = nir_bcsel(b, is_signed, nir_imm_int(b, -1023), low_bound);
123       high_bound = nir_bcsel(b, is_signed, nir_imm_int(b, 1023), nir_imm_int(b, 2047));
124       final_mult = nir_bcsel(b, is_signed, nir_imm_float(b, 1 / 1023.0), nir_imm_float(b, 1 / 2047.0));
125    }
126 
127    return nir_fmul(b, nir_i2f32(b, nir_iclamp(b, a, low_bound, high_bound)), final_mult);
128 }
129 
130 static nir_def *
get_global_ids(nir_builder * b,unsigned num_components)131 get_global_ids(nir_builder *b, unsigned num_components)
132 {
133    unsigned mask = BITFIELD_MASK(num_components);
134 
135    nir_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask);
136    nir_def *block_ids = nir_channels(b, nir_load_workgroup_id(b), mask);
137    nir_def *block_size =
138       nir_channels(b,
139                    nir_imm_ivec4(b, b->shader->info.workgroup_size[0], b->shader->info.workgroup_size[1],
140                                  b->shader->info.workgroup_size[2], 0),
141                    mask);
142 
143    return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids);
144 }
145 
146 static nir_shader *
etc2_build_shader(struct vk_device * dev,const struct nir_shader_compiler_options * nir_options)147 etc2_build_shader(struct vk_device *dev, const struct nir_shader_compiler_options *nir_options)
148 {
149    const struct glsl_type *sampler_type_2d = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, GLSL_TYPE_UINT);
150    const struct glsl_type *sampler_type_3d = glsl_sampler_type(GLSL_SAMPLER_DIM_3D, false, false, GLSL_TYPE_UINT);
151    const struct glsl_type *img_type_2d = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT);
152    const struct glsl_type *img_type_3d = glsl_image_type(GLSL_SAMPLER_DIM_3D, false, GLSL_TYPE_FLOAT);
153    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, nir_options, "meta_decode_etc");
154    b.shader->info.workgroup_size[0] = 8;
155    b.shader->info.workgroup_size[1] = 8;
156 
157    nir_variable *input_img_2d = nir_variable_create(b.shader, nir_var_uniform, sampler_type_2d, "s_tex_2d");
158    input_img_2d->data.descriptor_set = 0;
159    input_img_2d->data.binding = 0;
160 
161    nir_variable *input_img_3d = nir_variable_create(b.shader, nir_var_uniform, sampler_type_3d, "s_tex_3d");
162    input_img_3d->data.descriptor_set = 0;
163    input_img_3d->data.binding = 0;
164 
165    nir_variable *output_img_2d = nir_variable_create(b.shader, nir_var_image, img_type_2d, "out_img_2d");
166    output_img_2d->data.descriptor_set = 0;
167    output_img_2d->data.binding = 1;
168 
169    nir_variable *output_img_3d = nir_variable_create(b.shader, nir_var_image, img_type_3d, "out_img_3d");
170    output_img_3d->data.descriptor_set = 0;
171    output_img_3d->data.binding = 1;
172 
173    nir_def *global_id = get_global_ids(&b, 3);
174 
175    nir_def *consts = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
176    nir_def *consts2 = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
177    nir_def *offset = nir_channels(&b, consts, 7);
178    nir_def *format = nir_channel(&b, consts, 3);
179    nir_def *image_type = nir_channel(&b, consts2, 0);
180    nir_def *is_3d = nir_ieq_imm(&b, image_type, VK_IMAGE_TYPE_3D);
181    nir_def *coord = nir_iadd(&b, global_id, offset);
182    nir_def *src_coord = nir_vec3(&b, nir_ushr_imm(&b, nir_channel(&b, coord, 0), 2),
183                                  nir_ushr_imm(&b, nir_channel(&b, coord, 1), 2), nir_channel(&b, coord, 2));
184 
185    nir_variable *payload_var = nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "payload");
186    nir_push_if(&b, is_3d);
187    {
188       nir_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_3d), src_coord, nir_imm_int(&b, 0));
189       nir_store_var(&b, payload_var, color, 0xf);
190    }
191    nir_push_else(&b, NULL);
192    {
193       nir_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_2d), src_coord, nir_imm_int(&b, 0));
194       nir_store_var(&b, payload_var, color, 0xf);
195    }
196    nir_pop_if(&b, NULL);
197 
198    nir_def *pixel_coord = nir_iand_imm(&b, nir_channels(&b, coord, 3), 3);
199    nir_def *linear_pixel =
200       nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, pixel_coord, 0), 4), nir_channel(&b, pixel_coord, 1));
201 
202    nir_def *payload = nir_load_var(&b, payload_var);
203    nir_variable *color = nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "color");
204    nir_store_var(&b, color, nir_imm_vec4(&b, 1.0, 0.0, 0.0, 1.0), 0xf);
205    nir_push_if(&b, nir_ilt_imm(&b, format, VK_FORMAT_EAC_R11_UNORM_BLOCK));
206    {
207       nir_def *alpha_bits_8 = nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK);
208       nir_def *alpha_bits_1 = nir_iand(&b, nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK),
209                                        nir_ilt_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK));
210 
211       nir_def *color_payload =
212          nir_bcsel(&b, alpha_bits_8, nir_channels(&b, payload, 0xC), nir_channels(&b, payload, 3));
213       color_payload = flip_endian(&b, color_payload, 2);
214       nir_def *color_y = nir_channel(&b, color_payload, 0);
215       nir_def *color_x = nir_channel(&b, color_payload, 1);
216       nir_def *flip = nir_test_mask(&b, color_y, 1);
217       nir_def *subblock =
218          nir_ushr_imm(&b, nir_bcsel(&b, flip, nir_channel(&b, pixel_coord, 1), nir_channel(&b, pixel_coord, 0)), 1);
219 
220       nir_variable *punchthrough = nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "punchthrough");
221       nir_def *punchthrough_init = nir_iand(&b, alpha_bits_1, nir_inot(&b, nir_test_mask(&b, color_y, 2)));
222       nir_store_var(&b, punchthrough, punchthrough_init, 0x1);
223 
224       nir_variable *etc1_compat = nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "etc1_compat");
225       nir_store_var(&b, etc1_compat, nir_imm_false(&b), 0x1);
226 
227       nir_variable *alpha_result =
228          nir_variable_create(b.shader, nir_var_shader_temp, glsl_float_type(), "alpha_result");
229       nir_push_if(&b, alpha_bits_8);
230       {
231          nir_store_var(&b, alpha_result, decode_etc2_alpha(&b, nir_channels(&b, payload, 3), linear_pixel, false, NULL),
232                        1);
233       }
234       nir_push_else(&b, NULL);
235       {
236          nir_store_var(&b, alpha_result, nir_imm_float(&b, 1.0), 1);
237       }
238       nir_pop_if(&b, NULL);
239 
240       const struct glsl_type *uvec3_type = glsl_vector_type(GLSL_TYPE_UINT, 3);
241       nir_variable *rgb_result = nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "rgb_result");
242       nir_variable *base_rgb = nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "base_rgb");
243       nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 255, 0, 0), 0x7);
244 
245       nir_def *msb = nir_iand_imm(&b, nir_ushr(&b, color_x, nir_iadd_imm(&b, linear_pixel, 15)), 2);
246       nir_def *lsb = nir_iand_imm(&b, nir_ushr(&b, color_x, linear_pixel), 1);
247 
248       nir_push_if(&b, nir_iand(&b, nir_inot(&b, alpha_bits_1), nir_inot(&b, nir_test_mask(&b, color_y, 2))));
249       {
250          nir_store_var(&b, etc1_compat, nir_imm_true(&b), 1);
251          nir_def *tmp[3];
252          for (unsigned i = 0; i < 3; ++i)
253             tmp[i] = etc_extend(
254                &b,
255                nir_iand_imm(&b, nir_ushr(&b, color_y, nir_isub_imm(&b, 28 - 8 * i, nir_imul_imm(&b, subblock, 4))),
256                             0xf),
257                4);
258          nir_store_var(&b, base_rgb, nir_vec(&b, tmp, 3), 0x7);
259       }
260       nir_push_else(&b, NULL);
261       {
262          nir_def *rb = nir_ubfe_imm(&b, color_y, 27, 5);
263          nir_def *rd = nir_ibfe_imm(&b, color_y, 24, 3);
264          nir_def *gb = nir_ubfe_imm(&b, color_y, 19, 5);
265          nir_def *gd = nir_ibfe_imm(&b, color_y, 16, 3);
266          nir_def *bb = nir_ubfe_imm(&b, color_y, 11, 5);
267          nir_def *bd = nir_ibfe_imm(&b, color_y, 8, 3);
268          nir_def *r1 = nir_iadd(&b, rb, rd);
269          nir_def *g1 = nir_iadd(&b, gb, gd);
270          nir_def *b1 = nir_iadd(&b, bb, bd);
271 
272          nir_push_if(&b, nir_ugt_imm(&b, r1, 31));
273          {
274             nir_def *r0 =
275                nir_ior(&b, nir_ubfe_imm(&b, color_y, 24, 2), nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 27, 2), 2));
276             nir_def *g0 = nir_ubfe_imm(&b, color_y, 20, 4);
277             nir_def *b0 = nir_ubfe_imm(&b, color_y, 16, 4);
278             nir_def *r2 = nir_ubfe_imm(&b, color_y, 12, 4);
279             nir_def *g2 = nir_ubfe_imm(&b, color_y, 8, 4);
280             nir_def *b2 = nir_ubfe_imm(&b, color_y, 4, 4);
281             nir_def *da =
282                nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 2), 1), nir_iand_imm(&b, color_y, 1));
283             nir_def *dist = etc2_distance_lookup(&b, da);
284             nir_def *index = nir_ior(&b, lsb, msb);
285 
286             nir_store_var(&b, punchthrough,
287                           nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)),
288                           0x1);
289             nir_push_if(&b, nir_ieq_imm(&b, index, 0));
290             {
291                nir_store_var(&b, rgb_result, etc_extend(&b, nir_vec3(&b, r0, g0, b0), 4), 0x7);
292             }
293             nir_push_else(&b, NULL);
294             {
295 
296                nir_def *tmp = nir_iadd(&b, etc_extend(&b, nir_vec3(&b, r2, g2, b2), 4),
297                                        nir_imul(&b, dist, nir_isub_imm(&b, 2, index)));
298                nir_store_var(&b, rgb_result, tmp, 0x7);
299             }
300             nir_pop_if(&b, NULL);
301          }
302          nir_push_else(&b, NULL);
303          nir_push_if(&b, nir_ugt_imm(&b, g1, 31));
304          {
305             nir_def *r0 = nir_ubfe_imm(&b, color_y, 27, 4);
306             nir_def *g0 = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 24, 3), 1),
307                                   nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 20), 1));
308             nir_def *b0 =
309                nir_ior(&b, nir_ubfe_imm(&b, color_y, 15, 3), nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 16), 8));
310             nir_def *r2 = nir_ubfe_imm(&b, color_y, 11, 4);
311             nir_def *g2 = nir_ubfe_imm(&b, color_y, 7, 4);
312             nir_def *b2 = nir_ubfe_imm(&b, color_y, 3, 4);
313             nir_def *da = nir_iand_imm(&b, color_y, 4);
314             nir_def *db = nir_iand_imm(&b, color_y, 1);
315             nir_def *d = nir_iadd(&b, da, nir_imul_imm(&b, db, 2));
316             nir_def *d0 = nir_iadd(&b, nir_ishl_imm(&b, r0, 16), nir_iadd(&b, nir_ishl_imm(&b, g0, 8), b0));
317             nir_def *d2 = nir_iadd(&b, nir_ishl_imm(&b, r2, 16), nir_iadd(&b, nir_ishl_imm(&b, g2, 8), b2));
318             d = nir_bcsel(&b, nir_uge(&b, d0, d2), nir_iadd_imm(&b, d, 1), d);
319             nir_def *dist = etc2_distance_lookup(&b, d);
320             nir_def *base = nir_bcsel(&b, nir_ine_imm(&b, msb, 0), nir_vec3(&b, r2, g2, b2), nir_vec3(&b, r0, g0, b0));
321             base = etc_extend(&b, base, 4);
322             base = nir_iadd(&b, base, nir_imul(&b, dist, nir_isub_imm(&b, 1, nir_imul_imm(&b, lsb, 2))));
323             nir_store_var(&b, rgb_result, base, 0x7);
324             nir_store_var(&b, punchthrough,
325                           nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)),
326                           0x1);
327          }
328          nir_push_else(&b, NULL);
329          nir_push_if(&b, nir_ugt_imm(&b, b1, 31));
330          {
331             nir_def *r0 = nir_ubfe_imm(&b, color_y, 25, 6);
332             nir_def *g0 =
333                nir_ior(&b, nir_ubfe_imm(&b, color_y, 17, 6), nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 18), 0x40));
334             nir_def *b0 = nir_ior(
335                &b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 11, 2), 3),
336                nir_ior(&b, nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 11), 0x20), nir_ubfe_imm(&b, color_y, 7, 3)));
337             nir_def *rh =
338                nir_ior(&b, nir_iand_imm(&b, color_y, 1), nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 5), 1));
339             nir_def *rv = nir_ubfe_imm(&b, color_x, 13, 6);
340             nir_def *gh = nir_ubfe_imm(&b, color_x, 25, 7);
341             nir_def *gv = nir_ubfe_imm(&b, color_x, 6, 7);
342             nir_def *bh = nir_ubfe_imm(&b, color_x, 19, 6);
343             nir_def *bv = nir_ubfe_imm(&b, color_x, 0, 6);
344 
345             r0 = etc_extend(&b, r0, 6);
346             g0 = etc_extend(&b, g0, 7);
347             b0 = etc_extend(&b, b0, 6);
348             rh = etc_extend(&b, rh, 6);
349             rv = etc_extend(&b, rv, 6);
350             gh = etc_extend(&b, gh, 7);
351             gv = etc_extend(&b, gv, 7);
352             bh = etc_extend(&b, bh, 6);
353             bv = etc_extend(&b, bv, 6);
354 
355             nir_def *rgb = nir_vec3(&b, r0, g0, b0);
356             nir_def *dx = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rh, gh, bh), rgb), nir_channel(&b, pixel_coord, 0));
357             nir_def *dy = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rv, gv, bv), rgb), nir_channel(&b, pixel_coord, 1));
358             rgb = nir_iadd(&b, rgb, nir_ishr_imm(&b, nir_iadd_imm(&b, nir_iadd(&b, dx, dy), 2), 2));
359             nir_store_var(&b, rgb_result, rgb, 0x7);
360             nir_store_var(&b, punchthrough, nir_imm_false(&b), 0x1);
361          }
362          nir_push_else(&b, NULL);
363          {
364             nir_store_var(&b, etc1_compat, nir_imm_true(&b), 1);
365             nir_def *subblock_b = nir_ine_imm(&b, subblock, 0);
366             nir_def *tmp[] = {
367                nir_bcsel(&b, subblock_b, r1, rb),
368                nir_bcsel(&b, subblock_b, g1, gb),
369                nir_bcsel(&b, subblock_b, b1, bb),
370             };
371             nir_store_var(&b, base_rgb, etc_extend(&b, nir_vec(&b, tmp, 3), 5), 0x7);
372          }
373          nir_pop_if(&b, NULL);
374          nir_pop_if(&b, NULL);
375          nir_pop_if(&b, NULL);
376       }
377       nir_pop_if(&b, NULL);
378       nir_push_if(&b, nir_load_var(&b, etc1_compat));
379       {
380          nir_def *etc1_table_index =
381             nir_ubfe(&b, color_y, nir_isub_imm(&b, 5, nir_imul_imm(&b, subblock, 3)), nir_imm_int(&b, 3));
382          nir_def *sgn = nir_isub_imm(&b, 1, msb);
383          sgn = nir_bcsel(&b, nir_load_var(&b, punchthrough), nir_imul(&b, sgn, lsb), sgn);
384          nir_store_var(&b, punchthrough,
385                        nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), 0x1);
386          nir_def *off = nir_imul(&b, etc1_color_modifier_lookup(&b, etc1_table_index, lsb), sgn);
387          nir_def *result = nir_iadd(&b, nir_load_var(&b, base_rgb), off);
388          nir_store_var(&b, rgb_result, result, 0x7);
389       }
390       nir_pop_if(&b, NULL);
391       nir_push_if(&b, nir_load_var(&b, punchthrough));
392       {
393          nir_store_var(&b, alpha_result, nir_imm_float(&b, 0), 0x1);
394          nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 0, 0, 0), 0x7);
395       }
396       nir_pop_if(&b, NULL);
397       nir_def *col[4];
398       for (unsigned i = 0; i < 3; ++i)
399          col[i] = nir_fdiv_imm(&b, nir_i2f32(&b, nir_channel(&b, nir_load_var(&b, rgb_result), i)), 255.0);
400       col[3] = nir_load_var(&b, alpha_result);
401       nir_store_var(&b, color, nir_vec(&b, col, 4), 0xf);
402    }
403    nir_push_else(&b, NULL);
404    { /* EAC */
405       nir_def *is_signed = nir_ior(&b, nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11_SNORM_BLOCK),
406                                    nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11G11_SNORM_BLOCK));
407       nir_def *val[4];
408       for (int i = 0; i < 2; ++i) {
409          val[i] = decode_etc2_alpha(&b, nir_channels(&b, payload, 3 << (2 * i)), linear_pixel, true, is_signed);
410       }
411       val[2] = nir_imm_float(&b, 0.0);
412       val[3] = nir_imm_float(&b, 1.0);
413       nir_store_var(&b, color, nir_vec(&b, val, 4), 0xf);
414    }
415    nir_pop_if(&b, NULL);
416 
417    nir_def *outval = nir_load_var(&b, color);
418    nir_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), nir_channel(&b, coord, 2),
419                                  nir_undef(&b, 1, 32));
420 
421    nir_push_if(&b, is_3d);
422    {
423       nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_3d)->def, img_coord, nir_undef(&b, 1, 32), outval,
424                             nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_3D);
425    }
426    nir_push_else(&b, NULL);
427    {
428       nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_2d)->def, img_coord, nir_undef(&b, 1, 32), outval,
429                             nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
430    }
431    nir_pop_if(&b, NULL);
432    return b.shader;
433 }
434 
435 static VkResult
etc2_init_pipeline(struct vk_device * device,struct vk_texcompress_etc2_state * etc2)436 etc2_init_pipeline(struct vk_device *device, struct vk_texcompress_etc2_state *etc2)
437 {
438    const struct vk_device_dispatch_table *disp = &device->dispatch_table;
439    VkDevice _device = vk_device_to_handle(device);
440 
441    nir_shader *cs = etc2_build_shader(device, etc2->nir_options);
442 
443    const VkComputePipelineCreateInfo pipeline_create_info = {
444       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
445       .stage =
446          (VkPipelineShaderStageCreateInfo){
447             .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
448             .stage = VK_SHADER_STAGE_COMPUTE_BIT,
449             .module = vk_shader_module_handle_from_nir(cs),
450             .pName = "main",
451          },
452       .layout = etc2->pipeline_layout,
453    };
454 
455    return disp->CreateComputePipelines(_device, etc2->pipeline_cache, 1, &pipeline_create_info, etc2->allocator,
456                                        &etc2->pipeline);
457 }
458 
459 static VkResult
etc2_init_pipeline_layout(struct vk_device * device,struct vk_texcompress_etc2_state * etc2)460 etc2_init_pipeline_layout(struct vk_device *device, struct vk_texcompress_etc2_state *etc2)
461 {
462    const struct vk_device_dispatch_table *disp = &device->dispatch_table;
463    VkDevice _device = vk_device_to_handle(device);
464 
465    const VkPipelineLayoutCreateInfo pipeline_layout_create_info = {
466       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
467       .setLayoutCount = 1,
468       .pSetLayouts = &etc2->ds_layout,
469       .pushConstantRangeCount = 1,
470       .pPushConstantRanges =
471          &(VkPushConstantRange){
472             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
473             .size = 20,
474          },
475    };
476 
477    return disp->CreatePipelineLayout(_device, &pipeline_layout_create_info, etc2->allocator, &etc2->pipeline_layout);
478 }
479 
480 static VkResult
etc2_init_ds_layout(struct vk_device * device,struct vk_texcompress_etc2_state * etc2)481 etc2_init_ds_layout(struct vk_device *device, struct vk_texcompress_etc2_state *etc2)
482 {
483    const struct vk_device_dispatch_table *disp = &device->dispatch_table;
484    VkDevice _device = vk_device_to_handle(device);
485 
486    const VkDescriptorSetLayoutCreateInfo ds_layout_create_info = {
487       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
488       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
489       .bindingCount = 2,
490       .pBindings =
491          (VkDescriptorSetLayoutBinding[]){
492             {
493                .binding = 0,
494                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
495                .descriptorCount = 1,
496                .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
497             },
498             {
499                .binding = 1,
500                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
501                .descriptorCount = 1,
502                .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
503             },
504          },
505    };
506 
507    return disp->CreateDescriptorSetLayout(_device, &ds_layout_create_info, etc2->allocator, &etc2->ds_layout);
508 }
509 
510 void
vk_texcompress_etc2_init(struct vk_device * device,struct vk_texcompress_etc2_state * etc2)511 vk_texcompress_etc2_init(struct vk_device *device, struct vk_texcompress_etc2_state *etc2)
512 {
513    simple_mtx_init(&etc2->mutex, mtx_plain);
514 }
515 
516 VkResult
vk_texcompress_etc2_late_init(struct vk_device * device,struct vk_texcompress_etc2_state * etc2)517 vk_texcompress_etc2_late_init(struct vk_device *device, struct vk_texcompress_etc2_state *etc2)
518 {
519    VkResult result = VK_SUCCESS;
520 
521    simple_mtx_lock(&etc2->mutex);
522 
523    if (!etc2->pipeline) {
524       const struct vk_device_dispatch_table *disp = &device->dispatch_table;
525       VkDevice _device = vk_device_to_handle(device);
526 
527       result = etc2_init_ds_layout(device, etc2);
528       if (result != VK_SUCCESS)
529          goto out;
530 
531       result = etc2_init_pipeline_layout(device, etc2);
532       if (result != VK_SUCCESS) {
533          disp->DestroyDescriptorSetLayout(_device, etc2->ds_layout, etc2->allocator);
534          goto out;
535       }
536 
537       result = etc2_init_pipeline(device, etc2);
538       if (result != VK_SUCCESS) {
539          disp->DestroyPipelineLayout(_device, etc2->pipeline_layout, etc2->allocator);
540          disp->DestroyDescriptorSetLayout(_device, etc2->ds_layout, etc2->allocator);
541          goto out;
542       }
543    }
544 
545 out:
546    simple_mtx_unlock(&etc2->mutex);
547    return result;
548 }
549 
550 void
vk_texcompress_etc2_finish(struct vk_device * device,struct vk_texcompress_etc2_state * etc2)551 vk_texcompress_etc2_finish(struct vk_device *device, struct vk_texcompress_etc2_state *etc2)
552 {
553    const struct vk_device_dispatch_table *disp = &device->dispatch_table;
554    VkDevice _device = vk_device_to_handle(device);
555 
556    if (etc2->pipeline != VK_NULL_HANDLE)
557       disp->DestroyPipeline(_device, etc2->pipeline, etc2->allocator);
558 
559    if (etc2->pipeline_layout != VK_NULL_HANDLE)
560       disp->DestroyPipelineLayout(_device, etc2->pipeline_layout, etc2->allocator);
561    if (etc2->ds_layout != VK_NULL_HANDLE)
562       disp->DestroyDescriptorSetLayout(_device, etc2->ds_layout, etc2->allocator);
563 
564    simple_mtx_destroy(&etc2->mutex);
565 }
566