/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "nir_test.h"

/* This is a macro so you get good line numbers */
#define EXPECT_INSTR_SWIZZLES(instr, load, expected_swizzle)    \
   EXPECT_EQ((instr)->src[0].src.ssa, &(load)->def);       \
   EXPECT_EQ(swizzle(instr, 0), expected_swizzle);

namespace {

class nir_load_store_vectorize_test : public nir_test {
protected:
   nir_load_store_vectorize_test()
      : nir_test::nir_test("nir_load_store_vectorize_test")
   {
   }

   unsigned count_intrinsics(nir_intrinsic_op intrinsic);

   nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
                                      unsigned index);

   bool run_vectorizer(nir_variable_mode modes, bool cse=false,
                       nir_variable_mode robust_modes = (nir_variable_mode)0);

   nir_def *get_resource(uint32_t binding, bool ssbo);

   nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_def *offset,
                                             uint32_t id, unsigned bit_size=32, unsigned components=1,
                                             unsigned access=0);
   void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_def *offset,
                              uint32_t id, unsigned bit_size=32, unsigned components=1,
                              unsigned wrmask=0xf, unsigned access=0);

   nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                                    uint32_t id, unsigned bit_size=32, unsigned components=1,
                                    unsigned access=0);
   void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                     uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
                     unsigned access=0);

   void create_shared_load(nir_deref_instr *deref, uint32_t id,
                           unsigned bit_size=32, unsigned components=1);
   void create_shared_store(nir_deref_instr *deref, uint32_t id,
                            unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);

   bool test_alu(nir_instr *instr, nir_op op);
   bool test_alu_def(nir_instr *instr, unsigned index, nir_def *def, unsigned swizzle=0);

   static bool mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
                                      unsigned bit_size,
                                      unsigned num_components,
                                      nir_intrinsic_instr *low, nir_intrinsic_instr *high,
                                      void *data);
   static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);

   std::string swizzle(nir_alu_instr *instr, int src);

   std::map<unsigned, nir_alu_instr*> movs;
   std::map<unsigned, nir_alu_src*> loads;
   std::map<unsigned, nir_def*> res_map;
};

std::string
nir_load_store_vectorize_test::swizzle(nir_alu_instr *instr, int src)
{
   std::string swizzle;
   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(instr, src); i++) {
      swizzle += "xyzw"[instr->src[src].swizzle[i]];
   }

   return swizzle;
}

unsigned
nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
{
   unsigned count = 0;
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic)
            count++;
      }
   }
   return count;
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
                             unsigned index)
{
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic) {
            if (index == 0)
               return intrin;
            index--;
         }
      }
   }
   return NULL;
}

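/* Lower shared variables to explicit types if needed, run the vectorizer with
 * mem_vectorize_callback, and (on progress) validate the shader and run
 * copy-prop, algebraic and constant-folding cleanups so the tests can match
 * simple patterns in the result.
 */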
bool
nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes,
                                              bool cse,
                                              nir_variable_mode robust_modes)
{
   if (modes & nir_var_mem_shared)
      nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);

   nir_load_store_vectorize_options opts = { };
   opts.callback = mem_vectorize_callback;
   opts.modes = modes;
   opts.robust_modes = robust_modes;
   bool progress = nir_opt_load_store_vectorize(b->shader, &opts);

   if (progress) {
      nir_validate_shader(b->shader, NULL);
      if (cse)
         nir_opt_cse(b->shader);
      nir_copy_prop(b->shader);
      nir_opt_algebraic(b->shader);
      nir_opt_constant_folding(b->shader);
   }
   return progress;
}

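/* Return a cached vulkan_resource_index def for the given binding (SSBO or UBO). */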
nir_def *
nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
{
   if (res_map.count(binding))
      return res_map[binding];

   nir_intrinsic_instr *res = nir_intrinsic_instr_create(
      b->shader, nir_intrinsic_vulkan_resource_index);
   nir_def_init(&res->instr, &res->def, 1, 32);
   res->num_components = 1;
   res->src[0] = nir_src_for_ssa(nir_imm_zero(b, 1, 32));
   nir_intrinsic_set_desc_type(
      res, ssbo ? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
   nir_intrinsic_set_desc_set(res, 0);
   nir_intrinsic_set_binding(res, binding);
   nir_builder_instr_insert(b, &res->instr);
   res_map[binding] = &res->def;
   return &res->def;
}

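/* Emit a UBO/SSBO/push-constant load at an SSA offset and record a mov of its
 * result in movs[id]/loads[id], so tests can later check which components of
 * the vectorized load each original load reads.
 */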
nir_intrinsic_instr *
nir_load_store_vectorize_test::create_indirect_load(
   nir_variable_mode mode, uint32_t binding, nir_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   nir_intrinsic_op intrinsic;
   nir_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ubo:
      intrinsic = nir_intrinsic_load_ubo;
      res = get_resource(binding, false);
      break;
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_load_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_push_const:
      intrinsic = nir_intrinsic_load_push_constant;
      break;
   default:
      return NULL;
   }
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_def_init(&load->instr, &load->def, components, bit_size);
   load->num_components = components;
   if (res) {
      load->src[0] = nir_src_for_ssa(res);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }
   int byte_size = (bit_size == 1 ? 32 : bit_size) / 8;

   nir_intrinsic_set_align(load, byte_size, 0);
   if (mode != nir_var_mem_push_const) {
      nir_intrinsic_set_access(load, (gl_access_qualifier)access);
   }

   if (nir_intrinsic_has_range_base(load)) {
      uint32_t range = byte_size * components;
      int offset_src = res ? 1 : 0;

      if (nir_src_is_const(load->src[offset_src])) {
         nir_intrinsic_set_range_base(load, nir_src_as_uint(load->src[offset_src]));
         nir_intrinsic_set_range(load, range);
      } else {
         /* Unknown range */
         nir_intrinsic_set_range_base(load, 0);
         nir_intrinsic_set_range(load, ~0);
      }
   }

   nir_builder_instr_insert(b, &load->instr);
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->def)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];

   return load;
}

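/* Emit an SSBO/shared store of an immediate value whose components are
 * (id << 4) | component, so vectorized store data is easy to recognize in
 * the tests.
 */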
void
nir_load_store_vectorize_test::create_indirect_store(
   nir_variable_mode mode, uint32_t binding, nir_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_def *value = nir_build_imm(b, components, bit_size, values);

   nir_intrinsic_op intrinsic;
   nir_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_store_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_shared:
      intrinsic = nir_intrinsic_store_shared;
      break;
   default:
      return;
   }
   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_def_init(&store->instr, &store->def, components, bit_size);
   store->num_components = components;
   if (res) {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(res);
      store->src[2] = nir_src_for_ssa(offset);
   } else {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(offset);
   }
   nir_intrinsic_set_align(store, (bit_size == 1 ? 32 : bit_size) / 8, 0);
   nir_intrinsic_set_access(store, (gl_access_qualifier)access);
   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
   nir_builder_instr_insert(b, &store->instr);
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::create_load(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
}

void
nir_load_store_vectorize_test::create_store(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   create_indirect_store(mode, binding, nir_imm_int(b, offset), id, bit_size, components, wrmask, access);
}

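/* Deref-based variants of the helpers above, used by the shared-memory tests. */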
void nir_load_store_vectorize_test::create_shared_load(
   nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components)
{
   nir_def *load = nir_load_deref(b, deref);
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, load)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];
}

void nir_load_store_vectorize_test::create_shared_store(
   nir_deref_instr *deref, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_def *value = nir_build_imm(b, components, bit_size, values);

   nir_store_deref(b, deref, value, wrmask & ((1 << components) - 1));
}

bool nir_load_store_vectorize_test::test_alu(nir_instr *instr, nir_op op)
{
   return instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->op == op;
}

bool nir_load_store_vectorize_test::test_alu_def(
   nir_instr *instr, unsigned index, nir_def *def, unsigned swizzle)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (index >= nir_op_infos[alu->op].num_inputs)
      return false;
   if (alu->src[index].src.ssa != def)
      return false;
   if (alu->src[index].swizzle[0] != swizzle)
      return false;

   return true;
}

bool nir_load_store_vectorize_test::mem_vectorize_callback(
   unsigned align_mul, unsigned align_offset, unsigned bit_size,
   unsigned num_components,
   nir_intrinsic_instr *low, nir_intrinsic_instr *high,
   void *data)
{
   /* Calculate a simple alignment, like how nir_intrinsic_align() does. */
   uint32_t align = align_mul;
   if (align_offset)
      align = 1 << (ffs(align_offset) - 1);

   /* Require scalar alignment and less than 5 components. */
   return align % (bit_size / 8) == 0 &&
          num_components <= 4;
}

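/* Size/align callback for nir_lower_vars_to_explicit_types: vectors are
 * tightly packed with scalar alignment (booleans count as 32-bit).
 */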
void nir_load_store_vectorize_test::shared_type_info(
   const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}
} // namespace

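/* Each test below emits a few loads/stores, runs the vectorizer on the
 * relevant mode(s), and then checks the resulting intrinsic count, bit size,
 * component count and per-use swizzles.
 */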
TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 8);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 3);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 12);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "yz");
}

/* Test for a bug in range handling */
TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting_range)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
   create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 1);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 4);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 16);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->def);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->def);
   ASSERT_EQ(loads[0x1]->swizzle[0], 0);
   ASSERT_EQ(loads[0x1]->swizzle[1], 1);
   ASSERT_EQ(loads[0x1]->swizzle[2], 2);
   ASSERT_EQ(loads[0x1]->swizzle[3], 3);
   ASSERT_EQ(loads[0x2]->swizzle[0], 1);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);
   create_load(nir_var_mem_ubo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 1);
   ASSERT_EQ(nir_intrinsic_range_base(load), 0);
   ASSERT_EQ(nir_intrinsic_range(load), 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   ASSERT_EQ(loads[0x1]->src.ssa, &load->def);
   ASSERT_EQ(loads[0x2]->src.ssa, &load->def);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ubo_load_large)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_load(nir_var_mem_push_const, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 0, 0x2), 4);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect)
{
   nir_def *index_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, index_base, 4), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub)
{
   nir_def *index_base = nir_load_local_invocation_index(b);
   nir_def *index_base_prev = nir_iadd_imm(b, index_base, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, index_base_prev);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride)
{
   nir_def *inv = nir_load_local_invocation_index(b);
   nir_def *inv_plus_one = nir_iadd_imm(b, inv, 1);
   nir_def *index_base = nir_imul_imm(b, inv, 0xfffffffc);
   nir_def *index_base_prev = nir_imul_imm(b, inv_plus_one, 0xfffffffc);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");

   /* nir_opt_algebraic optimizes the imul */
   ASSERT_TRUE(test_alu(load->src[1].ssa->parent_instr, nir_op_ineg));
   nir_def *offset = nir_instr_as_alu(load->src[1].ssa->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(offset->parent_instr, nir_op_ishl));
   nir_alu_instr *shl = nir_instr_as_alu(offset->parent_instr);
   ASSERT_EQ(shl->src[0].src.ssa, inv_plus_one);
   ASSERT_EQ(nir_src_as_uint(shl->src[1].src), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_load_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_load(nir_var_mem_ssbo, 0, 0, 0x2);
   create_store(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

/* if nir_opt_load_store_vectorize were implemented like many load/store
 * optimization passes are (for example, nir_opt_combine_stores and
 * nir_opt_copy_prop_vars) and stopped tracking a load when an aliasing store is
 * encountered, this case wouldn't be optimized.
 * A similar test for derefs is shared_load_adjacent_store_identical. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 4, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
   nir_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x21);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x1);
   nir_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_EQ(nir_src_as_uint(store->src[0]), 0x20);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_large)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ubo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ubo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_ssbo);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

/* A control barrier may only sync invocations in a workgroup; it doesn't
 * require that loads/stores complete.
 */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_barrier(b, SCOPE_WORKGROUP, SCOPE_NONE,
                      (nir_memory_semantics)0, (nir_variable_mode)0);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);

   nir_scoped_memory_barrier(b, SCOPE_WORKGROUP, NIR_MEMORY_ACQ_REL,
                             nir_var_mem_shared);

   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_discard)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_discard(b);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_demote)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_demote(b);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_discard)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_discard(b);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_demote)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   nir_demote(b);
   create_load(nir_var_mem_ssbo, 0, 4, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8);
   create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8);
   create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 8);
   ASSERT_EQ(load->def.num_components, 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");

   nir_def *val = loads[0x3]->src.ssa;
   ASSERT_EQ(val->bit_size, 16);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_ior));
   nir_def *low = nir_instr_as_alu(val->parent_instr)->src[0].src.ssa;
   nir_def *high = nir_instr_as_alu(val->parent_instr)->src[1].src.ssa;
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_ishl));
   high = nir_instr_as_alu(high->parent_instr)->src[0].src.ssa;
   ASSERT_TRUE(test_alu(low->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu(high->parent_instr, nir_op_u2u16));
   ASSERT_TRUE(test_alu_def(low->parent_instr, 0, &load->def, 2));
   ASSERT_TRUE(test_alu_def(high->parent_instr, 0, &load->def, 3));
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 4);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");

   nir_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(pack, load, "zw");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
   create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 64);
   ASSERT_EQ(load->def.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "z");

   nir_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_mov));
   nir_alu_instr *mov = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(mov, load, "y");

   val = loads[0x1]->src.ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_unpack_64_2x32));
   nir_alu_instr *unpack = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(unpack, load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64)
{
   create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2);
   create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 3);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 4);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");

   nir_def *val = loads[0x2]->src.ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 1);
   ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
   nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
   EXPECT_INSTR_SWIZZLES(pack, load, "yz");
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8);
   create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8);
   create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 8);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 8), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 8), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 8), 0x30);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 8), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x11);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
   create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 64);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 64), 0x1100000010ull);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 64), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 64), 0x30);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
   nir_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 3);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x0);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32);
   create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask)
{
   create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
   ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
   nir_def *val = store->src[0].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 4);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x21);
   ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x22);
   ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x23);
}

TEST_F(nir_load_store_vectorize_test, shared_load_adjacent)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   nir_def_init(&deref->instr, &deref->def, 1, 64);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0x100000000), 0x1);
   create_shared_load(nir_build_deref_array_imm(b, deref, 0x200000001), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
}

TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   nir_def *index_base = nir_load_local_invocation_index(b);

   create_shared_load(nir_build_deref_array(b, deref, index_base), 0x1);
   create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index_base, 1)), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(deref->arr.index.ssa, index_base);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   nir_def *index_base = nir_load_local_invocation_index(b);
   nir_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff);

   create_shared_load(nir_build_deref_array(b, deref, index_base_prev), 0x1);
   create_shared_load(nir_build_deref_array(b, deref, index_base), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(deref->arr.index.ssa, index_base_prev);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

TEST_F(nir_load_store_vectorize_test,shared_load_struct)1287 TEST_F(nir_load_store_vectorize_test, shared_load_struct)
1288 {
1289    glsl_struct_field fields[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
1290                                   glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1291 
1292    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1293    nir_deref_instr *deref = nir_build_deref_var(b, var);
1294 
1295    create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1);
1296    create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1297 
1298    nir_validate_shader(b->shader, NULL);
1299    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1300 
1301    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1302 
1303    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1304 
1305    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1306    ASSERT_EQ(load->def.bit_size, 32);
1307    ASSERT_EQ(load->def.num_components, 2);
1308 
1309    deref = nir_src_as_deref(load->src[0]);
1310    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1311 
1312    deref = nir_deref_instr_parent(deref);
1313    ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1314    ASSERT_EQ(deref->strct.index, 0);
1315 
1316    deref = nir_deref_instr_parent(deref);
1317    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1318    ASSERT_EQ(deref->var, var);
1319 
1320    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1321    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1322 }
1323 
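/* The store below writes element 1, which does not overlap the two identical
 * loads of element 0, so the loads are expected to collapse into one scalar
 * (single-component) load, as the assertions on num_components check. */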
1324 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent)
1325 {
1326    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1327    nir_deref_instr *deref = nir_build_deref_var(b, var);
1328 
1329    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1330    create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1331    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1332 
1333    nir_validate_shader(b->shader, NULL);
1334    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1335    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1336 
1337    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1338 
1339    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1340    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1341 
1342    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1343    ASSERT_EQ(load->def.bit_size, 32);
1344    ASSERT_EQ(load->def.num_components, 1);
1345 
1346    deref = nir_src_as_deref(load->src[0]);
1347    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1348    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1349 
1350    deref = nir_deref_instr_parent(deref);
1351    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1352    ASSERT_EQ(deref->var, var);
1353 
1354    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1355    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1356 }
1357 
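/* Here the intervening store writes the same element that both loads read, so
 * the second load must observe the stored value and no combining is expected:
 * the vectorizer reports no progress and both loads remain. */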
1358 TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical)
1359 {
1360    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1361    nir_deref_instr *deref = nir_build_deref_var(b, var);
1362 
1363    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1364    create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1365    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1366 
1367    nir_validate_shader(b->shader, NULL);
1368    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1369 
1370    EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1371 
1372    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1373 }
1374 
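/* The store only writes element 0 and the second load reads element 1, so the
 * loads at elements 0 and 1 can presumably still be merged into one vec2 load
 * without changing the values observed around the store. */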
1375 TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical)
1376 {
1377    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1378    nir_deref_instr *deref = nir_build_deref_var(b, var);
1379 
1380    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1381    create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1382    create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x3);
1383 
1384    nir_validate_shader(b->shader, NULL);
1385    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1386    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1387 
1388    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1389 
1390    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1391    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1392 
1393    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1394    ASSERT_EQ(load->def.bit_size, 32);
1395    ASSERT_EQ(load->def.num_components, 2);
1396 
1397    deref = nir_src_as_deref(load->src[0]);
1398    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1399 
1400    deref = nir_deref_instr_parent(deref);
1401    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1402    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1403 
1404    deref = nir_deref_instr_parent(deref);
1405    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1406    ASSERT_EQ(deref->var, var);
1407 
1408    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1409    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
1410 }
1411 
1412 TEST_F(nir_load_store_vectorize_test, shared_load_bool)
1413 {
1414    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_bool_type(), 4, 0), "var");
1415    nir_deref_instr *deref = nir_build_deref_var(b, var);
1416 
1417    create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1, 1);
1418    create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2, 1);
1419 
1420    nir_validate_shader(b->shader, NULL);
1421    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1422 
1423    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1424 
1425    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1426 
1427    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1428    ASSERT_EQ(load->def.bit_size, 32);
1429    ASSERT_EQ(load->def.num_components, 2);
1430 
1431    deref = nir_src_as_deref(load->src[0]);
1432    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1433 
1434    deref = nir_deref_instr_parent(deref);
1435    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1436    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1437 
1438    deref = nir_deref_instr_parent(deref);
1439    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1440    ASSERT_EQ(deref->var, var);
1441 
1442    /* The loaded value is converted to Boolean by (loaded != 0). */
1443    ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_ine));
1444    ASSERT_TRUE(test_alu(loads[0x2]->src.ssa->parent_instr, nir_op_ine));
1445    ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->def, 0));
1446    ASSERT_TRUE(test_alu_def(loads[0x2]->src.ssa->parent_instr, 0, &load->def, 1));
1447 }
1448 
1449 TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed)
1450 {
1451    glsl_struct_field fields[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
1452                                   glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1453 
1454    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1455    nir_deref_instr *deref = nir_build_deref_var(b, var);
1456 
1457    create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1, 1);
1458    create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1459 
1460    nir_validate_shader(b->shader, NULL);
1461    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1462 
1463    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1464 
1465    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1466 
1467    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1468    ASSERT_EQ(load->def.bit_size, 32);
1469    ASSERT_EQ(load->def.num_components, 2);
1470 
1471    deref = nir_src_as_deref(load->src[0]);
1472    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1473 
1474    deref = nir_deref_instr_parent(deref);
1475    ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1476    ASSERT_EQ(deref->strct.index, 0);
1477 
1478    deref = nir_deref_instr_parent(deref);
1479    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1480    ASSERT_EQ(deref->var, var);
1481 
1482    /* The loaded value is converted to Boolean by (loaded != 0). */
1483    ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_ine));
1484    ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->def, 0));
1485 
1486    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1487 }
1488 
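/* Two scalar stores to adjacent elements are expected to become one vec2
 * store whose write mask covers both components (0x3) and whose constant
 * payload holds both original values. */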
1489 TEST_F(nir_load_store_vectorize_test, shared_store_adjacent)
1490 {
1491    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1492    nir_deref_instr *deref = nir_build_deref_var(b, var);
1493 
1494    create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x1);
1495    create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1496 
1497    nir_validate_shader(b->shader, NULL);
1498    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);
1499 
1500    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1501 
1502    ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1503 
1504    nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_deref, 0);
1505    ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
1506    nir_def *val = store->src[1].ssa;
1507    ASSERT_EQ(val->bit_size, 32);
1508    ASSERT_EQ(val->num_components, 2);
1509    nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1510    ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1511    ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1512 
1513    deref = nir_src_as_deref(store->src[0]);
1514    ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1515 
1516    deref = nir_deref_instr_parent(deref);
1517    ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1518    ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1519 
1520    deref = nir_deref_instr_parent(deref);
1521    ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1522    ASSERT_EQ(deref->var, var);
1523 }
1524 
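/* The second load carries base=4 in addition to its offset of 4, so its
 * effective address is presumably 8 rather than 4; the loads are therefore
 * not adjacent and are expected to stay separate. */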
1525 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base)
1526 {
1527    create_load(nir_var_mem_push_const, 0, 0, 0x1);
1528    nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 4, 0x2), 4);
1529 
1530    nir_validate_shader(b->shader, NULL);
1531    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1532 
1533    EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1534 
1535    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1536 }
1537 
1538 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct)
1539 {
1540    create_load(nir_var_mem_push_const, 0, 0, 0x1);
1541    create_load(nir_var_mem_push_const, 0, 8, 0x2);
1542 
1543    nir_validate_shader(b->shader, NULL);
1544    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1545 
1546    EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1547 
1548    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1549 }
1550 
1551 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect)
1552 {
1553    nir_def *index_base = nir_load_local_invocation_index(b);
1554    create_load(nir_var_mem_push_const, 0, 0, 0x1);
1555    create_indirect_load(nir_var_mem_push_const, 0, index_base, 0x2);
1556 
1557    nir_validate_shader(b->shader, NULL);
1558    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1559 
1560    EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1561 
1562    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1563 }
1564 
1565 TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect)
1566 {
1567    nir_def *index_base = nir_load_local_invocation_index(b);
1568    create_indirect_load(nir_var_mem_push_const, 0,
1569       nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 2), 16), 32), 0x1);
1570    create_indirect_load(nir_var_mem_push_const, 0,
1571       nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 3), 16), 32), 0x2);
1572 
1573    nir_validate_shader(b->shader, NULL);
1574    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1575 
1576    EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1577 
1578    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1579 }
1580 
1581 TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect)
1582 {
1583    nir_def *index_base = nir_load_local_invocation_index(b);
1584    // vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x;
1585    nir_def *low = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 12);
1586    nir_def *high = nir_imul_imm(b, nir_iadd_imm(b, index_base, 1), 16);
1587    create_indirect_load(nir_var_mem_push_const, 0, low, 0x1);
1588    create_indirect_load(nir_var_mem_push_const, 0, high, 0x2);
1589 
1590    nir_validate_shader(b->shader, NULL);
1591    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
1592 
1593    EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
1594 
1595    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
1596 
1597    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
1598    ASSERT_EQ(load->def.bit_size, 32);
1599    ASSERT_EQ(load->def.num_components, 2);
1600    ASSERT_EQ(load->src[0].ssa, low);
1601    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1602    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1603 }
1604 
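/* The indirect store may target any offset, including 0, so it may alias the
 * two loads at offset 0; the loads are expected to remain separate. */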
1605 TEST_F(nir_load_store_vectorize_test, ssbo_alias0)
1606 {
1607    nir_def *index_base = nir_load_local_invocation_index(b);
1608    create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1609    create_indirect_store(nir_var_mem_ssbo, 0, index_base, 0x2);
1610    create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1611 
1612    nir_validate_shader(b->shader, NULL);
1613    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1614 
1615    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1616 
1617    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1618 }
1619 
1620 TEST_F(nir_load_store_vectorize_test, ssbo_alias1)
1621 {
1622    nir_def *load_base = nir_load_global_invocation_index(b, 32);
1623    nir_def *store_base = nir_load_local_invocation_index(b);
1624    create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x1);
1625    create_indirect_store(nir_var_mem_ssbo, 0, store_base, 0x2);
1626    create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x3);
1627 
1628    nir_validate_shader(b->shader, NULL);
1629    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1630 
1631    EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));
1632 
1633    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1634 }
1635 
1636 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2)
1637 {
1638    /* TODO: try to combine these loads */
1639    nir_def *index_base = nir_load_local_invocation_index(b);
1640    nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 4);
1641    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1642    create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1643    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1644 
1645    nir_validate_shader(b->shader, NULL);
1646    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1647 
1648    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1649 
1650    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1651 
1652    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1653    ASSERT_EQ(load->def.bit_size, 32);
1654    ASSERT_EQ(load->def.num_components, 1);
1655    ASSERT_EQ(load->src[1].ssa, offset);
1656    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1657    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1658 }
1659 
1660 TEST_F(nir_load_store_vectorize_test, ssbo_alias3)
1661 {
1662    /* These loads could be combined if nir_alu_instr::no_unsigned_wrap were set
1663     * on the offset addition. As-is they can't be: if index_base == 268435455,
1664     * the addition wraps around and offset == 0, aliasing the store at offset 0. */
1665    nir_def *index_base = nir_load_local_invocation_index(b);
1666    nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
1667    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1668    create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1669    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1670 
1671    nir_validate_shader(b->shader, NULL);
1672    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1673 
1674    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1675 
1676    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1677 }
1678 
1679 TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
1680 {
1681    /* TODO: try to combine these loads */
1682    nir_def *index_base = nir_load_local_invocation_index(b);
1683    nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
1684    nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true;
1685    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1686    create_store(nir_var_mem_ssbo, 0, 0, 0x2);
1687    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);
1688 
1689    nir_validate_shader(b->shader, NULL);
1690    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1691 
1692    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1693 
1694    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1695 
1696    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1697    ASSERT_EQ(load->def.bit_size, 32);
1698    ASSERT_EQ(load->def.num_components, 1);
1699    ASSERT_EQ(load->src[1].ssa, offset);
1700    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1701    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1702 }
1703 
1704 TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
1705 {
1706    create_load(nir_var_mem_ssbo, 0, 0, 0x1);
1707    create_store(nir_var_mem_ssbo, 1, 0, 0x2);
1708    create_load(nir_var_mem_ssbo, 0, 0, 0x3);
1709 
1710    nir_validate_shader(b->shader, NULL);
1711    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1712 
1713    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1714 
1715    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1716 }
1717 
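/* With ACCESS_RESTRICT the store to binding 1 is assumed not to alias
 * binding 0, so the two identical loads from binding 0 are expected to
 * collapse into a single load. */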
1718 TEST_F(nir_load_store_vectorize_test, ssbo_alias6)
1719 {
1720    create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT);
1721    create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT);
1722    create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT);
1723 
1724    nir_validate_shader(b->shader, NULL);
1725    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1726 
1727    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1728 
1729    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1730 
1731    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1732    ASSERT_EQ(load->def.bit_size, 32);
1733    ASSERT_EQ(load->def.num_components, 1);
1734    ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
1735    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1736    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1737 }
1738 
1739 TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0)
1740 {
1741    /* TODO: implement type-based alias analysis so that these loads can be
1742     * combined. This is made a bit more difficult than simply using
1743     * nir_compare_derefs() because the vectorizer creates loads/stores with
1744     * casted derefs. The solution would probably be to keep multiple derefs for
1745     * an entry (one for each load/store combined into it). */
1746    glsl_struct_field fields[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
1747                                   glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1748 
1749    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1750    nir_deref_instr *deref = nir_build_deref_var(b, var);
1751 
1752    nir_def *index0 = nir_load_local_invocation_index(b);
1753    nir_def *index1 = nir_load_global_invocation_index(b, 32);
1754    nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, deref, 0), index0);
1755 
1756    create_shared_load(load_deref, 0x1);
1757    create_shared_store(nir_build_deref_array(b, nir_build_deref_struct(b, deref, 1), index1), 0x2);
1758    create_shared_load(load_deref, 0x3);
1759 
1760    nir_validate_shader(b->shader, NULL);
1761    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1762 
1763    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1764 
1765    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1766 
1767    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1768    ASSERT_EQ(load->def.bit_size, 32);
1769    ASSERT_EQ(load->def.num_components, 1);
1770    ASSERT_EQ(load->src[0].ssa, &load_deref->def);
1771    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1772    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1773 }
1774 
1775 TEST_F(nir_load_store_vectorize_test, shared_alias1)
1776 {
1777    nir_variable *var0 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var0");
1778    nir_variable *var1 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var1");
1779    nir_deref_instr *load_deref = nir_build_deref_var(b, var0);
1780 
1781    create_shared_load(load_deref, 0x1);
1782    create_shared_store(nir_build_deref_var(b, var1), 0x2);
1783    create_shared_load(load_deref, 0x3);
1784 
1785    nir_validate_shader(b->shader, NULL);
1786    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1787 
1788    EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1789 
1790    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1791 
1792    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1793    ASSERT_EQ(load->def.bit_size, 32);
1794    ASSERT_EQ(load->def.num_components, 1);
1795    ASSERT_EQ(load->src[0].ssa, &load_deref->def);
1796    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1797    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1798 }
1799 
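/* The two constant 64-bit offsets are roughly 4 GiB apart, far more than a
 * single vectorized load could cover, so they are expected to stay separate. */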
1800 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit)
1801 {
1802    create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x100000000), 0x1);
1803    create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x200000004), 0x2);
1804 
1805    nir_validate_shader(b->shader, NULL);
1806    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1807 
1808    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1809 
1810    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1811 }
1812 
1813 TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
1814 {
1815    nir_def *index_base = nir_u2u64(b, nir_load_local_invocation_index(b));
1816    nir_def *first = nir_imul_imm(b, index_base, 0x100000000);
1817    nir_def *second = nir_imul_imm(b, index_base, 0x200000000);
1818    create_indirect_load(nir_var_mem_ssbo, 0, first, 0x1);
1819    create_indirect_load(nir_var_mem_ssbo, 0, second, 0x2);
1820 
1821    nir_validate_shader(b->shader, NULL);
1822    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1823 
1824    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1825 
1826    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1827 }
1828 
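/* These offsets are only adjacent if the 32-bit offset calculation is allowed
 * to wrap (0xfffffffc + 4 overflows to 0), so with robust access enabled the
 * loads must not be merged. */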
1829 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust)
1830 {
1831    create_load(nir_var_mem_ssbo, 0, 0xfffffffc, 0x1);
1832    create_load(nir_var_mem_ssbo, 0, 0x0, 0x2);
1833 
1834    nir_validate_shader(b->shader, NULL);
1835    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1836 
1837    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1838 
1839    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1840 }
1841 
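/* The SSBO stride tests below exercise robust-access rules: merging is only
 * expected when the combined access cannot change which loads get bounds-checked
 * out, so the stride-1 loads stay separate, the stride-8 and stride-16 loads
 * merge fully, and the stride-12 case only merges the second and third loads. */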
1842 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride1)
1843 {
1844    nir_def *offset = nir_load_local_invocation_index(b);
1845    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1846    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);
1847 
1848    nir_validate_shader(b->shader, NULL);
1849    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1850 
1851    EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1852 
1853    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1854 }
1855 
1856 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride8)
1857 {
1858    nir_def *offset = nir_load_local_invocation_index(b);
1859    offset = nir_imul_imm(b, offset, 8);
1860    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1861    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);
1862 
1863    nir_validate_shader(b->shader, NULL);
1864    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1865 
1866    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1867 
1868    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1869 }
1870 
1871 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride12)
1872 {
1873    nir_def *offset = nir_load_local_invocation_index(b);
1874    offset = nir_imul_imm(b, offset, 12);
1875    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1876    nir_def *offset_4 = nir_iadd_imm(b, offset, 4);
1877    create_indirect_load(nir_var_mem_ssbo, 0, offset_4, 0x2);
1878    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 8), 0x3);
1879 
1880    nir_validate_shader(b->shader, NULL);
1881    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
1882 
1883    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1884 
1885    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1886 
1887    nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1888    ASSERT_EQ(load->def.bit_size, 32);
1889    ASSERT_EQ(load->def.num_components, 1);
1890    ASSERT_EQ(load->src[1].ssa, offset);
1891    EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1892 
1893    load = get_intrinsic(nir_intrinsic_load_ssbo, 1);
1894    ASSERT_EQ(load->def.bit_size, 32);
1895    ASSERT_EQ(load->def.num_components, 2);
1896    ASSERT_EQ(load->src[1].ssa, offset_4);
1897    EXPECT_INSTR_SWIZZLES(movs[0x2], load, "x");
1898    EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
1899 }
1900 
1901 TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride16)
1902 {
1903    nir_def *offset = nir_load_local_invocation_index(b);
1904    offset = nir_imul_imm(b, offset, 16);
1905    create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
1906    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);
1907    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 8), 0x3);
1908    create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 12), 0x4);
1909 
1910    nir_validate_shader(b->shader, NULL);
1911    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 4);
1912 
1913    EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));
1914 
1915    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1916 }
1917 
1918 TEST_F(nir_load_store_vectorize_test, shared_offset_overflow_robust_indirect_stride12)
1919 {
1920    nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared,
1921                                            glsl_array_type(glsl_uint_type(), 4, 0), "var");
1922    nir_deref_instr *deref = nir_build_deref_var(b, var);
1923 
1924    nir_def *index = nir_load_local_invocation_index(b);
1925    index = nir_imul_imm(b, index, 3);
1926    create_shared_load(nir_build_deref_array(b, deref, index), 0x1);
1927    create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index, 1)), 0x2);
1928    create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index, 2)), 0x3);
1929 
1930    nir_validate_shader(b->shader, NULL);
1931    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 3);
1932 
1933    EXPECT_FALSE(run_vectorizer(nir_var_mem_shared, false, nir_var_mem_shared));
1934 
1935    ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 3);
1936 }
1937 
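/* The alignment tests below check that align_mul/align_offset are derived from
 * the offset expression: e.g. offset = x*16 + 4 should give align_mul=16 and
 * align_offset=4. */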
1938 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4)
1939 {
1940    nir_def *offset = nir_load_local_invocation_index(b);
1941    offset = nir_imul_imm(b, offset, 16);
1942    offset = nir_iadd_imm(b, offset, 4);
1943    nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
1944                                                     0x1);
1945 
1946    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1947    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1948    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1949 }
1950 
1951 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4_swapped)
1952 {
1953    nir_def *offset = nir_load_local_invocation_index(b);
1954    offset = nir_iadd_imm(b, offset, 1);
1955    offset = nir_imul_imm(b, offset, 16);
1956    offset = nir_iadd_imm(b, offset, 4);
1957    nir_intrinsic_instr *load =
1958       create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
1959 
1960    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1961    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1962    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1963 }
1964 
1965 /* Check offset % mul != 0 */
1966 TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_20)
1967 {
1968    nir_def *offset = nir_load_local_invocation_index(b);
1969    offset = nir_imul_imm(b, offset, 16);
1970    offset = nir_iadd_imm(b, offset, 20);
1971    nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
1972                                                     0x1);
1973 
1974    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1975    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
1976    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1977 }
1978 
1979 /* Check that we don't upgrade to non-power-of-two alignments. */
1980 TEST_F(nir_load_store_vectorize_test, ubo_alignment_24_4)
1981 {
1982    nir_def *offset = nir_load_local_invocation_index(b);
1983    offset = nir_imul_imm(b, offset, 24);
1984    offset = nir_iadd_imm(b, offset, 4);
1985    nir_intrinsic_instr *load =
1986       create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
1987 
1988    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
1989    EXPECT_EQ(nir_intrinsic_align_mul(load), 8);
1990    EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
1991 }
1992 
1993 /* Check that we don't upgrade to non-power-of-two alignments. */
1994 TEST_F(nir_load_store_vectorize_test, ubo_alignment_64_16_8)
1995 {
1996    nir_def *x = nir_imul_imm(b, nir_load_local_invocation_index(b), 64);
1997    nir_def *y = nir_imul_imm(b, nir_load_instance_id(b), 16);
1998    nir_def *offset = nir_iadd(b, x, y);
1999    offset = nir_iadd_imm(b, offset, 8);
2000    nir_intrinsic_instr *load =
2001       create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);
2002 
2003    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2004    EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
2005    EXPECT_EQ(nir_intrinsic_align_offset(load), 8);
2006 }
2007 
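/* A constant offset should give the maximum align_mul, with align_offset equal
 * to the constant itself. */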
2008 TEST_F(nir_load_store_vectorize_test, ubo_alignment_const_100)
2009 {
2010    nir_intrinsic_instr *load =
2011       create_indirect_load(nir_var_mem_ubo, 0, nir_imm_int(b, 100), 0x1);
2012 
2013    EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
2014    EXPECT_EQ(nir_intrinsic_align_mul(load), NIR_ALIGN_MUL_MAX);
2015    EXPECT_EQ(nir_intrinsic_align_offset(load), 100);
2016 }
2017