/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "nir_test.h"

/* This is a macro so you get good line numbers */
#define EXPECT_INSTR_SWIZZLES(instr, load, expected_swizzle) \
   EXPECT_EQ((instr)->src[0].src.ssa, &(load)->def);         \
   EXPECT_EQ(swizzle(instr, 0), expected_swizzle);

namespace {

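/* Test fixture for nir_opt_load_store_vectorize. The helpers below emit
 * loads/stores tagged with a small integer id; the movs/loads maps record the
 * nir_mov inserted after each load so the tests can check which component of
 * a vectorized access each original access ended up reading.
 */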
class nir_load_store_vectorize_test : public nir_test {
protected:
   nir_load_store_vectorize_test()
      : nir_test::nir_test("nir_load_store_vectorize_test")
   {
   }

   unsigned count_intrinsics(nir_intrinsic_op intrinsic);

   nir_intrinsic_instr *get_intrinsic(nir_intrinsic_op intrinsic,
                                      unsigned index);

   bool run_vectorizer(nir_variable_mode modes, bool cse=false,
                       nir_variable_mode robust_modes = (nir_variable_mode)0);

   nir_def *get_resource(uint32_t binding, bool ssbo);

   nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_def *offset,
                                             uint32_t id, unsigned bit_size=32, unsigned components=1,
                                             unsigned access=0);
   void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_def *offset,
                              uint32_t id, unsigned bit_size=32, unsigned components=1,
                              unsigned wrmask=0xf, unsigned access=0);

   nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                                    uint32_t id, unsigned bit_size=32, unsigned components=1,
                                    unsigned access=0);
   void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
                     uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
                     unsigned access=0);

   void create_shared_load(nir_deref_instr *deref, uint32_t id,
                           unsigned bit_size=32, unsigned components=1);
   void create_shared_store(nir_deref_instr *deref, uint32_t id,
                            unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf);

   bool test_alu(nir_instr *instr, nir_op op);
   bool test_alu_def(nir_instr *instr, unsigned index, nir_def *def, unsigned swizzle=0);

   static bool mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
                                      unsigned bit_size,
                                      unsigned num_components,
                                      nir_intrinsic_instr *low, nir_intrinsic_instr *high,
                                      void *data);
   static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);

   std::string swizzle(nir_alu_instr *instr, int src);

   std::map<unsigned, nir_alu_instr*> movs;
   std::map<unsigned, nir_alu_src*> loads;
   std::map<unsigned, nir_def*> res_map;
};

std::string
nir_load_store_vectorize_test::swizzle(nir_alu_instr *instr, int src)
{
   std::string swizzle;
   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(instr, src); i++) {
      swizzle += "xyzw"[instr->src[src].swizzle[i]];
   }

   return swizzle;
}

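/* Count how many intrinsics of the given opcode remain in the shader. */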
unsigned
nir_load_store_vectorize_test::count_intrinsics(nir_intrinsic_op intrinsic)
{
   unsigned count = 0;
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic)
            count++;
      }
   }
   return count;
}

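/* Return the index-th intrinsic of the given opcode, in instruction order. */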
nir_intrinsic_instr *
nir_load_store_vectorize_test::get_intrinsic(nir_intrinsic_op intrinsic,
                                             unsigned index)
{
   nir_foreach_block(block, b->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == intrinsic) {
            if (index == 0)
               return intrin;
            index--;
         }
      }
   }
   return NULL;
}

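/* Run nir_opt_load_store_vectorize with the test callback, then clean up the
 * IR (copy propagation, algebraic opts, constant folding, optionally CSE) so
 * the tests can match against a canonical form.
 */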
bool
nir_load_store_vectorize_test::run_vectorizer(nir_variable_mode modes,
                                              bool cse,
                                              nir_variable_mode robust_modes)
{
   if (modes & nir_var_mem_shared)
      nir_lower_vars_to_explicit_types(b->shader, nir_var_mem_shared, shared_type_info);

   nir_load_store_vectorize_options opts = { };
   opts.callback = mem_vectorize_callback;
   opts.modes = modes;
   opts.robust_modes = robust_modes;
   bool progress = nir_opt_load_store_vectorize(b->shader, &opts);

   if (progress) {
      nir_validate_shader(b->shader, NULL);
      if (cse)
         nir_opt_cse(b->shader);
      nir_copy_prop(b->shader);
      nir_opt_algebraic(b->shader);
      nir_opt_constant_folding(b->shader);
   }
   return progress;
}

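/* Create (or reuse) a vulkan_resource_index intrinsic for descriptor set 0 and
 * the given binding; a UBO or SSBO descriptor depending on the ssbo flag.
 */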
nir_def *
nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
{
   if (res_map.count(binding))
      return res_map[binding];

   nir_intrinsic_instr *res = nir_intrinsic_instr_create(
      b->shader, nir_intrinsic_vulkan_resource_index);
   nir_def_init(&res->instr, &res->def, 1, 32);
   res->num_components = 1;
   res->src[0] = nir_src_for_ssa(nir_imm_zero(b, 1, 32));
   nir_intrinsic_set_desc_type(
      res, ssbo ? 7/*VK_DESCRIPTOR_TYPE_STORAGE_BUFFER*/ : 6/*VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER*/);
   nir_intrinsic_set_desc_set(res, 0);
   nir_intrinsic_set_binding(res, binding);
   nir_builder_instr_insert(b, &res->instr);
   res_map[binding] = &res->def;
   return &res->def;
}

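/* Emit a UBO/SSBO/push-constant load at the given offset and record it under
 * id so the tests can later inspect how it was rewritten.
 */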
nir_intrinsic_instr *
nir_load_store_vectorize_test::create_indirect_load(
   nir_variable_mode mode, uint32_t binding, nir_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   nir_intrinsic_op intrinsic;
   nir_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ubo:
      intrinsic = nir_intrinsic_load_ubo;
      res = get_resource(binding, false);
      break;
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_load_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_push_const:
      intrinsic = nir_intrinsic_load_push_constant;
      break;
   default:
      return NULL;
   }
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_def_init(&load->instr, &load->def, components, bit_size);
   load->num_components = components;
   if (res) {
      load->src[0] = nir_src_for_ssa(res);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }
   int byte_size = (bit_size == 1 ? 32 : bit_size) / 8;

   nir_intrinsic_set_align(load, byte_size, 0);
   if (mode != nir_var_mem_push_const) {
      nir_intrinsic_set_access(load, (gl_access_qualifier)access);
   }

   if (nir_intrinsic_has_range_base(load)) {
      uint32_t range = byte_size * components;
      int offset_src = res ? 1 : 0;

      if (nir_src_is_const(load->src[offset_src])) {
         nir_intrinsic_set_range_base(load, nir_src_as_uint(load->src[offset_src]));
         nir_intrinsic_set_range(load, range);
      } else {
         /* Unknown range */
         nir_intrinsic_set_range_base(load, 0);
         nir_intrinsic_set_range(load, ~0);
      }
   }

   nir_builder_instr_insert(b, &load->instr);
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->def)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];

   return load;
}

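/* Emit an SSBO or shared store of an immediate value derived from id
 * ((id << 4) | component), so vectorized stores are easy to recognize.
 */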
void
nir_load_store_vectorize_test::create_indirect_store(
   nir_variable_mode mode, uint32_t binding, nir_def *offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_def *value = nir_build_imm(b, components, bit_size, values);

   nir_intrinsic_op intrinsic;
   nir_def *res = NULL;
   switch (mode) {
   case nir_var_mem_ssbo:
      intrinsic = nir_intrinsic_store_ssbo;
      res = get_resource(binding, true);
      break;
   case nir_var_mem_shared:
      intrinsic = nir_intrinsic_store_shared;
      break;
   default:
      return;
   }
   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, intrinsic);
   nir_def_init(&store->instr, &store->def, components, bit_size);
   store->num_components = components;
   if (res) {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(res);
      store->src[2] = nir_src_for_ssa(offset);
   } else {
      store->src[0] = nir_src_for_ssa(value);
      store->src[1] = nir_src_for_ssa(offset);
   }
   nir_intrinsic_set_align(store, (bit_size == 1 ? 32 : bit_size) / 8, 0);
   nir_intrinsic_set_access(store, (gl_access_qualifier)access);
   nir_intrinsic_set_write_mask(store, wrmask & ((1 << components) - 1));
   nir_builder_instr_insert(b, &store->instr);
}

nir_intrinsic_instr *
nir_load_store_vectorize_test::create_load(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned access)
{
   return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
}

void
nir_load_store_vectorize_test::create_store(
   nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask, unsigned access)
{
   create_indirect_store(mode, binding, nir_imm_int(b, offset), id, bit_size, components, wrmask, access);
}

void nir_load_store_vectorize_test::create_shared_load(
   nir_deref_instr *deref, uint32_t id, unsigned bit_size, unsigned components)
{
   nir_def *load = nir_load_deref(b, deref);
   nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, load)->parent_instr);
   movs[id] = mov;
   loads[id] = &mov->src[0];
}

void nir_load_store_vectorize_test::create_shared_store(
   nir_deref_instr *deref, uint32_t id,
   unsigned bit_size, unsigned components, unsigned wrmask)
{
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < components; i++)
      values[i] = nir_const_value_for_raw_uint((id << 4) | i, bit_size);
   nir_def *value = nir_build_imm(b, components, bit_size, values);

   nir_store_deref(b, deref, value, wrmask & ((1 << components) - 1));
}

bool nir_load_store_vectorize_test::test_alu(nir_instr *instr, nir_op op)
{
   return instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->op == op;
}

bool nir_load_store_vectorize_test::test_alu_def(
   nir_instr *instr, unsigned index, nir_def *def, unsigned swizzle)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (index >= nir_op_infos[alu->op].num_inputs)
      return false;
   if (alu->src[index].src.ssa != def)
      return false;
   if (alu->src[index].swizzle[0] != swizzle)
      return false;

   return true;
}

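/* Vectorization callback used by every test: allow merging whenever the
 * combined access is scalar-aligned and has at most four components.
 */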
bool nir_load_store_vectorize_test::mem_vectorize_callback(
   unsigned align_mul, unsigned align_offset, unsigned bit_size,
   unsigned num_components,
   nir_intrinsic_instr *low, nir_intrinsic_instr *high,
   void *data)
{
   /* Calculate a simple alignment, like how nir_intrinsic_align() does. */
   uint32_t align = align_mul;
   if (align_offset)
      align = 1 << (ffs(align_offset) - 1);

   /* Require scalar alignment and less than 5 components. */
   return align % (bit_size / 8) == 0 &&
          num_components <= 4;
}

void nir_load_store_vectorize_test::shared_type_info(
   const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length,
   *align = comp_size;
}
} // namespace

TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
367 {
368 create_load(nir_var_mem_ubo, 0, 0, 0x1);
369 create_load(nir_var_mem_ubo, 0, 4, 0x2);
370
371 nir_validate_shader(b->shader, NULL);
372 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
373
374 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
375
376 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
377
378 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
379 ASSERT_EQ(load->def.bit_size, 32);
380 ASSERT_EQ(load->def.num_components, 2);
381 ASSERT_EQ(nir_intrinsic_range_base(load), 0);
382 ASSERT_EQ(nir_intrinsic_range(load), 8);
383 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
384 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
385 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
386 }
387
TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
389 {
390 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
391 create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 2);
392
393 nir_validate_shader(b->shader, NULL);
394 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
395
396 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
397
398 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
399
400 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
401 ASSERT_EQ(load->def.bit_size, 32);
402 ASSERT_EQ(load->def.num_components, 3);
403 ASSERT_EQ(nir_intrinsic_range_base(load), 0);
404 ASSERT_EQ(nir_intrinsic_range(load), 12);
405 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
406 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
407 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "yz");
408 }
409
/* Test for a bug in range handling */
TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting_range)
412 {
413 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
414 create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 1);
415
416 nir_validate_shader(b->shader, NULL);
417 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
418
419 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
420
421 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
422
423 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
424 ASSERT_EQ(load->def.bit_size, 32);
425 ASSERT_EQ(load->def.num_components, 4);
426 ASSERT_EQ(nir_intrinsic_range_base(load), 0);
427 ASSERT_EQ(nir_intrinsic_range(load), 16);
428 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
429 ASSERT_EQ(loads[0x1]->src.ssa, &load->def);
430 ASSERT_EQ(loads[0x2]->src.ssa, &load->def);
431 ASSERT_EQ(loads[0x1]->swizzle[0], 0);
432 ASSERT_EQ(loads[0x1]->swizzle[1], 1);
433 ASSERT_EQ(loads[0x1]->swizzle[2], 2);
434 ASSERT_EQ(loads[0x1]->swizzle[3], 3);
435 ASSERT_EQ(loads[0x2]->swizzle[0], 1);
436 }
437
TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
439 {
440 create_load(nir_var_mem_ubo, 0, 0, 0x1);
441 create_load(nir_var_mem_ubo, 0, 0, 0x2);
442
443 nir_validate_shader(b->shader, NULL);
444 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
445
446 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
447
448 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
449
450 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
451 ASSERT_EQ(load->def.bit_size, 32);
452 ASSERT_EQ(load->def.num_components, 1);
453 ASSERT_EQ(nir_intrinsic_range_base(load), 0);
454 ASSERT_EQ(nir_intrinsic_range(load), 4);
455 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
456 ASSERT_EQ(loads[0x1]->src.ssa, &load->def);
457 ASSERT_EQ(loads[0x2]->src.ssa, &load->def);
458 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
459 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "x");
460 }
461
TEST_F(nir_load_store_vectorize_test, ubo_load_large)
463 {
464 create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 2);
465 create_load(nir_var_mem_ubo, 0, 8, 0x2, 32, 3);
466
467 nir_validate_shader(b->shader, NULL);
468 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
469
470 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
471
472 nir_validate_shader(b->shader, NULL);
473 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
474 }
475
TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent)
477 {
478 create_load(nir_var_mem_push_const, 0, 0, 0x1);
479 create_load(nir_var_mem_push_const, 0, 4, 0x2);
480
481 nir_validate_shader(b->shader, NULL);
482 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
483
484 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
485
486 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
487
488 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
489 ASSERT_EQ(load->def.bit_size, 32);
490 ASSERT_EQ(load->def.num_components, 2);
491 ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
492 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
493 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
494 }
495
TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_base)
497 {
498 create_load(nir_var_mem_push_const, 0, 0, 0x1);
499 nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 0, 0x2), 4);
500
501 nir_validate_shader(b->shader, NULL);
502 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
503
504 EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));
505
506 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);
507
508 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
509 ASSERT_EQ(load->def.bit_size, 32);
510 ASSERT_EQ(load->def.num_components, 2);
511 ASSERT_EQ(nir_src_as_uint(load->src[0]), 0);
512 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
513 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
514 }
515
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent)
517 {
518 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
519 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
520
521 nir_validate_shader(b->shader, NULL);
522 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
523
524 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
525
526 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
527
528 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
529 ASSERT_EQ(load->def.bit_size, 32);
530 ASSERT_EQ(load->def.num_components, 2);
531 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
532 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
533 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
534 }
535
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect)
537 {
538 nir_def *index_base = nir_load_local_invocation_index(b);
539 create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x1);
540 create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, index_base, 4), 0x2);
541
542 nir_validate_shader(b->shader, NULL);
543 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
544
545 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
546
547 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
548
549 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
550 ASSERT_EQ(load->def.bit_size, 32);
551 ASSERT_EQ(load->def.num_components, 2);
552 ASSERT_EQ(load->src[1].ssa, index_base);
553 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
554 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
555 }
556
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_sub)
558 {
559 nir_def *index_base = nir_load_local_invocation_index(b);
560 nir_def *index_base_prev = nir_iadd_imm(b, index_base, 0xfffffffc);
561 create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
562 create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);
563
564 nir_validate_shader(b->shader, NULL);
565 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
566
567 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
568
569 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
570
571 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
572 ASSERT_EQ(load->def.bit_size, 32);
573 ASSERT_EQ(load->def.num_components, 2);
574 ASSERT_EQ(load->src[1].ssa, index_base_prev);
575 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
576 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
577 }
578
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_indirect_neg_stride)
580 {
581 nir_def *inv = nir_load_local_invocation_index(b);
582 nir_def *inv_plus_one = nir_iadd_imm(b, inv, 1);
583 nir_def *index_base = nir_imul_imm(b, inv, 0xfffffffc);
584 nir_def *index_base_prev = nir_imul_imm(b, inv_plus_one, 0xfffffffc);
585 create_indirect_load(nir_var_mem_ssbo, 0, index_base_prev, 0x1);
586 create_indirect_load(nir_var_mem_ssbo, 0, index_base, 0x2);
587
588 nir_validate_shader(b->shader, NULL);
589 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
590
591 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
592
593 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
594
595 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
596 ASSERT_EQ(load->def.bit_size, 32);
597 ASSERT_EQ(load->def.num_components, 2);
598 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
599 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
600
601 /* nir_opt_algebraic optimizes the imul */
602 ASSERT_TRUE(test_alu(load->src[1].ssa->parent_instr, nir_op_ineg));
603 nir_def *offset = nir_instr_as_alu(load->src[1].ssa->parent_instr)->src[0].src.ssa;
604 ASSERT_TRUE(test_alu(offset->parent_instr, nir_op_ishl));
605 nir_alu_instr *shl = nir_instr_as_alu(offset->parent_instr);
606 ASSERT_EQ(shl->src[0].src.ssa, inv_plus_one);
607 ASSERT_EQ(nir_src_as_uint(shl->src[1].src), 2);
608 }
609
TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_adjacent)
611 {
612 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
613 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
614 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
615
616 nir_validate_shader(b->shader, NULL);
617 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
618
619 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
620
621 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
622
623 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
624 ASSERT_EQ(load->def.bit_size, 32);
625 ASSERT_EQ(load->def.num_components, 1);
626 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
627 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
628 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
629 }
630
TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_intersecting)
632 {
633 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
634 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
635 create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 2);
636
637 nir_validate_shader(b->shader, NULL);
638 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
639
640 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
641
642 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
643 }
644
TEST_F(nir_load_store_vectorize_test, ssbo_load_identical_store_identical)
646 {
647 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
648 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
649 create_load(nir_var_mem_ssbo, 0, 0, 0x3);
650
651 nir_validate_shader(b->shader, NULL);
652 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
653
654 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
655
656 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
657 }
658
TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_load_identical)
660 {
661 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
662 create_load(nir_var_mem_ssbo, 0, 0, 0x2);
663 create_store(nir_var_mem_ssbo, 0, 0, 0x3);
664
665 nir_validate_shader(b->shader, NULL);
666 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
667
668 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
669
670 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
671 }
672
/* if nir_opt_load_store_vectorize were implemented like many load/store
 * optimization passes are (for example, nir_opt_combine_stores and
 * nir_opt_copy_prop_vars) and stopped tracking a load when an aliasing store is
 * encountered, this case wouldn't be optimized.
 * A similar test for derefs is shared_load_adjacent_store_identical. */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_store_identical)
679 {
680 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
681 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
682 create_load(nir_var_mem_ssbo, 0, 4, 0x3);
683
684 nir_validate_shader(b->shader, NULL);
685 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
686 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
687
688 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
689
690 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
691 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
692
693 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
694 ASSERT_EQ(load->def.bit_size, 32);
695 ASSERT_EQ(load->def.num_components, 2);
696 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
697 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
698 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
699 }
700
TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent)
702 {
703 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
704 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
705
706 nir_validate_shader(b->shader, NULL);
707 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
708
709 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
710
711 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
712
713 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
714 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
715 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
716 nir_def *val = store->src[0].ssa;
717 ASSERT_EQ(val->bit_size, 32);
718 ASSERT_EQ(val->num_components, 2);
719 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
720 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
721 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
722 }
723
TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting)
725 {
726 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
727 create_store(nir_var_mem_ssbo, 0, 4, 0x2, 32, 2);
728
729 nir_validate_shader(b->shader, NULL);
730 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
731
732 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
733
734 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
735
736 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
737 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
738 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
739 nir_def *val = store->src[0].ssa;
740 ASSERT_EQ(val->bit_size, 32);
741 ASSERT_EQ(val->num_components, 3);
742 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
743 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
744 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
745 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x21);
746 }
747
TEST_F(nir_load_store_vectorize_test, ssbo_store_identical)
749 {
750 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
751 create_store(nir_var_mem_ssbo, 0, 0, 0x2);
752
753 nir_validate_shader(b->shader, NULL);
754 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
755
756 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
757
758 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
759
760 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
761 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
762 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x1);
763 nir_def *val = store->src[0].ssa;
764 ASSERT_EQ(val->bit_size, 32);
765 ASSERT_EQ(val->num_components, 1);
766 ASSERT_EQ(nir_src_as_uint(store->src[0]), 0x20);
767 }
768
TEST_F(nir_load_store_vectorize_test, ssbo_store_large)
770 {
771 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
772 create_store(nir_var_mem_ssbo, 0, 8, 0x2, 32, 3);
773
774 nir_validate_shader(b->shader, NULL);
775 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
776
777 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
778
779 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
780 }
781
TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent_memory_barrier)
783 {
784 create_load(nir_var_mem_ubo, 0, 0, 0x1);
785
786 nir_scoped_memory_barrier(b, SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
787 nir_var_mem_ssbo);
788
789 create_load(nir_var_mem_ubo, 0, 4, 0x2);
790
791 nir_validate_shader(b->shader, NULL);
792 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
793
794 EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
795
796 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
797 }
798
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier)
800 {
801 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
802
803 nir_scoped_memory_barrier(b, SCOPE_DEVICE, NIR_MEMORY_ACQ_REL,
804 nir_var_mem_ssbo);
805
806 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
807
808 nir_validate_shader(b->shader, NULL);
809 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
810
811 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
812
813 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
814 }
815
/* A control barrier may only sync invocations in a workgroup, it doesn't
 * require that loads/stores complete.
 */
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_barrier)
820 {
821 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
822 nir_barrier(b, SCOPE_WORKGROUP, SCOPE_NONE,
823 (nir_memory_semantics)0, (nir_variable_mode)0);
824 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
825
826 nir_validate_shader(b->shader, NULL);
827 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
828
829 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
830
831 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
832 }
833
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_memory_barrier_shared)
835 {
836 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
837
838 nir_scoped_memory_barrier(b, SCOPE_WORKGROUP, NIR_MEMORY_ACQ_REL,
839 nir_var_mem_shared);
840
841 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
842
843 nir_validate_shader(b->shader, NULL);
844 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
845
846 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
847
848 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
849 }
850
TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_discard)
852 {
853 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
854 nir_discard(b);
855 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
856
857 nir_validate_shader(b->shader, NULL);
858 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
859
860 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
861
862 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
863 }
864
TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_demote)
866 {
867 create_store(nir_var_mem_ssbo, 0, 0, 0x1);
868 nir_demote(b);
869 create_store(nir_var_mem_ssbo, 0, 4, 0x2);
870
871 nir_validate_shader(b->shader, NULL);
872 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
873
874 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
875
876 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
877 }
878
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_discard)
880 {
881 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
882 nir_discard(b);
883 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
884
885 nir_validate_shader(b->shader, NULL);
886 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
887
888 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
889
890 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
891 }
892
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_demote)
894 {
895 create_load(nir_var_mem_ssbo, 0, 0, 0x1);
896 nir_demote(b);
897 create_load(nir_var_mem_ssbo, 0, 4, 0x2);
898
899 nir_validate_shader(b->shader, NULL);
900 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
901
902 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
903
904 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
905 }
906
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_8_8_16)
908 {
909 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 8);
910 create_load(nir_var_mem_ssbo, 0, 1, 0x2, 8);
911 create_load(nir_var_mem_ssbo, 0, 2, 0x3, 16);
912
913 nir_validate_shader(b->shader, NULL);
914 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
915
916 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
917
918 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
919
920 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
921 ASSERT_EQ(load->def.bit_size, 8);
922 ASSERT_EQ(load->def.num_components, 4);
923 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
924 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
925 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
926
927 nir_def *val = loads[0x3]->src.ssa;
928 ASSERT_EQ(val->bit_size, 16);
929 ASSERT_EQ(val->num_components, 1);
930 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_ior));
931 nir_def *low = nir_instr_as_alu(val->parent_instr)->src[0].src.ssa;
932 nir_def *high = nir_instr_as_alu(val->parent_instr)->src[1].src.ssa;
933 ASSERT_TRUE(test_alu(high->parent_instr, nir_op_ishl));
934 high = nir_instr_as_alu(high->parent_instr)->src[0].src.ssa;
935 ASSERT_TRUE(test_alu(low->parent_instr, nir_op_u2u16));
936 ASSERT_TRUE(test_alu(high->parent_instr, nir_op_u2u16));
937 ASSERT_TRUE(test_alu_def(low->parent_instr, 0, &load->def, 2));
938 ASSERT_TRUE(test_alu_def(high->parent_instr, 0, &load->def, 3));
939 }
940
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64)
942 {
943 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
944 create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
945
946 nir_validate_shader(b->shader, NULL);
947 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
948
949 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
950
951 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
952
953 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
954 ASSERT_EQ(load->def.bit_size, 32);
955 ASSERT_EQ(load->def.num_components, 4);
956 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
957 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
958
959 nir_def *val = loads[0x2]->src.ssa;
960 ASSERT_EQ(val->bit_size, 64);
961 ASSERT_EQ(val->num_components, 1);
962 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
963 nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
964 EXPECT_INSTR_SWIZZLES(pack, load, "zw");
965 }
966
TEST_F(nir_load_store_vectorize_test, ssbo_load_adjacent_32_32_64_64)
968 {
969 create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
970 create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
971 create_load(nir_var_mem_ssbo, 0, 16, 0x3, 64);
972
973 nir_validate_shader(b->shader, NULL);
974 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);
975
976 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, true));
977
978 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
979
980 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
981 ASSERT_EQ(load->def.bit_size, 64);
982 ASSERT_EQ(load->def.num_components, 3);
983 ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
984 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "z");
985
986 nir_def *val = loads[0x2]->src.ssa;
987 ASSERT_EQ(val->bit_size, 64);
988 ASSERT_EQ(val->num_components, 1);
989 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_mov));
990 nir_alu_instr *mov = nir_instr_as_alu(val->parent_instr);
991 EXPECT_INSTR_SWIZZLES(mov, load, "y");
992
993 val = loads[0x1]->src.ssa;
994 ASSERT_EQ(val->bit_size, 32);
995 ASSERT_EQ(val->num_components, 2);
996 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_unpack_64_2x32));
997 nir_alu_instr *unpack = nir_instr_as_alu(val->parent_instr);
998 EXPECT_INSTR_SWIZZLES(unpack, load, "x");
999 }
1000
TEST_F(nir_load_store_vectorize_test, ssbo_load_intersecting_32_32_64)
1002 {
1003 create_load(nir_var_mem_ssbo, 0, 4, 0x1, 32, 2);
1004 create_load(nir_var_mem_ssbo, 0, 8, 0x2, 64);
1005
1006 nir_validate_shader(b->shader, NULL);
1007 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
1008
1009 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1010
1011 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
1012
1013 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
1014 ASSERT_EQ(load->def.bit_size, 32);
1015 ASSERT_EQ(load->def.num_components, 3);
1016 ASSERT_EQ(nir_src_as_uint(load->src[1]), 4);
1017 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
1018
1019 nir_def *val = loads[0x2]->src.ssa;
1020 ASSERT_EQ(val->bit_size, 64);
1021 ASSERT_EQ(val->num_components, 1);
1022 ASSERT_TRUE(test_alu(val->parent_instr, nir_op_pack_64_2x32));
1023 nir_alu_instr *pack = nir_instr_as_alu(val->parent_instr);
1024 EXPECT_INSTR_SWIZZLES(pack, load, "yz");
1025 }
1026
TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_8_8_16)
1028 {
1029 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 8);
1030 create_store(nir_var_mem_ssbo, 0, 1, 0x2, 8);
1031 create_store(nir_var_mem_ssbo, 0, 2, 0x3, 16);
1032
1033 nir_validate_shader(b->shader, NULL);
1034 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
1035
1036 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1037
1038 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1039
1040 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1041 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1042 ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
1043 nir_def *val = store->src[0].ssa;
1044 ASSERT_EQ(val->bit_size, 8);
1045 ASSERT_EQ(val->num_components, 4);
1046 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1047 ASSERT_EQ(nir_const_value_as_uint(cv[0], 8), 0x10);
1048 ASSERT_EQ(nir_const_value_as_uint(cv[1], 8), 0x20);
1049 ASSERT_EQ(nir_const_value_as_uint(cv[2], 8), 0x30);
1050 ASSERT_EQ(nir_const_value_as_uint(cv[3], 8), 0x0);
1051 }
1052
TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64)
1054 {
1055 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
1056 create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
1057
1058 nir_validate_shader(b->shader, NULL);
1059 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1060
1061 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1062
1063 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1064
1065 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1066 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1067 ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
1068 nir_def *val = store->src[0].ssa;
1069 ASSERT_EQ(val->bit_size, 32);
1070 ASSERT_EQ(val->num_components, 4);
1071 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1072 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1073 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x11);
1074 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x20);
1075 ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x0);
1076 }
1077
TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_32_64_64)
1079 {
1080 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
1081 create_store(nir_var_mem_ssbo, 0, 8, 0x2, 64);
1082 create_store(nir_var_mem_ssbo, 0, 16, 0x3, 64);
1083
1084 nir_validate_shader(b->shader, NULL);
1085 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 3);
1086
1087 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1088
1089 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1090
1091 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1092 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1093 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
1094 nir_def *val = store->src[0].ssa;
1095 ASSERT_EQ(val->bit_size, 64);
1096 ASSERT_EQ(val->num_components, 3);
1097 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1098 ASSERT_EQ(nir_const_value_as_uint(cv[0], 64), 0x1100000010ull);
1099 ASSERT_EQ(nir_const_value_as_uint(cv[1], 64), 0x20);
1100 ASSERT_EQ(nir_const_value_as_uint(cv[2], 64), 0x30);
1101 }
1102
TEST_F(nir_load_store_vectorize_test, ssbo_store_intersecting_32_32_64)
1104 {
1105 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 2);
1106 create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64);
1107
1108 nir_validate_shader(b->shader, NULL);
1109 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1110
1111 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1112
1113 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1114
1115 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1116 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1117 ASSERT_EQ(nir_intrinsic_write_mask(store), 0x7);
1118 nir_def *val = store->src[0].ssa;
1119 ASSERT_EQ(val->bit_size, 32);
1120 ASSERT_EQ(val->num_components, 3);
1121 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1122 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1123 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);
1124 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x0);
1125 }
1126
TEST_F(nir_load_store_vectorize_test, ssbo_store_adjacent_32_64)
1128 {
1129 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32);
1130 create_store(nir_var_mem_ssbo, 0, 4, 0x2, 64, 2);
1131
1132 nir_validate_shader(b->shader, NULL);
1133 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1134
1135 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1136
1137 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1138 }
1139
TEST_F(nir_load_store_vectorize_test, ssbo_store_identical_wrmask)
1141 {
1142 create_store(nir_var_mem_ssbo, 0, 0, 0x1, 32, 4, 1 | 4);
1143 create_store(nir_var_mem_ssbo, 0, 0, 0x2, 32, 4, 2 | 4 | 8);
1144
1145 nir_validate_shader(b->shader, NULL);
1146 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 2);
1147
1148 EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
1149
1150 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_ssbo), 1);
1151
1152 nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_ssbo, 0);
1153 ASSERT_EQ(nir_src_as_uint(store->src[2]), 0);
1154 ASSERT_EQ(nir_intrinsic_write_mask(store), 0xf);
1155 nir_def *val = store->src[0].ssa;
1156 ASSERT_EQ(val->bit_size, 32);
1157 ASSERT_EQ(val->num_components, 4);
1158 nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
1159 ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
1160 ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x21);
1161 ASSERT_EQ(nir_const_value_as_uint(cv[2], 32), 0x22);
1162 ASSERT_EQ(nir_const_value_as_uint(cv[3], 32), 0x23);
1163 }
1164
TEST_F(nir_load_store_vectorize_test, shared_load_adjacent)
1166 {
1167 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1168 nir_deref_instr *deref = nir_build_deref_var(b, var);
1169
1170 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1171 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2);
1172
1173 nir_validate_shader(b->shader, NULL);
1174 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1175
1176 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1177
1178 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1179
1180 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1181 ASSERT_EQ(load->def.bit_size, 32);
1182 ASSERT_EQ(load->def.num_components, 2);
1183
1184 deref = nir_src_as_deref(load->src[0]);
1185 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1186
1187 deref = nir_deref_instr_parent(deref);
1188 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1189 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1190
1191 deref = nir_deref_instr_parent(deref);
1192 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1193 ASSERT_EQ(deref->var, var);
1194
1195 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1196 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1197 }
1198
TEST_F(nir_load_store_vectorize_test, shared_load_distant_64bit)
1200 {
1201 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1202 nir_deref_instr *deref = nir_build_deref_var(b, var);
1203 nir_def_init(&deref->instr, &deref->def, 1, 64);
1204
1205 create_shared_load(nir_build_deref_array_imm(b, deref, 0x100000000), 0x1);
1206 create_shared_load(nir_build_deref_array_imm(b, deref, 0x200000001), 0x2);
1207
1208 nir_validate_shader(b->shader, NULL);
1209 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1210
1211 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1212
1213 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1214 }
1215
TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect)
1217 {
1218 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1219 nir_deref_instr *deref = nir_build_deref_var(b, var);
1220 nir_def *index_base = nir_load_local_invocation_index(b);
1221
1222 create_shared_load(nir_build_deref_array(b, deref, index_base), 0x1);
1223 create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index_base, 1)), 0x2);
1224
1225 nir_validate_shader(b->shader, NULL);
1226 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1227
1228 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1229
1230 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1231
1232 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1233 ASSERT_EQ(load->def.bit_size, 32);
1234 ASSERT_EQ(load->def.num_components, 2);
1235
1236 deref = nir_src_as_deref(load->src[0]);
1237 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1238
1239 deref = nir_deref_instr_parent(deref);
1240 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1241 ASSERT_EQ(deref->arr.index.ssa, index_base);
1242
1243 deref = nir_deref_instr_parent(deref);
1244 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1245 ASSERT_EQ(deref->var, var);
1246
1247 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1248 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1249 }
1250
TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_indirect_sub)
1252 {
1253 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1254 nir_deref_instr *deref = nir_build_deref_var(b, var);
1255 nir_def *index_base = nir_load_local_invocation_index(b);
1256 nir_def *index_base_prev = nir_iadd_imm(b, index_base, 0xffffffff);
1257
1258 create_shared_load(nir_build_deref_array(b, deref, index_base_prev), 0x1);
1259 create_shared_load(nir_build_deref_array(b, deref, index_base), 0x2);
1260
1261 nir_validate_shader(b->shader, NULL);
1262 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1263
1264 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1265
1266 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1267
1268 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1269 ASSERT_EQ(load->def.bit_size, 32);
1270 ASSERT_EQ(load->def.num_components, 2);
1271
1272 deref = nir_src_as_deref(load->src[0]);
1273 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1274
1275 deref = nir_deref_instr_parent(deref);
1276 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1277 ASSERT_EQ(deref->arr.index.ssa, index_base_prev);
1278
1279 deref = nir_deref_instr_parent(deref);
1280 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1281 ASSERT_EQ(deref->var, var);
1282
1283 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1284 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1285 }
1286
TEST_F(nir_load_store_vectorize_test, shared_load_struct)
1288 {
1289 glsl_struct_field fields[2] = {glsl_struct_field(glsl_uint_type(), "field0"),
1290 glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};
1291
1292 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
1293 nir_deref_instr *deref = nir_build_deref_var(b, var);
1294
1295 create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1);
1296 create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);
1297
1298 nir_validate_shader(b->shader, NULL);
1299 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1300
1301 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1302
1303 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1304
1305 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1306 ASSERT_EQ(load->def.bit_size, 32);
1307 ASSERT_EQ(load->def.num_components, 2);
1308
1309 deref = nir_src_as_deref(load->src[0]);
1310 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1311
1312 deref = nir_deref_instr_parent(deref);
1313 ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
1314 ASSERT_EQ(deref->strct.index, 0);
1315
1316 deref = nir_deref_instr_parent(deref);
1317 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1318 ASSERT_EQ(deref->var, var);
1319
1320 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1321 EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
1322 }
1323
TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_adjacent)
1325 {
1326 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1327 nir_deref_instr *deref = nir_build_deref_var(b, var);
1328
1329 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1330 create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);
1331 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1332
1333 nir_validate_shader(b->shader, NULL);
1334 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1335 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1336
1337 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1338
1339 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1340 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1341
1342 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1343 ASSERT_EQ(load->def.bit_size, 32);
1344 ASSERT_EQ(load->def.num_components, 1);
1345
1346 deref = nir_src_as_deref(load->src[0]);
1347 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1348 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1349
1350 deref = nir_deref_instr_parent(deref);
1351 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1352 ASSERT_EQ(deref->var, var);
1353
1354 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1355 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
1356 }
1357
TEST_F(nir_load_store_vectorize_test, shared_load_identical_store_identical)
1359 {
1360 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1361 nir_deref_instr *deref = nir_build_deref_var(b, var);
1362
1363 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1364 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1365 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x3);
1366
1367 nir_validate_shader(b->shader, NULL);
1368 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1369
1370 EXPECT_FALSE(run_vectorizer(nir_var_mem_shared));
1371
1372 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1373 }
1374
TEST_F(nir_load_store_vectorize_test, shared_load_adjacent_store_identical)
1376 {
1377 nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
1378 nir_deref_instr *deref = nir_build_deref_var(b, var);
1379
1380 create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1);
1381 create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x2);
1382 create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x3);
1383
1384 nir_validate_shader(b->shader, NULL);
1385 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);
1386 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1387
1388 EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));
1389
1390 ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);
1391 ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);
1392
1393 nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
1394 ASSERT_EQ(load->def.bit_size, 32);
1395 ASSERT_EQ(load->def.num_components, 2);
1396
1397 deref = nir_src_as_deref(load->src[0]);
1398 ASSERT_EQ(deref->deref_type, nir_deref_type_cast);
1399
1400 deref = nir_deref_instr_parent(deref);
1401 ASSERT_EQ(deref->deref_type, nir_deref_type_array);
1402 ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);
1403
1404 deref = nir_deref_instr_parent(deref);
1405 ASSERT_EQ(deref->deref_type, nir_deref_type_var);
1406 ASSERT_EQ(deref->var, var);
1407
1408 EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
1409 EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
1410 }
1411
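/* Booleans in shared memory are loaded as 32-bit integers. Two adjacent bool
 * loads should be merged into a single 32-bit vec2 load, with each user
 * converted back to bool via (value != 0). */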
TEST_F(nir_load_store_vectorize_test, shared_load_bool)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_bool_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_array_imm(b, deref, 0), 0x1, 1);
   create_shared_load(nir_build_deref_array_imm(b, deref, 1), 0x2, 1);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   /* The loaded value is converted to Boolean by (loaded != 0). */
   ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_ine));
   ASSERT_TRUE(test_alu(loads[0x2]->src.ssa->parent_instr, nir_op_ine));
   ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->def, 0));
   ASSERT_TRUE(test_alu_def(loads[0x2]->src.ssa->parent_instr, 0, &load->def, 1));
}

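/* A bool struct field followed by a uint array: the bool load and the first
 * array element are adjacent in memory and should still be merged into one
 * 32-bit vec2 load, with only the bool component converted via (value != 0). */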
TEST_F(nir_load_store_vectorize_test, shared_load_bool_mixed)
{
   glsl_struct_field fields[2] = {glsl_struct_field(glsl_bool_type(), "field0"),
                                  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};

   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_load(nir_build_deref_struct(b, deref, 0), 0x1, 1);
   create_shared_load(nir_build_deref_array_imm(b, nir_build_deref_struct(b, deref, 1), 0), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);

   deref = nir_src_as_deref(load->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_struct);
   ASSERT_EQ(deref->strct.index, 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);

   /* The loaded value is converted to Boolean by (loaded != 0). */
   ASSERT_TRUE(test_alu(loads[0x1]->src.ssa->parent_instr, nir_op_ine));
   ASSERT_TRUE(test_alu_def(loads[0x1]->src.ssa->parent_instr, 0, &load->def, 0));

   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

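/* Two scalar constant stores to adjacent array elements should be merged into
 * a single vec2 store_deref with write mask 0x3 that keeps both constants. */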
TEST_F(nir_load_store_vectorize_test, shared_store_adjacent)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   create_shared_store(nir_build_deref_array_imm(b, deref, 0), 0x1);
   create_shared_store(nir_build_deref_array_imm(b, deref, 1), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_store_deref), 1);

   nir_intrinsic_instr *store = get_intrinsic(nir_intrinsic_store_deref, 0);
   ASSERT_EQ(nir_intrinsic_write_mask(store), 0x3);
   nir_def *val = store->src[1].ssa;
   ASSERT_EQ(val->bit_size, 32);
   ASSERT_EQ(val->num_components, 2);
   nir_const_value *cv = nir_instr_as_load_const(val->parent_instr)->value;
   ASSERT_EQ(nir_const_value_as_uint(cv[0], 32), 0x10);
   ASSERT_EQ(nir_const_value_as_uint(cv[1], 32), 0x20);

   deref = nir_src_as_deref(store->src[0]);
   ASSERT_EQ(deref->deref_type, nir_deref_type_cast);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_array);
   ASSERT_EQ(nir_src_as_uint(deref->arr.index), 0);

   deref = nir_deref_instr_parent(deref);
   ASSERT_EQ(deref->deref_type, nir_deref_type_var);
   ASSERT_EQ(deref->var, var);
}

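/* Push-constant loads with different base indices address separate ranges and
 * must not be combined, even though their offsets could otherwise line up. */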
TEST_F(nir_load_store_vectorize_test, push_const_load_separate_base)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   nir_intrinsic_set_base(create_load(nir_var_mem_push_const, 0, 4, 0x2), 4);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_direct)
{
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_load(nir_var_mem_push_const, 0, 8, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_separate_direct_indirect)
{
   nir_def *index_base = nir_load_local_invocation_index(b);
   create_load(nir_var_mem_push_const, 0, 0, 0x1);
   create_indirect_load(nir_var_mem_push_const, 0, index_base, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_separate_indirect_indirect)
{
   nir_def *index_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_push_const, 0,
                        nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 2), 16), 32), 0x1);
   create_indirect_load(nir_var_mem_push_const, 0,
                        nir_iadd_imm(b, nir_imul_imm(b, nir_iadd_imm(b, index_base, 3), 16), 32), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);
}

TEST_F(nir_load_store_vectorize_test, push_const_load_adjacent_complex_indirect)
{
   nir_def *index_base = nir_load_local_invocation_index(b);
   /* vec4 pc[]; pc[gl_LocalInvocationIndex].w; pc[gl_LocalInvocationIndex+1].x; */
   nir_def *low = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 12);
   nir_def *high = nir_imul_imm(b, nir_iadd_imm(b, index_base, 1), 16);
   create_indirect_load(nir_var_mem_push_const, 0, low, 0x1);
   create_indirect_load(nir_var_mem_push_const, 0, high, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_push_const));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_push_constant), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_push_constant, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(load->src[0].ssa, low);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
}

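/* The indirect store's offset is unknown at compile time, so it may alias the
 * constant-offset loads on either side of it; the two loads must not be
 * merged across the store. */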
TEST_F(nir_load_store_vectorize_test, ssbo_alias0)
{
   nir_def *index_base = nir_load_local_invocation_index(b);
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_indirect_store(nir_var_mem_ssbo, 0, index_base, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_alias1)
{
   nir_def *load_base = nir_load_global_invocation_index(b, 32);
   nir_def *store_base = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x1);
   create_indirect_store(nir_var_mem_ssbo, 0, store_base, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, load_base, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias2)
{
   /* TODO: try to combine these loads */
   nir_def *index_base = nir_load_local_invocation_index(b);
   nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 4);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 1);
   ASSERT_EQ(load->src[1].ssa, offset);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_alias3)
{
   /* These loads can't be combined across the store: if index_base ==
    * 268435455, the addition wraps around and offset == 0, so the loads may
    * alias the store at offset 0. */
   nir_def *index_base = nir_load_local_invocation_index(b);
   nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, DISABLED_ssbo_alias4)
{
   /* Unlike ssbo_alias3, these loads can be combined because
    * nir_alu_instr::no_unsigned_wrap is set on the offset computation, so it
    * cannot wrap around to alias the store at offset 0.
    * TODO: try to combine these loads */
   nir_def *index_base = nir_load_local_invocation_index(b);
   nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, index_base, 16), 16);
   nir_instr_as_alu(offset->parent_instr)->no_unsigned_wrap = true;
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_store(nir_var_mem_ssbo, 0, 0, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 1);
   ASSERT_EQ(load->src[1].ssa, offset);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, ssbo_alias5)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1);
   create_store(nir_var_mem_ssbo, 1, 0, 0x2);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

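/* Same pattern as ssbo_alias5, but every access carries ACCESS_RESTRICT: the
 * store to binding 1 is then known not to alias binding 0, so the two loads
 * can be folded into one. */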
TEST_F(nir_load_store_vectorize_test, ssbo_alias6)
{
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, ACCESS_RESTRICT);
   create_store(nir_var_mem_ssbo, 1, 0, 0x2, 32, 1, 0xf, ACCESS_RESTRICT);
   create_load(nir_var_mem_ssbo, 0, 0, 0x3, 32, 1, ACCESS_RESTRICT);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 1);
   ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, DISABLED_shared_alias0)
{
   /* TODO: implement type-based alias analysis so that these loads can be
    * combined. this is made a bit more difficult than simply using
    * nir_compare_derefs() because the vectorizer creates loads/stores with
    * casted derefs. The solution would probably be to keep multiple derefs for
    * an entry (one for each load/store combined into it). */
   glsl_struct_field fields[2] = {glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field0"),
                                  glsl_struct_field(glsl_array_type(glsl_uint_type(), 4, 0), "field1")};

   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared, glsl_struct_type(fields, 2, "Struct", false), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   nir_def *index0 = nir_load_local_invocation_index(b);
   nir_def *index1 = nir_load_global_invocation_index(b, 32);
   nir_deref_instr *load_deref = nir_build_deref_array(b, nir_build_deref_struct(b, deref, 0), index0);

   create_shared_load(load_deref, 0x1);
   create_shared_store(nir_build_deref_array(b, nir_build_deref_struct(b, deref, 1), index1), 0x2);
   create_shared_load(load_deref, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 1);
   ASSERT_EQ(load->src[0].ssa, &load_deref->def);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

TEST_F(nir_load_store_vectorize_test, shared_alias1)
{
   nir_variable *var0 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var0");
   nir_variable *var1 = nir_variable_create(b->shader, nir_var_mem_shared, glsl_uint_type(), "var1");
   nir_deref_instr *load_deref = nir_build_deref_var(b, var0);

   create_shared_load(load_deref, 0x1);
   create_shared_store(nir_build_deref_var(b, var1), 0x2);
   create_shared_load(load_deref, 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 1);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_deref, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 1);
   ASSERT_EQ(load->src[0].ssa, &load_deref->def);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "x");
}

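/* Loads whose 64-bit offsets are several GiB apart (0x100000000 vs.
 * 0x200000004) are too far apart to be combined and must stay separate. */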
TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_64bit)
{
   create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x100000000), 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_imm_int64(b, 0x200000004), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_load_distant_indirect_64bit)
{
   nir_def *index_base = nir_u2u64(b, nir_load_local_invocation_index(b));
   nir_def *first = nir_imul_imm(b, index_base, 0x100000000);
   nir_def *second = nir_imul_imm(b, index_base, 0x200000000);
   create_indirect_load(nir_var_mem_ssbo, 0, first, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, second, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

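/* With SSBOs treated as a robust mode, loads at offsets 0xfffffffc and 0x0
 * must not be merged: the combined access would overflow the 32-bit offset
 * range. */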
TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust)
{
   create_load(nir_var_mem_ssbo, 0, 0xfffffffc, 0x1);
   create_load(nir_var_mem_ssbo, 0, 0x0, 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

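/* The stride tests below check which indirect loads may still be combined
 * under robust buffer access: with a stride of 1 the loads must stay
 * separate, strides of 8 and 16 allow full merging, and with a stride of 12
 * only the second and third loads are combined. */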
TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride1)
{
   nir_def *offset = nir_load_local_invocation_index(b);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}

TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride8)
{
   nir_def *offset = nir_load_local_invocation_index(b);
   offset = nir_imul_imm(b, offset, 8);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride12)
{
   nir_def *offset = nir_load_local_invocation_index(b);
   offset = nir_imul_imm(b, offset, 12);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   nir_def *offset_4 = nir_iadd_imm(b, offset, 4);
   create_indirect_load(nir_var_mem_ssbo, 0, offset_4, 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 8), 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 3);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 1);
   ASSERT_EQ(load->src[1].ssa, offset);
   EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");

   load = get_intrinsic(nir_intrinsic_load_ssbo, 1);
   ASSERT_EQ(load->def.bit_size, 32);
   ASSERT_EQ(load->def.num_components, 2);
   ASSERT_EQ(load->src[1].ssa, offset_4);
   EXPECT_INSTR_SWIZZLES(movs[0x2], load, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x3], load, "y");
}

TEST_F(nir_load_store_vectorize_test, ssbo_offset_overflow_robust_indirect_stride16)
{
   nir_def *offset = nir_load_local_invocation_index(b);
   offset = nir_imul_imm(b, offset, 16);
   create_indirect_load(nir_var_mem_ssbo, 0, offset, 0x1);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 4), 0x2);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 8), 0x3);
   create_indirect_load(nir_var_mem_ssbo, 0, nir_iadd_imm(b, offset, 12), 0x4);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 4);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo, false, nir_var_mem_ssbo));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
}

TEST_F(nir_load_store_vectorize_test, shared_offset_overflow_robust_indirect_stride12)
{
   nir_variable *var = nir_variable_create(b->shader, nir_var_mem_shared,
                                           glsl_array_type(glsl_uint_type(), 4, 0), "var");
   nir_deref_instr *deref = nir_build_deref_var(b, var);

   nir_def *index = nir_load_local_invocation_index(b);
   index = nir_imul_imm(b, index, 3);
   create_shared_load(nir_build_deref_array(b, deref, index), 0x1);
   create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index, 1)), 0x2);
   create_shared_load(nir_build_deref_array(b, deref, nir_iadd_imm(b, index, 2)), 0x3);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 3);

   EXPECT_FALSE(run_vectorizer(nir_var_mem_shared, false, nir_var_mem_shared));

   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_deref), 3);
}

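/* The ubo_alignment_* tests below check the align_mul/align_offset that the
 * vectorizer derives from an indirect offset expression: roughly, the largest
 * power-of-two factor of the offset's stride becomes align_mul and the
 * constant addend modulo align_mul becomes align_offset. */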
TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4)
{
   nir_def *offset = nir_load_local_invocation_index(b);
   offset = nir_imul_imm(b, offset, 16);
   offset = nir_iadd_imm(b, offset, 4);
   nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
                                                    0x1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
   EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
   EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
}

TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_4_swapped)
{
   nir_def *offset = nir_load_local_invocation_index(b);
   offset = nir_iadd_imm(b, offset, 1);
   offset = nir_imul_imm(b, offset, 16);
   offset = nir_iadd_imm(b, offset, 4);
   nir_intrinsic_instr *load =
      create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
   EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
   EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
}

/* Check offset % mul != 0 */
TEST_F(nir_load_store_vectorize_test, ubo_alignment_16_20)
{
   nir_def *offset = nir_load_local_invocation_index(b);
   offset = nir_imul_imm(b, offset, 16);
   offset = nir_iadd_imm(b, offset, 20);
   nir_intrinsic_instr *load = create_indirect_load(nir_var_mem_ubo, 0, offset,
                                                    0x1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
   EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
   EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
}

/* Check that we don't upgrade to non-power-of-two alignments. */
TEST_F(nir_load_store_vectorize_test, ubo_alignment_24_4)
{
   nir_def *offset = nir_load_local_invocation_index(b);
   offset = nir_imul_imm(b, offset, 24);
   offset = nir_iadd_imm(b, offset, 4);
   nir_intrinsic_instr *load =
      create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
   EXPECT_EQ(nir_intrinsic_align_mul(load), 8);
   EXPECT_EQ(nir_intrinsic_align_offset(load), 4);
}

/* Check that we don't upgrade to non-power-of-two alignments. */
TEST_F(nir_load_store_vectorize_test, ubo_alignment_64_16_8)
{
   nir_def *x = nir_imul_imm(b, nir_load_local_invocation_index(b), 64);
   nir_def *y = nir_imul_imm(b, nir_load_instance_id(b), 16);
   nir_def *offset = nir_iadd(b, x, y);
   offset = nir_iadd_imm(b, offset, 8);
   nir_intrinsic_instr *load =
      create_indirect_load(nir_var_mem_ubo, 0, offset, 0x1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
   EXPECT_EQ(nir_intrinsic_align_mul(load), 16);
   EXPECT_EQ(nir_intrinsic_align_offset(load), 8);
}

TEST_F(nir_load_store_vectorize_test, ubo_alignment_const_100)
{
   nir_intrinsic_instr *load =
      create_indirect_load(nir_var_mem_ubo, 0, nir_imm_int(b, 100), 0x1);

   EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
   EXPECT_EQ(nir_intrinsic_align_mul(load), NIR_ALIGN_MUL_MAX);
   EXPECT_EQ(nir_intrinsic_align_offset(load), 100);
}