/*
 * Copyright © 2015 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

/**
 * nir_opt_vectorize() aims to vectorize ALU instructions.
 *
 * The default vectorization width is 4.
 * If desired, a callback function which returns the max vectorization width
 * per instruction can be provided.
 *
 * The max vectorization width must be a power of 2.
 */
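
/* As an illustration only (not part of this pass), a width callback for
 * nir_opt_vectorize() could look roughly like the sketch below. The name
 * and the 16-bit policy are made up for the example; see nir_vectorize_cb
 * in nir.h for the exact signature. The only hard requirement is that the
 * returned width is a power of two.
 *
 *    static uint8_t
 *    example_vectorize_width(const nir_instr *instr, const void *data)
 *    {
 *       // Hypothetical policy: pack 16-bit ALU ops as vec4, everything
 *       // else as vec2.
 *       if (instr->type == nir_instr_type_alu &&
 *           nir_instr_as_alu(instr)->def.bit_size == 16)
 *          return 4;
 *
 *       return 2;
 *    }
 *
 *    nir_opt_vectorize(shader, example_vectorize_width, NULL);
 */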

#include "util/u_dynarray.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_vla.h"

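/* Mix `data` (hashed by value, so it must be an lvalue) into `hash` using
 * xxHash32.
 */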
#define HASH(hash, data) XXH32(&data, sizeof(data), hash)

static uint32_t
hash_src(uint32_t hash, const nir_src *src)
{
   void *hash_data = nir_src_is_const(*src) ? NULL : src->ssa;

   return HASH(hash, hash_data);
}

static uint32_t
hash_alu_src(uint32_t hash, const nir_alu_src *src,
             uint32_t num_components, uint32_t max_vec)
{
   /* Hash whether a swizzle accesses elements beyond the maximum
    * vectorization width. For example, with a width of 2 (16-bit vec2),
    * accesses to .x and .y are treated as a different variable than
    * accesses to .z and .w.
    */
   uint32_t swizzle = (src->swizzle[0] & ~(max_vec - 1));
   hash = HASH(hash, swizzle);

   return hash_src(hash, &src->src);
}

static uint32_t
hash_phi_src(uint32_t hash, const nir_phi_instr *phi, const nir_phi_src *src,
             uint32_t max_vec)
{
   hash = HASH(hash, src->pred);

   nir_scalar chased = nir_scalar_chase_movs(nir_get_scalar(src->src.ssa, 0));
   uint32_t swizzle = chased.comp & ~(max_vec - 1);
   hash = HASH(hash, swizzle);

   if (nir_scalar_is_const(chased)) {
      void *data = NULL;
      hash = HASH(hash, data);
   } else if (src->pred->index < phi->instr.block->index) {
      hash = HASH(hash, chased.def);
   } else {
      nir_instr *chased_instr = chased.def->parent_instr;
      hash = HASH(hash, chased_instr->type);

      if (chased_instr->type == nir_instr_type_alu)
         hash = HASH(hash, nir_instr_as_alu(chased_instr)->op);
   }

   return hash;
}

static uint32_t
hash_instr(const void *data)
{
   const nir_instr *instr = (nir_instr *)data;
   uint32_t hash = HASH(0, instr->type);

   if (instr->type == nir_instr_type_phi) {
      nir_phi_instr *phi = nir_instr_as_phi(instr);

      hash = HASH(hash, instr->block);
      hash = HASH(hash, phi->def.bit_size);

      /* The order of phi sources is not guaranteed so hash commutatively. */
      nir_foreach_phi_src(src, phi)
         hash *= hash_phi_src(0, phi, src, instr->pass_flags);

      return hash;
   }

   assert(instr->type == nir_instr_type_alu);
   nir_alu_instr *alu = nir_instr_as_alu(instr);

   hash = HASH(hash, alu->op);
   hash = HASH(hash, alu->def.bit_size);

   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
      hash = hash_alu_src(hash, &alu->src[i],
                          alu->def.num_components,
                          instr->pass_flags);

   return hash;
}

static bool
srcs_equal(const nir_src *src1, const nir_src *src2)
{
   return src1->ssa == src2->ssa ||
          (nir_src_is_const(*src1) && nir_src_is_const(*src2));
}

static bool
alu_srcs_equal(const nir_alu_src *src1, const nir_alu_src *src2,
               uint32_t max_vec)
{
   uint32_t mask = ~(max_vec - 1);
   if ((src1->swizzle[0] & mask) != (src2->swizzle[0] & mask))
      return false;

   return srcs_equal(&src1->src, &src2->src);
}

static bool
phi_srcs_equal(nir_block *block, const nir_phi_src *src1,
               const nir_phi_src *src2, uint32_t max_vec)
{
   if (src1->pred != src2->pred)
      return false;

   /* Since phi sources don't have swizzles, they are swizzled using movs.
    * Get the real sources first.
    */
   nir_scalar chased1 = nir_scalar_chase_movs(nir_get_scalar(src1->src.ssa, 0));
   nir_scalar chased2 = nir_scalar_chase_movs(nir_get_scalar(src2->src.ssa, 0));

   if (nir_scalar_is_const(chased1) && nir_scalar_is_const(chased2))
      return true;

   uint32_t mask = ~(max_vec - 1);
   if ((chased1.comp & mask) != (chased2.comp & mask))
      return false;

   /* For phi sources whose defs we have already processed, we require that
    * they point to the same def, like we do for ALU instructions.
    */
   if (src1->pred->index < block->index)
      return chased1.def == chased2.def;

   /* Otherwise (i.e., for loop back-edges), we haven't processed the sources
    * yet, so they haven't been vectorized. In this case, try to guess if they
    * could be vectorized later. Keep it simple for now: if they are the same
    * type of instruction and, if ALU, have the same operation, assume they
    * might be vectorized later. Although this won't be true in general, the
    * heuristic is probably good enough in practice: since we check that the
    * other (forward-edge) sources are vectorized, chances are the back-edge
    * will also be vectorized.
    */
   nir_instr *chased_instr1 = chased1.def->parent_instr;
   nir_instr *chased_instr2 = chased2.def->parent_instr;

   if (chased_instr1->type != chased_instr2->type)
      return false;

   if (chased_instr1->type != nir_instr_type_alu)
      return true;

   return nir_instr_as_alu(chased_instr1)->op ==
          nir_instr_as_alu(chased_instr2)->op;
}

static bool
instrs_equal(const void *data1, const void *data2)
{
   const nir_instr *instr1 = (nir_instr *)data1;
   const nir_instr *instr2 = (nir_instr *)data2;

   if (instr1->type != instr2->type)
      return false;

   if (instr1->type == nir_instr_type_phi) {
      if (instr1->block != instr2->block)
         return false;

      nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
      nir_phi_instr *phi2 = nir_instr_as_phi(instr2);

      if (phi1->def.bit_size != phi2->def.bit_size)
         return false;

      nir_foreach_phi_src(src1, phi1) {
         nir_phi_src *src2 = nir_phi_get_src_from_block(phi2, src1->pred);

         if (!phi_srcs_equal(instr1->block, src1, src2, instr1->pass_flags))
            return false;
      }

      return true;
   }

   assert(instr1->type == nir_instr_type_alu);
   assert(instr2->type == nir_instr_type_alu);

   nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
   nir_alu_instr *alu2 = nir_instr_as_alu(instr2);

   if (alu1->op != alu2->op)
      return false;

   if (alu1->def.bit_size != alu2->def.bit_size)
      return false;

   for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
      if (!alu_srcs_equal(&alu1->src[i], &alu2->src[i], instr1->pass_flags))
         return false;
   }

   return true;
}

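/* Returns true if this instruction is a candidate for vectorization: either
 * a phi narrower than the max width stored in pass_flags, or a per-component
 * ALU op (not a mov) that is narrower than the max width and whose source
 * swizzles each stay within one max-width-aligned group of components.
 */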
static bool
instr_can_rewrite(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu: {
      nir_alu_instr *alu = nir_instr_as_alu(instr);

      /* Don't try to vectorize movs. Either they'll be handled by copy
       * prop, or they're actually necessary and trying to vectorize them
       * would result in fighting with copy prop.
       */
      if (alu->op == nir_op_mov)
         return false;

      /* no need to hash instructions which are already vectorized */
      if (alu->def.num_components >= instr->pass_flags)
         return false;

      if (nir_op_infos[alu->op].output_size != 0)
         return false;

      for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
         if (nir_op_infos[alu->op].input_sizes[i] != 0)
            return false;

         /* Don't hash instructions whose source swizzles already cross a
          * max-width-aligned component boundary: these are better
          * scalarized first.
          */
         uint32_t mask = ~(instr->pass_flags - 1);
         for (unsigned j = 1; j < alu->def.num_components; j++) {
            if ((alu->src[i].swizzle[0] & mask) != (alu->src[i].swizzle[j] & mask))
               return false;
         }
      }

      return true;
   }

   case nir_instr_type_phi: {
      nir_phi_instr *phi = nir_instr_as_phi(instr);
      return phi->def.num_components < instr->pass_flags;
   }

   default:
      break;
   }

   return false;
}

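/* Rewrites all uses of def1 and def2 to read from new_def instead, where
 * def1's components occupy the first def1->num_components channels of
 * new_def and def2's components follow them. ALU uses are rewritten in
 * place with adjusted swizzles; other uses go through nir_swizzle() movs.
 * The two original instructions are removed afterwards.
 */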
static void
rewrite_uses(nir_builder *b, struct set *instr_set, nir_def *def1,
             nir_def *def2, nir_def *new_def)
{
   /* update all ALU uses */
   nir_foreach_use_safe(src, def1) {
      nir_instr *user_instr = nir_src_parent_instr(src);
      if (user_instr->type == nir_instr_type_alu) {
         /* Check if user is found in the hashset */
         struct set_entry *entry = _mesa_set_search(instr_set, user_instr);

         /* For ALU instructions, rewrite the source directly to avoid a
          * round-trip through copy propagation.
          */
         nir_src_rewrite(src, new_def);

         /* Rehash user if it was found in the hashset */
         if (entry && entry->key == user_instr) {
            _mesa_set_remove(instr_set, entry);
            _mesa_set_add(instr_set, user_instr);
         }
      }
   }

   nir_foreach_use_safe(src, def2) {
      if (nir_src_parent_instr(src)->type == nir_instr_type_alu) {
         /* For ALU instructions, rewrite the source directly to avoid a
          * round-trip through copy propagation.
          */
         nir_src_rewrite(src, new_def);

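         /* def2's components were placed after def1's in new_def, so shift
          * this use's swizzle accordingly.
          */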
         nir_alu_src *alu_src = container_of(src, nir_alu_src, src);
         nir_alu_instr *use = nir_instr_as_alu(nir_src_parent_instr(src));
         unsigned components =
            nir_ssa_alu_instr_src_components(use, alu_src - use->src);
         for (unsigned i = 0; i < components; i++)
            alu_src->swizzle[i] += def1->num_components;
      }
   }

   /* update all other uses if there are any */
   unsigned swiz[NIR_MAX_VEC_COMPONENTS];

   if (!nir_def_is_unused(def1)) {
      for (unsigned i = 0; i < def1->num_components; i++)
         swiz[i] = i;
      nir_def *new_def1 = nir_swizzle(b, new_def, swiz, def1->num_components);
      nir_def_rewrite_uses(def1, new_def1);
   }

   if (!nir_def_is_unused(def2)) {
      for (unsigned i = 0; i < def2->num_components; i++)
         swiz[i] = i + def1->num_components;
      nir_def *new_def2 = nir_swizzle(b, new_def, swiz, def2->num_components);
      nir_def_rewrite_uses(def2, new_def2);
   }

   nir_instr_remove(def1->parent_instr);
   nir_instr_remove(def2->parent_instr);
}

static nir_instr *
instr_try_combine_phi(struct set *instr_set, nir_phi_instr *phi1, nir_phi_instr *phi2)
{
   assert(phi1->def.bit_size == phi2->def.bit_size);
   unsigned phi1_components = phi1->def.num_components;
   unsigned phi2_components = phi2->def.num_components;
   unsigned total_components = phi1_components + phi2_components;

   assert(phi1->instr.pass_flags == phi2->instr.pass_flags);
   if (total_components > phi1->instr.pass_flags)
      return NULL;

   assert(phi1->instr.block == phi2->instr.block);
   nir_block *block = phi1->instr.block;

   nir_builder b = nir_builder_at(nir_after_instr(&phi1->instr));
   nir_phi_instr *new_phi = nir_phi_instr_create(b.shader);
   nir_def_init(&new_phi->instr, &new_phi->def, total_components,
                phi1->def.bit_size);
   nir_builder_instr_insert(&b, &new_phi->instr);
   new_phi->instr.pass_flags = phi1->instr.pass_flags;

   assert(exec_list_length(&phi1->srcs) == exec_list_length(&phi2->srcs));

   nir_foreach_phi_src(src1, phi1) {
      nir_phi_src *src2 = nir_phi_get_src_from_block(phi2, src1->pred);
      nir_block *pred_block = src1->pred;

      nir_scalar new_srcs[NIR_MAX_VEC_COMPONENTS];

      for (unsigned i = 0; i < phi1_components; i++) {
         nir_scalar s = nir_get_scalar(src1->src.ssa, i);
         new_srcs[i] = nir_scalar_chase_movs(s);
      }

      for (unsigned i = 0; i < phi2_components; i++) {
         nir_scalar s = nir_get_scalar(src2->src.ssa, i);
         new_srcs[phi1_components + i] = nir_scalar_chase_movs(s);
      }

      nir_def *new_src;

      if (nir_scalar_is_const(new_srcs[0])) {
         nir_const_value value[NIR_MAX_VEC_COMPONENTS];

         for (unsigned i = 0; i < total_components; i++) {
            assert(nir_scalar_is_const(new_srcs[i]));
            value[i] = nir_scalar_as_const_value(new_srcs[i]);
         }

         b.cursor = nir_after_block_before_jump(pred_block);
         unsigned bit_size = src1->src.ssa->bit_size;
         new_src = nir_build_imm(&b, total_components, bit_size, value);
      } else if (pred_block->index < block->index) {
         nir_def *def = new_srcs[0].def;
         unsigned swizzle[NIR_MAX_VEC_COMPONENTS];

         for (unsigned i = 0; i < total_components; i++) {
            assert(new_srcs[i].def == def);
            swizzle[i] = new_srcs[i].comp;
         }

         b.cursor = nir_after_instr_and_phis(def->parent_instr);
         new_src = nir_swizzle(&b, def, swizzle, total_components);
      } else {
         /* This is a loop back-edge so we haven't vectorized the sources yet.
          * Combine them in a vec which, if they are vectorized later, will be
          * cleaned up by copy propagation.
          */
         b.cursor = nir_after_block_before_jump(pred_block);
         new_src = nir_vec_scalars(&b, new_srcs, total_components);
      }

      nir_phi_src *new_phi_src =
         nir_phi_instr_add_src(new_phi, src1->pred, new_src);
      list_addtail(&new_phi_src->src.use_link, &new_src->uses);
   }

   b.cursor = nir_after_phis(block);
   rewrite_uses(&b, instr_set, &phi1->def, &phi2->def, &new_phi->def);

   return &new_phi->instr;
}

static nir_instr *
instr_try_combine_alu(struct set *instr_set, nir_alu_instr *alu1, nir_alu_instr *alu2)
{
   assert(alu1->def.bit_size == alu2->def.bit_size);
   unsigned alu1_components = alu1->def.num_components;
   unsigned alu2_components = alu2->def.num_components;
   unsigned total_components = alu1_components + alu2_components;

   assert(alu1->instr.pass_flags == alu2->instr.pass_flags);
   if (total_components > alu1->instr.pass_flags)
      return NULL;

   nir_builder b = nir_builder_at(nir_after_instr(&alu1->instr));

   nir_alu_instr *new_alu = nir_alu_instr_create(b.shader, alu1->op);
   nir_def_init(&new_alu->instr, &new_alu->def, total_components,
                alu1->def.bit_size);
   new_alu->instr.pass_flags = alu1->instr.pass_flags;

   /* If either channel is exact, we have to preserve it even if it's
    * not optimal for other channels.
    */
   new_alu->exact = alu1->exact || alu2->exact;

   /* fp_fast_math is a set of FLOAT_CONTROLS_*_PRESERVE_*.  Preserve anything
    * preserved by either instruction.
    */
   new_alu->fp_fast_math = alu1->fp_fast_math | alu2->fp_fast_math;

   /* If all channels don't wrap, we can say that the whole vector doesn't
    * wrap.
    */
   new_alu->no_signed_wrap = alu1->no_signed_wrap && alu2->no_signed_wrap;
   new_alu->no_unsigned_wrap = alu1->no_unsigned_wrap && alu2->no_unsigned_wrap;

   for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
      /* handle constant merging case */
      if (alu1->src[i].src.ssa != alu2->src[i].src.ssa) {
         nir_const_value *c1 = nir_src_as_const_value(alu1->src[i].src);
         nir_const_value *c2 = nir_src_as_const_value(alu2->src[i].src);
         assert(c1 && c2);
         nir_const_value value[NIR_MAX_VEC_COMPONENTS];
         unsigned bit_size = alu1->src[i].src.ssa->bit_size;

         for (unsigned j = 0; j < total_components; j++) {
            value[j].u64 = j < alu1_components
                              ? c1[alu1->src[i].swizzle[j]].u64
                              : c2[alu2->src[i].swizzle[j - alu1_components]].u64;
         }
         nir_def *def = nir_build_imm(&b, total_components, bit_size, value);

         new_alu->src[i].src = nir_src_for_ssa(def);
         for (unsigned j = 0; j < total_components; j++)
            new_alu->src[i].swizzle[j] = j;
         continue;
      }

      new_alu->src[i].src = alu1->src[i].src;

      for (unsigned j = 0; j < alu1_components; j++)
         new_alu->src[i].swizzle[j] = alu1->src[i].swizzle[j];

      for (unsigned j = 0; j < alu2_components; j++) {
         new_alu->src[i].swizzle[j + alu1_components] =
            alu2->src[i].swizzle[j];
      }
   }

   nir_builder_instr_insert(&b, &new_alu->instr);
   rewrite_uses(&b, instr_set, &alu1->def, &alu2->def, &new_alu->def);

   return &new_alu->instr;
}

/*
 * Tries to combine two instructions whose sources are different components
 * of the same defs into one vectorized instruction. Note that instr1 should
 * dominate instr2.
 */
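/* For illustration, in rough SSA notation (not exact NIR syntax):
 *
 *    ssa_3 = fadd ssa_1.x, ssa_2.x
 *    ssa_4 = fadd ssa_1.y, ssa_2.y
 *
 * can be combined into
 *
 *    ssa_5 = fadd ssa_1.xy, ssa_2.xy
 *
 * after which rewrite_uses() redirects users of ssa_3 and ssa_4 to ssa_5.x
 * and ssa_5.y.
 */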
static nir_instr *
instr_try_combine(struct set *instr_set, nir_instr *instr1, nir_instr *instr2)
{
   switch (instr1->type) {
   case nir_instr_type_alu:
      assert(instr2->type == nir_instr_type_alu);
      return instr_try_combine_alu(instr_set, nir_instr_as_alu(instr1),
                                   nir_instr_as_alu(instr2));

   case nir_instr_type_phi:
      assert(instr2->type == nir_instr_type_phi);
      return instr_try_combine_phi(instr_set, nir_instr_as_phi(instr1),
                                   nir_instr_as_phi(instr2));

   default:
      unreachable("Unsupported instruction type");
   }
}

static struct set *
vec_instr_set_create(void)
{
   return _mesa_set_create(NULL, hash_instr, instrs_equal);
}

static void
vec_instr_set_destroy(struct set *instr_set)
{
   _mesa_set_destroy(instr_set, NULL);
}

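/* Adds an instruction to the set, or, if an equivalent instruction that
 * dominates it is already present, tries to combine the two. Returns true
 * if instructions were combined (i.e. progress was made).
 */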
static bool
vec_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr,
                             nir_vectorize_cb filter, void *data)
{
   /* Store the max vectorization width in pass_flags: it is used to hash
    * swizzles.
    */
   instr->pass_flags = filter ? filter(instr, data) : 4;
   assert(util_is_power_of_two_or_zero(instr->pass_flags));

   if (!instr_can_rewrite(instr))
      return false;

   struct set_entry *entry = _mesa_set_search(instr_set, instr);
   if (entry) {
      nir_instr *old_instr = (nir_instr *)entry->key;

      /* We cannot combine the instructions if the old one doesn't dominate
       * the new one. Since we will never encounter a block again that is
       * dominated by the old instruction, overwrite it with the new one in
       * the instruction set.
       */
      if (!nir_block_dominates(old_instr->block, instr->block)) {
         entry->key = instr;
         return false;
      }

      _mesa_set_remove(instr_set, entry);
      nir_instr *new_instr = instr_try_combine(instr_set, old_instr, instr);
      if (new_instr) {
         if (instr_can_rewrite(new_instr))
            _mesa_set_add(instr_set, new_instr);
         return true;
      }
   }

   _mesa_set_add(instr_set, instr);
   return false;
}

static bool
nir_opt_vectorize_impl(nir_function_impl *impl,
                       nir_vectorize_cb filter, void *data)
{
   struct set *instr_set = vec_instr_set_create();

   nir_metadata_require(impl, nir_metadata_control_flow);

   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         progress |= vec_instr_set_add_or_rewrite(instr_set, instr, filter, data);
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_control_flow);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   vec_instr_set_destroy(instr_set);
   return progress;
}

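/* Pass entry point. Typically run from a driver's optimization loop, e.g.
 * (illustrative): NIR_PASS(progress, shader, nir_opt_vectorize, filter, data);
 */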
bool
nir_opt_vectorize(nir_shader *shader, nir_vectorize_cb filter,
                  void *data)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      progress |= nir_opt_vectorize_impl(impl, filter, data);
   }

   return progress;
}