xref: /aosp_15_r20/external/mesa3d/src/panfrost/compiler/test/test-optimizer.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2021 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "bi_builder.h"
25 #include "bi_test.h"
26 #include "compiler.h"
27 
28 #include <gtest/gtest.h>
29 
30 static void
bi_optimizer(bi_context * ctx)31 bi_optimizer(bi_context *ctx)
32 {
33    bi_opt_mod_prop_forward(ctx);
34    bi_opt_mod_prop_backward(ctx);
35    bi_opt_dce(ctx, true);
36 }
37 
/* CASE(instr, expected): hand two program bodies, plus the bi_optimizer
 * pipeline, to INSTRUCTION_CASE.
 *
 * Both bodies define `reg` first so it has a consistent variable index, and
 * both finish by passing it to bi_kaboom, an instruction that cannot be dead
 * code eliminated, so the program is nontrivial. `instr` and `expected` may
 * refer to `reg` and to the builder `b`; `b` is not defined in this file and
 * is presumably introduced by INSTRUCTION_CASE (see bi_test.h to confirm).
 *
 * The expansion has no trailing semicolon on purpose: the call site supplies
 * it, so `CASE(...);` does not pick up an extra empty statement.
 */
#define CASE(instr, expected)                                                  \
   INSTRUCTION_CASE(                                                           \
      {                                                                        \
         UNUSED bi_index reg = bi_temp(b->shader);                             \
         instr;                                                                \
         bi_kaboom(b, reg);                                                    \
      },                                                                       \
      {                                                                        \
         UNUSED bi_index reg = bi_temp(b->shader);                             \
         expected;                                                             \
         bi_kaboom(b, reg);                                                    \
      },                                                                       \
      bi_optimizer)

/* NEGCASE(instr): negative case, where the expected program is the input
 * program itself. */
#define NEGCASE(instr) CASE(instr, instr)
56 
57 class Optimizer : public testing::Test {
58  protected:
Optimizer()59    Optimizer()
60    {
61       mem_ctx = ralloc_context(NULL);
62 
63       x = bi_register(1);
64       y = bi_register(2);
65       negabsx = bi_neg(bi_abs(x));
66    }
67 
~Optimizer()68    ~Optimizer()
69    {
70       ralloc_free(mem_ctx);
71    }
72 
73    void *mem_ctx;
74 
75    bi_index x;
76    bi_index y;
77    bi_index negabsx;
78 };
79 
TEST_F(Optimizer, FusedFABSNEG)
{
   /* Operands routed through a 32-bit FABSNEG before reaching FADD.f32. In
    * every case the expected program feeds the operand, modifiers included,
    * straight into the FADD. */
   const bi_index operands[] = {bi_abs(x), bi_neg(x), negabsx, x};

   for (const bi_index &src : operands) {
      CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, src), y),
           bi_fadd_f32_to(b, reg, src, y));
   }

   /* Same expectation for FMIN.f32, with a negated second source. */
   CASE(
      {
         bi_index absneg = bi_fabsneg_f32(b, negabsx);
         bi_fmin_f32_to(b, reg, absneg, bi_neg(y));
      },
      bi_fmin_f32_to(b, reg, negabsx, bi_neg(y)));
}
97 
TEST_F(Optimizer, FusedFABSNEGForFP16)
{
   /* v2f16 FABSNEG feeding FADD.v2f16: expected to use negabsx directly. */
   CASE(
      {
         bi_index absneg = bi_fabsneg_v2f16(b, negabsx);
         bi_fadd_v2f16_to(b, reg, absneg, y);
      },
      bi_fadd_v2f16_to(b, reg, negabsx, y));

   /* v2f16 FABSNEG feeding FMIN.v2f16 with a negated second source. */
   CASE(
      {
         bi_index absneg = bi_fabsneg_v2f16(b, negabsx);
         bi_fmin_v2f16_to(b, reg, absneg, bi_neg(y));
      },
      bi_fmin_v2f16_to(b, reg, negabsx, bi_neg(y)));
}
106 
TEST_F(Optimizer, FuseFADD_F32WithEqualSourcesAbsAbsAndClamp)
{
   /* FABSNEG on the first source, |x| on the second, clamped FADD.f32. */
   CASE(
      {
         bi_index lhs = bi_fabsneg_f32(b, bi_abs(x));
         bi_instr *add = bi_fadd_f32_to(b, reg, lhs, bi_abs(x));
         add->clamp = BI_CLAMP_CLAMP_0_1;
      },
      {
         bi_instr *add = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
         add->clamp = BI_CLAMP_CLAMP_0_1;
      });

   /* Mirror image: FABSNEG on the second source. */
   CASE(
      {
         bi_index rhs = bi_fabsneg_f32(b, bi_abs(x));
         bi_instr *add = bi_fadd_f32_to(b, reg, bi_abs(x), rhs);
         add->clamp = BI_CLAMP_CLAMP_0_1;
      },
      {
         bi_instr *add = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
         add->clamp = BI_CLAMP_CLAMP_0_1;
      });

   /* A clamping FCLAMP.f32 over |x| + |x|: the expected program carries the
    * clamp on the FADD itself. */
   CASE(
      {
         bi_index sum = bi_fadd_f32(b, bi_abs(x), bi_abs(x));
         bi_instr *fclamp = bi_fclamp_f32_to(b, reg, sum);
         fclamp->clamp = BI_CLAMP_CLAMP_0_INF;
      },
      {
         bi_instr *add = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
         add->clamp = BI_CLAMP_CLAMP_0_INF;
      });
}
142 
TEST_F(Optimizer, FuseFADD_V2F16WithDifferentSourcesAbsAbsAndClamp)
{
   /* FABSNEG on the first source, |y| on the second, clamped FADD.v2f16. */
   CASE(
      {
         bi_index lhs = bi_fabsneg_v2f16(b, bi_abs(x));
         bi_instr *add = bi_fadd_v2f16_to(b, reg, lhs, bi_abs(y));
         add->clamp = BI_CLAMP_CLAMP_0_1;
      },
      {
         bi_instr *add = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
         add->clamp = BI_CLAMP_CLAMP_0_1;
      });

   /* Mirror image: FABSNEG on the second source. */
   CASE(
      {
         bi_index rhs = bi_fabsneg_v2f16(b, bi_abs(y));
         bi_instr *add = bi_fadd_v2f16_to(b, reg, bi_abs(x), rhs);
         add->clamp = BI_CLAMP_CLAMP_0_1;
      },
      {
         bi_instr *add = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
         add->clamp = BI_CLAMP_CLAMP_0_1;
      });

   /* A clamping FCLAMP.v2f16 over |x| + |y|: the expected program carries the
    * clamp on the FADD itself. */
   CASE(
      {
         bi_index sum = bi_fadd_v2f16(b, bi_abs(x), bi_abs(y));
         bi_instr *fclamp = bi_fclamp_v2f16_to(b, reg, sum);
         fclamp->clamp = BI_CLAMP_CLAMP_0_INF;
      },
      {
         bi_instr *add = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
         add->clamp = BI_CLAMP_CLAMP_0_INF;
      });
}
178 
TEST_F(Optimizer, AvoidFADD_V2F16WithEqualSourcesAbsAbsAndClamp)
{
   /* Negative cases: with both FADD.v2f16 sources being |x|, each program is
    * expected to come out of the pipeline unchanged. */

   /* FABSNEG on the first source. */
   NEGCASE({
      bi_index lhs = bi_fabsneg_v2f16(b, bi_abs(x));
      bi_instr *add = bi_fadd_v2f16_to(b, reg, lhs, bi_abs(x));
      add->clamp = BI_CLAMP_CLAMP_0_1;
   });

   /* FABSNEG on the second source. */
   NEGCASE({
      bi_index rhs = bi_fabsneg_v2f16(b, bi_abs(x));
      bi_instr *add = bi_fadd_v2f16_to(b, reg, bi_abs(x), rhs);
      add->clamp = BI_CLAMP_CLAMP_0_1;
   });

   /* Clamping FCLAMP over the sum. */
   NEGCASE({
      bi_index sum = bi_fadd_v2f16(b, bi_abs(x), bi_abs(x));
      bi_instr *fclamp = bi_fclamp_v2f16_to(b, reg, sum);
      fclamp->clamp = BI_CLAMP_CLAMP_0_INF;
   });
}
199 
TEST_F(Optimizer, SwizzlesComposedForFP16)
{
   /* negabsx with the (true, false) 16-bit swizzle applied once. */
   const bi_index swapped = bi_swz_16(negabsx, true, false);

   /* Swizzle on the FABSNEG source. */
   CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, swapped), y),
        bi_fadd_v2f16_to(b, reg, swapped, y));

   /* Swizzle on the FABSNEG result. */
   CASE(
      {
         bi_index absneg = bi_fabsneg_v2f16(b, negabsx);
         bi_fadd_v2f16_to(b, reg, bi_swz_16(absneg, true, false), y);
      },
      bi_fadd_v2f16_to(b, reg, swapped, y));

   /* Swizzle on both: the expected program uses plain negabsx. */
   CASE(
      {
         bi_index absneg = bi_fabsneg_v2f16(b, swapped);
         bi_fadd_v2f16_to(b, reg, bi_swz_16(absneg, true, false), y);
      },
      bi_fadd_v2f16_to(b, reg, negabsx, y));

   /* Half select on the FABSNEG source, swizzle on its result: the expected
    * program keeps only the half select. Lower half first, then upper. */
   for (unsigned upper = 0; upper < 2; ++upper) {
      CASE(
         {
            bi_index absneg = bi_fabsneg_v2f16(b, bi_half(negabsx, upper));
            bi_fadd_v2f16_to(b, reg, bi_swz_16(absneg, true, false), y);
         },
         bi_fadd_v2f16_to(b, reg, bi_half(negabsx, upper), y));
   }
}
229 
TEST_F(Optimizer, PreserveWidens)
{
   /* A half select on the FABSNEG.f32 source must be passed through to the
    * FADD.f32 source. Lower half first, then upper. */
   for (unsigned upper = 0; upper < 2; ++upper) {
      CASE(
         {
            bi_index absneg = bi_fabsneg_f32(b, bi_half(negabsx, upper));
            bi_fadd_f32_to(b, reg, absneg, y);
         },
         bi_fadd_f32_to(b, reg, bi_half(negabsx, upper), y));
   }

   /* Both FADD sources widened from different halves of x. */
   CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)),
                       bi_fabsneg_f32(b, bi_half(x, false))),
        bi_fadd_f32_to(b, reg, bi_half(x, true), bi_half(x, false)));
}
243 
TEST_F(Optimizer, DoNotMixSizesForFABSNEG)
{
   /* Mixing sizes for fabsneg would be wrong, so both programs are expected
    * to stay as they are. */

   /* 16-bit vector FABSNEG consumed by a 32-bit FADD. */
   NEGCASE({
      bi_index absneg = bi_fabsneg_v2f16(b, negabsx);
      bi_fadd_f32_to(b, reg, absneg, y);
   });

   /* 32-bit FABSNEG consumed by a 16-bit vector FADD. */
   NEGCASE({
      bi_index absneg = bi_fabsneg_f32(b, negabsx);
      bi_fadd_v2f16_to(b, reg, absneg, y);
   });
}
250 
TEST_F(Optimizer, AvoidZeroAndFABSNEGFootguns)
{
   /* Addition by 0.0 looks like a convenient absneg primitive, but it has
    * footguns around signed zero and round modes. Make sure none of these
    * get fused as if they were FABSNEG. */

   bi_index zero = bi_zero();

   /* First operand of the inner "add zero" in each negative case. */
   const bi_index operands[] = {bi_abs(x), bi_neg(x), bi_neg(bi_abs(x)), x};

   for (const bi_index &src : operands) {
      NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, src, zero), y));
   }
}
264 
TEST_F(Optimizer, ClampsPropagated)
{
   /* 32-bit: a clamping FCLAMP over an FADD result. The expected program is
    * the FADD alone, carrying the clamp. */
   CASE(
      {
         bi_index sum = bi_fadd_f32(b, x, y);
         bi_instr *fclamp = bi_fclamp_f32_to(b, reg, sum);
         fclamp->clamp = BI_CLAMP_CLAMP_0_INF;
      },
      {
         bi_instr *add = bi_fadd_f32_to(b, reg, x, y);
         add->clamp = BI_CLAMP_CLAMP_0_INF;
      });

   /* Same shape for v2f16, with a different clamp mode. */
   CASE(
      {
         bi_index sum = bi_fadd_v2f16(b, x, y);
         bi_instr *fclamp = bi_fclamp_v2f16_to(b, reg, sum);
         fclamp->clamp = BI_CLAMP_CLAMP_0_1;
      },
      {
         bi_instr *add = bi_fadd_v2f16_to(b, reg, x, y);
         add->clamp = BI_CLAMP_CLAMP_0_1;
      });
}
287 
TEST_F(Optimizer,ClampsComposed)288 TEST_F(Optimizer, ClampsComposed)
289 {
290    CASE(
291       {
292          bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
293          bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
294          I->clamp = BI_CLAMP_CLAMP_M1_1;
295          J->clamp = BI_CLAMP_CLAMP_0_INF;
296       },
297       {
298          bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
299          I->clamp = BI_CLAMP_CLAMP_0_1;
300       });
301 
302    CASE(
303       {
304          bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
305          bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
306          I->clamp = BI_CLAMP_CLAMP_0_1;
307          J->clamp = BI_CLAMP_CLAMP_0_INF;
308       },
309       {
310          bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
311          I->clamp = BI_CLAMP_CLAMP_0_1;
312       });
313 
314    CASE(
315       {
316          bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
317          bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
318          I->clamp = BI_CLAMP_CLAMP_0_INF;
319          J->clamp = BI_CLAMP_CLAMP_0_INF;
320       },
321       {
322          bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
323          I->clamp = BI_CLAMP_CLAMP_0_INF;
324       });
325 
326    CASE(
327       {
328          bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
329          bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
330          I->clamp = BI_CLAMP_CLAMP_M1_1;
331          J->clamp = BI_CLAMP_CLAMP_0_INF;
332       },
333       {
334          bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
335          I->clamp = BI_CLAMP_CLAMP_0_1;
336       });
337 
338    CASE(
339       {
340          bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
341          bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
342          I->clamp = BI_CLAMP_CLAMP_0_1;
343          J->clamp = BI_CLAMP_CLAMP_0_INF;
344       },
345       {
346          bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
347          I->clamp = BI_CLAMP_CLAMP_0_1;
348       });
349 
350    CASE(
351       {
352          bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
353          bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
354          I->clamp = BI_CLAMP_CLAMP_0_INF;
355          J->clamp = BI_CLAMP_CLAMP_0_INF;
356       },
357       {
358          bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
359          I->clamp = BI_CLAMP_CLAMP_0_INF;
360       });
361 }
362 
TEST_F(Optimizer, DoNotMixSizesWhenClamping)
{
   /* Negative cases: the FCLAMP and the FADD it consumes have different
    * sizes, so both programs are expected to stay unchanged. */

   /* 32-bit FCLAMP over a v2f16 FADD. */
   NEGCASE({
      bi_index sum = bi_fadd_v2f16(b, x, y);
      bi_instr *fclamp = bi_fclamp_f32_to(b, reg, sum);
      fclamp->clamp = BI_CLAMP_CLAMP_0_1;
   });

   /* v2f16 FCLAMP over a 32-bit FADD. */
   NEGCASE({
      bi_index sum = bi_fadd_f32(b, x, y);
      bi_instr *fclamp = bi_fclamp_v2f16_to(b, reg, sum);
      fclamp->clamp = BI_CLAMP_CLAMP_0_1;
   });
}
375 
TEST_F(Optimizer, DoNotUseAdditionByZeroForClamps)
{
   bi_index zero = bi_zero();

   /* Because of signed zeros, a clamped addition by 0.0 cannot stand in for a
    * clamp. Both programs are expected to stay unchanged. */

   /* 32-bit */
   NEGCASE({
      bi_index sum = bi_fadd_f32(b, x, y);
      bi_instr *plus_zero = bi_fadd_f32_to(b, reg, sum, zero);
      plus_zero->clamp = BI_CLAMP_CLAMP_M1_1;
   });

   /* v2f16 */
   NEGCASE({
      bi_index sum = bi_fadd_v2f16(b, x, y);
      bi_instr *plus_zero = bi_fadd_v2f16_to(b, reg, sum, zero);
      plus_zero->clamp = BI_CLAMP_CLAMP_0_1;
   });
}
391 
TEST_F(Optimizer, FuseComparisonsWithDISCARD)
{
   /* 32-bit compare feeding DISCARD.b32: the expected program is a single
    * DISCARD.f32 with the same comparison, for each result type used here. */
   CASE(
      {
         bi_index cmp = bi_fcmp_f32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1);
         bi_discard_b32(b, cmp);
      },
      bi_discard_f32(b, x, y, BI_CMPF_LE));

   CASE(
      {
         bi_index cmp = bi_fcmp_f32(b, x, y, BI_CMPF_NE, BI_RESULT_TYPE_I1);
         bi_discard_b32(b, cmp);
      },
      bi_discard_f32(b, x, y, BI_CMPF_NE));

   CASE(
      {
         bi_index cmp = bi_fcmp_f32(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1);
         bi_discard_b32(b, cmp);
      },
      bi_discard_f32(b, x, y, BI_CMPF_EQ));

   /* v2f16 compare with one half of the result discarded: the expected
    * DISCARD.f32 reads the matching half of each source. */
   for (unsigned upper = 0; upper < 2; ++upper) {
      CASE(
         {
            bi_index cmp =
               bi_fcmp_v2f16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1);
            bi_discard_b32(b, bi_half(cmp, upper));
         },
         bi_discard_f32(b, bi_half(x, upper), bi_half(y, upper), BI_CMPF_LE));

      CASE(
         {
            bi_index cmp =
               bi_fcmp_v2f16(b, x, y, BI_CMPF_NE, BI_RESULT_TYPE_I1);
            bi_discard_b32(b, bi_half(cmp, upper));
         },
         bi_discard_f32(b, bi_half(x, upper), bi_half(y, upper), BI_CMPF_NE));

      CASE(
         {
            bi_index cmp =
               bi_fcmp_v2f16(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1);
            bi_discard_b32(b, bi_half(cmp, upper));
         },
         bi_discard_f32(b, bi_half(x, upper), bi_half(y, upper), BI_CMPF_EQ));
   }
}
420 
TEST_F(Optimizer, DoNotFuseSpecialComparisons)
{
   /* Negative cases: with the GTLT and TOTAL comparison functions the
    * compare + discard pair is expected to stay as written. */
   NEGCASE({
      bi_index cmp = bi_fcmp_f32(b, x, y, BI_CMPF_GTLT, BI_RESULT_TYPE_F1);
      bi_discard_b32(b, cmp);
   });

   NEGCASE({
      bi_index cmp = bi_fcmp_f32(b, x, y, BI_CMPF_TOTAL, BI_RESULT_TYPE_F1);
      bi_discard_b32(b, cmp);
   });
}
428 
TEST_F(Optimizer,FuseResultType)429 TEST_F(Optimizer, FuseResultType)
430 {
431    CASE(bi_mux_i32_to(b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
432                       bi_fcmp_f32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
433                       BI_MUX_INT_ZERO),
434         bi_fcmp_f32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1));
435 
436    CASE(bi_mux_i32_to(
437            b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
438            bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
439            BI_MUX_INT_ZERO),
440         bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
441                        BI_RESULT_TYPE_F1));
442 
443    CASE(bi_mux_i32_to(
444            b, reg, bi_imm_u32(0), bi_imm_u32(1),
445            bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
446            BI_MUX_INT_ZERO),
447         bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
448                        BI_RESULT_TYPE_I1));
449 
450    CASE(bi_mux_v2i16_to(b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0),
451                         bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE,
452                                       BI_RESULT_TYPE_M1),
453                         BI_MUX_INT_ZERO),
454         bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
455                          BI_RESULT_TYPE_F1));
456 
457    CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1),
458                         bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE,
459                                       BI_RESULT_TYPE_M1),
460                         BI_MUX_INT_ZERO),
461         bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
462                          BI_RESULT_TYPE_I1));
463 
464    CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1),
465                       bi_icmp_u32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
466                       BI_MUX_INT_ZERO),
467         bi_icmp_u32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
468 
469    CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1),
470                         bi_icmp_v2u16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
471                         BI_MUX_INT_ZERO),
472         bi_icmp_v2u16_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
473 
474    CASE(bi_mux_v4i8_to(b, reg, bi_imm_u8(0), bi_imm_u8(1),
475                        bi_icmp_v4u8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
476                        BI_MUX_INT_ZERO),
477         bi_icmp_v4u8_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
478 
479    CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1),
480                       bi_icmp_s32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
481                       BI_MUX_INT_ZERO),
482         bi_icmp_s32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
483 
484    CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1),
485                         bi_icmp_v2s16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
486                         BI_MUX_INT_ZERO),
487         bi_icmp_v2s16_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
488 
489    CASE(bi_mux_v4i8_to(b, reg, bi_imm_u8(0), bi_imm_u8(1),
490                        bi_icmp_v4s8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
491                        BI_MUX_INT_ZERO),
492         bi_icmp_v4s8_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
493 }
494 
TEST_F(Optimizer, DoNotFuseMixedSizeResultType)
{
   /* Negative cases: the MUX and the compare that selects for it have
    * different sizes, so both programs are expected to stay unchanged. */

   /* 32-bit MUX selected by a v2f16 compare. */
   NEGCASE({
      bi_index mask = bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE,
                                    BI_RESULT_TYPE_M1);
      bi_mux_i32_to(b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0), mask,
                    BI_MUX_INT_ZERO);
   });

   /* v2i16 MUX selected by a 32-bit compare. */
   NEGCASE({
      bi_index mask = bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE,
                                  BI_RESULT_TYPE_M1);
      bi_mux_v2i16_to(b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0), mask,
                      BI_MUX_INT_ZERO);
   });
}
507 
TEST_F(Optimizer, VarTexCoord32)
{
   /* Input: a two-component F32 LD_VAR_IMM, split into two fresh temporaries
    * that are then used as the coordinates of TEXS_2D.f32. Expected: a single
    * VAR_TEX.f32 writing reg. */
   CASE(
      {
         bi_index varying =
            bi_ld_var_imm(b, bi_null(), BI_REGISTER_FORMAT_F32,
                          BI_SAMPLE_CENTER, BI_UPDATE_STORE, BI_VECSIZE_V2, 0);

         /* Allocated after the load and before the split is built. */
         bi_index coord_s = bi_temp(b->shader);
         bi_index coord_t = bi_temp(b->shader);

         bi_instr *splitter = bi_split_i32_to(b, 2, varying);
         splitter->dest[0] = coord_s;
         splitter->dest[1] = coord_t;

         bi_texs_2d_f32_to(b, reg, coord_s, coord_t, false, 0, 0);
      },
      {
         bi_var_tex_f32_to(b, reg, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
                           0);
      });
}
529 
TEST_F(Optimizer, Int8ToFloat32)
{
   /* For each of the four byte lanes of x: an 8-bit to 32-bit integer
    * conversion followed by an int-to-float conversion is expected to become
    * one direct 8-bit to f32 conversion. */
   for (unsigned lane = 0; lane < 4; ++lane) {
      const bi_index src = bi_byte(x, lane);

      /* signed widen, signed convert */
      CASE(bi_s32_to_f32_to(b, reg, bi_s8_to_s32(b, src)),
           bi_s8_to_f32_to(b, reg, src));

      /* unsigned widen, signed convert */
      CASE(bi_s32_to_f32_to(b, reg, bi_u8_to_u32(b, src)),
           bi_u8_to_f32_to(b, reg, src));

      /* unsigned widen, unsigned convert */
      CASE(bi_u32_to_f32_to(b, reg, bi_u8_to_u32(b, src)),
           bi_u8_to_f32_to(b, reg, src));
   }
}
543 
TEST_F(Optimizer, Int16ToFloat32)
{
   /* For each of the two halves of x: a 16-bit to 32-bit integer conversion
    * followed by an int-to-float conversion is expected to become one direct
    * 16-bit to f32 conversion. */
   for (unsigned upper = 0; upper < 2; ++upper) {
      const bi_index src = bi_half(x, upper);

      /* signed widen, signed convert */
      CASE(bi_s32_to_f32_to(b, reg, bi_s16_to_s32(b, src)),
           bi_s16_to_f32_to(b, reg, src));

      /* unsigned widen, signed convert */
      CASE(bi_s32_to_f32_to(b, reg, bi_u16_to_u32(b, src)),
           bi_u16_to_f32_to(b, reg, src));

      /* unsigned widen, unsigned convert */
      CASE(bi_u32_to_f32_to(b, reg, bi_u16_to_u32(b, src)),
           bi_u16_to_f32_to(b, reg, src));
   }
}
557