1 /*
2 * Copyright (C) 2021 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "bi_builder.h"
25 #include "bi_test.h"
26 #include "compiler.h"
27
28 #include <gtest/gtest.h>
29
/* Pass pipeline under test: forward then backward modifier propagation,
 * followed by DCE to remove the pseudo-ops that were folded away. The
 * before/after IR comparison in each CASE runs after this sequence.
 */
static void
bi_optimizer(bi_context *ctx)
{
   bi_opt_mod_prop_forward(ctx);
   bi_opt_mod_prop_backward(ctx);
   bi_opt_dce(ctx, true);
}
37
38 /* Define reg first so it has a consistent variable index, and pass it to an
39 * instruction that cannot be dead code eliminated so the program is nontrivial.
40 */
/* CASE(instr, expected) asserts that running bi_optimizer over `instr`
 * produces IR equal to `expected`. Both sides allocate `reg` first so the
 * temporary gets the same variable index in each program, and both feed it
 * to bi_kaboom (a side-effecting sink) so DCE cannot delete the whole test
 * program.
 */
#define CASE(instr, expected)                                                  \
   INSTRUCTION_CASE(                                                           \
      {                                                                        \
         UNUSED bi_index reg = bi_temp(b->shader);                             \
         instr;                                                                \
         bi_kaboom(b, reg);                                                    \
      },                                                                       \
      {                                                                        \
         UNUSED bi_index reg = bi_temp(b->shader);                             \
         expected;                                                             \
         bi_kaboom(b, reg);                                                    \
      },                                                                       \
      bi_optimizer);

/* Negative case: the optimizer must leave `instr` unchanged. */
#define NEGCASE(instr) CASE(instr, instr)
56
/* Test fixture: provides a ralloc memory context plus a few commonly used
 * operands. `x` and `y` are fixed registers (r1, r2); `negabsx` is x with
 * both the neg and abs source modifiers applied.
 */
class Optimizer : public testing::Test {
 protected:
   Optimizer()
   {
      mem_ctx = ralloc_context(NULL);

      x = bi_register(1);
      y = bi_register(2);
      negabsx = bi_neg(bi_abs(x));
   }

   ~Optimizer()
   {
      /* Frees everything allocated under mem_ctx in one call. */
      ralloc_free(mem_ctx);
   }

   void *mem_ctx;

   bi_index x;
   bi_index y;
   bi_index negabsx;
};
79
/* A standalone FABSNEG.f32 feeding an FP32 op should be folded into the
 * consumer's abs/neg source modifiers (including the no-modifier identity
 * case), leaving the consumer's other sources untouched.
 */
TEST_F(Optimizer, FusedFABSNEG)
{
   CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), y),
        bi_fadd_f32_to(b, reg, bi_abs(x), y));

   CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_neg(x)), y),
        bi_fadd_f32_to(b, reg, bi_neg(x), y));

   CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, negabsx), y),
        bi_fadd_f32_to(b, reg, negabsx, y));

   CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, x), y),
        bi_fadd_f32_to(b, reg, x, y));

   CASE(bi_fmin_f32_to(b, reg, bi_fabsneg_f32(b, negabsx), bi_neg(y)),
        bi_fmin_f32_to(b, reg, negabsx, bi_neg(y)));
}
97
/* Same fusion as above, but for the packed v2f16 variants. */
TEST_F(Optimizer, FusedFABSNEGForFP16)
{
   CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, negabsx), y),
        bi_fadd_v2f16_to(b, reg, negabsx, y));

   CASE(bi_fmin_v2f16_to(b, reg, bi_fabsneg_v2f16(b, negabsx), bi_neg(y)),
        bi_fmin_v2f16_to(b, reg, negabsx, bi_neg(y)));
}
106
/* For FP32, FABSNEG/FCLAMP fusion is allowed even when both FADD sources
 * are the same value with abs applied (abs(x) + abs(x)), and the clamp on
 * the consumer survives the fusion. Contrast with the v2f16 NEGCASEs below.
 */
TEST_F(Optimizer, FuseFADD_F32WithEqualSourcesAbsAbsAndClamp)
{
   CASE(
      {
         bi_instr *I =
            bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x));
         I->clamp = BI_CLAMP_CLAMP_0_1;
      },
      {
         bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
         I->clamp = BI_CLAMP_CLAMP_0_1;
      });

   CASE(
      {
         bi_instr *I =
            bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x)));
         I->clamp = BI_CLAMP_CLAMP_0_1;
      },
      {
         bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
         I->clamp = BI_CLAMP_CLAMP_0_1;
      });

   /* FCLAMP applied to an FADD result folds into the FADD's clamp field. */
   CASE(
      {
         bi_instr *I =
            bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x)));
         I->clamp = BI_CLAMP_CLAMP_0_INF;
      },
      {
         bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
         I->clamp = BI_CLAMP_CLAMP_0_INF;
      });
}
142
/* For v2f16, fusion with abs on both sources is still fine as long as the
 * two sources are DIFFERENT values (abs(x) + abs(y)); only the equal-source
 * case is forbidden (see the AvoidFADD_V2F16... test below).
 */
TEST_F(Optimizer, FuseFADD_V2F16WithDifferentSourcesAbsAbsAndClamp)
{
   CASE(
      {
         bi_instr *I =
            bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y));
         I->clamp = BI_CLAMP_CLAMP_0_1;
      },
      {
         bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
         I->clamp = BI_CLAMP_CLAMP_0_1;
      });

   CASE(
      {
         bi_instr *I =
            bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y)));
         I->clamp = BI_CLAMP_CLAMP_0_1;
      },
      {
         bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
         I->clamp = BI_CLAMP_CLAMP_0_1;
      });

   /* FCLAMP.v2f16 folds into the producing FADD's clamp field. */
   CASE(
      {
         bi_instr *I =
            bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y)));
         I->clamp = BI_CLAMP_CLAMP_0_INF;
      },
      {
         bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
         I->clamp = BI_CLAMP_CLAMP_0_INF;
      });
}
178
/* Negative cases: for v2f16 FADD with clamp, abs on BOTH sources of the
 * SAME value must not be fused — the optimizer must leave these programs
 * untouched. (The FP32 equivalents above do fuse.)
 */
TEST_F(Optimizer, AvoidFADD_V2F16WithEqualSourcesAbsAbsAndClamp)
{
   NEGCASE({
      bi_instr *I =
         bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x));
      I->clamp = BI_CLAMP_CLAMP_0_1;
   });

   NEGCASE({
      bi_instr *I =
         bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x)));
      I->clamp = BI_CLAMP_CLAMP_0_1;
   });

   NEGCASE({
      bi_instr *I =
         bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x)));
      I->clamp = BI_CLAMP_CLAMP_0_INF;
   });
}
199
/* When FABSNEG.v2f16 is fused away, half-swizzles on its source and on its
 * use must compose correctly: swz(swz(v, yx), yx) == v, and swizzling a
 * replicated half (xx or yy) with yx is still the same replicated half.
 */
TEST_F(Optimizer, SwizzlesComposedForFP16)
{
   CASE(bi_fadd_v2f16_to(
           b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y),
        bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y));

   CASE(bi_fadd_v2f16_to(
           b, reg, bi_swz_16(bi_fabsneg_v2f16(b, negabsx), true, false), y),
        bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y));

   /* Two yx swizzles cancel out. */
   CASE(bi_fadd_v2f16_to(
           b, reg,
           bi_swz_16(bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), true,
                     false),
           y),
        bi_fadd_v2f16_to(b, reg, negabsx, y));

   /* A replicated half is invariant under the yx swizzle. */
   CASE(bi_fadd_v2f16_to(
           b, reg,
           bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, false)), true, false),
           y),
        bi_fadd_v2f16_to(b, reg, bi_half(negabsx, false), y));

   CASE(bi_fadd_v2f16_to(
           b, reg,
           bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, true)), true, false),
           y),
        bi_fadd_v2f16_to(b, reg, bi_half(negabsx, true), y));
}
229
/* Fusing FABSNEG.f32 must preserve a half-select (16->32 widen) on its
 * source rather than dropping it.
 */
TEST_F(Optimizer, PreserveWidens)
{
   /* Check that widens are passed through */
   CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, false)), y),
        bi_fadd_f32_to(b, reg, bi_half(negabsx, false), y));

   CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, true)), y),
        bi_fadd_f32_to(b, reg, bi_half(negabsx, true), y));

   /* Both sources widened from different halves of the same register. */
   CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)),
                       bi_fabsneg_f32(b, bi_half(x, false))),
        bi_fadd_f32_to(b, reg, bi_half(x, true), bi_half(x, false)));
}
243
/* A v2f16 FABSNEG feeding an f32 op (or vice versa) must not be fused. */
TEST_F(Optimizer, DoNotMixSizesForFABSNEG)
{
   /* Refuse to mix sizes for fabsneg, that's wrong */
   NEGCASE(bi_fadd_f32_to(b, reg, bi_fabsneg_v2f16(b, negabsx), y));
   NEGCASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_f32(b, negabsx), y));
}
250
/* A plain FADD with a 0.0 source is NOT an absneg primitive and must not be
 * treated as one (x + 0.0 != x for signed zero / round-mode reasons).
 */
TEST_F(Optimizer, AvoidZeroAndFABSNEGFootguns)
{
   /* It's tempting to use addition by 0.0 as the absneg primitive, but that
    * has footguns around signed zero and round modes. Check we don't
    * incorrectly fuse these rules. */

   bi_index zero = bi_zero();

   NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), zero), y));
   NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_neg(x), zero), y));
   NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_neg(bi_abs(x)), zero), y));
   NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, x, zero), y));
}
264
/* A standalone FCLAMP consuming an FADD result folds backward into the
 * FADD's clamp field, for both f32 and v2f16.
 */
TEST_F(Optimizer, ClampsPropagated)
{
   CASE(
      {
         bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, x, y));
         I->clamp = BI_CLAMP_CLAMP_0_INF;
      },
      {
         bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
         I->clamp = BI_CLAMP_CLAMP_0_INF;
      });

   CASE(
      {
         bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y));
         I->clamp = BI_CLAMP_CLAMP_0_1;
      },
      {
         bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
         I->clamp = BI_CLAMP_CLAMP_0_1;
      });
}
287
/* When a clamped FADD feeds a clamped FCLAMP, the two clamps compose into a
 * single clamp on the FADD. The cases below pin the composition table for
 * clamp-to-[0,inf) applied after another clamp:
 *   [-1,1] then [0,inf) -> [0,1]
 *   [0,1]  then [0,inf) -> [0,1]
 *   [0,inf) then [0,inf) -> [0,inf)
 * for both f32 and v2f16.
 */
TEST_F(Optimizer, ClampsComposed)
{
   CASE(
      {
         bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
         bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
         I->clamp = BI_CLAMP_CLAMP_M1_1;
         J->clamp = BI_CLAMP_CLAMP_0_INF;
      },
      {
         bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
         I->clamp = BI_CLAMP_CLAMP_0_1;
      });

   CASE(
      {
         bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
         bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
         I->clamp = BI_CLAMP_CLAMP_0_1;
         J->clamp = BI_CLAMP_CLAMP_0_INF;
      },
      {
         bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
         I->clamp = BI_CLAMP_CLAMP_0_1;
      });

   CASE(
      {
         bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
         bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
         I->clamp = BI_CLAMP_CLAMP_0_INF;
         J->clamp = BI_CLAMP_CLAMP_0_INF;
      },
      {
         bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
         I->clamp = BI_CLAMP_CLAMP_0_INF;
      });

   CASE(
      {
         bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
         bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
         I->clamp = BI_CLAMP_CLAMP_M1_1;
         J->clamp = BI_CLAMP_CLAMP_0_INF;
      },
      {
         bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
         I->clamp = BI_CLAMP_CLAMP_0_1;
      });

   CASE(
      {
         bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
         bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
         I->clamp = BI_CLAMP_CLAMP_0_1;
         J->clamp = BI_CLAMP_CLAMP_0_INF;
      },
      {
         bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
         I->clamp = BI_CLAMP_CLAMP_0_1;
      });

   CASE(
      {
         bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
         bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
         I->clamp = BI_CLAMP_CLAMP_0_INF;
         J->clamp = BI_CLAMP_CLAMP_0_INF;
      },
      {
         bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
         I->clamp = BI_CLAMP_CLAMP_0_INF;
      });
}
362
/* An f32 FCLAMP must not fold into a v2f16 FADD, nor vice versa. */
TEST_F(Optimizer, DoNotMixSizesWhenClamping)
{
   NEGCASE({
      bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_v2f16(b, x, y));
      I->clamp = BI_CLAMP_CLAMP_0_1;
   });

   NEGCASE({
      bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_f32(b, x, y));
      I->clamp = BI_CLAMP_CLAMP_0_1;
   });
}
375
/* A clamped FADD-by-zero is not equivalent to a clamp of the inner value
 * (signed zero again), so it must not be folded.
 */
TEST_F(Optimizer, DoNotUseAdditionByZeroForClamps)
{
   bi_index zero = bi_zero();

   /* We can't use addition by 0.0 for clamps due to signed zeros. */
   NEGCASE({
      bi_instr *I = bi_fadd_f32_to(b, reg, bi_fadd_f32(b, x, y), zero);
      I->clamp = BI_CLAMP_CLAMP_M1_1;
   });

   NEGCASE({
      bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y), zero);
      I->clamp = BI_CLAMP_CLAMP_0_1;
   });
}
391
/* DISCARD.b32 of an FCMP result fuses into DISCARD.f32 carrying the compare
 * function directly, for all three boolean result encodings (F1/I1/M1).
 * v2f16 compares fuse too, with the selected half lowered to 32-bit halves
 * of each operand.
 */
TEST_F(Optimizer, FuseComparisonsWithDISCARD)
{
   CASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1)),
        bi_discard_f32(b, x, y, BI_CMPF_LE));

   CASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_NE, BI_RESULT_TYPE_I1)),
        bi_discard_f32(b, x, y, BI_CMPF_NE));

   CASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1)),
        bi_discard_f32(b, x, y, BI_CMPF_EQ));

   /* Exercise both halves of the packed compare result. */
   for (unsigned h = 0; h < 2; ++h) {
      CASE(bi_discard_b32(
              b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1),
                         h)),
           bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_LE));

      CASE(bi_discard_b32(
              b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_NE, BI_RESULT_TYPE_I1),
                         h)),
           bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_NE));

      CASE(bi_discard_b32(
              b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1),
                         h)),
           bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_EQ));
   }
}
420
/* GTLT and TOTAL compare functions must not be fused into DISCARD. */
TEST_F(Optimizer, DoNotFuseSpecialComparisons)
{
   NEGCASE(
      bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_GTLT, BI_RESULT_TYPE_F1)));
   NEGCASE(bi_discard_b32(
      b, bi_fcmp_f32(b, x, y, BI_CMPF_TOTAL, BI_RESULT_TYPE_F1)));
}
428
/* MUX selecting between 0 and 1 based on a mask-typed (M1) comparison is
 * folded into the comparison itself by switching its result type: float
 * 0.0/1.0 immediates give RESULT_TYPE_F1, integer 0/1 immediates give
 * RESULT_TYPE_I1. Covered for f32/v2f16 FCMP and u32/s32/v2u16/v2s16/
 * v4u8/v4s8 ICMP, with source modifiers passed through.
 */
TEST_F(Optimizer, FuseResultType)
{
   CASE(bi_mux_i32_to(b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
                      bi_fcmp_f32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
                      BI_MUX_INT_ZERO),
        bi_fcmp_f32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1));

   CASE(bi_mux_i32_to(
           b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
           bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
           BI_MUX_INT_ZERO),
        bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
                       BI_RESULT_TYPE_F1));

   CASE(bi_mux_i32_to(
           b, reg, bi_imm_u32(0), bi_imm_u32(1),
           bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
           BI_MUX_INT_ZERO),
        bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
                       BI_RESULT_TYPE_I1));

   CASE(bi_mux_v2i16_to(b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0),
                        bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE,
                                      BI_RESULT_TYPE_M1),
                        BI_MUX_INT_ZERO),
        bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
                         BI_RESULT_TYPE_F1));

   CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1),
                        bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE,
                                      BI_RESULT_TYPE_M1),
                        BI_MUX_INT_ZERO),
        bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
                         BI_RESULT_TYPE_I1));

   CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1),
                      bi_icmp_u32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
                      BI_MUX_INT_ZERO),
        bi_icmp_u32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));

   CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1),
                        bi_icmp_v2u16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
                        BI_MUX_INT_ZERO),
        bi_icmp_v2u16_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));

   CASE(bi_mux_v4i8_to(b, reg, bi_imm_u8(0), bi_imm_u8(1),
                       bi_icmp_v4u8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
                       BI_MUX_INT_ZERO),
        bi_icmp_v4u8_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));

   CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1),
                      bi_icmp_s32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
                      BI_MUX_INT_ZERO),
        bi_icmp_s32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));

   CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1),
                        bi_icmp_v2s16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
                        BI_MUX_INT_ZERO),
        bi_icmp_v2s16_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));

   CASE(bi_mux_v4i8_to(b, reg, bi_imm_u8(0), bi_imm_u8(1),
                       bi_icmp_v4s8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
                       BI_MUX_INT_ZERO),
        bi_icmp_v4s8_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
}
494
/* A 32-bit MUX over a v2f16 compare (or a 16-bit MUX over an f32 compare)
 * must not have its result type fused.
 */
TEST_F(Optimizer, DoNotFuseMixedSizeResultType)
{
   NEGCASE(bi_mux_i32_to(
      b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
      bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
      BI_MUX_INT_ZERO));

   NEGCASE(bi_mux_v2i16_to(
      b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0),
      bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
      BI_MUX_INT_ZERO));
}
507
/* The LD_VAR + SPLIT + TEXS_2D sequence (load a 2-component F32 varying,
 * split it, feed the coordinates to a 2D texture fetch) collapses to a
 * single VAR_TEX instruction.
 */
TEST_F(Optimizer, VarTexCoord32)
{
   CASE(
      {
         bi_index ld =
            bi_ld_var_imm(b, bi_null(), BI_REGISTER_FORMAT_F32,
                          BI_SAMPLE_CENTER, BI_UPDATE_STORE, BI_VECSIZE_V2, 0);

         /* Note: these locals shadow the fixture's x/y registers. */
         bi_index x = bi_temp(b->shader);
         bi_index y = bi_temp(b->shader);
         bi_instr *split = bi_split_i32_to(b, 2, ld);
         split->dest[0] = x;
         split->dest[1] = y;

         bi_texs_2d_f32_to(b, reg, x, y, false, 0, 0);
      },
      {
         bi_var_tex_f32_to(b, reg, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
                           0);
      });
}
529
/* An 8->32 integer widen followed by an int->float convert fuses into the
 * direct 8-bit-to-f32 convert, for each byte lane of the source. Note the
 * unsigned widen fuses to U8_TO_F32 regardless of whether the outer convert
 * is signed or unsigned (the widened value fits either way).
 */
TEST_F(Optimizer, Int8ToFloat32)
{
   for (unsigned i = 0; i < 4; ++i) {
      CASE(bi_s32_to_f32_to(b, reg, bi_s8_to_s32(b, bi_byte(x, i))),
           bi_s8_to_f32_to(b, reg, bi_byte(x, i)));

      CASE(bi_s32_to_f32_to(b, reg, bi_u8_to_u32(b, bi_byte(x, i))),
           bi_u8_to_f32_to(b, reg, bi_byte(x, i)));

      CASE(bi_u32_to_f32_to(b, reg, bi_u8_to_u32(b, bi_byte(x, i))),
           bi_u8_to_f32_to(b, reg, bi_byte(x, i)));
   }
}
543
/* Same fusion as Int8ToFloat32, but for 16->32 widens over each half of
 * the source register.
 */
TEST_F(Optimizer, Int16ToFloat32)
{
   for (unsigned i = 0; i < 2; ++i) {
      CASE(bi_s32_to_f32_to(b, reg, bi_s16_to_s32(b, bi_half(x, i))),
           bi_s16_to_f32_to(b, reg, bi_half(x, i)));

      CASE(bi_s32_to_f32_to(b, reg, bi_u16_to_u32(b, bi_half(x, i))),
           bi_u16_to_f32_to(b, reg, bi_half(x, i)));

      CASE(bi_u32_to_f32_to(b, reg, bi_u16_to_u32(b, bi_half(x, i))),
           bi_u16_to_f32_to(b, reg, bi_half(x, i)));
   }
}
557