/*
 * Copyright © 2024 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */
#include "helpers.h"
#include <stdarg.h>

using namespace aco;

/*
 * p_extract_vector with sub-dword results (v2b, v1b, v3b) taken from a v1 and
 * a v2 input. The program is set up for GFX6 and handed to
 * finish_lower_subdword_test() once all instructions are built.
 *
 * NOTE(review): the "//>>" and "//!" lines look like expected-output patterns
 * consumed by the test harness rather than ordinary comments -- keep each one
 * directly above the builder call it belongs to and do not reword them.
 */
BEGIN_TEST(lower_subdword.simple_extract)
   //>> v1: %a, v2: %b = p_startpgm
   if (!setup_cs("v1 v2", GFX6))
      return;

   /* v2b result, index 0 of %a: expected to become a plain copy. */
   //! v1: %aw0 = p_parallelcopy %a
   bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), inputs[0], Operand::c32(0));

   /* v2b result, index 1 of %a: expected to become a right shift by 16. */
   //! v1: %aw1 = v_lshrrev_b32 16, %a
   bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), inputs[0], Operand::c32(1));

   /* v1b result, index 3 of %a: expected to become a right shift by 24. */
   //! v1: %ab3 = v_lshrrev_b32 24, %a
   bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::c32(3));

   /* v2b result, index 3 of the v2 input: expected to split %b and shift its
    * second dword by 16.
    */
   //! v1: %_, v1: %bd1 = p_split_vector %b
   //! v1: %6 = v_lshrrev_b32 16, %bd1
   bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), inputs[1], Operand::c32(3));

   /* v1b result, index 2 of the v2 input: expected to split %b and shift its
    * first dword by 16.
    */
   //! v1: %bd0, v1: %_ = p_split_vector %b
   //! v1: %bb2 = v_lshrrev_b32 16, %bd0
   bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[1], Operand::c32(2));

   /* v3b result, index 1 of the v2 input: the expected output uses both dwords
    * of %b, combined with v_alignbyte_b32.
    */
   //! v1: %bd0_2, v1: %bd1_2 = p_split_vector %b
   //! v1: %bv3bhi = v_alignbyte_b32 %bd1_2, %bd0_2, 3
   bld.pseudo(aco_opcode::p_extract_vector, bld.def(v3b), inputs[1], Operand::c32(1));

   finish_lower_subdword_test();
END_TEST

/*
 * p_split_vector with sub-dword definitions, applied to a v1 and a v2 input on
 * GFX6. Every sub-dword definition is expected to end up as its own v1 value,
 * produced by a copy or a right shift of the containing dword.
 */
BEGIN_TEST(lower_subdword.simple_split)
   //>> v1: %a, v2: %b = p_startpgm
   if (!setup_cs("v1 v2", GFX6))
      return;

   /* Split %a into v1b, v2b, v1b: expected copy, shift by 8, shift by 24. */
   //! v1: %o1_0 = p_parallelcopy %a
   //! v1: %o1_1 = v_lshrrev_b32 8, %a
   //! v1: %o1_2 = v_lshrrev_b32 24, %a
   bld.pseudo(aco_opcode::p_split_vector, bld.def(v1b), bld.def(v2b), bld.def(v1b), inputs[0]);

   /* Split the v2 input into eight v1b definitions. The expected output emits
    * one p_split_vector of %b per definition: the first four read the first
    * dword, the last four the second, each with shifts of 0/8/16/24.
    */
   //! v1: %tmp2_0, v1: %_ = p_split_vector %b
   //! v1: %o2_0 = p_parallelcopy %tmp2_0
   //! v1: %tmp2_1, v1: %_ = p_split_vector %b
   //! v1: %o2_1 = v_lshrrev_b32 8, %tmp2_1
   //! v1: %tmp2_2, v1: %_ = p_split_vector %b
   //! v1: %o2_2 = v_lshrrev_b32 16, %tmp2_2
   //! v1: %tmp2_3, v1: %_ = p_split_vector %b
   //! v1: %o2_3 = v_lshrrev_b32 24, %tmp2_3
   //! v1: %_, v1: %tmp2_4 = p_split_vector %b
   //! v1: %o2_4 = p_parallelcopy %tmp2_4
   //! v1: %_, v1: %tmp2_5 = p_split_vector %b
   //! v1: %o2_5 = v_lshrrev_b32 8, %tmp2_5
   //! v1: %_, v1: %tmp2_6 = p_split_vector %b
   //! v1: %o2_6 = v_lshrrev_b32 16, %tmp2_6
   //! v1: %_, v1: %tmp2_7 = p_split_vector %b
   //! v1: %o2_7 = v_lshrrev_b32 24, %tmp2_7
   bld.pseudo(aco_opcode::p_split_vector, bld.def(v1b), bld.def(v1b), bld.def(v1b), bld.def(v1b),
              bld.def(v1b), bld.def(v1b), bld.def(v1b), bld.def(v1b), inputs[1]);

   finish_lower_subdword_test();
END_TEST

/*
 * p_create_vector built from sub-dword operands on GFX6. The five inputs are
 * requested as "v1b v2b v3b s1 v1" and are named %b, %w, %tb, %sd and %vd in
 * the expected output, where the three sub-dword inputs are listed as v1.
 */
BEGIN_TEST(lower_subdword.simple_create)
   //>> v1: %b, v1: %w, v1: %tb, s1: %sd, v1: %vd = p_startpgm
   if (!setup_cs("v1b v2b v3b s1 v1 ", GFX6))
      return;

   /* Four v1b operands into a v1: expected one left shift followed by three
    * v_alignbyte_b32 steps.
    */
   //! v1: %tmp1_0 = v_lshlrev_b32 24, %b
   //! v1: %tmp1_1 = v_alignbyte_b32 %b, %tmp1_0, 1
   //! v1: %tmp1_2 = v_alignbyte_b32 %b, %tmp1_1, 1
   //! v1: %o1 = v_alignbyte_b32 %b, %tmp1_2, 1
   bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), inputs[0], inputs[0], inputs[0], inputs[0]);

   /* v1b, v2b, v1b into a v1. */
   //! v1: %tmp2_0 = v_lshlrev_b32 24, %b
   //! v1: %tmp2_1 = v_alignbyte_b32 %w, %tmp2_0, 2
   //! v1: %o2 = v_alignbyte_b32 %b, %tmp2_1, 1
   bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), inputs[0], inputs[1], inputs[0]);

   /* v1b, v3b, v2b, v1b, v1b into a v2: each result dword is expected to be
    * assembled separately and then joined by a dword-sized p_create_vector.
    */
   //! v1: %tmp3_0 = v_lshlrev_b32 24, %b
   //! v1: %tmp3_1 = v_alignbyte_b32 %tb, %tmp3_0, 3
   //! v1: %tmp3_2 = v_lshlrev_b32 16, %w
   //! v1: %tmp3_3 = v_alignbyte_b32 %b, %tmp3_2, 1
   //! v1: %tmp3_4 = v_alignbyte_b32 %b, %tmp3_3, 1
   //! v2: %o3 = p_create_vector %tmp3_1, %tmp3_4
   bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), inputs[0], inputs[2], inputs[1], inputs[0],
              inputs[0]);

   /* s1, v1, v2b, v3b, v3b into a v4: the two full-dword operands are expected
    * to be copied, and the same %tb value appears in both of the remaining
    * result dwords.
    */
   //! v1: %tmp4_0 = p_parallelcopy %sd
   //! v1: %tmp4_1 = p_parallelcopy %vd
   //! v1: %tmp4_2 = v_lshlrev_b32 16, %w
   //! v1: %tmp4_3 = v_alignbyte_b32 %tb, %tmp4_2, 2
   //! v1: %tmp4_4 = v_lshlrev_b32 8, %tb
   //! v1: %tmp4_5 = v_alignbyte_b32 %tb, %tmp4_4, 3
   //! v4: %o4 = p_create_vector %tmp4_0, %tmp4_1, %tmp4_3, %tmp4_5
   bld.pseudo(aco_opcode::p_create_vector, bld.def(v4), inputs[3], inputs[4], inputs[1], inputs[2],
              inputs[2]);

   /* Two v1b operands into a v2b result. */
   //! v1: %tmp5_0 = v_lshlrev_b32 24, %b
   //! v1: %o5 = v_alignbyte_b32 %b, %tmp5_0, 3
   bld.pseudo(aco_opcode::p_create_vector, bld.def(v2b), inputs[0], inputs[0]);

   finish_lower_subdword_test();
END_TEST

/*
 * p_create_vector where some operands are Operand::c8() constants or operands
 * constructed from only a register class (Operand(v1b), Operand(v2b)).
 *
 * NOTE(review): going by the test name, the register-class-only operands are
 * presumably undefined values -- confirm against the Operand constructors.
 */
BEGIN_TEST(lower_subdword.create_const_undef)
   //>> v1: %b, v1: %w = p_startpgm
   if (!setup_cs("v1b v2b", GFX6))
      return;

   /* %b followed by two register-class-only operands: expected plain copy. */
   //! v1: %o1 = p_parallelcopy %b
   bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), inputs[0], Operand(v1b), Operand(v2b));

   /* Three constants (0, 3, 4) into a v3b: expected to become one literal. */
   //! v1: %o2 = p_parallelcopy 0x40300
   bld.pseudo(aco_opcode::p_create_vector, bld.def(v3b), Operand::c8(0), Operand::c8(3),
              Operand::c8(4));

   /* Constant 100 (0x64), then %w, then a register-class-only operand. */
   //! s1: %tmp1 = p_parallelcopy 0x64000000
   //! v1: %o3 = v_alignbyte_b32 %w, %tmp1, 3
   bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand::c8(100), inputs[1], Operand(v1b));

   /* v6b result mixing a constant, %b, a register-class-only operand, %b and
    * %w: expected to be built as two dwords joined by p_create_vector.
    */
   //! s1: %tmp2 = p_parallelcopy 0x64000000
   //! v1: %tmp3 = v_alignbyte_b32 %b, %tmp2, 2
   //! v1: %tmp4 = v_alignbyte_b32 %b, %tmp3, 1
   //! v1: %tmp5 = p_parallelcopy %w
   //! v2: %o4 = p_create_vector %tmp4, %tmp5
   bld.pseudo(aco_opcode::p_create_vector, bld.def(v6b), Operand::c8(100), inputs[0], Operand(v1b),
              inputs[0], inputs[1]);

   /* Register-class-only operand first, then %b: the expected output uses 0 as
    * the second v_alignbyte_b32 source.
    */
   //! v1: %o5 = v_alignbyte_b32 %b, 0, 3
   bld.pseudo(aco_opcode::p_create_vector, bld.def(v2b), Operand(v1b), inputs[0]);

   finish_lower_subdword_test();
END_TEST