; xref: /aosp_15_r20/external/llvm/test/Transforms/InstCombine/x86-insertps.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; RUN: opt < %s -instcombine -S | FileCheck %s

; SSE4.1 INSERTPS intrinsic exercised by every test in this file.
; Operands: the vector being inserted into, the vector supplying the inserted
; element, and an 8-bit control immediate.
; As the expectations in this file show, the immediate decodes as:
;   bits 7:6 - lane of the second operand to read
;   bits 5:4 - lane of the first operand to overwrite
;   bits 3:0 - zero mask (bit N set => result lane N is forced to 0.0)
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone

; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
; The control byte is the function argument %c rather than a constant, so the
; call is expected to come out of instcombine unchanged.

define <4 x float> @insertps_non_const_imm(<4 x float> %v1, <4 x float> %v2, i8 %c) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_non_const_imm
; CHECK-NEXT:  call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
; CHECK-NEXT:  ret <4 x float>
}

; If all zero mask bits are set, return a zero regardless of the other control bits.
; imm 0x0f = 0b00_00_1111: all four zero-mask bits are set, so the whole call
; is expected to fold to a zero vector.

define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x0f
; CHECK-NEXT:  ret <4 x float> zeroinitializer
}
; imm 0xff = 0b11_11_1111: the lane selectors are non-zero, but the zero mask
; still covers every lane, so the result is again expected to be a zero vector.
define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0xff
; CHECK-NEXT:  ret <4 x float> zeroinitializer
}

; If some zero mask bits are set that do not override the insertion, we do not change anything.
; imm 0x0c = 0b00_00_1100: lane 0 of %v2 goes to lane 0 of %v1 while lanes 2
; and 3 are zeroed. The zeroed lanes do not include the insertion lane, and the
; call is expected to be left as is.

define <4 x float> @insertps_0x0c(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x0c
; CHECK-NEXT:  call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
; CHECK-NEXT:  ret <4 x float>
}

; A partially set zero mask is still folded when both input vectors are the same operand.
; imm 0x15 = 0b00_01_0101: lane 0 of %v1 is inserted into lane 1 and lanes 0
; and 2 are zeroed. With a single input this becomes one shuffle of %v1 against
; a constant vector that supplies the zeros (shuffle indices 4 and 6).

define <4 x float> @insertps_0x15_single_input(<4 x float> %v1) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x15_single_input
; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
; CHECK-NEXT:  ret <4 x float>
}

; The zero mask overrides the insertion lane.
; imm 0x1a = 0b00_01_1010: the insertion targets lane 1, but the zero mask
; clears lanes 1 and 3, so the inserted element never appears. Lanes 0 and 2
; pass through from %v1 and lanes 1 and 3 come from the zero constant.

define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x1a_single_input
; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:  ret <4 x float>
}

; The zero mask overrides the insertion lane, so the second input vector is not used.
; imm 0xc1 = 0b11_00_0001: lane 3 of %v2 would go to lane 0, but zero-mask bit 0
; clears that lane. The expected shuffle therefore reads only %v1 and a zero
; constant; %v2 does not appear in the output.

define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0xc1
; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x float>
}

; If no zero mask bits are set, convert to a shuffle.
; In the shuffle masks, indices 0-3 select lanes of %v1 and 4-7 lanes of %v2.
; imm 0x00 = 0b00_00_0000: lane 0 of %v2 (index 4) replaces lane 0 of %v1.

define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x00
; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x float>
}

; imm 0x10 = 0b00_01_0000, no zero mask: lane 0 of %v2 (shuffle index 4)
; replaces lane 1 of %v1.
define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x10
; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x float>
}

; imm 0x20 = 0b00_10_0000, no zero mask: lane 0 of %v2 (shuffle index 4)
; replaces lane 2 of %v1.
define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x20
; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT:  ret <4 x float>
}

; imm 0x30 = 0b00_11_0000, no zero mask: lane 0 of %v2 (shuffle index 4)
; replaces lane 3 of %v1.
define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0x30
; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
; CHECK-NEXT:  ret <4 x float>
}

; imm 0xc0 = 0b11_00_0000, no zero mask: lane 3 of %v2 (shuffle index 7)
; replaces lane 0 of %v1.
define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0xc0
; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x float>
}

; imm 0xd0 = 0b11_01_0000, no zero mask: lane 3 of %v2 (shuffle index 7)
; replaces lane 1 of %v1.
define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0xd0
; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x float>
}

; imm 0xe0 = 0b11_10_0000, no zero mask: lane 3 of %v2 (shuffle index 7)
; replaces lane 2 of %v1.
define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0xe0
; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
; CHECK-NEXT:  ret <4 x float>
}

; imm 0xf0 = 0b11_11_0000, no zero mask: lane 3 of %v2 (shuffle index 7)
; replaces lane 3 of %v1.
define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
  ret <4 x float> %res

; CHECK-LABEL: @insertps_0xf0
; CHECK-NEXT:  shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:  ret <4 x float>
}