xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/insertps-combine.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
4*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
5*9880d681SAndroid Build Coastguard Worker
; Test: an extractelement/insertelement/shufflevector chain is expected to
; collapse into a single (v)insertps followed by the return.
;   %vecinit1 = <%x[0], 0.0, undef, undef>
;   %vecinit3 = <%x[0], 0.0, %x[2], undef>   (mask index 6 selects %x[2])
;   %vecinit5 = <%x[0], 0.0, %x[2], %a[2]>   (mask index 6 selects %a[2])
; NOTE(review): the "27" in the function name suggests lane 3 should come from
; shuffle index 7 (%a[3]), but the final mask uses index 6 and the assertions
; expect xmm1[2] - confirm whether the name or the mask is the intended one.
6*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuffle_v4f32_0z27(<4 x float> %x, <4 x float> %a) {
7*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: shuffle_v4f32_0z27:
8*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
9*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[2]
10*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
11*9880d681SAndroid Build Coastguard Worker;
12*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: shuffle_v4f32_0z27:
13*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
14*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[2]
15*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
16*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %x, i32 0
17*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
18*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1
19*9880d681SAndroid Build Coastguard Worker  %vecinit3 = shufflevector <4 x float> %vecinit1, <4 x float> %x, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
20*9880d681SAndroid Build Coastguard Worker  %vecinit5 = shufflevector <4 x float> %vecinit3, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
21*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit5
22*9880d681SAndroid Build Coastguard Worker}
23*9880d681SAndroid Build Coastguard Worker
; Test: builds <%xyzw[0], 0.0, 0.0, %abcd[0]> from one extractelement, three
; insertelements and a final shufflevector (mask index 4 selects %abcd[0]).
; The whole chain is expected to become one (v)insertps that keeps lane 0,
; zeroes lanes 1-2 and inserts xmm1[0] into lane 3.
24*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuffle_v4f32_0zz4(<4 x float> %xyzw, <4 x float> %abcd) {
25*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: shuffle_v4f32_0zz4:
26*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
27*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
28*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
29*9880d681SAndroid Build Coastguard Worker;
30*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: shuffle_v4f32_0zz4:
31*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
32*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
33*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
34*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %xyzw, i32 0
35*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
36*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <4 x float> %vecinit, float 0.000000e+00, i32 1
37*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x float> %vecinit1, float 0.000000e+00, i32 2
38*9880d681SAndroid Build Coastguard Worker  %vecinit4 = shufflevector <4 x float> %vecinit2, <4 x float> %abcd, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
39*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit4
40*9880d681SAndroid Build Coastguard Worker}
41*9880d681SAndroid Build Coastguard Worker
; Test: builds <%xyzw[0], 0.0, %xyzw[2], %abcd[0]>.
;   %vecinit1 = <%xyzw[0], 0.0, undef, undef>
;   %vecinit3 = <%xyzw[0], 0.0, %xyzw[2], undef>     (mask index 6 selects %xyzw[2])
;   %vecinit5 = <%xyzw[0], 0.0, %xyzw[2], %abcd[0]>  (mask index 4 selects %abcd[0])
; Expected lowering is a single (v)insertps that zeroes lane 1 and inserts
; xmm1[0] into lane 3 while lanes 0 and 2 pass through from xmm0.
42*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuffle_v4f32_0z24(<4 x float> %xyzw, <4 x float> %abcd) {
43*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: shuffle_v4f32_0z24:
44*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
45*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0]
46*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
47*9880d681SAndroid Build Coastguard Worker;
48*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: shuffle_v4f32_0z24:
49*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
50*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0]
51*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
52*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %xyzw, i32 0
53*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
54*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <4 x float> %vecinit, float 0.000000e+00, i32 1
55*9880d681SAndroid Build Coastguard Worker  %vecinit3 = shufflevector <4 x float> %vecinit1, <4 x float> %xyzw, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
56*9880d681SAndroid Build Coastguard Worker  %vecinit5 = shufflevector <4 x float> %vecinit3, <4 x float> %abcd, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
57*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit5
58*9880d681SAndroid Build Coastguard Worker}
59*9880d681SAndroid Build Coastguard Worker
; Test: builds <%a, 0.0, 0.0, %a> from a scalar float argument using four
; insertelements. Expected lowering is one (v)insertps that uses xmm0 as both
; operands: lane 0 is kept, lanes 1-2 are zeroed, and xmm0[0] is copied into
; lane 3.
60*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuffle_v4f32_0zz0(float %a) {
61*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: shuffle_v4f32_0zz0:
62*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
63*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm0[0]
64*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
65*9880d681SAndroid Build Coastguard Worker;
66*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: shuffle_v4f32_0zz0:
67*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
68*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm0[0]
69*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
70*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %a, i32 0
71*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <4 x float> %vecinit, float 0.000000e+00, i32 1
72*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <4 x float> %vecinit1, float 0.000000e+00, i32 2
73*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <4 x float> %vecinit2, float %a, i32 3
74*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit3
75*9880d681SAndroid Build Coastguard Worker}
76*9880d681SAndroid Build Coastguard Worker
; Test: builds <%A[0], 0.0, %B[2], 0.0> purely from extractelement and
; insertelement (no shufflevector). Expected lowering is a single (v)insertps
; that inserts xmm1[2] into lane 2 and zeroes lanes 1 and 3.
77*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @shuffle_v4f32_0z6z(<4 x float> %A, <4 x float> %B) {
78*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: shuffle_v4f32_0z6z:
79*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
80*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero
81*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
82*9880d681SAndroid Build Coastguard Worker;
83*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: shuffle_v4f32_0z6z:
84*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
85*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero
86*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
87*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %A, i32 0
88*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
89*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <4 x float> %vecinit, float 0.000000e+00, i32 1
90*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x float> %B, i32 2
91*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <4 x float> %vecinit1, float %vecext2, i32 2
92*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
93*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit4
94*9880d681SAndroid Build Coastguard Worker}
95*9880d681SAndroid Build Coastguard Worker
; Test: the first insertps operand (%res0, an fadd of %a0) ends up contributing
; no lanes to the final value, so the fadd is expected to disappear.
;   imm 21 = 0b00010101: per the INSERTPS imm8 encoding, source lane 0 is
;   written to destination lane 1 and the zero mask clears lanes 0 and 2.
;   The trailing shufflevector (mask index 7) replaces lane 3 with zero.
; Final value is <0, %a1[0], 0, 0>; the assertions expect one (v)insertps that
; reads only xmm1[0], with no add instruction emitted.
96*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_undef_input0(<4 x float> %a0, <4 x float> %a1) {
97*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: insertps_undef_input0:
98*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
99*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,zero
100*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
101*9880d681SAndroid Build Coastguard Worker;
102*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: insertps_undef_input0:
103*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
104*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],zero,zero
105*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
106*9880d681SAndroid Build Coastguard Worker  %res0 = fadd <4 x float> %a0, <float 1.0, float 1.0, float 1.0, float 1.0>
107*9880d681SAndroid Build Coastguard Worker  %res1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %res0, <4 x float> %a1, i8 21)
108*9880d681SAndroid Build Coastguard Worker  %res2 = shufflevector <4 x float> %res1, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
109*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res2
110*9880d681SAndroid Build Coastguard Worker}
111*9880d681SAndroid Build Coastguard Worker
; Test: the second insertps operand (%res0, an fadd of %a1) ends up
; contributing no lanes to the final value, so the fadd is expected to
; disappear.
;   imm 21 = 0b00010101: per the INSERTPS imm8 encoding, source lane 0 is
;   written to destination lane 1 and the zero mask clears lanes 0 and 2.
;   The trailing shufflevector (mask index 5) overwrites lane 1, the only
;   lane taken from %res0, with zero.
; Final value is <0, 0, 0, %a0[3]>; the assertions expect a zeroing xor plus a
; blend that keeps only xmm0[3], with no add instruction emitted.
112*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_undef_input1(<4 x float> %a0, <4 x float> %a1) {
113*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: insertps_undef_input1:
114*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
115*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorps %xmm1, %xmm1
116*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
117*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
118*9880d681SAndroid Build Coastguard Worker;
119*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: insertps_undef_input1:
120*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
121*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
122*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
123*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
124*9880d681SAndroid Build Coastguard Worker  %res0 = fadd <4 x float> %a1, <float 1.0, float 1.0, float 1.0, float 1.0>
125*9880d681SAndroid Build Coastguard Worker  %res1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %res0, i8 21)
126*9880d681SAndroid Build Coastguard Worker  %res2 = shufflevector <4 x float> %res1, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
127*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res2
128*9880d681SAndroid Build Coastguard Worker}
129*9880d681SAndroid Build Coastguard Worker
; Test: a zero lane must be recognised inside a <2 x double> constant that is
; bitcast to <4 x float>.
;   Shuffle mask index 6 selects lane 2 of %2, i.e. the low 32 bits of the
;   second element (double 2.0) on this little-endian target; those bits are
;   all zero, so lane 0 of the result is expected to be "zero" in (v)insertps.
;   The remaining lanes are %a0[2], %a0[2], %a0[3].
; The same constant also feeds an fadd whose result is stored back through
; %a1, so the constant has a second, non-shuffle use; the assertions expect
; that add to take its operand from a rip-relative memory reference.
130*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_zero_from_v2f64(<4 x float> %a0, <2 x double>* %a1) nounwind {
131*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: insertps_zero_from_v2f64:
132*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
133*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movapd (%rdi), %xmm1
134*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addpd {{.*}}(%rip), %xmm1
135*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
136*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movapd %xmm1, (%rdi)
137*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
138*9880d681SAndroid Build Coastguard Worker;
139*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: insertps_zero_from_v2f64:
140*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
141*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovapd (%rdi), %xmm1
142*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddpd {{.*}}(%rip), %xmm1, %xmm1
143*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
144*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovapd %xmm1, (%rdi)
145*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
146*9880d681SAndroid Build Coastguard Worker  %1 = load <2 x double>, <2 x double>* %a1
147*9880d681SAndroid Build Coastguard Worker  %2 = bitcast <2 x double> <double 1.0, double 2.0> to <4 x float>
148*9880d681SAndroid Build Coastguard Worker  %3 = fadd <2 x double> %1, <double 1.0, double 2.0>
149*9880d681SAndroid Build Coastguard Worker  %4 = shufflevector <4 x float> %a0, <4 x float> %2, <4 x i32> <i32 6, i32 2, i32 2, i32 3>
150*9880d681SAndroid Build Coastguard Worker  store <2 x double> %3, <2 x double> *%a1
151*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %4
152*9880d681SAndroid Build Coastguard Worker}
153*9880d681SAndroid Build Coastguard Worker
; Test: a zero lane must be recognised inside a <2 x i64> constant that is
; bitcast to <4 x float>.
;   Shuffle mask index 5 selects lane 1 of %2, i.e. the high 32 bits of the
;   first element (i64 1) on this little-endian target; those bits are all
;   zero, so lane 0 of the result is expected to be "zero" in (v)insertps.
;   The remaining lanes are %a0[2], %a0[2], %a0[3].
; The same constant also feeds an integer add whose result is stored back
; through %a1, so the constant has a second, non-shuffle use; the assertions
; expect that add to take its operand from a rip-relative memory reference.
154*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_zero_from_v2i64(<4 x float> %a0, <2 x i64>* %a1) nounwind {
155*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: insertps_zero_from_v2i64:
156*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
157*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa (%rdi), %xmm1
158*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq {{.*}}(%rip), %xmm1
159*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
160*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa %xmm1, (%rdi)
161*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
162*9880d681SAndroid Build Coastguard Worker;
163*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: insertps_zero_from_v2i64:
164*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
165*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqa (%rdi), %xmm1
166*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddq {{.*}}(%rip), %xmm1, %xmm1
167*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
168*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqa %xmm1, (%rdi)
169*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
170*9880d681SAndroid Build Coastguard Worker  %1 = load <2 x i64>, <2 x i64>* %a1
171*9880d681SAndroid Build Coastguard Worker  %2 = bitcast <2 x i64> <i64 1, i64 -2> to <4 x float>
172*9880d681SAndroid Build Coastguard Worker  %3 = add <2 x i64> %1, <i64 1, i64 -2>
173*9880d681SAndroid Build Coastguard Worker  %4 = shufflevector <4 x float> %a0, <4 x float> %2, <4 x i32> <i32 5, i32 2, i32 2, i32 3>
174*9880d681SAndroid Build Coastguard Worker  store <2 x i64> %3, <2 x i64> *%a1
175*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %4
176*9880d681SAndroid Build Coastguard Worker}
177*9880d681SAndroid Build Coastguard Worker
; Test: a zero lane must be recognised inside an <8 x i16> constant that is
; bitcast to <4 x float>.
;   Shuffle mask index 4 selects lane 0 of %2, which is formed by the first
;   two i16 elements (0 and 0); the lane is therefore zero and lane 0 of the
;   result is expected to be "zero" in (v)insertps.
;   The remaining lanes are %a0[2], %a0[2], %a0[3].
; The same constant also feeds an integer add whose result is stored back
; through %a1, so the constant has a second, non-shuffle use; the assertions
; expect that add to take its operand from a rip-relative memory reference.
178*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @insertps_zero_from_v8i16(<4 x float> %a0, <8 x i16>* %a1) nounwind {
179*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: insertps_zero_from_v8i16:
180*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
181*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa (%rdi), %xmm1
182*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddw {{.*}}(%rip), %xmm1
183*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
184*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa %xmm1, (%rdi)
185*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
186*9880d681SAndroid Build Coastguard Worker;
187*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: insertps_zero_from_v8i16:
188*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
189*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqa (%rdi), %xmm1
190*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddw {{.*}}(%rip), %xmm1, %xmm1
191*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm0[2,2,3]
192*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqa %xmm1, (%rdi)
193*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
194*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x i16>, <8 x i16>* %a1
195*9880d681SAndroid Build Coastguard Worker  %2 = bitcast <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 2, i16 2, i16 3, i16 3> to <4 x float>
196*9880d681SAndroid Build Coastguard Worker  %3 = add <8 x i16> %1, <i16 0, i16 0, i16 1, i16 1, i16 2, i16 2, i16 3, i16 3>
197*9880d681SAndroid Build Coastguard Worker  %4 = shufflevector <4 x float> %a0, <4 x float> %2, <4 x i32> <i32 4, i32 2, i32 2, i32 3>
198*9880d681SAndroid Build Coastguard Worker  store <8 x i16> %3, <8 x i16> *%a1
199*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %4
200*9880d681SAndroid Build Coastguard Worker}
201*9880d681SAndroid Build Coastguard Worker
; Test: two scalar loads from adjacent float slots (%p + 1 and %p + 2) are
; combined by insertps into <p[1], p[2], 0, 0>.
;   imm 28 = 0b00011100: per the INSERTPS imm8 encoding, source lane 0 (%s1)
;   is written to destination lane 1 and the zero mask clears lanes 2 and 3.
; The assertions expect both loads and the insertps to merge into a single
; (v)movsd load whose upper half is zero.
202*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @consecutive_load_insertps_04zz(float* %p) {
203*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: consecutive_load_insertps_04zz:
204*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
205*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
206*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
207*9880d681SAndroid Build Coastguard Worker;
208*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: consecutive_load_insertps_04zz:
209*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
210*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
211*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
212*9880d681SAndroid Build Coastguard Worker  %p0 = getelementptr inbounds float, float* %p, i64 1
213*9880d681SAndroid Build Coastguard Worker  %p1 = getelementptr inbounds float, float* %p, i64 2
214*9880d681SAndroid Build Coastguard Worker  %s0 = load float, float* %p0
215*9880d681SAndroid Build Coastguard Worker  %s1 = load float, float* %p1
216*9880d681SAndroid Build Coastguard Worker  %v0 = insertelement <4 x float> undef, float %s0, i32 0
217*9880d681SAndroid Build Coastguard Worker  %v1 = insertelement <4 x float> undef, float %s1, i32 0
218*9880d681SAndroid Build Coastguard Worker  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v0, <4 x float> %v1, i8 28)
219*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res
220*9880d681SAndroid Build Coastguard Worker}
221*9880d681SAndroid Build Coastguard Worker
; Test: extracting a lane that the insertps zero mask clears must fold to a
; constant zero.
;   imm 21 = 0b00010101: per the INSERTPS imm8 encoding, the zero mask clears
;   lanes 0 and 2; the extractelement reads lane 0.
; The assertions expect only a register-zeroing xor and the return, with no
; insertps emitted.
222*9880d681SAndroid Build Coastguard Workerdefine float @extract_zero_insertps_z0z7(<4 x float> %a0, <4 x float> %a1) {
223*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: extract_zero_insertps_z0z7:
224*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
225*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorps %xmm0, %xmm0
226*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
227*9880d681SAndroid Build Coastguard Worker;
228*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: extract_zero_insertps_z0z7:
229*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
230*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
231*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
232*9880d681SAndroid Build Coastguard Worker  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 21)
233*9880d681SAndroid Build Coastguard Worker  %ext = extractelement <4 x float> %res, i32 0
234*9880d681SAndroid Build Coastguard Worker  ret float %ext
235*9880d681SAndroid Build Coastguard Worker}
236*9880d681SAndroid Build Coastguard Worker
; Test: extracting the lane that insertps copied from a loaded vector must
; fold to a scalar load of just that element.
;   imm 64 = 0b01000000: per the INSERTPS imm8 encoding, bits [7:6] = 1 select
;   source lane 1 of %a1, bits [5:4] = 0 write it to destination lane 0, and
;   the zero mask is empty. In shuffle-index terms the result is <5, 1, 2, 3>,
;   which is what the "5123" suffix of the function name describes.
;   (An immediate of 128 would select source lane 2, i.e. shuffle index 6.)
; The extractelement reads lane 0, so only %a1[1] is live; the assertions
; expect a single (v)movss from memory. The address operand is hidden behind
; the {{.*#+}} pattern, so the assertions do not pin the load offset.
237*9880d681SAndroid Build Coastguard Workerdefine float @extract_lane_insertps_5123(<4 x float> %a0, <4 x float> *%p1) {
238*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: extract_lane_insertps_5123:
239*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
240*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
241*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
242*9880d681SAndroid Build Coastguard Worker;
243*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: extract_lane_insertps_5123:
244*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
245*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
246*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
247*9880d681SAndroid Build Coastguard Worker  %a1 = load <4 x float>, <4 x float> *%p1
248*9880d681SAndroid Build Coastguard Worker  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 64)
249*9880d681SAndroid Build Coastguard Worker  %ext = extractelement <4 x float> %res, i32 0
250*9880d681SAndroid Build Coastguard Worker  ret float %ext
251*9880d681SAndroid Build Coastguard Worker}
252*9880d681SAndroid Build Coastguard Worker
; Declaration of the SSE4.1 INSERTPS intrinsic called by the tests in this
; file. It takes two <4 x float> operands and an i8 immediate and returns a
; <4 x float>. Per the INSERTPS imm8 encoding, bits [7:6] select the source
; lane of the second operand, bits [5:4] select the destination lane, and
; bits [3:0] are a per-lane zero mask applied to the result.
253*9880d681SAndroid Build Coastguard Workerdeclare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
254