; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/vec_extract-avx.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X64

; When extracting multiple consecutive elements from a larger
; vector into a smaller one, do it efficiently. We should use
; an EXTRACT_SUBVECTOR node internally rather than a bunch of
; single element extractions.

; Extracting the low elements only requires using the right kind of store.
;
; Rebuilds a <4 x float> from elements 0-3 of %v one element at a time and
; stores it to %ptr with 16-byte alignment. Both triples are expected to emit
; a single vmovaps of %xmm0, with no per-element extract or shuffle.
define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
; X32-LABEL: low_v8f32_to_v4f32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovaps %xmm0, (%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: low_v8f32_to_v4f32:
; X64:       # BB#0:
; X64-NEXT:    vmovaps %xmm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %ext0 = extractelement <8 x float> %v, i32 0
  %ext1 = extractelement <8 x float> %v, i32 1
  %ext2 = extractelement <8 x float> %v, i32 2
  %ext3 = extractelement <8 x float> %v, i32 3
  %ins0 = insertelement <4 x float> undef, float %ext0, i32 0
  %ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
  %ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
  %ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
  store <4 x float> %ins3, <4 x float>* %ptr, align 16
  ret void
}

; Extracting the high elements requires just one AVX instruction.
;
; Rebuilds a <4 x float> from elements 4-7 of %v and stores it to %ptr with
; 16-byte alignment. Both triples are expected to fold the extract and the
; store into one vextractf128 $1 with a memory destination.
define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
; X32-LABEL: high_v8f32_to_v4f32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vextractf128 $1, %ymm0, (%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: high_v8f32_to_v4f32:
; X64:       # BB#0:
; X64-NEXT:    vextractf128 $1, %ymm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %ext0 = extractelement <8 x float> %v, i32 4
  %ext1 = extractelement <8 x float> %v, i32 5
  %ext2 = extractelement <8 x float> %v, i32 6
  %ext3 = extractelement <8 x float> %v, i32 7
  %ins0 = insertelement <4 x float> undef, float %ext0, i32 0
  %ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
  %ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
  %ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
  store <4 x float> %ins3, <4 x float>* %ptr, align 16
  ret void
}

; Make sure element type doesn't alter the codegen. Note that
; if we were actually using the vector in this function and
; have AVX2, we should generate vextracti128 (the int version).
;
; Same shape as the float test, but with i32 elements: elements 4-7 of %v
; are gathered into a <4 x i32> and stored to %ptr with 16-byte alignment.
; The expected instruction is still vextractf128 $1 to memory.
define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
; X32-LABEL: high_v8i32_to_v4i32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vextractf128 $1, %ymm0, (%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: high_v8i32_to_v4i32:
; X64:       # BB#0:
; X64-NEXT:    vextractf128 $1, %ymm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %ext0 = extractelement <8 x i32> %v, i32 4
  %ext1 = extractelement <8 x i32> %v, i32 5
  %ext2 = extractelement <8 x i32> %v, i32 6
  %ext3 = extractelement <8 x i32> %v, i32 7
  %ins0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
  %ins1 = insertelement <4 x i32> %ins0, i32 %ext1, i32 1
  %ins2 = insertelement <4 x i32> %ins1, i32 %ext2, i32 2
  %ins3 = insertelement <4 x i32> %ins2, i32 %ext3, i32 3
  store <4 x i32> %ins3, <4 x i32>* %ptr, align 16
  ret void
}

; Make sure that element size doesn't alter the codegen.
;
; Elements 2-3 of the <4 x double> argument are gathered into a <2 x double>
; and stored to %ptr with 16-byte alignment. The expected instruction is the
; same vextractf128 $1 to memory used for the 32-bit element tests.
define void @high_v4f64_to_v2f64(<4 x double> %v, <2 x double>* %ptr) {
; X32-LABEL: high_v4f64_to_v2f64:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vextractf128 $1, %ymm0, (%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: high_v4f64_to_v2f64:
; X64:       # BB#0:
; X64-NEXT:    vextractf128 $1, %ymm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %ext0 = extractelement <4 x double> %v, i32 2
  %ext1 = extractelement <4 x double> %v, i32 3
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  store <2 x double> %ins1, <2 x double>* %ptr, align 16
  ret void
}

; PR25320 Make sure that a widened (possibly legalized) vector correctly zero-extends upper elements.
; FIXME - Ideally these should just call VMOVD/VMOVQ/VMOVSS/VMOVSD

; Loads a <2 x i32> from %in, takes element 0 and inserts it into lane 0 of an
; <8 x i32> whose remaining lanes are zero, then stores the result to %out
; with 32-byte alignment. The expected code loads via vpmovzxdq and blends
; lane 0 against a zeroed ymm register.
define void @legal_vzmovl_2i32_8i32(<2 x i32>* %in, <8 x i32>* %out) {
; X32-LABEL: legal_vzmovl_2i32_8i32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; X32-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X32-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; X32-NEXT:    vmovaps %ymm0, (%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: legal_vzmovl_2i32_8i32:
; X64:       # BB#0:
; X64-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; X64-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X64-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; X64-NEXT:    vmovaps %ymm0, (%rsi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %ld = load <2 x i32>, <2 x i32>* %in, align 8
  %ext = extractelement <2 x i32> %ld, i64 0
  %ins = insertelement <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 %ext, i64 0
  store <8 x i32> %ins, <8 x i32>* %out, align 32
  ret void
}

; Loads a <2 x i64> from %in (8-byte aligned, hence the unaligned vmovupd),
; takes element 0 and inserts it into lane 0 of a <4 x i64> whose remaining
; lanes are zero, then stores the result to %out with 32-byte alignment. The
; expected code blends lane 0 against a zeroed ymm register.
define void @legal_vzmovl_2i64_4i64(<2 x i64>* %in, <4 x i64>* %out) {
; X32-LABEL: legal_vzmovl_2i64_4i64:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovupd (%ecx), %xmm0
; X32-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; X32-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X32-NEXT:    vmovapd %ymm0, (%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: legal_vzmovl_2i64_4i64:
; X64:       # BB#0:
; X64-NEXT:    vmovupd (%rdi), %xmm0
; X64-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; X64-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X64-NEXT:    vmovapd %ymm0, (%rsi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %ld = load <2 x i64>, <2 x i64>* %in, align 8
  %ext = extractelement <2 x i64> %ld, i64 0
  %ins = insertelement <4 x i64> <i64 undef, i64 0, i64 0, i64 0>, i64 %ext, i64 0
  store <4 x i64> %ins, <4 x i64>* %out, align 32
  ret void
}

; Loads a <2 x float> from %in, takes element 0 and inserts it into lane 0 of
; an <8 x float> whose remaining lanes are zero, then stores the result to
; %out with 32-byte alignment. Both triples are expected to load 64 bits with
; vmovq and blend lane 0 against a zeroed ymm register; only the pointer
; setup differs between the 32-bit and 64-bit runs.
define void @legal_vzmovl_2f32_8f32(<2 x float>* %in, <8 x float>* %out) {
; X32-LABEL: legal_vzmovl_2f32_8f32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X32-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; X32-NEXT:    vmovaps %ymm0, (%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: legal_vzmovl_2f32_8f32:
; X64:       # BB#0:
; X64-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X64-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; X64-NEXT:    vmovaps %ymm0, (%rsi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %ld = load <2 x float>, <2 x float>* %in, align 8
  %ext = extractelement <2 x float> %ld, i64 0
  %ins = insertelement <8 x float> <float undef, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, float %ext, i64 0
  store <8 x float> %ins, <8 x float>* %out, align 32
  ret void
}

; Loads a <2 x double> from %in (8-byte aligned, hence the unaligned vmovupd),
; takes element 0 and inserts it into lane 0 of a <4 x double> whose remaining
; lanes are zero, then stores the result to %out with 32-byte alignment. The
; expected code blends lane 0 against a zeroed ymm register.
define void @legal_vzmovl_2f64_4f64(<2 x double>* %in, <4 x double>* %out) {
; X32-LABEL: legal_vzmovl_2f64_4f64:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovupd (%ecx), %xmm0
; X32-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; X32-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X32-NEXT:    vmovapd %ymm0, (%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: legal_vzmovl_2f64_4f64:
; X64:       # BB#0:
; X64-NEXT:    vmovupd (%rdi), %xmm0
; X64-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; X64-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X64-NEXT:    vmovapd %ymm0, (%rsi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %ld = load <2 x double>, <2 x double>* %in, align 8
  %ext = extractelement <2 x double> %ld, i64 0
  %ins = insertelement <4 x double> <double undef, double 0.0, double 0.0, double 0.0>, double %ext, i64 0
  store <4 x double> %ins, <4 x double>* %out, align 32
  ret void
}