xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/vec_shift6.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse4.1 | FileCheck %s --check-prefix=SSE
3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx2 | FileCheck %s --check-prefix=AVX2
4*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512f | FileCheck %s --check-prefix=AVX512
5*9880d681SAndroid Build Coastguard Worker
6*9880d681SAndroid Build Coastguard Worker; Verify that we don't scalarize a packed vector shift left of 16-bit
7*9880d681SAndroid Build Coastguard Worker; signed integers if the amount is a constant build_vector.
8*9880d681SAndroid Build Coastguard Worker; Check that we produce an SSE2 packed integer multiply (pmullw) instead.
9*9880d681SAndroid Build Coastguard Worker
10*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @test1(<8 x i16> %a) {
11*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test1:
12*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
13*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmullw {{.*}}(%rip), %xmm0
14*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
15*9880d681SAndroid Build Coastguard Worker;
16*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test1:
17*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
18*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
19*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
20*9880d681SAndroid Build Coastguard Worker;
21*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test1:
22*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
23*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
24*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
; Shift every i16 lane of %a left by its own constant amount
; (1, 1, 2, 3, 7, 0, 9, 11). The assertions above expect all three run
; configurations to emit a single pmullw/vpmullw whose multiplier is a
; RIP-relative memory operand (presumably a constant-pool entry), followed
; directly by the return.
25*9880d681SAndroid Build Coastguard Worker  %shl = shl <8 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
26*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %shl
27*9880d681SAndroid Build Coastguard Worker}
28*9880d681SAndroid Build Coastguard Worker
29*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @test2(<8 x i16> %a) {
30*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test2:
31*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
32*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmullw {{.*}}(%rip), %xmm0
33*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
34*9880d681SAndroid Build Coastguard Worker;
35*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test2:
36*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
37*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
38*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
39*9880d681SAndroid Build Coastguard Worker;
40*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test2:
41*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
42*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
43*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
; Same <8 x i16> shape as test1, but the constant shift-amount vector mixes
; in-range amounts (0 and 1) with two undef lanes and one -1 lane. The
; assertions above still expect a single pmullw/vpmullw with a RIP-relative
; memory operand in every run configuration.
; NOTE(review): i16 -1 is not smaller than the 16-bit lane width, so per the
; LLVM LangRef that lane of the result is poison; presumably this is deliberate
; coverage of out-of-range constants -- confirm before "fixing" the value.
44*9880d681SAndroid Build Coastguard Worker  %shl = shl <8 x i16> %a, <i16 0, i16 undef, i16 0, i16 0, i16 1, i16 undef, i16 -1, i16 1>
45*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %shl
46*9880d681SAndroid Build Coastguard Worker}
47*9880d681SAndroid Build Coastguard Worker
48*9880d681SAndroid Build Coastguard Worker; Verify that a vector shift left of 32-bit signed integers is simply expanded
49*9880d681SAndroid Build Coastguard Worker; into an SSE4.1 pmulld (instead of cvttps2dq + pmulld) if the vector of shift
50*9880d681SAndroid Build Coastguard Worker; counts is a constant build_vector.
51*9880d681SAndroid Build Coastguard Worker
52*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @test3(<4 x i32> %a) {
53*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test3:
54*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
55*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmulld {{.*}}(%rip), %xmm0
56*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
57*9880d681SAndroid Build Coastguard Worker;
58*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test3:
59*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
60*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
61*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
62*9880d681SAndroid Build Coastguard Worker;
63*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test3:
64*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
65*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
66*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
; Shift the four i32 lanes of %a left by the constants (1, -1, 2, -3). The
; assertions above expect the SSE4.1 run to use one pmulld with a RIP-relative
; memory operand, and the AVX2 and AVX512F runs to use one variable-count
; vpsllvd with a RIP-relative memory operand instead of a multiply.
; NOTE(review): -1 and -3 are not smaller than the 32-bit lane width, so per
; the LLVM LangRef those result lanes are poison; presumably intentional
; out-of-range coverage -- confirm before changing the constants.
67*9880d681SAndroid Build Coastguard Worker  %shl = shl <4 x i32> %a, <i32 1, i32 -1, i32 2, i32 -3>
68*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %shl
69*9880d681SAndroid Build Coastguard Worker}
70*9880d681SAndroid Build Coastguard Worker
71*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @test4(<4 x i32> %a) {
72*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test4:
73*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
74*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmulld {{.*}}(%rip), %xmm0
75*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
76*9880d681SAndroid Build Coastguard Worker;
77*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test4:
78*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
79*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
80*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
81*9880d681SAndroid Build Coastguard Worker;
82*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test4:
83*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
84*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
85*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
; Shift the four i32 lanes of %a left by the in-range constants (0, 0, 1, 1).
; As in test3, the assertions above expect one pmulld with a RIP-relative
; memory operand for the SSE4.1 run and one vpsllvd with a RIP-relative memory
; operand for the AVX2 and AVX512F runs.
86*9880d681SAndroid Build Coastguard Worker  %shl = shl <4 x i32> %a, <i32 0, i32 0, i32 1, i32 1>
87*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %shl
88*9880d681SAndroid Build Coastguard Worker}
89*9880d681SAndroid Build Coastguard Worker
90*9880d681SAndroid Build Coastguard Worker; If we have AVX/SSE2 but not AVX2, verify that the following shift is split
91*9880d681SAndroid Build Coastguard Worker; into two pmullw instructions. With AVX2, the test case below would produce
92*9880d681SAndroid Build Coastguard Worker; a single vpmullw.
93*9880d681SAndroid Build Coastguard Worker
94*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @test5(<16 x i16> %a) {
95*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test5:
96*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
97*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2,2,4,8,128,1,512,2048]
98*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmullw %xmm2, %xmm0
99*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmullw %xmm2, %xmm1
100*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
101*9880d681SAndroid Build Coastguard Worker;
102*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test5:
103*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
104*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
105*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
106*9880d681SAndroid Build Coastguard Worker;
107*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test5:
108*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
109*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
110*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
; Shift sixteen i16 lanes by the 8-element pattern (1, 1, 2, 3, 7, 0, 9, 11)
; repeated twice. The SSE4.1 assertions load the multipliers
; [2,2,4,8,128,1,512,2048] (i.e. 1 << amount for each lane) into xmm2 once and
; reuse them for two pmullw instructions, one per 128-bit half (xmm0, xmm1).
; The AVX2 and AVX512F assertions expect a single 256-bit vpmullw on ymm0 with
; a RIP-relative memory operand.
111*9880d681SAndroid Build Coastguard Worker  %shl = shl <16 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
112*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %shl
113*9880d681SAndroid Build Coastguard Worker}
114*9880d681SAndroid Build Coastguard Worker
115*9880d681SAndroid Build Coastguard Worker; If we have AVX/SSE4.1 but not AVX2, verify that the following shift is split
116*9880d681SAndroid Build Coastguard Worker; into two pmulld instructions. With AVX2, the test case below would produce
117*9880d681SAndroid Build Coastguard Worker; a single vpsllvd instead.
118*9880d681SAndroid Build Coastguard Worker
119*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test6(<8 x i32> %a) {
120*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test6:
121*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
122*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2,2,4,8]
123*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmulld %xmm2, %xmm0
124*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmulld %xmm2, %xmm1
125*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
126*9880d681SAndroid Build Coastguard Worker;
127*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test6:
128*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
129*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsllvd {{.*}}(%rip), %ymm0, %ymm0
130*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
131*9880d681SAndroid Build Coastguard Worker;
132*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test6:
133*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
134*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpsllvd {{.*}}(%rip), %ymm0, %ymm0
135*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
; Shift eight i32 lanes by the 4-element pattern (1, 1, 2, 3) repeated twice.
; The SSE4.1 assertions load the multipliers [2,2,4,8] (1 << amount) into xmm2
; once and apply them with two pmulld instructions, one per 128-bit half
; (xmm0, xmm1). The AVX2 and AVX512F assertions expect a single 256-bit
; vpsllvd on ymm0 with a RIP-relative memory operand.
136*9880d681SAndroid Build Coastguard Worker  %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
137*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %shl
138*9880d681SAndroid Build Coastguard Worker}
139*9880d681SAndroid Build Coastguard Worker
140*9880d681SAndroid Build Coastguard Worker; With AVX2 and AVX512, the test case below should produce a sequence of
141*9880d681SAndroid Build Coastguard Worker; two vpmullw instructions. With SSE2, by contrast, the shift is split into
142*9880d681SAndroid Build Coastguard Worker; four parts and each part is converted into a pmullw.
143*9880d681SAndroid Build Coastguard Worker
144*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @test7(<32 x i16> %a) {
145*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test7:
146*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
147*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [2,2,4,8,128,1,512,2048]
148*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmullw %xmm4, %xmm0
149*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmullw %xmm4, %xmm1
150*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmullw %xmm4, %xmm2
151*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmullw %xmm4, %xmm3
152*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
153*9880d681SAndroid Build Coastguard Worker;
154*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test7:
155*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
156*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [2,2,4,8,128,1,512,2048,2,2,4,8,128,1,512,2048]
157*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
158*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
159*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
160*9880d681SAndroid Build Coastguard Worker;
161*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test7:
162*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
163*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovdqa {{.*#+}} ymm2 = [2,2,4,8,128,1,512,2048,2,2,4,8,128,1,512,2048]
164*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
165*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
166*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
; Shift thirty-two i16 lanes by the 8-element pattern (1, 1, 2, 3, 7, 0, 9, 11)
; repeated four times. The SSE4.1 assertions load the multipliers
; [2,2,4,8,128,1,512,2048] (1 << amount) into xmm4 once and issue four pmullw
; instructions (xmm0..xmm3). The AVX2 and AVX512F assertions are identical to
; each other: the 16-lane multiplier vector is loaded into ymm2 once and used
; by two 256-bit vpmullw instructions (ymm0, ymm1).
167*9880d681SAndroid Build Coastguard Worker  %shl = shl <32 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
168*9880d681SAndroid Build Coastguard Worker  ret <32 x i16> %shl
169*9880d681SAndroid Build Coastguard Worker}
170*9880d681SAndroid Build Coastguard Worker
171*9880d681SAndroid Build Coastguard Worker; Similar to test7; the difference is that with AVX512 support
172*9880d681SAndroid Build Coastguard Worker; we only produce a single vpsllvd/vpsllvq instead of a pair of vpsllvd/vpsllvq.
173*9880d681SAndroid Build Coastguard Worker
174*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @test8(<16 x i32> %a) {
175*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test8:
176*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
177*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [2,2,4,8]
178*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmulld %xmm4, %xmm0
179*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmulld %xmm4, %xmm1
180*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmulld %xmm4, %xmm2
181*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pmulld %xmm4, %xmm3
182*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
183*9880d681SAndroid Build Coastguard Worker;
184*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test8:
185*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
186*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,1,2,3,1,1,2,3]
187*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsllvd %ymm2, %ymm0, %ymm0
188*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsllvd %ymm2, %ymm1, %ymm1
189*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
190*9880d681SAndroid Build Coastguard Worker;
191*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test8:
192*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
193*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpsllvd {{.*}}(%rip), %zmm0, %zmm0
194*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
; Shift sixteen i32 lanes by the 4-element pattern (1, 1, 2, 3) repeated four
; times. The SSE4.1 assertions load the multipliers [2,2,4,8] (1 << amount)
; into xmm4 once and issue four pmulld instructions (xmm0..xmm3). The AVX2
; assertions load the raw shift counts [1,1,2,3,1,1,2,3] into ymm2 and issue
; two vpsllvd instructions (ymm0, ymm1). The AVX512F assertions expect a single
; 512-bit vpsllvd on zmm0 with a RIP-relative memory operand.
195*9880d681SAndroid Build Coastguard Worker  %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
196*9880d681SAndroid Build Coastguard Worker  ret <16 x i32> %shl
197*9880d681SAndroid Build Coastguard Worker}
198*9880d681SAndroid Build Coastguard Worker
199*9880d681SAndroid Build Coastguard Worker; The shift in 'test9' is split into separate immediate shifts whose results are blended if we don't have AVX2/AVX512f support.
200*9880d681SAndroid Build Coastguard Worker
201*9880d681SAndroid Build Coastguard Workerdefine <8 x i64> @test9(<8 x i64> %a) {
202*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test9:
203*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
204*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa %xmm1, %xmm4
205*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psllq $3, %xmm4
206*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psllq $2, %xmm1
207*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm4[4,5,6,7]
208*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa %xmm3, %xmm4
209*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psllq $3, %xmm4
210*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psllq $2, %xmm3
211*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7]
212*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm0, %xmm0
213*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm2, %xmm2
214*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
215*9880d681SAndroid Build Coastguard Worker;
216*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test9:
217*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
218*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,1,2,3]
219*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsllvq %ymm2, %ymm0, %ymm0
220*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsllvq %ymm2, %ymm1, %ymm1
221*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
222*9880d681SAndroid Build Coastguard Worker;
223*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test9:
224*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
225*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpsllvq {{.*}}(%rip), %zmm0, %zmm0
226*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
; Shift eight i64 lanes by the 4-element pattern (1, 1, 2, 3) repeated twice.
; The SSE4.1 assertions use no multiply: xmm1 and xmm3 are each copied to
; xmm4, the copy is shifted by 3 and the original by 2 with immediate psllq,
; and pblendw merges the low half of one with the high half of the other;
; xmm0 and xmm2 are doubled with paddq (x + x, i.e. a shift by 1).
; The AVX2 assertions load the shift counts [1,1,2,3] into ymm2 and issue two
; vpsllvq instructions (ymm0, ymm1). The AVX512F assertions expect a single
; 512-bit vpsllvq on zmm0 with a RIP-relative memory operand.
227*9880d681SAndroid Build Coastguard Worker  %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
228*9880d681SAndroid Build Coastguard Worker  ret <8 x i64> %shl
229*9880d681SAndroid Build Coastguard Worker}
230