xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
4*9880d681SAndroid Build Coastguard Worker
5*9880d681SAndroid Build Coastguard Worker;
6*9880d681SAndroid Build Coastguard Worker; sdiv by 7
7*9880d681SAndroid Build Coastguard Worker;
8*9880d681SAndroid Build Coastguard Worker
; Signed division of every <4 x i64> lane by the constant 7.
; The autogenerated checks below expect the operation to be scalarized for both
; check prefixes: each lane is moved to a GPR, multiplied by 0x4924924924924925
; with imulq, and the high half left in %rdx is adjusted with
; shrq $63 / sarq / addq before the lanes are repacked into a ymm register.
; The two expectation sets differ only in the 128-bit extract/insert opcodes
; (vextractf128/vinsertf128 versus vextracti128/vinserti128).
9*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
10*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_div7_4i64:
11*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
12*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
13*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
14*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movabsq $5270498306774157605, %rcx # imm = 0x4924924924924925
15*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    imulq %rcx
16*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rdx, %rax
17*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $63, %rax
18*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    sarq %rdx
19*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rax, %rdx
20*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rdx, %xmm2
21*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %xmm1, %rax
22*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    imulq %rcx
23*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rdx, %rax
24*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $63, %rax
25*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    sarq %rdx
26*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rax, %rdx
27*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rdx, %xmm1
28*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
29*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
30*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    imulq %rcx
31*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rdx, %rax
32*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $63, %rax
33*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    sarq %rdx
34*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rax, %rdx
35*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rdx, %xmm2
36*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %xmm0, %rax
37*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    imulq %rcx
38*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rdx, %rax
39*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $63, %rax
40*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    sarq %rdx
41*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rax, %rdx
42*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rdx, %xmm0
43*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
44*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
45*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
46*9880d681SAndroid Build Coastguard Worker;
47*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_div7_4i64:
48*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
49*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
50*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
51*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movabsq $5270498306774157605, %rcx # imm = 0x4924924924924925
52*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    imulq %rcx
53*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rdx, %rax
54*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $63, %rax
55*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    sarq %rdx
56*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rax, %rdx
57*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rdx, %xmm2
58*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %xmm1, %rax
59*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    imulq %rcx
60*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rdx, %rax
61*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $63, %rax
62*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    sarq %rdx
63*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rax, %rdx
64*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rdx, %xmm1
65*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
66*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
67*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    imulq %rcx
68*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rdx, %rax
69*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $63, %rax
70*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    sarq %rdx
71*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rax, %rdx
72*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rdx, %xmm2
73*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %xmm0, %rax
74*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    imulq %rcx
75*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rdx, %rax
76*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $63, %rax
77*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    sarq %rdx
78*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rax, %rdx
79*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rdx, %xmm0
80*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
81*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
82*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
83*9880d681SAndroid Build Coastguard Worker  %res = sdiv <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
84*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
85*9880d681SAndroid Build Coastguard Worker}
86*9880d681SAndroid Build Coastguard Worker
; Signed division of every <8 x i32> lane by the constant 7.
; The autogenerated checks expect a multiply-based sequence: vpmuldq against a
; splat constant (printed as 2454267027 in the +avx checks), a vpshufd/blend
; that recombines the products, a vpaddd of the dividend, and finally
; vpsrld $31 + vpsrad $2 + vpaddd. With +avx the sequence is emitted once per
; 128-bit half and rejoined with vinsertf128; with +avx2 it is emitted once on
; ymm registers with the constant loaded by vpbroadcastd.
87*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test_div7_8i32(<8 x i32> %a) nounwind {
88*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_div7_8i32:
89*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
90*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
91*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
92*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
93*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
94*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
95*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuldq %xmm3, %xmm5, %xmm3
96*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuldq %xmm2, %xmm4, %xmm2
97*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
98*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
99*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm4, %xmm2, %xmm2
100*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrld $31, %xmm2, %xmm3
101*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrad $2, %xmm2, %xmm2
102*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
103*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
104*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
105*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuldq %xmm3, %xmm4, %xmm3
106*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
107*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
108*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
109*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
110*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm1
111*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrad $2, %xmm0, %xmm0
112*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
113*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
114*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
115*9880d681SAndroid Build Coastguard Worker;
116*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_div7_8i32:
117*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
118*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
119*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
120*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
121*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmuldq %ymm2, %ymm3, %ymm2
122*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmuldq %ymm1, %ymm0, %ymm1
123*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
124*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
125*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
126*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm1
127*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrad $2, %ymm0, %ymm0
128*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
129*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
130*9880d681SAndroid Build Coastguard Worker  %res = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
131*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %res
132*9880d681SAndroid Build Coastguard Worker}
133*9880d681SAndroid Build Coastguard Worker
; Signed division of every <16 x i16> lane by the constant 7.
; The autogenerated checks expect vpmulhw by a constant followed by
; vpsrlw $15 / vpsraw $1 / vpaddw. With +avx the multiplier is a splat of 18725
; held in an xmm register and the sequence runs once per 128-bit half; with
; +avx2 it runs once on ymm0 with the multiplier taken from a RIP-relative
; memory operand.
134*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @test_div7_16i16(<16 x i16> %a) nounwind {
135*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_div7_16i16:
136*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
137*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
138*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
139*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmulhw %xmm2, %xmm1, %xmm1
140*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $15, %xmm1, %xmm3
141*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsraw $1, %xmm1, %xmm1
142*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm3, %xmm1, %xmm1
143*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmulhw %xmm2, %xmm0, %xmm0
144*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm2
145*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsraw $1, %xmm0, %xmm0
146*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
147*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
148*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
149*9880d681SAndroid Build Coastguard Worker;
150*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_div7_16i16:
151*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
152*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmulhw {{.*}}(%rip), %ymm0, %ymm0
153*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm1
154*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsraw $1, %ymm0, %ymm0
155*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
156*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
157*9880d681SAndroid Build Coastguard Worker  %res = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
158*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %res
159*9880d681SAndroid Build Coastguard Worker}
160*9880d681SAndroid Build Coastguard Worker
; Signed division of every <32 x i8> lane by the constant 7.
; The autogenerated checks expect the bytes to be sign-extended to words with
; vpmovsxbw, multiplied with vpmullw, shifted right by 8 and repacked with
; vpackuswb, after which the dividend is added with vpaddb. The trailing
; shift-by-2 and shift-by-7 steps use word shifts (vpsrlw) followed by byte
; masks (vpand), and the shift-by-2 result is additionally passed through a
; vpxor/vpsubb pair with a splat of 32 before the final vpaddb.
; With +avx everything is done per 128-bit half; with +avx2 the widened
; operands live in ymm registers.
161*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
162*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_div7_32i8:
163*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
164*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
165*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm1, %xmm2
166*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw {{.*}}(%rip), %xmm3
167*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
168*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
169*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
170*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm4, %xmm4
171*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm3, %xmm4, %xmm4
172*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
173*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpackuswb %xmm4, %xmm2, %xmm2
174*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
175*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm2
176*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
177*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
178*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $2, %xmm1, %xmm1
179*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
180*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
181*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
182*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpxor %xmm6, %xmm1, %xmm1
183*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubb %xmm6, %xmm1, %xmm1
184*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
185*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm0, %xmm2
186*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
187*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
188*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm0[2,3,0,1]
189*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm7, %xmm7
190*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm3, %xmm7, %xmm3
191*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
192*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
193*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm0, %xmm2, %xmm0
194*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm2
195*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
196*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm0
197*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm5, %xmm0, %xmm0
198*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm0
199*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubb %xmm6, %xmm0, %xmm0
200*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
201*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
202*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
203*9880d681SAndroid Build Coastguard Worker;
204*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_div7_32i8:
205*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
206*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147]
207*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
208*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm2, %ymm2
209*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
210*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm3, %ymm3
211*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm2, %ymm3, %ymm2
212*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
213*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm1, %ymm1
214*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm3
215*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm1, %ymm3, %ymm1
216*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
217*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[2,3]
218*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
219*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpackuswb %ymm3, %ymm1, %ymm1
220*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
221*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $2, %ymm0, %ymm1
222*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
223*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
224*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm1
225*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
226*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $7, %ymm0, %ymm0
227*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
228*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
229*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
230*9880d681SAndroid Build Coastguard Worker  %res = sdiv <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
231*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %res
232*9880d681SAndroid Build Coastguard Worker}
233*9880d681SAndroid Build Coastguard Worker
234*9880d681SAndroid Build Coastguard Worker;
235*9880d681SAndroid Build Coastguard Worker; srem by 7
236*9880d681SAndroid Build Coastguard Worker;
237*9880d681SAndroid Build Coastguard Worker
; Signed remainder of every <4 x i64> lane by the constant 7.
; The autogenerated checks expect the operation to be scalarized for both
; check prefixes. For each lane the value is kept in %rcx, a quotient is formed
; in %rdx via imulq by 0x4924924924924925 plus the shrq $63 / sarq / addq
; adjustment, 7*quotient is built as 8*q - q (leaq (,%rdx,8) then subq), and
; that product is subtracted from %rcx. The lanes are then repacked into ymm0.
; The two expectation sets differ only in the 128-bit extract/insert opcodes
; (vextractf128/vinsertf128 versus vextracti128/vinserti128).
238*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
239*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_rem7_4i64:
240*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
241*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
242*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpextrq $1, %xmm1, %rcx
243*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movabsq $5270498306774157605, %rsi # imm = 0x4924924924924925
244*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
245*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    imulq %rsi
246*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rdx, %rax
247*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $63, %rax
248*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    sarq %rdx
249*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rax, %rdx
250*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    leaq (,%rdx,8), %rax
251*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rax
252*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rax, %rcx
253*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rcx, %xmm2
254*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %xmm1, %rcx
255*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
256*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    imulq %rsi
257*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rdx, %rax
258*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $63, %rax
259*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    sarq %rdx
260*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rax, %rdx
261*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    leaq (,%rdx,8), %rax
262*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rax
263*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rax, %rcx
264*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rcx, %xmm1
265*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
266*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpextrq $1, %xmm0, %rcx
267*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
268*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    imulq %rsi
269*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rdx, %rax
270*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $63, %rax
271*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    sarq %rdx
272*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rax, %rdx
273*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    leaq (,%rdx,8), %rax
274*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rax
275*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rax, %rcx
276*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rcx, %xmm2
277*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %xmm0, %rcx
278*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
279*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    imulq %rsi
280*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rdx, %rax
281*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $63, %rax
282*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    sarq %rdx
283*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rax, %rdx
284*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    leaq (,%rdx,8), %rax
285*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rax
286*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rax, %rcx
287*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rcx, %xmm0
288*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
289*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
290*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
291*9880d681SAndroid Build Coastguard Worker;
292*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_rem7_4i64:
293*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
294*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
295*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpextrq $1, %xmm1, %rcx
296*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movabsq $5270498306774157605, %rsi # imm = 0x4924924924924925
297*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
298*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    imulq %rsi
299*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rdx, %rax
300*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $63, %rax
301*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    sarq %rdx
302*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rax, %rdx
303*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    leaq (,%rdx,8), %rax
304*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rax
305*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rax, %rcx
306*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rcx, %xmm2
307*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %xmm1, %rcx
308*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
309*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    imulq %rsi
310*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rdx, %rax
311*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $63, %rax
312*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    sarq %rdx
313*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rax, %rdx
314*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    leaq (,%rdx,8), %rax
315*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rax
316*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rax, %rcx
317*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rcx, %xmm1
318*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
319*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpextrq $1, %xmm0, %rcx
320*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
321*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    imulq %rsi
322*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rdx, %rax
323*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $63, %rax
324*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    sarq %rdx
325*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rax, %rdx
326*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    leaq (,%rdx,8), %rax
327*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rax
328*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rax, %rcx
329*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rcx, %xmm2
330*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %xmm0, %rcx
331*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
332*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    imulq %rsi
333*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rdx, %rax
334*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $63, %rax
335*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    sarq %rdx
336*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rax, %rdx
337*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    leaq (,%rdx,8), %rax
338*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rax
339*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rax, %rcx
340*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rcx, %xmm0
341*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
342*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
343*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
344*9880d681SAndroid Build Coastguard Worker  %res = srem <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
345*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
346*9880d681SAndroid Build Coastguard Worker}
347*9880d681SAndroid Build Coastguard Worker
; Signed remainder of every <8 x i32> lane by the constant 7.
; The autogenerated checks expect a quotient to be formed first (vpmuldq against
; a splat constant, vpshufd/blend, vpaddd of the dividend, then
; vpsrld $31 + vpsrad $2 + vpaddd), after which the quotient is multiplied with
; vpmulld by a second vector constant (printed as [7,7,7,7] in the +avx checks)
; and the product is subtracted from the dividend with vpsubd.
; With +avx the sequence is emitted once per 128-bit half; with +avx2 it is
; emitted once on ymm registers with both constants loaded by vpbroadcastd.
348*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test_rem7_8i32(<8 x i32> %a) nounwind {
349*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_rem7_8i32:
350*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
351*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
352*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
353*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
354*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
355*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
356*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuldq %xmm3, %xmm5, %xmm3
357*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuldq %xmm2, %xmm4, %xmm2
358*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
359*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
360*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm4, %xmm2, %xmm2
361*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrld $31, %xmm2, %xmm3
362*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrad $2, %xmm2, %xmm2
363*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
364*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [7,7,7,7]
365*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmulld %xmm3, %xmm2, %xmm2
366*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubd %xmm2, %xmm4, %xmm2
367*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
368*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
369*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuldq %xmm4, %xmm5, %xmm4
370*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm1
371*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
372*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7]
373*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm0, %xmm1, %xmm1
374*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm4
375*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrad $2, %xmm1, %xmm1
376*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm4, %xmm1, %xmm1
377*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmulld %xmm3, %xmm1, %xmm1
378*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
379*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
380*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
381*9880d681SAndroid Build Coastguard Worker;
382*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_rem7_8i32:
383*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
384*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
385*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
386*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
387*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmuldq %ymm2, %ymm3, %ymm2
388*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmuldq %ymm1, %ymm0, %ymm1
389*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
390*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
391*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddd %ymm0, %ymm1, %ymm1
392*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrld $31, %ymm1, %ymm2
393*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrad $2, %ymm1, %ymm1
394*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
395*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
396*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmulld %ymm2, %ymm1, %ymm1
397*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
398*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
399*9880d681SAndroid Build Coastguard Worker  %res = srem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
400*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %res
401*9880d681SAndroid Build Coastguard Worker}
402*9880d681SAndroid Build Coastguard Worker
; test_rem7_16i16 - signed remainder of a <16 x i16> vector by a splat of 7.
; The expected code never issues a divide: it takes the signed high half of
; a multiply by 18725 (18725 * 7 = 2^17 + 3), shifts that arithmetically
; right by 1, adds the sign bit (logical shift right by 15) to form the
; quotient, multiplies the quotient by 7 and subtracts it from the input.
; With only +avx the expectations perform this per 128-bit half
; (vextractf128 / vinsertf128); with +avx2 they use full ymm operations.
; NOTE(review): the avx2 expectations load both multiplier constants
; through {{.*}}(%rip), so their values are not pinned by those lines.
403*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @test_rem7_16i16(<16 x i16> %a) nounwind {
404*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_rem7_16i16:
405*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
406*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
407*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
408*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmulhw %xmm2, %xmm1, %xmm3
409*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $15, %xmm3, %xmm4
410*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsraw $1, %xmm3, %xmm3
411*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm4, %xmm3, %xmm3
412*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [7,7,7,7,7,7,7,7]
413*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm4, %xmm3, %xmm3
414*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubw %xmm3, %xmm1, %xmm1
415*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmulhw %xmm2, %xmm0, %xmm2
416*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm3
417*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsraw $1, %xmm2, %xmm2
418*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm3, %xmm2, %xmm2
419*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm4, %xmm2, %xmm2
420*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
421*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
422*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
423*9880d681SAndroid Build Coastguard Worker;
424*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_rem7_16i16:
425*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
426*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmulhw {{.*}}(%rip), %ymm0, %ymm1
427*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $15, %ymm1, %ymm2
428*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsraw $1, %ymm1, %ymm1
429*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
430*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm1, %ymm1
431*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
432*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
433*9880d681SAndroid Build Coastguard Worker  %res = srem <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
434*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %res
435*9880d681SAndroid Build Coastguard Worker}
436*9880d681SAndroid Build Coastguard Worker
; test_rem7_32i8 - signed remainder of a <32 x i8> vector by a splat of 7.
; The expected code has no byte-wide multiply, so the input bytes are
; sign-extended to words (vpmovsxbw), multiplied with vpmullw, and the high
; byte of each product is kept (vpsrlw $8) and re-packed with vpackuswb.
; The input is then added back (vpaddb). The 2-bit shift is done as a word
; shift masked to 63 followed by xor/sub with 32, which appears to emulate
; an arithmetic byte shift. The sign bit (vpsrlw $7 masked to 1) is added to
; form the quotient, which is multiplied back and subtracted from the input.
; With only +avx the expectations handle each 128-bit half separately; with
; +avx2 the first multiplier is shown as a splat of 147 (the i8 bit pattern
; printed unsigned).
; NOTE(review): the second multiplier is loaded through {{.*}}(%rip) under
; both configurations - presumably the splat of 7; confirm against llc output.
437*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
438*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_rem7_32i8:
439*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
440*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
441*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm2, %xmm3
442*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw {{.*}}(%rip), %xmm1
443*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm1, %xmm3, %xmm3
444*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
445*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
446*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm4, %xmm4
447*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm1, %xmm4, %xmm4
448*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
449*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpackuswb %xmm4, %xmm3, %xmm3
450*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm3
451*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $7, %xmm3, %xmm4
452*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
453*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm8, %xmm4, %xmm4
454*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $2, %xmm3, %xmm3
455*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm9 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
456*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm9, %xmm3, %xmm3
457*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
458*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpxor %xmm7, %xmm3, %xmm3
459*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubb %xmm7, %xmm3, %xmm3
460*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm4, %xmm3, %xmm3
461*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm3, %xmm4
462*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw {{.*}}(%rip), %xmm5
463*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm5, %xmm4, %xmm4
464*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [255,255,255,255,255,255,255,255]
465*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm6, %xmm4, %xmm4
466*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
467*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm3, %xmm3
468*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm5, %xmm3, %xmm3
469*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
470*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpackuswb %xmm3, %xmm4, %xmm3
471*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
472*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm0, %xmm3
473*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm1, %xmm3, %xmm3
474*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
475*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
476*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm4, %xmm4
477*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm1, %xmm4, %xmm1
478*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
479*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpackuswb %xmm1, %xmm3, %xmm1
480*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm1
481*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm3
482*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm8, %xmm3, %xmm3
483*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $2, %xmm1, %xmm1
484*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm9, %xmm1, %xmm1
485*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpxor %xmm7, %xmm1, %xmm1
486*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubb %xmm7, %xmm1, %xmm1
487*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm3, %xmm1, %xmm1
488*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm1, %xmm3
489*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm5, %xmm3, %xmm3
490*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
491*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
492*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm1, %xmm1
493*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm5, %xmm1, %xmm1
494*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm6, %xmm1, %xmm1
495*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpackuswb %xmm1, %xmm3, %xmm1
496*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
497*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
498*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
499*9880d681SAndroid Build Coastguard Worker;
500*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_rem7_32i8:
501*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
502*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147,147]
503*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
504*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm2, %ymm2
505*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
506*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm3, %ymm3
507*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm2, %ymm3, %ymm2
508*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
509*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm1, %ymm1
510*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm0, %ymm3
511*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm1, %ymm3, %ymm1
512*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
513*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[2,3]
514*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
515*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpackuswb %ymm3, %ymm1, %ymm1
516*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddb %ymm0, %ymm1, %ymm1
517*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $2, %ymm1, %ymm2
518*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
519*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
520*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpxor %ymm3, %ymm2, %ymm2
521*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubb %ymm3, %ymm2, %ymm2
522*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $7, %ymm1, %ymm1
523*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
524*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
525*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
526*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm2, %ymm2
527*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm3
528*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
529*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm4
530*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
531*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
532*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
533*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
534*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm1, %ymm1
535*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm3, %ymm1, %ymm1
536*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
537*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm3, %xmm3
538*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm1, %xmm1
539*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
540*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
541*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
542*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
543*9880d681SAndroid Build Coastguard Worker  %res = srem <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
544*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %res
545*9880d681SAndroid Build Coastguard Worker}
546