xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
4*9880d681SAndroid Build Coastguard Worker
5*9880d681SAndroid Build Coastguard Worker;
6*9880d681SAndroid Build Coastguard Worker; udiv by 7
7*9880d681SAndroid Build Coastguard Worker;
8*9880d681SAndroid Build Coastguard Worker
; test_div7_4i64: unsigned division of every lane of a <4 x i64> vector by the
; constant 7.
; Expected codegen (same shape for the +avx and +avx2 runs): the divide is
; scalarized. Each 64-bit lane is moved to %rcx, multiplied by the constant
; 0x2492492492492493 with mulq, and the quotient is formed from %rdx via
; sub / shr 1 / add / shr 2. The four results are rebuilt into a ymm register
; with vpunpcklqdq plus a 128-bit insert.
; The two runs differ only in the 128-bit extract/insert opcodes
; (vextractf128/vinsertf128 versus vextracti128/vinserti128).
; The check lines below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
9*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
10*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_div7_4i64:
11*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
12*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
13*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpextrq $1, %xmm1, %rcx
14*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
15*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
16*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    mulq %rsi
17*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rcx
18*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq %rcx
19*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rdx, %rcx
20*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $2, %rcx
21*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rcx, %xmm2
22*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %xmm1, %rcx
23*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
24*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    mulq %rsi
25*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rcx
26*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq %rcx
27*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rdx, %rcx
28*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $2, %rcx
29*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rcx, %xmm1
30*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
31*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpextrq $1, %xmm0, %rcx
32*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
33*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    mulq %rsi
34*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rcx
35*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq %rcx
36*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rdx, %rcx
37*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $2, %rcx
38*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rcx, %xmm2
39*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %xmm0, %rcx
40*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
41*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    mulq %rsi
42*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rcx
43*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq %rcx
44*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rdx, %rcx
45*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $2, %rcx
46*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rcx, %xmm0
47*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
48*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
49*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
50*9880d681SAndroid Build Coastguard Worker;
51*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_div7_4i64:
52*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
53*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
54*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpextrq $1, %xmm1, %rcx
55*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
56*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
57*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    mulq %rsi
58*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rcx
59*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq %rcx
60*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rdx, %rcx
61*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $2, %rcx
62*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rcx, %xmm2
63*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %xmm1, %rcx
64*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
65*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    mulq %rsi
66*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rcx
67*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq %rcx
68*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rdx, %rcx
69*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $2, %rcx
70*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rcx, %xmm1
71*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
72*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpextrq $1, %xmm0, %rcx
73*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
74*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    mulq %rsi
75*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rcx
76*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq %rcx
77*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rdx, %rcx
78*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $2, %rcx
79*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rcx, %xmm2
80*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %xmm0, %rcx
81*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
82*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    mulq %rsi
83*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rcx
84*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq %rcx
85*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rdx, %rcx
86*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $2, %rcx
87*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rcx, %xmm0
88*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
89*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
90*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
91*9880d681SAndroid Build Coastguard Worker  %res = udiv <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
92*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
93*9880d681SAndroid Build Coastguard Worker}
94*9880d681SAndroid Build Coastguard Worker
; test_div7_8i32: unsigned division of every lane of an <8 x i32> vector by
; the constant 7.
; Expected codegen stays in vector registers. The dividend is multiplied with
; vpmuludq, once on the lanes as-is and once on the [1,1,3,3]-shuffled lanes,
; and the two products are merged with a shuffle plus blend. The quotient is
; then formed by sub / srl 1 / add / srl 2.
; With +avx the multiplier is a splat of 613566757, the sequence runs on each
; 128-bit half separately (vpblendw) and the halves are joined with
; vinsertf128. With +avx2 the multiplier is loaded by a rip-relative
; vpbroadcastd and the sequence runs once on full ymm registers (vpblendd).
95*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test_div7_8i32(<8 x i32> %a) nounwind {
96*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_div7_8i32:
97*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
98*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} ymm1 = [613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757]
99*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
100*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
101*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
102*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm3
103*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
104*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
105*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubd %xmm2, %xmm0, %xmm3
106*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrld $1, %xmm3, %xmm3
107*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
108*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrld $2, %xmm2, %xmm2
109*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
110*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
111*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
112*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
113*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuludq %xmm3, %xmm4, %xmm3
114*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
115*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
116*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
117*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
118*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
119*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
120*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrld $2, %xmm0, %xmm0
121*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
122*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
123*9880d681SAndroid Build Coastguard Worker;
124*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_div7_8i32:
125*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
126*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
127*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
128*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
129*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmuludq %ymm2, %ymm3, %ymm2
130*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmuludq %ymm1, %ymm0, %ymm1
131*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
132*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
133*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
134*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrld $1, %ymm0, %ymm0
135*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
136*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrld $2, %ymm0, %ymm0
137*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
138*9880d681SAndroid Build Coastguard Worker  %res = udiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
139*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %res
140*9880d681SAndroid Build Coastguard Worker}
141*9880d681SAndroid Build Coastguard Worker
; test_div7_16i16: unsigned division of every lane of a <16 x i16> vector by
; the constant 7.
; Expected codegen: vpmulhuw against a constant (a splat of 9363 in the +avx
; run, a rip-relative memory operand in the +avx2 run), followed by
; vpsubw / vpsrlw 1 / vpaddw / vpsrlw 2.
; With +avx the sequence is applied to each 128-bit half and the halves are
; rejoined with vinsertf128; with +avx2 it is a single pass over ymm
; registers.
142*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @test_div7_16i16(<16 x i16> %a) nounwind {
143*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_div7_16i16:
144*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
145*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [9363,9363,9363,9363,9363,9363,9363,9363]
146*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2
147*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubw %xmm2, %xmm0, %xmm3
148*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $1, %xmm3, %xmm3
149*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm2, %xmm3, %xmm2
150*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm2
151*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
152*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm1
153*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
154*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0
155*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
156*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm0
157*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
158*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
159*9880d681SAndroid Build Coastguard Worker;
160*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_div7_16i16:
161*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
162*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
163*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
164*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $1, %ymm0, %ymm0
165*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
166*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $2, %ymm0, %ymm0
167*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
168*9880d681SAndroid Build Coastguard Worker  %res = udiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
169*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %res
170*9880d681SAndroid Build Coastguard Worker}
171*9880d681SAndroid Build Coastguard Worker
; test_div7_32i8: unsigned division of every lane of a <32 x i8> vector by
; the constant 7.
; Expected codegen: the bytes are zero-extended to 16-bit lanes with
; vpmovzxbw, multiplied with vpmullw, shifted right by 8, and packed back to
; bytes with vpackuswb. The quotient is then formed by
; vpsubb / vpsrlw 1 / vpaddb / vpsrlw 2, where each word-sized shift is
; followed by a vpand (masks of 127 and 63 in the +avx run, rip-relative
; operands in the +avx2 run).
; NOTE(review): the masks presumably clear bits that the 16-bit shifts carry
; across byte boundaries - confirm.
; With +avx the work is done eight bytes at a time per 128-bit half, using a
; multiplier widened straight from memory. With +avx2 the multiplier is a
; splat of 37 and the widened halves are recombined with
; vperm2i128 / vinserti128 before packing.
172*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
173*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_div7_32i8:
174*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
175*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
176*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
177*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
178*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
179*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
180*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
181*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
182*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm3, %xmm4, %xmm4
183*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
184*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpackuswb %xmm4, %xmm2, %xmm2
185*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
186*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $1, %xmm1, %xmm1
187*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
188*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
189*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
190*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $2, %xmm1, %xmm1
191*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
192*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
193*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
194*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm3, %xmm5, %xmm5
195*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm5, %xmm5
196*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm0[2,3,0,1]
197*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
198*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm3, %xmm6, %xmm3
199*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
200*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpackuswb %xmm3, %xmm5, %xmm3
201*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
202*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0
203*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
204*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm3, %xmm0, %xmm0
205*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm0
206*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
207*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
208*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
209*9880d681SAndroid Build Coastguard Worker;
210*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_div7_32i8:
211*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
212*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
213*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
214*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
215*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
216*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
217*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm2, %ymm3, %ymm2
218*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
219*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
220*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
221*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm1, %ymm3, %ymm1
222*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
223*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[2,3]
224*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
225*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpackuswb %ymm3, %ymm1, %ymm1
226*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
227*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $1, %ymm0, %ymm0
228*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
229*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
230*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $2, %ymm0, %ymm0
231*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
232*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
233*9880d681SAndroid Build Coastguard Worker  %res = udiv <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
234*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %res
235*9880d681SAndroid Build Coastguard Worker}
236*9880d681SAndroid Build Coastguard Worker
237*9880d681SAndroid Build Coastguard Worker;
238*9880d681SAndroid Build Coastguard Worker; urem by 7
239*9880d681SAndroid Build Coastguard Worker;
240*9880d681SAndroid Build Coastguard Worker
; test_rem7_4i64: unsigned remainder of every lane of a <4 x i64> vector
; modulo the constant 7.
; Expected codegen (same shape for the +avx and +avx2 runs): the operation is
; scalarized. Each 64-bit lane is kept in %rcx while the quotient is computed
; in %rax (mulq by 0x2492492492492493, then sub / shr 1 / add / shr 2).
; leaq (,%rax,8) minus %rax then forms 7 * quotient in %rdx, which is
; subtracted from %rcx to leave the remainder. The four results are rebuilt
; into a ymm register with vpunpcklqdq plus a 128-bit insert.
; The two runs differ only in the 128-bit extract/insert opcodes
; (vextractf128/vinsertf128 versus vextracti128/vinserti128).
241*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
242*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_rem7_4i64:
243*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
244*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
245*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpextrq $1, %xmm1, %rcx
246*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
247*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
248*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    mulq %rsi
249*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
250*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rax
251*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq %rax
252*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rdx, %rax
253*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $2, %rax
254*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    leaq (,%rax,8), %rdx
255*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rax, %rdx
256*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rcx
257*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rcx, %xmm2
258*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %xmm1, %rcx
259*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
260*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    mulq %rsi
261*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
262*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rax
263*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq %rax
264*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rdx, %rax
265*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $2, %rax
266*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    leaq (,%rax,8), %rdx
267*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rax, %rdx
268*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rcx
269*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rcx, %xmm1
270*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
271*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpextrq $1, %xmm0, %rcx
272*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
273*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    mulq %rsi
274*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
275*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rax
276*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq %rax
277*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rdx, %rax
278*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $2, %rax
279*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    leaq (,%rax,8), %rdx
280*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rax, %rdx
281*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rcx
282*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rcx, %xmm2
283*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %xmm0, %rcx
284*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
285*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    mulq %rsi
286*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rcx, %rax
287*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rax
288*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq %rax
289*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addq %rdx, %rax
290*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    shrq $2, %rax
291*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    leaq (,%rax,8), %rdx
292*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rax, %rdx
293*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq %rdx, %rcx
294*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovq %rcx, %xmm0
295*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
296*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
297*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
298*9880d681SAndroid Build Coastguard Worker;
299*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_rem7_4i64:
300*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
301*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
302*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpextrq $1, %xmm1, %rcx
303*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
304*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
305*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    mulq %rsi
306*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
307*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rax
308*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq %rax
309*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rdx, %rax
310*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $2, %rax
311*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    leaq (,%rax,8), %rdx
312*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rax, %rdx
313*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rcx
314*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rcx, %xmm2
315*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %xmm1, %rcx
316*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
317*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    mulq %rsi
318*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
319*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rax
320*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq %rax
321*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rdx, %rax
322*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $2, %rax
323*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    leaq (,%rax,8), %rdx
324*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rax, %rdx
325*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rcx
326*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rcx, %xmm1
327*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
328*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpextrq $1, %xmm0, %rcx
329*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
330*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    mulq %rsi
331*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
332*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rax
333*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq %rax
334*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rdx, %rax
335*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $2, %rax
336*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    leaq (,%rax,8), %rdx
337*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rax, %rdx
338*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rcx
339*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rcx, %xmm2
340*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %xmm0, %rcx
341*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
342*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    mulq %rsi
343*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rcx, %rax
344*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rax
345*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq %rax
346*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    addq %rdx, %rax
347*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    shrq $2, %rax
348*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    leaq (,%rax,8), %rdx
349*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rax, %rdx
350*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq %rdx, %rcx
351*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovq %rcx, %xmm0
352*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
353*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
354*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
355*9880d681SAndroid Build Coastguard Worker  %res = urem <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
356*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
357*9880d681SAndroid Build Coastguard Worker}
358*9880d681SAndroid Build Coastguard Worker
; test_rem7_8i32: unsigned remainder of every lane of a <8 x i32> by the
; constant 7 (the urem at the end of the function body).
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; Expected code for the +avx run: the input is handled as two 128-bit halves
; (vextractf128 ... vinsertf128). For each half the lanes are multiplied by
; 613566757 with vpmuludq (even lanes directly, odd lanes after vpshufd), the
; odd 32-bit elements of the products are gathered with vpshufd/vpblendw, and
; the result goes through vpsubd, vpsrld $1, vpaddd, vpsrld $2. That value is
; multiplied by [7,7,7,7] (vpmulld) and subtracted from the input (vpsubd).
; Expected code for the +avx2 run: the same sequence on whole ymm registers;
; both constants are loaded with vpbroadcastd from rip-relative memory, so
; their values are not visible in the checks.
359*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test_rem7_8i32(<8 x i32> %a) nounwind {
360*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_rem7_8i32:
361*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
362*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [613566757,613566757,613566757,613566757,613566757,613566757,613566757,613566757]
363*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
364*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
365*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
366*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
367*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuludq %xmm3, %xmm5, %xmm3
368*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
369*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
370*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
371*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubd %xmm2, %xmm4, %xmm3
372*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrld $1, %xmm3, %xmm3
373*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
374*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrld $2, %xmm2, %xmm2
375*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [7,7,7,7]
376*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmulld %xmm3, %xmm2, %xmm2
377*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubd %xmm2, %xmm4, %xmm2
378*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
379*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
380*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuludq %xmm4, %xmm5, %xmm4
381*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
382*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
383*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7]
384*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm4
385*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrld $1, %xmm4, %xmm4
386*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm1, %xmm4, %xmm1
387*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
388*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmulld %xmm3, %xmm1, %xmm1
389*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
390*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
391*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
392*9880d681SAndroid Build Coastguard Worker;
393*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_rem7_8i32:
394*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
395*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
396*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
397*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
398*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmuludq %ymm2, %ymm3, %ymm2
399*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmuludq %ymm1, %ymm0, %ymm1
400*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
401*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
402*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm2
403*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrld $1, %ymm2, %ymm2
404*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddd %ymm1, %ymm2, %ymm1
405*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrld $2, %ymm1, %ymm1
406*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
407*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmulld %ymm2, %ymm1, %ymm1
408*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
409*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
; IR under test: a single urem of %a by a splat of i32 7, returned unchanged.
410*9880d681SAndroid Build Coastguard Worker  %res = urem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
411*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %res
412*9880d681SAndroid Build Coastguard Worker}
413*9880d681SAndroid Build Coastguard Worker
; test_rem7_16i16: unsigned remainder of every lane of a <16 x i16> by the
; constant 7 (the urem at the end of the function body).
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; Expected code for the +avx run: the upper 128-bit half is split off with
; vextractf128 and each half gets the same sequence -- vpmulhuw by a vector of
; 9363, then vpsubw, vpsrlw $1, vpaddw, vpsrlw $2, followed by vpmullw with a
; vector of 7 and a final vpsubw from the input. The halves are rejoined with
; vinsertf128.
; Expected code for the +avx2 run: the same sequence on whole ymm registers,
; with the vpmulhuw and vpmullw constants taken as rip-relative memory
; operands (their values are not visible in the checks).
414*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @test_rem7_16i16(<16 x i16> %a) nounwind {
415*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_rem7_16i16:
416*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
417*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
418*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9363,9363,9363,9363,9363,9363,9363,9363]
419*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmulhuw %xmm2, %xmm1, %xmm3
420*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubw %xmm3, %xmm1, %xmm4
421*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $1, %xmm4, %xmm4
422*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm3, %xmm4, %xmm3
423*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $2, %xmm3, %xmm3
424*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [7,7,7,7,7,7,7,7]
425*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm4, %xmm3, %xmm3
426*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubw %xmm3, %xmm1, %xmm1
427*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmulhuw %xmm2, %xmm0, %xmm2
428*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubw %xmm2, %xmm0, %xmm3
429*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $1, %xmm3, %xmm3
430*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm2, %xmm3, %xmm2
431*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm2
432*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm4, %xmm2, %xmm2
433*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
434*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
435*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
436*9880d681SAndroid Build Coastguard Worker;
437*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_rem7_16i16:
438*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
439*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
440*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm2
441*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $1, %ymm2, %ymm2
442*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddw %ymm1, %ymm2, %ymm1
443*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $2, %ymm1, %ymm1
444*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm1, %ymm1
445*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
446*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
; IR under test: a single urem of %a by a splat of i16 7, returned unchanged.
447*9880d681SAndroid Build Coastguard Worker  %res = urem <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
448*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %res
449*9880d681SAndroid Build Coastguard Worker}
450*9880d681SAndroid Build Coastguard Worker
; test_rem7_32i8: unsigned remainder of every lane of a <32 x i8> by the
; constant 7 (the urem at the end of the function body).
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; Expected code for the +avx run: the input is handled as two 128-bit halves
; (vextractf128 ... vinsertf128). Within a half, each group of eight bytes is
; zero-extended to words (vpmovzxbw), multiplied with vpmullw by a constant
; that is itself zero-extended from memory, shifted right by 8 and repacked
; with vpackuswb. The vpsubb / vpaddb fix-up uses word shifts (vpsrlw $1 and
; $2), each followed by a vpand with a per-byte mask of 127 or 63. The
; multiply back is done on sign-extended words (vpmovsxbw, vpmullw), masked
; with 255 and repacked before the final vpsubb from the input.
; NOTE(review): the vpmovsxbw memory operand is presumably the splat of 7
; from the urem; its value is not visible in the checks.
; Expected code for the +avx2 run: the multiplier is visible as a 32-byte
; splat of 37. Both 128-bit halves of the input and of the multiplier are
; widened to 16 words in a ymm register; products are shifted right by 8 and
; recombined with vperm2i128/vinserti128/vpackuswb. The multiply back widens
; with vpmovsxbw and narrows with vpshufb (even bytes) plus vpunpcklqdq.
451*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
452*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_rem7_32i8:
453*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
454*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
455*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
456*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
457*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm1, %xmm3, %xmm3
458*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
459*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
460*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
461*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm1, %xmm4, %xmm4
462*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
463*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpackuswb %xmm4, %xmm3, %xmm3
464*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm4
465*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $1, %xmm4, %xmm4
466*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm8 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
467*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm8, %xmm4, %xmm4
468*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm3, %xmm4, %xmm3
469*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $2, %xmm3, %xmm3
470*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
471*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
472*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm3, %xmm6
473*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw {{.*}}(%rip), %xmm7
474*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm7, %xmm6, %xmm6
475*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
476*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm5, %xmm6, %xmm6
477*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
478*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm3, %xmm3
479*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm7, %xmm3, %xmm3
480*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
481*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpackuswb %xmm3, %xmm6, %xmm3
482*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
483*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
484*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm1, %xmm3, %xmm3
485*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
486*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm0[2,3,0,1]
487*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
488*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm1, %xmm6, %xmm1
489*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
490*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpackuswb %xmm1, %xmm3, %xmm1
491*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm3
492*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $1, %xmm3, %xmm3
493*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm8, %xmm3, %xmm3
494*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm1, %xmm3, %xmm1
495*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsrlw $2, %xmm1, %xmm1
496*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
497*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm1, %xmm3
498*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm7, %xmm3, %xmm3
499*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
500*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
501*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmovsxbw %xmm1, %xmm1
502*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmullw %xmm7, %xmm1, %xmm1
503*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
504*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpackuswb %xmm1, %xmm3, %xmm1
505*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
506*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
507*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
508*9880d681SAndroid Build Coastguard Worker;
509*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_rem7_32i8:
510*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
511*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
512*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
513*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
514*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
515*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
516*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm2, %ymm3, %ymm2
517*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
518*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
519*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
520*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm1, %ymm3, %ymm1
521*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
522*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[2,3]
523*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
524*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpackuswb %ymm3, %ymm1, %ymm1
525*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm2
526*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $1, %ymm2, %ymm2
527*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
528*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
529*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsrlw $2, %ymm1, %ymm1
530*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
531*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
532*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm2, %ymm2
533*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw {{.*}}(%rip), %ymm3
534*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
535*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm4
536*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
537*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
538*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
539*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
540*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmovsxbw %xmm1, %ymm1
541*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpmullw %ymm3, %ymm1, %ymm1
542*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
543*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm3, %xmm3
544*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpshufb %xmm5, %xmm1, %xmm1
545*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
546*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
547*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
548*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
; IR under test: a single urem of %a by a splat of i8 7, returned unchanged.
549*9880d681SAndroid Build Coastguard Worker  %res = urem <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
550*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %res
551*9880d681SAndroid Build Coastguard Worker}
552