; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; Checks the x86-64 code generated for unsigned division and remainder of
; 128-bit integer vectors by the splat constant 7 at four feature levels
; (sse2, sse4.1, avx, avx2). Every invocation above passes two check prefixes:
; a family prefix shared by two feature levels and a level-specific one, so a
; function whose output is identical across a family is matched once through
; the shared prefix. The assertion blocks are tool-generated; regenerate them
; with the script named on the first line instead of editing them by hand.

;
; udiv by 7
;

; <2 x i64>: the expected code handles each lane in scalar registers, using a
; 64-bit mulq by a constant followed by a sub/shift/add/shift fixup.
define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
; SSE2-LABEL: test_div7_2i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd %xmm0, %rcx
; SSE2-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
; SSE2-NEXT:    movq %rcx, %rax
; SSE2-NEXT:    mulq %rsi
; SSE2-NEXT:    subq %rdx, %rcx
; SSE2-NEXT:    shrq %rcx
; SSE2-NEXT:    addq %rdx, %rcx
; SSE2-NEXT:    shrq $2, %rcx
; SSE2-NEXT:    movd %rcx, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    movd %xmm0, %rcx
; SSE2-NEXT:    movq %rcx, %rax
; SSE2-NEXT:    mulq %rsi
; SSE2-NEXT:    subq %rdx, %rcx
; SSE2-NEXT:    shrq %rcx
; SSE2-NEXT:    addq %rdx, %rcx
; SSE2-NEXT:    shrq $2, %rcx
; SSE2-NEXT:    movd %rcx, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_div7_2i64:
; SSE41:       # BB#0:
; SSE41-NEXT:    pextrq $1, %xmm0, %rcx
; SSE41-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
; SSE41-NEXT:    movq %rcx, %rax
; SSE41-NEXT:    mulq %rsi
; SSE41-NEXT:    subq %rdx, %rcx
; SSE41-NEXT:    shrq %rcx
; SSE41-NEXT:    addq %rdx, %rcx
; SSE41-NEXT:    shrq $2, %rcx
; SSE41-NEXT:    movd %rcx, %xmm1
; SSE41-NEXT:    movd %xmm0, %rcx
; SSE41-NEXT:    movq %rcx, %rax
; SSE41-NEXT:    mulq %rsi
; SSE41-NEXT:    subq %rdx, %rcx
; SSE41-NEXT:    shrq %rcx
; SSE41-NEXT:    addq %rdx, %rcx
; SSE41-NEXT:    shrq $2, %rcx
; SSE41-NEXT:    movd %rcx, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_div7_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpextrq $1, %xmm0, %rcx
; AVX-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
; AVX-NEXT:    movq %rcx, %rax
; AVX-NEXT:    mulq %rsi
; AVX-NEXT:    subq %rdx, %rcx
; AVX-NEXT:    shrq %rcx
; AVX-NEXT:    addq %rdx, %rcx
; AVX-NEXT:    shrq $2, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm1
; AVX-NEXT:    vmovq %xmm0, %rcx
; AVX-NEXT:    movq %rcx, %rax
; AVX-NEXT:    mulq %rsi
; AVX-NEXT:    subq %rdx, %rcx
; AVX-NEXT:    shrq %rcx
; AVX-NEXT:    addq %rdx, %rcx
; AVX-NEXT:    shrq $2, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %res = udiv <2 x i64> %a, <i64 7, i64 7>
  ret <2 x i64> %res
}

; <4 x i32>: the expected code stays in vector registers, multiplying the even
; and odd lanes separately with pmuludq and recombining the high halves.
define <4 x i32> @test_div7_4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: test_div7_4i32:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pmuludq %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT:    psubd %xmm2, %xmm0
; SSE2-NEXT:    psrld $1, %xmm0
; SSE2-NEXT:    paddd %xmm2, %xmm0
; SSE2-NEXT:    psrld $2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_div7_4i32:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm2, %xmm3
; SSE41-NEXT:    pmuludq %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT:    psubd %xmm1, %xmm0
; SSE41-NEXT:    psrld $1, %xmm0
; SSE41-NEXT:    paddd %xmm1, %xmm0
; SSE41-NEXT:    psrld $2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_div7_4i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $2, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_div7_4i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX2-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $2, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %res = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %res
}

; <8 x i16>: a single pmulhuw supplies the high half of the product, so both
; feature families need only the multiply plus the sub/shift/add/shift fixup.
define <8 x i16> @test_div7_8i16(<8 x i16> %a) nounwind {
; SSE-LABEL: test_div7_8i16:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [9363,9363,9363,9363,9363,9363,9363,9363]
; SSE-NEXT:    pmulhuw %xmm0, %xmm1
; SSE-NEXT:    psubw %xmm1, %xmm0
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    paddw %xmm1, %xmm0
; SSE-NEXT:    psrlw $2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_div7_8i16:
; AVX:       # BB#0:
; AVX-NEXT:    vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
; AVX-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <8 x i16> %res
}

; <16 x i8>: the expected code widens the bytes to 16-bit lanes, multiplies
; with pmullw, shifts the high byte down and packs back to bytes. The byte
; shifts are done as 16-bit shifts followed by a pand with a constant.
define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
; SSE2-LABEL: test_div7_16i8:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
; SSE2-NEXT:    psrlw $8, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; SSE2-NEXT:    psrlw $8, %xmm2
; SSE2-NEXT:    pmullw %xmm1, %xmm2
; SSE2-NEXT:    psrlw $8, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psrlw $8, %xmm3
; SSE2-NEXT:    pmullw %xmm1, %xmm3
; SSE2-NEXT:    psrlw $8, %xmm3
; SSE2-NEXT:    packuswb %xmm2, %xmm3
; SSE2-NEXT:    psubb %xmm3, %xmm0
; SSE2-NEXT:    psrlw $1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    paddb %xmm3, %xmm0
; SSE2-NEXT:    psrlw $2, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_div7_16i8:
; SSE41:       # BB#0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT:    pmullw %xmm2, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
; SSE41-NEXT:    pmullw %xmm2, %xmm3
; SSE41-NEXT:    psrlw $8, %xmm3
; SSE41-NEXT:    packuswb %xmm3, %xmm1
; SSE41-NEXT:    psubb %xmm1, %xmm0
; SSE41-NEXT:    psrlw $1, %xmm0
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE41-NEXT:    paddb %xmm1, %xmm0
; SSE41-NEXT:    psrlw $2, %xmm0
; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_div7_16i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
; AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_div7_16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX2-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $2, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %res = udiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
  ret <16 x i8> %res
}

;
; urem by 7
;

; <2 x i64>: same per-lane scalar quotient sequence as the division test, after
; which the quotient is scaled (leaq by 8, minus itself) and subtracted from
; the original lane value.
define <2 x i64> @test_rem7_2i64(<2 x i64> %a) nounwind {
; SSE2-LABEL: test_rem7_2i64:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd %xmm0, %rcx
; SSE2-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
; SSE2-NEXT:    movq %rcx, %rax
; SSE2-NEXT:    mulq %rsi
; SSE2-NEXT:    movq %rcx, %rax
; SSE2-NEXT:    subq %rdx, %rax
; SSE2-NEXT:    shrq %rax
; SSE2-NEXT:    addq %rdx, %rax
; SSE2-NEXT:    shrq $2, %rax
; SSE2-NEXT:    leaq (,%rax,8), %rdx
; SSE2-NEXT:    subq %rax, %rdx
; SSE2-NEXT:    subq %rdx, %rcx
; SSE2-NEXT:    movd %rcx, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT:    movd %xmm0, %rcx
; SSE2-NEXT:    movq %rcx, %rax
; SSE2-NEXT:    mulq %rsi
; SSE2-NEXT:    movq %rcx, %rax
; SSE2-NEXT:    subq %rdx, %rax
; SSE2-NEXT:    shrq %rax
; SSE2-NEXT:    addq %rdx, %rax
; SSE2-NEXT:    shrq $2, %rax
; SSE2-NEXT:    leaq (,%rax,8), %rdx
; SSE2-NEXT:    subq %rax, %rdx
; SSE2-NEXT:    subq %rdx, %rcx
; SSE2-NEXT:    movd %rcx, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_rem7_2i64:
; SSE41:       # BB#0:
; SSE41-NEXT:    pextrq $1, %xmm0, %rcx
; SSE41-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
; SSE41-NEXT:    movq %rcx, %rax
; SSE41-NEXT:    mulq %rsi
; SSE41-NEXT:    movq %rcx, %rax
; SSE41-NEXT:    subq %rdx, %rax
; SSE41-NEXT:    shrq %rax
; SSE41-NEXT:    addq %rdx, %rax
; SSE41-NEXT:    shrq $2, %rax
; SSE41-NEXT:    leaq (,%rax,8), %rdx
; SSE41-NEXT:    subq %rax, %rdx
; SSE41-NEXT:    subq %rdx, %rcx
; SSE41-NEXT:    movd %rcx, %xmm1
; SSE41-NEXT:    movd %xmm0, %rcx
; SSE41-NEXT:    movq %rcx, %rax
; SSE41-NEXT:    mulq %rsi
; SSE41-NEXT:    movq %rcx, %rax
; SSE41-NEXT:    subq %rdx, %rax
; SSE41-NEXT:    shrq %rax
; SSE41-NEXT:    addq %rdx, %rax
; SSE41-NEXT:    shrq $2, %rax
; SSE41-NEXT:    leaq (,%rax,8), %rdx
; SSE41-NEXT:    subq %rax, %rdx
; SSE41-NEXT:    subq %rdx, %rcx
; SSE41-NEXT:    movd %rcx, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_rem7_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpextrq $1, %xmm0, %rcx
; AVX-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
; AVX-NEXT:    movq %rcx, %rax
; AVX-NEXT:    mulq %rsi
; AVX-NEXT:    movq %rcx, %rax
; AVX-NEXT:    subq %rdx, %rax
; AVX-NEXT:    shrq %rax
; AVX-NEXT:    addq %rdx, %rax
; AVX-NEXT:    shrq $2, %rax
; AVX-NEXT:    leaq (,%rax,8), %rdx
; AVX-NEXT:    subq %rax, %rdx
; AVX-NEXT:    subq %rdx, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm1
; AVX-NEXT:    vmovq %xmm0, %rcx
; AVX-NEXT:    movq %rcx, %rax
; AVX-NEXT:    mulq %rsi
; AVX-NEXT:    movq %rcx, %rax
; AVX-NEXT:    subq %rdx, %rax
; AVX-NEXT:    shrq %rax
; AVX-NEXT:    addq %rdx, %rax
; AVX-NEXT:    shrq $2, %rax
; AVX-NEXT:    leaq (,%rax,8), %rdx
; AVX-NEXT:    subq %rax, %rdx
; AVX-NEXT:    subq %rdx, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %res = urem <2 x i64> %a, <i64 7, i64 7>
  ret <2 x i64> %res
}

; <4 x i32>: vector quotient sequence as in the division test, then the
; quotient is multiplied back (pmuludq pairs on sse2, pmulld elsewhere) and
; subtracted from the input.
define <4 x i32> @test_rem7_4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: test_rem7_4i32:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pmuludq %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psubd %xmm2, %xmm1
; SSE2-NEXT:    psrld $1, %xmm1
; SSE2-NEXT:    paddd %xmm2, %xmm1
; SSE2-NEXT:    psrld $2, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [7,7,7,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT:    psubd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_rem7_4i32:
; SSE41:       # BB#0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm2, %xmm3
; SSE41-NEXT:    pmuludq %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psubd %xmm1, %xmm2
; SSE41-NEXT:    psrld $1, %xmm2
; SSE41-NEXT:    paddd %xmm1, %xmm2
; SSE41-NEXT:    psrld $2, %xmm2
; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
; SSE41-NEXT:    psubd %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_rem7_4i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpsrld $1, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
; AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_rem7_4i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX2-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
; AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpsrld $1, %xmm2, %xmm2
; AVX2-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpsrld $2, %xmm1, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm2
; AVX2-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %res = urem <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %res
}

; <8 x i16>: pmulhuw-based quotient as in the division test, followed by a
; pmullw against a memory constant and a subtract from the input.
define <8 x i16> @test_rem7_8i16(<8 x i16> %a) nounwind {
; SSE-LABEL: test_rem7_8i16:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [9363,9363,9363,9363,9363,9363,9363,9363]
; SSE-NEXT:    pmulhuw %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psubw %xmm1, %xmm2
; SSE-NEXT:    psrlw $1, %xmm2
; SSE-NEXT:    paddw %xmm1, %xmm2
; SSE-NEXT:    psrlw $2, %xmm2
; SSE-NEXT:    pmullw {{.*}}(%rip), %xmm2
; SSE-NEXT:    psubw %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_rem7_8i16:
; AVX:       # BB#0:
; AVX-NEXT:    vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
; AVX-NEXT:    vpsubw %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpsrlw $1, %xmm2, %xmm2
; AVX-NEXT:    vpaddw %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vpsrlw $2, %xmm1, %xmm1
; AVX-NEXT:    vpmullw {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res = urem <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <8 x i16> %res
}

define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
; SSE2-LABEL: test_rem7_16i8:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37]
; SSE2-NEXT:    psrlw $8, %xmm1
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
472*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psrlw $8, %xmm2 473*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm1, %xmm2 474*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psrlw $8, %xmm2 475*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm0, %xmm3 476*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 477*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psrlw $8, %xmm3 478*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm1, %xmm3 479*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psrlw $8, %xmm3 480*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm2, %xmm3 481*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm0, %xmm1 482*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psubb %xmm3, %xmm1 483*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psrlw $1, %xmm1 484*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 485*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: paddb %xmm3, %xmm1 486*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psrlw $2, %xmm1 487*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 488*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa %xmm1, %xmm2 489*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 490*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm2 491*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 492*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm3 493*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm3, %xmm2 494*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] 495*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm4, %xmm2 
496*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 497*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psraw $8, %xmm1 498*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pmullw %xmm3, %xmm1 499*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pand %xmm4, %xmm1 500*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: packuswb %xmm2, %xmm1 501*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: psubb %xmm1, %xmm0 502*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 503*9880d681SAndroid Build Coastguard Worker; 504*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_rem7_16i8: 505*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 506*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 507*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 508*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm2, %xmm1 509*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: psrlw $8, %xmm1 510*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] 511*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero 512*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm2, %xmm3 513*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: psrlw $8, %xmm3 514*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm3, %xmm1 515*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa %xmm0, %xmm2 516*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: psubb %xmm1, %xmm2 517*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: psrlw $1, 
%xmm2 518*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 519*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: paddb %xmm1, %xmm2 520*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: psrlw $2, %xmm2 521*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 522*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm2, %xmm1 523*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw {{.*}}(%rip), %xmm3 524*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm3, %xmm1 525*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] 526*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm4, %xmm1 527*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] 528*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmovsxbw %xmm2, %xmm2 529*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pmullw %xmm3, %xmm2 530*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pand %xmm4, %xmm2 531*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: packuswb %xmm2, %xmm1 532*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: psubb %xmm1, %xmm0 533*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 534*9880d681SAndroid Build Coastguard Worker; 535*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_rem7_16i8: 536*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 537*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 538*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 539*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1 540*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: 
vpsrlw $8, %xmm1, %xmm1 541*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] 542*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero 543*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpmullw %xmm2, %xmm3, %xmm2 544*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 545*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 546*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm2 547*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm2 548*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 549*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddb %xmm1, %xmm2, %xmm1 550*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm1 551*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 552*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpmovsxbw %xmm1, %xmm2 553*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpmovsxbw {{.*}}(%rip), %xmm3 554*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2 555*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] 556*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2 557*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 558*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpmovsxbw %xmm1, %xmm1 559*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpmullw %xmm3, %xmm1, %xmm1 560*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1 561*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1 562*9880d681SAndroid Build Coastguard Worker; 
AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 563*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 564*9880d681SAndroid Build Coastguard Worker; 565*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_rem7_16i8: 566*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 567*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 568*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 569*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1 570*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 571*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 572*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 573*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm2 574*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpsrlw $1, %xmm2, %xmm2 575*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 576*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddb %xmm1, %xmm2, %xmm1 577*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpsrlw $2, %xmm1, %xmm1 578*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 579*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw %xmm1, %ymm1 580*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmovsxbw {{.*}}(%rip), %ymm2 581*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1 582*9880d681SAndroid Build Coastguard 
Worker; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 583*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 584*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2 585*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1 586*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 587*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 588*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vzeroupper 589*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 590*9880d681SAndroid Build Coastguard Worker %res = urem <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7> 591*9880d681SAndroid Build Coastguard Worker ret <16 x i8> %res 592*9880d681SAndroid Build Coastguard Worker} 593