; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512BW

; The functions in this group compute a rounding unsigned average of two
; vectors: both inputs are zero-extended to i32, 1 and the other operand are
; added, the sum is shifted right by one and truncated back to i8. The
; expectations require that sequence to be selected as one pavgb/vpavgb.
; The result is stored through an undef pointer; the expected assembly writes
; it to (%rax).

; <4 x i8>, written as ((a + 1) + b) >> 1.
define void @avg_v4i8(<4 x i8>* %a, <4 x i8>* %b) {
; SSE2-LABEL: avg_v4i8:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    pavgb %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, (%rax)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: avg_v4i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX2-NEXT:    vpavgb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovd %xmm0, (%rax)
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v4i8:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovd (%rdi), %xmm0
; AVX512BW-NEXT:    vmovd (%rsi), %xmm1
; AVX512BW-NEXT:    vpavgb %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT:    vmovd %xmm0, (%rax)
; AVX512BW-NEXT:    retq
  %1 = load <4 x i8>, <4 x i8>* %a
  %2 = load <4 x i8>, <4 x i8>* %b
  %3 = zext <4 x i8> %1 to <4 x i32>
  %4 = zext <4 x i8> %2 to <4 x i32>
  %5 = add nuw nsw <4 x i32> %3, <i32 1, i32 1, i32 1, i32 1>
  %6 = add nuw nsw <4 x i32> %5, %4
  %7 = lshr <4 x i32> %6, <i32 1, i32 1, i32 1, i32 1>
  %8 = trunc <4 x i32> %7 to <4 x i8>
  store <4 x i8> %8, <4 x i8>* undef, align 4
  ret void
}

; <8 x i8>, written as ((a + 1) + b) >> 1.
define void @avg_v8i8(<8 x i8>* %a, <8 x i8>* %b) {
; SSE2-LABEL: avg_v8i8:
; SSE2:       # BB#0:
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    pavgb %xmm0, %xmm1
; SSE2-NEXT:    movq %xmm1, (%rax)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: avg_v8i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpavgb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v8i8:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovq (%rdi), %xmm0
; AVX512BW-NEXT:    vmovq (%rsi), %xmm1
; AVX512BW-NEXT:    vpavgb %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    retq
  %1 = load <8 x i8>, <8 x i8>* %a
  %2 = load <8 x i8>, <8 x i8>* %b
  %3 = zext <8 x i8> %1 to <8 x i32>
  %4 = zext <8 x i8> %2 to <8 x i32>
  %5 = add nuw nsw <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %6 = add nuw nsw <8 x i32> %5, %4
  %7 = lshr <8 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %8 = trunc <8 x i32> %7 to <8 x i8>
  store <8 x i8> %8, <8 x i8>* undef, align 4
  ret void
}

; <16 x i8>, written as ((a + 1) + b) >> 1. The expectations load %b and use
; %a as the memory operand of the average.
define void @avg_v16i8(<16 x i8>* %a, <16 x i8>* %b) {
; SSE2-LABEL: avg_v16i8:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa (%rsi), %xmm0
; SSE2-NEXT:    pavgb (%rdi), %xmm0
; SSE2-NEXT:    movdqu %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; AVX-LABEL: avg_v16i8:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa (%rsi), %xmm0
; AVX-NEXT:    vpavgb (%rdi), %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, (%rax)
; AVX-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a
  %2 = load <16 x i8>, <16 x i8>* %b
  %3 = zext <16 x i8> %1 to <16 x i32>
  %4 = zext <16 x i8> %2 to <16 x i32>
  %5 = add nuw nsw <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %6 = add nuw nsw <16 x i32> %5, %4
  %7 = lshr <16 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %8 = trunc <16 x i32> %7 to <16 x i8>
  store <16 x i8> %8, <16 x i8>* undef, align 4
  ret void
}
Coastguard Worker 103*9880d681SAndroid Build Coastguard Workerdefine void @avg_v32i8(<32 x i8>* %a, <32 x i8>* %b) { 104*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avg_v32i8: 105*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 106*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqa (%rsi), %ymm0 107*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpavgb (%rdi), %ymm0, %ymm0 108*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqu %ymm0, (%rax) 109*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vzeroupper 110*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 111*9880d681SAndroid Build Coastguard Worker; 112*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v32i8: 113*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 114*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqa (%rsi), %ymm0 115*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgb (%rdi), %ymm0, %ymm0 116*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu %ymm0, (%rax) 117*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 118*9880d681SAndroid Build Coastguard Worker %1 = load <32 x i8>, <32 x i8>* %a 119*9880d681SAndroid Build Coastguard Worker %2 = load <32 x i8>, <32 x i8>* %b 120*9880d681SAndroid Build Coastguard Worker %3 = zext <32 x i8> %1 to <32 x i32> 121*9880d681SAndroid Build Coastguard Worker %4 = zext <32 x i8> %2 to <32 x i32> 122*9880d681SAndroid Build Coastguard Worker %5 = add nuw nsw <32 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 123*9880d681SAndroid Build Coastguard Worker %6 = add nuw nsw <32 x i32> %5, %4 124*9880d681SAndroid Build Coastguard Worker %7 = lshr <32 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, 
i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 125*9880d681SAndroid Build Coastguard Worker %8 = trunc <32 x i32> %7 to <32 x i8> 126*9880d681SAndroid Build Coastguard Worker store <32 x i8> %8, <32 x i8>* undef, align 4 127*9880d681SAndroid Build Coastguard Worker ret void 128*9880d681SAndroid Build Coastguard Worker} 129*9880d681SAndroid Build Coastguard Worker 130*9880d681SAndroid Build Coastguard Workerdefine void @avg_v64i8(<64 x i8>* %a, <64 x i8>* %b) { 131*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v64i8: 132*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 133*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu8 (%rsi), %zmm0 134*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgb (%rdi), %zmm0, %zmm0 135*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rax) 136*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 137*9880d681SAndroid Build Coastguard Worker %1 = load <64 x i8>, <64 x i8>* %a 138*9880d681SAndroid Build Coastguard Worker %2 = load <64 x i8>, <64 x i8>* %b 139*9880d681SAndroid Build Coastguard Worker %3 = zext <64 x i8> %1 to <64 x i32> 140*9880d681SAndroid Build Coastguard Worker %4 = zext <64 x i8> %2 to <64 x i32> 141*9880d681SAndroid Build Coastguard Worker %5 = add nuw nsw <64 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 142*9880d681SAndroid Build Coastguard Worker %6 = add nuw nsw <64 x i32> %5, %4 143*9880d681SAndroid Build Coastguard Worker %7 = lshr <64 x i32> 
%6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 144*9880d681SAndroid Build Coastguard Worker %8 = trunc <64 x i32> %7 to <64 x i8> 145*9880d681SAndroid Build Coastguard Worker store <64 x i8> %8, <64 x i8>* undef, align 4 146*9880d681SAndroid Build Coastguard Worker ret void 147*9880d681SAndroid Build Coastguard Worker} 148*9880d681SAndroid Build Coastguard Worker 149*9880d681SAndroid Build Coastguard Workerdefine void @avg_v4i16(<4 x i16>* %a, <4 x i16>* %b) { 150*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: avg_v4i16: 151*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 152*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 153*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 154*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pavgw %xmm0, %xmm1 155*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movq %xmm1, (%rax) 156*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 157*9880d681SAndroid Build Coastguard Worker; 158*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avg_v4i16: 159*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 160*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 161*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 162*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpavgw %xmm0, %xmm1, %xmm0 163*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovq %xmm0, (%rax) 164*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 165*9880d681SAndroid 
Build Coastguard Worker; 166*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v4i16: 167*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 168*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovq (%rdi), %xmm0 169*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovq (%rsi), %xmm1 170*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgw %xmm0, %xmm1, %xmm0 171*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovq %xmm0, (%rax) 172*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 173*9880d681SAndroid Build Coastguard Worker %1 = load <4 x i16>, <4 x i16>* %a 174*9880d681SAndroid Build Coastguard Worker %2 = load <4 x i16>, <4 x i16>* %b 175*9880d681SAndroid Build Coastguard Worker %3 = zext <4 x i16> %1 to <4 x i32> 176*9880d681SAndroid Build Coastguard Worker %4 = zext <4 x i16> %2 to <4 x i32> 177*9880d681SAndroid Build Coastguard Worker %5 = add nuw nsw <4 x i32> %3, <i32 1, i32 1, i32 1, i32 1> 178*9880d681SAndroid Build Coastguard Worker %6 = add nuw nsw <4 x i32> %5, %4 179*9880d681SAndroid Build Coastguard Worker %7 = lshr <4 x i32> %6, <i32 1, i32 1, i32 1, i32 1> 180*9880d681SAndroid Build Coastguard Worker %8 = trunc <4 x i32> %7 to <4 x i16> 181*9880d681SAndroid Build Coastguard Worker store <4 x i16> %8, <4 x i16>* undef, align 4 182*9880d681SAndroid Build Coastguard Worker ret void 183*9880d681SAndroid Build Coastguard Worker} 184*9880d681SAndroid Build Coastguard Worker 185*9880d681SAndroid Build Coastguard Workerdefine void @avg_v8i16(<8 x i16>* %a, <8 x i16>* %b) { 186*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: avg_v8i16: 187*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 188*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa (%rsi), %xmm0 189*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pavgw (%rdi), %xmm0 190*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqu %xmm0, (%rax) 191*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: 
retq 192*9880d681SAndroid Build Coastguard Worker; 193*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: avg_v8i16: 194*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 195*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovdqa (%rsi), %xmm0 196*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpavgw (%rdi), %xmm0, %xmm0 197*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovdqu %xmm0, (%rax) 198*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 199*9880d681SAndroid Build Coastguard Worker %1 = load <8 x i16>, <8 x i16>* %a 200*9880d681SAndroid Build Coastguard Worker %2 = load <8 x i16>, <8 x i16>* %b 201*9880d681SAndroid Build Coastguard Worker %3 = zext <8 x i16> %1 to <8 x i32> 202*9880d681SAndroid Build Coastguard Worker %4 = zext <8 x i16> %2 to <8 x i32> 203*9880d681SAndroid Build Coastguard Worker %5 = add nuw nsw <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 204*9880d681SAndroid Build Coastguard Worker %6 = add nuw nsw <8 x i32> %5, %4 205*9880d681SAndroid Build Coastguard Worker %7 = lshr <8 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 206*9880d681SAndroid Build Coastguard Worker %8 = trunc <8 x i32> %7 to <8 x i16> 207*9880d681SAndroid Build Coastguard Worker store <8 x i16> %8, <8 x i16>* undef, align 4 208*9880d681SAndroid Build Coastguard Worker ret void 209*9880d681SAndroid Build Coastguard Worker} 210*9880d681SAndroid Build Coastguard Worker 211*9880d681SAndroid Build Coastguard Workerdefine void @avg_v16i16(<16 x i16>* %a, <16 x i16>* %b) { 212*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avg_v16i16: 213*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 214*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqa (%rsi), %ymm0 215*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpavgw (%rdi), %ymm0, %ymm0 216*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqu %ymm0, (%rax) 217*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vzeroupper 
218*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 219*9880d681SAndroid Build Coastguard Worker; 220*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v16i16: 221*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 222*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqa (%rsi), %ymm0 223*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgw (%rdi), %ymm0, %ymm0 224*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu %ymm0, (%rax) 225*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 226*9880d681SAndroid Build Coastguard Worker %1 = load <16 x i16>, <16 x i16>* %a 227*9880d681SAndroid Build Coastguard Worker %2 = load <16 x i16>, <16 x i16>* %b 228*9880d681SAndroid Build Coastguard Worker %3 = zext <16 x i16> %1 to <16 x i32> 229*9880d681SAndroid Build Coastguard Worker %4 = zext <16 x i16> %2 to <16 x i32> 230*9880d681SAndroid Build Coastguard Worker %5 = add nuw nsw <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 231*9880d681SAndroid Build Coastguard Worker %6 = add nuw nsw <16 x i32> %5, %4 232*9880d681SAndroid Build Coastguard Worker %7 = lshr <16 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 233*9880d681SAndroid Build Coastguard Worker %8 = trunc <16 x i32> %7 to <16 x i16> 234*9880d681SAndroid Build Coastguard Worker store <16 x i16> %8, <16 x i16>* undef, align 4 235*9880d681SAndroid Build Coastguard Worker ret void 236*9880d681SAndroid Build Coastguard Worker} 237*9880d681SAndroid Build Coastguard Worker 238*9880d681SAndroid Build Coastguard Workerdefine void @avg_v32i16(<32 x i16>* %a, <32 x i16>* %b) { 239*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v32i16: 240*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 241*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu16 (%rsi), %zmm0 
242*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgw (%rdi), %zmm0, %zmm0 243*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rax) 244*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 245*9880d681SAndroid Build Coastguard Worker %1 = load <32 x i16>, <32 x i16>* %a 246*9880d681SAndroid Build Coastguard Worker %2 = load <32 x i16>, <32 x i16>* %b 247*9880d681SAndroid Build Coastguard Worker %3 = zext <32 x i16> %1 to <32 x i32> 248*9880d681SAndroid Build Coastguard Worker %4 = zext <32 x i16> %2 to <32 x i32> 249*9880d681SAndroid Build Coastguard Worker %5 = add nuw nsw <32 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 250*9880d681SAndroid Build Coastguard Worker %6 = add nuw nsw <32 x i32> %5, %4 251*9880d681SAndroid Build Coastguard Worker %7 = lshr <32 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 252*9880d681SAndroid Build Coastguard Worker %8 = trunc <32 x i32> %7 to <32 x i16> 253*9880d681SAndroid Build Coastguard Worker store <32 x i16> %8, <32 x i16>* undef, align 4 254*9880d681SAndroid Build Coastguard Worker ret void 255*9880d681SAndroid Build Coastguard Worker} 256*9880d681SAndroid Build Coastguard Worker 257*9880d681SAndroid Build Coastguard Workerdefine void @avg_v4i8_2(<4 x i8>* %a, <4 x i8>* %b) { 258*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: avg_v4i8_2: 259*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 260*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 261*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 
262*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pavgb %xmm0, %xmm1 263*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movd %xmm1, (%rax) 264*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 265*9880d681SAndroid Build Coastguard Worker; 266*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avg_v4i8_2: 267*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 268*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 269*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 270*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 271*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovd %xmm0, (%rax) 272*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 273*9880d681SAndroid Build Coastguard Worker; 274*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v4i8_2: 275*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 276*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovd (%rdi), %xmm0 277*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovd (%rsi), %xmm1 278*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgb %xmm1, %xmm0, %xmm0 279*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovd %xmm0, (%rax) 280*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 281*9880d681SAndroid Build Coastguard Worker %1 = load <4 x i8>, <4 x i8>* %a 282*9880d681SAndroid Build Coastguard Worker %2 = load <4 x i8>, <4 x i8>* %b 283*9880d681SAndroid Build Coastguard Worker %3 = zext <4 x i8> %1 to <4 x i32> 284*9880d681SAndroid Build Coastguard Worker %4 = zext <4 x i8> %2 to <4 x i32> 285*9880d681SAndroid Build Coastguard Worker %5 = add nuw nsw <4 x i32> %3, %4 286*9880d681SAndroid Build Coastguard Worker %6 = add nuw nsw <4 x i32> %5, <i32 1, i32 1, i32 1, i32 1> 287*9880d681SAndroid Build Coastguard Worker %7 = lshr <4 x i32> %6, <i32 1, i32 1, i32 1, i32 1> 
288*9880d681SAndroid Build Coastguard Worker %8 = trunc <4 x i32> %7 to <4 x i8> 289*9880d681SAndroid Build Coastguard Worker store <4 x i8> %8, <4 x i8>* undef, align 4 290*9880d681SAndroid Build Coastguard Worker ret void 291*9880d681SAndroid Build Coastguard Worker} 292*9880d681SAndroid Build Coastguard Worker 293*9880d681SAndroid Build Coastguard Workerdefine void @avg_v8i8_2(<8 x i8>* %a, <8 x i8>* %b) { 294*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: avg_v8i8_2: 295*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 296*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 297*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 298*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pavgb %xmm0, %xmm1 299*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movq %xmm1, (%rax) 300*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 301*9880d681SAndroid Build Coastguard Worker; 302*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avg_v8i8_2: 303*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 304*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 305*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 306*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 307*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovq %xmm0, (%rax) 308*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 309*9880d681SAndroid Build Coastguard Worker; 310*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v8i8_2: 311*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 312*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovq (%rdi), %xmm0 313*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovq (%rsi), %xmm1 314*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgb %xmm1, %xmm0, %xmm0 315*9880d681SAndroid Build Coastguard Worker; 
AVX512BW-NEXT: vmovq %xmm0, (%rax) 316*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 317*9880d681SAndroid Build Coastguard Worker %1 = load <8 x i8>, <8 x i8>* %a 318*9880d681SAndroid Build Coastguard Worker %2 = load <8 x i8>, <8 x i8>* %b 319*9880d681SAndroid Build Coastguard Worker %3 = zext <8 x i8> %1 to <8 x i32> 320*9880d681SAndroid Build Coastguard Worker %4 = zext <8 x i8> %2 to <8 x i32> 321*9880d681SAndroid Build Coastguard Worker %5 = add nuw nsw <8 x i32> %3, %4 322*9880d681SAndroid Build Coastguard Worker %6 = add nuw nsw <8 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 323*9880d681SAndroid Build Coastguard Worker %7 = lshr <8 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 324*9880d681SAndroid Build Coastguard Worker %8 = trunc <8 x i32> %7 to <8 x i8> 325*9880d681SAndroid Build Coastguard Worker store <8 x i8> %8, <8 x i8>* undef, align 4 326*9880d681SAndroid Build Coastguard Worker ret void 327*9880d681SAndroid Build Coastguard Worker} 328*9880d681SAndroid Build Coastguard Worker 329*9880d681SAndroid Build Coastguard Workerdefine void @avg_v16i8_2(<16 x i8>* %a, <16 x i8>* %b) { 330*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: avg_v16i8_2: 331*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 332*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa (%rdi), %xmm0 333*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pavgb (%rsi), %xmm0 334*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqu %xmm0, (%rax) 335*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 336*9880d681SAndroid Build Coastguard Worker; 337*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: avg_v16i8_2: 338*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 339*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovdqa (%rdi), %xmm0 340*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpavgb (%rsi), %xmm0, %xmm0 341*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovdqu 
%xmm0, (%rax) 342*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 343*9880d681SAndroid Build Coastguard Worker %1 = load <16 x i8>, <16 x i8>* %a 344*9880d681SAndroid Build Coastguard Worker %2 = load <16 x i8>, <16 x i8>* %b 345*9880d681SAndroid Build Coastguard Worker %3 = zext <16 x i8> %1 to <16 x i32> 346*9880d681SAndroid Build Coastguard Worker %4 = zext <16 x i8> %2 to <16 x i32> 347*9880d681SAndroid Build Coastguard Worker %5 = add nuw nsw <16 x i32> %3, %4 348*9880d681SAndroid Build Coastguard Worker %6 = add nuw nsw <16 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 349*9880d681SAndroid Build Coastguard Worker %7 = lshr <16 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 350*9880d681SAndroid Build Coastguard Worker %8 = trunc <16 x i32> %7 to <16 x i8> 351*9880d681SAndroid Build Coastguard Worker store <16 x i8> %8, <16 x i8>* undef, align 4 352*9880d681SAndroid Build Coastguard Worker ret void 353*9880d681SAndroid Build Coastguard Worker} 354*9880d681SAndroid Build Coastguard Worker 355*9880d681SAndroid Build Coastguard Workerdefine void @avg_v32i8_2(<32 x i8>* %a, <32 x i8>* %b) { 356*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avg_v32i8_2: 357*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 358*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqa (%rdi), %ymm0 359*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpavgb (%rsi), %ymm0, %ymm0 360*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqu %ymm0, (%rax) 361*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vzeroupper 362*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 363*9880d681SAndroid Build Coastguard Worker; 364*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v32i8_2: 365*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 366*9880d681SAndroid Build Coastguard 
Worker; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 367*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgb (%rsi), %ymm0, %ymm0 368*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu %ymm0, (%rax) 369*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 370*9880d681SAndroid Build Coastguard Worker %1 = load <32 x i8>, <32 x i8>* %a 371*9880d681SAndroid Build Coastguard Worker %2 = load <32 x i8>, <32 x i8>* %b 372*9880d681SAndroid Build Coastguard Worker %3 = zext <32 x i8> %1 to <32 x i32> 373*9880d681SAndroid Build Coastguard Worker %4 = zext <32 x i8> %2 to <32 x i32> 374*9880d681SAndroid Build Coastguard Worker %5 = add nuw nsw <32 x i32> %3, %4 375*9880d681SAndroid Build Coastguard Worker %6 = add nuw nsw <32 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 376*9880d681SAndroid Build Coastguard Worker %7 = lshr <32 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 377*9880d681SAndroid Build Coastguard Worker %8 = trunc <32 x i32> %7 to <32 x i8> 378*9880d681SAndroid Build Coastguard Worker store <32 x i8> %8, <32 x i8>* undef, align 4 379*9880d681SAndroid Build Coastguard Worker ret void 380*9880d681SAndroid Build Coastguard Worker} 381*9880d681SAndroid Build Coastguard Worker 382*9880d681SAndroid Build Coastguard Workerdefine void @avg_v64i8_2(<64 x i8>* %a, <64 x i8>* %b) { 383*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v64i8_2: 384*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 385*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu8 (%rsi), %zmm0 386*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgb 
%zmm0, %zmm0, %zmm0 387*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rax) 388*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 389*9880d681SAndroid Build Coastguard Worker %1 = load <64 x i8>, <64 x i8>* %a 390*9880d681SAndroid Build Coastguard Worker %2 = load <64 x i8>, <64 x i8>* %b 391*9880d681SAndroid Build Coastguard Worker %3 = zext <64 x i8> %1 to <64 x i32> 392*9880d681SAndroid Build Coastguard Worker %4 = zext <64 x i8> %2 to <64 x i32> 393*9880d681SAndroid Build Coastguard Worker %5 = add nuw nsw <64 x i32> %4, %4 394*9880d681SAndroid Build Coastguard Worker %6 = add nuw nsw <64 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 395*9880d681SAndroid Build Coastguard Worker %7 = lshr <64 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 396*9880d681SAndroid Build Coastguard Worker %8 = trunc <64 x i32> %7 to <64 x i8> 397*9880d681SAndroid Build Coastguard Worker store <64 x i8> %8, <64 x i8>* undef, align 4 398*9880d681SAndroid Build Coastguard Worker ret void 399*9880d681SAndroid Build Coastguard Worker} 400*9880d681SAndroid Build Coastguard Worker 401*9880d681SAndroid Build Coastguard Worker 
402*9880d681SAndroid Build Coastguard Workerdefine void @avg_v4i16_2(<4 x i16>* %a, <4 x i16>* %b) { 403*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: avg_v4i16_2: 404*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 405*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 406*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 407*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pavgw %xmm0, %xmm1 408*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movq %xmm1, (%rax) 409*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 410*9880d681SAndroid Build Coastguard Worker; 411*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avg_v4i16_2: 412*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 413*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 414*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 415*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 416*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovq %xmm0, (%rax) 417*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 418*9880d681SAndroid Build Coastguard Worker; 419*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v4i16_2: 420*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 421*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovq (%rdi), %xmm0 422*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovq (%rsi), %xmm1 423*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgw %xmm1, %xmm0, %xmm0 424*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovq %xmm0, (%rax) 425*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 426*9880d681SAndroid Build Coastguard Worker %1 = load <4 x i16>, <4 x i16>* %a 427*9880d681SAndroid Build Coastguard Worker %2 = load <4 x i16>, <4 x i16>* %b 428*9880d681SAndroid Build Coastguard Worker %3 = 
zext <4 x i16> %1 to <4 x i32> 429*9880d681SAndroid Build Coastguard Worker %4 = zext <4 x i16> %2 to <4 x i32> 430*9880d681SAndroid Build Coastguard Worker %5 = add nuw nsw <4 x i32> %3, %4 431*9880d681SAndroid Build Coastguard Worker %6 = add nuw nsw <4 x i32> %5, <i32 1, i32 1, i32 1, i32 1> 432*9880d681SAndroid Build Coastguard Worker %7 = lshr <4 x i32> %6, <i32 1, i32 1, i32 1, i32 1> 433*9880d681SAndroid Build Coastguard Worker %8 = trunc <4 x i32> %7 to <4 x i16> 434*9880d681SAndroid Build Coastguard Worker store <4 x i16> %8, <4 x i16>* undef, align 4 435*9880d681SAndroid Build Coastguard Worker ret void 436*9880d681SAndroid Build Coastguard Worker} 437*9880d681SAndroid Build Coastguard Worker 438*9880d681SAndroid Build Coastguard Workerdefine void @avg_v8i16_2(<8 x i16>* %a, <8 x i16>* %b) { 439*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: avg_v8i16_2: 440*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 441*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa (%rdi), %xmm0 442*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pavgw (%rsi), %xmm0 443*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqu %xmm0, (%rax) 444*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 445*9880d681SAndroid Build Coastguard Worker; 446*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: avg_v8i16_2: 447*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 448*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovdqa (%rdi), %xmm0 449*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpavgw (%rsi), %xmm0, %xmm0 450*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovdqu %xmm0, (%rax) 451*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 452*9880d681SAndroid Build Coastguard Worker %1 = load <8 x i16>, <8 x i16>* %a 453*9880d681SAndroid Build Coastguard Worker %2 = load <8 x i16>, <8 x i16>* %b 454*9880d681SAndroid Build Coastguard Worker %3 = zext <8 x i16> %1 to <8 x i32> 455*9880d681SAndroid Build Coastguard 
Worker %4 = zext <8 x i16> %2 to <8 x i32> 456*9880d681SAndroid Build Coastguard Worker %5 = add nuw nsw <8 x i32> %3, %4 457*9880d681SAndroid Build Coastguard Worker %6 = add nuw nsw <8 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 458*9880d681SAndroid Build Coastguard Worker %7 = lshr <8 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 459*9880d681SAndroid Build Coastguard Worker %8 = trunc <8 x i32> %7 to <8 x i16> 460*9880d681SAndroid Build Coastguard Worker store <8 x i16> %8, <8 x i16>* undef, align 4 461*9880d681SAndroid Build Coastguard Worker ret void 462*9880d681SAndroid Build Coastguard Worker} 463*9880d681SAndroid Build Coastguard Worker 464*9880d681SAndroid Build Coastguard Workerdefine void @avg_v16i16_2(<16 x i16>* %a, <16 x i16>* %b) { 465*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avg_v16i16_2: 466*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 467*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqa (%rdi), %ymm0 468*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpavgw (%rsi), %ymm0, %ymm0 469*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqu %ymm0, (%rax) 470*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vzeroupper 471*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 472*9880d681SAndroid Build Coastguard Worker; 473*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v16i16_2: 474*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 475*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 476*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgw (%rsi), %ymm0, %ymm0 477*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu %ymm0, (%rax) 478*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 479*9880d681SAndroid Build Coastguard Worker %1 = load <16 x i16>, <16 x i16>* %a 480*9880d681SAndroid Build Coastguard Worker %2 = load <16 x i16>, <16 x i16>* %b 481*9880d681SAndroid 
Build Coastguard Worker %3 = zext <16 x i16> %1 to <16 x i32> 482*9880d681SAndroid Build Coastguard Worker %4 = zext <16 x i16> %2 to <16 x i32> 483*9880d681SAndroid Build Coastguard Worker %5 = add nuw nsw <16 x i32> %3, %4 484*9880d681SAndroid Build Coastguard Worker %6 = add nuw nsw <16 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 485*9880d681SAndroid Build Coastguard Worker %7 = lshr <16 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 486*9880d681SAndroid Build Coastguard Worker %8 = trunc <16 x i32> %7 to <16 x i16> 487*9880d681SAndroid Build Coastguard Worker store <16 x i16> %8, <16 x i16>* undef, align 4 488*9880d681SAndroid Build Coastguard Worker ret void 489*9880d681SAndroid Build Coastguard Worker} 490*9880d681SAndroid Build Coastguard Worker 491*9880d681SAndroid Build Coastguard Workerdefine void @avg_v32i16_2(<32 x i16>* %a, <32 x i16>* %b) { 492*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v32i16_2: 493*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 494*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0 495*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgw (%rsi), %zmm0, %zmm0 496*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rax) 497*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 498*9880d681SAndroid Build Coastguard Worker %1 = load <32 x i16>, <32 x i16>* %a 499*9880d681SAndroid Build Coastguard Worker %2 = load <32 x i16>, <32 x i16>* %b 500*9880d681SAndroid Build Coastguard Worker %3 = zext <32 x i16> %1 to <32 x i32> 501*9880d681SAndroid Build Coastguard Worker %4 = zext <32 x i16> %2 to <32 x i32> 502*9880d681SAndroid Build Coastguard Worker %5 = add nuw nsw <32 x i32> %3, %4 503*9880d681SAndroid Build Coastguard Worker %6 = add nuw nsw <32 x i32> %5, <i32 1, i32 1, i32 1, 
i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 504*9880d681SAndroid Build Coastguard Worker %7 = lshr <32 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 505*9880d681SAndroid Build Coastguard Worker %8 = trunc <32 x i32> %7 to <32 x i16> 506*9880d681SAndroid Build Coastguard Worker store <32 x i16> %8, <32 x i16>* undef, align 4 507*9880d681SAndroid Build Coastguard Worker ret void 508*9880d681SAndroid Build Coastguard Worker} 509*9880d681SAndroid Build Coastguard Worker 510*9880d681SAndroid Build Coastguard Workerdefine void @avg_v4i8_const(<4 x i8>* %a) { 511*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: avg_v4i8_const: 512*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 513*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 514*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pavgb {{.*}}(%rip), %xmm0 515*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movd %xmm0, (%rax) 516*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 517*9880d681SAndroid Build Coastguard Worker; 518*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avg_v4i8_const: 519*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 520*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 521*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpavgb {{.*}}(%rip), %xmm0, %xmm0 522*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovd %xmm0, (%rax) 523*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 524*9880d681SAndroid Build Coastguard Worker; 525*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v4i8_const: 
526*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 527*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovd (%rdi), %xmm0 528*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgb {{.*}}(%rip), %xmm0, %xmm0 529*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovd %xmm0, (%rax) 530*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 531*9880d681SAndroid Build Coastguard Worker %1 = load <4 x i8>, <4 x i8>* %a 532*9880d681SAndroid Build Coastguard Worker %2 = zext <4 x i8> %1 to <4 x i32> 533*9880d681SAndroid Build Coastguard Worker %3 = add nuw nsw <4 x i32> %2, <i32 1, i32 2, i32 3, i32 4> 534*9880d681SAndroid Build Coastguard Worker %4 = lshr <4 x i32> %3, <i32 1, i32 1, i32 1, i32 1> 535*9880d681SAndroid Build Coastguard Worker %5 = trunc <4 x i32> %4 to <4 x i8> 536*9880d681SAndroid Build Coastguard Worker store <4 x i8> %5, <4 x i8>* undef, align 4 537*9880d681SAndroid Build Coastguard Worker ret void 538*9880d681SAndroid Build Coastguard Worker} 539*9880d681SAndroid Build Coastguard Worker 540*9880d681SAndroid Build Coastguard Workerdefine void @avg_v8i8_const(<8 x i8>* %a) { 541*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: avg_v8i8_const: 542*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 543*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 544*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pavgb {{.*}}(%rip), %xmm0 545*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movq %xmm0, (%rax) 546*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 547*9880d681SAndroid Build Coastguard Worker; 548*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avg_v8i8_const: 549*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 550*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 551*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpavgb {{.*}}(%rip), %xmm0, %xmm0 552*9880d681SAndroid Build Coastguard 
Worker; AVX2-NEXT: vmovq %xmm0, (%rax) 553*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 554*9880d681SAndroid Build Coastguard Worker; 555*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v8i8_const: 556*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 557*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovq (%rdi), %xmm0 558*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgb {{.*}}(%rip), %xmm0, %xmm0 559*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovq %xmm0, (%rax) 560*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 561*9880d681SAndroid Build Coastguard Worker %1 = load <8 x i8>, <8 x i8>* %a 562*9880d681SAndroid Build Coastguard Worker %2 = zext <8 x i8> %1 to <8 x i32> 563*9880d681SAndroid Build Coastguard Worker %3 = add nuw nsw <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> 564*9880d681SAndroid Build Coastguard Worker %4 = lshr <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 565*9880d681SAndroid Build Coastguard Worker %5 = trunc <8 x i32> %4 to <8 x i8> 566*9880d681SAndroid Build Coastguard Worker store <8 x i8> %5, <8 x i8>* undef, align 4 567*9880d681SAndroid Build Coastguard Worker ret void 568*9880d681SAndroid Build Coastguard Worker} 569*9880d681SAndroid Build Coastguard Worker 570*9880d681SAndroid Build Coastguard Workerdefine void @avg_v16i8_const(<16 x i8>* %a) { 571*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: avg_v16i8_const: 572*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 573*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa (%rdi), %xmm0 574*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pavgb {{.*}}(%rip), %xmm0 575*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqu %xmm0, (%rax) 576*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 577*9880d681SAndroid Build Coastguard Worker; 578*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: avg_v16i8_const: 
579*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 580*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovdqa (%rdi), %xmm0 581*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpavgb {{.*}}(%rip), %xmm0, %xmm0 582*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovdqu %xmm0, (%rax) 583*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 584*9880d681SAndroid Build Coastguard Worker %1 = load <16 x i8>, <16 x i8>* %a 585*9880d681SAndroid Build Coastguard Worker %2 = zext <16 x i8> %1 to <16 x i32> 586*9880d681SAndroid Build Coastguard Worker %3 = add nuw nsw <16 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> 587*9880d681SAndroid Build Coastguard Worker %4 = lshr <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 588*9880d681SAndroid Build Coastguard Worker %5 = trunc <16 x i32> %4 to <16 x i8> 589*9880d681SAndroid Build Coastguard Worker store <16 x i8> %5, <16 x i8>* undef, align 4 590*9880d681SAndroid Build Coastguard Worker ret void 591*9880d681SAndroid Build Coastguard Worker} 592*9880d681SAndroid Build Coastguard Worker 593*9880d681SAndroid Build Coastguard Workerdefine void @avg_v32i8_const(<32 x i8>* %a) { 594*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avg_v32i8_const: 595*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 596*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqa (%rdi), %ymm0 597*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpavgb {{.*}}(%rip), %ymm0, %ymm0 598*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqu %ymm0, (%rax) 599*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vzeroupper 600*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 601*9880d681SAndroid Build Coastguard Worker; 602*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v32i8_const: 603*9880d681SAndroid Build Coastguard Worker; 
AVX512BW: # BB#0: 604*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 605*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgb {{.*}}(%rip), %ymm0, %ymm0 606*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu %ymm0, (%rax) 607*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 608*9880d681SAndroid Build Coastguard Worker %1 = load <32 x i8>, <32 x i8>* %a 609*9880d681SAndroid Build Coastguard Worker %2 = zext <32 x i8> %1 to <32 x i32> 610*9880d681SAndroid Build Coastguard Worker %3 = add nuw nsw <32 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> 611*9880d681SAndroid Build Coastguard Worker %4 = lshr <32 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 612*9880d681SAndroid Build Coastguard Worker %5 = trunc <32 x i32> %4 to <32 x i8> 613*9880d681SAndroid Build Coastguard Worker store <32 x i8> %5, <32 x i8>* undef, align 4 614*9880d681SAndroid Build Coastguard Worker ret void 615*9880d681SAndroid Build Coastguard Worker} 616*9880d681SAndroid Build Coastguard Worker 617*9880d681SAndroid Build Coastguard Workerdefine void @avg_v64i8_const(<64 x i8>* %a) { 618*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v64i8_const: 619*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 620*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu8 (%rdi), %zmm0 621*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgb {{.*}}(%rip), %zmm0, %zmm0 622*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rax) 623*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 
624*9880d681SAndroid Build Coastguard Worker %1 = load <64 x i8>, <64 x i8>* %a 625*9880d681SAndroid Build Coastguard Worker %2 = zext <64 x i8> %1 to <64 x i32> 626*9880d681SAndroid Build Coastguard Worker %3 = add nuw nsw <64 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> 627*9880d681SAndroid Build Coastguard Worker %4 = lshr <64 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 628*9880d681SAndroid Build Coastguard Worker %5 = trunc <64 x i32> %4 to <64 x i8> 629*9880d681SAndroid Build Coastguard Worker store <64 x i8> %5, <64 x i8>* undef, align 4 630*9880d681SAndroid Build Coastguard Worker ret void 631*9880d681SAndroid Build Coastguard Worker} 632*9880d681SAndroid Build Coastguard Worker 633*9880d681SAndroid Build Coastguard Workerdefine void @avg_v4i16_const(<4 x i16>* %a) { 634*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: avg_v4i16_const: 635*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 636*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 637*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pavgw {{.*}}(%rip), %xmm0 638*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movq %xmm0, (%rax) 
639*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 640*9880d681SAndroid Build Coastguard Worker; 641*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avg_v4i16_const: 642*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 643*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero 644*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpavgw {{.*}}(%rip), %xmm0, %xmm0 645*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovq %xmm0, (%rax) 646*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 647*9880d681SAndroid Build Coastguard Worker; 648*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v4i16_const: 649*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 650*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovq (%rdi), %xmm0 651*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgw {{.*}}(%rip), %xmm0, %xmm0 652*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovq %xmm0, (%rax) 653*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 654*9880d681SAndroid Build Coastguard Worker %1 = load <4 x i16>, <4 x i16>* %a 655*9880d681SAndroid Build Coastguard Worker %2 = zext <4 x i16> %1 to <4 x i32> 656*9880d681SAndroid Build Coastguard Worker %3 = add nuw nsw <4 x i32> %2, <i32 1, i32 2, i32 3, i32 4> 657*9880d681SAndroid Build Coastguard Worker %4 = lshr <4 x i32> %3, <i32 1, i32 1, i32 1, i32 1> 658*9880d681SAndroid Build Coastguard Worker %5 = trunc <4 x i32> %4 to <4 x i16> 659*9880d681SAndroid Build Coastguard Worker store <4 x i16> %5, <4 x i16>* undef, align 4 660*9880d681SAndroid Build Coastguard Worker ret void 661*9880d681SAndroid Build Coastguard Worker} 662*9880d681SAndroid Build Coastguard Worker 663*9880d681SAndroid Build Coastguard Workerdefine void @avg_v8i16_const(<8 x i16>* %a) { 664*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: avg_v8i16_const: 665*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 
666*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqa (%rdi), %xmm0 667*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pavgw {{.*}}(%rip), %xmm0 668*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movdqu %xmm0, (%rax) 669*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 670*9880d681SAndroid Build Coastguard Worker; 671*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: avg_v8i16_const: 672*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 673*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovdqa (%rdi), %xmm0 674*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpavgw {{.*}}(%rip), %xmm0, %xmm0 675*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovdqu %xmm0, (%rax) 676*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 677*9880d681SAndroid Build Coastguard Worker %1 = load <8 x i16>, <8 x i16>* %a 678*9880d681SAndroid Build Coastguard Worker %2 = zext <8 x i16> %1 to <8 x i32> 679*9880d681SAndroid Build Coastguard Worker %3 = add nuw nsw <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> 680*9880d681SAndroid Build Coastguard Worker %4 = lshr <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 681*9880d681SAndroid Build Coastguard Worker %5 = trunc <8 x i32> %4 to <8 x i16> 682*9880d681SAndroid Build Coastguard Worker store <8 x i16> %5, <8 x i16>* undef, align 4 683*9880d681SAndroid Build Coastguard Worker ret void 684*9880d681SAndroid Build Coastguard Worker} 685*9880d681SAndroid Build Coastguard Worker 686*9880d681SAndroid Build Coastguard Workerdefine void @avg_v16i16_const(<16 x i16>* %a) { 687*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avg_v16i16_const: 688*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 689*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqa (%rdi), %ymm0 690*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpavgw {{.*}}(%rip), %ymm0, %ymm0 691*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovdqu %ymm0, 
(%rax) 692*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vzeroupper 693*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 694*9880d681SAndroid Build Coastguard Worker; 695*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v16i16_const: 696*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 697*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 698*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgw {{.*}}(%rip), %ymm0, %ymm0 699*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu %ymm0, (%rax) 700*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 701*9880d681SAndroid Build Coastguard Worker %1 = load <16 x i16>, <16 x i16>* %a 702*9880d681SAndroid Build Coastguard Worker %2 = zext <16 x i16> %1 to <16 x i32> 703*9880d681SAndroid Build Coastguard Worker %3 = add nuw nsw <16 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> 704*9880d681SAndroid Build Coastguard Worker %4 = lshr <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 705*9880d681SAndroid Build Coastguard Worker %5 = trunc <16 x i32> %4 to <16 x i16> 706*9880d681SAndroid Build Coastguard Worker store <16 x i16> %5, <16 x i16>* undef, align 4 707*9880d681SAndroid Build Coastguard Worker ret void 708*9880d681SAndroid Build Coastguard Worker} 709*9880d681SAndroid Build Coastguard Worker 710*9880d681SAndroid Build Coastguard Workerdefine void @avg_v32i16_const(<32 x i16>* %a) { 711*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v32i16_const: 712*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 713*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0 714*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpavgw {{.*}}(%rip), %zmm0, %zmm0 715*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: 
vmovdqu16 %zmm0, (%rax) 716*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 717*9880d681SAndroid Build Coastguard Worker %1 = load <32 x i16>, <32 x i16>* %a 718*9880d681SAndroid Build Coastguard Worker %2 = zext <32 x i16> %1 to <32 x i32> 719*9880d681SAndroid Build Coastguard Worker %3 = add nuw nsw <32 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8> 720*9880d681SAndroid Build Coastguard Worker %4 = lshr <32 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 721*9880d681SAndroid Build Coastguard Worker %5 = trunc <32 x i32> %4 to <32 x i16> 722*9880d681SAndroid Build Coastguard Worker store <32 x i16> %5, <32 x i16>* undef, align 4 723*9880d681SAndroid Build Coastguard Worker ret void 724*9880d681SAndroid Build Coastguard Worker} 725