; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/avg.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512BW
4*9880d681SAndroid Build Coastguard Worker
; avg_v4i8 - rounding average of two <4 x i8> vectors loaded from %a and %b,
; spelled as trunc((zext(a) + 1 + zext(b)) >> 1) in i32 lanes. The checks
; below expect the whole pattern to become one pavgb/vpavgb.
define void @avg_v4i8(<4 x i8>* %a, <4 x i8>* %b) {
; SSE2-LABEL: avg_v4i8:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    pavgb %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, (%rax)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: avg_v4i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX2-NEXT:    vpavgb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovd %xmm0, (%rax)
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v4i8:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovd (%rdi), %xmm0
; AVX512BW-NEXT:    vmovd (%rsi), %xmm1
; AVX512BW-NEXT:    vpavgb %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT:    vmovd %xmm0, (%rax)
; AVX512BW-NEXT:    retq
  %lhs = load <4 x i8>, <4 x i8>* %a
  %rhs = load <4 x i8>, <4 x i8>* %b
  ; Widen both operands to i32 lanes before adding.
  %lhs.wide = zext <4 x i8> %lhs to <4 x i32>
  %rhs.wide = zext <4 x i8> %rhs to <4 x i32>
  ; The rounding bias is added to the first operand, then the second is added.
  %lhs.bias = add nuw nsw <4 x i32> %lhs.wide, <i32 1, i32 1, i32 1, i32 1>
  %sum = add nuw nsw <4 x i32> %lhs.bias, %rhs.wide
  %half = lshr <4 x i32> %sum, <i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <4 x i32> %half to <4 x i8>
  ; The store target is an undef pointer.
  store <4 x i8> %avg, <4 x i8>* undef, align 4
  ret void
}
40*9880d681SAndroid Build Coastguard Worker
; avg_v8i8 - rounding average of two <8 x i8> vectors loaded from %a and %b,
; computed in i32 lanes as trunc((zext(a) + 1 + zext(b)) >> 1). The checks
; below expect 8-byte loads feeding one pavgb/vpavgb.
define void @avg_v8i8(<8 x i8>* %a, <8 x i8>* %b) {
; SSE2-LABEL: avg_v8i8:
; SSE2:       # BB#0:
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    pavgb %xmm0, %xmm1
; SSE2-NEXT:    movq %xmm1, (%rax)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: avg_v8i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpavgb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v8i8:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovq (%rdi), %xmm0
; AVX512BW-NEXT:    vmovq (%rsi), %xmm1
; AVX512BW-NEXT:    vpavgb %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    retq
  %lhs = load <8 x i8>, <8 x i8>* %a
  %rhs = load <8 x i8>, <8 x i8>* %b
  ; Widen both operands to i32 lanes before adding.
  %lhs.wide = zext <8 x i8> %lhs to <8 x i32>
  %rhs.wide = zext <8 x i8> %rhs to <8 x i32>
  ; The rounding bias is added to the first operand, then the second is added.
  %lhs.bias = add nuw nsw <8 x i32> %lhs.wide, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %sum = add nuw nsw <8 x i32> %lhs.bias, %rhs.wide
  %half = lshr <8 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <8 x i32> %half to <8 x i8>
  ; The store target is an undef pointer.
  store <8 x i8> %avg, <8 x i8>* undef, align 4
  ret void
}
76*9880d681SAndroid Build Coastguard Worker
; avg_v16i8 - rounding average of two <16 x i8> vectors loaded from %a and
; %b, computed in i32 lanes as trunc((zext(a) + 1 + zext(b)) >> 1). The checks
; below expect a full 128-bit pavgb/vpavgb with one operand folded from memory.
define void @avg_v16i8(<16 x i8>* %a, <16 x i8>* %b) {
; SSE2-LABEL: avg_v16i8:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa (%rsi), %xmm0
; SSE2-NEXT:    pavgb (%rdi), %xmm0
; SSE2-NEXT:    movdqu %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; AVX-LABEL: avg_v16i8:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa (%rsi), %xmm0
; AVX-NEXT:    vpavgb (%rdi), %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, (%rax)
; AVX-NEXT:    retq
  %lhs = load <16 x i8>, <16 x i8>* %a
  %rhs = load <16 x i8>, <16 x i8>* %b
  ; Widen both operands to i32 lanes before adding.
  %lhs.wide = zext <16 x i8> %lhs to <16 x i32>
  %rhs.wide = zext <16 x i8> %rhs to <16 x i32>
  ; The rounding bias is added to the first operand, then the second is added.
  %lhs.bias = add nuw nsw <16 x i32> %lhs.wide, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %sum = add nuw nsw <16 x i32> %lhs.bias, %rhs.wide
  %half = lshr <16 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <16 x i32> %half to <16 x i8>
  ; The store target is an undef pointer.
  store <16 x i8> %avg, <16 x i8>* undef, align 4
  ret void
}
102*9880d681SAndroid Build Coastguard Worker
; avg_v32i8 - rounding average of two <32 x i8> vectors loaded from %a and
; %b, computed in i32 lanes as trunc((zext(a) + 1 + zext(b)) >> 1). The checks
; below expect a 256-bit vpavgb on ymm registers.
define void @avg_v32i8(<32 x i8>* %a, <32 x i8>* %b) {
; AVX2-LABEL: avg_v32i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa (%rsi), %ymm0
; AVX2-NEXT:    vpavgb (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v32i8:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512BW-NEXT:    vpavgb (%rdi), %ymm0, %ymm0
; AVX512BW-NEXT:    vmovdqu %ymm0, (%rax)
; AVX512BW-NEXT:    retq
  %lhs = load <32 x i8>, <32 x i8>* %a
  %rhs = load <32 x i8>, <32 x i8>* %b
  ; Widen both operands to i32 lanes before adding.
  %lhs.wide = zext <32 x i8> %lhs to <32 x i32>
  %rhs.wide = zext <32 x i8> %rhs to <32 x i32>
  ; The rounding bias is added to the first operand, then the second is added.
  %lhs.bias = add nuw nsw <32 x i32> %lhs.wide, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %sum = add nuw nsw <32 x i32> %lhs.bias, %rhs.wide
  %half = lshr <32 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <32 x i32> %half to <32 x i8>
  ; The store target is an undef pointer.
  store <32 x i8> %avg, <32 x i8>* undef, align 4
  ret void
}
129*9880d681SAndroid Build Coastguard Worker
130*9880d681SAndroid Build Coastguard Workerdefine void @avg_v64i8(<64 x i8>* %a, <64 x i8>* %b) {
131*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v64i8:
132*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
133*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu8 (%rsi), %zmm0
134*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpavgb (%rdi), %zmm0, %zmm0
135*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu8 %zmm0, (%rax)
136*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
137*9880d681SAndroid Build Coastguard Worker  %1 = load <64 x i8>, <64 x i8>* %a
138*9880d681SAndroid Build Coastguard Worker  %2 = load <64 x i8>, <64 x i8>* %b
139*9880d681SAndroid Build Coastguard Worker  %3 = zext <64 x i8> %1 to <64 x i32>
140*9880d681SAndroid Build Coastguard Worker  %4 = zext <64 x i8> %2 to <64 x i32>
141*9880d681SAndroid Build Coastguard Worker  %5 = add nuw nsw <64 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
142*9880d681SAndroid Build Coastguard Worker  %6 = add nuw nsw <64 x i32> %5, %4
143*9880d681SAndroid Build Coastguard Worker  %7 = lshr <64 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
144*9880d681SAndroid Build Coastguard Worker  %8 = trunc <64 x i32> %7 to <64 x i8>
145*9880d681SAndroid Build Coastguard Worker  store <64 x i8> %8, <64 x i8>* undef, align 4
146*9880d681SAndroid Build Coastguard Worker  ret void
147*9880d681SAndroid Build Coastguard Worker}
148*9880d681SAndroid Build Coastguard Worker
; avg_v4i16 - rounding average of two <4 x i16> vectors loaded from %a and
; %b, computed in i32 lanes as trunc((zext(a) + 1 + zext(b)) >> 1). The checks
; below expect 8-byte loads feeding one pavgw/vpavgw.
define void @avg_v4i16(<4 x i16>* %a, <4 x i16>* %b) {
; SSE2-LABEL: avg_v4i16:
; SSE2:       # BB#0:
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    pavgw %xmm0, %xmm1
; SSE2-NEXT:    movq %xmm1, (%rax)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: avg_v4i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpavgw %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v4i16:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovq (%rdi), %xmm0
; AVX512BW-NEXT:    vmovq (%rsi), %xmm1
; AVX512BW-NEXT:    vpavgw %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    retq
  %lhs = load <4 x i16>, <4 x i16>* %a
  %rhs = load <4 x i16>, <4 x i16>* %b
  ; Widen both operands to i32 lanes before adding.
  %lhs.wide = zext <4 x i16> %lhs to <4 x i32>
  %rhs.wide = zext <4 x i16> %rhs to <4 x i32>
  ; The rounding bias is added to the first operand, then the second is added.
  %lhs.bias = add nuw nsw <4 x i32> %lhs.wide, <i32 1, i32 1, i32 1, i32 1>
  %sum = add nuw nsw <4 x i32> %lhs.bias, %rhs.wide
  %half = lshr <4 x i32> %sum, <i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <4 x i32> %half to <4 x i16>
  ; The store target is an undef pointer.
  store <4 x i16> %avg, <4 x i16>* undef, align 4
  ret void
}
184*9880d681SAndroid Build Coastguard Worker
; avg_v8i16 - rounding average of two <8 x i16> vectors loaded from %a and
; %b, computed in i32 lanes as trunc((zext(a) + 1 + zext(b)) >> 1). The checks
; below expect a full 128-bit pavgw/vpavgw with one operand folded from memory.
define void @avg_v8i16(<8 x i16>* %a, <8 x i16>* %b) {
; SSE2-LABEL: avg_v8i16:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa (%rsi), %xmm0
; SSE2-NEXT:    pavgw (%rdi), %xmm0
; SSE2-NEXT:    movdqu %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; AVX-LABEL: avg_v8i16:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa (%rsi), %xmm0
; AVX-NEXT:    vpavgw (%rdi), %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, (%rax)
; AVX-NEXT:    retq
  %lhs = load <8 x i16>, <8 x i16>* %a
  %rhs = load <8 x i16>, <8 x i16>* %b
  ; Widen both operands to i32 lanes before adding.
  %lhs.wide = zext <8 x i16> %lhs to <8 x i32>
  %rhs.wide = zext <8 x i16> %rhs to <8 x i32>
  ; The rounding bias is added to the first operand, then the second is added.
  %lhs.bias = add nuw nsw <8 x i32> %lhs.wide, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %sum = add nuw nsw <8 x i32> %lhs.bias, %rhs.wide
  %half = lshr <8 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <8 x i32> %half to <8 x i16>
  ; The store target is an undef pointer.
  store <8 x i16> %avg, <8 x i16>* undef, align 4
  ret void
}
210*9880d681SAndroid Build Coastguard Worker
; avg_v16i16 - rounding average of two <16 x i16> vectors loaded from %a and
; %b, computed in i32 lanes as trunc((zext(a) + 1 + zext(b)) >> 1). The checks
; below expect a 256-bit vpavgw on ymm registers.
define void @avg_v16i16(<16 x i16>* %a, <16 x i16>* %b) {
; AVX2-LABEL: avg_v16i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa (%rsi), %ymm0
; AVX2-NEXT:    vpavgw (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v16i16:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512BW-NEXT:    vpavgw (%rdi), %ymm0, %ymm0
; AVX512BW-NEXT:    vmovdqu %ymm0, (%rax)
; AVX512BW-NEXT:    retq
  %lhs = load <16 x i16>, <16 x i16>* %a
  %rhs = load <16 x i16>, <16 x i16>* %b
  ; Widen both operands to i32 lanes before adding.
  %lhs.wide = zext <16 x i16> %lhs to <16 x i32>
  %rhs.wide = zext <16 x i16> %rhs to <16 x i32>
  ; The rounding bias is added to the first operand, then the second is added.
  %lhs.bias = add nuw nsw <16 x i32> %lhs.wide, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %sum = add nuw nsw <16 x i32> %lhs.bias, %rhs.wide
  %half = lshr <16 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <16 x i32> %half to <16 x i16>
  ; The store target is an undef pointer.
  store <16 x i16> %avg, <16 x i16>* undef, align 4
  ret void
}
237*9880d681SAndroid Build Coastguard Worker
; avg_v32i16 - rounding average of two <32 x i16> vectors loaded from %a and
; %b, computed in i32 lanes as trunc((zext(a) + 1 + zext(b)) >> 1). The checks
; below expect a 512-bit vpavgw on zmm registers.
define void @avg_v32i16(<32 x i16>* %a, <32 x i16>* %b) {
; AVX512BW-LABEL: avg_v32i16:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovdqu16 (%rsi), %zmm0
; AVX512BW-NEXT:    vpavgw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT:    vmovdqu16 %zmm0, (%rax)
; AVX512BW-NEXT:    retq
  %lhs = load <32 x i16>, <32 x i16>* %a
  %rhs = load <32 x i16>, <32 x i16>* %b
  ; Widen both operands to i32 lanes before adding.
  %lhs.wide = zext <32 x i16> %lhs to <32 x i32>
  %rhs.wide = zext <32 x i16> %rhs to <32 x i32>
  ; The rounding bias is added to the first operand, then the second is added.
  %lhs.bias = add nuw nsw <32 x i32> %lhs.wide, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %sum = add nuw nsw <32 x i32> %lhs.bias, %rhs.wide
  %half = lshr <32 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <32 x i32> %half to <32 x i16>
  ; The store target is an undef pointer.
  store <32 x i16> %avg, <32 x i16>* undef, align 4
  ret void
}
256*9880d681SAndroid Build Coastguard Worker
; avg_v4i8_2 - same rounding average as avg_v4i8 but with the adds in the
; other order: trunc(((zext(a) + zext(b)) + 1) >> 1). The checks below expect
; this form to become one pavgb/vpavgb as well.
define void @avg_v4i8_2(<4 x i8>* %a, <4 x i8>* %b) {
; SSE2-LABEL: avg_v4i8_2:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    pavgb %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, (%rax)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: avg_v4i8_2:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX2-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, (%rax)
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v4i8_2:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovd (%rdi), %xmm0
; AVX512BW-NEXT:    vmovd (%rsi), %xmm1
; AVX512BW-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT:    vmovd %xmm0, (%rax)
; AVX512BW-NEXT:    retq
  %lhs = load <4 x i8>, <4 x i8>* %a
  %rhs = load <4 x i8>, <4 x i8>* %b
  ; Widen both operands to i32 lanes before adding.
  %lhs.wide = zext <4 x i8> %lhs to <4 x i32>
  %rhs.wide = zext <4 x i8> %rhs to <4 x i32>
  ; The operands are summed first; the rounding bias is added afterwards.
  %sum = add nuw nsw <4 x i32> %lhs.wide, %rhs.wide
  %sum.bias = add nuw nsw <4 x i32> %sum, <i32 1, i32 1, i32 1, i32 1>
  %half = lshr <4 x i32> %sum.bias, <i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <4 x i32> %half to <4 x i8>
  ; The store target is an undef pointer.
  store <4 x i8> %avg, <4 x i8>* undef, align 4
  ret void
}
292*9880d681SAndroid Build Coastguard Worker
; avg_v8i8_2 - same rounding average as avg_v8i8 but with the adds in the
; other order: trunc(((zext(a) + zext(b)) + 1) >> 1). The checks below expect
; this form to become one pavgb/vpavgb as well.
define void @avg_v8i8_2(<8 x i8>* %a, <8 x i8>* %b) {
; SSE2-LABEL: avg_v8i8_2:
; SSE2:       # BB#0:
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    pavgb %xmm0, %xmm1
; SSE2-NEXT:    movq %xmm1, (%rax)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: avg_v8i8_2:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v8i8_2:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovq (%rdi), %xmm0
; AVX512BW-NEXT:    vmovq (%rsi), %xmm1
; AVX512BW-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    retq
  %lhs = load <8 x i8>, <8 x i8>* %a
  %rhs = load <8 x i8>, <8 x i8>* %b
  ; Widen both operands to i32 lanes before adding.
  %lhs.wide = zext <8 x i8> %lhs to <8 x i32>
  %rhs.wide = zext <8 x i8> %rhs to <8 x i32>
  ; The operands are summed first; the rounding bias is added afterwards.
  %sum = add nuw nsw <8 x i32> %lhs.wide, %rhs.wide
  %sum.bias = add nuw nsw <8 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %half = lshr <8 x i32> %sum.bias, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <8 x i32> %half to <8 x i8>
  ; The store target is an undef pointer.
  store <8 x i8> %avg, <8 x i8>* undef, align 4
  ret void
}
328*9880d681SAndroid Build Coastguard Worker
; avg_v16i8_2 - same rounding average as avg_v16i8 but with the adds in the
; other order: trunc(((zext(a) + zext(b)) + 1) >> 1). The checks below expect
; a 128-bit pavgb/vpavgb with one operand folded from memory.
define void @avg_v16i8_2(<16 x i8>* %a, <16 x i8>* %b) {
; SSE2-LABEL: avg_v16i8_2:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa (%rdi), %xmm0
; SSE2-NEXT:    pavgb (%rsi), %xmm0
; SSE2-NEXT:    movdqu %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; AVX-LABEL: avg_v16i8_2:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    vpavgb (%rsi), %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, (%rax)
; AVX-NEXT:    retq
  %lhs = load <16 x i8>, <16 x i8>* %a
  %rhs = load <16 x i8>, <16 x i8>* %b
  ; Widen both operands to i32 lanes before adding.
  %lhs.wide = zext <16 x i8> %lhs to <16 x i32>
  %rhs.wide = zext <16 x i8> %rhs to <16 x i32>
  ; The operands are summed first; the rounding bias is added afterwards.
  %sum = add nuw nsw <16 x i32> %lhs.wide, %rhs.wide
  %sum.bias = add nuw nsw <16 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %half = lshr <16 x i32> %sum.bias, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <16 x i32> %half to <16 x i8>
  ; The store target is an undef pointer.
  store <16 x i8> %avg, <16 x i8>* undef, align 4
  ret void
}
354*9880d681SAndroid Build Coastguard Worker
; avg_v32i8_2 - same rounding average as avg_v32i8 but with the adds in the
; other order: trunc(((zext(a) + zext(b)) + 1) >> 1). The checks below expect
; a 256-bit vpavgb on ymm registers.
define void @avg_v32i8_2(<32 x i8>* %a, <32 x i8>* %b) {
; AVX2-LABEL: avg_v32i8_2:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vpavgb (%rsi), %ymm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v32i8_2:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512BW-NEXT:    vpavgb (%rsi), %ymm0, %ymm0
; AVX512BW-NEXT:    vmovdqu %ymm0, (%rax)
; AVX512BW-NEXT:    retq
  %lhs = load <32 x i8>, <32 x i8>* %a
  %rhs = load <32 x i8>, <32 x i8>* %b
  ; Widen both operands to i32 lanes before adding.
  %lhs.wide = zext <32 x i8> %lhs to <32 x i32>
  %rhs.wide = zext <32 x i8> %rhs to <32 x i32>
  ; The operands are summed first; the rounding bias is added afterwards.
  %sum = add nuw nsw <32 x i32> %lhs.wide, %rhs.wide
  %sum.bias = add nuw nsw <32 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %half = lshr <32 x i32> %sum.bias, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <32 x i32> %half to <32 x i8>
  ; The store target is an undef pointer.
  store <32 x i8> %avg, <32 x i8>* undef, align 4
  ret void
}
381*9880d681SAndroid Build Coastguard Worker
382*9880d681SAndroid Build Coastguard Workerdefine void @avg_v64i8_2(<64 x i8>* %a, <64 x i8>* %b) {
383*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: avg_v64i8_2:
384*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
385*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu8 (%rsi), %zmm0
386*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpavgb %zmm0, %zmm0, %zmm0
387*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu8 %zmm0, (%rax)
388*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
389*9880d681SAndroid Build Coastguard Worker  %1 = load <64 x i8>, <64 x i8>* %a
390*9880d681SAndroid Build Coastguard Worker  %2 = load <64 x i8>, <64 x i8>* %b
391*9880d681SAndroid Build Coastguard Worker  %3 = zext <64 x i8> %1 to <64 x i32>
392*9880d681SAndroid Build Coastguard Worker  %4 = zext <64 x i8> %2 to <64 x i32>
393*9880d681SAndroid Build Coastguard Worker  %5 = add nuw nsw <64 x i32> %4, %4
394*9880d681SAndroid Build Coastguard Worker  %6 = add nuw nsw <64 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
395*9880d681SAndroid Build Coastguard Worker  %7 = lshr <64 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
396*9880d681SAndroid Build Coastguard Worker  %8 = trunc <64 x i32> %7 to <64 x i8>
397*9880d681SAndroid Build Coastguard Worker  store <64 x i8> %8, <64 x i8>* undef, align 4
398*9880d681SAndroid Build Coastguard Worker  ret void
399*9880d681SAndroid Build Coastguard Worker}
400*9880d681SAndroid Build Coastguard Worker
401*9880d681SAndroid Build Coastguard Worker
; Rounding unsigned average of two <4 x i16> vectors:
;   trunc((zext(a) + zext(b) + 1) >> 1)
; The arithmetic is carried out in i32 lanes. All three check prefixes expect
; a single pavgw/vpavgw on the low 64 bits of an xmm register.
define void @avg_v4i16_2(<4 x i16>* %a, <4 x i16>* %b) {
; SSE2-LABEL: avg_v4i16_2:
; SSE2:       # BB#0:
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    pavgw %xmm0, %xmm1
; SSE2-NEXT:    movq %xmm1, (%rax)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: avg_v4i16_2:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v4i16_2:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovq (%rdi), %xmm0
; AVX512BW-NEXT:    vmovq (%rsi), %xmm1
; AVX512BW-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    retq
  %lhs = load <4 x i16>, <4 x i16>* %a
  %rhs = load <4 x i16>, <4 x i16>* %b
  %lhs.wide = zext <4 x i16> %lhs to <4 x i32>
  %rhs.wide = zext <4 x i16> %rhs to <4 x i32>
  ; Sum first, then add the rounding bias of one per lane.
  %sum = add nuw nsw <4 x i32> %lhs.wide, %rhs.wide
  %sum.rnd = add nuw nsw <4 x i32> %sum, <i32 1, i32 1, i32 1, i32 1>
  %half = lshr <4 x i32> %sum.rnd, <i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <4 x i32> %half to <4 x i16>
  ; The destination pointer is undef; only the selected instructions matter.
  store <4 x i16> %avg, <4 x i16>* undef, align 4
  ret void
}
437*9880d681SAndroid Build Coastguard Worker
; Rounding unsigned average of two <8 x i16> vectors:
;   trunc((zext(a) + zext(b) + 1) >> 1)
; The SSE2 run and both AVX runs (shared prefix) expect one full-width xmm
; pavgw/vpavgw with the second operand folded from memory.
define void @avg_v8i16_2(<8 x i16>* %a, <8 x i16>* %b) {
; SSE2-LABEL: avg_v8i16_2:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa (%rdi), %xmm0
; SSE2-NEXT:    pavgw (%rsi), %xmm0
; SSE2-NEXT:    movdqu %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; AVX-LABEL: avg_v8i16_2:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    vpavgw (%rsi), %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, (%rax)
; AVX-NEXT:    retq
  %lhs = load <8 x i16>, <8 x i16>* %a
  %rhs = load <8 x i16>, <8 x i16>* %b
  %lhs.wide = zext <8 x i16> %lhs to <8 x i32>
  %rhs.wide = zext <8 x i16> %rhs to <8 x i32>
  ; Sum first, then add the rounding bias of one per lane.
  %sum = add nuw nsw <8 x i32> %lhs.wide, %rhs.wide
  %sum.rnd = add nuw nsw <8 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %half = lshr <8 x i32> %sum.rnd, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <8 x i32> %half to <8 x i16>
  ; The destination pointer is undef; only the selected instructions matter.
  store <8 x i16> %avg, <8 x i16>* undef, align 4
  ret void
}
463*9880d681SAndroid Build Coastguard Worker
; Rounding unsigned average of two <16 x i16> vectors:
;   trunc((zext(a) + zext(b) + 1) >> 1)
; Only the AVX2 and AVX512BW prefixes carry assertions here; both expect a
; single ymm vpavgw with the second operand folded from memory.
define void @avg_v16i16_2(<16 x i16>* %a, <16 x i16>* %b) {
; AVX2-LABEL: avg_v16i16_2:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vpavgw (%rsi), %ymm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v16i16_2:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512BW-NEXT:    vpavgw (%rsi), %ymm0, %ymm0
; AVX512BW-NEXT:    vmovdqu %ymm0, (%rax)
; AVX512BW-NEXT:    retq
  %lhs = load <16 x i16>, <16 x i16>* %a
  %rhs = load <16 x i16>, <16 x i16>* %b
  %lhs.wide = zext <16 x i16> %lhs to <16 x i32>
  %rhs.wide = zext <16 x i16> %rhs to <16 x i32>
  ; Sum first, then add the rounding bias of one per lane.
  %sum = add nuw nsw <16 x i32> %lhs.wide, %rhs.wide
  %sum.rnd = add nuw nsw <16 x i32> %sum, <
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %half = lshr <16 x i32> %sum.rnd, <
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <16 x i32> %half to <16 x i16>
  ; The destination pointer is undef; only the selected instructions matter.
  store <16 x i16> %avg, <16 x i16>* undef, align 4
  ret void
}
490*9880d681SAndroid Build Coastguard Worker
; Rounding unsigned average of two <32 x i16> vectors:
;   trunc((zext(a) + zext(b) + 1) >> 1)
; Only the AVX512BW prefix carries assertions here; it expects a single zmm
; vpavgw with the second operand folded from memory.
define void @avg_v32i16_2(<32 x i16>* %a, <32 x i16>* %b) {
; AVX512BW-LABEL: avg_v32i16_2:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovdqu16 (%rdi), %zmm0
; AVX512BW-NEXT:    vpavgw (%rsi), %zmm0, %zmm0
; AVX512BW-NEXT:    vmovdqu16 %zmm0, (%rax)
; AVX512BW-NEXT:    retq
  %lhs = load <32 x i16>, <32 x i16>* %a
  %rhs = load <32 x i16>, <32 x i16>* %b
  %lhs.wide = zext <32 x i16> %lhs to <32 x i32>
  %rhs.wide = zext <32 x i16> %rhs to <32 x i32>
  ; Sum first, then add the rounding bias of one per lane.
  %sum = add nuw nsw <32 x i32> %lhs.wide, %rhs.wide
  %sum.rnd = add nuw nsw <32 x i32> %sum, <
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %half = lshr <32 x i32> %sum.rnd, <
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <32 x i32> %half to <32 x i16>
  ; The destination pointer is undef; only the selected instructions matter.
  store <32 x i16> %avg, <32 x i16>* undef, align 4
  ret void
}
509*9880d681SAndroid Build Coastguard Worker
; Halved sum of a loaded <4 x i8> vector and a per-lane constant:
;   trunc((zext(a) + <1, 2, 3, 4>) >> 1)
; All three check prefixes expect a pavgb/vpavgb whose second operand is a
; RIP-relative memory reference.
define void @avg_v4i8_const(<4 x i8>* %a) {
; SSE2-LABEL: avg_v4i8_const:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    pavgb {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movd %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: avg_v4i8_const:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vpavgb {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, (%rax)
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v4i8_const:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovd (%rdi), %xmm0
; AVX512BW-NEXT:    vpavgb {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT:    vmovd %xmm0, (%rax)
; AVX512BW-NEXT:    retq
  %src = load <4 x i8>, <4 x i8>* %a
  %src.wide = zext <4 x i8> %src to <4 x i32>
  ; The constant addend already includes the rounding bias, so there is no
  ; separate +1 step in this variant.
  %biased = add nuw nsw <4 x i32> %src.wide, <i32 1, i32 2, i32 3, i32 4>
  %half = lshr <4 x i32> %biased, <i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <4 x i32> %half to <4 x i8>
  ; The destination pointer is undef; only the selected instructions matter.
  store <4 x i8> %avg, <4 x i8>* undef, align 4
  ret void
}
539*9880d681SAndroid Build Coastguard Worker
; Halved sum of a loaded <8 x i8> vector and a per-lane constant:
;   trunc((zext(a) + <1, 2, ..., 8>) >> 1)
; All three check prefixes expect a pavgb/vpavgb whose second operand is a
; RIP-relative memory reference.
define void @avg_v8i8_const(<8 x i8>* %a) {
; SSE2-LABEL: avg_v8i8_const:
; SSE2:       # BB#0:
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    pavgb {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movq %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: avg_v8i8_const:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vpavgb {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v8i8_const:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovq (%rdi), %xmm0
; AVX512BW-NEXT:    vpavgb {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    retq
  %src = load <8 x i8>, <8 x i8>* %a
  %src.wide = zext <8 x i8> %src to <8 x i32>
  ; The constant addend already includes the rounding bias, so there is no
  ; separate +1 step in this variant.
  %biased = add nuw nsw <8 x i32> %src.wide, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %half = lshr <8 x i32> %biased, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <8 x i32> %half to <8 x i8>
  ; The destination pointer is undef; only the selected instructions matter.
  store <8 x i8> %avg, <8 x i8>* undef, align 4
  ret void
}
569*9880d681SAndroid Build Coastguard Worker
; Halved sum of a loaded <16 x i8> vector and a per-lane constant:
;   trunc((zext(a) + <1..8, 1..8>) >> 1)
; The SSE2 run and both AVX runs (shared prefix) expect a full-width xmm
; pavgb/vpavgb whose second operand is a RIP-relative memory reference.
define void @avg_v16i8_const(<16 x i8>* %a) {
; SSE2-LABEL: avg_v16i8_const:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa (%rdi), %xmm0
; SSE2-NEXT:    pavgb {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqu %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; AVX-LABEL: avg_v16i8_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    vpavgb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, (%rax)
; AVX-NEXT:    retq
  %src = load <16 x i8>, <16 x i8>* %a
  %src.wide = zext <16 x i8> %src to <16 x i32>
  ; The constant addend already includes the rounding bias, so there is no
  ; separate +1 step in this variant.
  %biased = add nuw nsw <16 x i32> %src.wide, <
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %half = lshr <16 x i32> %biased, <
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <16 x i32> %half to <16 x i8>
  ; The destination pointer is undef; only the selected instructions matter.
  store <16 x i8> %avg, <16 x i8>* undef, align 4
  ret void
}
592*9880d681SAndroid Build Coastguard Worker
; Halved sum of a loaded <32 x i8> vector and a per-lane constant:
;   trunc((zext(a) + <1..8 repeated four times>) >> 1)
; Only the AVX2 and AVX512BW prefixes carry assertions here; both expect a ymm
; vpavgb whose second operand is a RIP-relative memory reference.
define void @avg_v32i8_const(<32 x i8>* %a) {
; AVX2-LABEL: avg_v32i8_const:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vpavgb {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v32i8_const:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512BW-NEXT:    vpavgb {{.*}}(%rip), %ymm0, %ymm0
; AVX512BW-NEXT:    vmovdqu %ymm0, (%rax)
; AVX512BW-NEXT:    retq
  %src = load <32 x i8>, <32 x i8>* %a
  %src.wide = zext <32 x i8> %src to <32 x i32>
  ; The constant addend already includes the rounding bias, so there is no
  ; separate +1 step in this variant.
  %biased = add nuw nsw <32 x i32> %src.wide, <
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %half = lshr <32 x i32> %biased, <
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <32 x i32> %half to <32 x i8>
  ; The destination pointer is undef; only the selected instructions matter.
  store <32 x i8> %avg, <32 x i8>* undef, align 4
  ret void
}
616*9880d681SAndroid Build Coastguard Worker
; Halved sum of a loaded <64 x i8> vector and a per-lane constant:
;   trunc((zext(a) + <1..8 repeated eight times>) >> 1)
; Only the AVX512BW prefix carries assertions here; it expects a zmm vpavgb
; whose second operand is a RIP-relative memory reference.
define void @avg_v64i8_const(<64 x i8>* %a) {
; AVX512BW-LABEL: avg_v64i8_const:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovdqu8 (%rdi), %zmm0
; AVX512BW-NEXT:    vpavgb {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    vmovdqu8 %zmm0, (%rax)
; AVX512BW-NEXT:    retq
  %src = load <64 x i8>, <64 x i8>* %a
  %src.wide = zext <64 x i8> %src to <64 x i32>
  ; The constant addend already includes the rounding bias, so there is no
  ; separate +1 step in this variant.
  %biased = add nuw nsw <64 x i32> %src.wide, <
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %half = lshr <64 x i32> %biased, <
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <64 x i32> %half to <64 x i8>
  ; The destination pointer is undef; only the selected instructions matter.
  store <64 x i8> %avg, <64 x i8>* undef, align 4
  ret void
}
632*9880d681SAndroid Build Coastguard Worker
; Halved sum of a loaded <4 x i16> vector and a per-lane constant:
;   trunc((zext(a) + <1, 2, 3, 4>) >> 1)
; All three check prefixes expect a pavgw/vpavgw whose second operand is a
; RIP-relative memory reference.
define void @avg_v4i16_const(<4 x i16>* %a) {
; SSE2-LABEL: avg_v4i16_const:
; SSE2:       # BB#0:
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    pavgw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movq %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; AVX2-LABEL: avg_v4i16_const:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vpavgw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v4i16_const:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovq (%rdi), %xmm0
; AVX512BW-NEXT:    vpavgw {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    retq
  %src = load <4 x i16>, <4 x i16>* %a
  %src.wide = zext <4 x i16> %src to <4 x i32>
  ; The constant addend already includes the rounding bias, so there is no
  ; separate +1 step in this variant.
  %biased = add nuw nsw <4 x i32> %src.wide, <i32 1, i32 2, i32 3, i32 4>
  %half = lshr <4 x i32> %biased, <i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <4 x i32> %half to <4 x i16>
  ; The destination pointer is undef; only the selected instructions matter.
  store <4 x i16> %avg, <4 x i16>* undef, align 4
  ret void
}
662*9880d681SAndroid Build Coastguard Worker
; Halved sum of a loaded <8 x i16> vector and a per-lane constant:
;   trunc((zext(a) + <1, 2, ..., 8>) >> 1)
; The SSE2 run and both AVX runs (shared prefix) expect a full-width xmm
; pavgw/vpavgw whose second operand is a RIP-relative memory reference.
define void @avg_v8i16_const(<8 x i16>* %a) {
; SSE2-LABEL: avg_v8i16_const:
; SSE2:       # BB#0:
; SSE2-NEXT:    movdqa (%rdi), %xmm0
; SSE2-NEXT:    pavgw {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movdqu %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; AVX-LABEL: avg_v8i16_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    vpavgw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, (%rax)
; AVX-NEXT:    retq
  %src = load <8 x i16>, <8 x i16>* %a
  %src.wide = zext <8 x i16> %src to <8 x i32>
  ; The constant addend already includes the rounding bias, so there is no
  ; separate +1 step in this variant.
  %biased = add nuw nsw <8 x i32> %src.wide, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %half = lshr <8 x i32> %biased, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <8 x i32> %half to <8 x i16>
  ; The destination pointer is undef; only the selected instructions matter.
  store <8 x i16> %avg, <8 x i16>* undef, align 4
  ret void
}
685*9880d681SAndroid Build Coastguard Worker
; Halved sum of a loaded <16 x i16> vector and a per-lane constant:
;   trunc((zext(a) + <1..8, 1..8>) >> 1)
; Only the AVX2 and AVX512BW prefixes carry assertions here; both expect a ymm
; vpavgw whose second operand is a RIP-relative memory reference.
define void @avg_v16i16_const(<16 x i16>* %a) {
; AVX2-LABEL: avg_v16i16_const:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vpavgw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512BW-LABEL: avg_v16i16_const:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512BW-NEXT:    vpavgw {{.*}}(%rip), %ymm0, %ymm0
; AVX512BW-NEXT:    vmovdqu %ymm0, (%rax)
; AVX512BW-NEXT:    retq
  %src = load <16 x i16>, <16 x i16>* %a
  %src.wide = zext <16 x i16> %src to <16 x i32>
  ; The constant addend already includes the rounding bias, so there is no
  ; separate +1 step in this variant.
  %biased = add nuw nsw <16 x i32> %src.wide, <
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %half = lshr <16 x i32> %biased, <
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <16 x i32> %half to <16 x i16>
  ; The destination pointer is undef; only the selected instructions matter.
  store <16 x i16> %avg, <16 x i16>* undef, align 4
  ret void
}
709*9880d681SAndroid Build Coastguard Worker
; Halved sum of a loaded <32 x i16> vector and a per-lane constant:
;   trunc((zext(a) + <1..8 repeated four times>) >> 1)
; Only the AVX512BW prefix carries assertions here; it expects a zmm vpavgw
; whose second operand is a RIP-relative memory reference.
define void @avg_v32i16_const(<32 x i16>* %a) {
; AVX512BW-LABEL: avg_v32i16_const:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovdqu16 (%rdi), %zmm0
; AVX512BW-NEXT:    vpavgw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    vmovdqu16 %zmm0, (%rax)
; AVX512BW-NEXT:    retq
  %src = load <32 x i16>, <32 x i16>* %a
  %src.wide = zext <32 x i16> %src to <32 x i32>
  ; The constant addend already includes the rounding bias, so there is no
  ; separate +1 step in this variant.
  %biased = add nuw nsw <32 x i32> %src.wide, <
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %half = lshr <32 x i32> %biased, <
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
      i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <32 x i32> %half to <32 x i16>
  ; The destination pointer is undef; only the selected instructions matter.
  store <32 x i16> %avg, <32 x i16>* undef, align 4
  ret void
}
725