xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/psubus.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
4*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
5*9880d681SAndroid Build Coastguard Worker
; test1 - 128-bit <8 x i16> case driven by a sign-bit test.
; The body loads eight i16 lanes from %head (align 2), keeps (x xor 0x8000)
; in every lane where x is negative as a signed value, writes zero in the
; remaining lanes, and stores the vector back to the same address.
; For a lane whose top bit is set, the xor equals x - 32768, so the select is
; an unsigned saturating subtraction of 32768.
; The checks below expect one psubusw (vpsubusw on the AVX runs) whose
; subtrahend is a RIP-relative memory operand, between an unaligned load and
; an unaligned store.
6*9880d681SAndroid Build Coastguard Workerdefine void @test1(i16* nocapture %head) nounwind {
7*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test1:
8*9880d681SAndroid Build Coastguard Worker; SSE:       ## BB#0: ## %vector.ph
9*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu (%rdi), %xmm0
10*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psubusw {{.*}}(%rip), %xmm0
11*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu %xmm0, (%rdi)
12*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
13*9880d681SAndroid Build Coastguard Worker;
14*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test1:
15*9880d681SAndroid Build Coastguard Worker; AVX:       ## BB#0: ## %vector.ph
16*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqu (%rdi), %xmm0
17*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsubusw {{.*}}(%rip), %xmm0, %xmm0
18*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
19*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
20*9880d681SAndroid Build Coastguard Workervector.ph:
21*9880d681SAndroid Build Coastguard Worker  %0 = getelementptr inbounds i16, i16* %head, i64 0
22*9880d681SAndroid Build Coastguard Worker  %1 = bitcast i16* %0 to <8 x i16>*
23*9880d681SAndroid Build Coastguard Worker  %2 = load <8 x i16>, <8 x i16>* %1, align 2
24*9880d681SAndroid Build Coastguard Worker  %3 = icmp slt <8 x i16> %2, zeroinitializer
25*9880d681SAndroid Build Coastguard Worker  %4 = xor <8 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
26*9880d681SAndroid Build Coastguard Worker  %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
27*9880d681SAndroid Build Coastguard Worker  store <8 x i16> %5, <8 x i16>* %1, align 2
28*9880d681SAndroid Build Coastguard Worker  ret void
29*9880d681SAndroid Build Coastguard Worker}
30*9880d681SAndroid Build Coastguard Worker
; test2 - 128-bit <8 x i16> case driven by an unsigned compare with a constant.
; The body loads eight i16 lanes from %head, computes x + (-32767) (that is,
; x - 32767 modulo 2^16), keeps that value in lanes where x is unsigned-greater
; than 32766 and writes zero elsewhere, then stores the result in place.
; That select is an unsigned saturating subtraction of 32767.
; The checks below expect the same shape as test1: an unaligned load, one
; psubusw / vpsubusw with a RIP-relative memory operand, and an unaligned store.
31*9880d681SAndroid Build Coastguard Workerdefine void @test2(i16* nocapture %head) nounwind {
32*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test2:
33*9880d681SAndroid Build Coastguard Worker; SSE:       ## BB#0: ## %vector.ph
34*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu (%rdi), %xmm0
35*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psubusw {{.*}}(%rip), %xmm0
36*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu %xmm0, (%rdi)
37*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
38*9880d681SAndroid Build Coastguard Worker;
39*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test2:
40*9880d681SAndroid Build Coastguard Worker; AVX:       ## BB#0: ## %vector.ph
41*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqu (%rdi), %xmm0
42*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsubusw {{.*}}(%rip), %xmm0, %xmm0
43*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
44*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
45*9880d681SAndroid Build Coastguard Workervector.ph:
46*9880d681SAndroid Build Coastguard Worker  %0 = getelementptr inbounds i16, i16* %head, i64 0
47*9880d681SAndroid Build Coastguard Worker  %1 = bitcast i16* %0 to <8 x i16>*
48*9880d681SAndroid Build Coastguard Worker  %2 = load <8 x i16>, <8 x i16>* %1, align 2
49*9880d681SAndroid Build Coastguard Worker  %3 = icmp ugt <8 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
50*9880d681SAndroid Build Coastguard Worker  %4 = add <8 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
51*9880d681SAndroid Build Coastguard Worker  %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
52*9880d681SAndroid Build Coastguard Worker  store <8 x i16> %5, <8 x i16>* %1, align 2
53*9880d681SAndroid Build Coastguard Worker  ret void
54*9880d681SAndroid Build Coastguard Worker}
55*9880d681SAndroid Build Coastguard Worker
; test3 - 128-bit <8 x i16> case with a non-constant, splatted subtrahend.
; The scalar %w is inserted into lane 0 and broadcast to all eight lanes with
; a zero shuffle mask. The body then loads eight i16 lanes from %head, writes
; zero where x is unsigned-less-than the splat and x - splat otherwise, and
; stores the result in place (an unsigned saturating subtraction of %w).
; Expected code on every run line ends in one register-register psubusw.
; The runs differ only in how the splat is built: the SSE2, SSSE3 and AVX1
; runs expect a pshuflw + pshufd pair after the movd, while the AVX2 run
; expects a single vpbroadcastw.
56*9880d681SAndroid Build Coastguard Workerdefine void @test3(i16* nocapture %head, i16 zeroext %w) nounwind {
57*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test3:
58*9880d681SAndroid Build Coastguard Worker; SSE:       ## BB#0: ## %vector.ph
59*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %esi, %xmm0
60*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
61*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
62*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu (%rdi), %xmm1
63*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psubusw %xmm0, %xmm1
64*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu %xmm1, (%rdi)
65*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
66*9880d681SAndroid Build Coastguard Worker;
67*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test3:
68*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %vector.ph
69*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovd %esi, %xmm0
70*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
71*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
72*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqu (%rdi), %xmm1
73*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubusw %xmm0, %xmm1, %xmm0
74*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqu %xmm0, (%rdi)
75*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
76*9880d681SAndroid Build Coastguard Worker;
77*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test3:
78*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %vector.ph
79*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovd %esi, %xmm0
80*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
81*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu (%rdi), %xmm1
82*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubusw %xmm0, %xmm1, %xmm0
83*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu %xmm0, (%rdi)
84*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
85*9880d681SAndroid Build Coastguard Workervector.ph:
86*9880d681SAndroid Build Coastguard Worker  %0 = insertelement <8 x i16> undef, i16 %w, i32 0
87*9880d681SAndroid Build Coastguard Worker  %broadcast15 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
88*9880d681SAndroid Build Coastguard Worker  %1 = getelementptr inbounds i16, i16* %head, i64 0
89*9880d681SAndroid Build Coastguard Worker  %2 = bitcast i16* %1 to <8 x i16>*
90*9880d681SAndroid Build Coastguard Worker  %3 = load <8 x i16>, <8 x i16>* %2, align 2
91*9880d681SAndroid Build Coastguard Worker  %4 = icmp ult <8 x i16> %3, %broadcast15
92*9880d681SAndroid Build Coastguard Worker  %5 = sub <8 x i16> %3, %broadcast15
93*9880d681SAndroid Build Coastguard Worker  %6 = select <8 x i1> %4, <8 x i16> zeroinitializer, <8 x i16> %5
94*9880d681SAndroid Build Coastguard Worker  store <8 x i16> %6, <8 x i16>* %2, align 2
95*9880d681SAndroid Build Coastguard Worker  ret void
96*9880d681SAndroid Build Coastguard Worker}
97*9880d681SAndroid Build Coastguard Worker
; test4 - 128-bit <16 x i8> counterpart of test1 (sign-bit test).
; The body loads sixteen i8 lanes from %head (align 1), keeps (x xor 0x80) in
; lanes where x is negative as a signed value and zero elsewhere, and stores
; the result in place. For a lane with the top bit set the xor equals x - 128,
; so the select is an unsigned saturating subtraction of 128.
; The checks below expect one psubusb / vpsubusb with a RIP-relative memory
; operand, between an unaligned load and an unaligned store.
98*9880d681SAndroid Build Coastguard Workerdefine void @test4(i8* nocapture %head) nounwind {
99*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test4:
100*9880d681SAndroid Build Coastguard Worker; SSE:       ## BB#0: ## %vector.ph
101*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu (%rdi), %xmm0
102*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psubusb {{.*}}(%rip), %xmm0
103*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu %xmm0, (%rdi)
104*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
105*9880d681SAndroid Build Coastguard Worker;
106*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test4:
107*9880d681SAndroid Build Coastguard Worker; AVX:       ## BB#0: ## %vector.ph
108*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqu (%rdi), %xmm0
109*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsubusb {{.*}}(%rip), %xmm0, %xmm0
110*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
111*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
112*9880d681SAndroid Build Coastguard Workervector.ph:
113*9880d681SAndroid Build Coastguard Worker  %0 = getelementptr inbounds i8, i8* %head, i64 0
114*9880d681SAndroid Build Coastguard Worker  %1 = bitcast i8* %0 to <16 x i8>*
115*9880d681SAndroid Build Coastguard Worker  %2 = load <16 x i8>, <16 x i8>* %1, align 1
116*9880d681SAndroid Build Coastguard Worker  %3 = icmp slt <16 x i8> %2, zeroinitializer
117*9880d681SAndroid Build Coastguard Worker  %4 = xor <16 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
118*9880d681SAndroid Build Coastguard Worker  %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
119*9880d681SAndroid Build Coastguard Worker  store <16 x i8> %5, <16 x i8>* %1, align 1
120*9880d681SAndroid Build Coastguard Worker  ret void
121*9880d681SAndroid Build Coastguard Worker}
122*9880d681SAndroid Build Coastguard Worker
; test5 - 128-bit <16 x i8> counterpart of test2 (unsigned compare with a
; constant).
; The body loads sixteen i8 lanes from %head, computes x + (-127) (x - 127
; modulo 2^8), keeps that value in lanes where x is unsigned-greater than 126
; and writes zero elsewhere, then stores the result in place. That select is
; an unsigned saturating subtraction of 127.
; The checks below expect one psubusb / vpsubusb with a RIP-relative memory
; operand, between an unaligned load and an unaligned store.
123*9880d681SAndroid Build Coastguard Workerdefine void @test5(i8* nocapture %head) nounwind {
124*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test5:
125*9880d681SAndroid Build Coastguard Worker; SSE:       ## BB#0: ## %vector.ph
126*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu (%rdi), %xmm0
127*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psubusb {{.*}}(%rip), %xmm0
128*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu %xmm0, (%rdi)
129*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
130*9880d681SAndroid Build Coastguard Worker;
131*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test5:
132*9880d681SAndroid Build Coastguard Worker; AVX:       ## BB#0: ## %vector.ph
133*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqu (%rdi), %xmm0
134*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpsubusb {{.*}}(%rip), %xmm0, %xmm0
135*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
136*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
137*9880d681SAndroid Build Coastguard Workervector.ph:
138*9880d681SAndroid Build Coastguard Worker  %0 = getelementptr inbounds i8, i8* %head, i64 0
139*9880d681SAndroid Build Coastguard Worker  %1 = bitcast i8* %0 to <16 x i8>*
140*9880d681SAndroid Build Coastguard Worker  %2 = load <16 x i8>, <16 x i8>* %1, align 1
141*9880d681SAndroid Build Coastguard Worker  %3 = icmp ugt <16 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
142*9880d681SAndroid Build Coastguard Worker  %4 = add <16 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
143*9880d681SAndroid Build Coastguard Worker  %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
144*9880d681SAndroid Build Coastguard Worker  store <16 x i8> %5, <16 x i8>* %1, align 1
145*9880d681SAndroid Build Coastguard Worker  ret void
146*9880d681SAndroid Build Coastguard Worker}
147*9880d681SAndroid Build Coastguard Worker
; test6 - 128-bit <16 x i8> counterpart of test3 (splatted, non-constant
; subtrahend).
; The scalar %w is inserted into lane 0 and broadcast to all sixteen lanes.
; The body loads sixteen i8 lanes from %head, writes zero where x is
; unsigned-less-than the splat and x - splat otherwise, and stores the result
; in place (an unsigned saturating subtraction of %w).
; Every run line expects one register-register psubusb; the byte splat is
; what differs, so this function has separate checks per feature level:
;   - the SSE2 run expects punpcklbw + pshuflw + pshufd,
;   - the SSSE3 and AVX1 runs expect pshufb with a zeroed mask register,
;   - the AVX2 run expects a single vpbroadcastb.
148*9880d681SAndroid Build Coastguard Workerdefine void @test6(i8* nocapture %head, i8 zeroext %w) nounwind {
149*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test6:
150*9880d681SAndroid Build Coastguard Worker; SSE2:       ## BB#0: ## %vector.ph
151*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movd %esi, %xmm0
152*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
153*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
154*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
155*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqu (%rdi), %xmm1
156*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psubusb %xmm0, %xmm1
157*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqu %xmm1, (%rdi)
158*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
159*9880d681SAndroid Build Coastguard Worker;
160*9880d681SAndroid Build Coastguard Worker; SSSE3-LABEL: test6:
161*9880d681SAndroid Build Coastguard Worker; SSSE3:       ## BB#0: ## %vector.ph
162*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    movd %esi, %xmm0
163*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    pxor %xmm1, %xmm1
164*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    pshufb %xmm1, %xmm0
165*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    movdqu (%rdi), %xmm1
166*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    psubusb %xmm0, %xmm1
167*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    movdqu %xmm1, (%rdi)
168*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    retq
169*9880d681SAndroid Build Coastguard Worker;
170*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test6:
171*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %vector.ph
172*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovd %esi, %xmm0
173*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
174*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
175*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqu (%rdi), %xmm1
176*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubusb %xmm0, %xmm1, %xmm0
177*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqu %xmm0, (%rdi)
178*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
179*9880d681SAndroid Build Coastguard Worker;
180*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test6:
181*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %vector.ph
182*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovd %esi, %xmm0
183*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
184*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu (%rdi), %xmm1
185*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubusb %xmm0, %xmm1, %xmm0
186*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu %xmm0, (%rdi)
187*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
188*9880d681SAndroid Build Coastguard Workervector.ph:
189*9880d681SAndroid Build Coastguard Worker  %0 = insertelement <16 x i8> undef, i8 %w, i32 0
190*9880d681SAndroid Build Coastguard Worker  %broadcast15 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
191*9880d681SAndroid Build Coastguard Worker  %1 = getelementptr inbounds i8, i8* %head, i64 0
192*9880d681SAndroid Build Coastguard Worker  %2 = bitcast i8* %1 to <16 x i8>*
193*9880d681SAndroid Build Coastguard Worker  %3 = load <16 x i8>, <16 x i8>* %2, align 1
194*9880d681SAndroid Build Coastguard Worker  %4 = icmp ult <16 x i8> %3, %broadcast15
195*9880d681SAndroid Build Coastguard Worker  %5 = sub <16 x i8> %3, %broadcast15
196*9880d681SAndroid Build Coastguard Worker  %6 = select <16 x i1> %4, <16 x i8> zeroinitializer, <16 x i8> %5
197*9880d681SAndroid Build Coastguard Worker  store <16 x i8> %6, <16 x i8>* %2, align 1
198*9880d681SAndroid Build Coastguard Worker  ret void
199*9880d681SAndroid Build Coastguard Worker}
200*9880d681SAndroid Build Coastguard Worker
; test7 - 256-bit <16 x i16> version of test1 (sign-bit test, xor 0x8000).
; The body loads sixteen i16 lanes from %head (align 2), keeps (x xor 0x8000)
; in lanes where x is negative as a signed value and zero elsewhere, and
; stores the result in place.
; Expected code per run line:
;   - SSE2 / SSSE3 runs - the vector is handled as two xmm halves, each with
;     its own psubusw against one shared register holding 32768 in all lanes;
;   - AVX1 run - the checks contain no vpsubusw; the expected sequence is two
;     vpcmpgtw against zero, a ymm vxorps with a memory constant, and vandps;
;   - AVX2 run - one ymm vpsubusw with a RIP-relative memory operand.
; Both AVX variants are expected to end with vzeroupper before retq.
201*9880d681SAndroid Build Coastguard Workerdefine void @test7(i16* nocapture %head) nounwind {
202*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test7:
203*9880d681SAndroid Build Coastguard Worker; SSE:       ## BB#0: ## %vector.ph
204*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu (%rdi), %xmm0
205*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu 16(%rdi), %xmm1
206*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
207*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psubusw %xmm2, %xmm0
208*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psubusw %xmm2, %xmm1
209*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu %xmm1, 16(%rdi)
210*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu %xmm0, (%rdi)
211*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
212*9880d681SAndroid Build Coastguard Worker;
213*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test7:
214*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %vector.ph
215*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovups (%rdi), %ymm0
216*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
217*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
218*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpcmpgtw %xmm1, %xmm2, %xmm1
219*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm2, %xmm2
220*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
221*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
222*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
223*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovups %ymm0, (%rdi)
224*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vzeroupper
225*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
226*9880d681SAndroid Build Coastguard Worker;
227*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test7:
228*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %vector.ph
229*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu (%rdi), %ymm0
230*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubusw {{.*}}(%rip), %ymm0, %ymm0
231*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
232*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vzeroupper
233*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
234*9880d681SAndroid Build Coastguard Workervector.ph:
235*9880d681SAndroid Build Coastguard Worker  %0 = getelementptr inbounds i16, i16* %head, i64 0
236*9880d681SAndroid Build Coastguard Worker  %1 = bitcast i16* %0 to <16 x i16>*
237*9880d681SAndroid Build Coastguard Worker  %2 = load <16 x i16>, <16 x i16>* %1, align 2
238*9880d681SAndroid Build Coastguard Worker  %3 = icmp slt <16 x i16> %2, zeroinitializer
239*9880d681SAndroid Build Coastguard Worker  %4 = xor <16 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
240*9880d681SAndroid Build Coastguard Worker  %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
241*9880d681SAndroid Build Coastguard Worker  store <16 x i16> %5, <16 x i16>* %1, align 2
242*9880d681SAndroid Build Coastguard Worker  ret void
243*9880d681SAndroid Build Coastguard Worker}
244*9880d681SAndroid Build Coastguard Worker
; test8 - 256-bit <16 x i16> version of test2 (ugt 32766, add -32767).
; The body loads sixteen i16 lanes from %head, keeps x + (-32767) in lanes
; where x is unsigned-greater than 32766 and zero elsewhere, and stores the
; result in place.
; Expected code per run line:
;   - SSE2 / SSSE3 runs - two xmm psubusw against one shared register holding
;     32767 in all lanes;
;   - AVX1 run - no vpsubusw; the expected sequence xors both halves with
;     32768 before a signed vpcmpgtw against 65534, adds 32769 with vpaddw,
;     and combines mask and sum with vandps;
;   - AVX2 run - one ymm vpsubusw with a RIP-relative memory operand.
245*9880d681SAndroid Build Coastguard Workerdefine void @test8(i16* nocapture %head) nounwind {
246*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test8:
247*9880d681SAndroid Build Coastguard Worker; SSE:       ## BB#0: ## %vector.ph
248*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu (%rdi), %xmm0
249*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu 16(%rdi), %xmm1
250*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767]
251*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psubusw %xmm2, %xmm0
252*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psubusw %xmm2, %xmm1
253*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu %xmm1, 16(%rdi)
254*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu %xmm0, (%rdi)
255*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
256*9880d681SAndroid Build Coastguard Worker;
257*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test8:
258*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %vector.ph
259*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovups (%rdi), %ymm0
260*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
261*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
262*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vxorps %xmm2, %xmm1, %xmm3
263*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [65534,65534,65534,65534,65534,65534,65534,65534]
264*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm3, %xmm3
265*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vxorps %xmm2, %xmm0, %xmm2
266*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm2, %xmm2
267*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
268*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [32769,32769,32769,32769,32769,32769,32769,32769]
269*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm3, %xmm1, %xmm1
270*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm3, %xmm0, %xmm0
271*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
272*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
273*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovups %ymm0, (%rdi)
274*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vzeroupper
275*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
276*9880d681SAndroid Build Coastguard Worker;
277*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test8:
278*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %vector.ph
279*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu (%rdi), %ymm0
280*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubusw {{.*}}(%rip), %ymm0, %ymm0
281*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
282*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vzeroupper
283*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
284*9880d681SAndroid Build Coastguard Workervector.ph:
285*9880d681SAndroid Build Coastguard Worker  %0 = getelementptr inbounds i16, i16* %head, i64 0
286*9880d681SAndroid Build Coastguard Worker  %1 = bitcast i16* %0 to <16 x i16>*
287*9880d681SAndroid Build Coastguard Worker  %2 = load <16 x i16>, <16 x i16>* %1, align 2
288*9880d681SAndroid Build Coastguard Worker  %3 = icmp ugt <16 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
289*9880d681SAndroid Build Coastguard Worker  %4 = add <16 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
290*9880d681SAndroid Build Coastguard Worker  %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
291*9880d681SAndroid Build Coastguard Worker  store <16 x i16> %5, <16 x i16>* %1, align 2
292*9880d681SAndroid Build Coastguard Worker  ret void
293*9880d681SAndroid Build Coastguard Worker
294*9880d681SAndroid Build Coastguard Worker}
295*9880d681SAndroid Build Coastguard Worker
; test9 - 256-bit <16 x i16> version of test3 (splatted, non-constant
; subtrahend).
; The scalar %w is inserted into lane 0 and broadcast to all sixteen lanes.
; The body loads sixteen i16 lanes from %head, writes zero where x is
; unsigned-less-than the splat and x - splat otherwise, and stores the result
; in place.
; Expected code per run line:
;   - SSE2 / SSSE3 runs - pshuflw + pshufd splat, then two xmm psubusw;
;   - AVX1 run - no vpsubusw; the expected sequence is a plain vpsubw per
;     half, a vpmaxuw + vpcmpeqw pair per half to form the mask, and vandps;
;   - AVX2 run - vpbroadcastw into a ymm register, then one ymm vpsubusw.
296*9880d681SAndroid Build Coastguard Workerdefine void @test9(i16* nocapture %head, i16 zeroext %w) nounwind {
297*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test9:
298*9880d681SAndroid Build Coastguard Worker; SSE:       ## BB#0: ## %vector.ph
299*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %esi, %xmm0
300*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
301*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
302*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu (%rdi), %xmm1
303*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu 16(%rdi), %xmm2
304*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psubusw %xmm0, %xmm1
305*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psubusw %xmm0, %xmm2
306*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu %xmm2, 16(%rdi)
307*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu %xmm1, (%rdi)
308*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
309*9880d681SAndroid Build Coastguard Worker;
310*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test9:
311*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %vector.ph
312*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovups (%rdi), %ymm0
313*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
314*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovd %esi, %xmm2
315*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
316*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
317*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubw %xmm2, %xmm1, %xmm3
318*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubw %xmm2, %xmm0, %xmm4
319*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
320*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmaxuw %xmm2, %xmm1, %xmm4
321*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm1, %xmm1
322*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmaxuw %xmm2, %xmm0, %xmm2
323*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
324*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
325*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vandps %ymm3, %ymm0, %ymm0
326*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovups %ymm0, (%rdi)
327*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vzeroupper
328*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
329*9880d681SAndroid Build Coastguard Worker;
330*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test9:
331*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %vector.ph
332*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovd %esi, %xmm0
333*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
334*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu (%rdi), %ymm1
335*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubusw %ymm0, %ymm1, %ymm0
336*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
337*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vzeroupper
338*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
339*9880d681SAndroid Build Coastguard Workervector.ph:
340*9880d681SAndroid Build Coastguard Worker  %0 = insertelement <16 x i16> undef, i16 %w, i32 0
341*9880d681SAndroid Build Coastguard Worker  %broadcast15 = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> zeroinitializer
342*9880d681SAndroid Build Coastguard Worker  %1 = getelementptr inbounds i16, i16* %head, i64 0
343*9880d681SAndroid Build Coastguard Worker  %2 = bitcast i16* %1 to <16 x i16>*
344*9880d681SAndroid Build Coastguard Worker  %3 = load <16 x i16>, <16 x i16>* %2, align 2
345*9880d681SAndroid Build Coastguard Worker  %4 = icmp ult <16 x i16> %3, %broadcast15
346*9880d681SAndroid Build Coastguard Worker  %5 = sub <16 x i16> %3, %broadcast15
347*9880d681SAndroid Build Coastguard Worker  %6 = select <16 x i1> %4, <16 x i16> zeroinitializer, <16 x i16> %5
348*9880d681SAndroid Build Coastguard Worker  store <16 x i16> %6, <16 x i16>* %2, align 2
349*9880d681SAndroid Build Coastguard Worker  ret void
350*9880d681SAndroid Build Coastguard Worker}
351*9880d681SAndroid Build Coastguard Worker
; test10 - 256-bit <32 x i8> version of test4 (sign-bit test, xor 0x80).
; The body loads thirty-two i8 lanes from %head (align 1), keeps (x xor 0x80)
; in lanes where x is negative as a signed value and zero elsewhere, and
; stores the result in place.
; Expected code per run line:
;   - SSE2 / SSSE3 runs - two xmm psubusb against one shared register holding
;     128 in all lanes;
;   - AVX1 run - no vpsubusb; the expected sequence is two vpcmpgtb against
;     zero, a ymm vxorps with a memory constant, and vandps;
;   - AVX2 run - one ymm vpsubusb with a RIP-relative memory operand.
352*9880d681SAndroid Build Coastguard Workerdefine void @test10(i8* nocapture %head) nounwind {
353*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test10:
354*9880d681SAndroid Build Coastguard Worker; SSE:       ## BB#0: ## %vector.ph
355*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu (%rdi), %xmm0
356*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu 16(%rdi), %xmm1
357*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
358*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psubusb %xmm2, %xmm0
359*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psubusb %xmm2, %xmm1
360*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu %xmm1, 16(%rdi)
361*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu %xmm0, (%rdi)
362*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
363*9880d681SAndroid Build Coastguard Worker;
364*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test10:
365*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %vector.ph
366*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovups (%rdi), %ymm0
367*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
368*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
369*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
370*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm2
371*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
372*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
373*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
374*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovups %ymm0, (%rdi)
375*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vzeroupper
376*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
377*9880d681SAndroid Build Coastguard Worker;
378*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test10:
379*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %vector.ph
380*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu (%rdi), %ymm0
381*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubusb {{.*}}(%rip), %ymm0, %ymm0
382*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
383*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vzeroupper
384*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
385*9880d681SAndroid Build Coastguard Workervector.ph:
386*9880d681SAndroid Build Coastguard Worker  %0 = getelementptr inbounds i8, i8* %head, i64 0
387*9880d681SAndroid Build Coastguard Worker  %1 = bitcast i8* %0 to <32 x i8>*
388*9880d681SAndroid Build Coastguard Worker  %2 = load <32 x i8>, <32 x i8>* %1, align 1
389*9880d681SAndroid Build Coastguard Worker  %3 = icmp slt <32 x i8> %2, zeroinitializer
390*9880d681SAndroid Build Coastguard Worker  %4 = xor <32 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
391*9880d681SAndroid Build Coastguard Worker  %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
392*9880d681SAndroid Build Coastguard Worker  store <32 x i8> %5, <32 x i8>* %1, align 1
393*9880d681SAndroid Build Coastguard Worker  ret void
394*9880d681SAndroid Build Coastguard Worker
395*9880d681SAndroid Build Coastguard Worker}
396*9880d681SAndroid Build Coastguard Worker
; test11 - unsigned saturating subtract of the constant 127 over 32 bytes.
;
; The IR loads a <32 x i8> from %head (align 1), keeps x + (-127) in every
; lane where x is unsigned-greater-than 126, writes 0 in all other lanes, and
; stores the result back through the same pointer. In wrapping i8 arithmetic
; that is x - 127 clamped at zero.
;
; What the checks below pin for each run configuration:
;   * sse2 and ssse3 runs (they share one check prefix): the vector is handled
;     as two 16-byte halves, each reduced to a psubusb against a splat of 127.
;   * avx2 run: a single 256-bit vpsubusb whose constant is a RIP-relative
;     memory operand.
;   * avx run: no vpsubusb is expected. The checks pin a longer sequence that
;     works on the 128-bit halves (vextractf128 / vinsertf128), using
;     vxorps + vpcmpgtb for the compare, vpaddb for the add, and a 256-bit
;     vandps to apply the mask.
397*9880d681SAndroid Build Coastguard Workerdefine void @test11(i8* nocapture %head) nounwind {
398*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test11:
399*9880d681SAndroid Build Coastguard Worker; SSE:       ## BB#0: ## %vector.ph
400*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu (%rdi), %xmm0
401*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu 16(%rdi), %xmm1
402*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
403*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psubusb %xmm2, %xmm0
404*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    psubusb %xmm2, %xmm1
405*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu %xmm1, 16(%rdi)
406*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movdqu %xmm0, (%rdi)
407*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
408*9880d681SAndroid Build Coastguard Worker;
409*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test11:
410*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %vector.ph
411*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovups (%rdi), %ymm0
412*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
413*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
414*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vxorps %xmm2, %xmm1, %xmm3
415*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254]
416*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpcmpgtb %xmm4, %xmm3, %xmm3
417*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vxorps %xmm2, %xmm0, %xmm2
418*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpcmpgtb %xmm4, %xmm2, %xmm2
419*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
420*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129]
421*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm3, %xmm1, %xmm1
422*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm3, %xmm0, %xmm0
423*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
424*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
425*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovups %ymm0, (%rdi)
426*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vzeroupper
427*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
428*9880d681SAndroid Build Coastguard Worker;
429*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test11:
430*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %vector.ph
431*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu (%rdi), %ymm0
432*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubusb {{.*}}(%rip), %ymm0, %ymm0
433*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
434*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vzeroupper
435*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
436*9880d681SAndroid Build Coastguard Workervector.ph:
437*9880d681SAndroid Build Coastguard Worker  %0 = getelementptr inbounds i8, i8* %head, i64 0
438*9880d681SAndroid Build Coastguard Worker  %1 = bitcast i8* %0 to <32 x i8>*
439*9880d681SAndroid Build Coastguard Worker  %2 = load <32 x i8>, <32 x i8>* %1, align 1
  ; Lane mask: true where the loaded byte is unsigned-greater-than 126.
440*9880d681SAndroid Build Coastguard Worker  %3 = icmp ugt <32 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
  ; Adding -127 is the wrapping i8 spelling of subtracting 127.
441*9880d681SAndroid Build Coastguard Worker  %4 = add <32 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
  ; Lanes that failed the compare are forced to zero.
442*9880d681SAndroid Build Coastguard Worker  %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
443*9880d681SAndroid Build Coastguard Worker  store <32 x i8> %5, <32 x i8>* %1, align 1
444*9880d681SAndroid Build Coastguard Worker  ret void
445*9880d681SAndroid Build Coastguard Worker}
446*9880d681SAndroid Build Coastguard Worker
; test12 - unsigned saturating subtract of a run-time byte from 32 bytes.
;
; %w (passed zeroext) is broadcast to every lane of a <32 x i8>. The IR then
; loads a <32 x i8> from %head (align 1), and for each lane stores 0 when the
; loaded byte is unsigned-less-than %w, otherwise the difference (byte - %w).
; The result is written back through the same pointer.
;
; What the checks below pin for each run configuration:
;   * sse2 run: the splat is built with punpcklbw + pshuflw + pshufd, then
;     each 16-byte half gets one psubusb.
;   * ssse3 run: the splat is a pshufb with an all-zero shuffle mask, then the
;     same two psubusb instructions.
;   * avx run: no vpsubusb is expected. The checks pin vpsubb for the
;     difference, vpmaxub + vpcmpeqb to build the mask, and a 256-bit vandps
;     to combine them, all assembled from 128-bit halves.
;   * avx2 run: vpbroadcastb for the splat and a single 256-bit vpsubusb.
447*9880d681SAndroid Build Coastguard Workerdefine void @test12(i8* nocapture %head, i8 zeroext %w) nounwind {
448*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test12:
449*9880d681SAndroid Build Coastguard Worker; SSE2:       ## BB#0: ## %vector.ph
450*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movd %esi, %xmm0
451*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
452*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
453*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
454*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqu (%rdi), %xmm1
455*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqu 16(%rdi), %xmm2
456*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psubusb %xmm0, %xmm1
457*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    psubusb %xmm0, %xmm2
458*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqu %xmm2, 16(%rdi)
459*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movdqu %xmm1, (%rdi)
460*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
461*9880d681SAndroid Build Coastguard Worker;
462*9880d681SAndroid Build Coastguard Worker; SSSE3-LABEL: test12:
463*9880d681SAndroid Build Coastguard Worker; SSSE3:       ## BB#0: ## %vector.ph
464*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    movd %esi, %xmm0
465*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    pxor %xmm1, %xmm1
466*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    pshufb %xmm1, %xmm0
467*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    movdqu (%rdi), %xmm1
468*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    movdqu 16(%rdi), %xmm2
469*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    psubusb %xmm0, %xmm1
470*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    psubusb %xmm0, %xmm2
471*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    movdqu %xmm2, 16(%rdi)
472*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    movdqu %xmm1, (%rdi)
473*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    retq
474*9880d681SAndroid Build Coastguard Worker;
475*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test12:
476*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %vector.ph
477*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovups (%rdi), %ymm0
478*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovd %esi, %xmm1
479*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
480*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
481*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
482*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubb %xmm1, %xmm2, %xmm3
483*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm4
484*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
485*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmaxub %xmm1, %xmm2, %xmm4
486*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm2, %xmm2
487*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
488*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
489*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
490*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vandps %ymm3, %ymm0, %ymm0
491*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovups %ymm0, (%rdi)
492*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vzeroupper
493*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
494*9880d681SAndroid Build Coastguard Worker;
495*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test12:
496*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %vector.ph
497*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovd %esi, %xmm0
498*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
499*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu (%rdi), %ymm1
500*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpsubusb %ymm0, %ymm1, %ymm0
501*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
502*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vzeroupper
503*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
504*9880d681SAndroid Build Coastguard Workervector.ph:
  ; Splat %w: place it in lane 0, then shuffle with an all-zero mask so every
  ; result lane reads lane 0.
505*9880d681SAndroid Build Coastguard Worker  %0 = insertelement <32 x i8> undef, i8 %w, i32 0
506*9880d681SAndroid Build Coastguard Worker  %broadcast15 = shufflevector <32 x i8> %0, <32 x i8> undef, <32 x i32> zeroinitializer
507*9880d681SAndroid Build Coastguard Worker  %1 = getelementptr inbounds i8, i8* %head, i64 0
508*9880d681SAndroid Build Coastguard Worker  %2 = bitcast i8* %1 to <32 x i8>*
509*9880d681SAndroid Build Coastguard Worker  %3 = load <32 x i8>, <32 x i8>* %2, align 1
  ; Lane mask: true where the subtraction below would wrap (byte < %w, unsigned).
510*9880d681SAndroid Build Coastguard Worker  %4 = icmp ult <32 x i8> %3, %broadcast15
511*9880d681SAndroid Build Coastguard Worker  %5 = sub <32 x i8> %3, %broadcast15
  ; Note the operand order: zero is chosen when the mask is TRUE, the
  ; difference otherwise.
512*9880d681SAndroid Build Coastguard Worker  %6 = select <32 x i1> %4, <32 x i8> zeroinitializer, <32 x i8> %5
513*9880d681SAndroid Build Coastguard Worker  store <32 x i8> %6, <32 x i8>* %2, align 1
514*9880d681SAndroid Build Coastguard Worker  ret void
515*9880d681SAndroid Build Coastguard Worker}
516