// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Werror | FileCheck %s
// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <x86intrin.h>

// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll

// _mm_blend_epi16 with immediate 42: the expected IR is a generic
// shufflevector whose mask takes lanes 1, 3 and 5 from V2 (indices 9, 11, 13)
// and the remaining lanes from V1.
__m128i test_mm_blend_epi16(__m128i V1, __m128i V2) {
  // CHECK-LABEL: test_mm_blend_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
  return _mm_blend_epi16(V1, V2, 42);
}

// _mm_blend_pd with immediate 2: the expected IR is a shufflevector taking
// lane 0 from V1 and lane 1 from V2 (mask index 3).
__m128d test_mm_blend_pd(__m128d V1, __m128d V2) {
  // CHECK-LABEL: test_mm_blend_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 3>
  return _mm_blend_pd(V1, V2, 2);
}

// _mm_blend_ps with immediate 6: the expected IR is a shufflevector taking
// lanes 1 and 2 from V2 (mask indices 5, 6) and lanes 0 and 3 from V1.
__m128 test_mm_blend_ps(__m128 V1, __m128 V2) {
  // CHECK-LABEL: test_mm_blend_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  return _mm_blend_ps(V1, V2, 6);
}

// _mm_blendv_epi8: the expected IR is a call to the pblendvb target intrinsic
// with three <16 x i8> operands.
__m128i test_mm_blendv_epi8(__m128i V1, __m128i V2, __m128i V3) {
  // CHECK-LABEL: test_mm_blendv_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_blendv_epi8(V1, V2, V3);
}

// _mm_blendv_pd: the expected IR is a call to the blendvpd target intrinsic
// with three <2 x double> operands.
__m128d test_mm_blendv_pd(__m128d V1, __m128d V2, __m128d V3) {
  // CHECK-LABEL: test_mm_blendv_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_blendv_pd(V1, V2, V3);
}

// _mm_blendv_ps: the expected IR is a call to the blendvps target intrinsic
// with three <4 x float> operands.
__m128 test_mm_blendv_ps(__m128 V1, __m128 V2, __m128 V3) {
  // CHECK-LABEL: test_mm_blendv_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_blendv_ps(V1, V2, V3);
}

// _mm_ceil_pd: the expected IR is a call to the round.pd target intrinsic
// with rounding immediate i32 2.
__m128d test_mm_ceil_pd(__m128d x) {
  // CHECK-LABEL: test_mm_ceil_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 2)
  return _mm_ceil_pd(x);
}

// _mm_ceil_ps: the expected IR is a call to the round.ps target intrinsic
// with rounding immediate i32 2.
__m128 test_mm_ceil_ps(__m128 x) {
  // CHECK-LABEL: test_mm_ceil_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 2)
  return _mm_ceil_ps(x);
}

// _mm_ceil_sd: the expected IR is a call to the two-operand round.sd target
// intrinsic with rounding immediate i32 2.
__m128d test_mm_ceil_sd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_ceil_sd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 2)
  return _mm_ceil_sd(x, y);
}

// _mm_ceil_ss: the expected IR is a call to the two-operand round.ss target
// intrinsic with rounding immediate i32 2.
__m128 test_mm_ceil_ss(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_ceil_ss
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 2)
  return _mm_ceil_ss(x, y);
}

// _mm_cmpeq_epi64: the expected IR is a generic <2 x i64> equality compare
// whose <2 x i1> result is sign-extended back to <2 x i64>.
__m128i test_mm_cmpeq_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi64
  // CHECK: icmp eq <2 x i64>
  // CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
  return _mm_cmpeq_epi64(A, B);
}

// _mm_cvtepi8_epi16: the expected IR selects lanes 0-7 of the <16 x i8> input
// with a shufflevector and sign-extends them to <8 x i16>.
__m128i test_mm_cvtepi8_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi8_epi16
  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  // CHECK: sext <8 x i8> {{.*}} to <8 x i16>
  return _mm_cvtepi8_epi16(a);
}

// _mm_cvtepi8_epi32: the expected IR selects lanes 0-3 of the <16 x i8> input
// with a shufflevector and sign-extends them to <4 x i32>.
__m128i test_mm_cvtepi8_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi8_epi32
  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: sext <4 x i8> {{.*}} to <4 x i32>
  return _mm_cvtepi8_epi32(a);
}

// _mm_cvtepi8_epi64: the expected IR selects lanes 0-1 of the <16 x i8> input
// with a shufflevector and sign-extends them to <2 x i64>.
__m128i test_mm_cvtepi8_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi8_epi64
  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: sext <2 x i8> {{.*}} to <2 x i64>
  return _mm_cvtepi8_epi64(a);
}

// _mm_cvtepi16_epi32: the expected IR selects lanes 0-3 of the <8 x i16> input
// with a shufflevector and sign-extends them to <4 x i32>.
__m128i test_mm_cvtepi16_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi16_epi32
  // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: sext <4 x i16> {{.*}} to <4 x i32>
  return _mm_cvtepi16_epi32(a);
}

// _mm_cvtepi16_epi64: the expected IR selects lanes 0-1 of the <8 x i16> input
// with a shufflevector and sign-extends them to <2 x i64>.
__m128i test_mm_cvtepi16_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi16_epi64
  // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: sext <2 x i16> {{.*}} to <2 x i64>
  return _mm_cvtepi16_epi64(a);
}

// _mm_cvtepi32_epi64: the expected IR selects lanes 0-1 of the <4 x i32> input
// with a shufflevector and sign-extends them to <2 x i64>.
__m128i test_mm_cvtepi32_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi32_epi64
  // CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> {{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: sext <2 x i32> {{.*}} to <2 x i64>
  return _mm_cvtepi32_epi64(a);
}

// _mm_cvtepu8_epi16: the expected IR selects lanes 0-7 of the <16 x i8> input
// with a shufflevector and zero-extends them to <8 x i16>.
__m128i test_mm_cvtepu8_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu8_epi16
  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  // CHECK: zext <8 x i8> {{.*}} to <8 x i16>
  return _mm_cvtepu8_epi16(a);
}

// _mm_cvtepu8_epi32: the expected IR selects lanes 0-3 of the <16 x i8> input
// with a shufflevector and zero-extends them to <4 x i32>.
__m128i test_mm_cvtepu8_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu8_epi32
  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: zext <4 x i8> {{.*}} to <4 x i32>
  return _mm_cvtepu8_epi32(a);
}

// _mm_cvtepu8_epi64: the expected IR selects lanes 0-1 of the <16 x i8> input
// with a shufflevector and zero-extends them to <2 x i64>.
__m128i test_mm_cvtepu8_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu8_epi64
  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: zext <2 x i8> {{.*}} to <2 x i64>
  return _mm_cvtepu8_epi64(a);
}

// _mm_cvtepu16_epi32: the expected IR selects lanes 0-3 of the <8 x i16> input
// with a shufflevector and zero-extends them to <4 x i32>.
__m128i test_mm_cvtepu16_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu16_epi32
  // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: zext <4 x i16> {{.*}} to <4 x i32>
  return _mm_cvtepu16_epi32(a);
}

// _mm_cvtepu16_epi64: the expected IR selects lanes 0-1 of the <8 x i16> input
// with a shufflevector and zero-extends them to <2 x i64>.
__m128i test_mm_cvtepu16_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu16_epi64
  // CHECK: shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: zext <2 x i16> {{.*}} to <2 x i64>
  return _mm_cvtepu16_epi64(a);
}

// _mm_cvtepu32_epi64: the expected IR selects lanes 0-1 of the <4 x i32> input
// with a shufflevector and zero-extends them to <2 x i64>.
__m128i test_mm_cvtepu32_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu32_epi64
  // CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> {{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: zext <2 x i32> {{.*}} to <2 x i64>
  return _mm_cvtepu32_epi64(a);
}

// _mm_dp_pd with immediate 7: the expected IR is a call to the dppd target
// intrinsic carrying the immediate as i8 7.
__m128d test_mm_dp_pd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_dp_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.dppd(<2 x double> {{.*}}, <2 x double> {{.*}}, i8 7)
  return _mm_dp_pd(x, y, 7);
}

// _mm_dp_ps with immediate 7: the expected IR is a call to the dpps target
// intrinsic carrying the immediate as i8 7.
__m128 test_mm_dp_ps(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_dp_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.dpps(<4 x float> {{.*}}, <4 x float> {{.*}}, i8 7)
  return _mm_dp_ps(x, y, 7);
}

// _mm_extract_epi8 with index 1: the expected IR extracts lane 1 of the
// <16 x i8> vector and zero-extends the byte to i32.
int test_mm_extract_epi8(__m128i x) {
  // CHECK-LABEL: test_mm_extract_epi8
  // CHECK: extractelement <16 x i8> %{{.*}}, i32 1
  // CHECK: zext i8 %{{.*}} to i32
  return _mm_extract_epi8(x, 1);
}

// _mm_extract_epi32 with index 1: the expected IR extracts lane 1 of the
// <4 x i32> vector.
int test_mm_extract_epi32(__m128i x) {
  // CHECK-LABEL: test_mm_extract_epi32
  // CHECK: extractelement <4 x i32> %{{.*}}, i32 1
  return _mm_extract_epi32(x, 1);
}

// _mm_extract_epi64 with index 1: the expected IR extracts lane 1 of the
// <2 x i64> vector.
long long test_mm_extract_epi64(__m128i x) {
  // CHECK-LABEL: test_mm_extract_epi64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 1
  return _mm_extract_epi64(x, 1);
}

// _mm_extract_ps with index 1: the expected IR extracts lane 1 of the
// <4 x float> vector. The function returns int; how the float lane becomes an
// int is not pinned by the expected IR here.
int test_mm_extract_ps(__m128 x) {
  // CHECK-LABEL: test_mm_extract_ps
  // CHECK: extractelement <4 x float> %{{.*}}, i32 1
  return _mm_extract_ps(x, 1);
}

// _mm_floor_pd: the expected IR is a call to the round.pd target intrinsic
// with rounding immediate i32 1.
__m128d test_mm_floor_pd(__m128d x) {
  // CHECK-LABEL: test_mm_floor_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 1)
  return _mm_floor_pd(x);
}

// _mm_floor_ps: the expected IR is a call to the round.ps target intrinsic
// with rounding immediate i32 1.
__m128 test_mm_floor_ps(__m128 x) {
  // CHECK-LABEL: test_mm_floor_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 1)
  return _mm_floor_ps(x);
}

// _mm_floor_sd: the expected IR is a call to the two-operand round.sd target
// intrinsic with rounding immediate i32 1.
__m128d test_mm_floor_sd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_floor_sd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 1)
  return _mm_floor_sd(x, y);
}

// _mm_floor_ss: the expected IR is a call to the two-operand round.ss target
// intrinsic with rounding immediate i32 1.
__m128 test_mm_floor_ss(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_floor_ss
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 1)
  return _mm_floor_ss(x, y);
}

// _mm_insert_epi8 with index 16 on a 16-lane vector: the expected IR inserts
// the byte at lane 0.
// NOTE(review): presumably the header wraps the index to the lane count;
// confirm against smmintrin.h before changing the index.
__m128i test_mm_insert_epi8(__m128i x, char b) {
  // CHECK-LABEL: test_mm_insert_epi8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 0
  return _mm_insert_epi8(x, b, 16);
}

// _mm_insert_epi32 with index 4 on a 4-lane vector: the expected IR inserts
// the value at lane 0.
// NOTE(review): presumably the header wraps the index to the lane count;
// confirm against smmintrin.h before changing the index.
__m128i test_mm_insert_epi32(__m128i x, int b) {
  // CHECK-LABEL: test_mm_insert_epi32
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0
  return _mm_insert_epi32(x, b, 4);
}

// _mm_insert_epi64 with index 2 on a 2-lane vector: the expected IR inserts
// the value at lane 0.
// NOTE(review): presumably the header wraps the index to the lane count;
// confirm against smmintrin.h before changing the index.
__m128i test_mm_insert_epi64(__m128i x, long long b) {
  // CHECK-LABEL: test_mm_insert_epi64
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 0
  return _mm_insert_epi64(x, b, 2);
}

// _mm_insert_ps with immediate 4: the expected IR is a call to the insertps
// target intrinsic carrying the immediate as i8 4.
__m128 test_mm_insert_ps(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_insert_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
  return _mm_insert_ps(x, y, 4);
}

// _mm_max_epi8: the expected IR is a signed greater-than compare on <16 x i8>
// immediately followed by a select between the same two operands.
__m128i test_mm_max_epi8(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_max_epi8
  // CHECK:       [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
  return _mm_max_epi8(x, y);
}

// _mm_max_epi32: the expected IR is a signed greater-than compare on <4 x i32>
// immediately followed by a select between the same two operands.
__m128i test_mm_max_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_max_epi32
  // CHECK:       [[CMP:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]]
  return _mm_max_epi32(x, y);
}

// _mm_max_epu16: the expected IR is an unsigned greater-than compare on
// <8 x i16> immediately followed by a select between the same two operands.
__m128i test_mm_max_epu16(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_max_epu16
  // CHECK:       [[CMP:%.*]] = icmp ugt <8 x i16> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
  return _mm_max_epu16(x, y);
}

// _mm_max_epu32: the expected IR is an unsigned greater-than compare on
// <4 x i32> immediately followed by a select between the same two operands.
__m128i test_mm_max_epu32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_max_epu32
  // CHECK:       [[CMP:%.*]] = icmp ugt <4 x i32> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]]
  return _mm_max_epu32(x, y);
}

// _mm_min_epi8: the expected IR is a signed less-than compare on <16 x i8>
// immediately followed by a select between the same two operands.
__m128i test_mm_min_epi8(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_min_epi8
  // CHECK:       [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
  return _mm_min_epi8(x, y);
}

// _mm_min_epi32: the expected IR is a signed less-than compare on <4 x i32>
// immediately followed by a select between the same two operands.
__m128i test_mm_min_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_min_epi32
  // CHECK:       [[CMP:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]]
  return _mm_min_epi32(x, y);
}

// _mm_min_epu16: the expected IR is an unsigned less-than compare on <8 x i16>
// immediately followed by a select between the same two operands.
__m128i test_mm_min_epu16(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_min_epu16
  // CHECK:       [[CMP:%.*]] = icmp ult <8 x i16> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
  return _mm_min_epu16(x, y);
}

// _mm_min_epu32: the expected IR is an unsigned less-than compare on <4 x i32>
// immediately followed by a select between the same two operands.
__m128i test_mm_min_epu32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_min_epu32
  // CHECK:       [[CMP:%.*]] = icmp ult <4 x i32> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]]
  return _mm_min_epu32(x, y);
}

// _mm_minpos_epu16: the expected IR is a call to the phminposuw target
// intrinsic on a <8 x i16> operand.
__m128i test_mm_minpos_epu16(__m128i x) {
  // CHECK-LABEL: test_mm_minpos_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %{{.*}})
  return _mm_minpos_epu16(x);
}

// _mm_mpsadbw_epu8 with immediate 1: the expected IR is a call to the mpsadbw
// target intrinsic carrying the immediate as i8 1.
__m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_mpsadbw_epu8
  // CHECK: call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 1)
  return _mm_mpsadbw_epu8(x, y, 1);
}

// _mm_mul_epi32: the expected IR is a call to the pmuldq target intrinsic,
// taking two <4 x i32> operands and producing <2 x i64>.
__m128i test_mm_mul_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_mul_epi32
  // CHECK: call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_mul_epi32(x, y);
}

// _mm_mullo_epi32: the expected IR is a plain <4 x i32> multiply rather than
// a target intrinsic call.
__m128i test_mm_mullo_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_mullo_epi32
  // CHECK: mul <4 x i32>
  return _mm_mullo_epi32(x, y);
}

// _mm_packus_epi32: the expected IR is a call to the packusdw target
// intrinsic, taking two <4 x i32> operands and producing <8 x i16>.
__m128i test_mm_packus_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_packus_epi32
  // CHECK: call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_packus_epi32(x, y);
}

// _mm_round_pd with immediate 4: the expected IR is a call to the round.pd
// target intrinsic carrying the immediate as i32 4.
__m128d test_mm_round_pd(__m128d x) {
  // CHECK-LABEL: test_mm_round_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 4)
  return _mm_round_pd(x, 4);
}

// _mm_round_ps with immediate 4: the expected IR is a call to the round.ps
// target intrinsic carrying the immediate as i32 4.
__m128 test_mm_round_ps(__m128 x) {
  // CHECK-LABEL: test_mm_round_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 4)
  return _mm_round_ps(x, 4);
}

// _mm_round_sd with immediate 4: the expected IR is a call to the two-operand
// round.sd target intrinsic carrying the immediate as i32 4.
__m128d test_mm_round_sd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_round_sd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 4)
  return _mm_round_sd(x, y, 4);
}

// _mm_round_ss with immediate 4: the expected IR is a call to the two-operand
// round.ss target intrinsic carrying the immediate as i32 4.
__m128 test_mm_round_ss(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_round_ss
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 4)
  return _mm_round_ss(x, y, 4);
}

// _mm_stream_load_si128: the expected IR is a call to the movntdqa target
// intrinsic, which takes the source address as an i8* and yields <2 x i64>.
__m128i test_mm_stream_load_si128(__m128i const *a) {
  // CHECK-LABEL: test_mm_stream_load_si128
  // CHECK: call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %{{.*}})
  return _mm_stream_load_si128(a);
}

// _mm_test_all_ones takes a single vector; the expected IR is a call to the
// two-operand ptestc target intrinsic (the second operand comes from the
// intrinsic's own definition, which is not visible in this file).
int test_mm_test_all_ones(__m128i x) {
  // CHECK-LABEL: test_mm_test_all_ones
  // CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_test_all_ones(x);
}

// _mm_test_all_zeros: the expected IR is a call to the ptestz target
// intrinsic with two <2 x i64> operands, returning i32.
int test_mm_test_all_zeros(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_test_all_zeros
  // CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_test_all_zeros(x, y);
}

// _mm_test_mix_ones_zeros: the expected IR is a call to the ptestnzc target
// intrinsic with two <2 x i64> operands, returning i32.
int test_mm_test_mix_ones_zeros(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_test_mix_ones_zeros
  // CHECK: call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_test_mix_ones_zeros(x, y);
}

// _mm_testc_si128: the expected IR is a call to the ptestc target intrinsic
// with two <2 x i64> operands, returning i32.
int test_mm_testc_si128(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_testc_si128
  // CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_testc_si128(x, y);
}

// _mm_testnzc_si128: the expected IR is a call to the ptestnzc target
// intrinsic with two <2 x i64> operands, returning i32.
int test_mm_testnzc_si128(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_testnzc_si128
  // CHECK: call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_testnzc_si128(x, y);
}

// _mm_testz_si128: the expected IR is a call to the ptestz target intrinsic
// with two <2 x i64> operands, returning i32.
int test_mm_testz_si128(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_testz_si128
  // CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_testz_si128(x, y);
}