xref: /aosp_15_r20/external/clang/test/CodeGen/xop-builtins.c (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Werror | FileCheck %s
2*67e74705SXin Li // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
3*67e74705SXin Li 
4*67e74705SXin Li // Don't include mm_malloc.h, it's system specific.
5*67e74705SXin Li #define __MM_MALLOC_H
6*67e74705SXin Li 
7*67e74705SXin Li #include <x86intrin.h>
8*67e74705SXin Li 
9*67e74705SXin Li // NOTE: This should match the tests in llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
10*67e74705SXin Li 
// Verifies that _mm_maccs_epi16 is emitted as a call to
// @llvm.x86.xop.vpmacssww returning <8 x i16>.
__m128i test_mm_maccs_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccs_epi16
  // CHECK: call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_maccs_epi16(a, b, c);
}
16*67e74705SXin Li 
// Verifies that _mm_macc_epi16 is emitted as a call to
// @llvm.x86.xop.vpmacsww returning <8 x i16>.
__m128i test_mm_macc_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_macc_epi16
  // CHECK: call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_macc_epi16(a, b, c);
}
22*67e74705SXin Li 
// Verifies that _mm_maccsd_epi16 is emitted as a call to
// @llvm.x86.xop.vpmacsswd, which takes two <8 x i16> operands and a
// <4 x i32> operand and returns <4 x i32>.
__m128i test_mm_maccsd_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccsd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_maccsd_epi16(a, b, c);
}
28*67e74705SXin Li 
// Verifies that _mm_maccd_epi16 is emitted as a call to
// @llvm.x86.xop.vpmacswd, which takes two <8 x i16> operands and a
// <4 x i32> operand and returns <4 x i32>.
__m128i test_mm_maccd_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_maccd_epi16(a, b, c);
}
34*67e74705SXin Li 
// Verifies that _mm_maccs_epi32 is emitted as a call to
// @llvm.x86.xop.vpmacssdd returning <4 x i32>.
__m128i test_mm_maccs_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccs_epi32
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_maccs_epi32(a, b, c);
}
40*67e74705SXin Li 
// Verifies that _mm_macc_epi32 is emitted as a call to
// @llvm.x86.xop.vpmacsdd returning <4 x i32>.
__m128i test_mm_macc_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_macc_epi32
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_macc_epi32(a, b, c);
}
46*67e74705SXin Li 
// Verifies that _mm_maccslo_epi32 is emitted as a call to
// @llvm.x86.xop.vpmacssdql, which takes two <4 x i32> operands and a
// <2 x i64> operand and returns <2 x i64>.
__m128i test_mm_maccslo_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccslo_epi32
  // CHECK: call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_maccslo_epi32(a, b, c);
}
52*67e74705SXin Li 
// Verifies that _mm_macclo_epi32 is emitted as a call to
// @llvm.x86.xop.vpmacsdql, which takes two <4 x i32> operands and a
// <2 x i64> operand and returns <2 x i64>.
__m128i test_mm_macclo_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_macclo_epi32
  // CHECK: call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_macclo_epi32(a, b, c);
}
58*67e74705SXin Li 
// Verifies that _mm_maccshi_epi32 is emitted as a call to
// @llvm.x86.xop.vpmacssdqh, which takes two <4 x i32> operands and a
// <2 x i64> operand and returns <2 x i64>.
__m128i test_mm_maccshi_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maccshi_epi32
  // CHECK: call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_maccshi_epi32(a, b, c);
}
64*67e74705SXin Li 
// Verifies that _mm_macchi_epi32 is emitted as a call to
// @llvm.x86.xop.vpmacsdqh, which takes two <4 x i32> operands and a
// <2 x i64> operand and returns <2 x i64>.
__m128i test_mm_macchi_epi32(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_macchi_epi32
  // CHECK: call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_macchi_epi32(a, b, c);
}
70*67e74705SXin Li 
// Verifies that _mm_maddsd_epi16 is emitted as a call to
// @llvm.x86.xop.vpmadcsswd, which takes two <8 x i16> operands and a
// <4 x i32> operand and returns <4 x i32>.
__m128i test_mm_maddsd_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maddsd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_maddsd_epi16(a, b, c);
}
76*67e74705SXin Li 
// Verifies that _mm_maddd_epi16 is emitted as a call to
// @llvm.x86.xop.vpmadcswd, which takes two <8 x i16> operands and a
// <4 x i32> operand and returns <4 x i32>.
__m128i test_mm_maddd_epi16(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_maddd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_maddd_epi16(a, b, c);
}
82*67e74705SXin Li 
// Verifies that _mm_haddw_epi8 is emitted as a call to
// @llvm.x86.xop.vphaddbw, taking <16 x i8> and returning <8 x i16>.
__m128i test_mm_haddw_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_haddw_epi8
  // CHECK: call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %{{.*}})
  return _mm_haddw_epi8(a);
}
88*67e74705SXin Li 
// Verifies that _mm_haddd_epi8 is emitted as a call to
// @llvm.x86.xop.vphaddbd, taking <16 x i8> and returning <4 x i32>.
__m128i test_mm_haddd_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_haddd_epi8
  // CHECK: call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %{{.*}})
  return _mm_haddd_epi8(a);
}
94*67e74705SXin Li 
// Verifies that _mm_haddq_epi8 is emitted as a call to
// @llvm.x86.xop.vphaddbq, taking <16 x i8> and returning <2 x i64>.
__m128i test_mm_haddq_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epi8
  // CHECK: call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %{{.*}})
  return _mm_haddq_epi8(a);
}
100*67e74705SXin Li 
// Verifies that _mm_haddd_epi16 is emitted as a call to
// @llvm.x86.xop.vphaddwd, taking <8 x i16> and returning <4 x i32>.
__m128i test_mm_haddd_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_haddd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %{{.*}})
  return _mm_haddd_epi16(a);
}
106*67e74705SXin Li 
// Verifies that _mm_haddq_epi16 is emitted as a call to
// @llvm.x86.xop.vphaddwq, taking <8 x i16> and returning <2 x i64>.
__m128i test_mm_haddq_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epi16
  // CHECK: call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %{{.*}})
  return _mm_haddq_epi16(a);
}
112*67e74705SXin Li 
// Verifies that _mm_haddq_epi32 is emitted as a call to
// @llvm.x86.xop.vphadddq, taking <4 x i32> and returning <2 x i64>.
__m128i test_mm_haddq_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epi32
  // CHECK: call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %{{.*}})
  return _mm_haddq_epi32(a);
}
118*67e74705SXin Li 
// Verifies that _mm_haddw_epu8 is emitted as a call to
// @llvm.x86.xop.vphaddubw, taking <16 x i8> and returning <8 x i16>.
__m128i test_mm_haddw_epu8(__m128i a) {
  // CHECK-LABEL: test_mm_haddw_epu8
  // CHECK: call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %{{.*}})
  return _mm_haddw_epu8(a);
}
124*67e74705SXin Li 
// Verifies that _mm_haddd_epu8 is emitted as a call to
// @llvm.x86.xop.vphaddubd, taking <16 x i8> and returning <4 x i32>.
__m128i test_mm_haddd_epu8(__m128i a) {
  // CHECK-LABEL: test_mm_haddd_epu8
  // CHECK: call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %{{.*}})
  return _mm_haddd_epu8(a);
}
130*67e74705SXin Li 
// Verifies that _mm_haddq_epu8 is emitted as a call to
// @llvm.x86.xop.vphaddubq, taking <16 x i8> and returning <2 x i64>.
__m128i test_mm_haddq_epu8(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epu8
  // CHECK: call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %{{.*}})
  return _mm_haddq_epu8(a);
}
136*67e74705SXin Li 
// Verifies that _mm_haddd_epu16 is emitted as a call to
// @llvm.x86.xop.vphadduwd, taking <8 x i16> and returning <4 x i32>.
__m128i test_mm_haddd_epu16(__m128i a) {
  // CHECK-LABEL: test_mm_haddd_epu16
  // CHECK: call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %{{.*}})
  return _mm_haddd_epu16(a);
}
142*67e74705SXin Li 
// Verifies that _mm_haddq_epu16 is emitted as a call to
// @llvm.x86.xop.vphadduwq, taking <8 x i16> and returning <2 x i64>.
__m128i test_mm_haddq_epu16(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epu16
  // CHECK: call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %{{.*}})
  return _mm_haddq_epu16(a);
}
148*67e74705SXin Li 
// Verifies that _mm_haddq_epu32 is emitted as a call to
// @llvm.x86.xop.vphaddudq, taking <4 x i32> and returning <2 x i64>.
__m128i test_mm_haddq_epu32(__m128i a) {
  // CHECK-LABEL: test_mm_haddq_epu32
  // CHECK: call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %{{.*}})
  return _mm_haddq_epu32(a);
}
154*67e74705SXin Li 
// Verifies that _mm_hsubw_epi8 is emitted as a call to
// @llvm.x86.xop.vphsubbw, taking <16 x i8> and returning <8 x i16>.
__m128i test_mm_hsubw_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_hsubw_epi8
  // CHECK: call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %{{.*}})
  return _mm_hsubw_epi8(a);
}
160*67e74705SXin Li 
// Verifies that _mm_hsubd_epi16 is emitted as a call to
// @llvm.x86.xop.vphsubwd, taking <8 x i16> and returning <4 x i32>.
__m128i test_mm_hsubd_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_hsubd_epi16
  // CHECK: call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %{{.*}})
  return _mm_hsubd_epi16(a);
}
166*67e74705SXin Li 
// Verifies that _mm_hsubq_epi32 is emitted as a call to
// @llvm.x86.xop.vphsubdq, taking <4 x i32> and returning <2 x i64>.
__m128i test_mm_hsubq_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_hsubq_epi32
  // CHECK: call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %{{.*}})
  return _mm_hsubq_epi32(a);
}
172*67e74705SXin Li 
// Verifies that _mm_cmov_si128 is emitted as a call to
// @llvm.x86.xop.vpcmov on three <2 x i64> operands.
__m128i test_mm_cmov_si128(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_cmov_si128
  // CHECK: call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_cmov_si128(a, b, c);
}
178*67e74705SXin Li 
// Verifies that the 256-bit _mm256_cmov_si256 is emitted as a call to
// @llvm.x86.xop.vpcmov.256 on three <4 x i64> operands.
__m256i test_mm256_cmov_si256(__m256i a, __m256i b, __m256i c) {
  // CHECK-LABEL: test_mm256_cmov_si256
  // CHECK: call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
  return _mm256_cmov_si256(a, b, c);
}
184*67e74705SXin Li 
// Verifies that _mm_perm_epi8 is emitted as a call to
// @llvm.x86.xop.vpperm on three <16 x i8> operands.
__m128i test_mm_perm_epi8(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_perm_epi8
  // CHECK: call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_perm_epi8(a, b, c);
}
190*67e74705SXin Li 
// Verifies that _mm_rot_epi8 is emitted as a call to
// @llvm.x86.xop.vprotb on two <16 x i8> operands.
__m128i test_mm_rot_epi8(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_rot_epi8
  // CHECK: call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_rot_epi8(a, b);
}
196*67e74705SXin Li 
// Verifies that _mm_rot_epi16 is emitted as a call to
// @llvm.x86.xop.vprotw on two <8 x i16> operands.
__m128i test_mm_rot_epi16(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_rot_epi16
  // CHECK: call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_rot_epi16(a, b);
}
202*67e74705SXin Li 
// Verifies that _mm_rot_epi32 is emitted as a call to
// @llvm.x86.xop.vprotd on two <4 x i32> operands.
__m128i test_mm_rot_epi32(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_rot_epi32
  // CHECK: call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_rot_epi32(a, b);
}
208*67e74705SXin Li 
// Verifies that _mm_rot_epi64 is emitted as a call to
// @llvm.x86.xop.vprotq on two <2 x i64> operands.
__m128i test_mm_rot_epi64(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_rot_epi64
  // CHECK: call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_rot_epi64(a, b);
}
214*67e74705SXin Li 
// Verifies that _mm_roti_epi8 with the constant 1 is emitted as a call to
// @llvm.x86.xop.vprotbi whose second argument is the immediate i8 1.
__m128i test_mm_roti_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi8
  // CHECK: call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %{{.*}}, i8 1)
  return _mm_roti_epi8(a, 1);
}
220*67e74705SXin Li 
// Verifies that _mm_roti_epi16 with the constant 50 is emitted as a call to
// @llvm.x86.xop.vprotwi whose second argument is the immediate i8 50
// (the constant is expected to reach the IR unchanged).
__m128i test_mm_roti_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi16
  // CHECK: call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %{{.*}}, i8 50)
  return _mm_roti_epi16(a, 50);
}
226*67e74705SXin Li 
// Verifies that _mm_roti_epi32 with the negative constant -30 is emitted as
// a call to @llvm.x86.xop.vprotdi whose second argument is the immediate
// i8 -30 (the constant is expected to reach the IR unchanged).
__m128i test_mm_roti_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi32
  // CHECK: call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %{{.*}}, i8 -30)
  return _mm_roti_epi32(a, -30);
}
232*67e74705SXin Li 
// Verifies that _mm_roti_epi64 with the constant 100 is emitted as a call to
// @llvm.x86.xop.vprotqi whose second argument is the immediate i8 100
// (the constant is expected to reach the IR unchanged).
__m128i test_mm_roti_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi64
  // CHECK: call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %{{.*}}, i8 100)
  return _mm_roti_epi64(a, 100);
}
238*67e74705SXin Li 
// Verifies that _mm_shl_epi8 is emitted as a call to
// @llvm.x86.xop.vpshlb on two <16 x i8> operands.
__m128i test_mm_shl_epi8(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_shl_epi8
  // CHECK: call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_shl_epi8(a, b);
}
244*67e74705SXin Li 
// Verifies that _mm_shl_epi16 is emitted as a call to
// @llvm.x86.xop.vpshlw on two <8 x i16> operands.
__m128i test_mm_shl_epi16(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_shl_epi16
  // CHECK: call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_shl_epi16(a, b);
}
250*67e74705SXin Li 
// Verifies that _mm_shl_epi32 is emitted as a call to
// @llvm.x86.xop.vpshld on two <4 x i32> operands.
__m128i test_mm_shl_epi32(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_shl_epi32
  // CHECK: call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_shl_epi32(a, b);
}
256*67e74705SXin Li 
// Verifies that _mm_shl_epi64 is emitted as a call to
// @llvm.x86.xop.vpshlq on two <2 x i64> operands.
__m128i test_mm_shl_epi64(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_shl_epi64
  // CHECK: call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_shl_epi64(a, b);
}
262*67e74705SXin Li 
// Verifies that _mm_sha_epi8 is emitted as a call to
// @llvm.x86.xop.vpshab on two <16 x i8> operands.
__m128i test_mm_sha_epi8(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_sha_epi8
  // CHECK: call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_sha_epi8(a, b);
}
268*67e74705SXin Li 
// Verifies that _mm_sha_epi16 is emitted as a call to
// @llvm.x86.xop.vpshaw on two <8 x i16> operands.
__m128i test_mm_sha_epi16(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_sha_epi16
  // CHECK: call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
  return _mm_sha_epi16(a, b);
}
274*67e74705SXin Li 
// Verifies that _mm_sha_epi32 is emitted as a call to
// @llvm.x86.xop.vpshad on two <4 x i32> operands.
__m128i test_mm_sha_epi32(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_sha_epi32
  // CHECK: call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
  return _mm_sha_epi32(a, b);
}
280*67e74705SXin Li 
// Verifies that _mm_sha_epi64 is emitted as a call to
// @llvm.x86.xop.vpshaq on two <2 x i64> operands.
__m128i test_mm_sha_epi64(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_sha_epi64
  // CHECK: call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
  return _mm_sha_epi64(a, b);
}
286*67e74705SXin Li 
test_mm_com_epu8(__m128i a,__m128i b)287*67e74705SXin Li __m128i test_mm_com_epu8(__m128i a, __m128i b) {
288*67e74705SXin Li   // CHECK-LABEL: test_mm_com_epu8
289*67e74705SXin Li   // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 0)
290*67e74705SXin Li   return _mm_com_epu8(a, b, 0);
291*67e74705SXin Li }
292*67e74705SXin Li 
test_mm_com_epu16(__m128i a,__m128i b)293*67e74705SXin Li __m128i test_mm_com_epu16(__m128i a, __m128i b) {
294*67e74705SXin Li   // CHECK-LABEL: test_mm_com_epu16
295*67e74705SXin Li   // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 0)
296*67e74705SXin Li   return _mm_com_epu16(a, b, 0);
297*67e74705SXin Li }
298*67e74705SXin Li 
test_mm_com_epu32(__m128i a,__m128i b)299*67e74705SXin Li __m128i test_mm_com_epu32(__m128i a, __m128i b) {
300*67e74705SXin Li   // CHECK-LABEL: test_mm_com_epu32
301*67e74705SXin Li   // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 0)
302*67e74705SXin Li   return _mm_com_epu32(a, b, 0);
303*67e74705SXin Li }
304*67e74705SXin Li 
test_mm_com_epu64(__m128i a,__m128i b)305*67e74705SXin Li __m128i test_mm_com_epu64(__m128i a, __m128i b) {
306*67e74705SXin Li   // CHECK-LABEL: test_mm_com_epu64
307*67e74705SXin Li   // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 0)
308*67e74705SXin Li   return _mm_com_epu64(a, b, 0);
309*67e74705SXin Li }
310*67e74705SXin Li 
test_mm_com_epi8(__m128i a,__m128i b)311*67e74705SXin Li __m128i test_mm_com_epi8(__m128i a, __m128i b) {
312*67e74705SXin Li   // CHECK-LABEL: test_mm_com_epi8
313*67e74705SXin Li   // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 0)
314*67e74705SXin Li   return _mm_com_epi8(a, b, 0);
315*67e74705SXin Li }
316*67e74705SXin Li 
test_mm_com_epi16(__m128i a,__m128i b)317*67e74705SXin Li __m128i test_mm_com_epi16(__m128i a, __m128i b) {
318*67e74705SXin Li   // CHECK-LABEL: test_mm_com_epi16
319*67e74705SXin Li   // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 0)
320*67e74705SXin Li   return _mm_com_epi16(a, b, 0);
321*67e74705SXin Li }
322*67e74705SXin Li 
test_mm_com_epi32(__m128i a,__m128i b)323*67e74705SXin Li __m128i test_mm_com_epi32(__m128i a, __m128i b) {
324*67e74705SXin Li   // CHECK-LABEL: test_mm_com_epi32
325*67e74705SXin Li   // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 0)
326*67e74705SXin Li   return _mm_com_epi32(a, b, 0);
327*67e74705SXin Li }
328*67e74705SXin Li 
test_mm_com_epi64(__m128i a,__m128i b)329*67e74705SXin Li __m128i test_mm_com_epi64(__m128i a, __m128i b) {
330*67e74705SXin Li   // CHECK-LABEL: test_mm_com_epi64
331*67e74705SXin Li   // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 0)
332*67e74705SXin Li   return _mm_com_epi64(a, b, 0);
333*67e74705SXin Li }
334*67e74705SXin Li 
// Verifies that _mm_permute2_pd with the constant 0 is emitted as a call to
// @llvm.x86.xop.vpermil2pd: two <2 x double> operands, a <2 x i64> operand
// and the immediate i8 0.
__m128d test_mm_permute2_pd(__m128d a, __m128d b, __m128i c) {
  // CHECK-LABEL: test_mm_permute2_pd
  // CHECK: call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i64> %{{.*}}, i8 0)
  return _mm_permute2_pd(a, b, c, 0);
}
340*67e74705SXin Li 
// Verifies that _mm256_permute2_pd with the constant 0 is emitted as a call
// to @llvm.x86.xop.vpermil2pd.256: two <4 x double> operands, a <4 x i64>
// operand and the immediate i8 0.
__m256d test_mm256_permute2_pd(__m256d a, __m256d b, __m256i c) {
  // CHECK-LABEL: test_mm256_permute2_pd
  // CHECK: call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i64> %{{.*}}, i8 0)
  return _mm256_permute2_pd(a, b, c, 0);
}
346*67e74705SXin Li 
// Verifies that _mm_permute2_ps with the constant 0 is emitted as a call to
// @llvm.x86.xop.vpermil2ps: two <4 x float> operands, a <4 x i32> operand
// and the immediate i8 0.
__m128 test_mm_permute2_ps(__m128 a, __m128 b, __m128i c) {
  // CHECK-LABEL: test_mm_permute2_ps
  // CHECK: call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> %{{.*}}, i8 0)
  return _mm_permute2_ps(a, b, c, 0);
}
352*67e74705SXin Li 
// Verifies that _mm256_permute2_ps with the constant 0 is emitted as a call
// to @llvm.x86.xop.vpermil2ps.256: two <8 x float> operands, an <8 x i32>
// operand and the immediate i8 0.
__m256 test_mm256_permute2_ps(__m256 a, __m256 b, __m256i c) {
  // CHECK-LABEL: test_mm256_permute2_ps
  // CHECK: call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> %{{.*}}, i8 0)
  return _mm256_permute2_ps(a, b, c, 0);
}
358*67e74705SXin Li 
test_mm_frcz_ss(__m128 a)359*67e74705SXin Li __m128 test_mm_frcz_ss(__m128 a) {
360*67e74705SXin Li   // CHECK-LABEL: test_mm_frcz_ss
361*67e74705SXin Li   // CHECK: call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %{{.*}})
362*67e74705SXin Li   return _mm_frcz_ss(a);
363*67e74705SXin Li }
364*67e74705SXin Li 
test_mm_frcz_sd(__m128d a)365*67e74705SXin Li __m128d test_mm_frcz_sd(__m128d a) {
366*67e74705SXin Li   // CHECK-LABEL: test_mm_frcz_sd
367*67e74705SXin Li   // CHECK: call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %{{.*}})
368*67e74705SXin Li   return _mm_frcz_sd(a);
369*67e74705SXin Li }
370*67e74705SXin Li 
// Verifies that _mm_frcz_ps is emitted as a call to
// @llvm.x86.xop.vfrcz.ps on a <4 x float> operand.
__m128 test_mm_frcz_ps(__m128 a) {
  // CHECK-LABEL: test_mm_frcz_ps
  // CHECK: call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %{{.*}})
  return _mm_frcz_ps(a);
}
376*67e74705SXin Li 
// Verifies that _mm_frcz_pd is emitted as a call to
// @llvm.x86.xop.vfrcz.pd on a <2 x double> operand.
__m128d test_mm_frcz_pd(__m128d a) {
  // CHECK-LABEL: test_mm_frcz_pd
  // CHECK: call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %{{.*}})
  return _mm_frcz_pd(a);
}
382*67e74705SXin Li 
// Verifies that the 256-bit _mm256_frcz_ps is emitted as a call to
// @llvm.x86.xop.vfrcz.ps.256 on an <8 x float> operand.
__m256 test_mm256_frcz_ps(__m256 a) {
  // CHECK-LABEL: test_mm256_frcz_ps
  // CHECK: call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %{{.*}})
  return _mm256_frcz_ps(a);
}
388*67e74705SXin Li 
// Verifies that the 256-bit _mm256_frcz_pd is emitted as a call to
// @llvm.x86.xop.vfrcz.pd.256 on a <4 x double> operand.
__m256d test_mm256_frcz_pd(__m256d a) {
  // CHECK-LABEL: test_mm256_frcz_pd
  // CHECK: call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %{{.*}})
  return _mm256_frcz_pd(a);
}
394