// xref: /aosp_15_r20/external/clang/test/CodeGen/sse-builtins.c (revision 67e74705e28f6214e480b399dd47ea732279e315)
// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Werror | FileCheck %s

// Don't include mm_malloc.h, it's system specific.
// Pre-defining its include guard makes the header expand to nothing.
#define __MM_MALLOC_H

#include <x86intrin.h>

// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll

// _mm_add_ps: expects a packed fadd on <4 x float> in the emitted IR.
__m128 test_mm_add_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ps
  // CHECK: fadd <4 x float>
  return _mm_add_ps(A, B);
}

// _mm_add_ss: expects lane 0 to be extracted twice, added as scalar floats,
// and the sum inserted back into lane 0 of a <4 x float>.
__m128 test_mm_add_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fadd float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_add_ss(A, B);
}

// _mm_and_ps: expects a bitwise and performed on <4 x i32>.
__m128 test_mm_and_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_and_ps
  // CHECK: and <4 x i32>
  return _mm_and_ps(A, B);
}

// _mm_andnot_ps: expects an xor against all-ones (bitwise not) followed by
// an and, both on <4 x i32>.
__m128 test_mm_andnot_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_andnot_ps
  // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
  // CHECK: and <4 x i32>
  return _mm_andnot_ps(A, B);
}

// _mm_cmpeq_ps: expects fcmp oeq on <4 x float>, immediately sign-extended to
// <4 x i32>, bitcast back to <4 x float>, and returned.
__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpeq_ps
  // CHECK:         [[CMP:%.*]] = fcmp oeq <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpeq_ps(__a, __b);
}

// _mm_cmpeq_ss: expects a call to llvm.x86.sse.cmp.ss with predicate immediate 0.
__m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpeq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
  return _mm_cmpeq_ss(__a, __b);
}

// _mm_cmpge_ps: expects fcmp ole on <4 x float>, then sext/bitcast/ret.
// NOTE(review): the ole predicate for a ">=" intrinsic presumably comes from
// the header swapping the operands; confirm against xmmintrin.h.
__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpge_ps
  // CHECK:         [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpge_ps(__a, __b);
}

// _mm_cmpge_ss: expects llvm.x86.sse.cmp.ss with immediate 2, followed by a
// shufflevector whose mask <4, 1, 2, 3> takes lane 0 from its second operand
// and lanes 1-3 from its first.
__m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpge_ss(__a, __b);
}

// _mm_cmpgt_ps: expects fcmp olt on <4 x float>, then sext/bitcast/ret.
// NOTE(review): the olt predicate for a ">" intrinsic presumably comes from
// the header swapping the operands; confirm against xmmintrin.h.
__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpgt_ps
  // CHECK:         [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpgt_ps(__a, __b);
}

// _mm_cmpgt_ss: expects llvm.x86.sse.cmp.ss with immediate 1, followed by a
// shufflevector whose mask <4, 1, 2, 3> takes lane 0 from its second operand
// and lanes 1-3 from its first.
__m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpgt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpgt_ss(__a, __b);
}

// _mm_cmple_ps: expects fcmp ole on <4 x float>, immediately sign-extended to
// <4 x i32>, bitcast back to <4 x float>, and returned.
__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmple_ps
  // CHECK:         [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmple_ps(__a, __b);
}

// _mm_cmple_ss: expects a call to llvm.x86.sse.cmp.ss with predicate immediate 2.
__m128 test_mm_cmple_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmple_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmple_ss(__a, __b);
}

// _mm_cmplt_ps: expects fcmp olt on <4 x float>, immediately sign-extended to
// <4 x i32>, bitcast back to <4 x float>, and returned.
__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmplt_ps
  // CHECK:         [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmplt_ps(__a, __b);
}

// _mm_cmplt_ss: expects a call to llvm.x86.sse.cmp.ss with predicate immediate 1.
__m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmplt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmplt_ss(__a, __b);
}

// _mm_cmpneq_ps: expects fcmp une on <4 x float>, immediately sign-extended to
// <4 x i32>, bitcast back to <4 x float>, and returned.
__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpneq_ps
  // CHECK:         [[CMP:%.*]] = fcmp une <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpneq_ps(__a, __b);
}

// _mm_cmpneq_ss: expects a call to llvm.x86.sse.cmp.ss with predicate immediate 4.
__m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpneq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
  return _mm_cmpneq_ss(__a, __b);
}

// _mm_cmpnge_ps: expects fcmp ugt on <4 x float>, then sext/bitcast/ret.
// NOTE(review): the ugt predicate for a "not >=" intrinsic presumably comes
// from the header swapping the operands; confirm against xmmintrin.h.
__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnge_ps
  // CHECK:         [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpnge_ps(__a, __b);
}

// _mm_cmpnge_ss: expects llvm.x86.sse.cmp.ss with immediate 6, followed by a
// shufflevector whose mask <4, 1, 2, 3> takes lane 0 from its second operand
// and lanes 1-3 from its first.
__m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpnge_ss(__a, __b);
}

// _mm_cmpngt_ps: expects fcmp uge on <4 x float>, then sext/bitcast/ret.
// NOTE(review): the uge predicate for a "not >" intrinsic presumably comes
// from the header swapping the operands; confirm against xmmintrin.h.
__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpngt_ps
  // CHECK:         [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpngt_ps(__a, __b);
}

// _mm_cmpngt_ss: expects llvm.x86.sse.cmp.ss with immediate 5, followed by a
// shufflevector whose mask <4, 1, 2, 3> takes lane 0 from its second operand
// and lanes 1-3 from its first.
__m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpngt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpngt_ss(__a, __b);
}

// _mm_cmpnle_ps: expects fcmp ugt on <4 x float>, immediately sign-extended to
// <4 x i32>, bitcast back to <4 x float>, and returned.
__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnle_ps
  // CHECK:         [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpnle_ps(__a, __b);
}

// _mm_cmpnle_ss: expects a call to llvm.x86.sse.cmp.ss with predicate immediate 6.
__m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnle_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnle_ss(__a, __b);
}

// _mm_cmpnlt_ps: expects fcmp uge on <4 x float>, immediately sign-extended to
// <4 x i32>, bitcast back to <4 x float>, and returned.
__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_ps
  // CHECK:         [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpnlt_ps(__a, __b);
}

// _mm_cmpnlt_ss: expects a call to llvm.x86.sse.cmp.ss with predicate immediate 5.
__m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpnlt_ss(__a, __b);
}

// _mm_cmpord_ps: expects fcmp ord on <4 x float>, immediately sign-extended to
// <4 x i32>, bitcast back to <4 x float>, and returned.
__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpord_ps
  // CHECK:         [[CMP:%.*]] = fcmp ord <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpord_ps(__a, __b);
}

// _mm_cmpord_ss: expects a call to llvm.x86.sse.cmp.ss with predicate immediate 7.
__m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
  return _mm_cmpord_ss(__a, __b);
}

// _mm_cmpunord_ps: expects fcmp uno on <4 x float>, immediately sign-extended
// to <4 x i32>, bitcast back to <4 x float>, and returned.
__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpunord_ps
  // CHECK:         [[CMP:%.*]] = fcmp uno <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpunord_ps(__a, __b);
}

// _mm_cmpunord_ss: expects a call to llvm.x86.sse.cmp.ss with predicate immediate 3.
__m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpunord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
  return _mm_cmpunord_ss(__a, __b);
}

// _mm_comieq_ss: expects an i32-returning call to llvm.x86.sse.comieq.ss.
int test_mm_comieq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comieq_ss
  // CHECK: call i32 @llvm.x86.sse.comieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comieq_ss(A, B);
}

// _mm_comige_ss: expects an i32-returning call to llvm.x86.sse.comige.ss.
int test_mm_comige_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comige_ss
  // CHECK: call i32 @llvm.x86.sse.comige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comige_ss(A, B);
}

// _mm_comigt_ss: expects an i32-returning call to llvm.x86.sse.comigt.ss.
int test_mm_comigt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comigt_ss
  // CHECK: call i32 @llvm.x86.sse.comigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comigt_ss(A, B);
}

// _mm_comile_ss: expects an i32-returning call to llvm.x86.sse.comile.ss.
int test_mm_comile_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comile_ss
  // CHECK: call i32 @llvm.x86.sse.comile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comile_ss(A, B);
}

// _mm_comilt_ss: expects an i32-returning call to llvm.x86.sse.comilt.ss.
int test_mm_comilt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comilt_ss
  // CHECK: call i32 @llvm.x86.sse.comilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comilt_ss(A, B);
}

// _mm_comineq_ss: expects an i32-returning call to llvm.x86.sse.comineq.ss.
int test_mm_comineq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comineq_ss
  // CHECK: call i32 @llvm.x86.sse.comineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comineq_ss(A, B);
}

// _mm_cvt_ss2si: expects an i32-returning call to llvm.x86.sse.cvtss2si.
int test_mm_cvt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvt_ss2si
  // CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvt_ss2si(A);
}

// _mm_cvtsi32_ss: expects the i32 to be converted with sitofp and the
// resulting float inserted into lane 0 of a <4 x float>.
__m128 test_mm_cvtsi32_ss(__m128 A, int B) {
  // CHECK-LABEL: test_mm_cvtsi32_ss
  // CHECK: sitofp i32 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi32_ss(A, B);
}

// _mm_cvtsi64_ss: expects the i64 to be converted with sitofp and the
// resulting float inserted into lane 0 of a <4 x float>.
__m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
  // CHECK-LABEL: test_mm_cvtsi64_ss
  // CHECK: sitofp i64 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi64_ss(A, B);
}

// _mm_cvtss_f32: expects lane 0 to be read with extractelement.
float test_mm_cvtss_f32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_f32
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  return _mm_cvtss_f32(A);
}

// _mm_cvtss_si32: expects an i32-returning call to llvm.x86.sse.cvtss2si.
int test_mm_cvtss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si32
  // CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvtss_si32(A);
}

// _mm_cvtss_si64: expects an i64-returning call to llvm.x86.sse.cvtss2si64.
long long test_mm_cvtss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si64
  // CHECK: call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %{{.*}})
  return _mm_cvtss_si64(A);
}

// _mm_cvtt_ss2si: expects lane 0 to be extracted and converted with a plain
// fptosi to i32 (no target intrinsic call).
int test_mm_cvtt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvtt_ss2si
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fptosi float %{{.*}} to i32
  return _mm_cvtt_ss2si(A);
}

// _mm_cvttss_si32: expects lane 0 to be extracted and converted with a plain
// fptosi to i32 (no target intrinsic call).
int test_mm_cvttss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si32
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fptosi float %{{.*}} to i32
  return _mm_cvttss_si32(A);
}

// _mm_cvttss_si64: expects lane 0 to be extracted and converted with a plain
// fptosi to i64 (no target intrinsic call).
long long test_mm_cvttss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si64
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fptosi float %{{.*}} to i64
  return _mm_cvttss_si64(A);
}

// _mm_div_ps: expects a packed fdiv on <4 x float> in the emitted IR.
__m128 test_mm_div_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ps
  // CHECK: fdiv <4 x float>
  return _mm_div_ps(A, B);
}

// _mm_div_ss: expects lane 0 to be extracted twice, divided as scalar floats,
// and the quotient inserted back into lane 0 of a <4 x float>.
__m128 test_mm_div_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fdiv float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_div_ss(A, B);
}

// _MM_GET_EXCEPTION_MASK: expects an stmxcsr call followed by an and with
// 8064 (0x1F80).
unsigned int test_MM_GET_EXCEPTION_MASK(void) {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_MASK
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 8064
  return _MM_GET_EXCEPTION_MASK();
}

// _MM_GET_EXCEPTION_STATE: expects an stmxcsr call followed by an and with
// 63 (0x3F).
unsigned int test_MM_GET_EXCEPTION_STATE(void) {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_STATE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 63
  return _MM_GET_EXCEPTION_STATE();
}

// _MM_GET_FLUSH_ZERO_MODE: expects an stmxcsr call followed by an and with
// 32768 (0x8000).
unsigned int test_MM_GET_FLUSH_ZERO_MODE(void) {
  // CHECK-LABEL: test_MM_GET_FLUSH_ZERO_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 32768
  return _MM_GET_FLUSH_ZERO_MODE();
}

// _MM_GET_ROUNDING_MODE: expects an stmxcsr call followed by an and with
// 24576 (0x6000).
unsigned int test_MM_GET_ROUNDING_MODE(void) {
  // CHECK-LABEL: test_MM_GET_ROUNDING_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 24576
  return _MM_GET_ROUNDING_MODE();
}

// _mm_getcsr: expects an stmxcsr call followed by an i32 load.
unsigned int test_mm_getcsr(void) {
  // CHECK-LABEL: test_mm_getcsr
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: load i32
  return _mm_getcsr();
}

// _mm_load_ps: expects a <4 x float> load with 16-byte alignment.
__m128 test_mm_load_ps(float* y) {
  // CHECK-LABEL: test_mm_load_ps
  // CHECK: load <4 x float>, <4 x float>* {{.*}}, align 16
  return _mm_load_ps(y);
}

// _mm_load_ps1: expects one scalar float load (align 4) and insertelement
// instructions filling lanes 0 through 3.
__m128 test_mm_load_ps1(float* y) {
  // CHECK-LABEL: test_mm_load_ps1
  // CHECK: load float, float* %{{.*}}, align 4
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load_ps1(y);
}

// _mm_load_ss: expects a scalar float load with alignment exactly 1 (the
// trailing {{$}} anchors the match at end of line so "align 16" cannot match),
// inserted into lane 0, with 0.0 inserted into lanes 1-3.
__m128 test_mm_load_ss(float* y) {
  // CHECK-LABEL: test_mm_load_ss
  // CHECK: load float, float* {{.*}}, align 1{{$}}
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3
  return _mm_load_ss(y);
}

// _mm_load1_ps: expects one scalar float load (align 4) and insertelement
// instructions filling lanes 0 through 3.
__m128 test_mm_load1_ps(float* y) {
  // CHECK-LABEL: test_mm_load1_ps
  // CHECK: load float, float* %{{.*}}, align 4
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load1_ps(y);
}

// _mm_loadh_pi: expects a <2 x float> load with alignment exactly 1, a
// shufflevector producing a four-element result, then a second shufflevector
// with mask <0, 1, 4, 5>.
__m128 test_mm_loadh_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadh_pi
  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_loadh_pi(x,y);
}

// _mm_loadl_pi: expects a <2 x float> load with alignment exactly 1, a
// shufflevector producing a four-element result, then a second shufflevector
// with mask <4, 5, 2, 3>.
__m128 test_mm_loadl_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadl_pi
  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  return _mm_loadl_pi(x,y);
}

// _mm_loadr_ps: expects a 16-byte-aligned <4 x float> load followed by a
// shufflevector with the lane-reversing mask <3, 2, 1, 0>.
__m128 test_mm_loadr_ps(float* A) {
  // CHECK-LABEL: test_mm_loadr_ps
  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 16
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  return _mm_loadr_ps(A);
}

// _mm_loadu_ps: expects a <4 x float> load with alignment exactly 1 (the
// trailing {{$}} anchors the match at end of line).
__m128 test_mm_loadu_ps(float* A) {
  // CHECK-LABEL: test_mm_loadu_ps
  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1{{$}}
  return _mm_loadu_ps(A);
}

// _mm_max_ps: expects a call to the llvm.x86.sse.max.ps intrinsic.
__m128 test_mm_max_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ps
  // CHECK: @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ps(A, B);
}

// _mm_max_ss: expects a call to the llvm.x86.sse.max.ss intrinsic.
__m128 test_mm_max_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ss
  // CHECK: @llvm.x86.sse.max.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ss(A, B);
}

// _mm_min_ps: expects a call to the llvm.x86.sse.min.ps intrinsic.
__m128 test_mm_min_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ps
  // CHECK: @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ps(A, B);
}

// _mm_min_ss: expects a call to the llvm.x86.sse.min.ss intrinsic.
__m128 test_mm_min_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ss
  // CHECK: @llvm.x86.sse.min.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ss(A, B);
}

// _mm_move_ss: expects a shufflevector with mask <4, 1, 2, 3>, i.e. lane 0
// from the second shuffle operand and lanes 1-3 from the first.
__m128 test_mm_move_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_move_ss
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_move_ss(A, B);
}

// _mm_movehl_ps: expects a shufflevector with mask <6, 7, 2, 3>, i.e. the
// high pair of the second shuffle operand followed by the high pair of the first.
__m128 test_mm_movehl_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movehl_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  return _mm_movehl_ps(A, B);
}

// _mm_movelh_ps: expects a shufflevector with mask <0, 1, 4, 5>, i.e. the
// low pair of the first shuffle operand followed by the low pair of the second.
__m128 test_mm_movelh_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movelh_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_movelh_ps(A, B);
}

// _mm_movemask_ps: expects an i32-returning call to llvm.x86.sse.movmsk.ps.
int test_mm_movemask_ps(__m128 A) {
  // CHECK-LABEL: test_mm_movemask_ps
  // CHECK: call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %{{.*}})
  return _mm_movemask_ps(A);
}

// _mm_mul_ps: expects a packed fmul on <4 x float> in the emitted IR.
__m128 test_mm_mul_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ps
  // CHECK: fmul <4 x float>
  return _mm_mul_ps(A, B);
}

// _mm_mul_ss: expects lane 0 to be extracted twice, multiplied as scalar
// floats, and the product inserted back into lane 0 of a <4 x float>.
__m128 test_mm_mul_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fmul float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_mul_ss(A, B);
}

// _mm_or_ps: expects a bitwise or performed on <4 x i32>.
__m128 test_mm_or_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_or_ps
  // CHECK: or <4 x i32>
  return _mm_or_ps(A, B);
}

// _mm_prefetch with hint 0: expects a call to llvm.prefetch whose trailing
// arguments are (i32 0, i32 0, i32 1).
void test_mm_prefetch(char const* p) {
  // CHECK-LABEL: test_mm_prefetch
  // CHECK: call void @llvm.prefetch(i8* {{.*}}, i32 0, i32 0, i32 1)
  _mm_prefetch(p, 0);
}

// _mm_rcp_ps: expects a call to the llvm.x86.sse.rcp.ps intrinsic.
__m128 test_mm_rcp_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ps
  // CHECK: call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> {{.*}})
  return _mm_rcp_ps(x);
}

// _mm_rcp_ss: expects a call to llvm.x86.sse.rcp.ss, then the result vector
// rebuilt lane by lane with extractelement/insertelement pairs for lanes 0-3.
__m128 test_mm_rcp_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> {{.*}})
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_rcp_ss(x);
}

// _mm_rsqrt_ps: expects a call to the llvm.x86.sse.rsqrt.ps intrinsic.
__m128 test_mm_rsqrt_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ps
  // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> {{.*}})
  return _mm_rsqrt_ps(x);
}

// _mm_rsqrt_ss: expects a call to llvm.x86.sse.rsqrt.ss, then the result vector
// rebuilt lane by lane with extractelement/insertelement pairs for lanes 0-3.
__m128 test_mm_rsqrt_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> {{.*}})
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_rsqrt_ss(x);
}

// Exercises the _MM_SET_EXCEPTION_MASK macro with a runtime argument. The
// expected IR is a read-modify-write sequence: an stmxcsr call, an i32 load,
// an "and" with -8065 (the bitwise complement of 0x1f80), an "or", an i32
// store, and finally an ldmxcsr call.
test_MM_SET_EXCEPTION_MASK(unsigned int A)547*67e74705SXin Li void test_MM_SET_EXCEPTION_MASK(unsigned int A) {
548*67e74705SXin Li   // CHECK-LABEL: test_MM_SET_EXCEPTION_MASK
549*67e74705SXin Li   // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
550*67e74705SXin Li   // CHECK: load i32
551*67e74705SXin Li   // CHECK: and i32 {{.*}}, -8065
552*67e74705SXin Li   // CHECK: or i32
553*67e74705SXin Li   // CHECK: store i32
554*67e74705SXin Li   // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
555*67e74705SXin Li   _MM_SET_EXCEPTION_MASK(A);
556*67e74705SXin Li }
557*67e74705SXin Li 
// Exercises the _MM_SET_EXCEPTION_STATE macro with a runtime argument. The
// expected IR is a read-modify-write sequence: an stmxcsr call, an i32 load,
// an "and" with -64 (the bitwise complement of 0x3f), an "or", an i32 store,
// and finally an ldmxcsr call.
test_MM_SET_EXCEPTION_STATE(unsigned int A)558*67e74705SXin Li void test_MM_SET_EXCEPTION_STATE(unsigned int A) {
559*67e74705SXin Li   // CHECK-LABEL: test_MM_SET_EXCEPTION_STATE
560*67e74705SXin Li   // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
561*67e74705SXin Li   // CHECK: load i32
562*67e74705SXin Li   // CHECK: and i32 {{.*}}, -64
563*67e74705SXin Li   // CHECK: or i32
564*67e74705SXin Li   // CHECK: store i32
565*67e74705SXin Li   // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
566*67e74705SXin Li   _MM_SET_EXCEPTION_STATE(A);
567*67e74705SXin Li }
568*67e74705SXin Li 
// Exercises the _MM_SET_FLUSH_ZERO_MODE macro with a runtime argument. The
// expected IR is a read-modify-write sequence: an stmxcsr call, an i32 load,
// an "and" with -32769 (the bitwise complement of 0x8000), an "or", an i32
// store, and finally an ldmxcsr call.
test_MM_SET_FLUSH_ZERO_MODE(unsigned int A)569*67e74705SXin Li void test_MM_SET_FLUSH_ZERO_MODE(unsigned int A) {
570*67e74705SXin Li   // CHECK-LABEL: test_MM_SET_FLUSH_ZERO_MODE
571*67e74705SXin Li   // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
572*67e74705SXin Li   // CHECK: load i32
573*67e74705SXin Li   // CHECK: and i32 {{.*}}, -32769
574*67e74705SXin Li   // CHECK: or i32
575*67e74705SXin Li   // CHECK: store i32
576*67e74705SXin Li   // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
577*67e74705SXin Li   _MM_SET_FLUSH_ZERO_MODE(A);
578*67e74705SXin Li }
579*67e74705SXin Li 
// Exercises _mm_set_ps with four scalar arguments; the expected IR builds the
// vector with four insertelement instructions, lane 0 into undef and then
// lanes 1, 2 and 3 in order.
test_mm_set_ps(float A,float B,float C,float D)580*67e74705SXin Li __m128 test_mm_set_ps(float A, float B, float C, float D) {
581*67e74705SXin Li   // CHECK-LABEL: test_mm_set_ps
582*67e74705SXin Li   // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
583*67e74705SXin Li   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
584*67e74705SXin Li   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
585*67e74705SXin Li   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
586*67e74705SXin Li   return _mm_set_ps(A, B, C, D);
587*67e74705SXin Li }
588*67e74705SXin Li 
// Exercises _mm_set_ps1 with a single scalar; the expected IR builds the
// vector with four insertelement instructions, lane 0 into undef and then
// lanes 1, 2 and 3 in order.
test_mm_set_ps1(float A)589*67e74705SXin Li __m128 test_mm_set_ps1(float A) {
590*67e74705SXin Li   // CHECK-LABEL: test_mm_set_ps1
591*67e74705SXin Li   // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
592*67e74705SXin Li   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
593*67e74705SXin Li   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
594*67e74705SXin Li   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
595*67e74705SXin Li   return _mm_set_ps1(A);
596*67e74705SXin Li }
597*67e74705SXin Li 
// Exercises the _MM_SET_ROUNDING_MODE macro with a runtime argument. The
// expected IR is a read-modify-write sequence: an stmxcsr call, an i32 load,
// an "and" with -24577 (the bitwise complement of 0x6000), an "or", an i32
// store, and finally an ldmxcsr call.
test_MM_SET_ROUNDING_MODE(unsigned int A)598*67e74705SXin Li void test_MM_SET_ROUNDING_MODE(unsigned int A) {
599*67e74705SXin Li   // CHECK-LABEL: test_MM_SET_ROUNDING_MODE
600*67e74705SXin Li   // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
601*67e74705SXin Li   // CHECK: load i32
602*67e74705SXin Li   // CHECK: and i32 {{.*}}, -24577
603*67e74705SXin Li   // CHECK: or i32
604*67e74705SXin Li   // CHECK: store i32
605*67e74705SXin Li   // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
606*67e74705SXin Li   _MM_SET_ROUNDING_MODE(A);
607*67e74705SXin Li }
608*67e74705SXin Li 
// Exercises _mm_set_ss; the expected IR inserts the argument into lane 0 of an
// undef vector and then inserts the constant 0.0 into lanes 1, 2 and 3.
test_mm_set_ss(float A)609*67e74705SXin Li __m128 test_mm_set_ss(float A) {
610*67e74705SXin Li   // CHECK-LABEL: test_mm_set_ss
611*67e74705SXin Li   // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
612*67e74705SXin Li   // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 1
613*67e74705SXin Li   // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 2
614*67e74705SXin Li   // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3
615*67e74705SXin Li   return _mm_set_ss(A);
616*67e74705SXin Li }
617*67e74705SXin Li 
// Exercises _mm_set1_ps with a single scalar; the expected IR builds the
// vector with four insertelement instructions, lane 0 into undef and then
// lanes 1, 2 and 3 in order.
test_mm_set1_ps(float A)618*67e74705SXin Li __m128 test_mm_set1_ps(float A) {
619*67e74705SXin Li   // CHECK-LABEL: test_mm_set1_ps
620*67e74705SXin Li   // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
621*67e74705SXin Li   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
622*67e74705SXin Li   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
623*67e74705SXin Li   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
624*67e74705SXin Li   return _mm_set1_ps(A);
625*67e74705SXin Li }
626*67e74705SXin Li 
// Exercises _mm_setcsr; the expected IR is an i32 store followed by a call to
// the llvm.x86.sse.ldmxcsr intrinsic, which takes an i8* operand.
test_mm_setcsr(unsigned int A)627*67e74705SXin Li void test_mm_setcsr(unsigned int A) {
628*67e74705SXin Li   // CHECK-LABEL: test_mm_setcsr
629*67e74705SXin Li   // CHECK: store i32
630*67e74705SXin Li   // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
631*67e74705SXin Li   _mm_setcsr(A);
632*67e74705SXin Li }
633*67e74705SXin Li 
// Exercises _mm_setr_ps with four scalar arguments; the expected IR builds the
// vector with four insertelement instructions, lane 0 into undef and then
// lanes 1, 2 and 3 in order.
test_mm_setr_ps(float A,float B,float C,float D)634*67e74705SXin Li __m128 test_mm_setr_ps(float A, float B, float C, float D) {
635*67e74705SXin Li   // CHECK-LABEL: test_mm_setr_ps
636*67e74705SXin Li   // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
637*67e74705SXin Li   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
638*67e74705SXin Li   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
639*67e74705SXin Li   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
640*67e74705SXin Li   return _mm_setr_ps(A, B, C, D);
641*67e74705SXin Li }
642*67e74705SXin Li 
// Exercises _mm_setzero_ps; the expected IR stores a zeroinitializer
// <4 x float> value. The parameter list is spelled (void) so that the
// definition provides a real prototype rather than an unprototyped "()".
__m128 test_mm_setzero_ps(void) {
  // CHECK-LABEL: test_mm_setzero_ps
  // CHECK: store <4 x float> zeroinitializer
  return _mm_setzero_ps();
}
648*67e74705SXin Li 
// Exercises _mm_sfence; the expected IR is a call to the llvm.x86.sse.sfence
// intrinsic. The parameter list is spelled (void) so that the definition
// provides a real prototype rather than an unprototyped "()".
void test_mm_sfence(void) {
  // CHECK-LABEL: test_mm_sfence
  // CHECK: call void @llvm.x86.sse.sfence()
  _mm_sfence();
}
654*67e74705SXin Li 
// Exercises _mm_shuffle_ps with an immediate of 0; the expected IR is a
// shufflevector of the two operands with the mask <0, 0, 4, 4>.
test_mm_shuffle_ps(__m128 A,__m128 B)655*67e74705SXin Li __m128 test_mm_shuffle_ps(__m128 A, __m128 B) {
656*67e74705SXin Li   // CHECK-LABEL: test_mm_shuffle_ps
657*67e74705SXin Li   // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
658*67e74705SXin Li   return _mm_shuffle_ps(A, B, 0);
659*67e74705SXin Li }
660*67e74705SXin Li 
// Exercises _mm_sqrt_ps; the expected IR is a single call to the
// llvm.x86.sse.sqrt.ps intrinsic taking a <4 x float> operand.
test_mm_sqrt_ps(__m128 x)661*67e74705SXin Li __m128 test_mm_sqrt_ps(__m128 x) {
662*67e74705SXin Li   // CHECK-LABEL: test_mm_sqrt_ps
663*67e74705SXin Li   // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> {{.*}})
664*67e74705SXin Li   return _mm_sqrt_ps(x);
665*67e74705SXin Li }
666*67e74705SXin Li 
// Exercises _mm_sqrt_ss; the expected IR is a call to the llvm.x86.sse.sqrt.ss
// intrinsic followed by one extractelement/insertelement pair for each of
// lanes 0 through 3, with the first insert starting from undef.
// The function is anchored with a label directive, like every other test in
// this file, so its expectations cannot match IR from a neighbouring function.
__m128 test_sqrt_ss(__m128 x) {
  // CHECK-LABEL: test_sqrt_ss
  // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> {{.*}})
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_sqrt_ss(x);
}
680*67e74705SXin Li 
// Exercises _mm_store_ps; the expected IR is a <4 x float> store with
// 16-byte alignment.
test_mm_store_ps(float * x,__m128 y)681*67e74705SXin Li void test_mm_store_ps(float* x, __m128 y) {
682*67e74705SXin Li   // CHECK-LABEL: test_mm_store_ps
683*67e74705SXin Li   // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16
684*67e74705SXin Li   _mm_store_ps(x, y);
685*67e74705SXin Li }
686*67e74705SXin Li 
// Exercises _mm_store_ps1; the expected IR is a shufflevector with an all-zero
// mask (every result lane taken from lane 0) followed by a <4 x float> store
// with 16-byte alignment.
test_mm_store_ps1(float * x,__m128 y)687*67e74705SXin Li void test_mm_store_ps1(float* x, __m128 y) {
688*67e74705SXin Li   // CHECK-LABEL: test_mm_store_ps1
689*67e74705SXin Li   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
690*67e74705SXin Li   // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16
691*67e74705SXin Li   _mm_store_ps1(x, y);
692*67e74705SXin Li }
693*67e74705SXin Li 
// Exercises _mm_store_ss; the expected IR extracts lane 0 and stores it as a
// scalar float with alignment 1. The trailing {{$}} anchors the pattern at end
// of line, so a longer alignment value such as 16 would not satisfy it.
test_mm_store_ss(float * x,__m128 y)694*67e74705SXin Li void test_mm_store_ss(float* x, __m128 y) {
695*67e74705SXin Li   // CHECK-LABEL: test_mm_store_ss
696*67e74705SXin Li   // CHECK: extractelement <4 x float> {{.*}}, i32 0
697*67e74705SXin Li   // CHECK: store float %{{.*}}, float* {{.*}}, align 1{{$}}
698*67e74705SXin Li   _mm_store_ss(x, y);
699*67e74705SXin Li }
700*67e74705SXin Li 
// Exercises _mm_store1_ps; the expected IR is a shufflevector with an all-zero
// mask (every result lane taken from lane 0) followed by a <4 x float> store
// with 16-byte alignment.
test_mm_store1_ps(float * x,__m128 y)701*67e74705SXin Li void test_mm_store1_ps(float* x, __m128 y) {
702*67e74705SXin Li   // CHECK-LABEL: test_mm_store1_ps
703*67e74705SXin Li   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
704*67e74705SXin Li   // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16
705*67e74705SXin Li   _mm_store1_ps(x, y);
706*67e74705SXin Li }
707*67e74705SXin Li 
// Exercises _mm_storeh_pi; the expected IR bitcasts the vector to <2 x i64>,
// extracts element 1 and stores it as an i64.
test_mm_storeh_pi(__m64 * x,__m128 y)708*67e74705SXin Li void test_mm_storeh_pi(__m64* x,  __m128 y) {
709*67e74705SXin Li   // CHECK-LABEL: test_mm_storeh_pi
710*67e74705SXin Li   // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
711*67e74705SXin Li   // CHECK: extractelement <2 x i64> %{{.*}}, i64 1
712*67e74705SXin Li   // CHECK: store i64 %{{.*}}, i64* {{.*}}
713*67e74705SXin Li   _mm_storeh_pi(x, y);
714*67e74705SXin Li }
715*67e74705SXin Li 
// Exercises _mm_storel_pi; the expected IR bitcasts the vector to <2 x i64>,
// extracts element 0 and stores it as an i64.
test_mm_storel_pi(__m64 * x,__m128 y)716*67e74705SXin Li void test_mm_storel_pi(__m64* x,  __m128 y) {
717*67e74705SXin Li   // CHECK-LABEL: test_mm_storel_pi
718*67e74705SXin Li   // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
719*67e74705SXin Li   // CHECK: extractelement <2 x i64> %{{.*}}, i64 0
720*67e74705SXin Li   // CHECK: store i64 %{{.*}}, i64* {{.*}}
721*67e74705SXin Li   _mm_storel_pi(x, y);
722*67e74705SXin Li }
723*67e74705SXin Li 
// Exercises _mm_storer_ps; the expected IR reverses the lanes with a
// shufflevector mask of <3, 2, 1, 0> and then performs a <4 x float> store
// with 16-byte alignment.
test_mm_storer_ps(float * x,__m128 y)724*67e74705SXin Li void test_mm_storer_ps(float* x,  __m128 y) {
725*67e74705SXin Li   // CHECK-LABEL: test_mm_storer_ps
726*67e74705SXin Li   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
727*67e74705SXin Li   // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16
728*67e74705SXin Li   _mm_storer_ps(x, y);
729*67e74705SXin Li }
730*67e74705SXin Li 
// Exercises _mm_storeu_ps; the expected IR is a <4 x float> store with
// alignment 1 (the {{$}} anchors the pattern at end of line), and the very
// next IR line must be "ret void".
test_mm_storeu_ps(float * x,__m128 y)731*67e74705SXin Li void test_mm_storeu_ps(float* x,  __m128 y) {
732*67e74705SXin Li   // CHECK-LABEL: test_mm_storeu_ps
733*67e74705SXin Li   // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}}
734*67e74705SXin Li   // CHECK-NEXT: ret void
735*67e74705SXin Li   _mm_storeu_ps(x, y);
736*67e74705SXin Li }
737*67e74705SXin Li 
test_mm_stream_ps(float * A,__m128d B)738*67e74705SXin Li void test_mm_stream_ps(float*A, __m128d B) {
739*67e74705SXin Li   // CHECK-LABEL: test_mm_stream_ps
740*67e74705SXin Li   // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16, !nontemporal
741*67e74705SXin Li   _mm_stream_ps(A, B);
742*67e74705SXin Li }
743*67e74705SXin Li 
// Exercises _mm_sub_ps; the expected IR is a plain fsub on <4 x float>.
test_mm_sub_ps(__m128 A,__m128 B)744*67e74705SXin Li __m128 test_mm_sub_ps(__m128 A, __m128 B) {
745*67e74705SXin Li   // CHECK-LABEL: test_mm_sub_ps
746*67e74705SXin Li   // CHECK: fsub <4 x float>
747*67e74705SXin Li   return _mm_sub_ps(A, B);
748*67e74705SXin Li }
749*67e74705SXin Li 
// Exercises _mm_sub_ss; the expected IR extracts lane 0 twice, performs a
// scalar fsub, and inserts the result back into lane 0 of a vector.
test_mm_sub_ss(__m128 A,__m128 B)750*67e74705SXin Li __m128 test_mm_sub_ss(__m128 A, __m128 B) {
751*67e74705SXin Li   // CHECK-LABEL: test_mm_sub_ss
752*67e74705SXin Li   // CHECK: extractelement <4 x float> %{{.*}}, i32 0
753*67e74705SXin Li   // CHECK: extractelement <4 x float> %{{.*}}, i32 0
754*67e74705SXin Li   // CHECK: fsub float
755*67e74705SXin Li   // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
756*67e74705SXin Li   return _mm_sub_ss(A, B);
757*67e74705SXin Li }
758*67e74705SXin Li 
// Exercises the _MM_TRANSPOSE4_PS macro on four vectors reached through
// pointers. The expected IR is eight shufflevector instructions in order:
// mask <0, 4, 1, 5> twice, mask <2, 6, 3, 7> twice, and then the pair
// <0, 1, 4, 5> / <6, 7, 2, 3> repeated twice.
test_MM_TRANSPOSE4_PS(__m128 * A,__m128 * B,__m128 * C,__m128 * D)759*67e74705SXin Li void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) {
760*67e74705SXin Li   // CHECK-LABEL: test_MM_TRANSPOSE4_PS
761*67e74705SXin Li   // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
762*67e74705SXin Li   // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
763*67e74705SXin Li   // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
764*67e74705SXin Li   // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
765*67e74705SXin Li   // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
766*67e74705SXin Li   // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
767*67e74705SXin Li   // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
768*67e74705SXin Li   // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
769*67e74705SXin Li   _MM_TRANSPOSE4_PS(*A, *B, *C, *D);
770*67e74705SXin Li }
771*67e74705SXin Li 
// Exercises _mm_ucomieq_ss; the expected IR is a call to the
// llvm.x86.sse.ucomieq.ss intrinsic, which takes two <4 x float> operands and
// returns i32.
test_mm_ucomieq_ss(__m128 A,__m128 B)772*67e74705SXin Li int test_mm_ucomieq_ss(__m128 A, __m128 B) {
773*67e74705SXin Li   // CHECK-LABEL: test_mm_ucomieq_ss
774*67e74705SXin Li   // CHECK: call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
775*67e74705SXin Li   return _mm_ucomieq_ss(A, B);
776*67e74705SXin Li }
777*67e74705SXin Li 
// Exercises _mm_ucomige_ss; the expected IR is a call to the
// llvm.x86.sse.ucomige.ss intrinsic, which takes two <4 x float> operands and
// returns i32.
test_mm_ucomige_ss(__m128 A,__m128 B)778*67e74705SXin Li int test_mm_ucomige_ss(__m128 A, __m128 B) {
779*67e74705SXin Li   // CHECK-LABEL: test_mm_ucomige_ss
780*67e74705SXin Li   // CHECK: call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
781*67e74705SXin Li   return _mm_ucomige_ss(A, B);
782*67e74705SXin Li }
783*67e74705SXin Li 
// Exercises _mm_ucomigt_ss; the expected IR is a call to the
// llvm.x86.sse.ucomigt.ss intrinsic, which takes two <4 x float> operands and
// returns i32.
test_mm_ucomigt_ss(__m128 A,__m128 B)784*67e74705SXin Li int test_mm_ucomigt_ss(__m128 A, __m128 B) {
785*67e74705SXin Li   // CHECK-LABEL: test_mm_ucomigt_ss
786*67e74705SXin Li   // CHECK: call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
787*67e74705SXin Li   return _mm_ucomigt_ss(A, B);
788*67e74705SXin Li }
789*67e74705SXin Li 
// Exercises _mm_ucomile_ss; the expected IR is a call to the
// llvm.x86.sse.ucomile.ss intrinsic, which takes two <4 x float> operands and
// returns i32.
test_mm_ucomile_ss(__m128 A,__m128 B)790*67e74705SXin Li int test_mm_ucomile_ss(__m128 A, __m128 B) {
791*67e74705SXin Li   // CHECK-LABEL: test_mm_ucomile_ss
792*67e74705SXin Li   // CHECK: call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
793*67e74705SXin Li   return _mm_ucomile_ss(A, B);
794*67e74705SXin Li }
795*67e74705SXin Li 
// Exercises _mm_ucomilt_ss; the expected IR is a call to the
// llvm.x86.sse.ucomilt.ss intrinsic, which takes two <4 x float> operands and
// returns i32.
test_mm_ucomilt_ss(__m128 A,__m128 B)796*67e74705SXin Li int test_mm_ucomilt_ss(__m128 A, __m128 B) {
797*67e74705SXin Li   // CHECK-LABEL: test_mm_ucomilt_ss
798*67e74705SXin Li   // CHECK: call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
799*67e74705SXin Li   return _mm_ucomilt_ss(A, B);
800*67e74705SXin Li }
801*67e74705SXin Li 
// Exercises _mm_ucomineq_ss; the expected IR is a call to the
// llvm.x86.sse.ucomineq.ss intrinsic, which takes two <4 x float> operands and
// returns i32.
test_mm_ucomineq_ss(__m128 A,__m128 B)802*67e74705SXin Li int test_mm_ucomineq_ss(__m128 A, __m128 B) {
803*67e74705SXin Li   // CHECK-LABEL: test_mm_ucomineq_ss
804*67e74705SXin Li   // CHECK: call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
805*67e74705SXin Li   return _mm_ucomineq_ss(A, B);
806*67e74705SXin Li }
807*67e74705SXin Li 
// Exercises _mm_undefined_ps; the expected IR returns an undef <4 x float>
// directly. The parameter list is spelled (void) so that the definition
// provides a real prototype rather than an unprototyped "()".
__m128 test_mm_undefined_ps(void) {
  // CHECK-LABEL: @test_mm_undefined_ps
  // CHECK: ret <4 x float> undef
  return _mm_undefined_ps();
}
813*67e74705SXin Li 
// Exercises _mm_unpackhi_ps; the expected IR is a shufflevector of the two
// operands with the mask <2, 6, 3, 7>.
test_mm_unpackhi_ps(__m128 A,__m128 B)814*67e74705SXin Li __m128 test_mm_unpackhi_ps(__m128 A, __m128 B) {
815*67e74705SXin Li   // CHECK-LABEL: test_mm_unpackhi_ps
816*67e74705SXin Li   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
817*67e74705SXin Li   return _mm_unpackhi_ps(A, B);
818*67e74705SXin Li }
819*67e74705SXin Li 
// Exercises _mm_unpacklo_ps; the expected IR is a shufflevector of the two
// operands with the mask <0, 4, 1, 5>.
test_mm_unpacklo_ps(__m128 A,__m128 B)820*67e74705SXin Li __m128 test_mm_unpacklo_ps(__m128 A, __m128 B) {
821*67e74705SXin Li   // CHECK-LABEL: test_mm_unpacklo_ps
822*67e74705SXin Li   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
823*67e74705SXin Li   return _mm_unpacklo_ps(A, B);
824*67e74705SXin Li }
825*67e74705SXin Li 
// Exercises _mm_xor_ps; the expected IR is an xor performed on <4 x i32>,
// i.e. on the integer view of the float vectors.
test_mm_xor_ps(__m128 A,__m128 B)826*67e74705SXin Li __m128 test_mm_xor_ps(__m128 A, __m128 B) {
827*67e74705SXin Li   // CHECK-LABEL: test_mm_xor_ps
828*67e74705SXin Li   // CHECK: xor <4 x i32>
829*67e74705SXin Li   return _mm_xor_ps(A, B);
830*67e74705SXin Li }
831