; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c

; test_mm_cmpestra: calls @llvm.x86.sse42.pcmpestria128 with control byte 7.
; The expected code puts the two i32 length operands in %eax and %edx, issues
; PCMPESTRI, and builds the i32 result with SETA into a register that was
; zeroed before the compare (%ebx on i386, %r8d on x86-64).
; NOTE(review): nounwind presumably keeps CFI directives for the %ebx
; push/pop out of the i386 output - confirm before removing it.
define i32 @test_mm_cmpestra(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nounwind {
; X32-LABEL: test_mm_cmpestra:
; X32:       # BB#0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X32-NEXT:    seta %bl
; X32-NEXT:    movl %ebx, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpestra:
; X64:       # BB#0:
; X64-NEXT:    xorl %r8d, %r8d
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X64-NEXT:    seta %r8b
; X64-NEXT:    movl %r8d, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %arg0, i32 %a1, <16 x i8> %arg2, i32 %a3, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; test_mm_cmpestrc: calls @llvm.x86.sse42.pcmpestric128 with control byte 7.
; The expected code loads the two lengths into %eax/%edx, issues PCMPESTRI,
; and turns the carry flag into a 0/1 i32 with "sbbl %eax, %eax" followed by
; "andl $1, %eax".
define i32 @test_mm_cmpestrc(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) {
; X32-LABEL: test_mm_cmpestrc:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X32-NEXT:    sbbl %eax, %eax
; X32-NEXT:    andl $1, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpestrc:
; X64:       # BB#0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X64-NEXT:    sbbl %eax, %eax
; X64-NEXT:    andl $1, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %arg0, i32 %a1, <16 x i8> %arg2, i32 %a3, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; test_mm_cmpestri: calls @llvm.x86.sse42.pcmpestri128 with control byte 7.
; The expected code loads the two lengths into %eax/%edx, issues PCMPESTRI,
; and returns the value the instruction leaves in %ecx by copying it to %eax.
define i32 @test_mm_cmpestri(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) {
; X32-LABEL: test_mm_cmpestri:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpestri:
; X64:       # BB#0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %arg0, i32 %a1, <16 x i8> %arg2, i32 %a3, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; test_mm_cmpestrm: calls @llvm.x86.sse42.pcmpestrm128 with control byte 7 and
; bitcasts the <16 x i8> mask result back to <2 x i64>.
; The expected code loads the two lengths into %eax/%edx and issues
; PCMPESTRM; no further instruction is expected before the return.
define <2 x i64> @test_mm_cmpestrm(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) {
; X32-LABEL: test_mm_cmpestrm:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    pcmpestrm $7, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpestrm:
; X64:       # BB#0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestrm $7, %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %arg0, i32 %a1, <16 x i8> %arg2, i32 %a3, i8 7)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; test_mm_cmpestro: calls @llvm.x86.sse42.pcmpestrio128 with control byte 7.
; The expected code puts the two lengths in %eax/%edx, issues PCMPESTRI, and
; builds the i32 result with SETO into a register that was zeroed before the
; compare (%ebx on i386, %r8d on x86-64).
; NOTE(review): nounwind presumably keeps CFI directives for the %ebx
; push/pop out of the i386 output - confirm before removing it.
define i32 @test_mm_cmpestro(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nounwind {
; X32-LABEL: test_mm_cmpestro:
; X32:       # BB#0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X32-NEXT:    seto %bl
; X32-NEXT:    movl %ebx, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpestro:
; X64:       # BB#0:
; X64-NEXT:    xorl %r8d, %r8d
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X64-NEXT:    seto %r8b
; X64-NEXT:    movl %r8d, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %arg0, i32 %a1, <16 x i8> %arg2, i32 %a3, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; test_mm_cmpestrs: calls @llvm.x86.sse42.pcmpestris128 with control byte 7.
; The expected code puts the two lengths in %eax/%edx, issues PCMPESTRI, and
; builds the i32 result with SETS into a register that was zeroed before the
; compare (%ebx on i386, %r8d on x86-64).
; NOTE(review): nounwind presumably keeps CFI directives for the %ebx
; push/pop out of the i386 output - confirm before removing it.
define i32 @test_mm_cmpestrs(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nounwind {
; X32-LABEL: test_mm_cmpestrs:
; X32:       # BB#0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X32-NEXT:    sets %bl
; X32-NEXT:    movl %ebx, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpestrs:
; X64:       # BB#0:
; X64-NEXT:    xorl %r8d, %r8d
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X64-NEXT:    sets %r8b
; X64-NEXT:    movl %r8d, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %arg0, i32 %a1, <16 x i8> %arg2, i32 %a3, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; test_mm_cmpestrz: calls @llvm.x86.sse42.pcmpestriz128 with control byte 7.
; The expected code puts the two lengths in %eax/%edx, issues PCMPESTRI, and
; builds the i32 result with SETE into a register that was zeroed before the
; compare (%ebx on i386, %r8d on x86-64).
; NOTE(review): nounwind presumably keeps CFI directives for the %ebx
; push/pop out of the i386 output - confirm before removing it.
define i32 @test_mm_cmpestrz(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nounwind {
; X32-LABEL: test_mm_cmpestrz:
; X32:       # BB#0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X32-NEXT:    sete %bl
; X32-NEXT:    movl %ebx, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpestrz:
; X64:       # BB#0:
; X64-NEXT:    xorl %r8d, %r8d
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $7, %xmm1, %xmm0
; X64-NEXT:    sete %r8b
; X64-NEXT:    movl %r8d, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %arg0, i32 %a1, <16 x i8> %arg2, i32 %a3, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; test_mm_cmpgt_epi64: plain IR with no intrinsic call - a signed
; greater-than compare of two <2 x i64> values, sign-extended from <2 x i1>
; back to <2 x i64>. Both targets are expected to emit a single PCMPGTQ.
define <2 x i64> @test_mm_cmpgt_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpgt_epi64:
; X32:       # BB#0:
; X32-NEXT:    pcmpgtq %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpgt_epi64:
; X64:       # BB#0:
; X64-NEXT:    pcmpgtq %xmm1, %xmm0
; X64-NEXT:    retq
  %cmp = icmp sgt <2 x i64> %a0, %a1
  %res = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %res
}

; test_mm_cmpistra: calls @llvm.x86.sse42.pcmpistria128 with control byte 7.
; The expected code zeroes %eax, issues PCMPISTRI, and sets %al with SETA.
; The xorl must come before the compare because it would overwrite the flags
; SETA reads.
define i32 @test_mm_cmpistra(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpistra:
; X32:       # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X32-NEXT:    seta %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpistra:
; X64:       # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X64-NEXT:    seta %al
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; test_mm_cmpistrc: calls @llvm.x86.sse42.pcmpistric128 with control byte 7.
; The expected code issues PCMPISTRI and turns the carry flag into a 0/1 i32
; with "sbbl %eax, %eax" followed by "andl $1, %eax".
define i32 @test_mm_cmpistrc(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpistrc:
; X32:       # BB#0:
; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X32-NEXT:    sbbl %eax, %eax
; X32-NEXT:    andl $1, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpistrc:
; X64:       # BB#0:
; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X64-NEXT:    sbbl %eax, %eax
; X64-NEXT:    andl $1, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; test_mm_cmpistri: calls @llvm.x86.sse42.pcmpistri128 with control byte 7.
; The expected code issues PCMPISTRI and returns the value the instruction
; leaves in %ecx by copying it to %eax.
define i32 @test_mm_cmpistri(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpistri:
; X32:       # BB#0:
; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpistri:
; X64:       # BB#0:
; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; test_mm_cmpistrm: calls @llvm.x86.sse42.pcmpistrm128 with control byte 7 and
; bitcasts the <16 x i8> mask result back to <2 x i64>.
; Both targets are expected to emit only PCMPISTRM before the return.
define <2 x i64> @test_mm_cmpistrm(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpistrm:
; X32:       # BB#0:
; X32-NEXT:    pcmpistrm $7, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpistrm:
; X64:       # BB#0:
; X64-NEXT:    pcmpistrm $7, %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; test_mm_cmpistro: calls @llvm.x86.sse42.pcmpistrio128 with control byte 7.
; The expected code zeroes %eax, issues PCMPISTRI, and sets %al with SETO.
; The xorl must come before the compare because it would overwrite the flags
; SETO reads.
define i32 @test_mm_cmpistro(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpistro:
; X32:       # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X32-NEXT:    seto %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpistro:
; X64:       # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X64-NEXT:    seto %al
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; test_mm_cmpistrs: calls @llvm.x86.sse42.pcmpistris128 with control byte 7.
; The expected code zeroes %eax, issues PCMPISTRI, and sets %al with SETS.
; The xorl must come before the compare because it would overwrite the flags
; SETS reads.
define i32 @test_mm_cmpistrs(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpistrs:
; X32:       # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X32-NEXT:    sets %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpistrs:
; X64:       # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X64-NEXT:    sets %al
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; test_mm_cmpistrz: calls @llvm.x86.sse42.pcmpistriz128 with control byte 7.
; The expected code zeroes %eax, issues PCMPISTRI, and sets %al with SETE.
; The xorl must come before the compare because it would overwrite the flags
; SETE reads.
define i32 @test_mm_cmpistrz(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpistrz:
; X32:       # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X32-NEXT:    sete %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpistrz:
; X64:       # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    pcmpistri $7, %xmm1, %xmm0
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; test_mm_crc32_u8: calls @llvm.x86.sse42.crc32.32.8 on an i32 value and an
; i8 value.
; i386 expectation: both arguments are loaded from the stack (the byte into
; %cl, the i32 into %eax) and CRC32B updates %eax in place.
; x86-64 expectation: CRC32B runs on the incoming registers (%sil into %edi)
; and the result is then copied to %eax.
define i32 @test_mm_crc32_u8(i32 %a0, i8 %a1) {
; X32-LABEL: test_mm_crc32_u8:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    crc32b %cl, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_crc32_u8:
; X64:       # BB#0:
; X64-NEXT:    crc32b %sil, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone

; test_mm_crc32_u16: calls @llvm.x86.sse42.crc32.32.16 on an i32 value and an
; i16 value.
; i386 expectation: the i16 is loaded from the stack with a zero-extending
; MOVZWL into %ecx, the i32 into %eax, and CRC32W uses %cx to update %eax.
; x86-64 expectation: CRC32W runs on the incoming registers (%si into %edi)
; and the result is then copied to %eax.
define i32 @test_mm_crc32_u16(i32 %a0, i16 %a1) {
; X32-LABEL: test_mm_crc32_u16:
; X32:       # BB#0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    crc32w %cx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_crc32_u16:
; X64:       # BB#0:
; X64-NEXT:    crc32w %si, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone

; test_mm_crc32_u32: calls @llvm.x86.sse42.crc32.32.32 on two i32 values.
; i386 expectation: one argument is loaded from the stack into %eax and the
; other is used directly as CRC32L's memory operand, so no second load is
; expected.
; x86-64 expectation: CRC32L runs on the incoming registers (%esi into %edi)
; and the result is then copied to %eax.
define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) {
; X32-LABEL: test_mm_crc32_u32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    crc32l {{[0-9]+}}(%esp), %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_crc32_u32:
; X64:       # BB#0:
; X64-NEXT:    crc32l %esi, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone
