; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/sse4a-intrinsics-fast-isel.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4a,+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefix=X64
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4a,+avx | FileCheck %s --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse4a-builtins.c

; Immediate form of the SSE4a extract: calls @llvm.x86.sse4a.extrqi on %x with
; the i8 constants 3 and 2 and returns the result unchanged. Both the i386 and
; the x86_64 check blocks expect a single `extrq $2, $3, %xmm0` followed by the
; return, i.e. the two immediates are printed in the reverse of the call order.
define <2 x i64> @test_mm_extracti_si64(<2 x i64> %x) {
; X32-LABEL: test_mm_extracti_si64:
; X32:       # BB#0:
; X32-NEXT:    extrq $2, $3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_extracti_si64:
; X64:       # BB#0:
; X64-NEXT:    extrq $2, $3, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
  ret <2 x i64> %res
}
; Intrinsic used above: takes the source vector plus two i8 operands.
declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind readnone

; Register form of the SSE4a extract: %y is bitcast from <2 x i64> to
; <16 x i8> (the type of the intrinsic's second operand) and passed together
; with %x to @llvm.x86.sse4a.extrq. Both targets are expected to emit just
; `extrq %xmm1, %xmm0` and a return; no instruction is expected for the bitcast.
define <2 x i64> @test_mm_extract_si64(<2 x i64> %x, <2 x i64> %y) {
; X32-LABEL: test_mm_extract_si64:
; X32:       # BB#0:
; X32-NEXT:    extrq %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_extract_si64:
; X64:       # BB#0:
; X64-NEXT:    extrq %xmm1, %xmm0
; X64-NEXT:    retq
  %bc = bitcast <2 x i64> %y to <16 x i8>
  %res = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %bc)
  ret <2 x i64> %res
}
; Intrinsic used above: second operand is a <16 x i8> vector.
declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind readnone

; Immediate form of the SSE4a insert: calls @llvm.x86.sse4a.insertqi with %x,
; %y and the i8 constants 5 and 6. Both targets are expected to emit a single
; `insertq $6, $5, %xmm1, %xmm0` followed by the return, i.e. the two
; immediates are printed in the reverse of the call order.
define <2 x i64> @test_mm_inserti_si64(<2 x i64> %x, <2 x i64> %y) {
; X32-LABEL: test_mm_inserti_si64:
; X32:       # BB#0:
; X32-NEXT:    insertq $6, $5, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_inserti_si64:
; X64:       # BB#0:
; X64-NEXT:    insertq $6, $5, %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 5, i8 6)
  ret <2 x i64> %res
}
; Intrinsic used above: two vector operands plus two i8 operands.
declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind readnone

; Register form of the SSE4a insert: passes %x and %y straight to
; @llvm.x86.sse4a.insertq and returns the result. Both targets are expected to
; emit just `insertq %xmm1, %xmm0` and a return.
define <2 x i64> @test_mm_insert_si64(<2 x i64> %x, <2 x i64> %y) {
; X32-LABEL: test_mm_insert_si64:
; X32:       # BB#0:
; X32-NEXT:    insertq %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_insert_si64:
; X64:       # BB#0:
; X64-NEXT:    insertq %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
  ret <2 x i64> %res
}
; Intrinsic used above: two <2 x i64> operands.
declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind readnone

; Scalar double streaming store: extracts element 0 of %a and stores it to %p
; with `align 1` and a !nontemporal attachment. The expected code is a single
; `movntsd` from %xmm0; on i386 the pointer is first loaded from the stack into
; %eax, on x86_64 it is used directly from %rdi.
; NOTE(review): this name lacks the `mm_` infix used by the sibling tests
; (e.g. test_mm_stream_ss). It is kept as-is because the label checks below are
; tied to the symbol name -- confirm before renaming.
define void @test_stream_sd(double* %p, <2 x double> %a) {
; X32-LABEL: test_stream_sd:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movntsd %xmm0, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_stream_sd:
; X64:       # BB#0:
; X64-NEXT:    movntsd %xmm0, (%rdi)
; X64-NEXT:    retq
  %1 = extractelement <2 x double> %a, i64 0
  store double %1, double* %p, align 1, !nontemporal !1
  ret void
}

; Scalar float streaming store: extracts element 0 of %a and stores it to %p
; with `align 1` and a !nontemporal attachment. The expected code is a single
; `movntss` from %xmm0; on i386 the pointer is first loaded from the stack into
; %eax, on x86_64 it is used directly from %rdi.
define void @test_mm_stream_ss(float* %p, <4 x float> %a) {
; X32-LABEL: test_mm_stream_ss:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movntss %xmm0, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_stream_ss:
; X64:       # BB#0:
; X64-NEXT:    movntss %xmm0, (%rdi)
; X64-NEXT:    retq
  %1 = extractelement <4 x float> %a, i64 0
  store float %1, float* %p, align 1, !nontemporal !1
  ret void
}

; Metadata node referenced by the `!nontemporal !1` attachments on the stores
; in this file.
!1 = !{i32 1}