xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/avx-load-store.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
2*9880d681SAndroid Build Coastguard Worker; RUN: llc -O0 < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s -check-prefix=CHECK_O0
3*9880d681SAndroid Build Coastguard Worker
; test_256_load: loads one <4 x double>, one <8 x float> and one <4 x i64>
; (each 256 bits wide, each with 32-byte alignment), passes all three to
; @dummy, then stores each value back through the pointer it was loaded from,
; again with 32-byte alignment.
; The six directives below only require six vmovaps occurrences, in order;
; they do not pin which IR operation produces each one.
4*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps
5*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps
6*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps
7*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps
8*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps
9*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps
10*9880d681SAndroid Build Coastguard Workerdefine void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>* nocapture %i) nounwind uwtable ssp {
11*9880d681SAndroid Build Coastguard Workerentry:
12*9880d681SAndroid Build Coastguard Worker  %0 = bitcast double* %d to <4 x double>*
13*9880d681SAndroid Build Coastguard Worker  %tmp1.i = load <4 x double>, <4 x double>* %0, align 32
14*9880d681SAndroid Build Coastguard Worker  %1 = bitcast float* %f to <8 x float>*
15*9880d681SAndroid Build Coastguard Worker  %tmp1.i17 = load <8 x float>, <8 x float>* %1, align 32
16*9880d681SAndroid Build Coastguard Worker  %tmp1.i16 = load <4 x i64>, <4 x i64>* %i, align 32
17*9880d681SAndroid Build Coastguard Worker  tail call void @dummy(<4 x double> %tmp1.i, <8 x float> %tmp1.i17, <4 x i64> %tmp1.i16) nounwind
18*9880d681SAndroid Build Coastguard Worker  store <4 x double> %tmp1.i, <4 x double>* %0, align 32
19*9880d681SAndroid Build Coastguard Worker  store <8 x float> %tmp1.i17, <8 x float>* %1, align 32
20*9880d681SAndroid Build Coastguard Worker  store <4 x i64> %tmp1.i16, <4 x i64>* %i, align 32
21*9880d681SAndroid Build Coastguard Worker  ret void
22*9880d681SAndroid Build Coastguard Worker}
23*9880d681SAndroid Build Coastguard Worker
; External function called from @test_256_load with the three loaded vectors.
24*9880d681SAndroid Build Coastguard Workerdeclare void @dummy(<4 x double>, <8 x float>, <4 x i64>)
25*9880d681SAndroid Build Coastguard Worker
26*9880d681SAndroid Build Coastguard Worker;;
27*9880d681SAndroid Build Coastguard Worker;; The two tests below check that load + scalar_to_vector + ins_subvec + zext
28*9880d681SAndroid Build Coastguard Worker;; is folded into a single vmovss, vmovsd, or vinsertps from memory.
29*9880d681SAndroid Build Coastguard Worker
; mov00: loads a float from %ptr and inserts it into element 0 of an all-zero
; <8 x float>, which is returned. The %v argument is not used.
; Expected output: the function name, then a vmovss whose first operand is a
; parenthesised register (i.e. a memory operand), then a return.
30*9880d681SAndroid Build Coastguard Worker; CHECK: mov00
31*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind {
32*9880d681SAndroid Build Coastguard Worker  %val = load float, float* %ptr
33*9880d681SAndroid Build Coastguard Worker; CHECK: vmovss (%
34*9880d681SAndroid Build Coastguard Worker  %i0 = insertelement <8 x float> zeroinitializer, float %val, i32 0
35*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %i0
36*9880d681SAndroid Build Coastguard Worker; CHECK: ret
37*9880d681SAndroid Build Coastguard Worker}
38*9880d681SAndroid Build Coastguard Worker
; mov01: double-precision counterpart of @mov00. Loads a double from %ptr and
; inserts it into element 0 of an all-zero <4 x double>, which is returned.
; The %v argument is not used.
; Expected output: the function name, then a vmovsd whose first operand is a
; parenthesised register (i.e. a memory operand), then a return.
39*9880d681SAndroid Build Coastguard Worker; CHECK: mov01
40*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind {
41*9880d681SAndroid Build Coastguard Worker  %val = load double, double* %ptr
42*9880d681SAndroid Build Coastguard Worker; CHECK: vmovsd (%
43*9880d681SAndroid Build Coastguard Worker  %i0 = insertelement <4 x double> zeroinitializer, double %val, i32 0
44*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %i0
45*9880d681SAndroid Build Coastguard Worker; CHECK: ret
46*9880d681SAndroid Build Coastguard Worker}
47*9880d681SAndroid Build Coastguard Worker
; storev16i16: stores a <16 x i16> argument with 32-byte alignment. The
; destination pointer is undef and the function ends in unreachable, so the
; store is the only operation present.
; Expected output: a vmovaps whose source operand is a YMM register.
; The function-name directive anchors the vmovaps match to this function,
; matching how the neighbouring store tests are written.
; CHECK: storev16i16
48*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps  %ymm
49*9880d681SAndroid Build Coastguard Workerdefine void @storev16i16(<16 x i16> %a) nounwind {
50*9880d681SAndroid Build Coastguard Worker  store <16 x i16> %a, <16 x i16>* undef, align 32
51*9880d681SAndroid Build Coastguard Worker  unreachable
52*9880d681SAndroid Build Coastguard Worker}
53*9880d681SAndroid Build Coastguard Worker
; storev16i16_01: same store as @storev16i16 but with only 4-byte alignment.
; Expected output: the function name, then a vextractf128, then a vmovups
; whose source operand is an XMM register. Presumably the under-aligned
; 256-bit store is split into 128-bit unaligned stores; the directives only
; require one occurrence of each instruction, in that order.
54*9880d681SAndroid Build Coastguard Worker; CHECK: storev16i16_01
55*9880d681SAndroid Build Coastguard Worker; CHECK: vextractf128
56*9880d681SAndroid Build Coastguard Worker; CHECK: vmovups  %xmm
57*9880d681SAndroid Build Coastguard Workerdefine void @storev16i16_01(<16 x i16> %a) nounwind {
58*9880d681SAndroid Build Coastguard Worker  store <16 x i16> %a, <16 x i16>* undef, align 4
59*9880d681SAndroid Build Coastguard Worker  unreachable
60*9880d681SAndroid Build Coastguard Worker}
61*9880d681SAndroid Build Coastguard Worker
; storev32i8: stores a <32 x i8> argument with 32-byte alignment. The
; destination pointer is undef and the function ends in unreachable.
; Expected output: the function name, then a vmovaps whose source operand is
; a YMM register.
62*9880d681SAndroid Build Coastguard Worker; CHECK: storev32i8
63*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps  %ymm
64*9880d681SAndroid Build Coastguard Workerdefine void @storev32i8(<32 x i8> %a) nounwind {
65*9880d681SAndroid Build Coastguard Worker  store <32 x i8> %a, <32 x i8>* undef, align 32
66*9880d681SAndroid Build Coastguard Worker  unreachable
67*9880d681SAndroid Build Coastguard Worker}
68*9880d681SAndroid Build Coastguard Worker
; storev32i8_01: same store as @storev32i8 but with only 4-byte alignment.
; Expected output: the function name, then a vextractf128, then a vmovups
; whose source operand is an XMM register. Presumably the under-aligned
; 256-bit store is split into 128-bit unaligned stores; the directives only
; require one occurrence of each instruction, in that order.
69*9880d681SAndroid Build Coastguard Worker; CHECK: storev32i8_01
70*9880d681SAndroid Build Coastguard Worker; CHECK: vextractf128
71*9880d681SAndroid Build Coastguard Worker; CHECK: vmovups  %xmm
72*9880d681SAndroid Build Coastguard Workerdefine void @storev32i8_01(<32 x i8> %a) nounwind {
73*9880d681SAndroid Build Coastguard Worker  store <32 x i8> %a, <32 x i8>* undef, align 4
74*9880d681SAndroid Build Coastguard Worker  unreachable
75*9880d681SAndroid Build Coastguard Worker}
76*9880d681SAndroid Build Coastguard Worker
; double_save: concatenates the two <4 x i32> arguments into one <8 x i32>
; (shuffle indices 0-7 select all of %A followed by all of %B) and stores the
; result to %P with 16-byte alignment.
; Expected output: the function name, then two vmovaps with an XMM source.
; The two negative directives apply only to the text between the function
; name match and the first vmovaps match: no vinsertf128 with immediate $1 or
; $0 may appear there.
77*9880d681SAndroid Build Coastguard Worker; When the data is already in XMM registers (for example, after an integer
78*9880d681SAndroid Build Coastguard Worker; operation), it is faster to emit two separate 128-bit stores.
79*9880d681SAndroid Build Coastguard Worker; CHECK: _double_save
80*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: vinsertf128 $1
81*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: vinsertf128 $0
82*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps %xmm
83*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps %xmm
84*9880d681SAndroid Build Coastguard Workerdefine void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp {
85*9880d681SAndroid Build Coastguard Workerentry:
86*9880d681SAndroid Build Coastguard Worker  %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
87*9880d681SAndroid Build Coastguard Worker  store <8 x i32> %Z, <8 x i32>* %P, align 16
88*9880d681SAndroid Build Coastguard Worker  ret void
89*9880d681SAndroid Build Coastguard Worker}
90*9880d681SAndroid Build Coastguard Worker
; AVX masked-store intrinsic for <8 x float>, called from @f_f. Judging by the
; types, the operands appear to be (address, mask, value) -- confirm against
; the intrinsic definition.
91*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind
92*9880d681SAndroid Build Coastguard Worker
; f_f: a control-flow skeleton whose branches are all conditioned on undef
; and whose blocks all end in unreachable. The only call is in
; %cif_mixed_test_all: a masked store with an undef address, an undef value,
; and a constant mask in which only element 0 is -1 (all other elements 0).
;
; NOTE(review): only the first directive below uses the underscore prefix
; that the -O0 invocation at the top of the file registers via -check-prefix.
; The following three spell the prefix with a hyphen ("-O0"), which no
; invocation registers, so they appear to be inert. Switching them to the
; underscore spelling would activate them; confirm against actual llc -O0
; output before doing so.
93*9880d681SAndroid Build Coastguard Worker; CHECK_O0: _f_f
94*9880d681SAndroid Build Coastguard Worker; CHECK-O0: vmovss LCPI
95*9880d681SAndroid Build Coastguard Worker; CHECK-O0: vxorps  %xmm
96*9880d681SAndroid Build Coastguard Worker; CHECK-O0: vmovss %xmm
97*9880d681SAndroid Build Coastguard Workerdefine void @f_f() nounwind {
98*9880d681SAndroid Build Coastguard Workerallocas:
99*9880d681SAndroid Build Coastguard Worker  br i1 undef, label %cif_mask_all, label %cif_mask_mixed
100*9880d681SAndroid Build Coastguard Worker
101*9880d681SAndroid Build Coastguard Workercif_mask_all:                                     ; preds = %allocas
102*9880d681SAndroid Build Coastguard Worker  unreachable
103*9880d681SAndroid Build Coastguard Worker
104*9880d681SAndroid Build Coastguard Workercif_mask_mixed:                                   ; preds = %allocas
105*9880d681SAndroid Build Coastguard Worker  br i1 undef, label %cif_mixed_test_all, label %cif_mixed_test_any_check
106*9880d681SAndroid Build Coastguard Worker
107*9880d681SAndroid Build Coastguard Workercif_mixed_test_all:                               ; preds = %cif_mask_mixed
108*9880d681SAndroid Build Coastguard Worker  call void @llvm.x86.avx.maskstore.ps.256(i8* undef, <8 x i32> <i32 -1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, <8 x float> undef) nounwind
109*9880d681SAndroid Build Coastguard Worker  unreachable
110*9880d681SAndroid Build Coastguard Worker
111*9880d681SAndroid Build Coastguard Workercif_mixed_test_any_check:                         ; preds = %cif_mask_mixed
112*9880d681SAndroid Build Coastguard Worker  unreachable
113*9880d681SAndroid Build Coastguard Worker}
114*9880d681SAndroid Build Coastguard Worker
; add8i32: loads an <8 x i32> from %bp with 1-byte alignment, adds it to an
; all-zero vector, and stores the result to %ret with 1-byte alignment.
; Expected output: the function name, two vmovups, then two more vmovups,
; with no vinsertf128 or vextractf128 between the second and third vmovups.
; Presumably the first pair is the split 128-bit load and the second pair the
; split 128-bit store, with the halves never merged into a YMM register; the
; directives themselves do not distinguish loads from stores.
115*9880d681SAndroid Build Coastguard Worker; CHECK: add8i32
116*9880d681SAndroid Build Coastguard Worker; CHECK: vmovups
117*9880d681SAndroid Build Coastguard Worker; CHECK: vmovups
118*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: vinsertf128
119*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: vextractf128
120*9880d681SAndroid Build Coastguard Worker; CHECK: vmovups
121*9880d681SAndroid Build Coastguard Worker; CHECK: vmovups
122*9880d681SAndroid Build Coastguard Workerdefine void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind {
123*9880d681SAndroid Build Coastguard Worker  %b = load <8 x i32>, <8 x i32>* %bp, align 1
124*9880d681SAndroid Build Coastguard Worker  %x = add <8 x i32> zeroinitializer, %b
125*9880d681SAndroid Build Coastguard Worker  store <8 x i32> %x, <8 x i32>* %ret, align 1
126*9880d681SAndroid Build Coastguard Worker  ret void
127*9880d681SAndroid Build Coastguard Worker}
128*9880d681SAndroid Build Coastguard Worker
; add4i64a64: loads a <4 x i64> from %bp with 64-byte alignment, adds it to an
; all-zero vector, and stores the result to %ret with 64-byte alignment.
; Expected output: the function name, then a vmovaps from a memory operand
; into a YMM register, then a vmovaps from a YMM register to a memory operand
; (i.e. one full-width aligned load and one full-width aligned store).
129*9880d681SAndroid Build Coastguard Worker; CHECK: add4i64a64
130*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps ({{.*}}), %ymm{{.*}}
131*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps %ymm{{.*}}, ({{.*}})
132*9880d681SAndroid Build Coastguard Workerdefine void @add4i64a64(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
133*9880d681SAndroid Build Coastguard Worker  %b = load <4 x i64>, <4 x i64>* %bp, align 64
134*9880d681SAndroid Build Coastguard Worker  %x = add <4 x i64> zeroinitializer, %b
135*9880d681SAndroid Build Coastguard Worker  store <4 x i64> %x, <4 x i64>* %ret, align 64
136*9880d681SAndroid Build Coastguard Worker  ret void
137*9880d681SAndroid Build Coastguard Worker}
138*9880d681SAndroid Build Coastguard Worker
; add4i64a16: same computation as @add4i64a64 but the load and the store are
; only 16-byte aligned.
; Expected output: the function name, then two vmovaps from memory operands
; (optionally with a displacement) into XMM registers, then two vmovaps from
; XMM registers to memory operands -- i.e. the 256-bit accesses are expected
; as pairs of aligned 128-bit moves.
139*9880d681SAndroid Build Coastguard Worker; CHECK: add4i64a16
140*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps {{.*}}({{.*}}), %xmm{{.*}}
141*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps {{.*}}({{.*}}), %xmm{{.*}}
142*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}})
143*9880d681SAndroid Build Coastguard Worker; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}})
144*9880d681SAndroid Build Coastguard Workerdefine void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
145*9880d681SAndroid Build Coastguard Worker  %b = load <4 x i64>, <4 x i64>* %bp, align 16
146*9880d681SAndroid Build Coastguard Worker  %x = add <4 x i64> zeroinitializer, %b
147*9880d681SAndroid Build Coastguard Worker  store <4 x i64> %x, <4 x i64>* %ret, align 16
148*9880d681SAndroid Build Coastguard Worker  ret void
149*9880d681SAndroid Build Coastguard Worker}
150