; xref: /aosp_15_r20/external/llvm/test/CodeGen/ARM/arm-interleaved-accesses.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; RUN: llc -mtriple=arm-eabi -mattr=+neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON
; RUN: llc -mtriple=arm-eabi -mattr=-neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NONEON

; Factor-2 de-interleaving load: a <16 x i8> load is split into its even- and
; odd-indexed lanes, and the two halves are added. With NEON enabled a single
; vld2.8 is expected; with NEON disabled no vld2 may appear.
; NEON-LABEL: load_factor2:
; NEON: vld2.8 {d16, d17}, [r0]
; NONEON-LABEL: load_factor2:
; NONEON-NOT: vld2
define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
  %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
  %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %strided.v1 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %add = add nsw <8 x i8> %strided.v0, %strided.v1
  ret <8 x i8> %add
}

; Factor-3 de-interleaving load: a <6 x i32> load is read with stride 3, using
; only the lanes at offsets 2 (indices 2, 5) and 1 (indices 1, 4); offset 0 is
; unused. With NEON enabled a single vld3.32 is expected; with NEON disabled no
; vld3 may appear.
; NEON-LABEL: load_factor3:
; NEON: vld3.32 {d16, d17, d18}, [r0]
; NONEON-LABEL: load_factor3:
; NONEON-NOT: vld3
define <2 x i32> @load_factor3(i32* %ptr) {
  %base = bitcast i32* %ptr to <6 x i32>*
  %wide.vec = load <6 x i32>, <6 x i32>* %base, align 4
  %strided.v2 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 2, i32 5>
  %strided.v1 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 1, i32 4>
  %add = add nsw <2 x i32> %strided.v2, %strided.v1
  ret <2 x i32> %add
}

; Factor-4 de-interleaving load: a <16 x i32> load is read with stride 4, using
; only the lanes at offsets 0 and 2. With NEON enabled two vld4.32 instructions
; are expected (even d-registers first, then odd d-registers); with NEON
; disabled no vld4 may appear.
; NEON-LABEL: load_factor4:
; NEON: vld4.32 {d16, d18, d20, d22}, [r0]!
; NEON: vld4.32 {d17, d19, d21, d23}, [r0]
; NONEON-LABEL: load_factor4:
; NONEON-NOT: vld4
define <4 x i32> @load_factor4(i32* %ptr) {
  %base = bitcast i32* %ptr to <16 x i32>*
  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %add = add nsw <4 x i32> %strided.v0, %strided.v2
  ret <4 x i32> %add
}

; Factor-2 interleaving store: %v0 and %v1 are interleaved lane by lane
; (v0[0], v1[0], v0[1], v1[1], ...) and stored as one <16 x i8>. With NEON
; enabled a single vst2.8 is expected; with NEON disabled no vst2 may appear.
; NEON-LABEL: store_factor2:
; NEON: vst2.8 {d16, d17}, [r0]
; NONEON-LABEL: store_factor2:
; NONEON-NOT: vst2
define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
  ret void
}

; Factor-3 interleaving store: %v0 and %v1 are concatenated, %v2 is widened
; with undef lanes, and the two 8-lane vectors are interleaved 3-way into a
; <12 x i32> that is stored. With NEON enabled two vst3.32 instructions are
; expected (even d-registers first, then odd d-registers); with NEON disabled
; no vst3 may appear.
; NEON-LABEL: store_factor3:
; NEON: vst3.32 {d16, d18, d20}, [r0]!
; NEON: vst3.32 {d17, d19, d21}, [r0]
; NONEON-LABEL: store_factor3:
; NONEON-NOT: vst3
define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
  %base = bitcast i32* %ptr to <12 x i32>*
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
  ret void
}

; Factor-4 interleaving store: %v0/%v1 and %v2/%v3 are concatenated pairwise,
; then interleaved 4-way into a <16 x i32> that is stored. With NEON enabled
; two vst4.32 instructions are expected (even d-registers first, then odd
; d-registers); with NEON disabled no vst4 may appear.
; NEON-LABEL: store_factor4:
; NEON: vst4.32 {d16, d18, d20, d22}, [r0]!
; NEON: vst4.32 {d17, d19, d21, d23}, [r0]
; NONEON-LABEL: store_factor4:
; NONEON-NOT: vst4
define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
  %base = bitcast i32* %ptr to <16 x i32>*
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
  ret void
}

; The following cases test that interleaved accesses of pointer vectors can be
; matched to vldN/vstN instructions.

; Factor-2 de-interleaving load of a pointer vector: a <4 x i32*> load is
; reduced to lanes 0 and 2, which are returned. With NEON enabled a vld2.32 is
; expected; with NEON disabled no vld2 may appear.
; NEON-LABEL: load_ptrvec_factor2:
; NEON: vld2.32 {d16, d17}, [r0]
; NONEON-LABEL: load_ptrvec_factor2:
; NONEON-NOT: vld2
define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
  %base = bitcast i32** %ptr to <4 x i32*>*
  %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4
  %strided.v0 = shufflevector <4 x i32*> %wide.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
  ret <2 x i32*> %strided.v0
}

; Factor-3 de-interleaving load of a pointer vector: a <6 x i32*> load is read
; with stride 3; the lanes at offset 2 are stored to %ptr1 and the lanes at
; offset 1 to %ptr2. With NEON enabled a vld3.32 is expected; with NEON
; disabled no vld3 may appear.
; NEON-LABEL: load_ptrvec_factor3:
; NEON: vld3.32 {d16, d17, d18}, [r0]
; NONEON-LABEL: load_ptrvec_factor3:
; NONEON-NOT: vld3
define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
  %base = bitcast i32** %ptr to <6 x i32*>*
  %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4
  %strided.v2 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
  store <2 x i32*> %strided.v2, <2 x i32*>* %ptr1
  %strided.v1 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr2
  ret void
}

; Factor-4 de-interleaving load of a pointer vector: an <8 x i32*> load is read
; with stride 4; the lanes at offset 1 are stored to %ptr1 and the lanes at
; offset 3 to %ptr2. With NEON enabled a vld4.32 is expected; with NEON
; disabled no vld4 may appear.
; NEON-LABEL: load_ptrvec_factor4:
; NEON: vld4.32 {d16, d17, d18, d19}, [r0]
; NONEON-LABEL: load_ptrvec_factor4:
; NONEON-NOT: vld4
define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
  %base = bitcast i32** %ptr to <8 x i32*>*
  %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4
  %strided.v1 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
  %strided.v3 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr1
  store <2 x i32*> %strided.v3, <2 x i32*>* %ptr2
  ret void
}

; Factor-2 interleaving store of pointer vectors: %v0 and %v1 are interleaved
; lane by lane into a <4 x i32*> that is stored. With NEON enabled a vst2.32 is
; expected; with NEON disabled no vst2 may appear.
; NEON-LABEL: store_ptrvec_factor2:
; NEON: vst2.32 {d16, d17}, [r0]
; NONEON-LABEL: store_ptrvec_factor2:
; NONEON-NOT: vst2
define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
  %base = bitcast i32** %ptr to <4 x i32*>*
  %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x i32*> %interleaved.vec, <4 x i32*>* %base, align 4
  ret void
}

; Factor-3 interleaving store of pointer vectors: %v0 and %v1 are concatenated,
; %v2 is widened with undef lanes, and the result is interleaved 3-way into a
; <6 x i32*> that is stored. With NEON enabled a vst3.32 is expected; with NEON
; disabled no vst3 may appear.
; NEON-LABEL: store_ptrvec_factor3:
; NEON: vst3.32 {d16, d17, d18}, [r0]
; NONEON-LABEL: store_ptrvec_factor3:
; NONEON-NOT: vst3
define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
  %base = bitcast i32** %ptr to <6 x i32*>*
  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %v2_u = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_u, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
  store <6 x i32*> %interleaved.vec, <6 x i32*>* %base, align 4
  ret void
}

; Factor-4 interleaving store of pointer vectors: %v0/%v1 and %v2/%v3 are
; concatenated pairwise, then interleaved 4-way into an <8 x i32*> that is
; stored. With NEON enabled a vst4.32 is expected; with NEON disabled no vst4
; may appear.
; NEON-LABEL: store_ptrvec_factor4:
; NEON: vst4.32 {d16, d17, d18, d19}, [r0]
; NONEON-LABEL: store_ptrvec_factor4:
; NONEON-NOT: vst4
define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
  %base = bitcast i32* %ptr to <8 x i32*>*
  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %v2_v3 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_v3, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  store <8 x i32*> %interleaved.vec, <8 x i32*>* %base, align 4
  ret void
}

; The following cases check that shuffle masks with undef indices can be
; matched to vldN/vstN instructions.

; Factor-2 de-interleaving load where both shuffle masks have undef in lanes 0
; and 2; only indices 2, 6 and 3, 7 are specified. With NEON enabled a vld2.32
; is still expected; with NEON disabled no vld2 may appear.
; NEON-LABEL: load_undef_mask_factor2:
; NEON: vld2.32 {d16, d17, d18, d19}, [r0]
; NONEON-LABEL: load_undef_mask_factor2:
; NONEON-NOT: vld2
define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
  %base = bitcast i32* %ptr to <8 x i32>*
  %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4
  %strided.v0 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
  %strided.v1 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
  %add = add nsw <4 x i32> %strided.v0, %strided.v1
  ret <4 x i32> %add
}

; Factor-3 de-interleaving load where one mask specifies only its first index
; (2, then three undefs) and the other is the full stride-3 sequence
; 1, 4, 7, 10. With NEON enabled two vld3.32 instructions are expected; with
; NEON disabled no vld3 may appear.
; NEON-LABEL: load_undef_mask_factor3:
; NEON: vld3.32 {d16, d18, d20}, [r0]!
; NEON: vld3.32 {d17, d19, d21}, [r0]
; NONEON-LABEL: load_undef_mask_factor3:
; NONEON-NOT: vld3
define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
  %base = bitcast i32* %ptr to <12 x i32>*
  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
  %add = add nsw <4 x i32> %strided.v2, %strided.v1
  ret <4 x i32> %add
}

; Factor-4 de-interleaving load where both masks specify only their first two
; indices (0, 4 and 2, 6) and leave the last two lanes undef. With NEON enabled
; two vld4.32 instructions are expected; with NEON disabled no vld4 may appear.
; NEON-LABEL: load_undef_mask_factor4:
; NEON: vld4.32 {d16, d18, d20, d22}, [r0]!
; NEON: vld4.32 {d17, d19, d21, d23}, [r0]
; NONEON-LABEL: load_undef_mask_factor4:
; NONEON-NOT: vld4
define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
  %base = bitcast i32* %ptr to <16 x i32>*
  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
  %add = add nsw <4 x i32> %strided.v0, %strided.v2
  ret <4 x i32> %add
}

; Factor-2 interleaving store whose mask leaves the first four lanes undef and
; specifies only the tail 2, 6, 3, 7. With NEON enabled a vst2.32 is still
; expected; with NEON disabled no vst2 may appear.
; NEON-LABEL: store_undef_mask_factor2:
; NEON: vst2.32 {d16, d17, d18, d19}, [r0]
; NONEON-LABEL: store_undef_mask_factor2:
; NONEON-NOT: vst2
define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
  %base = bitcast i32* %ptr to <8 x i32>*
  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
  store <8 x i32> %interleaved.vec, <8 x i32>* %base, align 4
  ret void
}

; Factor-3 interleaving store whose 12-lane mask has undef in lanes 2 and 4;
; the remaining lanes follow the regular 3-way interleave pattern. With NEON
; enabled two vst3.32 instructions are expected; with NEON disabled no vst3 may
; appear.
; NEON-LABEL: store_undef_mask_factor3:
; NEON: vst3.32 {d16, d18, d20}, [r0]!
; NEON: vst3.32 {d17, d19, d21}, [r0]
; NONEON-LABEL: store_undef_mask_factor3:
; NONEON-NOT: vst3
define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
  %base = bitcast i32* %ptr to <12 x i32>*
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
  ret void
}

; Factor-4 interleaving store whose 16-lane mask has undef in lanes 3 and 4;
; the remaining lanes follow the regular 4-way interleave pattern. With NEON
; enabled two vst4.32 instructions are expected; with NEON disabled no vst4 may
; appear.
; NEON-LABEL: store_undef_mask_factor4:
; NEON: vst4.32 {d16, d18, d20, d22}, [r0]!
; NEON: vst4.32 {d17, d19, d21, d23}, [r0]
; NONEON-LABEL: store_undef_mask_factor4:
; NONEON-NOT: vst4
define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
  %base = bitcast i32* %ptr to <16 x i32>*
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
  ret void
}

; The following test cases check that address spaces are properly handled.

; De-interleaving load through a pointer in addrspace(1): a <4 x i32> is loaded
; from %A, lanes 0 and 3 (stride 3) are extracted, and the result is stored to
; %B. With NEON enabled a vld3.32 is expected; with NEON disabled no vld3 may
; appear.
; NEON-LABEL: load_address_space:
; NEON: vld3.32
; NONEON-LABEL: load_address_space:
; NONEON-NOT: vld3
define void @load_address_space(<4 x i32> addrspace(1)* %A, <2 x i32>* %B) {
  %tmp = load <4 x i32>, <4 x i32> addrspace(1)* %A
  %interleaved = shufflevector <4 x i32> %tmp, <4 x i32> undef, <2 x i32> <i32 0, i32 3>
  store <2 x i32> %interleaved, <2 x i32>* %B
  ret void
}

; Interleaving store through a pointer in addrspace(1): two <2 x i32> values
; are loaded from %A and %B, interleaved lane by lane, and stored to %C. With
; NEON enabled a vst2.32 is expected; with NEON disabled no vst2 may appear.
; NEON-LABEL: store_address_space:
; NEON: vst2.32
; NONEON-LABEL: store_address_space:
; NONEON-NOT: vst2
define void @store_address_space(<2 x i32>* %A, <2 x i32>* %B, <4 x i32> addrspace(1)* %C) {
  %tmp0 = load <2 x i32>, <2 x i32>* %A
  %tmp1 = load <2 x i32>, <2 x i32>* %B
  %interleaved = shufflevector <2 x i32> %tmp0, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x i32> %interleaved, <4 x i32> addrspace(1)* %C
  ret void
}

; Check that we do something sane with illegal types.

; Stride-2 shuffle (indices 0, 2, undef) of a loaded <3 x float>. The checks
; pin the complete instruction sequence for both configurations: with NEON
; enabled a vld1.64 followed by vuzp.32 and two vmov instructions, and with
; NEON disabled two scalar ldr instructions and a mov. Neither sequence
; contains a vld2.
; NEON-LABEL: load_illegal_factor2:
; NEON: BB#0:
; NEON-NEXT: vld1.64 {d16, d17}, [r0:128]
; NEON-NEXT: vuzp.32 q8, {{.*}}
; NEON-NEXT: vmov r0, r1, d16
; NEON-NEXT: vmov r2, r3, {{.*}}
; NEON-NEXT: mov pc, lr
; NONEON-LABEL: load_illegal_factor2:
; NONEON: BB#0:
; NONEON-NEXT: ldr [[ELT0:r[0-9]+]], [r0]
; NONEON-NEXT: ldr r1, [r0, #8]
; NONEON-NEXT: mov r0, [[ELT0]]
; NONEON-NEXT: mov pc, lr
define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind {
  %tmp1 = load <3 x float>, <3 x float>* %p, align 16
  %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
  ret <3 x float> %tmp2
}

; Stride-2 shuffle (indices 0, 2, undef) of a <3 x float> argument that is then
; stored. The checks pin the complete instruction sequence for both
; configurations; neither contains a vst2.
; This lowering isn't great, but it's at least correct.
; NEON-LABEL: store_illegal_factor2:
; NEON: BB#0:
; NEON-NEXT: vldr d17, [sp]
; NEON-NEXT: vmov d16, r2, r3
; NEON-NEXT: vuzp.32 q8, {{.*}}
; NEON-NEXT: vstr d16, [r0]
; NEON-NEXT: mov pc, lr
; NONEON-LABEL: store_illegal_factor2:
; NONEON: BB#0:
; NONEON-NEXT: stm r0, {r1, r3}
; NONEON-NEXT: mov pc, lr
define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {
  %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
  store <3 x float> %tmp1, <3 x float>* %p, align 16
  ret void
}

; The wide <8 x i32> load has two users: a stride-2 shuffle (%2, whose result
; is otherwise unused) and an extractelement of lane 2, which is returned. With
; NEON enabled a vld2.32 is expected, followed by a vmov.32 that reads the
; returned element from d16[1]; with NEON disabled no vld2 may appear.
; NEON-LABEL: load_factor2_with_extract_user:
; NEON: vld2.32 {d16, d17, d18, d19}, [r0:64]
; NEON: vmov.32 r0, d16[1]
; NONEON-LABEL: load_factor2_with_extract_user:
; NONEON-NOT: vld2
define i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
  %1 = load <8 x i32>, <8 x i32>* %a, align 8
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %3 = extractelement <8 x i32> %1, i32 2
  ret i32 %3
}