xref: /aosp_15_r20/external/llvm/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON
2*9880d681SAndroid Build Coastguard Worker; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true -mattr=-neon < %s | FileCheck %s -check-prefix=NONEON
3*9880d681SAndroid Build Coastguard Worker
; Test: factor-2 interleaved load of i8 elements.
; A single <16 x i8> load feeds two shufflevectors that select the even lanes
; (0,2,...,14) and the odd lanes (1,3,...,15); the two results are added.
; The checks below expect one ld2 when NEON is available and no ld2 in the
; -mattr=-neon run.
4*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: load_factor2:
5*9880d681SAndroid Build Coastguard Worker; NEON: ld2 { v0.8b, v1.8b }, [x0]
6*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: load_factor2:
7*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: ld2
8*9880d681SAndroid Build Coastguard Workerdefine <8 x i8> @load_factor2(<16 x i8>* %ptr) {
9*9880d681SAndroid Build Coastguard Worker  %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
; Even lanes -> component 0, odd lanes -> component 1.
10*9880d681SAndroid Build Coastguard Worker  %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
11*9880d681SAndroid Build Coastguard Worker  %strided.v1 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
12*9880d681SAndroid Build Coastguard Worker  %add = add nsw <8 x i8> %strided.v0, %strided.v1
13*9880d681SAndroid Build Coastguard Worker  ret <8 x i8> %add
14*9880d681SAndroid Build Coastguard Worker}
15*9880d681SAndroid Build Coastguard Worker
; Test: factor-3 interleaved load of i32 elements.
; A <12 x i32> load is de-interleaved with stride 3. Only components 2
; (lanes 2,5,8,11) and 1 (lanes 1,4,7,10) are extracted; component 0 is not
; used. The checks below still expect a full ld3 when NEON is available and no
; ld3 in the -mattr=-neon run.
16*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: load_factor3:
17*9880d681SAndroid Build Coastguard Worker; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
18*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: load_factor3:
19*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: ld3
20*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @load_factor3(i32* %ptr) {
; Reinterpret the scalar pointer as a pointer to the 12-element wide vector.
21*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32* %ptr to <12 x i32>*
22*9880d681SAndroid Build Coastguard Worker  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
23*9880d681SAndroid Build Coastguard Worker  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
24*9880d681SAndroid Build Coastguard Worker  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
25*9880d681SAndroid Build Coastguard Worker  %add = add nsw <4 x i32> %strided.v2, %strided.v1
26*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %add
27*9880d681SAndroid Build Coastguard Worker}
28*9880d681SAndroid Build Coastguard Worker
; Test: factor-4 interleaved load of i32 elements.
; A <16 x i32> load is de-interleaved with stride 4. Only components 0
; (lanes 0,4,8,12) and 2 (lanes 2,6,10,14) are extracted and added.
; The checks below expect an ld4 when NEON is available and no ld4 in the
; -mattr=-neon run.
29*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: load_factor4:
30*9880d681SAndroid Build Coastguard Worker; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
31*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: load_factor4:
32*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: ld4
33*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @load_factor4(i32* %ptr) {
; Reinterpret the scalar pointer as a pointer to the 16-element wide vector.
34*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32* %ptr to <16 x i32>*
35*9880d681SAndroid Build Coastguard Worker  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
36*9880d681SAndroid Build Coastguard Worker  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
37*9880d681SAndroid Build Coastguard Worker  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
38*9880d681SAndroid Build Coastguard Worker  %add = add nsw <4 x i32> %strided.v0, %strided.v2
39*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %add
40*9880d681SAndroid Build Coastguard Worker}
41*9880d681SAndroid Build Coastguard Worker
; Test: factor-2 interleaved store of i8 elements.
; %v0 and %v1 are interleaved lane by lane (v0[0], v1[0], v0[1], v1[1], ...)
; by one shufflevector and the <16 x i8> result is stored.
; The checks below expect an st2 when NEON is available and no st2 in the
; -mattr=-neon run.
42*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: store_factor2:
43*9880d681SAndroid Build Coastguard Worker; NEON: st2 { v0.8b, v1.8b }, [x0]
44*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: store_factor2:
45*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: st2
46*9880d681SAndroid Build Coastguard Workerdefine void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
; Mask indices 0-7 select from %v0 and 8-15 select from %v1.
47*9880d681SAndroid Build Coastguard Worker  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
48*9880d681SAndroid Build Coastguard Worker  store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
49*9880d681SAndroid Build Coastguard Worker  ret void
50*9880d681SAndroid Build Coastguard Worker}
51*9880d681SAndroid Build Coastguard Worker
; Test: factor-3 interleaved store of i32 elements.
; %v0 and %v1 are concatenated into one 8-lane vector, %v2 is widened to 8
; lanes with undef padding, and a final shufflevector interleaves the three
; sources with stride 3 before the <12 x i32> store.
; The checks below expect an st3 when NEON is available and no st3 in the
; -mattr=-neon run.
52*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: store_factor3:
53*9880d681SAndroid Build Coastguard Worker; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0]
54*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: store_factor3:
55*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: st3
56*9880d681SAndroid Build Coastguard Workerdefine void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
57*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32* %ptr to <12 x i32>*
; Build the two 8-lane operands: [v0 | v1] and [v2 | undef].
58*9880d681SAndroid Build Coastguard Worker  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
59*9880d681SAndroid Build Coastguard Worker  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; Indices 0-3 pick v0, 4-7 pick v1, 8-11 pick v2.
60*9880d681SAndroid Build Coastguard Worker  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
61*9880d681SAndroid Build Coastguard Worker  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
62*9880d681SAndroid Build Coastguard Worker  ret void
63*9880d681SAndroid Build Coastguard Worker}
64*9880d681SAndroid Build Coastguard Worker
; Test: factor-4 interleaved store of i32 elements.
; %v0/%v1 and %v2/%v3 are concatenated pairwise into two 8-lane vectors, and a
; final shufflevector interleaves the four sources with stride 4 before the
; <16 x i32> store.
; The checks below expect an st4 when NEON is available and no st4 in the
; -mattr=-neon run.
65*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: store_factor4:
66*9880d681SAndroid Build Coastguard Worker; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
67*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: store_factor4:
68*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: st4
69*9880d681SAndroid Build Coastguard Workerdefine void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
70*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32* %ptr to <16 x i32>*
; Build the two 8-lane operands: [v0 | v1] and [v2 | v3].
71*9880d681SAndroid Build Coastguard Worker  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
72*9880d681SAndroid Build Coastguard Worker  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Indices 0-3 pick v0, 4-7 pick v1, 8-11 pick v2, 12-15 pick v3.
73*9880d681SAndroid Build Coastguard Worker  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
74*9880d681SAndroid Build Coastguard Worker  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
75*9880d681SAndroid Build Coastguard Worker  ret void
76*9880d681SAndroid Build Coastguard Worker}
77*9880d681SAndroid Build Coastguard Worker
78*9880d681SAndroid Build Coastguard Worker; The following cases test that interleaved accesses of pointer vectors can be
79*9880d681SAndroid Build Coastguard Worker; matched to ldN/stN instructions.
80*9880d681SAndroid Build Coastguard Worker
; Test: factor-2 interleaved load of a vector of pointers.
; A <4 x i32*> load is de-interleaved with stride 2; only component 0
; (lanes 0 and 2) is extracted and returned.
; The checks below expect an ld2 on .2d registers when NEON is available and
; no ld2 in the -mattr=-neon run.
81*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: load_ptrvec_factor2:
82*9880d681SAndroid Build Coastguard Worker; NEON: ld2 { v0.2d, v1.2d }, [x0]
83*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: load_ptrvec_factor2:
84*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: ld2
85*9880d681SAndroid Build Coastguard Workerdefine <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
86*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32** %ptr to <4 x i32*>*
87*9880d681SAndroid Build Coastguard Worker  %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4
88*9880d681SAndroid Build Coastguard Worker  %strided.v0 = shufflevector <4 x i32*> %wide.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
89*9880d681SAndroid Build Coastguard Worker  ret <2 x i32*> %strided.v0
90*9880d681SAndroid Build Coastguard Worker}
91*9880d681SAndroid Build Coastguard Worker
; Test: factor-3 interleaved load of a vector of pointers.
; A <6 x i32*> load is de-interleaved with stride 3. Component 2 (lanes 2,5)
; is stored to %ptr1 and component 1 (lanes 1,4) is stored to %ptr2;
; component 0 is not used.
; The checks below expect an ld3 on .2d registers when NEON is available and
; no ld3 in the -mattr=-neon run.
92*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: load_ptrvec_factor3:
93*9880d681SAndroid Build Coastguard Worker; NEON: ld3 { v0.2d, v1.2d, v2.2d }, [x0]
94*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: load_ptrvec_factor3:
95*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: ld3
96*9880d681SAndroid Build Coastguard Workerdefine void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
97*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32** %ptr to <6 x i32*>*
98*9880d681SAndroid Build Coastguard Worker  %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4
; Each extracted component is stored immediately after its shuffle.
99*9880d681SAndroid Build Coastguard Worker  %strided.v2 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
100*9880d681SAndroid Build Coastguard Worker  store <2 x i32*> %strided.v2, <2 x i32*>* %ptr1
101*9880d681SAndroid Build Coastguard Worker  %strided.v1 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
102*9880d681SAndroid Build Coastguard Worker  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr2
103*9880d681SAndroid Build Coastguard Worker  ret void
104*9880d681SAndroid Build Coastguard Worker}
105*9880d681SAndroid Build Coastguard Worker
; Test: factor-4 interleaved load of a vector of pointers.
; An <8 x i32*> load is de-interleaved with stride 4. Component 1 (lanes 1,5)
; is stored to %ptr1 and component 3 (lanes 3,7) is stored to %ptr2;
; components 0 and 2 are not used.
; The checks below expect an ld4 on .2d registers when NEON is available and
; no ld4 in the -mattr=-neon run.
106*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: load_ptrvec_factor4:
107*9880d681SAndroid Build Coastguard Worker; NEON: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
108*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: load_ptrvec_factor4:
109*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: ld4
110*9880d681SAndroid Build Coastguard Workerdefine void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
111*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32** %ptr to <8 x i32*>*
112*9880d681SAndroid Build Coastguard Worker  %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4
113*9880d681SAndroid Build Coastguard Worker  %strided.v1 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
114*9880d681SAndroid Build Coastguard Worker  %strided.v3 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
115*9880d681SAndroid Build Coastguard Worker  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr1
116*9880d681SAndroid Build Coastguard Worker  store <2 x i32*> %strided.v3, <2 x i32*>* %ptr2
117*9880d681SAndroid Build Coastguard Worker  ret void
118*9880d681SAndroid Build Coastguard Worker}
119*9880d681SAndroid Build Coastguard Worker
; Test: factor-2 interleaved store of pointer vectors.
; %v0 and %v1 (each <2 x i32*>) are interleaved as v0[0], v1[0], v0[1], v1[1]
; and the <4 x i32*> result is stored.
; The checks below expect an st2 on .2d registers when NEON is available and
; no st2 in the -mattr=-neon run.
120*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: store_ptrvec_factor2:
121*9880d681SAndroid Build Coastguard Worker; NEON: st2 { v0.2d, v1.2d }, [x0]
122*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: store_ptrvec_factor2:
123*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: st2
124*9880d681SAndroid Build Coastguard Workerdefine void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
125*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32** %ptr to <4 x i32*>*
; Mask indices 0-1 select from %v0 and 2-3 select from %v1.
126*9880d681SAndroid Build Coastguard Worker  %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
127*9880d681SAndroid Build Coastguard Worker  store <4 x i32*> %interleaved.vec, <4 x i32*>* %base, align 4
128*9880d681SAndroid Build Coastguard Worker  ret void
129*9880d681SAndroid Build Coastguard Worker}
130*9880d681SAndroid Build Coastguard Worker
; Test: factor-3 interleaved store of pointer vectors.
; %v0 and %v1 are concatenated into one 4-lane vector, %v2 is widened to 4
; lanes with undef padding, and a final shufflevector interleaves the three
; sources with stride 3 before the <6 x i32*> store.
; The checks below expect an st3 on .2d registers when NEON is available and
; no st3 in the -mattr=-neon run.
131*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: store_ptrvec_factor3:
132*9880d681SAndroid Build Coastguard Worker; NEON: st3 { v0.2d, v1.2d, v2.2d }, [x0]
133*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: store_ptrvec_factor3:
134*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: st3
135*9880d681SAndroid Build Coastguard Workerdefine void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
136*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32** %ptr to <6 x i32*>*
; Build the two 4-lane operands: [v0 | v1] and [v2 | undef].
137*9880d681SAndroid Build Coastguard Worker  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
138*9880d681SAndroid Build Coastguard Worker  %v2_u = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; Indices 0-1 pick v0, 2-3 pick v1, 4-5 pick v2.
139*9880d681SAndroid Build Coastguard Worker  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_u, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
140*9880d681SAndroid Build Coastguard Worker  store <6 x i32*> %interleaved.vec, <6 x i32*>* %base, align 4
141*9880d681SAndroid Build Coastguard Worker  ret void
142*9880d681SAndroid Build Coastguard Worker}
143*9880d681SAndroid Build Coastguard Worker
; Test: factor-4 interleaved store of pointer vectors.
; %v0/%v1 and %v2/%v3 are concatenated pairwise into two 4-lane vectors, and a
; final shufflevector interleaves the four sources with stride 4 before the
; <8 x i32*> store.
; The checks below expect an st4 on .2d registers when NEON is available and
; no st4 in the -mattr=-neon run.
; NOTE(review): %ptr is typed i32* here, whereas the other ptrvec tests in this
; file take i32**. The bitcast target is the same either way; confirm whether
; the difference is intentional.
144*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: store_ptrvec_factor4:
145*9880d681SAndroid Build Coastguard Worker; NEON: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
146*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: store_ptrvec_factor4:
147*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: st4
148*9880d681SAndroid Build Coastguard Workerdefine void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
149*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32* %ptr to <8 x i32*>*
; Build the two 4-lane operands: [v0 | v1] and [v2 | v3].
150*9880d681SAndroid Build Coastguard Worker  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
151*9880d681SAndroid Build Coastguard Worker  %v2_v3 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Indices 0-1 pick v0, 2-3 pick v1, 4-5 pick v2, 6-7 pick v3.
152*9880d681SAndroid Build Coastguard Worker  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_v3, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
153*9880d681SAndroid Build Coastguard Worker  store <8 x i32*> %interleaved.vec, <8 x i32*>* %base, align 4
154*9880d681SAndroid Build Coastguard Worker  ret void
155*9880d681SAndroid Build Coastguard Worker}
156*9880d681SAndroid Build Coastguard Worker
157*9880d681SAndroid Build Coastguard Worker; The following cases check that shuffle masks with undef indices can be matched
158*9880d681SAndroid Build Coastguard Worker; to ldN/stN instructions.
159*9880d681SAndroid Build Coastguard Worker
; Test: factor-2 interleaved load where the shuffle masks contain undef lanes.
; Both masks leave positions 0 and 2 undef: component 0 is <undef,2,undef,6>
; and component 1 is <undef,3,undef,7>.
; The checks below expect an ld2 when NEON is available and no ld2 in the
; -mattr=-neon run.
160*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: load_undef_mask_factor2:
161*9880d681SAndroid Build Coastguard Worker; NEON: ld2 { v0.4s, v1.4s }, [x0]
162*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: load_undef_mask_factor2:
163*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: ld2
164*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
165*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32* %ptr to <8 x i32>*
166*9880d681SAndroid Build Coastguard Worker  %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4
167*9880d681SAndroid Build Coastguard Worker  %strided.v0 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
168*9880d681SAndroid Build Coastguard Worker  %strided.v1 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
169*9880d681SAndroid Build Coastguard Worker  %add = add nsw <4 x i32> %strided.v0, %strided.v1
170*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %add
171*9880d681SAndroid Build Coastguard Worker}
172*9880d681SAndroid Build Coastguard Worker
; Test: factor-3 interleaved load where one shuffle mask is mostly undef.
; The component-2 mask defines only its first lane (<2,undef,undef,undef>);
; the component-1 mask is fully specified (<1,4,7,10>).
; The checks below expect an ld3 when NEON is available and no ld3 in the
; -mattr=-neon run.
173*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: load_undef_mask_factor3:
174*9880d681SAndroid Build Coastguard Worker; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
175*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: load_undef_mask_factor3:
176*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: ld3
177*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
178*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32* %ptr to <12 x i32>*
179*9880d681SAndroid Build Coastguard Worker  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
180*9880d681SAndroid Build Coastguard Worker  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
181*9880d681SAndroid Build Coastguard Worker  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
182*9880d681SAndroid Build Coastguard Worker  %add = add nsw <4 x i32> %strided.v2, %strided.v1
183*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %add
184*9880d681SAndroid Build Coastguard Worker}
185*9880d681SAndroid Build Coastguard Worker
; Test: factor-4 interleaved load where the shuffle masks have undef tails.
; Component 0 uses mask <0,4,undef,undef> and component 2 uses
; <2,6,undef,undef>; components 1 and 3 are not extracted.
; The checks below expect an ld4 when NEON is available and no ld4 in the
; -mattr=-neon run.
186*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: load_undef_mask_factor4:
187*9880d681SAndroid Build Coastguard Worker; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
188*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: load_undef_mask_factor4:
189*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: ld4
190*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
191*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32* %ptr to <16 x i32>*
192*9880d681SAndroid Build Coastguard Worker  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
193*9880d681SAndroid Build Coastguard Worker  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
194*9880d681SAndroid Build Coastguard Worker  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
195*9880d681SAndroid Build Coastguard Worker  %add = add nsw <4 x i32> %strided.v0, %strided.v2
196*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %add
197*9880d681SAndroid Build Coastguard Worker}
198*9880d681SAndroid Build Coastguard Worker
; Test: factor-2 interleaved store where the interleave mask has undef lanes.
; The first four mask positions are undef; the last four (2,6,3,7) follow the
; stride-2 interleave pattern of %v0 and %v1.
; The checks below expect an st2 when NEON is available and no st2 in the
; -mattr=-neon run.
199*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: store_undef_mask_factor2:
200*9880d681SAndroid Build Coastguard Worker; NEON: st2 { v0.4s, v1.4s }, [x0]
201*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: store_undef_mask_factor2:
202*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: st2
203*9880d681SAndroid Build Coastguard Workerdefine void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
204*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32* %ptr to <8 x i32>*
205*9880d681SAndroid Build Coastguard Worker  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
206*9880d681SAndroid Build Coastguard Worker  store <8 x i32> %interleaved.vec, <8 x i32>* %base, align 4
207*9880d681SAndroid Build Coastguard Worker  ret void
208*9880d681SAndroid Build Coastguard Worker}
209*9880d681SAndroid Build Coastguard Worker
; Test: factor-3 interleaved store where the interleave mask has undef lanes.
; Same operand construction as a plain factor-3 store ([v0 | v1] and
; [v2 | undef]), but positions 2 and 4 of the final 12-lane mask are undef.
; The checks below expect an st3 when NEON is available and no st3 in the
; -mattr=-neon run.
210*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: store_undef_mask_factor3:
211*9880d681SAndroid Build Coastguard Worker; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0]
212*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: store_undef_mask_factor3:
213*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: st3
214*9880d681SAndroid Build Coastguard Workerdefine void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
215*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32* %ptr to <12 x i32>*
216*9880d681SAndroid Build Coastguard Worker  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
217*9880d681SAndroid Build Coastguard Worker  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; Stride-3 interleave mask with two entries replaced by undef.
218*9880d681SAndroid Build Coastguard Worker  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
219*9880d681SAndroid Build Coastguard Worker  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
220*9880d681SAndroid Build Coastguard Worker  ret void
221*9880d681SAndroid Build Coastguard Worker}
222*9880d681SAndroid Build Coastguard Worker
; Test: factor-4 interleaved store where the interleave mask has undef lanes.
; Same operand construction as a plain factor-4 store ([v0 | v1] and
; [v2 | v3]), but positions 3 and 4 of the final 16-lane mask are undef.
; The checks below expect an st4 when NEON is available and no st4 in the
; -mattr=-neon run.
223*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: store_undef_mask_factor4:
224*9880d681SAndroid Build Coastguard Worker; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
225*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: store_undef_mask_factor4:
226*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: st4
227*9880d681SAndroid Build Coastguard Workerdefine void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
228*9880d681SAndroid Build Coastguard Worker  %base = bitcast i32* %ptr to <16 x i32>*
229*9880d681SAndroid Build Coastguard Worker  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
230*9880d681SAndroid Build Coastguard Worker  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Stride-4 interleave mask with two entries replaced by undef.
231*9880d681SAndroid Build Coastguard Worker  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
232*9880d681SAndroid Build Coastguard Worker  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
233*9880d681SAndroid Build Coastguard Worker  ret void
234*9880d681SAndroid Build Coastguard Worker}
235*9880d681SAndroid Build Coastguard Worker
236*9880d681SAndroid Build Coastguard Worker; Check that we do something sane with illegal types.
237*9880d681SAndroid Build Coastguard Worker
; Test: stride-2-looking shuffle of a loaded <3 x float>.
; The load is followed by a shuffle with mask <0,2,undef>. The checks below do
; not expect an ld2 here: with NEON available they pin the entry block to an
; ldr of a q register, a uzp1, and ret; in the -mattr=-neon run they pin it to
; two scalar ldr instructions (offsets 0 and 8) and ret.
238*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: load_illegal_factor2:
239*9880d681SAndroid Build Coastguard Worker; NEON: BB#0:
240*9880d681SAndroid Build Coastguard Worker; NEON-NEXT: ldr q[[V:[0-9]+]], [x0]
241*9880d681SAndroid Build Coastguard Worker; NEON-NEXT: uzp1 v0.4s, v[[V]].4s, v{{.*}}.4s
242*9880d681SAndroid Build Coastguard Worker; NEON-NEXT: ret
243*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: load_illegal_factor2:
244*9880d681SAndroid Build Coastguard Worker; NONEON: BB#0:
245*9880d681SAndroid Build Coastguard Worker; NONEON-NEXT: ldr s0, [x0]
246*9880d681SAndroid Build Coastguard Worker; NONEON-NEXT: ldr s1, [x0, #8]
247*9880d681SAndroid Build Coastguard Worker; NONEON-NEXT: ret
248*9880d681SAndroid Build Coastguard Workerdefine <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind {
249*9880d681SAndroid Build Coastguard Worker  %tmp1 = load <3 x float>, <3 x float>* %p, align 16
; Result lanes: element 0, element 2, undef.
250*9880d681SAndroid Build Coastguard Worker  %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
251*9880d681SAndroid Build Coastguard Worker  ret <3 x float> %tmp2
252*9880d681SAndroid Build Coastguard Worker}
253*9880d681SAndroid Build Coastguard Worker
; Test: stride-2-looking shuffle of a <3 x float> that is then stored.
; %v is shuffled with mask <0,2,undef> and the result is stored to %p. The
; checks below do not expect an st2 here: with NEON available they pin the
; entry block to a uzp1, an st1 of one d lane, and ret; in the -mattr=-neon run
; they pin it to two fmov instructions, a bfi, a 64-bit str, and ret.
254*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: store_illegal_factor2:
255*9880d681SAndroid Build Coastguard Worker; NEON: BB#0:
256*9880d681SAndroid Build Coastguard Worker; NEON-NEXT: uzp1 v0.4s, v0.4s, v{{.*}}.4s
257*9880d681SAndroid Build Coastguard Worker; NEON-NEXT: st1 { v0.d }[0], [x0]
258*9880d681SAndroid Build Coastguard Worker; NEON-NEXT: ret
259*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: store_illegal_factor2:
260*9880d681SAndroid Build Coastguard Worker; NONEON: BB#0:
261*9880d681SAndroid Build Coastguard Worker; NONEON-NEXT: fmov w[[ELT2:[0-9]+]], s2
262*9880d681SAndroid Build Coastguard Worker; NONEON-NEXT: fmov w[[RES:[0-9]+]], s0
263*9880d681SAndroid Build Coastguard Worker; NONEON-NEXT: bfi x[[RES]], x[[ELT2]], #32, #32
264*9880d681SAndroid Build Coastguard Worker; NONEON-NEXT: str x[[RES]], [x0]
265*9880d681SAndroid Build Coastguard Worker; NONEON-NEXT: ret
266*9880d681SAndroid Build Coastguard Workerdefine void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {
; Result lanes: element 0, element 2, undef.
267*9880d681SAndroid Build Coastguard Worker  %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
268*9880d681SAndroid Build Coastguard Worker  store <3 x float> %tmp1, <3 x float>* %p, align 16
269*9880d681SAndroid Build Coastguard Worker  ret void
270*9880d681SAndroid Build Coastguard Worker}
271*9880d681SAndroid Build Coastguard Worker
; Test: factor-2 interleaved load whose wide vector also has an
; extractelement user.
; The <8 x i32> load feeds a stride-2 shuffle (lanes 0,2,4,6; its result is
; otherwise unused) and an extractelement of lane 2, which is returned.
; Lane 2 of the wide vector is position 1 of the even-lane component, which
; matches the "mov w0, v0.s[1]" expected after the ld2 below. The
; -mattr=-neon run expects no ld2.
272*9880d681SAndroid Build Coastguard Worker; NEON-LABEL: load_factor2_with_extract_user:
273*9880d681SAndroid Build Coastguard Worker; NEON: ld2 { v0.4s, v1.4s }, [x0]
274*9880d681SAndroid Build Coastguard Worker; NEON: mov w0, v0.s[1]
275*9880d681SAndroid Build Coastguard Worker; NONEON-LABEL: load_factor2_with_extract_user:
276*9880d681SAndroid Build Coastguard Worker; NONEON-NOT: ld2
277*9880d681SAndroid Build Coastguard Workerdefine i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
278*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x i32>, <8 x i32>* %a, align 8
279*9880d681SAndroid Build Coastguard Worker  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; Extract directly from the wide vector rather than from the shuffle result.
280*9880d681SAndroid Build Coastguard Worker  %3 = extractelement <8 x i32> %1, i32 2
281*9880d681SAndroid Build Coastguard Worker  ret i32 %3
282*9880d681SAndroid Build Coastguard Worker}
283