xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/nontemporal-loads.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
4*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
5*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
6*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
7*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW
8*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VL
9*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 16-byte-aligned load of <4 x float> from %src.
; Checked lowering: movaps with +sse2; movntdqa with +sse4.1; vmovntdqa with
; +avx/+avx2 and with each of the +avx512f/+avx512bw/+avx512vl run lines.
10*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_v4f32(<4 x float>* %src) {
11*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v4f32:
12*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
13*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
14*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
15*9880d681SAndroid Build Coastguard Worker;
16*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v4f32:
17*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
18*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
19*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
20*9880d681SAndroid Build Coastguard Worker;
21*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_v4f32:
22*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
23*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdqa (%rdi), %xmm0
24*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
25*9880d681SAndroid Build Coastguard Worker;
26*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v4f32:
27*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
28*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %xmm0
29*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
30*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x float>, <4 x float>* %src, align 16, !nontemporal !1
31*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %1
32*9880d681SAndroid Build Coastguard Worker}
33*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 16-byte-aligned load of <4 x i32> from %src.
; Checked lowering: movaps with +sse2; movntdqa with +sse4.1; vmovntdqa with
; +avx/+avx2, +avx512f and +avx512bw.
; NOTE(review): the +avx512vl checks expect vmovdqa32 rather than vmovntdqa,
; unlike the other avx512 run lines -- presumably a missed non-temporal
; lowering in the compiler at this revision; confirm before relying on it.
34*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @test_v4i32(<4 x i32>* %src) {
35*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v4i32:
36*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
37*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
38*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
39*9880d681SAndroid Build Coastguard Worker;
40*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v4i32:
41*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
42*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
43*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
44*9880d681SAndroid Build Coastguard Worker;
45*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_v4i32:
46*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
47*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdqa (%rdi), %xmm0
48*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
49*9880d681SAndroid Build Coastguard Worker;
50*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_v4i32:
51*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
52*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovntdqa (%rdi), %xmm0
53*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
54*9880d681SAndroid Build Coastguard Worker;
55*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_v4i32:
56*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
57*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovntdqa (%rdi), %xmm0
58*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
59*9880d681SAndroid Build Coastguard Worker;
60*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_v4i32:
61*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
62*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovdqa32 (%rdi), %xmm0
63*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
64*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x i32>, <4 x i32>* %src, align 16, !nontemporal !1
65*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %1
66*9880d681SAndroid Build Coastguard Worker}
67*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 16-byte-aligned load of <2 x double> from %src.
; Checked lowering: movaps with +sse2; movntdqa with +sse4.1; vmovntdqa with
; +avx/+avx2 and with each of the +avx512f/+avx512bw/+avx512vl run lines.
68*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_v2f64(<2 x double>* %src) {
69*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v2f64:
70*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
71*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
72*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
73*9880d681SAndroid Build Coastguard Worker;
74*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v2f64:
75*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
76*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
77*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
78*9880d681SAndroid Build Coastguard Worker;
79*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_v2f64:
80*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
81*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdqa (%rdi), %xmm0
82*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
83*9880d681SAndroid Build Coastguard Worker;
84*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v2f64:
85*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
86*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %xmm0
87*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
88*9880d681SAndroid Build Coastguard Worker  %1 = load <2 x double>, <2 x double>* %src, align 16, !nontemporal !1
89*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %1
90*9880d681SAndroid Build Coastguard Worker}
91*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 16-byte-aligned load of <2 x i64> from %src.
; Checked lowering: movaps with +sse2; movntdqa with +sse4.1; vmovntdqa with
; +avx/+avx2 and with each of the +avx512f/+avx512bw/+avx512vl run lines.
92*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_v2i64(<2 x i64>* %src) {
93*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v2i64:
94*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
95*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
96*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
97*9880d681SAndroid Build Coastguard Worker;
98*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v2i64:
99*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
100*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
101*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
102*9880d681SAndroid Build Coastguard Worker;
103*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_v2i64:
104*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
105*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdqa (%rdi), %xmm0
106*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
107*9880d681SAndroid Build Coastguard Worker;
108*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v2i64:
109*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
110*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %xmm0
111*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
112*9880d681SAndroid Build Coastguard Worker  %1 = load <2 x i64>, <2 x i64>* %src, align 16, !nontemporal !1
113*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %1
114*9880d681SAndroid Build Coastguard Worker}
115*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 16-byte-aligned load of <8 x i16> from %src.
; Checked lowering: movaps with +sse2; movntdqa with +sse4.1; vmovntdqa with
; +avx/+avx2 and with each of the +avx512f/+avx512bw/+avx512vl run lines.
116*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @test_v8i16(<8 x i16>* %src) {
117*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v8i16:
118*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
119*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
120*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
121*9880d681SAndroid Build Coastguard Worker;
122*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v8i16:
123*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
124*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
125*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
126*9880d681SAndroid Build Coastguard Worker;
127*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_v8i16:
128*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
129*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdqa (%rdi), %xmm0
130*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
131*9880d681SAndroid Build Coastguard Worker;
132*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v8i16:
133*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
134*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %xmm0
135*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
136*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x i16>, <8 x i16>* %src, align 16, !nontemporal !1
137*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %1
138*9880d681SAndroid Build Coastguard Worker}
139*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 16-byte-aligned load of <16 x i8> from %src.
; Checked lowering: movaps with +sse2; movntdqa with +sse4.1; vmovntdqa with
; +avx/+avx2 and with each of the +avx512f/+avx512bw/+avx512vl run lines.
140*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @test_v16i8(<16 x i8>* %src) {
141*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v16i8:
142*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
143*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
144*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
145*9880d681SAndroid Build Coastguard Worker;
146*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v16i8:
147*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
148*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
149*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
150*9880d681SAndroid Build Coastguard Worker;
151*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_v16i8:
152*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
153*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdqa (%rdi), %xmm0
154*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
155*9880d681SAndroid Build Coastguard Worker;
156*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v16i8:
157*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
158*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %xmm0
159*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
160*9880d681SAndroid Build Coastguard Worker  %1 = load <16 x i8>, <16 x i8>* %src, align 16, !nontemporal !1
161*9880d681SAndroid Build Coastguard Worker  ret <16 x i8> %1
162*9880d681SAndroid Build Coastguard Worker}
163*9880d681SAndroid Build Coastguard Worker
164*9880d681SAndroid Build Coastguard Worker; And now YMM versions (256-bit vector types).
165*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 32-byte-aligned load of <8 x float> from %src.
; Checked lowering: two xmm loads at offsets 0/16 -- movaps with +sse2,
; movntdqa with +sse4.1; one ymm vmovaps with +avx (no non-temporal form is
; expected there); one ymm vmovntdqa with +avx2 and with each of the
; +avx512f/+avx512bw/+avx512vl run lines.
166*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_v8f32(<8 x float>* %src) {
167*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v8f32:
168*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
169*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
170*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
171*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
172*9880d681SAndroid Build Coastguard Worker;
173*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v8f32:
174*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
175*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
176*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
177*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
178*9880d681SAndroid Build Coastguard Worker;
179*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v8f32:
180*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
181*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
182*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
183*9880d681SAndroid Build Coastguard Worker;
184*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v8f32:
185*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
186*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
187*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
188*9880d681SAndroid Build Coastguard Worker;
189*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v8f32:
190*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
191*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %ymm0
192*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
193*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x float>, <8 x float>* %src, align 32, !nontemporal !1
194*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %1
195*9880d681SAndroid Build Coastguard Worker}
196*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 32-byte-aligned load of <8 x i32> from %src.
; Checked lowering: two xmm loads at offsets 0/16 -- movaps with +sse2,
; movntdqa with +sse4.1; one ymm vmovaps with +avx (no non-temporal form is
; expected there); one ymm vmovntdqa with +avx2, +avx512f and +avx512bw.
; NOTE(review): the +avx512vl checks expect vmovdqa32 rather than vmovntdqa,
; unlike the other avx512 run lines -- presumably a missed non-temporal
; lowering in the compiler at this revision; confirm before relying on it.
197*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test_v8i32(<8 x i32>* %src) {
198*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v8i32:
199*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
200*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
201*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
202*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
203*9880d681SAndroid Build Coastguard Worker;
204*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v8i32:
205*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
206*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
207*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
208*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
209*9880d681SAndroid Build Coastguard Worker;
210*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v8i32:
211*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
212*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
213*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
214*9880d681SAndroid Build Coastguard Worker;
215*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v8i32:
216*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
217*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
218*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
219*9880d681SAndroid Build Coastguard Worker;
220*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_v8i32:
221*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
222*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovntdqa (%rdi), %ymm0
223*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
224*9880d681SAndroid Build Coastguard Worker;
225*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_v8i32:
226*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
227*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovntdqa (%rdi), %ymm0
228*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
229*9880d681SAndroid Build Coastguard Worker;
230*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_v8i32:
231*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
232*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovdqa32 (%rdi), %ymm0
233*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
234*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1
235*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %1
236*9880d681SAndroid Build Coastguard Worker}
237*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 32-byte-aligned load of <4 x double> from %src.
; Checked lowering: two xmm loads at offsets 0/16 -- movaps with +sse2,
; movntdqa with +sse4.1; one ymm vmovaps with +avx (no non-temporal form is
; expected there); one ymm vmovntdqa with +avx2 and with each of the
; +avx512f/+avx512bw/+avx512vl run lines.
238*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_v4f64(<4 x double>* %src) {
239*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v4f64:
240*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
241*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
242*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
243*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
244*9880d681SAndroid Build Coastguard Worker;
245*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v4f64:
246*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
247*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
248*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
249*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
250*9880d681SAndroid Build Coastguard Worker;
251*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v4f64:
252*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
253*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
254*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
255*9880d681SAndroid Build Coastguard Worker;
256*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v4f64:
257*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
258*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
259*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
260*9880d681SAndroid Build Coastguard Worker;
261*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v4f64:
262*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
263*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %ymm0
264*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
265*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x double>, <4 x double>* %src, align 32, !nontemporal !1
266*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %1
267*9880d681SAndroid Build Coastguard Worker}
268*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 32-byte-aligned load of <4 x i64> from %src.
; Checked lowering: two xmm loads at offsets 0/16 -- movaps with +sse2,
; movntdqa with +sse4.1; one ymm vmovaps with +avx (no non-temporal form is
; expected there); one ymm vmovntdqa with +avx2 and with each of the
; +avx512f/+avx512bw/+avx512vl run lines.
269*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_v4i64(<4 x i64>* %src) {
270*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v4i64:
271*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
272*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
273*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
274*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
275*9880d681SAndroid Build Coastguard Worker;
276*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v4i64:
277*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
278*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
279*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
280*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
281*9880d681SAndroid Build Coastguard Worker;
282*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v4i64:
283*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
284*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
285*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
286*9880d681SAndroid Build Coastguard Worker;
287*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v4i64:
288*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
289*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
290*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
291*9880d681SAndroid Build Coastguard Worker;
292*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v4i64:
293*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
294*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %ymm0
295*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
296*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x i64>, <4 x i64>* %src, align 32, !nontemporal !1
297*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %1
298*9880d681SAndroid Build Coastguard Worker}
299*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 32-byte-aligned load of <16 x i16> from %src.
; Checked lowering: two xmm loads at offsets 0/16 -- movaps with +sse2,
; movntdqa with +sse4.1; one ymm vmovaps with +avx (no non-temporal form is
; expected there); one ymm vmovntdqa with +avx2 and with each of the
; +avx512f/+avx512bw/+avx512vl run lines.
300*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @test_v16i16(<16 x i16>* %src) {
301*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v16i16:
302*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
303*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
304*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
305*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
306*9880d681SAndroid Build Coastguard Worker;
307*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v16i16:
308*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
309*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
310*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
311*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
312*9880d681SAndroid Build Coastguard Worker;
313*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v16i16:
314*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
315*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
316*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
317*9880d681SAndroid Build Coastguard Worker;
318*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v16i16:
319*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
320*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
321*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
322*9880d681SAndroid Build Coastguard Worker;
323*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v16i16:
324*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
325*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %ymm0
326*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
327*9880d681SAndroid Build Coastguard Worker  %1 = load <16 x i16>, <16 x i16>* %src, align 32, !nontemporal !1
328*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %1
329*9880d681SAndroid Build Coastguard Worker}
330*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 32-byte-aligned load of <32 x i8> from %src.
; Checked lowering: two xmm loads at offsets 0/16 -- movaps with +sse2,
; movntdqa with +sse4.1; one ymm vmovaps with +avx (no non-temporal form is
; expected there); one ymm vmovntdqa with +avx2 and with each of the
; +avx512f/+avx512bw/+avx512vl run lines.
331*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @test_v32i8(<32 x i8>* %src) {
332*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v32i8:
333*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
334*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
335*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
336*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
337*9880d681SAndroid Build Coastguard Worker;
338*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v32i8:
339*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
340*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
341*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
342*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
343*9880d681SAndroid Build Coastguard Worker;
344*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v32i8:
345*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
346*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
347*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
348*9880d681SAndroid Build Coastguard Worker;
349*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v32i8:
350*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
351*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
352*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
353*9880d681SAndroid Build Coastguard Worker;
354*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v32i8:
355*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
356*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %ymm0
357*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
358*9880d681SAndroid Build Coastguard Worker  %1 = load <32 x i8>, <32 x i8>* %src, align 32, !nontemporal !1
359*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %1
360*9880d681SAndroid Build Coastguard Worker}
361*9880d681SAndroid Build Coastguard Worker
362*9880d681SAndroid Build Coastguard Worker; And now ZMM versions (512-bit vector types).
363*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 64-byte-aligned load of <16 x float> from %src.
; Checked lowering: four xmm loads at offsets 0/16/32/48 -- movaps with +sse2,
; movntdqa with +sse4.1; two ymm loads at offsets 0/32 -- vmovaps with +avx,
; vmovntdqa with +avx2; a single zmm vmovntdqa with each of the
; +avx512f/+avx512bw/+avx512vl run lines.
364*9880d681SAndroid Build Coastguard Workerdefine <16 x float> @test_v16f32(<16 x float>* %src) {
365*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v16f32:
366*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
367*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
368*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
369*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 32(%rdi), %xmm2
370*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 48(%rdi), %xmm3
371*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
372*9880d681SAndroid Build Coastguard Worker;
373*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v16f32:
374*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
375*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
376*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
377*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 32(%rdi), %xmm2
378*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 48(%rdi), %xmm3
379*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
380*9880d681SAndroid Build Coastguard Worker;
381*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v16f32:
382*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
383*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
384*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
385*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
386*9880d681SAndroid Build Coastguard Worker;
387*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v16f32:
388*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
389*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
390*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm1
391*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
392*9880d681SAndroid Build Coastguard Worker;
393*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v16f32:
394*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
395*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %zmm0
396*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
397*9880d681SAndroid Build Coastguard Worker  %1 = load <16 x float>, <16 x float>* %src, align 64, !nontemporal !1
398*9880d681SAndroid Build Coastguard Worker  ret <16 x float> %1
399*9880d681SAndroid Build Coastguard Worker}
400*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 64-byte-aligned load of <16 x i32> from %src.
; Checked lowering: four xmm loads at offsets 0/16/32/48 -- movaps with +sse2,
; movntdqa with +sse4.1; two ymm loads at offsets 0/32 -- vmovaps with +avx,
; vmovntdqa with +avx2; a single zmm vmovntdqa with each of the
; +avx512f/+avx512bw/+avx512vl run lines.
401*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @test_v16i32(<16 x i32>* %src) {
402*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v16i32:
403*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
404*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
405*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
406*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 32(%rdi), %xmm2
407*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 48(%rdi), %xmm3
408*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
409*9880d681SAndroid Build Coastguard Worker;
410*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v16i32:
411*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
412*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
413*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
414*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 32(%rdi), %xmm2
415*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 48(%rdi), %xmm3
416*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
417*9880d681SAndroid Build Coastguard Worker;
418*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v16i32:
419*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
420*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
421*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
422*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
423*9880d681SAndroid Build Coastguard Worker;
424*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v16i32:
425*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
426*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
427*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm1
428*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
429*9880d681SAndroid Build Coastguard Worker;
430*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v16i32:
431*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
432*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %zmm0
433*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
434*9880d681SAndroid Build Coastguard Worker  %1 = load <16 x i32>, <16 x i32>* %src, align 64, !nontemporal !1
435*9880d681SAndroid Build Coastguard Worker  ret <16 x i32> %1
436*9880d681SAndroid Build Coastguard Worker}
437*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 64-byte-aligned load of <8 x double> from %src.
; Checked lowering: four xmm loads at offsets 0/16/32/48 -- movaps with +sse2,
; movntdqa with +sse4.1; two ymm loads at offsets 0/32 -- vmovaps with +avx,
; vmovntdqa with +avx2; a single zmm vmovntdqa with each of the
; +avx512f/+avx512bw/+avx512vl run lines.
438*9880d681SAndroid Build Coastguard Workerdefine <8 x double> @test_v8f64(<8 x double>* %src) {
439*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v8f64:
440*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
441*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
442*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
443*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 32(%rdi), %xmm2
444*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 48(%rdi), %xmm3
445*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
446*9880d681SAndroid Build Coastguard Worker;
447*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v8f64:
448*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
449*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
450*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
451*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 32(%rdi), %xmm2
452*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 48(%rdi), %xmm3
453*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
454*9880d681SAndroid Build Coastguard Worker;
455*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v8f64:
456*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
457*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
458*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
459*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
460*9880d681SAndroid Build Coastguard Worker;
461*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v8f64:
462*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
463*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
464*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm1
465*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
466*9880d681SAndroid Build Coastguard Worker;
467*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v8f64:
468*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
469*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %zmm0
470*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
471*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x double>, <8 x double>* %src, align 64, !nontemporal !1
472*9880d681SAndroid Build Coastguard Worker  ret <8 x double> %1
473*9880d681SAndroid Build Coastguard Worker}
474*9880d681SAndroid Build Coastguard Worker
; test_v8i64: loads a 64-byte-aligned <8 x i64> from %src with !nontemporal
; metadata and returns it unchanged. Per the check prefixes below, SSE2 and
; AVX1 are expected to use ordinary aligned moves (movaps / vmovaps), SSE41 and
; AVX2 use (v)movntdqa on 16- and 32-byte pieces, and AVX512 uses a single
; zmm-wide vmovntdqa.
475*9880d681SAndroid Build Coastguard Workerdefine <8 x i64> @test_v8i64(<8 x i64>* %src) {
476*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v8i64:
477*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
478*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
479*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
480*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 32(%rdi), %xmm2
481*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 48(%rdi), %xmm3
482*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
483*9880d681SAndroid Build Coastguard Worker;
484*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v8i64:
485*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
486*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
487*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
488*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 32(%rdi), %xmm2
489*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 48(%rdi), %xmm3
490*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
491*9880d681SAndroid Build Coastguard Worker;
492*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v8i64:
493*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
494*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
495*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
496*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
497*9880d681SAndroid Build Coastguard Worker;
498*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v8i64:
499*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
500*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
501*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm1
502*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
503*9880d681SAndroid Build Coastguard Worker;
504*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v8i64:
505*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
506*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %zmm0
507*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
508*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x i64>, <8 x i64>* %src, align 64, !nontemporal !1
509*9880d681SAndroid Build Coastguard Worker  ret <8 x i64> %1
510*9880d681SAndroid Build Coastguard Worker}
511*9880d681SAndroid Build Coastguard Worker
; test_v32i16: loads a 64-byte-aligned <32 x i16> from %src with !nontemporal
; metadata and returns it unchanged. Unlike the 32-/64-bit element cases, the
; AVX512 expectations are split per feature set: AVX512F and AVX512VL expect
; two ymm-wide vmovntdqa loads, while only AVX512BW expects a single zmm-wide
; vmovntdqa. SSE2/AVX1 expect plain aligned moves; SSE41/AVX2 expect
; (v)movntdqa.
512*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @test_v32i16(<32 x i16>* %src) {
513*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v32i16:
514*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
515*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
516*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
517*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 32(%rdi), %xmm2
518*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 48(%rdi), %xmm3
519*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
520*9880d681SAndroid Build Coastguard Worker;
521*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v32i16:
522*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
523*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
524*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
525*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 32(%rdi), %xmm2
526*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 48(%rdi), %xmm3
527*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
528*9880d681SAndroid Build Coastguard Worker;
529*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v32i16:
530*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
531*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
532*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
533*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
534*9880d681SAndroid Build Coastguard Worker;
535*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v32i16:
536*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
537*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
538*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm1
539*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
540*9880d681SAndroid Build Coastguard Worker;
541*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_v32i16:
542*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
543*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovntdqa (%rdi), %ymm0
544*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovntdqa 32(%rdi), %ymm1
545*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
546*9880d681SAndroid Build Coastguard Worker;
547*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_v32i16:
548*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
549*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovntdqa (%rdi), %zmm0
550*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
551*9880d681SAndroid Build Coastguard Worker;
552*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_v32i16:
553*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
554*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovntdqa (%rdi), %ymm0
555*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovntdqa 32(%rdi), %ymm1
556*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
557*9880d681SAndroid Build Coastguard Worker  %1 = load <32 x i16>, <32 x i16>* %src, align 64, !nontemporal !1
558*9880d681SAndroid Build Coastguard Worker  ret <32 x i16> %1
559*9880d681SAndroid Build Coastguard Worker}
560*9880d681SAndroid Build Coastguard Worker
; test_v64i8: loads a 64-byte-aligned <64 x i8> from %src with !nontemporal
; metadata and returns it unchanged. The AVX512 expectations are split per
; feature set: AVX512F and AVX512VL expect two ymm-wide vmovntdqa loads, while
; only AVX512BW expects a single zmm-wide vmovntdqa. SSE2/AVX1 expect plain
; aligned moves; SSE41/AVX2 expect (v)movntdqa.
561*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @test_v64i8(<64 x i8>* %src) {
562*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v64i8:
563*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
564*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
565*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
566*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 32(%rdi), %xmm2
567*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 48(%rdi), %xmm3
568*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
569*9880d681SAndroid Build Coastguard Worker;
570*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v64i8:
571*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
572*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
573*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
574*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 32(%rdi), %xmm2
575*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 48(%rdi), %xmm3
576*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
577*9880d681SAndroid Build Coastguard Worker;
578*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v64i8:
579*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
580*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
581*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
582*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
583*9880d681SAndroid Build Coastguard Worker;
584*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v64i8:
585*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
586*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
587*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm1
588*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
589*9880d681SAndroid Build Coastguard Worker;
590*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_v64i8:
591*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
592*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovntdqa (%rdi), %ymm0
593*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovntdqa 32(%rdi), %ymm1
594*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
595*9880d681SAndroid Build Coastguard Worker;
596*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_v64i8:
597*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
598*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovntdqa (%rdi), %zmm0
599*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
600*9880d681SAndroid Build Coastguard Worker;
601*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_v64i8:
602*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
603*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovntdqa (%rdi), %ymm0
604*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovntdqa 32(%rdi), %ymm1
605*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
606*9880d681SAndroid Build Coastguard Worker  %1 = load <64 x i8>, <64 x i8>* %src, align 64, !nontemporal !1
607*9880d681SAndroid Build Coastguard Worker  ret <64 x i8> %1
608*9880d681SAndroid Build Coastguard Worker}
609*9880d681SAndroid Build Coastguard Worker
610*9880d681SAndroid Build Coastguard Worker
611*9880d681SAndroid Build Coastguard Worker; Check cases where the load would be folded.
612*9880d681SAndroid Build Coastguard Worker
; test_arg_v4f32: fadds %arg to a 16-byte-aligned <4 x float> loaded from %src
; with !nontemporal metadata. Every prefix expects the load to be folded into
; the add as a memory operand (addps / vaddps (%rdi)); no separate
; non-temporal load instruction is expected.
613*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_arg_v4f32(<4 x float> %arg, <4 x float>* %src) {
614*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v4f32:
615*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
616*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addps (%rdi), %xmm0
617*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
618*9880d681SAndroid Build Coastguard Worker;
619*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v4f32:
620*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
621*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddps (%rdi), %xmm0, %xmm0
622*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
623*9880d681SAndroid Build Coastguard Worker;
624*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v4f32:
625*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
626*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vaddps (%rdi), %xmm0, %xmm0
627*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
628*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x float>, <4 x float>* %src, align 16, !nontemporal !1
629*9880d681SAndroid Build Coastguard Worker  %2 = fadd <4 x float> %arg, %1
630*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %2
631*9880d681SAndroid Build Coastguard Worker}
632*9880d681SAndroid Build Coastguard Worker
; test_arg_v4i32: adds %arg to a 16-byte-aligned <4 x i32> loaded from %src
; with !nontemporal metadata. Every prefix expects the load to be folded into
; the add as a memory operand (paddd / vpaddd (%rdi)).
633*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @test_arg_v4i32(<4 x i32> %arg, <4 x i32>* %src) {
634*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v4i32:
635*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
636*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddd (%rdi), %xmm0
637*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
638*9880d681SAndroid Build Coastguard Worker;
639*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v4i32:
640*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
641*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddd (%rdi), %xmm0, %xmm0
642*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
643*9880d681SAndroid Build Coastguard Worker;
644*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v4i32:
645*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
646*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpaddd (%rdi), %xmm0, %xmm0
647*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
648*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x i32>, <4 x i32>* %src, align 16, !nontemporal !1
649*9880d681SAndroid Build Coastguard Worker  %2 = add <4 x i32> %arg, %1
650*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %2
651*9880d681SAndroid Build Coastguard Worker}
652*9880d681SAndroid Build Coastguard Worker
; test_arg_v2f64: fadds %arg to a 16-byte-aligned <2 x double> loaded from %src
; with !nontemporal metadata. Every prefix expects the load to be folded into
; the add as a memory operand (addpd / vaddpd (%rdi)).
653*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_arg_v2f64(<2 x double> %arg, <2 x double>* %src) {
654*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v2f64:
655*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
656*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addpd (%rdi), %xmm0
657*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
658*9880d681SAndroid Build Coastguard Worker;
659*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v2f64:
660*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
661*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddpd (%rdi), %xmm0, %xmm0
662*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
663*9880d681SAndroid Build Coastguard Worker;
664*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v2f64:
665*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
666*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vaddpd (%rdi), %xmm0, %xmm0
667*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
668*9880d681SAndroid Build Coastguard Worker  %1 = load <2 x double>, <2 x double>* %src, align 16, !nontemporal !1
669*9880d681SAndroid Build Coastguard Worker  %2 = fadd <2 x double> %arg, %1
670*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %2
671*9880d681SAndroid Build Coastguard Worker}
672*9880d681SAndroid Build Coastguard Worker
; test_arg_v2i64: adds %arg to a 16-byte-aligned <2 x i64> loaded from %src
; with !nontemporal metadata. Every prefix expects the load to be folded into
; the add as a memory operand (paddq / vpaddq (%rdi)).
673*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_arg_v2i64(<2 x i64> %arg, <2 x i64>* %src) {
674*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v2i64:
675*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
676*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq (%rdi), %xmm0
677*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
678*9880d681SAndroid Build Coastguard Worker;
679*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v2i64:
680*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
681*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddq (%rdi), %xmm0, %xmm0
682*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
683*9880d681SAndroid Build Coastguard Worker;
684*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v2i64:
685*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
686*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpaddq (%rdi), %xmm0, %xmm0
687*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
688*9880d681SAndroid Build Coastguard Worker  %1 = load <2 x i64>, <2 x i64>* %src, align 16, !nontemporal !1
689*9880d681SAndroid Build Coastguard Worker  %2 = add <2 x i64> %arg, %1
690*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %2
691*9880d681SAndroid Build Coastguard Worker}
692*9880d681SAndroid Build Coastguard Worker
; test_arg_v8i16: adds %arg to a 16-byte-aligned <8 x i16> loaded from %src
; with !nontemporal metadata. Every prefix expects the load to be folded into
; the add as a memory operand (paddw / vpaddw (%rdi)).
693*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @test_arg_v8i16(<8 x i16> %arg, <8 x i16>* %src) {
694*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8i16:
695*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
696*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddw (%rdi), %xmm0
697*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
698*9880d681SAndroid Build Coastguard Worker;
699*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v8i16:
700*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
701*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddw (%rdi), %xmm0, %xmm0
702*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
703*9880d681SAndroid Build Coastguard Worker;
704*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v8i16:
705*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
706*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpaddw (%rdi), %xmm0, %xmm0
707*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
708*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x i16>, <8 x i16>* %src, align 16, !nontemporal !1
709*9880d681SAndroid Build Coastguard Worker  %2 = add <8 x i16> %arg, %1
710*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %2
711*9880d681SAndroid Build Coastguard Worker}
712*9880d681SAndroid Build Coastguard Worker
; test_arg_v16i8: adds %arg to a 16-byte-aligned <16 x i8> loaded from %src
; with !nontemporal metadata. Every prefix expects the load to be folded into
; the add as a memory operand (paddb / vpaddb (%rdi)).
713*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @test_arg_v16i8(<16 x i8> %arg, <16 x i8>* %src) {
714*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v16i8:
715*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
716*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddb (%rdi), %xmm0
717*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
718*9880d681SAndroid Build Coastguard Worker;
719*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v16i8:
720*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
721*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddb (%rdi), %xmm0, %xmm0
722*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
723*9880d681SAndroid Build Coastguard Worker;
724*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v16i8:
725*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
726*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpaddb (%rdi), %xmm0, %xmm0
727*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
728*9880d681SAndroid Build Coastguard Worker  %1 = load <16 x i8>, <16 x i8>* %src, align 16, !nontemporal !1
729*9880d681SAndroid Build Coastguard Worker  %2 = add <16 x i8> %arg, %1
730*9880d681SAndroid Build Coastguard Worker  ret <16 x i8> %2
731*9880d681SAndroid Build Coastguard Worker}
732*9880d681SAndroid Build Coastguard Worker
733*9880d681SAndroid Build Coastguard Worker; And now YMM versions.
734*9880d681SAndroid Build Coastguard Worker
; test_arg_v8f32: fadds %arg to a 32-byte-aligned <8 x float> loaded from %src
; with !nontemporal metadata. SSE expects two xmm addps with folded memory
; operands at offsets 0 and 16; AVX and AVX512 expect a single ymm vaddps with
; the load folded.
735*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_arg_v8f32(<8 x float> %arg, <8 x float>* %src) {
736*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8f32:
737*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
738*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addps (%rdi), %xmm0
739*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addps 16(%rdi), %xmm1
740*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
741*9880d681SAndroid Build Coastguard Worker;
742*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v8f32:
743*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
744*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddps (%rdi), %ymm0, %ymm0
745*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
746*9880d681SAndroid Build Coastguard Worker;
747*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v8f32:
748*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
749*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vaddps (%rdi), %ymm0, %ymm0
750*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
751*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x float>, <8 x float>* %src, align 32, !nontemporal !1
752*9880d681SAndroid Build Coastguard Worker  %2 = fadd <8 x float> %arg, %1
753*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %2
754*9880d681SAndroid Build Coastguard Worker}
755*9880d681SAndroid Build Coastguard Worker
; test_arg_v8i32: adds %arg to a 32-byte-aligned <8 x i32> loaded from %src
; with !nontemporal metadata. SSE, AVX2 and AVX512 expect the load folded into
; paddd / vpaddd. AVX1 is the exception: it expects a standalone vmovaps load
; followed by two 128-bit vpaddd on the extracted halves and a vinsertf128 to
; rebuild the ymm result (presumably because AVX1 has no 256-bit integer add
; -- not stated in this file).
756*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test_arg_v8i32(<8 x i32> %arg, <8 x i32>* %src) {
757*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8i32:
758*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
759*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddd (%rdi), %xmm0
760*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddd 16(%rdi), %xmm1
761*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
762*9880d681SAndroid Build Coastguard Worker;
763*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v8i32:
764*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
765*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm1
766*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
767*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
768*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
769*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
770*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
771*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
772*9880d681SAndroid Build Coastguard Worker;
773*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v8i32:
774*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
775*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddd (%rdi), %ymm0, %ymm0
776*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
777*9880d681SAndroid Build Coastguard Worker;
778*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v8i32:
779*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
780*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpaddd (%rdi), %ymm0, %ymm0
781*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
782*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1
783*9880d681SAndroid Build Coastguard Worker  %2 = add <8 x i32> %arg, %1
784*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %2
785*9880d681SAndroid Build Coastguard Worker}
786*9880d681SAndroid Build Coastguard Worker
; test_arg_v4f64: fadds %arg to a 32-byte-aligned <4 x double> loaded from %src
; with !nontemporal metadata. SSE expects two xmm addpd with folded memory
; operands at offsets 0 and 16; AVX and AVX512 expect a single ymm vaddpd with
; the load folded.
787*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_arg_v4f64(<4 x double> %arg, <4 x double>* %src) {
788*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v4f64:
789*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
790*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addpd (%rdi), %xmm0
791*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addpd 16(%rdi), %xmm1
792*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
793*9880d681SAndroid Build Coastguard Worker;
794*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v4f64:
795*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
796*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddpd (%rdi), %ymm0, %ymm0
797*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
798*9880d681SAndroid Build Coastguard Worker;
799*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v4f64:
800*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
801*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vaddpd (%rdi), %ymm0, %ymm0
802*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
803*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x double>, <4 x double>* %src, align 32, !nontemporal !1
804*9880d681SAndroid Build Coastguard Worker  %2 = fadd <4 x double> %arg, %1
805*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %2
806*9880d681SAndroid Build Coastguard Worker}
807*9880d681SAndroid Build Coastguard Worker
; test_arg_v4i64: adds %arg to a 32-byte-aligned <4 x i64> loaded from %src
; with !nontemporal metadata. SSE, AVX2 and AVX512 expect the load folded into
; paddq / vpaddq. AVX1 instead expects a standalone vmovaps load, two 128-bit
; vpaddq on the extracted halves, and a vinsertf128 to rebuild the ymm result.
808*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %src) {
809*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v4i64:
810*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
811*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq (%rdi), %xmm0
812*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq 16(%rdi), %xmm1
813*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
814*9880d681SAndroid Build Coastguard Worker;
815*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v4i64:
816*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
817*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm1
818*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
819*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
820*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
821*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
822*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
823*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
824*9880d681SAndroid Build Coastguard Worker;
825*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v4i64:
826*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
827*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddq (%rdi), %ymm0, %ymm0
828*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
829*9880d681SAndroid Build Coastguard Worker;
830*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v4i64:
831*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
832*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpaddq (%rdi), %ymm0, %ymm0
833*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
834*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x i64>, <4 x i64>* %src, align 32, !nontemporal !1
835*9880d681SAndroid Build Coastguard Worker  %2 = add <4 x i64> %arg, %1
836*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %2
837*9880d681SAndroid Build Coastguard Worker}
838*9880d681SAndroid Build Coastguard Worker
; test_arg_v16i16: adds %arg to a 32-byte-aligned <16 x i16> loaded from %src
; with !nontemporal metadata. SSE, AVX2 and AVX512 expect the load folded into
; paddw / vpaddw. AVX1 instead expects a standalone vmovaps load, two 128-bit
; vpaddw on the extracted halves, and a vinsertf128 to rebuild the ymm result.
839*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %src) {
840*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v16i16:
841*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
842*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddw (%rdi), %xmm0
843*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddw 16(%rdi), %xmm1
844*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
845*9880d681SAndroid Build Coastguard Worker;
846*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v16i16:
847*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
848*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm1
849*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
850*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
851*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm3, %xmm2, %xmm2
852*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
853*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
854*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
855*9880d681SAndroid Build Coastguard Worker;
856*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v16i16:
857*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
858*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddw (%rdi), %ymm0, %ymm0
859*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
860*9880d681SAndroid Build Coastguard Worker;
861*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v16i16:
862*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
863*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpaddw (%rdi), %ymm0, %ymm0
864*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
865*9880d681SAndroid Build Coastguard Worker  %1 = load <16 x i16>, <16 x i16>* %src, align 32, !nontemporal !1
866*9880d681SAndroid Build Coastguard Worker  %2 = add <16 x i16> %arg, %1
867*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %2
868*9880d681SAndroid Build Coastguard Worker}
869*9880d681SAndroid Build Coastguard Worker
; test_arg_v32i8: adds %arg to a 32-byte-aligned <32 x i8> loaded from %src
; with !nontemporal metadata. SSE, AVX2 and AVX512 expect the load folded into
; paddb / vpaddb. AVX1 instead expects a standalone vmovaps load, two 128-bit
; vpaddb on the extracted halves, and a vinsertf128 to rebuild the ymm result.
870*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %src) {
871*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v32i8:
872*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
873*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddb (%rdi), %xmm0
874*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddb 16(%rdi), %xmm1
875*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
876*9880d681SAndroid Build Coastguard Worker;
877*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v32i8:
878*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
879*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm1
880*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
881*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
882*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm3, %xmm2, %xmm2
883*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
884*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
885*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
886*9880d681SAndroid Build Coastguard Worker;
887*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v32i8:
888*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
889*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddb (%rdi), %ymm0, %ymm0
890*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
891*9880d681SAndroid Build Coastguard Worker;
892*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v32i8:
893*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
894*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpaddb (%rdi), %ymm0, %ymm0
895*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
896*9880d681SAndroid Build Coastguard Worker  %1 = load <32 x i8>, <32 x i8>* %src, align 32, !nontemporal !1
897*9880d681SAndroid Build Coastguard Worker  %2 = add <32 x i8> %arg, %1
898*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %2
899*9880d681SAndroid Build Coastguard Worker}
900*9880d681SAndroid Build Coastguard Worker
901*9880d681SAndroid Build Coastguard Worker; And now ZMM versions.
902*9880d681SAndroid Build Coastguard Worker
; test_arg_v16f32 - fadds a 64-byte-aligned <16 x float> load tagged !nontemporal to %arg.
; Expected code per the checks below: every run folds the load into the add as
; a memory operand - four xmm addps for the SSE runs, two ymm vaddps for the
; AVX runs, and one zmm vaddps for the AVX512 runs.
903*9880d681SAndroid Build Coastguard Workerdefine <16 x float> @test_arg_v16f32(<16 x float> %arg, <16 x float>* %src) {
904*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v16f32:
905*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
906*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addps (%rdi), %xmm0
907*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addps 16(%rdi), %xmm1
908*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addps 32(%rdi), %xmm2
909*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addps 48(%rdi), %xmm3
910*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
911*9880d681SAndroid Build Coastguard Worker;
912*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v16f32:
913*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
914*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddps (%rdi), %ymm0, %ymm0
915*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddps 32(%rdi), %ymm1, %ymm1
916*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
917*9880d681SAndroid Build Coastguard Worker;
918*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v16f32:
919*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
920*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vaddps (%rdi), %zmm0, %zmm0
921*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
922*9880d681SAndroid Build Coastguard Worker  %1 = load <16 x float>, <16 x float>* %src, align 64, !nontemporal !1
923*9880d681SAndroid Build Coastguard Worker  %2 = fadd <16 x float> %arg, %1
924*9880d681SAndroid Build Coastguard Worker  ret <16 x float> %2
925*9880d681SAndroid Build Coastguard Worker}
926*9880d681SAndroid Build Coastguard Worker
; test_arg_v16i32 - adds a 64-byte-aligned <16 x i32> load tagged !nontemporal to %arg.
; Expected code per the checks below: the SSE runs fold the load into four xmm
; paddd memory operands; the AVX1 run loads both 32-byte halves with vmovaps
; and performs the integer adds on 128-bit pieces; the AVX2 run folds into two
; ymm vpaddd; the AVX512 runs fold into one zmm vpaddd.
927*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @test_arg_v16i32(<16 x i32> %arg, <16 x i32>* %src) {
928*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v16i32:
929*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
930*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddd (%rdi), %xmm0
931*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddd 16(%rdi), %xmm1
932*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddd 32(%rdi), %xmm2
933*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddd 48(%rdi), %xmm3
934*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
935*9880d681SAndroid Build Coastguard Worker;
936*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v16i32:
937*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
938*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm2
939*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps 32(%rdi), %ymm3
940*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
941*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
942*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm5, %xmm4, %xmm4
943*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
944*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
945*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
946*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
947*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm4, %xmm2, %xmm2
948*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
949*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
950*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
951*9880d681SAndroid Build Coastguard Worker;
952*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v16i32:
953*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
954*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddd (%rdi), %ymm0, %ymm0
955*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddd 32(%rdi), %ymm1, %ymm1
956*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
957*9880d681SAndroid Build Coastguard Worker;
958*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v16i32:
959*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
960*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpaddd (%rdi), %zmm0, %zmm0
961*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
962*9880d681SAndroid Build Coastguard Worker  %1 = load <16 x i32>, <16 x i32>* %src, align 64, !nontemporal !1
963*9880d681SAndroid Build Coastguard Worker  %2 = add <16 x i32> %arg, %1
964*9880d681SAndroid Build Coastguard Worker  ret <16 x i32> %2
965*9880d681SAndroid Build Coastguard Worker}
966*9880d681SAndroid Build Coastguard Worker
; test_arg_v8f64 - fadds a 64-byte-aligned <8 x double> load tagged !nontemporal to %arg.
; Expected code per the checks below: every run folds the load into the add as
; a memory operand - four xmm addpd for the SSE runs, two ymm vaddpd for the
; AVX runs, and one zmm vaddpd for the AVX512 runs.
967*9880d681SAndroid Build Coastguard Workerdefine <8 x double> @test_arg_v8f64(<8 x double> %arg, <8 x double>* %src) {
968*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8f64:
969*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
970*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addpd (%rdi), %xmm0
971*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addpd 16(%rdi), %xmm1
972*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addpd 32(%rdi), %xmm2
973*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addpd 48(%rdi), %xmm3
974*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
975*9880d681SAndroid Build Coastguard Worker;
976*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v8f64:
977*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
978*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddpd (%rdi), %ymm0, %ymm0
979*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddpd 32(%rdi), %ymm1, %ymm1
980*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
981*9880d681SAndroid Build Coastguard Worker;
982*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v8f64:
983*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
984*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vaddpd (%rdi), %zmm0, %zmm0
985*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
986*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x double>, <8 x double>* %src, align 64, !nontemporal !1
987*9880d681SAndroid Build Coastguard Worker  %2 = fadd <8 x double> %arg, %1
988*9880d681SAndroid Build Coastguard Worker  ret <8 x double> %2
989*9880d681SAndroid Build Coastguard Worker}
990*9880d681SAndroid Build Coastguard Worker
; test_arg_v8i64 - adds a 64-byte-aligned <8 x i64> load tagged !nontemporal to %arg.
; Expected code per the checks below: the SSE runs fold the load into four xmm
; paddq memory operands; the AVX1 run loads both 32-byte halves with vmovaps
; and performs the integer adds on 128-bit pieces; the AVX2 run folds into two
; ymm vpaddq; the AVX512 runs fold into one zmm vpaddq.
991*9880d681SAndroid Build Coastguard Workerdefine <8 x i64> @test_arg_v8i64(<8 x i64> %arg, <8 x i64>* %src) {
992*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8i64:
993*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
994*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq (%rdi), %xmm0
995*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq 16(%rdi), %xmm1
996*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq 32(%rdi), %xmm2
997*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq 48(%rdi), %xmm3
998*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
999*9880d681SAndroid Build Coastguard Worker;
1000*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v8i64:
1001*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
1002*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm2
1003*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps 32(%rdi), %ymm3
1004*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
1005*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
1006*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddq %xmm5, %xmm4, %xmm4
1007*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
1008*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
1009*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1010*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
1011*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddq %xmm4, %xmm2, %xmm2
1012*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
1013*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
1014*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
1015*9880d681SAndroid Build Coastguard Worker;
1016*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v8i64:
1017*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
1018*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddq (%rdi), %ymm0, %ymm0
1019*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddq 32(%rdi), %ymm1, %ymm1
1020*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
1021*9880d681SAndroid Build Coastguard Worker;
1022*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v8i64:
1023*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
1024*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vpaddq (%rdi), %zmm0, %zmm0
1025*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
1026*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x i64>, <8 x i64>* %src, align 64, !nontemporal !1
1027*9880d681SAndroid Build Coastguard Worker  %2 = add <8 x i64> %arg, %1
1028*9880d681SAndroid Build Coastguard Worker  ret <8 x i64> %2
1029*9880d681SAndroid Build Coastguard Worker}
1030*9880d681SAndroid Build Coastguard Worker
; test_arg_v32i16 - adds a 64-byte-aligned <32 x i16> load tagged !nontemporal to %arg.
; Expected code per the checks below: the SSE runs fold the load into four xmm
; paddw memory operands; the AVX1 run loads both 32-byte halves with vmovaps
; and adds 128-bit pieces; the AVX2, AVX512F and AVX512VL runs fold into two
; ymm vpaddw; only the AVX512BW run is expected to use a single zmm vpaddw.
1031*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @test_arg_v32i16(<32 x i16> %arg, <32 x i16>* %src) {
1032*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v32i16:
1033*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1034*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddw (%rdi), %xmm0
1035*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddw 16(%rdi), %xmm1
1036*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddw 32(%rdi), %xmm2
1037*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddw 48(%rdi), %xmm3
1038*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1039*9880d681SAndroid Build Coastguard Worker;
1040*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v32i16:
1041*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
1042*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm2
1043*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps 32(%rdi), %ymm3
1044*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
1045*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
1046*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm5, %xmm4, %xmm4
1047*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
1048*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
1049*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1050*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
1051*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm4, %xmm2, %xmm2
1052*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm3, %xmm1, %xmm1
1053*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
1054*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
1055*9880d681SAndroid Build Coastguard Worker;
1056*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v32i16:
1057*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
1058*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddw (%rdi), %ymm0, %ymm0
1059*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddw 32(%rdi), %ymm1, %ymm1
1060*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
1061*9880d681SAndroid Build Coastguard Worker;
1062*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_arg_v32i16:
1063*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
1064*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpaddw (%rdi), %ymm0, %ymm0
1065*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpaddw 32(%rdi), %ymm1, %ymm1
1066*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1067*9880d681SAndroid Build Coastguard Worker;
1068*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_arg_v32i16:
1069*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
1070*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddw (%rdi), %zmm0, %zmm0
1071*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1072*9880d681SAndroid Build Coastguard Worker;
1073*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_arg_v32i16:
1074*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
1075*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vpaddw (%rdi), %ymm0, %ymm0
1076*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vpaddw 32(%rdi), %ymm1, %ymm1
1077*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
1078*9880d681SAndroid Build Coastguard Worker  %1 = load <32 x i16>, <32 x i16>* %src, align 64, !nontemporal !1
1079*9880d681SAndroid Build Coastguard Worker  %2 = add <32 x i16> %arg, %1
1080*9880d681SAndroid Build Coastguard Worker  ret <32 x i16> %2
1081*9880d681SAndroid Build Coastguard Worker}
1082*9880d681SAndroid Build Coastguard Worker
; test_arg_v64i8 - adds a 64-byte-aligned <64 x i8> load tagged !nontemporal to %arg.
; Expected code per the checks below: the SSE runs fold the load into four xmm
; paddb memory operands; the AVX1 run loads both 32-byte halves with vmovaps
; and adds 128-bit pieces; the AVX2, AVX512F and AVX512VL runs fold into two
; ymm vpaddb; only the AVX512BW run is expected to use a single zmm vpaddb.
1083*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @test_arg_v64i8(<64 x i8> %arg, <64 x i8>* %src) {
1084*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v64i8:
1085*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1086*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddb (%rdi), %xmm0
1087*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddb 16(%rdi), %xmm1
1088*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddb 32(%rdi), %xmm2
1089*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddb 48(%rdi), %xmm3
1090*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1091*9880d681SAndroid Build Coastguard Worker;
1092*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v64i8:
1093*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
1094*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm2
1095*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps 32(%rdi), %ymm3
1096*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
1097*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
1098*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm5, %xmm4, %xmm4
1099*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
1100*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
1101*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1102*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
1103*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm4, %xmm2, %xmm2
1104*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm3, %xmm1, %xmm1
1105*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
1106*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
1107*9880d681SAndroid Build Coastguard Worker;
1108*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v64i8:
1109*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
1110*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddb (%rdi), %ymm0, %ymm0
1111*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddb 32(%rdi), %ymm1, %ymm1
1112*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
1113*9880d681SAndroid Build Coastguard Worker;
1114*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_arg_v64i8:
1115*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
1116*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpaddb (%rdi), %ymm0, %ymm0
1117*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vpaddb 32(%rdi), %ymm1, %ymm1
1118*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1119*9880d681SAndroid Build Coastguard Worker;
1120*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_arg_v64i8:
1121*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
1122*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vpaddb (%rdi), %zmm0, %zmm0
1123*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1124*9880d681SAndroid Build Coastguard Worker;
1125*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_arg_v64i8:
1126*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
1127*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vpaddb (%rdi), %ymm0, %ymm0
1128*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vpaddb 32(%rdi), %ymm1, %ymm1
1129*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
1130*9880d681SAndroid Build Coastguard Worker  %1 = load <64 x i8>, <64 x i8>* %src, align 64, !nontemporal !1
1131*9880d681SAndroid Build Coastguard Worker  %2 = add <64 x i8> %arg, %1
1132*9880d681SAndroid Build Coastguard Worker  ret <64 x i8> %2
1133*9880d681SAndroid Build Coastguard Worker}
1134*9880d681SAndroid Build Coastguard Worker
1135*9880d681SAndroid Build Coastguard Worker
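1136*9880d681SAndroid Build Coastguard Worker; Unaligned (align 1) non-temporal loads. These are not supported as non-temporal loads; the checks expect ordinary unaligned vector moves instead.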
1137*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v4f32 - returns a <4 x float> loaded with align 1 and !nontemporal.
; Expected code per the checks below: a plain unaligned xmm move in every run
; (movups for the SSE runs, vmovups for the AVX and AVX512 runs).
1138*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_unaligned_v4f32(<4 x float>* %src) {
1139*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v4f32:
1140*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1141*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1142*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1143*9880d681SAndroid Build Coastguard Worker;
1144*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v4f32:
1145*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1146*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %xmm0
1147*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1148*9880d681SAndroid Build Coastguard Worker;
1149*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_unaligned_v4f32:
1150*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
1151*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovups (%rdi), %xmm0
1152*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
1153*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x float>, <4 x float>* %src, align 1, !nontemporal !1
1154*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %1
1155*9880d681SAndroid Build Coastguard Worker}
1156*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v4i32 - returns a <4 x i32> loaded with align 1 and !nontemporal.
; Expected code per the checks below: a plain unaligned xmm move - movups for
; the SSE runs, vmovups for the AVX, AVX512F and AVX512BW runs, and vmovdqu32
; for the AVX512VL run.
1157*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @test_unaligned_v4i32(<4 x i32>* %src) {
1158*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v4i32:
1159*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1160*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1161*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1162*9880d681SAndroid Build Coastguard Worker;
1163*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v4i32:
1164*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1165*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %xmm0
1166*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1167*9880d681SAndroid Build Coastguard Worker;
1168*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v4i32:
1169*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
1170*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovups (%rdi), %xmm0
1171*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1172*9880d681SAndroid Build Coastguard Worker;
1173*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v4i32:
1174*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
1175*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovups (%rdi), %xmm0
1176*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1177*9880d681SAndroid Build Coastguard Worker;
1178*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v4i32:
1179*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
1180*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovdqu32 (%rdi), %xmm0
1181*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
1182*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x i32>, <4 x i32>* %src, align 1, !nontemporal !1
1183*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %1
1184*9880d681SAndroid Build Coastguard Worker}
1185*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v2f64 - returns a <2 x double> loaded with align 1 and !nontemporal.
; Expected code per the checks below: a plain unaligned xmm move - movups for
; the SSE runs, vmovups for the AVX, AVX512F and AVX512BW runs, and vmovupd
; for the AVX512VL run.
1186*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_unaligned_v2f64(<2 x double>* %src) {
1187*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v2f64:
1188*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1189*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1190*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1191*9880d681SAndroid Build Coastguard Worker;
1192*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v2f64:
1193*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1194*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %xmm0
1195*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1196*9880d681SAndroid Build Coastguard Worker;
1197*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v2f64:
1198*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
1199*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovups (%rdi), %xmm0
1200*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1201*9880d681SAndroid Build Coastguard Worker;
1202*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v2f64:
1203*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
1204*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovups (%rdi), %xmm0
1205*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1206*9880d681SAndroid Build Coastguard Worker;
1207*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v2f64:
1208*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
1209*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovupd (%rdi), %xmm0
1210*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
1211*9880d681SAndroid Build Coastguard Worker  %1 = load <2 x double>, <2 x double>* %src, align 1, !nontemporal !1
1212*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %1
1213*9880d681SAndroid Build Coastguard Worker}
1214*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v2i64 - returns a <2 x i64> loaded with align 1 and !nontemporal.
; Expected code per the checks below: a plain unaligned xmm move - movups for
; the SSE runs, vmovups for the AVX, AVX512F and AVX512BW runs, and vmovdqu64
; for the AVX512VL run.
1215*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_unaligned_v2i64(<2 x i64>* %src) {
1216*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v2i64:
1217*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1218*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1219*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1220*9880d681SAndroid Build Coastguard Worker;
1221*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v2i64:
1222*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1223*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %xmm0
1224*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1225*9880d681SAndroid Build Coastguard Worker;
1226*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v2i64:
1227*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
1228*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovups (%rdi), %xmm0
1229*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1230*9880d681SAndroid Build Coastguard Worker;
1231*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v2i64:
1232*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
1233*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovups (%rdi), %xmm0
1234*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1235*9880d681SAndroid Build Coastguard Worker;
1236*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v2i64:
1237*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
1238*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovdqu64 (%rdi), %xmm0
1239*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
1240*9880d681SAndroid Build Coastguard Worker  %1 = load <2 x i64>, <2 x i64>* %src, align 1, !nontemporal !1
1241*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %1
1242*9880d681SAndroid Build Coastguard Worker}
1243*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v8i16 - returns an <8 x i16> loaded with align 1 and !nontemporal.
; Expected code per the checks below: a plain unaligned xmm move - movups for
; the SSE runs, vmovups for the AVX, AVX512F and AVX512BW runs, and vmovdqu64
; for the AVX512VL run.
1244*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @test_unaligned_v8i16(<8 x i16>* %src) {
1245*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v8i16:
1246*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1247*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1248*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1249*9880d681SAndroid Build Coastguard Worker;
1250*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v8i16:
1251*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1252*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %xmm0
1253*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1254*9880d681SAndroid Build Coastguard Worker;
1255*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v8i16:
1256*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
1257*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovups (%rdi), %xmm0
1258*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1259*9880d681SAndroid Build Coastguard Worker;
1260*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v8i16:
1261*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
1262*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovups (%rdi), %xmm0
1263*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1264*9880d681SAndroid Build Coastguard Worker;
1265*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v8i16:
1266*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
1267*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovdqu64 (%rdi), %xmm0
1268*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
1269*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x i16>, <8 x i16>* %src, align 1, !nontemporal !1
1270*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %1
1271*9880d681SAndroid Build Coastguard Worker}
1272*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v16i8 - returns a <16 x i8> loaded with align 1 and !nontemporal.
; Expected code per the checks below: a plain unaligned xmm move - movups for
; the SSE runs, vmovups for the AVX, AVX512F and AVX512BW runs, and vmovdqu64
; for the AVX512VL run.
1273*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @test_unaligned_v16i8(<16 x i8>* %src) {
1274*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v16i8:
1275*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1276*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1277*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1278*9880d681SAndroid Build Coastguard Worker;
1279*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v16i8:
1280*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1281*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %xmm0
1282*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1283*9880d681SAndroid Build Coastguard Worker;
1284*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v16i8:
1285*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
1286*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovups (%rdi), %xmm0
1287*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1288*9880d681SAndroid Build Coastguard Worker;
1289*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v16i8:
1290*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
1291*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovups (%rdi), %xmm0
1292*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1293*9880d681SAndroid Build Coastguard Worker;
1294*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v16i8:
1295*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
1296*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovdqu64 (%rdi), %xmm0
1297*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
1298*9880d681SAndroid Build Coastguard Worker  %1 = load <16 x i8>, <16 x i8>* %src, align 1, !nontemporal !1
1299*9880d681SAndroid Build Coastguard Worker  ret <16 x i8> %1
1300*9880d681SAndroid Build Coastguard Worker}
1301*9880d681SAndroid Build Coastguard Worker
1302*9880d681SAndroid Build Coastguard Worker; And now YMM versions.
1303*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v8f32 - returns an <8 x float> loaded with align 1 and !nontemporal.
; Expected code per the checks below: plain unaligned moves - two xmm movups
; (offsets 0 and 16) for the SSE runs and a single ymm vmovups for the AVX and
; AVX512 runs.
1304*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_unaligned_v8f32(<8 x float>* %src) {
1305*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v8f32:
1306*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1307*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1308*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 16(%rdi), %xmm1
1309*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1310*9880d681SAndroid Build Coastguard Worker;
1311*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v8f32:
1312*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1313*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %ymm0
1314*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1315*9880d681SAndroid Build Coastguard Worker;
1316*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_unaligned_v8f32:
1317*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
1318*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovups (%rdi), %ymm0
1319*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
1320*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x float>, <8 x float>* %src, align 1, !nontemporal !1
1321*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %1
1322*9880d681SAndroid Build Coastguard Worker}
1323*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v8i32 - returns an <8 x i32> loaded with align 1 and !nontemporal.
; Expected code per the checks below: plain unaligned moves - two xmm movups
; (offsets 0 and 16) for the SSE runs, a single ymm vmovups for the AVX,
; AVX512F and AVX512BW runs, and a ymm vmovdqu32 for the AVX512VL run.
1324*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test_unaligned_v8i32(<8 x i32>* %src) {
1325*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v8i32:
1326*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1327*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1328*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 16(%rdi), %xmm1
1329*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1330*9880d681SAndroid Build Coastguard Worker;
1331*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v8i32:
1332*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1333*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %ymm0
1334*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1335*9880d681SAndroid Build Coastguard Worker;
1336*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v8i32:
1337*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
1338*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovups (%rdi), %ymm0
1339*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1340*9880d681SAndroid Build Coastguard Worker;
1341*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v8i32:
1342*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
1343*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovups (%rdi), %ymm0
1344*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1345*9880d681SAndroid Build Coastguard Worker;
1346*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v8i32:
1347*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
1348*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovdqu32 (%rdi), %ymm0
1349*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
1350*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x i32>, <8 x i32>* %src, align 1, !nontemporal !1
1351*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %1
1352*9880d681SAndroid Build Coastguard Worker}
1353*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v4f64: load of <4 x double> marked !nontemporal with `align 1`.
; The checks expect plain unaligned moves: two xmm movups on SSE, one ymm
; vmovups on AVX, AVX512F and AVX512BW, and one ymm vmovupd on AVX512VL.
1354*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_unaligned_v4f64(<4 x double>* %src) {
1355*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v4f64:
1356*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1357*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1358*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 16(%rdi), %xmm1
1359*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1360*9880d681SAndroid Build Coastguard Worker;
1361*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v4f64:
1362*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1363*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %ymm0
1364*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1365*9880d681SAndroid Build Coastguard Worker;
1366*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v4f64:
1367*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
1368*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovups (%rdi), %ymm0
1369*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1370*9880d681SAndroid Build Coastguard Worker;
1371*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v4f64:
1372*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
1373*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovups (%rdi), %ymm0
1374*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1375*9880d681SAndroid Build Coastguard Worker;
1376*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v4f64:
1377*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
1378*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovupd (%rdi), %ymm0
1379*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
1380*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x double>, <4 x double>* %src, align 1, !nontemporal !1
1381*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %1
1382*9880d681SAndroid Build Coastguard Worker}
1383*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v4i64: load of <4 x i64> marked !nontemporal with `align 1`.
; The checks expect plain unaligned moves: two xmm movups on SSE, one ymm
; vmovups on AVX, AVX512F and AVX512BW, and one ymm vmovdqu64 on AVX512VL.
1384*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_unaligned_v4i64(<4 x i64>* %src) {
1385*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v4i64:
1386*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1387*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1388*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 16(%rdi), %xmm1
1389*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1390*9880d681SAndroid Build Coastguard Worker;
1391*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v4i64:
1392*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1393*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %ymm0
1394*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1395*9880d681SAndroid Build Coastguard Worker;
1396*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v4i64:
1397*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
1398*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovups (%rdi), %ymm0
1399*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1400*9880d681SAndroid Build Coastguard Worker;
1401*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v4i64:
1402*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
1403*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovups (%rdi), %ymm0
1404*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1405*9880d681SAndroid Build Coastguard Worker;
1406*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v4i64:
1407*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
1408*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovdqu64 (%rdi), %ymm0
1409*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
1410*9880d681SAndroid Build Coastguard Worker  %1 = load <4 x i64>, <4 x i64>* %src, align 1, !nontemporal !1
1411*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %1
1412*9880d681SAndroid Build Coastguard Worker}
1413*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v16i16: load of <16 x i16> marked !nontemporal with `align 1`.
; The checks expect plain unaligned moves: two xmm movups on SSE, one ymm
; vmovups on AVX, AVX512F and AVX512BW, and one ymm vmovdqu64 on AVX512VL.
1414*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @test_unaligned_v16i16(<16 x i16>* %src) {
1415*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v16i16:
1416*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1417*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1418*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 16(%rdi), %xmm1
1419*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1420*9880d681SAndroid Build Coastguard Worker;
1421*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v16i16:
1422*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1423*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %ymm0
1424*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1425*9880d681SAndroid Build Coastguard Worker;
1426*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v16i16:
1427*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
1428*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovups (%rdi), %ymm0
1429*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1430*9880d681SAndroid Build Coastguard Worker;
1431*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v16i16:
1432*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
1433*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovups (%rdi), %ymm0
1434*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1435*9880d681SAndroid Build Coastguard Worker;
1436*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v16i16:
1437*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
1438*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovdqu64 (%rdi), %ymm0
1439*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
1440*9880d681SAndroid Build Coastguard Worker  %1 = load <16 x i16>, <16 x i16>* %src, align 1, !nontemporal !1
1441*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %1
1442*9880d681SAndroid Build Coastguard Worker}
1443*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v32i8: load of <32 x i8> marked !nontemporal with `align 1`.
; The checks expect plain unaligned moves: two xmm movups on SSE, one ymm
; vmovups on AVX, AVX512F and AVX512BW, and one ymm vmovdqu64 on AVX512VL.
1444*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @test_unaligned_v32i8(<32 x i8>* %src) {
1445*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v32i8:
1446*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1447*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1448*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 16(%rdi), %xmm1
1449*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1450*9880d681SAndroid Build Coastguard Worker;
1451*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v32i8:
1452*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1453*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %ymm0
1454*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1455*9880d681SAndroid Build Coastguard Worker;
1456*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v32i8:
1457*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
1458*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovups (%rdi), %ymm0
1459*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1460*9880d681SAndroid Build Coastguard Worker;
1461*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v32i8:
1462*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
1463*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovups (%rdi), %ymm0
1464*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1465*9880d681SAndroid Build Coastguard Worker;
1466*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v32i8:
1467*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
1468*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovdqu64 (%rdi), %ymm0
1469*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
1470*9880d681SAndroid Build Coastguard Worker  %1 = load <32 x i8>, <32 x i8>* %src, align 1, !nontemporal !1
1471*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %1
1472*9880d681SAndroid Build Coastguard Worker}
1473*9880d681SAndroid Build Coastguard Worker
1474*9880d681SAndroid Build Coastguard Worker; And now ZMM versions.
1475*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v16f32: load of <16 x float> marked !nontemporal with
; `align 1`. The checks expect plain unaligned moves: four xmm movups on SSE
; (offsets 0/16/32/48), two ymm vmovups on AVX (offsets 0/32), and a single
; zmm vmovups on all AVX512 runs.
; NOTE(review): presumably the non-temporal hint is not honoured because the
; load is only 1-byte aligned -- confirm against the X86 lowering.
1476*9880d681SAndroid Build Coastguard Workerdefine <16 x float> @test_unaligned_v16f32(<16 x float>* %src) {
1477*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v16f32:
1478*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1479*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1480*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 16(%rdi), %xmm1
1481*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 32(%rdi), %xmm2
1482*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 48(%rdi), %xmm3
1483*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1484*9880d681SAndroid Build Coastguard Worker;
1485*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v16f32:
1486*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1487*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %ymm0
1488*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups 32(%rdi), %ymm1
1489*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1490*9880d681SAndroid Build Coastguard Worker;
1491*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_unaligned_v16f32:
1492*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
1493*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovups (%rdi), %zmm0
1494*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
1495*9880d681SAndroid Build Coastguard Worker  %1 = load <16 x float>, <16 x float>* %src, align 1, !nontemporal !1
1496*9880d681SAndroid Build Coastguard Worker  ret <16 x float> %1
1497*9880d681SAndroid Build Coastguard Worker}
1498*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v16i32: load of <16 x i32> marked !nontemporal with
; `align 1`. The checks expect plain unaligned moves: four xmm movups on SSE,
; two ymm vmovups on AVX, and a single zmm vmovdqu32 on all AVX512 runs.
1499*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @test_unaligned_v16i32(<16 x i32>* %src) {
1500*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v16i32:
1501*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1502*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1503*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 16(%rdi), %xmm1
1504*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 32(%rdi), %xmm2
1505*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 48(%rdi), %xmm3
1506*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1507*9880d681SAndroid Build Coastguard Worker;
1508*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v16i32:
1509*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1510*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %ymm0
1511*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups 32(%rdi), %ymm1
1512*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1513*9880d681SAndroid Build Coastguard Worker;
1514*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_unaligned_v16i32:
1515*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
1516*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovdqu32 (%rdi), %zmm0
1517*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
1518*9880d681SAndroid Build Coastguard Worker  %1 = load <16 x i32>, <16 x i32>* %src, align 1, !nontemporal !1
1519*9880d681SAndroid Build Coastguard Worker  ret <16 x i32> %1
1520*9880d681SAndroid Build Coastguard Worker}
1521*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v8f64: load of <8 x double> marked !nontemporal with
; `align 1`. The checks expect plain unaligned moves: four xmm movups on SSE,
; two ymm vmovups on AVX, and a single zmm vmovupd on all AVX512 runs.
1522*9880d681SAndroid Build Coastguard Workerdefine <8 x double> @test_unaligned_v8f64(<8 x double>* %src) {
1523*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v8f64:
1524*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1525*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1526*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 16(%rdi), %xmm1
1527*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 32(%rdi), %xmm2
1528*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 48(%rdi), %xmm3
1529*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1530*9880d681SAndroid Build Coastguard Worker;
1531*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v8f64:
1532*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1533*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %ymm0
1534*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups 32(%rdi), %ymm1
1535*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1536*9880d681SAndroid Build Coastguard Worker;
1537*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_unaligned_v8f64:
1538*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
1539*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovupd (%rdi), %zmm0
1540*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
1541*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x double>, <8 x double>* %src, align 1, !nontemporal !1
1542*9880d681SAndroid Build Coastguard Worker  ret <8 x double> %1
1543*9880d681SAndroid Build Coastguard Worker}
1544*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v8i64: load of <8 x i64> marked !nontemporal with `align 1`.
; The checks expect plain unaligned moves: four xmm movups on SSE, two ymm
; vmovups on AVX, and a single zmm vmovdqu64 on all AVX512 runs.
1545*9880d681SAndroid Build Coastguard Workerdefine <8 x i64> @test_unaligned_v8i64(<8 x i64>* %src) {
1546*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v8i64:
1547*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1548*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1549*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 16(%rdi), %xmm1
1550*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 32(%rdi), %xmm2
1551*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 48(%rdi), %xmm3
1552*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1553*9880d681SAndroid Build Coastguard Worker;
1554*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v8i64:
1555*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1556*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %ymm0
1557*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups 32(%rdi), %ymm1
1558*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1559*9880d681SAndroid Build Coastguard Worker;
1560*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_unaligned_v8i64:
1561*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0:
1562*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
1563*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
1564*9880d681SAndroid Build Coastguard Worker  %1 = load <8 x i64>, <8 x i64>* %src, align 1, !nontemporal !1
1565*9880d681SAndroid Build Coastguard Worker  ret <8 x i64> %1
1566*9880d681SAndroid Build Coastguard Worker}
1567*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v32i16: load of <32 x i16> marked !nontemporal with
; `align 1`. The checks expect plain unaligned moves: four xmm movups on SSE
; and two ymm vmovups on AVX. Among the AVX512 runs only AVX512BW is checked
; with a single zmm load (vmovdqu16); AVX512F is split into two ymm vmovups
; and AVX512VL into two ymm vmovdqu64.
1568*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @test_unaligned_v32i16(<32 x i16>* %src) {
1569*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v32i16:
1570*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1571*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1572*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 16(%rdi), %xmm1
1573*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 32(%rdi), %xmm2
1574*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 48(%rdi), %xmm3
1575*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1576*9880d681SAndroid Build Coastguard Worker;
1577*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v32i16:
1578*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1579*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %ymm0
1580*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups 32(%rdi), %ymm1
1581*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1582*9880d681SAndroid Build Coastguard Worker;
1583*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v32i16:
1584*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
1585*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovups (%rdi), %ymm0
1586*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovups 32(%rdi), %ymm1
1587*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1588*9880d681SAndroid Build Coastguard Worker;
1589*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v32i16:
1590*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
1591*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu16 (%rdi), %zmm0
1592*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1593*9880d681SAndroid Build Coastguard Worker;
1594*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v32i16:
1595*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
1596*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovdqu64 (%rdi), %ymm0
1597*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovdqu64 32(%rdi), %ymm1
1598*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
1599*9880d681SAndroid Build Coastguard Worker  %1 = load <32 x i16>, <32 x i16>* %src, align 1, !nontemporal !1
1600*9880d681SAndroid Build Coastguard Worker  ret <32 x i16> %1
1601*9880d681SAndroid Build Coastguard Worker}
1602*9880d681SAndroid Build Coastguard Worker
; test_unaligned_v64i8: load of <64 x i8> marked !nontemporal with `align 1`.
; The checks expect plain unaligned moves: four xmm movups on SSE and two ymm
; vmovups on AVX. Among the AVX512 runs only AVX512BW is checked with a
; single zmm load (vmovdqu8); AVX512F is split into two ymm vmovups and
; AVX512VL into two ymm vmovdqu64.
1603*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @test_unaligned_v64i8(<64 x i8>* %src) {
1604*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v64i8:
1605*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1606*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
1607*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 16(%rdi), %xmm1
1608*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 32(%rdi), %xmm2
1609*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 48(%rdi), %xmm3
1610*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1611*9880d681SAndroid Build Coastguard Worker;
1612*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v64i8:
1613*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1614*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %ymm0
1615*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups 32(%rdi), %ymm1
1616*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1617*9880d681SAndroid Build Coastguard Worker;
1618*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v64i8:
1619*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
1620*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovups (%rdi), %ymm0
1621*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovups 32(%rdi), %ymm1
1622*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1623*9880d681SAndroid Build Coastguard Worker;
1624*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v64i8:
1625*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
1626*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovdqu8 (%rdi), %zmm0
1627*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1628*9880d681SAndroid Build Coastguard Worker;
1629*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v64i8:
1630*9880d681SAndroid Build Coastguard Worker; AVX512VL:       # BB#0:
1631*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovdqu64 (%rdi), %ymm0
1632*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    vmovdqu64 32(%rdi), %ymm1
1633*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT:    retq
1634*9880d681SAndroid Build Coastguard Worker  %1 = load <64 x i8>, <64 x i8>* %src, align 1, !nontemporal !1
1635*9880d681SAndroid Build Coastguard Worker  ret <64 x i8> %1
1636*9880d681SAndroid Build Coastguard Worker}
1637*9880d681SAndroid Build Coastguard Worker
; Metadata node referenced as `!nontemporal !1` by the loads in this file;
; it holds the single operand `i32 1`.
1638*9880d681SAndroid Build Coastguard Worker!1 = !{i32 1}
1639