xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+sse4a -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE4A
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+avx2 -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Scalar Stores
;
13*9880d681SAndroid Build Coastguard Worker
; Nontemporal i32 store (align 4). The assertions use the shared ALL prefix,
; so every run configuration of this file is expected to emit a single
; movntil of %esi to (%rdi) followed by retq.
define void @test_nti32(i32* nocapture %ptr, i32 %X) {
; ALL-LABEL: test_nti32:
; ALL:       # BB#0: # %entry
; ALL-NEXT:    movntil %esi, (%rdi)
; ALL-NEXT:    retq
entry:
  store i32 %X, i32* %ptr, align 4, !nontemporal !1
  ret void
}
23*9880d681SAndroid Build Coastguard Worker
; Nontemporal i64 store (align 8). The assertions use the shared ALL prefix,
; so every run configuration of this file is expected to emit a single
; movntiq of %rsi to (%rdi) followed by retq.
define void @test_nti64(i64* nocapture %ptr, i64 %X) {
; ALL-LABEL: test_nti64:
; ALL:       # BB#0: # %entry
; ALL-NEXT:    movntiq %rsi, (%rdi)
; ALL-NEXT:    retq
entry:
  store i64 %X, i64* %ptr, align 8, !nontemporal !1
  ret void
}
33*9880d681SAndroid Build Coastguard Worker
; Nontemporal scalar float store (align 4). Only the +sse4a configuration is
; expected to use the nontemporal scalar form (movntss); the SSE2, SSE41, AVX
; and AVX512 assertions expect an ordinary movss / vmovss store.
define void @test_ntfloat(float* nocapture %ptr, float %X) {
; SSE2-LABEL: test_ntfloat:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movss %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_ntfloat:
; SSE4A:       # BB#0: # %entry
; SSE4A-NEXT:    movntss %xmm0, (%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_ntfloat:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movss %xmm0, (%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_ntfloat:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovss %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_ntfloat:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovss %xmm0, (%rdi)
; AVX512-NEXT:    retq
entry:
  store float %X, float* %ptr, align 4, !nontemporal !1
  ret void
}
63*9880d681SAndroid Build Coastguard Worker
; Nontemporal scalar double store (align 8). Only the +sse4a configuration is
; expected to use the nontemporal scalar form (movntsd); the SSE2, SSE41, AVX
; and AVX512 assertions expect an ordinary movsd / vmovsd store.
define void @test_ntdouble(double* nocapture %ptr, double %X) {
; SSE2-LABEL: test_ntdouble:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movsd %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_ntdouble:
; SSE4A:       # BB#0: # %entry
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_ntdouble:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movsd %xmm0, (%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_ntdouble:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovsd %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_ntdouble:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovsd %xmm0, (%rdi)
; AVX512-NEXT:    retq
entry:
  store double %X, double* %ptr, align 8, !nontemporal !1
  ret void
}
93*9880d681SAndroid Build Coastguard Worker
;
; 128-bit Vector Stores
;
97*9880d681SAndroid Build Coastguard Worker
; Nontemporal <4 x float> store (align 16). The SSE group expects movntps
; from %xmm0; the AVX and AVX512 groups expect the VEX-style vmovntps.
define void @test_nt4xfloat(<4 x float>* nocapture %ptr, <4 x float> %X) {
; SSE-LABEL: test_nt4xfloat:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_nt4xfloat:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_nt4xfloat:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
entry:
  store <4 x float> %X, <4 x float>* %ptr, align 16, !nontemporal !1
  ret void
}
117*9880d681SAndroid Build Coastguard Worker
; Nontemporal <2 x double> store (align 16). The SSE group expects movntpd
; from %xmm0; the AVX and AVX512 groups expect vmovntpd.
define void @test_nt2xdouble(<2 x double>* nocapture %ptr, <2 x double> %X) {
; SSE-LABEL: test_nt2xdouble:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movntpd %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_nt2xdouble:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntpd %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_nt2xdouble:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntpd %xmm0, (%rdi)
; AVX512-NEXT:    retq
entry:
  store <2 x double> %X, <2 x double>* %ptr, align 16, !nontemporal !1
  ret void
}
137*9880d681SAndroid Build Coastguard Worker
; Nontemporal <16 x i8> store (align 16). The SSE group expects movntdq from
; %xmm0; the AVX and AVX512 groups expect vmovntdq.
define void @test_nt16xi8(<16 x i8>* nocapture %ptr, <16 x i8> %X) {
; SSE-LABEL: test_nt16xi8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movntdq %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_nt16xi8:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntdq %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_nt16xi8:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntdq %xmm0, (%rdi)
; AVX512-NEXT:    retq
entry:
  store <16 x i8> %X, <16 x i8>* %ptr, align 16, !nontemporal !1
  ret void
}
157*9880d681SAndroid Build Coastguard Worker
; Nontemporal <8 x i16> store (align 16). The SSE group expects movntdq from
; %xmm0; the AVX and AVX512 groups expect vmovntdq.
define void @test_nt8xi16(<8 x i16>* nocapture %ptr, <8 x i16> %X) {
; SSE-LABEL: test_nt8xi16:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movntdq %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_nt8xi16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntdq %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_nt8xi16:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntdq %xmm0, (%rdi)
; AVX512-NEXT:    retq
entry:
  store <8 x i16> %X, <8 x i16>* %ptr, align 16, !nontemporal !1
  ret void
}
177*9880d681SAndroid Build Coastguard Worker
; Nontemporal <4 x i32> store (align 16). The SSE group expects movntdq from
; %xmm0; the AVX and AVX512 groups expect vmovntdq.
define void @test_nt4xi32(<4 x i32>* nocapture %ptr, <4 x i32> %X) {
; SSE-LABEL: test_nt4xi32:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movntdq %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_nt4xi32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntdq %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_nt4xi32:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntdq %xmm0, (%rdi)
; AVX512-NEXT:    retq
entry:
  store <4 x i32> %X, <4 x i32>* %ptr, align 16, !nontemporal !1
  ret void
}
197*9880d681SAndroid Build Coastguard Worker
; Nontemporal <2 x i64> store (align 16). The SSE group expects movntdq from
; %xmm0; the AVX and AVX512 groups expect vmovntdq.
define void @test_nt2xi64(<2 x i64>* nocapture %ptr, <2 x i64> %X) {
; SSE-LABEL: test_nt2xi64:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movntdq %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_nt2xi64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntdq %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_nt2xi64:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntdq %xmm0, (%rdi)
; AVX512-NEXT:    retq
entry:
  store <2 x i64> %X, <2 x i64>* %ptr, align 16, !nontemporal !1
  ret void
}
217*9880d681SAndroid Build Coastguard Worker
;
; 128-bit Vector Loads
;
221*9880d681SAndroid Build Coastguard Worker
; Nontemporal <4 x float> load (align 16), returned in %xmm0. The SSE2 and
; SSE4A assertions expect an ordinary aligned movaps load; SSE41 expects
; movntdqa, and the AVX and AVX512 groups expect vmovntdqa.
define <4 x float> @test_load_nt4xfloat(<4 x float>* nocapture %ptr) {
; SSE2-LABEL: test_load_nt4xfloat:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movaps (%rdi), %xmm0
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_load_nt4xfloat:
; SSE4A:       # BB#0: # %entry
; SSE4A-NEXT:    movaps (%rdi), %xmm0
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_load_nt4xfloat:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movntdqa (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_load_nt4xfloat:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntdqa (%rdi), %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_load_nt4xfloat:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntdqa (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %ptr, align 16, !nontemporal !1
  ret <4 x float> %0
}
251*9880d681SAndroid Build Coastguard Worker
; Nontemporal <2 x double> load (align 16), returned in %xmm0. The SSE2 and
; SSE4A assertions expect an ordinary aligned movapd load; SSE41 expects
; movntdqa, and the AVX and AVX512 groups expect vmovntdqa.
define <2 x double> @test_load_nt2xdouble(<2 x double>* nocapture %ptr) {
; SSE2-LABEL: test_load_nt2xdouble:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movapd (%rdi), %xmm0
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_load_nt2xdouble:
; SSE4A:       # BB#0: # %entry
; SSE4A-NEXT:    movapd (%rdi), %xmm0
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_load_nt2xdouble:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movntdqa (%rdi), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_load_nt2xdouble:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntdqa (%rdi), %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_load_nt2xdouble:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntdqa (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %ptr, align 16, !nontemporal !1
  ret <2 x double> %0
}
281*9880d681SAndroid Build Coastguard Worker
; Nontemporal <16 x i8> load (align 16), returned in %xmm0. The SSE group
; expects movntdqa; the AVX and AVX512 groups expect vmovntdqa.
; NOTE(review): the shared SSE prefix also covers the +sse2 and +sse4a run
; lines, whereas the float/double load tests in this file expect a plain
; aligned load for those two configurations. movntdqa is, as far as I know,
; an SSE4.1 instruction - confirm this expectation is intended before
; regenerating these assertions.
define <16 x i8> @test_load_nt16xi8(<16 x i8>* nocapture %ptr) {
; SSE-LABEL: test_load_nt16xi8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movntdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_load_nt16xi8:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntdqa (%rdi), %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_load_nt16xi8:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntdqa (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %ptr, align 16, !nontemporal !1
  ret <16 x i8> %0
}
301*9880d681SAndroid Build Coastguard Worker
; Nontemporal <8 x i16> load (align 16), returned in %xmm0. The SSE group
; expects movntdqa; the AVX and AVX512 groups expect vmovntdqa.
; NOTE(review): the shared SSE prefix also covers the +sse2 and +sse4a run
; lines, whereas the float/double load tests in this file expect a plain
; aligned load for those two configurations. movntdqa is, as far as I know,
; an SSE4.1 instruction - confirm this expectation is intended before
; regenerating these assertions.
define <8 x i16> @test_load_nt8xi16(<8 x i16>* nocapture %ptr) {
; SSE-LABEL: test_load_nt8xi16:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movntdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_load_nt8xi16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntdqa (%rdi), %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_load_nt8xi16:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntdqa (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %ptr, align 16, !nontemporal !1
  ret <8 x i16> %0
}
321*9880d681SAndroid Build Coastguard Worker
; Nontemporal <4 x i32> load (align 16), returned in %xmm0. The SSE group
; expects movntdqa; the AVX and AVX512 groups expect vmovntdqa.
; NOTE(review): the shared SSE prefix also covers the +sse2 and +sse4a run
; lines, whereas the float/double load tests in this file expect a plain
; aligned load for those two configurations. movntdqa is, as far as I know,
; an SSE4.1 instruction - confirm this expectation is intended before
; regenerating these assertions.
define <4 x i32> @test_load_nt4xi32(<4 x i32>* nocapture %ptr) {
; SSE-LABEL: test_load_nt4xi32:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movntdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_load_nt4xi32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntdqa (%rdi), %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_load_nt4xi32:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntdqa (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %ptr, align 16, !nontemporal !1
  ret <4 x i32> %0
}
341*9880d681SAndroid Build Coastguard Worker
; Nontemporal <2 x i64> load (align 16), returned in %xmm0. The SSE group
; expects movntdqa; the AVX and AVX512 groups expect vmovntdqa.
; NOTE(review): the shared SSE prefix also covers the +sse2 and +sse4a run
; lines, whereas the float/double load tests in this file expect a plain
; aligned load for those two configurations. movntdqa is, as far as I know,
; an SSE4.1 instruction - confirm this expectation is intended before
; regenerating these assertions.
define <2 x i64> @test_load_nt2xi64(<2 x i64>* nocapture %ptr) {
; SSE-LABEL: test_load_nt2xi64:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movntdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_load_nt2xi64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntdqa (%rdi), %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_load_nt2xi64:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntdqa (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %ptr, align 16, !nontemporal !1
  ret <2 x i64> %0
}
361*9880d681SAndroid Build Coastguard Worker
;
; 256-bit Vector Stores
;
365*9880d681SAndroid Build Coastguard Worker
; Nontemporal <8 x float> store (align 32). The SSE group expects the value
; to be written as two 16-byte movntps stores (%xmm0 at offset 0, %xmm1 at
; offset 16). The AVX group expects one vmovntps of %ymm0 followed by
; vzeroupper; the AVX512 group expects the same store without vzeroupper.
define void @test_nt8xfloat(<8 x float>* nocapture %ptr, <8 x float> %X) {
; SSE-LABEL: test_nt8xfloat:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movntps %xmm1, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_nt8xfloat:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_nt8xfloat:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    retq
entry:
  store <8 x float> %X, <8 x float>* %ptr, align 32, !nontemporal !1
  ret void
}
387*9880d681SAndroid Build Coastguard Worker
; Nontemporal <4 x double> store (align 32). The SSE group expects the value
; to be written as two 16-byte movntpd stores (%xmm0 at offset 0, %xmm1 at
; offset 16). The AVX group expects one vmovntpd of %ymm0 followed by
; vzeroupper; the AVX512 group expects the same store without vzeroupper.
define void @test_nt4xdouble(<4 x double>* nocapture %ptr, <4 x double> %X) {
; SSE-LABEL: test_nt4xdouble:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movntpd %xmm0, (%rdi)
; SSE-NEXT:    movntpd %xmm1, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_nt4xdouble:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntpd %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_nt4xdouble:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntpd %ymm0, (%rdi)
; AVX512-NEXT:    retq
entry:
  store <4 x double> %X, <4 x double>* %ptr, align 32, !nontemporal !1
  ret void
}
409*9880d681SAndroid Build Coastguard Worker
; Nontemporal <32 x i8> store (align 32). The SSE group expects the value to
; be written as two 16-byte movntdq stores (%xmm0 at offset 0, %xmm1 at
; offset 16). The AVX group expects one vmovntdq of %ymm0 followed by
; vzeroupper; the AVX512 group expects the same store without vzeroupper.
define void @test_nt32xi8(<32 x i8>* nocapture %ptr, <32 x i8> %X) {
; SSE-LABEL: test_nt32xi8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movntdq %xmm0, (%rdi)
; SSE-NEXT:    movntdq %xmm1, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_nt32xi8:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntdq %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_nt32xi8:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntdq %ymm0, (%rdi)
; AVX512-NEXT:    retq
entry:
  store <32 x i8> %X, <32 x i8>* %ptr, align 32, !nontemporal !1
  ret void
}
431*9880d681SAndroid Build Coastguard Worker
; Nontemporal <16 x i16> store (align 32). The SSE group expects the value to
; be written as two 16-byte movntdq stores (%xmm0 at offset 0, %xmm1 at
; offset 16). The AVX group expects one vmovntdq of %ymm0 followed by
; vzeroupper; the AVX512 group expects the same store without vzeroupper.
define void @test_nt16xi16(<16 x i16>* nocapture %ptr, <16 x i16> %X) {
; SSE-LABEL: test_nt16xi16:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movntdq %xmm0, (%rdi)
; SSE-NEXT:    movntdq %xmm1, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_nt16xi16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovntdq %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_nt16xi16:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovntdq %ymm0, (%rdi)
; AVX512-NEXT:    retq
entry:
  store <16 x i16> %X, <16 x i16>* %ptr, align 32, !nontemporal !1
  ret void
}
453*9880d681SAndroid Build Coastguard Worker
; test_nt8xi32 - nontemporal store of a <8 x i32> value through %ptr (align 32).
; Expected output, per the check lines below:
;   * SSE run lines: two 16-byte movntdq stores (%xmm0 at offset 0, %xmm1 at 16).
;   * AVX run lines: one vmovntdq of %ymm0, followed by vzeroupper.
;   * AVX512 run lines: one vmovntdq of %ymm0, no vzeroupper.
; The !nontemporal operand refers to metadata node !1, defined outside this excerpt.
454*9880d681SAndroid Build Coastguard Workerdefine void @test_nt8xi32(<8 x i32>* nocapture %ptr, <8 x i32> %X) {
455*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt8xi32:
456*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
457*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm0, (%rdi)
458*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm1, 16(%rdi)
459*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
460*9880d681SAndroid Build Coastguard Worker;
461*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt8xi32:
462*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
463*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdq %ymm0, (%rdi)
464*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
465*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
466*9880d681SAndroid Build Coastguard Worker;
467*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt8xi32:
468*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
469*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdq %ymm0, (%rdi)
470*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
471*9880d681SAndroid Build Coastguard Workerentry:
472*9880d681SAndroid Build Coastguard Worker  store <8 x i32> %X, <8 x i32>* %ptr, align 32, !nontemporal !1
473*9880d681SAndroid Build Coastguard Worker  ret void
474*9880d681SAndroid Build Coastguard Worker}
475*9880d681SAndroid Build Coastguard Worker
; test_nt4xi64 - nontemporal store of a <4 x i64> value through %ptr (align 32).
; Expected output, per the check lines below:
;   * SSE run lines: two 16-byte movntdq stores (%xmm0 at offset 0, %xmm1 at 16).
;   * AVX run lines: one vmovntdq of %ymm0, followed by vzeroupper.
;   * AVX512 run lines: one vmovntdq of %ymm0, no vzeroupper.
; The !nontemporal operand refers to metadata node !1, defined outside this excerpt.
476*9880d681SAndroid Build Coastguard Workerdefine void @test_nt4xi64(<4 x i64>* nocapture %ptr, <4 x i64> %X) {
477*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt4xi64:
478*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
479*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm0, (%rdi)
480*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm1, 16(%rdi)
481*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
482*9880d681SAndroid Build Coastguard Worker;
483*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt4xi64:
484*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
485*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdq %ymm0, (%rdi)
486*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
487*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
488*9880d681SAndroid Build Coastguard Worker;
489*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt4xi64:
490*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
491*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdq %ymm0, (%rdi)
492*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
493*9880d681SAndroid Build Coastguard Workerentry:
494*9880d681SAndroid Build Coastguard Worker  store <4 x i64> %X, <4 x i64>* %ptr, align 32, !nontemporal !1
495*9880d681SAndroid Build Coastguard Worker  ret void
496*9880d681SAndroid Build Coastguard Worker}
497*9880d681SAndroid Build Coastguard Worker
498*9880d681SAndroid Build Coastguard Worker;
499*9880d681SAndroid Build Coastguard Worker; 256-bit Vector Loads
500*9880d681SAndroid Build Coastguard Worker;
501*9880d681SAndroid Build Coastguard Worker
; test_load_nt8xfloat - nontemporal load of a <8 x float> from %ptr (align 32),
; returned to the caller.
; Expected output, per the check lines below:
;   * SSE2 and SSE4A run lines: ordinary aligned loads, movaps of each 16-byte
;     half into %xmm0 / %xmm1.
;   * SSE41 run line: movntdqa for each 16-byte half.
;   * AVX1 run line: ordinary vmovaps into %ymm0.
;   * AVX2 and AVX512 run lines: a single vmovntdqa into %ymm0.
; NOTE(review): the split presumably reflects movntdqa being introduced with
; SSE4.1 and its 256-bit form with AVX2 - confirm against the ISA reference.
; The !nontemporal operand refers to metadata node !1, defined outside this excerpt.
502*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_load_nt8xfloat(<8 x float>* nocapture %ptr) {
503*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt8xfloat:
504*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
505*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
506*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
507*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
508*9880d681SAndroid Build Coastguard Worker;
509*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt8xfloat:
510*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0: # %entry
511*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps (%rdi), %xmm0
512*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 16(%rdi), %xmm1
513*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
514*9880d681SAndroid Build Coastguard Worker;
515*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt8xfloat:
516*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
517*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
518*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
519*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
520*9880d681SAndroid Build Coastguard Worker;
521*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt8xfloat:
522*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0: # %entry
523*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
524*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
525*9880d681SAndroid Build Coastguard Worker;
526*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt8xfloat:
527*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
528*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
529*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
530*9880d681SAndroid Build Coastguard Worker;
531*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt8xfloat:
532*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
533*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %ymm0
534*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
535*9880d681SAndroid Build Coastguard Workerentry:
536*9880d681SAndroid Build Coastguard Worker  %0 = load <8 x float>, <8 x float>* %ptr, align 32, !nontemporal !1
537*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %0
538*9880d681SAndroid Build Coastguard Worker}
539*9880d681SAndroid Build Coastguard Worker
; test_load_nt4xdouble - nontemporal load of a <4 x double> from %ptr (align 32),
; returned to the caller.
; Expected output, per the check lines below:
;   * SSE2 and SSE4A run lines: ordinary aligned loads, movapd of each 16-byte
;     half into %xmm0 / %xmm1.
;   * SSE41 run line: movntdqa for each 16-byte half.
;   * AVX1 run line: ordinary vmovapd into %ymm0.
;   * AVX2 and AVX512 run lines: a single vmovntdqa into %ymm0.
; The !nontemporal operand refers to metadata node !1, defined outside this excerpt.
540*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_load_nt4xdouble(<4 x double>* nocapture %ptr) {
541*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt4xdouble:
542*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
543*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movapd (%rdi), %xmm0
544*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movapd 16(%rdi), %xmm1
545*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
546*9880d681SAndroid Build Coastguard Worker;
547*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt4xdouble:
548*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0: # %entry
549*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movapd (%rdi), %xmm0
550*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movapd 16(%rdi), %xmm1
551*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
552*9880d681SAndroid Build Coastguard Worker;
553*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt4xdouble:
554*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
555*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
556*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
557*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
558*9880d681SAndroid Build Coastguard Worker;
559*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt4xdouble:
560*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0: # %entry
561*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovapd (%rdi), %ymm0
562*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
563*9880d681SAndroid Build Coastguard Worker;
564*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt4xdouble:
565*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
566*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
567*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
568*9880d681SAndroid Build Coastguard Worker;
569*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt4xdouble:
570*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
571*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %ymm0
572*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
573*9880d681SAndroid Build Coastguard Workerentry:
574*9880d681SAndroid Build Coastguard Worker  %0 = load <4 x double>, <4 x double>* %ptr, align 32, !nontemporal !1
575*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %0
576*9880d681SAndroid Build Coastguard Worker}
577*9880d681SAndroid Build Coastguard Worker
; test_load_nt32xi8 - nontemporal load of a <32 x i8> from %ptr (align 32),
; returned to the caller.
; Expected output, per the check lines below:
;   * SSE2 and SSE4A run lines: ordinary aligned loads, movaps of each 16-byte
;     half into %xmm0 / %xmm1.
;   * SSE41 run line: movntdqa for each 16-byte half.
;   * AVX1 run line: ordinary vmovdqa into %ymm0.
;   * AVX2 and AVX512 run lines: a single vmovntdqa into %ymm0.
; The !nontemporal operand refers to metadata node !1, defined outside this excerpt.
578*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @test_load_nt32xi8(<32 x i8>* nocapture %ptr) {
579*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt32xi8:
580*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
581*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
582*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
583*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
584*9880d681SAndroid Build Coastguard Worker;
585*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt32xi8:
586*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0: # %entry
587*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps (%rdi), %xmm0
588*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 16(%rdi), %xmm1
589*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
590*9880d681SAndroid Build Coastguard Worker;
591*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt32xi8:
592*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
593*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
594*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
595*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
596*9880d681SAndroid Build Coastguard Worker;
597*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt32xi8:
598*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0: # %entry
599*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
600*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
601*9880d681SAndroid Build Coastguard Worker;
602*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt32xi8:
603*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
604*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
605*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
606*9880d681SAndroid Build Coastguard Worker;
607*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt32xi8:
608*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
609*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %ymm0
610*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
611*9880d681SAndroid Build Coastguard Workerentry:
612*9880d681SAndroid Build Coastguard Worker  %0 = load <32 x i8>, <32 x i8>* %ptr, align 32, !nontemporal !1
613*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %0
614*9880d681SAndroid Build Coastguard Worker}
615*9880d681SAndroid Build Coastguard Worker
; test_load_nt16xi16 - nontemporal load of a <16 x i16> from %ptr (align 32),
; returned to the caller.
; Expected output, per the check lines below:
;   * SSE2 and SSE4A run lines: ordinary aligned loads, movaps of each 16-byte
;     half into %xmm0 / %xmm1.
;   * SSE41 run line: movntdqa for each 16-byte half.
;   * AVX1 run line: ordinary vmovdqa into %ymm0.
;   * AVX2 and AVX512 run lines: a single vmovntdqa into %ymm0.
; The !nontemporal operand refers to metadata node !1, defined outside this excerpt.
616*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @test_load_nt16xi16(<16 x i16>* nocapture %ptr) {
617*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt16xi16:
618*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
619*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
620*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
621*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
622*9880d681SAndroid Build Coastguard Worker;
623*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt16xi16:
624*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0: # %entry
625*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps (%rdi), %xmm0
626*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 16(%rdi), %xmm1
627*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
628*9880d681SAndroid Build Coastguard Worker;
629*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt16xi16:
630*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
631*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
632*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
633*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
634*9880d681SAndroid Build Coastguard Worker;
635*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt16xi16:
636*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0: # %entry
637*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
638*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
639*9880d681SAndroid Build Coastguard Worker;
640*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt16xi16:
641*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
642*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
643*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
644*9880d681SAndroid Build Coastguard Worker;
645*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt16xi16:
646*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
647*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %ymm0
648*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
649*9880d681SAndroid Build Coastguard Workerentry:
650*9880d681SAndroid Build Coastguard Worker  %0 = load <16 x i16>, <16 x i16>* %ptr, align 32, !nontemporal !1
651*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %0
652*9880d681SAndroid Build Coastguard Worker}
653*9880d681SAndroid Build Coastguard Worker
; test_load_nt8xi32 - nontemporal load of a <8 x i32> from %ptr (align 32),
; returned to the caller.
; Expected output, per the check lines below:
;   * SSE2 and SSE4A run lines: ordinary aligned loads, movaps of each 16-byte
;     half into %xmm0 / %xmm1.
;   * SSE41 run line: movntdqa for each 16-byte half.
;   * AVX1 run line: ordinary vmovdqa into %ymm0.
;   * AVX2 and AVX512 run lines: a single vmovntdqa into %ymm0.
; The !nontemporal operand refers to metadata node !1, defined outside this excerpt.
654*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test_load_nt8xi32(<8 x i32>* nocapture %ptr) {
655*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt8xi32:
656*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
657*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
658*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
659*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
660*9880d681SAndroid Build Coastguard Worker;
661*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt8xi32:
662*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0: # %entry
663*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps (%rdi), %xmm0
664*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 16(%rdi), %xmm1
665*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
666*9880d681SAndroid Build Coastguard Worker;
667*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt8xi32:
668*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
669*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
670*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
671*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
672*9880d681SAndroid Build Coastguard Worker;
673*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt8xi32:
674*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0: # %entry
675*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
676*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
677*9880d681SAndroid Build Coastguard Worker;
678*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt8xi32:
679*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
680*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
681*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
682*9880d681SAndroid Build Coastguard Worker;
683*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt8xi32:
684*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
685*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %ymm0
686*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
687*9880d681SAndroid Build Coastguard Workerentry:
688*9880d681SAndroid Build Coastguard Worker  %0 = load <8 x i32>, <8 x i32>* %ptr, align 32, !nontemporal !1
689*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %0
690*9880d681SAndroid Build Coastguard Worker}
691*9880d681SAndroid Build Coastguard Worker
; test_load_nt4xi64 - nontemporal load of a <4 x i64> from %ptr (align 32),
; returned to the caller.
; Expected output, per the check lines below:
;   * SSE2 and SSE4A run lines: ordinary aligned loads, movaps of each 16-byte
;     half into %xmm0 / %xmm1.
;   * SSE41 run line: movntdqa for each 16-byte half.
;   * AVX1 run line: ordinary vmovdqa into %ymm0.
;   * AVX2 and AVX512 run lines: a single vmovntdqa into %ymm0.
; The !nontemporal operand refers to metadata node !1, defined outside this excerpt.
692*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_load_nt4xi64(<4 x i64>* nocapture %ptr) {
693*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt4xi64:
694*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
695*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
696*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
697*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
698*9880d681SAndroid Build Coastguard Worker;
699*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt4xi64:
700*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0: # %entry
701*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps (%rdi), %xmm0
702*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 16(%rdi), %xmm1
703*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
704*9880d681SAndroid Build Coastguard Worker;
705*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt4xi64:
706*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
707*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
708*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
709*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
710*9880d681SAndroid Build Coastguard Worker;
711*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt4xi64:
712*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0: # %entry
713*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
714*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
715*9880d681SAndroid Build Coastguard Worker;
716*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt4xi64:
717*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
718*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
719*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
720*9880d681SAndroid Build Coastguard Worker;
721*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt4xi64:
722*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
723*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %ymm0
724*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
725*9880d681SAndroid Build Coastguard Workerentry:
726*9880d681SAndroid Build Coastguard Worker  %0 = load <4 x i64>, <4 x i64>* %ptr, align 32, !nontemporal !1
727*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %0
728*9880d681SAndroid Build Coastguard Worker}
729*9880d681SAndroid Build Coastguard Worker
730*9880d681SAndroid Build Coastguard Worker;
731*9880d681SAndroid Build Coastguard Worker; 512-bit Vector Stores
732*9880d681SAndroid Build Coastguard Worker;
733*9880d681SAndroid Build Coastguard Worker
; test_nt16xfloat - nontemporal store of a <16 x float> value through %ptr (align 64).
; Expected output, per the check lines below:
;   * SSE run lines: four 16-byte movntps stores, %xmm0..%xmm3 at offsets
;     0, 16, 32 and 48.
;   * AVX run lines: two 32-byte vmovntps stores (%ymm0 at 0, %ymm1 at 32),
;     followed by vzeroupper.
;   * AVX512 run lines: a single vmovntps of %zmm0, no vzeroupper.
; The !nontemporal operand refers to metadata node !1, defined outside this excerpt.
734*9880d681SAndroid Build Coastguard Workerdefine void @test_nt16xfloat(<16 x float>* nocapture %ptr, <16 x float> %X) {
735*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt16xfloat:
736*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
737*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
738*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm1, 16(%rdi)
739*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm2, 32(%rdi)
740*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm3, 48(%rdi)
741*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
742*9880d681SAndroid Build Coastguard Worker;
743*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt16xfloat:
744*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
745*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %ymm0, (%rdi)
746*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %ymm1, 32(%rdi)
747*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
748*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
749*9880d681SAndroid Build Coastguard Worker;
750*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt16xfloat:
751*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
752*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntps %zmm0, (%rdi)
753*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
754*9880d681SAndroid Build Coastguard Workerentry:
755*9880d681SAndroid Build Coastguard Worker  store <16 x float> %X, <16 x float>* %ptr, align 64, !nontemporal !1
756*9880d681SAndroid Build Coastguard Worker  ret void
757*9880d681SAndroid Build Coastguard Worker}
758*9880d681SAndroid Build Coastguard Worker
; test_nt8xdouble - nontemporal store of a <8 x double> value through %ptr (align 64).
; Expected output, per the check lines below:
;   * SSE run lines: four 16-byte movntpd stores, %xmm0..%xmm3 at offsets
;     0, 16, 32 and 48.
;   * AVX run lines: two 32-byte vmovntpd stores (%ymm0 at 0, %ymm1 at 32),
;     followed by vzeroupper.
;   * AVX512 run lines: a single vmovntpd of %zmm0, no vzeroupper.
; The !nontemporal operand refers to metadata node !1, defined outside this excerpt.
759*9880d681SAndroid Build Coastguard Workerdefine void @test_nt8xdouble(<8 x double>* nocapture %ptr, <8 x double> %X) {
760*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt8xdouble:
761*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
762*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntpd %xmm0, (%rdi)
763*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntpd %xmm1, 16(%rdi)
764*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntpd %xmm2, 32(%rdi)
765*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntpd %xmm3, 48(%rdi)
766*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
767*9880d681SAndroid Build Coastguard Worker;
768*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt8xdouble:
769*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
770*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntpd %ymm0, (%rdi)
771*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntpd %ymm1, 32(%rdi)
772*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
773*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
774*9880d681SAndroid Build Coastguard Worker;
775*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt8xdouble:
776*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
777*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntpd %zmm0, (%rdi)
778*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
779*9880d681SAndroid Build Coastguard Workerentry:
780*9880d681SAndroid Build Coastguard Worker  store <8 x double> %X, <8 x double>* %ptr, align 64, !nontemporal !1
781*9880d681SAndroid Build Coastguard Worker  ret void
782*9880d681SAndroid Build Coastguard Worker}
783*9880d681SAndroid Build Coastguard Worker
; test_nt64xi8 - nontemporal store of a <64 x i8> value through %ptr (align 64).
; Expected output, per the check lines below:
;   * SSE run lines: four 16-byte movntdq stores, %xmm0..%xmm3 at offsets
;     0, 16, 32 and 48.
;   * AVX run lines: two 32-byte vmovntdq stores (%ymm0 at 0, %ymm1 at 32),
;     followed by vzeroupper.
;   * AVX512F run line: the same two %ymm stores, but no vzeroupper.
;   * AVX512BW run line: a single vmovntdq of %zmm0.
; NOTE(review): the AVX512F / AVX512BW split presumably reflects 512-bit i8
; vectors only being handled as one register when avx512bw is enabled - confirm.
; The !nontemporal operand refers to metadata node !1, defined outside this excerpt.
784*9880d681SAndroid Build Coastguard Workerdefine void @test_nt64xi8(<64 x i8>* nocapture %ptr, <64 x i8> %X) {
785*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt64xi8:
786*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
787*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm0, (%rdi)
788*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm1, 16(%rdi)
789*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm2, 32(%rdi)
790*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm3, 48(%rdi)
791*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
792*9880d681SAndroid Build Coastguard Worker;
793*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt64xi8:
794*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
795*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdq %ymm0, (%rdi)
796*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdq %ymm1, 32(%rdi)
797*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
798*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
799*9880d681SAndroid Build Coastguard Worker;
800*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_nt64xi8:
801*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0: # %entry
802*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovntdq %ymm0, (%rdi)
803*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovntdq %ymm1, 32(%rdi)
804*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
805*9880d681SAndroid Build Coastguard Worker;
806*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_nt64xi8:
807*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0: # %entry
808*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovntdq %zmm0, (%rdi)
809*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
810*9880d681SAndroid Build Coastguard Workerentry:
811*9880d681SAndroid Build Coastguard Worker  store <64 x i8> %X, <64 x i8>* %ptr, align 64, !nontemporal !1
812*9880d681SAndroid Build Coastguard Worker  ret void
813*9880d681SAndroid Build Coastguard Worker}
814*9880d681SAndroid Build Coastguard Worker
; test_nt32xi16 - nontemporal store of a <32 x i16> value through %ptr (align 64).
; Expected output, per the check lines below:
;   * SSE run lines: four 16-byte movntdq stores, %xmm0..%xmm3 at offsets
;     0, 16, 32 and 48.
;   * AVX run lines: two 32-byte vmovntdq stores (%ymm0 at 0, %ymm1 at 32),
;     followed by vzeroupper.
;   * AVX512F run line: the same two %ymm stores, but no vzeroupper.
;   * AVX512BW run line: a single vmovntdq of %zmm0.
; NOTE(review): the AVX512F / AVX512BW split presumably reflects 512-bit i16
; vectors only being handled as one register when avx512bw is enabled - confirm.
; The !nontemporal operand refers to metadata node !1, defined outside this excerpt.
815*9880d681SAndroid Build Coastguard Workerdefine void @test_nt32xi16(<32 x i16>* nocapture %ptr, <32 x i16> %X) {
816*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt32xi16:
817*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
818*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm0, (%rdi)
819*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm1, 16(%rdi)
820*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm2, 32(%rdi)
821*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm3, 48(%rdi)
822*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
823*9880d681SAndroid Build Coastguard Worker;
824*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt32xi16:
825*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
826*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdq %ymm0, (%rdi)
827*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdq %ymm1, 32(%rdi)
828*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
829*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
830*9880d681SAndroid Build Coastguard Worker;
831*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_nt32xi16:
832*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0: # %entry
833*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovntdq %ymm0, (%rdi)
834*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovntdq %ymm1, 32(%rdi)
835*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
836*9880d681SAndroid Build Coastguard Worker;
837*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_nt32xi16:
838*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0: # %entry
839*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovntdq %zmm0, (%rdi)
840*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
841*9880d681SAndroid Build Coastguard Workerentry:
842*9880d681SAndroid Build Coastguard Worker  store <32 x i16> %X, <32 x i16>* %ptr, align 64, !nontemporal !1
843*9880d681SAndroid Build Coastguard Worker  ret void
844*9880d681SAndroid Build Coastguard Worker}
845*9880d681SAndroid Build Coastguard Worker
; test_nt16xi32 - nontemporal store of a <16 x i32> value through %ptr (align 64).
; Expected output, per the check lines below:
;   * SSE run lines: four 16-byte movntdq stores, %xmm0..%xmm3 at offsets
;     0, 16, 32 and 48.
;   * AVX run lines: two 32-byte vmovntdq stores (%ymm0 at 0, %ymm1 at 32),
;     followed by vzeroupper.
;   * AVX512 run lines (both avx512f and avx512bw): a single vmovntdq of %zmm0.
; The !nontemporal operand refers to metadata node !1, defined outside this excerpt.
846*9880d681SAndroid Build Coastguard Workerdefine void @test_nt16xi32(<16 x i32>* nocapture %ptr, <16 x i32> %X) {
847*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt16xi32:
848*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
849*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm0, (%rdi)
850*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm1, 16(%rdi)
851*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm2, 32(%rdi)
852*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm3, 48(%rdi)
853*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
854*9880d681SAndroid Build Coastguard Worker;
855*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt16xi32:
856*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
857*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdq %ymm0, (%rdi)
858*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdq %ymm1, 32(%rdi)
859*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
860*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
861*9880d681SAndroid Build Coastguard Worker;
862*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt16xi32:
863*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
864*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdq %zmm0, (%rdi)
865*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
866*9880d681SAndroid Build Coastguard Workerentry:
867*9880d681SAndroid Build Coastguard Worker  store <16 x i32> %X, <16 x i32>* %ptr, align 64, !nontemporal !1
868*9880d681SAndroid Build Coastguard Worker  ret void
869*9880d681SAndroid Build Coastguard Worker}
870*9880d681SAndroid Build Coastguard Worker
; test_nt8xi64 - nontemporal store of a <8 x i64> value through %ptr (align 64).
; Expected output, per the check lines below:
;   * SSE run lines: four 16-byte movntdq stores, %xmm0..%xmm3 at offsets
;     0, 16, 32 and 48.
;   * AVX run lines: two 32-byte vmovntdq stores (%ymm0 at 0, %ymm1 at 32),
;     followed by vzeroupper.
;   * AVX512 run lines (both avx512f and avx512bw): a single vmovntdq of %zmm0.
; The !nontemporal operand refers to metadata node !1, defined outside this excerpt.
871*9880d681SAndroid Build Coastguard Workerdefine void @test_nt8xi64(<8 x i64>* nocapture %ptr, <8 x i64> %X) {
872*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt8xi64:
873*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0: # %entry
874*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm0, (%rdi)
875*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm1, 16(%rdi)
876*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm2, 32(%rdi)
877*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm3, 48(%rdi)
878*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
879*9880d681SAndroid Build Coastguard Worker;
880*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt8xi64:
881*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0: # %entry
882*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdq %ymm0, (%rdi)
883*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdq %ymm1, 32(%rdi)
884*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
885*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
886*9880d681SAndroid Build Coastguard Worker;
887*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt8xi64:
888*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
889*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdq %zmm0, (%rdi)
890*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
891*9880d681SAndroid Build Coastguard Workerentry:
892*9880d681SAndroid Build Coastguard Worker  store <8 x i64> %X, <8 x i64>* %ptr, align 64, !nontemporal !1
893*9880d681SAndroid Build Coastguard Worker  ret void
894*9880d681SAndroid Build Coastguard Worker}
895*9880d681SAndroid Build Coastguard Worker
896*9880d681SAndroid Build Coastguard Worker;
897*9880d681SAndroid Build Coastguard Worker; 512-bit Vector Loads
898*9880d681SAndroid Build Coastguard Worker;
899*9880d681SAndroid Build Coastguard Worker
; Nontemporal load of a 512-bit <16 x float> value from a 64-byte-aligned
; pointer. Expected output per run, as listed below:
;   - SSE2 and SSE4A runs use four ordinary aligned movaps loads; no
;     nontemporal load instruction appears for them.
;   - SSE4.1 run uses four 16-byte movntdqa loads.
;   - AVX1 run uses two ordinary aligned 32-byte vmovaps loads.
;   - AVX2 run uses two 32-byte vmovntdqa loads.
;   - AVX512 runs use a single 64-byte vmovntdqa load.
900*9880d681SAndroid Build Coastguard Workerdefine <16 x float> @test_load_nt16xfloat(<16 x float>* nocapture %ptr) {
901*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt16xfloat:
902*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
903*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
904*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
905*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 32(%rdi), %xmm2
906*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 48(%rdi), %xmm3
907*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
908*9880d681SAndroid Build Coastguard Worker;
909*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt16xfloat:
910*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0: # %entry
911*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps (%rdi), %xmm0
912*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 16(%rdi), %xmm1
913*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 32(%rdi), %xmm2
914*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 48(%rdi), %xmm3
915*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
916*9880d681SAndroid Build Coastguard Worker;
917*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt16xfloat:
918*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
919*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
920*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
921*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 32(%rdi), %xmm2
922*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 48(%rdi), %xmm3
923*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
924*9880d681SAndroid Build Coastguard Worker;
925*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt16xfloat:
926*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0: # %entry
927*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
928*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
929*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
930*9880d681SAndroid Build Coastguard Worker;
931*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt16xfloat:
932*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
933*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
934*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm1
935*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
936*9880d681SAndroid Build Coastguard Worker;
937*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt16xfloat:
938*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
939*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %zmm0
940*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
941*9880d681SAndroid Build Coastguard Workerentry:
942*9880d681SAndroid Build Coastguard Worker  %0 = load <16 x float>, <16 x float>* %ptr, align 64, !nontemporal !1
943*9880d681SAndroid Build Coastguard Worker  ret <16 x float> %0
944*9880d681SAndroid Build Coastguard Worker}
945*9880d681SAndroid Build Coastguard Worker
; Nontemporal load of a 512-bit <8 x double> value from a 64-byte-aligned
; pointer. Expected output per run, as listed below:
;   - SSE2 and SSE4A runs use four ordinary aligned movapd loads.
;   - SSE4.1 run uses four 16-byte movntdqa loads.
;   - AVX1 run uses two ordinary aligned 32-byte vmovapd loads.
;   - AVX2 run uses two 32-byte vmovntdqa loads.
;   - AVX512 runs use a single 64-byte vmovntdqa load.
946*9880d681SAndroid Build Coastguard Workerdefine <8 x double> @test_load_nt8xdouble(<8 x double>* nocapture %ptr) {
947*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt8xdouble:
948*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
949*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movapd (%rdi), %xmm0
950*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movapd 16(%rdi), %xmm1
951*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movapd 32(%rdi), %xmm2
952*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movapd 48(%rdi), %xmm3
953*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
954*9880d681SAndroid Build Coastguard Worker;
955*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt8xdouble:
956*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0: # %entry
957*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movapd (%rdi), %xmm0
958*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movapd 16(%rdi), %xmm1
959*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movapd 32(%rdi), %xmm2
960*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movapd 48(%rdi), %xmm3
961*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
962*9880d681SAndroid Build Coastguard Worker;
963*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt8xdouble:
964*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
965*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
966*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
967*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 32(%rdi), %xmm2
968*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 48(%rdi), %xmm3
969*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
970*9880d681SAndroid Build Coastguard Worker;
971*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt8xdouble:
972*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0: # %entry
973*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovapd (%rdi), %ymm0
974*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovapd 32(%rdi), %ymm1
975*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
976*9880d681SAndroid Build Coastguard Worker;
977*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt8xdouble:
978*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
979*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
980*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm1
981*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
982*9880d681SAndroid Build Coastguard Worker;
983*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt8xdouble:
984*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
985*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %zmm0
986*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
987*9880d681SAndroid Build Coastguard Workerentry:
988*9880d681SAndroid Build Coastguard Worker  %0 = load <8 x double>, <8 x double>* %ptr, align 64, !nontemporal !1
989*9880d681SAndroid Build Coastguard Worker  ret <8 x double> %0
990*9880d681SAndroid Build Coastguard Worker}
991*9880d681SAndroid Build Coastguard Worker
; Nontemporal load of a 512-bit <64 x i8> value from a 64-byte-aligned
; pointer. Expected output per run, as listed below:
;   - SSE2 and SSE4A runs use four ordinary aligned movaps loads.
;   - SSE4.1 run uses four 16-byte movntdqa loads.
;   - AVX1 run uses two ordinary aligned 32-byte vmovaps loads.
;   - AVX2 run uses two 32-byte vmovntdqa loads.
;   - The AVX512F run (without avx512bw) still expects two 32-byte vmovntdqa
;     loads into ymm registers, so it is checked separately from the
;     AVX512BW run, which expects a single 64-byte vmovntdqa load.
992*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @test_load_nt64xi8(<64 x i8>* nocapture %ptr) {
993*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt64xi8:
994*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
995*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
996*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
997*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 32(%rdi), %xmm2
998*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 48(%rdi), %xmm3
999*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
1000*9880d681SAndroid Build Coastguard Worker;
1001*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt64xi8:
1002*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0: # %entry
1003*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps (%rdi), %xmm0
1004*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 16(%rdi), %xmm1
1005*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 32(%rdi), %xmm2
1006*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 48(%rdi), %xmm3
1007*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
1008*9880d681SAndroid Build Coastguard Worker;
1009*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt64xi8:
1010*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
1011*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
1012*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
1013*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 32(%rdi), %xmm2
1014*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 48(%rdi), %xmm3
1015*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
1016*9880d681SAndroid Build Coastguard Worker;
1017*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt64xi8:
1018*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0: # %entry
1019*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
1020*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
1021*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
1022*9880d681SAndroid Build Coastguard Worker;
1023*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt64xi8:
1024*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
1025*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
1026*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm1
1027*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
1028*9880d681SAndroid Build Coastguard Worker;
1029*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_load_nt64xi8:
1030*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0: # %entry
1031*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovntdqa (%rdi), %ymm0
1032*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovntdqa 32(%rdi), %ymm1
1033*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1034*9880d681SAndroid Build Coastguard Worker;
1035*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_load_nt64xi8:
1036*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0: # %entry
1037*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovntdqa (%rdi), %zmm0
1038*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1039*9880d681SAndroid Build Coastguard Workerentry:
1040*9880d681SAndroid Build Coastguard Worker  %0 = load <64 x i8>, <64 x i8>* %ptr, align 64, !nontemporal !1
1041*9880d681SAndroid Build Coastguard Worker  ret <64 x i8> %0
1042*9880d681SAndroid Build Coastguard Worker}
1043*9880d681SAndroid Build Coastguard Worker
; Nontemporal load of a 512-bit <32 x i16> value from a 64-byte-aligned
; pointer. Expected output per run, as listed below:
;   - SSE2 and SSE4A runs use four ordinary aligned movaps loads.
;   - SSE4.1 run uses four 16-byte movntdqa loads.
;   - AVX1 run uses two ordinary aligned 32-byte vmovaps loads.
;   - AVX2 run uses two 32-byte vmovntdqa loads.
;   - The AVX512F run (without avx512bw) still expects two 32-byte vmovntdqa
;     loads into ymm registers, so it is checked separately from the
;     AVX512BW run, which expects a single 64-byte vmovntdqa load.
1044*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @test_load_nt32xi16(<32 x i16>* nocapture %ptr) {
1045*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt32xi16:
1046*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
1047*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
1048*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
1049*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 32(%rdi), %xmm2
1050*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 48(%rdi), %xmm3
1051*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
1052*9880d681SAndroid Build Coastguard Worker;
1053*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt32xi16:
1054*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0: # %entry
1055*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps (%rdi), %xmm0
1056*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 16(%rdi), %xmm1
1057*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 32(%rdi), %xmm2
1058*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 48(%rdi), %xmm3
1059*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
1060*9880d681SAndroid Build Coastguard Worker;
1061*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt32xi16:
1062*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
1063*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
1064*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
1065*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 32(%rdi), %xmm2
1066*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 48(%rdi), %xmm3
1067*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
1068*9880d681SAndroid Build Coastguard Worker;
1069*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt32xi16:
1070*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0: # %entry
1071*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
1072*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
1073*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
1074*9880d681SAndroid Build Coastguard Worker;
1075*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt32xi16:
1076*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
1077*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
1078*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm1
1079*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
1080*9880d681SAndroid Build Coastguard Worker;
1081*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_load_nt32xi16:
1082*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0: # %entry
1083*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovntdqa (%rdi), %ymm0
1084*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovntdqa 32(%rdi), %ymm1
1085*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
1086*9880d681SAndroid Build Coastguard Worker;
1087*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_load_nt32xi16:
1088*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0: # %entry
1089*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovntdqa (%rdi), %zmm0
1090*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
1091*9880d681SAndroid Build Coastguard Workerentry:
1092*9880d681SAndroid Build Coastguard Worker  %0 = load <32 x i16>, <32 x i16>* %ptr, align 64, !nontemporal !1
1093*9880d681SAndroid Build Coastguard Worker  ret <32 x i16> %0
1094*9880d681SAndroid Build Coastguard Worker}
1095*9880d681SAndroid Build Coastguard Worker
; Nontemporal load of a 512-bit <16 x i32> value from a 64-byte-aligned
; pointer. Expected output per run, as listed below:
;   - SSE2 and SSE4A runs use four ordinary aligned movaps loads.
;   - SSE4.1 run uses four 16-byte movntdqa loads.
;   - AVX1 run uses two ordinary aligned 32-byte vmovaps loads.
;   - AVX2 run uses two 32-byte vmovntdqa loads.
;   - AVX512 runs use a single 64-byte vmovntdqa load.
1096*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @test_load_nt16xi32(<16 x i32>* nocapture %ptr) {
1097*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt16xi32:
1098*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
1099*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
1100*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
1101*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 32(%rdi), %xmm2
1102*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 48(%rdi), %xmm3
1103*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
1104*9880d681SAndroid Build Coastguard Worker;
1105*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt16xi32:
1106*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0: # %entry
1107*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps (%rdi), %xmm0
1108*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 16(%rdi), %xmm1
1109*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 32(%rdi), %xmm2
1110*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 48(%rdi), %xmm3
1111*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
1112*9880d681SAndroid Build Coastguard Worker;
1113*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt16xi32:
1114*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
1115*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
1116*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
1117*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 32(%rdi), %xmm2
1118*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 48(%rdi), %xmm3
1119*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
1120*9880d681SAndroid Build Coastguard Worker;
1121*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt16xi32:
1122*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0: # %entry
1123*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
1124*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
1125*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
1126*9880d681SAndroid Build Coastguard Worker;
1127*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt16xi32:
1128*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
1129*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
1130*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm1
1131*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
1132*9880d681SAndroid Build Coastguard Worker;
1133*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt16xi32:
1134*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
1135*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %zmm0
1136*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
1137*9880d681SAndroid Build Coastguard Workerentry:
1138*9880d681SAndroid Build Coastguard Worker  %0 = load <16 x i32>, <16 x i32>* %ptr, align 64, !nontemporal !1
1139*9880d681SAndroid Build Coastguard Worker  ret <16 x i32> %0
1140*9880d681SAndroid Build Coastguard Worker}
1141*9880d681SAndroid Build Coastguard Worker
; Nontemporal load of a 512-bit <8 x i64> value from a 64-byte-aligned
; pointer. Expected output per run, as listed below:
;   - SSE2 and SSE4A runs use four ordinary aligned movaps loads.
;   - SSE4.1 run uses four 16-byte movntdqa loads.
;   - AVX1 run uses two ordinary aligned 32-byte vmovaps loads.
;   - AVX2 run uses two 32-byte vmovntdqa loads.
;   - AVX512 runs use a single 64-byte vmovntdqa load.
1142*9880d681SAndroid Build Coastguard Workerdefine <8 x i64> @test_load_nt8xi64(<8 x i64>* nocapture %ptr) {
1143*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt8xi64:
1144*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0: # %entry
1145*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps (%rdi), %xmm0
1146*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 16(%rdi), %xmm1
1147*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 32(%rdi), %xmm2
1148*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movaps 48(%rdi), %xmm3
1149*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
1150*9880d681SAndroid Build Coastguard Worker;
1151*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt8xi64:
1152*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0: # %entry
1153*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps (%rdi), %xmm0
1154*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 16(%rdi), %xmm1
1155*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 32(%rdi), %xmm2
1156*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movaps 48(%rdi), %xmm3
1157*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
1158*9880d681SAndroid Build Coastguard Worker;
1159*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt8xi64:
1160*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0: # %entry
1161*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa (%rdi), %xmm0
1162*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
1163*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 32(%rdi), %xmm2
1164*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntdqa 48(%rdi), %xmm3
1165*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
1166*9880d681SAndroid Build Coastguard Worker;
1167*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt8xi64:
1168*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0: # %entry
1169*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
1170*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
1171*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
1172*9880d681SAndroid Build Coastguard Worker;
1173*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt8xi64:
1174*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0: # %entry
1175*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
1176*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm1
1177*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
1178*9880d681SAndroid Build Coastguard Worker;
1179*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt8xi64:
1180*9880d681SAndroid Build Coastguard Worker; AVX512:       # BB#0: # %entry
1181*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    vmovntdqa (%rdi), %zmm0
1182*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT:    retq
1183*9880d681SAndroid Build Coastguard Workerentry:
1184*9880d681SAndroid Build Coastguard Worker  %0 = load <8 x i64>, <8 x i64>* %ptr, align 64, !nontemporal !1
1185*9880d681SAndroid Build Coastguard Worker  ret <8 x i64> %0
1186*9880d681SAndroid Build Coastguard Worker}
1187*9880d681SAndroid Build Coastguard Worker
; Metadata node referenced by every !nontemporal attachment on the loads and
; stores in this file; it holds the single i32 value 1.
1188*9880d681SAndroid Build Coastguard Worker!1 = !{i32 1}
1189