xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/nontemporal-2.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefix=SSE --check-prefix=SSE4A
4*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
5*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
6*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
7*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=VLX
8*9880d681SAndroid Build Coastguard Worker
9*9880d681SAndroid Build Coastguard Worker; Make sure that we generate non-temporal stores for the test cases below.
10*9880d681SAndroid Build Coastguard Worker; We use xorps for zeroing, so domain information isn't available anymore.
11*9880d681SAndroid Build Coastguard Worker
12*9880d681SAndroid Build Coastguard Worker; Scalar versions (zeroing means we can do this even for fp types).
13*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of a float zero through %dst (align 1).
; Every prefix group expects the zero to be built in %eax with xorl and
; written with the integer non-temporal store movntil, not an XMM store.
14*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_f32(float* %dst) {
15*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_f32:
16*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
17*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorl %eax, %eax
18*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntil %eax, (%rdi)
19*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
20*9880d681SAndroid Build Coastguard Worker;
21*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_f32:
22*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
23*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    xorl %eax, %eax
24*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    movntil %eax, (%rdi)
25*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
26*9880d681SAndroid Build Coastguard Worker;
27*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_f32:
28*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
29*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    xorl %eax, %eax
30*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    movntil %eax, (%rdi)
31*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
32*9880d681SAndroid Build Coastguard Worker  store float zeroinitializer, float* %dst, align 1, !nontemporal !1
33*9880d681SAndroid Build Coastguard Worker  ret void
34*9880d681SAndroid Build Coastguard Worker}
35*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of an i32 zero through %dst (align 1).
; The expected code is the same for all prefix groups: zero %eax with xorl,
; then write it with movntil.
36*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_i32(i32* %dst) {
37*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_i32:
38*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
39*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorl %eax, %eax
40*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntil %eax, (%rdi)
41*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
42*9880d681SAndroid Build Coastguard Worker;
43*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_i32:
44*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
45*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    xorl %eax, %eax
46*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    movntil %eax, (%rdi)
47*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
48*9880d681SAndroid Build Coastguard Worker;
49*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_i32:
50*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
51*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    xorl %eax, %eax
52*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    movntil %eax, (%rdi)
53*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
54*9880d681SAndroid Build Coastguard Worker  store i32 zeroinitializer, i32* %dst, align 1, !nontemporal !1
55*9880d681SAndroid Build Coastguard Worker  ret void
56*9880d681SAndroid Build Coastguard Worker}
57*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of a double zero through %dst (align 1).
; Every prefix group expects a 32-bit xorl of %eax followed by the 64-bit
; integer non-temporal store movntiq of %rax, not an XMM store.
58*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_f64(double* %dst) {
59*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_f64:
60*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
61*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorl %eax, %eax
62*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntiq %rax, (%rdi)
63*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
64*9880d681SAndroid Build Coastguard Worker;
65*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_f64:
66*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
67*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    xorl %eax, %eax
68*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    movntiq %rax, (%rdi)
69*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
70*9880d681SAndroid Build Coastguard Worker;
71*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_f64:
72*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
73*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    xorl %eax, %eax
74*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    movntiq %rax, (%rdi)
75*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
76*9880d681SAndroid Build Coastguard Worker  store double zeroinitializer, double* %dst, align 1, !nontemporal !1
77*9880d681SAndroid Build Coastguard Worker  ret void
78*9880d681SAndroid Build Coastguard Worker}
79*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of an i64 zero through %dst (align 1).
; The expected code is the same for all prefix groups: xorl of %eax, then
; movntiq of %rax to the destination.
80*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_i64(i64* %dst) {
81*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_i64:
82*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
83*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorl %eax, %eax
84*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntiq %rax, (%rdi)
85*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
86*9880d681SAndroid Build Coastguard Worker;
87*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_i64:
88*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
89*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    xorl %eax, %eax
90*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    movntiq %rax, (%rdi)
91*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
92*9880d681SAndroid Build Coastguard Worker;
93*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_i64:
94*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
95*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    xorl %eax, %eax
96*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    movntiq %rax, (%rdi)
97*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
98*9880d681SAndroid Build Coastguard Worker  store i64 zeroinitializer, i64* %dst, align 1, !nontemporal !1
99*9880d681SAndroid Build Coastguard Worker  ret void
100*9880d681SAndroid Build Coastguard Worker}
101*9880d681SAndroid Build Coastguard Worker
102*9880d681SAndroid Build Coastguard Worker; And now XMM versions.
103*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of a zero <4 x float> through %dst (align 16).
; Expected: xorps + movntps for the SSE runs, vxorps + vmovntps for the AVX
; runs, and vpxord + vmovntdq for the VLX run, all on %xmm0.
104*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_v4f32(<4 x float>* %dst) {
105*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_v4f32:
106*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
107*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorps %xmm0, %xmm0
108*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
109*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
110*9880d681SAndroid Build Coastguard Worker;
111*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_v4f32:
112*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
113*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
114*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %xmm0, (%rdi)
115*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
116*9880d681SAndroid Build Coastguard Worker;
117*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_v4f32:
118*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
119*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpxord %xmm0, %xmm0, %xmm0
120*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
121*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
122*9880d681SAndroid Build Coastguard Worker  store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !1
123*9880d681SAndroid Build Coastguard Worker  ret void
124*9880d681SAndroid Build Coastguard Worker}
125*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of a zero <4 x i32> through %dst (align 16).
; Expected: xorps + movntps for the SSE runs, vxorps + vmovntps for the AVX
; runs, and vpxord + vmovntdq for the VLX run, all on %xmm0.
; The function performs exactly one store, matching the single non-temporal
; store instruction that each prefix group expects before retq.
126*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_v4i32(<4 x i32>* %dst) {
127*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_v4i32:
128*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
129*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorps %xmm0, %xmm0
130*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
131*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
132*9880d681SAndroid Build Coastguard Worker;
133*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_v4i32:
134*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
135*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
136*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %xmm0, (%rdi)
137*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
138*9880d681SAndroid Build Coastguard Worker;
139*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_v4i32:
140*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
141*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpxord %xmm0, %xmm0, %xmm0
142*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
143*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
144*9880d681SAndroid Build Coastguard Worker  store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
146*9880d681SAndroid Build Coastguard Worker  ret void
147*9880d681SAndroid Build Coastguard Worker}
148*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of a zero <2 x double> through %dst (align 16).
; The expected instructions are the single-precision/integer forms (xorps +
; movntps, vxorps + vmovntps, vpxord + vmovntdq), not double-precision ones.
149*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_v2f64(<2 x double>* %dst) {
150*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_v2f64:
151*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
152*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorps %xmm0, %xmm0
153*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
154*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
155*9880d681SAndroid Build Coastguard Worker;
156*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_v2f64:
157*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
158*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
159*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %xmm0, (%rdi)
160*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
161*9880d681SAndroid Build Coastguard Worker;
162*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_v2f64:
163*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
164*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpxord %xmm0, %xmm0, %xmm0
165*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
166*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
167*9880d681SAndroid Build Coastguard Worker  store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1
168*9880d681SAndroid Build Coastguard Worker  ret void
169*9880d681SAndroid Build Coastguard Worker}
170*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of a zero <2 x i64> through %dst (align 16).
; Expected: xorps + movntps for the SSE runs, vxorps + vmovntps for the AVX
; runs, and vpxord + vmovntdq for the VLX run, all on %xmm0.
171*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_v2i64(<2 x i64>* %dst) {
172*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_v2i64:
173*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
174*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorps %xmm0, %xmm0
175*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
176*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
177*9880d681SAndroid Build Coastguard Worker;
178*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_v2i64:
179*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
180*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
181*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %xmm0, (%rdi)
182*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
183*9880d681SAndroid Build Coastguard Worker;
184*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_v2i64:
185*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
186*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpxord %xmm0, %xmm0, %xmm0
187*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
188*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
189*9880d681SAndroid Build Coastguard Worker  store <2 x i64> zeroinitializer, <2 x i64>* %dst, align 16, !nontemporal !1
190*9880d681SAndroid Build Coastguard Worker  ret void
191*9880d681SAndroid Build Coastguard Worker}
192*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of a zero <8 x i16> through %dst (align 16).
; Expected: xorps + movntps for the SSE runs, vxorps + vmovntps for the AVX
; runs, and vpxord + vmovntdq for the VLX run, all on %xmm0.
193*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_v8i16(<8 x i16>* %dst) {
194*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_v8i16:
195*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
196*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorps %xmm0, %xmm0
197*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
198*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
199*9880d681SAndroid Build Coastguard Worker;
200*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_v8i16:
201*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
202*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
203*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %xmm0, (%rdi)
204*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
205*9880d681SAndroid Build Coastguard Worker;
206*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_v8i16:
207*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
208*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpxord %xmm0, %xmm0, %xmm0
209*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
210*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
211*9880d681SAndroid Build Coastguard Worker  store <8 x i16> zeroinitializer, <8 x i16>* %dst, align 16, !nontemporal !1
212*9880d681SAndroid Build Coastguard Worker  ret void
213*9880d681SAndroid Build Coastguard Worker}
214*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of a zero <16 x i8> through %dst (align 16).
; Expected: xorps + movntps for the SSE runs, vxorps + vmovntps for the AVX
; runs, and vpxord + vmovntdq for the VLX run, all on %xmm0.
215*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_v16i8(<16 x i8>* %dst) {
216*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_v16i8:
217*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
218*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorps %xmm0, %xmm0
219*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
220*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
221*9880d681SAndroid Build Coastguard Worker;
222*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_v16i8:
223*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
224*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
225*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %xmm0, (%rdi)
226*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
227*9880d681SAndroid Build Coastguard Worker;
228*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_v16i8:
229*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
230*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpxord %xmm0, %xmm0, %xmm0
231*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
232*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
233*9880d681SAndroid Build Coastguard Worker  store <16 x i8> zeroinitializer, <16 x i8>* %dst, align 16, !nontemporal !1
234*9880d681SAndroid Build Coastguard Worker  ret void
235*9880d681SAndroid Build Coastguard Worker}
236*9880d681SAndroid Build Coastguard Worker
237*9880d681SAndroid Build Coastguard Worker; And now YMM versions.
238*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of a zero <8 x float> through %dst (align 32).
; The SSE runs expect the 32-byte store split into two movntps of %xmm0, at
; 16(%rdi) and then (%rdi). The AVX runs expect one vmovntps of %ymm0 followed
; by vzeroupper. The VLX run expects vpxord + vmovntdq on %ymm0 and no
; vzeroupper before retq.
239*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_v8f32(<8 x float>* %dst) {
240*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_v8f32:
241*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
242*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorps %xmm0, %xmm0
243*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, 16(%rdi)
244*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
245*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
246*9880d681SAndroid Build Coastguard Worker;
247*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_v8f32:
248*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
249*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
250*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %ymm0, (%rdi)
251*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
252*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
253*9880d681SAndroid Build Coastguard Worker;
254*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_v8f32:
255*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
256*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpxord %ymm0, %ymm0, %ymm0
257*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
258*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
259*9880d681SAndroid Build Coastguard Worker  store <8 x float> zeroinitializer, <8 x float>* %dst, align 32, !nontemporal !1
260*9880d681SAndroid Build Coastguard Worker  ret void
261*9880d681SAndroid Build Coastguard Worker}
262*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of a zero <8 x i32> through %dst (align 32).
; The SSE runs expect two movntps of %xmm0, at 16(%rdi) and then (%rdi).
; The AVX runs expect one vmovntps of %ymm0 followed by vzeroupper.
; The VLX run expects vpxord + vmovntdq on %ymm0 and no vzeroupper.
263*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_v8i32(<8 x i32>* %dst) {
264*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_v8i32:
265*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
266*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorps %xmm0, %xmm0
267*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, 16(%rdi)
268*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
269*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
270*9880d681SAndroid Build Coastguard Worker;
271*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_v8i32:
272*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
273*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
274*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %ymm0, (%rdi)
275*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
276*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
277*9880d681SAndroid Build Coastguard Worker;
278*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_v8i32:
279*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
280*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpxord %ymm0, %ymm0, %ymm0
281*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
282*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
283*9880d681SAndroid Build Coastguard Worker  store <8 x i32> zeroinitializer, <8 x i32>* %dst, align 32, !nontemporal !1
284*9880d681SAndroid Build Coastguard Worker  ret void
285*9880d681SAndroid Build Coastguard Worker}
286*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of a zero <4 x double> through %dst (align 32).
; The SSE runs expect two movntps of %xmm0, at 16(%rdi) and then (%rdi).
; The AVX runs expect one vmovntps of %ymm0 followed by vzeroupper.
; The VLX run expects vpxord + vmovntdq on %ymm0 and no vzeroupper.
287*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_v4f64(<4 x double>* %dst) {
288*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_v4f64:
289*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
290*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorps %xmm0, %xmm0
291*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, 16(%rdi)
292*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
293*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
294*9880d681SAndroid Build Coastguard Worker;
295*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_v4f64:
296*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
297*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
298*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %ymm0, (%rdi)
299*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
300*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
301*9880d681SAndroid Build Coastguard Worker;
302*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_v4f64:
303*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
304*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpxord %ymm0, %ymm0, %ymm0
305*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
306*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
307*9880d681SAndroid Build Coastguard Worker  store <4 x double> zeroinitializer, <4 x double>* %dst, align 32, !nontemporal !1
308*9880d681SAndroid Build Coastguard Worker  ret void
309*9880d681SAndroid Build Coastguard Worker}
310*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of a zero <4 x i64> through %dst (align 32).
; The SSE runs expect two movntps of %xmm0, at 16(%rdi) and then (%rdi).
; The AVX runs expect one vmovntps of %ymm0 followed by vzeroupper.
; The VLX run expects vpxord + vmovntdq on %ymm0 and no vzeroupper.
311*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_v4i64(<4 x i64>* %dst) {
312*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_v4i64:
313*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
314*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorps %xmm0, %xmm0
315*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, 16(%rdi)
316*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
317*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
318*9880d681SAndroid Build Coastguard Worker;
319*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_v4i64:
320*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
321*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
322*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %ymm0, (%rdi)
323*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
324*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
325*9880d681SAndroid Build Coastguard Worker;
326*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_v4i64:
327*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
328*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpxord %ymm0, %ymm0, %ymm0
329*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
330*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
331*9880d681SAndroid Build Coastguard Worker  store <4 x i64> zeroinitializer, <4 x i64>* %dst, align 32, !nontemporal !1
332*9880d681SAndroid Build Coastguard Worker  ret void
333*9880d681SAndroid Build Coastguard Worker}
334*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of a zero <16 x i16> through %dst (align 32).
; The SSE runs expect two movntps of %xmm0, at 16(%rdi) and then (%rdi).
; The AVX runs expect one vmovntps of %ymm0 followed by vzeroupper.
; The VLX run expects vpxord + vmovntdq on %ymm0 and no vzeroupper.
335*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_v16i16(<16 x i16>* %dst) {
336*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_v16i16:
337*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
338*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorps %xmm0, %xmm0
339*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, 16(%rdi)
340*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
341*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
342*9880d681SAndroid Build Coastguard Worker;
343*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_v16i16:
344*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
345*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
346*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %ymm0, (%rdi)
347*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
348*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
349*9880d681SAndroid Build Coastguard Worker;
350*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_v16i16:
351*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
352*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpxord %ymm0, %ymm0, %ymm0
353*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
354*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
355*9880d681SAndroid Build Coastguard Worker  store <16 x i16> zeroinitializer, <16 x i16>* %dst, align 32, !nontemporal !1
356*9880d681SAndroid Build Coastguard Worker  ret void
357*9880d681SAndroid Build Coastguard Worker}
358*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of a zero <32 x i8> through %dst (align 32).
; The SSE runs expect two movntps of %xmm0, at 16(%rdi) and then (%rdi).
; The AVX runs expect one vmovntps of %ymm0 followed by vzeroupper.
; The VLX run expects vpxord + vmovntdq on %ymm0 and no vzeroupper.
359*9880d681SAndroid Build Coastguard Workerdefine void @test_zero_v32i8(<32 x i8>* %dst) {
360*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_zero_v32i8:
361*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
362*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    xorps %xmm0, %xmm0
363*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, 16(%rdi)
364*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
365*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
366*9880d681SAndroid Build Coastguard Worker;
367*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_zero_v32i8:
368*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
369*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
370*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %ymm0, (%rdi)
371*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
372*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
373*9880d681SAndroid Build Coastguard Worker;
374*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_zero_v32i8:
375*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
376*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpxord %ymm0, %ymm0, %ymm0
377*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
378*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
379*9880d681SAndroid Build Coastguard Worker  store <32 x i8> zeroinitializer, <32 x i8>* %dst, align 32, !nontemporal !1
380*9880d681SAndroid Build Coastguard Worker  ret void
381*9880d681SAndroid Build Coastguard Worker}
382*9880d681SAndroid Build Coastguard Worker
383*9880d681SAndroid Build Coastguard Worker
384*9880d681SAndroid Build Coastguard Worker; Check that we also handle arguments.  Here the type survives longer.
385*9880d681SAndroid Build Coastguard Worker
386*9880d681SAndroid Build Coastguard Worker; Scalar versions.
387*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of the float argument %arg through %dst (align 1).
; Only the SSE4A run expects a non-temporal instruction (movntss). The SSE2
; and SSE41 runs expect a plain movss, and the AVX and VLX runs expect a
; plain vmovss. The value is stored from %xmm0 to (%rdi) in every case.
388*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_f32(float %arg, float* %dst) {
389*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_arg_f32:
390*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
391*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movss %xmm0, (%rdi)
392*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
393*9880d681SAndroid Build Coastguard Worker;
394*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_arg_f32:
395*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0:
396*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movntss %xmm0, (%rdi)
397*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
398*9880d681SAndroid Build Coastguard Worker;
399*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_arg_f32:
400*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
401*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movss %xmm0, (%rdi)
402*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
403*9880d681SAndroid Build Coastguard Worker;
404*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_f32:
405*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
406*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovss %xmm0, (%rdi)
407*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
408*9880d681SAndroid Build Coastguard Worker;
409*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_f32:
410*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
411*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovss %xmm0, (%rdi)
412*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
413*9880d681SAndroid Build Coastguard Worker  store float %arg, float* %dst, align 1, !nontemporal !1
414*9880d681SAndroid Build Coastguard Worker  ret void
415*9880d681SAndroid Build Coastguard Worker}
416*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of the i32 argument %arg through %dst (align 1).
; Every prefix group expects a single movntil of %edi to (%rsi).
417*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_i32(i32 %arg, i32* %dst) {
418*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_i32:
419*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
420*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntil %edi, (%rsi)
421*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
422*9880d681SAndroid Build Coastguard Worker;
423*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_i32:
424*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
425*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    movntil %edi, (%rsi)
426*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
427*9880d681SAndroid Build Coastguard Worker;
428*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_i32:
429*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
430*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    movntil %edi, (%rsi)
431*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
432*9880d681SAndroid Build Coastguard Worker  store i32 %arg, i32* %dst, align 1, !nontemporal !1
433*9880d681SAndroid Build Coastguard Worker  ret void
434*9880d681SAndroid Build Coastguard Worker}
435*9880d681SAndroid Build Coastguard Worker
; Non-temporal store of the double argument %arg through %dst (align 1).
; Only the SSE4A run expects a non-temporal instruction (movntsd). The SSE2
; and SSE41 runs expect a plain movsd, and the AVX and VLX runs expect a
; plain vmovsd. The value is stored from %xmm0 to (%rdi) in every case.
436*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_f64(double %arg, double* %dst) {
437*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_arg_f64:
438*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
439*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movsd %xmm0, (%rdi)
440*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
441*9880d681SAndroid Build Coastguard Worker;
442*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_arg_f64:
443*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0:
444*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
445*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
446*9880d681SAndroid Build Coastguard Worker;
447*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_arg_f64:
448*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
449*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movsd %xmm0, (%rdi)
450*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
451*9880d681SAndroid Build Coastguard Worker;
452*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_f64:
453*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
454*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovsd %xmm0, (%rdi)
455*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
456*9880d681SAndroid Build Coastguard Worker;
457*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_f64:
458*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
459*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovsd %xmm0, (%rdi)
460*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
461*9880d681SAndroid Build Coastguard Worker  store double %arg, double* %dst, align 1, !nontemporal !1
462*9880d681SAndroid Build Coastguard Worker  ret void
463*9880d681SAndroid Build Coastguard Worker}
464*9880d681SAndroid Build Coastguard Worker
; Non-temporal store (align 1) of a scalar i64 argument through %dst.
; All three check groups expect a single movntiq from the argument register
; (%rdi) to the destination pointer (%rsi).
465*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_i64(i64 %arg, i64* %dst) {
466*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_i64:
467*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
468*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntiq %rdi, (%rsi)
469*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
470*9880d681SAndroid Build Coastguard Worker;
471*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_i64:
472*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
473*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    movntiq %rdi, (%rsi)
474*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
475*9880d681SAndroid Build Coastguard Worker;
476*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_i64:
477*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
478*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    movntiq %rdi, (%rsi)
479*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
480*9880d681SAndroid Build Coastguard Worker  store i64 %arg, i64* %dst, align 1, !nontemporal !1
481*9880d681SAndroid Build Coastguard Worker  ret void
482*9880d681SAndroid Build Coastguard Worker}
483*9880d681SAndroid Build Coastguard Worker
484*9880d681SAndroid Build Coastguard Worker; Extract versions
485*9880d681SAndroid Build Coastguard Worker
; Non-temporal store (align 1) of element 1 extracted from a <4 x float>.
; Expected lowering per run:
;   - default (sse2) run shuffles the element down and uses a plain movss;
;   - +sse4a run shuffles with movshdup and stores with movntss;
;   - +sse4.1, +avx/+avx2 and +avx512vl runs extract to %eax with
;     (v)extractps and store with movntil.
486*9880d681SAndroid Build Coastguard Workerdefine void @test_extract_f32(<4 x float> %arg, float* %dst) {
487*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_extract_f32:
488*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
489*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
490*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movss %xmm0, (%rdi)
491*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
492*9880d681SAndroid Build Coastguard Worker;
493*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_extract_f32:
494*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0:
495*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
496*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movntss %xmm0, (%rdi)
497*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
498*9880d681SAndroid Build Coastguard Worker;
499*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_extract_f32:
500*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
501*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    extractps $1, %xmm0, %eax
502*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntil %eax, (%rdi)
503*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
504*9880d681SAndroid Build Coastguard Worker;
505*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_extract_f32:
506*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
507*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vextractps $1, %xmm0, %eax
508*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    movntil %eax, (%rdi)
509*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
510*9880d681SAndroid Build Coastguard Worker;
511*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_extract_f32:
512*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
513*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vextractps $1, %xmm0, %eax
514*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    movntil %eax, (%rdi)
515*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
516*9880d681SAndroid Build Coastguard Worker  %1 = extractelement <4 x float> %arg, i32 1
517*9880d681SAndroid Build Coastguard Worker  store float %1, float* %dst, align 1, !nontemporal !1
518*9880d681SAndroid Build Coastguard Worker  ret void
519*9880d681SAndroid Build Coastguard Worker}
520*9880d681SAndroid Build Coastguard Worker
; Non-temporal store (align 1) of element 1 extracted from a <4 x i32>.
; Every run is expected to finish with movntil from %eax; only the way the
; element reaches %eax differs: pshufd + movd for the default and +sse4a runs,
; (v)pextrd for the +sse4.1, +avx/+avx2 and +avx512vl runs.
521*9880d681SAndroid Build Coastguard Workerdefine void @test_extract_i32(<4 x i32> %arg, i32* %dst) {
522*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_extract_i32:
523*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
524*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
525*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movd %xmm0, %eax
526*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movntil %eax, (%rdi)
527*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
528*9880d681SAndroid Build Coastguard Worker;
529*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_extract_i32:
530*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0:
531*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
532*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movd %xmm0, %eax
533*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movntil %eax, (%rdi)
534*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
535*9880d681SAndroid Build Coastguard Worker;
536*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_extract_i32:
537*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
538*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pextrd $1, %xmm0, %eax
539*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntil %eax, (%rdi)
540*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
541*9880d681SAndroid Build Coastguard Worker;
542*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_extract_i32:
543*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
544*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpextrd $1, %xmm0, %eax
545*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    movntil %eax, (%rdi)
546*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
547*9880d681SAndroid Build Coastguard Worker;
548*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_extract_i32:
549*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
550*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpextrd $1, %xmm0, %eax
551*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    movntil %eax, (%rdi)
552*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
553*9880d681SAndroid Build Coastguard Worker  %1 = extractelement <4 x i32> %arg, i32 1
554*9880d681SAndroid Build Coastguard Worker  store i32 %1, i32* %dst, align 1, !nontemporal !1
555*9880d681SAndroid Build Coastguard Worker  ret void
556*9880d681SAndroid Build Coastguard Worker}
557*9880d681SAndroid Build Coastguard Worker
; Non-temporal store (align 1) of element 1 extracted from a <2 x double>.
; Only the +sse4a run is expected to emit a non-temporal store (shufpd to move
; the high element down, then movntsd). The other runs are expected to store
; the high half directly with a regular (v)movhpd.
558*9880d681SAndroid Build Coastguard Workerdefine void @test_extract_f64(<2 x double> %arg, double* %dst) {
559*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_extract_f64:
560*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
561*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movhpd %xmm0, (%rdi)
562*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
563*9880d681SAndroid Build Coastguard Worker;
564*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_extract_f64:
565*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0:
566*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
567*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
568*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
569*9880d681SAndroid Build Coastguard Worker;
570*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_extract_f64:
571*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
572*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movhpd %xmm0, (%rdi)
573*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
574*9880d681SAndroid Build Coastguard Worker;
575*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_extract_f64:
576*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
577*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovhpd %xmm0, (%rdi)
578*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
579*9880d681SAndroid Build Coastguard Worker;
580*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_extract_f64:
581*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
582*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovhpd %xmm0, (%rdi)
583*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
584*9880d681SAndroid Build Coastguard Worker  %1 = extractelement <2 x double> %arg, i32 1
585*9880d681SAndroid Build Coastguard Worker  store double %1, double* %dst, align 1, !nontemporal !1
586*9880d681SAndroid Build Coastguard Worker  ret void
587*9880d681SAndroid Build Coastguard Worker}
588*9880d681SAndroid Build Coastguard Worker
; Non-temporal store (align 1) of element 1 extracted from a <2 x i64>.
; Every run is expected to finish with movntiq from %rax; the element reaches
; %rax via pshufd + movd in the default and +sse4a runs and via (v)pextrq in
; the +sse4.1, +avx/+avx2 and +avx512vl runs.
589*9880d681SAndroid Build Coastguard Workerdefine void @test_extract_i64(<2 x i64> %arg, i64* %dst) {
590*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_extract_i64:
591*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
592*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
593*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movd %xmm0, %rax
594*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movntiq %rax, (%rdi)
595*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
596*9880d681SAndroid Build Coastguard Worker;
597*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_extract_i64:
598*9880d681SAndroid Build Coastguard Worker; SSE4A:       # BB#0:
599*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
600*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movd %xmm0, %rax
601*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    movntiq %rax, (%rdi)
602*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT:    retq
603*9880d681SAndroid Build Coastguard Worker;
604*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_extract_i64:
605*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
606*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    pextrq $1, %xmm0, %rax
607*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movntiq %rax, (%rdi)
608*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
609*9880d681SAndroid Build Coastguard Worker;
610*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_extract_i64:
611*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
612*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpextrq $1, %xmm0, %rax
613*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    movntiq %rax, (%rdi)
614*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
615*9880d681SAndroid Build Coastguard Worker;
616*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_extract_i64:
617*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
618*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpextrq $1, %xmm0, %rax
619*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    movntiq %rax, (%rdi)
620*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
621*9880d681SAndroid Build Coastguard Worker  %1 = extractelement <2 x i64> %arg, i32 1
622*9880d681SAndroid Build Coastguard Worker  store i64 %1, i64* %dst, align 1, !nontemporal !1
623*9880d681SAndroid Build Coastguard Worker  ret void
624*9880d681SAndroid Build Coastguard Worker}
625*9880d681SAndroid Build Coastguard Worker
626*9880d681SAndroid Build Coastguard Worker; And now XMM versions.
627*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 16-byte-aligned store of a <4 x float> argument.
; All runs are expected to emit a single (v)movntps of %xmm0.
628*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v4f32(<4 x float> %arg, <4 x float>* %dst) {
629*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v4f32:
630*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
631*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
632*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
633*9880d681SAndroid Build Coastguard Worker;
634*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v4f32:
635*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
636*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %xmm0, (%rdi)
637*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
638*9880d681SAndroid Build Coastguard Worker;
639*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v4f32:
640*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
641*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntps %xmm0, (%rdi)
642*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
643*9880d681SAndroid Build Coastguard Worker  store <4 x float> %arg, <4 x float>* %dst, align 16, !nontemporal !1
644*9880d681SAndroid Build Coastguard Worker  ret void
645*9880d681SAndroid Build Coastguard Worker}
646*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 16-byte-aligned store of a <4 x i32> argument.
; The sse and avx check groups expect (v)movntps even though the element type
; is integer; only the +avx512vl run expects the integer form vmovntdq.
647*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v4i32(<4 x i32> %arg, <4 x i32>* %dst) {
648*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v4i32:
649*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
650*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
651*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
652*9880d681SAndroid Build Coastguard Worker;
653*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v4i32:
654*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
655*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %xmm0, (%rdi)
656*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
657*9880d681SAndroid Build Coastguard Worker;
658*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v4i32:
659*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
660*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
661*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
662*9880d681SAndroid Build Coastguard Worker  store <4 x i32> %arg, <4 x i32>* %dst, align 16, !nontemporal !1
663*9880d681SAndroid Build Coastguard Worker  ret void
664*9880d681SAndroid Build Coastguard Worker}
665*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 16-byte-aligned store of a <2 x double> argument.
; The sse and avx check groups expect (v)movntps rather than the
; double-precision form; only the +avx512vl run expects vmovntpd.
666*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v2f64(<2 x double> %arg, <2 x double>* %dst) {
667*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v2f64:
668*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
669*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
670*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
671*9880d681SAndroid Build Coastguard Worker;
672*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v2f64:
673*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
674*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %xmm0, (%rdi)
675*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
676*9880d681SAndroid Build Coastguard Worker;
677*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v2f64:
678*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
679*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntpd %xmm0, (%rdi)
680*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
681*9880d681SAndroid Build Coastguard Worker  store <2 x double> %arg, <2 x double>* %dst, align 16, !nontemporal !1
682*9880d681SAndroid Build Coastguard Worker  ret void
683*9880d681SAndroid Build Coastguard Worker}
684*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 16-byte-aligned store of a <2 x i64> argument.
; The sse and avx check groups expect (v)movntps; only the +avx512vl run
; expects the integer form vmovntdq.
685*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v2i64(<2 x i64> %arg, <2 x i64>* %dst) {
686*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v2i64:
687*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
688*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
689*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
690*9880d681SAndroid Build Coastguard Worker;
691*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v2i64:
692*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
693*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %xmm0, (%rdi)
694*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
695*9880d681SAndroid Build Coastguard Worker;
696*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v2i64:
697*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
698*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
699*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
700*9880d681SAndroid Build Coastguard Worker  store <2 x i64> %arg, <2 x i64>* %dst, align 16, !nontemporal !1
701*9880d681SAndroid Build Coastguard Worker  ret void
702*9880d681SAndroid Build Coastguard Worker}
703*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 16-byte-aligned store of an <8 x i16> argument.
; The sse and avx check groups expect (v)movntps; only the +avx512vl run
; expects the integer form vmovntdq.
704*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v8i16(<8 x i16> %arg, <8 x i16>* %dst) {
705*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8i16:
706*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
707*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
708*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
709*9880d681SAndroid Build Coastguard Worker;
710*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v8i16:
711*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
712*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %xmm0, (%rdi)
713*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
714*9880d681SAndroid Build Coastguard Worker;
715*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v8i16:
716*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
717*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
718*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
719*9880d681SAndroid Build Coastguard Worker  store <8 x i16> %arg, <8 x i16>* %dst, align 16, !nontemporal !1
720*9880d681SAndroid Build Coastguard Worker  ret void
721*9880d681SAndroid Build Coastguard Worker}
722*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 16-byte-aligned store of a <16 x i8> argument.
; The sse and avx check groups expect (v)movntps; only the +avx512vl run
; expects the integer form vmovntdq.
723*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v16i8(<16 x i8> %arg, <16 x i8>* %dst) {
724*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v16i8:
725*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
726*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
727*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
728*9880d681SAndroid Build Coastguard Worker;
729*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v16i8:
730*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
731*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %xmm0, (%rdi)
732*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
733*9880d681SAndroid Build Coastguard Worker;
734*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v16i8:
735*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
736*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
737*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
738*9880d681SAndroid Build Coastguard Worker  store <16 x i8> %arg, <16 x i8>* %dst, align 16, !nontemporal !1
739*9880d681SAndroid Build Coastguard Worker  ret void
740*9880d681SAndroid Build Coastguard Worker}
741*9880d681SAndroid Build Coastguard Worker
742*9880d681SAndroid Build Coastguard Worker; And now YMM versions.
743*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 32-byte-aligned store of an <8 x float> argument.
; The sse runs are expected to split it into two 16-byte movntps stores
; (%xmm1 at offset 16, then %xmm0 at offset 0). The avx runs expect one
; vmovntps of %ymm0 followed by vzeroupper; the +avx512vl run expects the same
; store with no vzeroupper.
744*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v8f32(<8 x float> %arg, <8 x float>* %dst) {
745*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8f32:
746*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
747*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm1, 16(%rdi)
748*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
749*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
750*9880d681SAndroid Build Coastguard Worker;
751*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v8f32:
752*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
753*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %ymm0, (%rdi)
754*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
755*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
756*9880d681SAndroid Build Coastguard Worker;
757*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v8f32:
758*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
759*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntps %ymm0, (%rdi)
760*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
761*9880d681SAndroid Build Coastguard Worker  store <8 x float> %arg, <8 x float>* %dst, align 32, !nontemporal !1
762*9880d681SAndroid Build Coastguard Worker  ret void
763*9880d681SAndroid Build Coastguard Worker}
764*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 32-byte-aligned store of an <8 x i32> argument.
; The sse runs are expected to split it into two 16-byte movntps stores; the
; avx runs expect one vmovntps of %ymm0 plus vzeroupper; the +avx512vl run
; expects the integer form vmovntdq of %ymm0 with no vzeroupper.
765*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v8i32(<8 x i32> %arg, <8 x i32>* %dst) {
766*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8i32:
767*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
768*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm1, 16(%rdi)
769*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
770*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
771*9880d681SAndroid Build Coastguard Worker;
772*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v8i32:
773*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
774*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %ymm0, (%rdi)
775*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
776*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
777*9880d681SAndroid Build Coastguard Worker;
778*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v8i32:
779*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
780*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
781*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
782*9880d681SAndroid Build Coastguard Worker  store <8 x i32> %arg, <8 x i32>* %dst, align 32, !nontemporal !1
783*9880d681SAndroid Build Coastguard Worker  ret void
784*9880d681SAndroid Build Coastguard Worker}
785*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 32-byte-aligned store of a <4 x double> argument.
; The sse runs are expected to split it into two 16-byte movntps stores; the
; avx runs expect one vmovntps of %ymm0 plus vzeroupper; the +avx512vl run
; expects the double-precision form vmovntpd of %ymm0 with no vzeroupper.
786*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v4f64(<4 x double> %arg, <4 x double>* %dst) {
787*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v4f64:
788*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
789*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm1, 16(%rdi)
790*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
791*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
792*9880d681SAndroid Build Coastguard Worker;
793*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v4f64:
794*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
795*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %ymm0, (%rdi)
796*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
797*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
798*9880d681SAndroid Build Coastguard Worker;
799*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v4f64:
800*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
801*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntpd %ymm0, (%rdi)
802*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
803*9880d681SAndroid Build Coastguard Worker  store <4 x double> %arg, <4 x double>* %dst, align 32, !nontemporal !1
804*9880d681SAndroid Build Coastguard Worker  ret void
805*9880d681SAndroid Build Coastguard Worker}
806*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 32-byte-aligned store of a <4 x i64> argument.
; The sse runs are expected to split it into two 16-byte movntps stores; the
; avx runs expect one vmovntps of %ymm0 plus vzeroupper; the +avx512vl run
; expects the integer form vmovntdq of %ymm0 with no vzeroupper.
807*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %dst) {
808*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v4i64:
809*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
810*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm1, 16(%rdi)
811*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
812*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
813*9880d681SAndroid Build Coastguard Worker;
814*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v4i64:
815*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
816*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %ymm0, (%rdi)
817*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
818*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
819*9880d681SAndroid Build Coastguard Worker;
820*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v4i64:
821*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
822*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
823*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
824*9880d681SAndroid Build Coastguard Worker  store <4 x i64> %arg, <4 x i64>* %dst, align 32, !nontemporal !1
825*9880d681SAndroid Build Coastguard Worker  ret void
826*9880d681SAndroid Build Coastguard Worker}
827*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 32-byte-aligned store of a <16 x i16> argument.
; The sse runs are expected to split it into two 16-byte movntps stores; the
; avx runs expect one vmovntps of %ymm0 plus vzeroupper; the +avx512vl run
; expects the integer form vmovntdq of %ymm0 with no vzeroupper.
828*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %dst) {
829*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v16i16:
830*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
831*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm1, 16(%rdi)
832*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
833*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
834*9880d681SAndroid Build Coastguard Worker;
835*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v16i16:
836*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
837*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %ymm0, (%rdi)
838*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
839*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
840*9880d681SAndroid Build Coastguard Worker;
841*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v16i16:
842*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
843*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
844*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
845*9880d681SAndroid Build Coastguard Worker  store <16 x i16> %arg, <16 x i16>* %dst, align 32, !nontemporal !1
846*9880d681SAndroid Build Coastguard Worker  ret void
847*9880d681SAndroid Build Coastguard Worker}
848*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 32-byte-aligned store of a <32 x i8> argument.
; The sse runs are expected to split it into two 16-byte movntps stores; the
; avx runs expect one vmovntps of %ymm0 plus vzeroupper; the +avx512vl run
; expects the integer form vmovntdq of %ymm0 with no vzeroupper.
849*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %dst) {
850*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v32i8:
851*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
852*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm1, 16(%rdi)
853*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
854*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
855*9880d681SAndroid Build Coastguard Worker;
856*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v32i8:
857*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
858*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %ymm0, (%rdi)
859*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
860*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
861*9880d681SAndroid Build Coastguard Worker;
862*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v32i8:
863*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
864*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
865*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
866*9880d681SAndroid Build Coastguard Worker  store <32 x i8> %arg, <32 x i8>* %dst, align 32, !nontemporal !1
867*9880d681SAndroid Build Coastguard Worker  ret void
868*9880d681SAndroid Build Coastguard Worker}
869*9880d681SAndroid Build Coastguard Worker
870*9880d681SAndroid Build Coastguard Worker
871*9880d681SAndroid Build Coastguard Worker; Now check that if the execution domain is trivially visible, we use it.
872*9880d681SAndroid Build Coastguard Worker; We use an add to make the type survive all the way to the MOVNT.
873*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 16-byte-aligned store of the result of a <4 x float> fadd.
; The value is produced by (v)addps, and every run is expected to store it
; with the matching single-precision form (v)movntps.
874*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v4f32(<4 x float> %a, <4 x float> %b, <4 x float>* %dst) {
875*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v4f32:
876*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
877*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addps %xmm1, %xmm0
878*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
879*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
880*9880d681SAndroid Build Coastguard Worker;
881*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_op_v4f32:
882*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
883*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
884*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %xmm0, (%rdi)
885*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
886*9880d681SAndroid Build Coastguard Worker;
887*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v4f32:
888*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
889*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
890*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntps %xmm0, (%rdi)
891*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
892*9880d681SAndroid Build Coastguard Worker  %r = fadd <4 x float> %a, %b
893*9880d681SAndroid Build Coastguard Worker  store <4 x float> %r, <4 x float>* %dst, align 16, !nontemporal !1
894*9880d681SAndroid Build Coastguard Worker  ret void
895*9880d681SAndroid Build Coastguard Worker}
896*9880d681SAndroid Build Coastguard Worker
; Non-temporal, 16-byte-aligned store of the result of a <4 x i32> add.
; The value is produced by the integer instruction (v)paddd, and every run
; (including the sse and avx ones) is expected to store it with the integer
; form (v)movntdq.
897*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32>* %dst) {
898*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v4i32:
899*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
900*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddd %xmm1, %xmm0
901*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm0, (%rdi)
902*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
903*9880d681SAndroid Build Coastguard Worker;
904*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_op_v4i32:
905*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
906*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
907*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdq %xmm0, (%rdi)
908*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
909*9880d681SAndroid Build Coastguard Worker;
910*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v4i32:
911*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
912*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
913*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
914*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
915*9880d681SAndroid Build Coastguard Worker  %r = add <4 x i32> %a, %b
916*9880d681SAndroid Build Coastguard Worker  store <4 x i32> %r, <4 x i32>* %dst, align 16, !nontemporal !1
917*9880d681SAndroid Build Coastguard Worker  ret void
918*9880d681SAndroid Build Coastguard Worker}
919*9880d681SAndroid Build Coastguard Worker
; test_op_v2f64 adds two <2 x double> vectors and stores the sum to %dst with
; 16-byte alignment and !nontemporal metadata.
; Expected lowering in every RUN configuration: one packed-double add
; (addpd / vaddpd) followed by a non-temporal packed-double store
; (movntpd / vmovntpd) to (%rdi).
920*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v2f64(<2 x double> %a, <2 x double> %b, <2 x double>* %dst) {
921*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v2f64:
922*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
923*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addpd %xmm1, %xmm0
924*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntpd %xmm0, (%rdi)
925*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
926*9880d681SAndroid Build Coastguard Worker;
927*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_op_v2f64:
928*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
929*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
930*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntpd %xmm0, (%rdi)
931*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
932*9880d681SAndroid Build Coastguard Worker;
933*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v2f64:
934*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
935*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
936*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntpd %xmm0, (%rdi)
937*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
938*9880d681SAndroid Build Coastguard Worker  %r = fadd <2 x double> %a, %b
939*9880d681SAndroid Build Coastguard Worker  store <2 x double> %r, <2 x double>* %dst, align 16, !nontemporal !1
940*9880d681SAndroid Build Coastguard Worker  ret void
941*9880d681SAndroid Build Coastguard Worker}
942*9880d681SAndroid Build Coastguard Worker
; test_op_v2i64 adds two <2 x i64> vectors and stores the sum to %dst with
; 16-byte alignment and !nontemporal metadata.
; Expected lowering in every RUN configuration: one 64-bit-lane integer add
; (paddq / vpaddq) followed by a non-temporal integer store
; (movntdq / vmovntdq) to (%rdi).
943*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64>* %dst) {
944*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v2i64:
945*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
946*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm1, %xmm0
947*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm0, (%rdi)
948*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
949*9880d681SAndroid Build Coastguard Worker;
950*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_op_v2i64:
951*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
952*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
953*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdq %xmm0, (%rdi)
954*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
955*9880d681SAndroid Build Coastguard Worker;
956*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v2i64:
957*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
958*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
959*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
960*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
961*9880d681SAndroid Build Coastguard Worker  %r = add <2 x i64> %a, %b
962*9880d681SAndroid Build Coastguard Worker  store <2 x i64> %r, <2 x i64>* %dst, align 16, !nontemporal !1
963*9880d681SAndroid Build Coastguard Worker  ret void
964*9880d681SAndroid Build Coastguard Worker}
965*9880d681SAndroid Build Coastguard Worker
; test_op_v8i16 adds two <8 x i16> vectors and stores the sum to %dst with
; 16-byte alignment and !nontemporal metadata.
; Expected lowering in every RUN configuration: one 16-bit-lane integer add
; (paddw / vpaddw) followed by a non-temporal integer store
; (movntdq / vmovntdq) to (%rdi).
966*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16>* %dst) {
967*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v8i16:
968*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
969*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddw %xmm1, %xmm0
970*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm0, (%rdi)
971*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
972*9880d681SAndroid Build Coastguard Worker;
973*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_op_v8i16:
974*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
975*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
976*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdq %xmm0, (%rdi)
977*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
978*9880d681SAndroid Build Coastguard Worker;
979*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v8i16:
980*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
981*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
982*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
983*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
984*9880d681SAndroid Build Coastguard Worker  %r = add <8 x i16> %a, %b
985*9880d681SAndroid Build Coastguard Worker  store <8 x i16> %r, <8 x i16>* %dst, align 16, !nontemporal !1
986*9880d681SAndroid Build Coastguard Worker  ret void
987*9880d681SAndroid Build Coastguard Worker}
988*9880d681SAndroid Build Coastguard Worker
; test_op_v16i8 adds two <16 x i8> vectors and stores the sum to %dst with
; 16-byte alignment and !nontemporal metadata.
; Expected lowering in every RUN configuration: one byte-lane integer add
; (paddb / vpaddb) followed by a non-temporal integer store
; (movntdq / vmovntdq) to (%rdi).
989*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8>* %dst) {
990*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v16i8:
991*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
992*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddb %xmm1, %xmm0
993*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm0, (%rdi)
994*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
995*9880d681SAndroid Build Coastguard Worker;
996*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_op_v16i8:
997*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
998*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
999*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntdq %xmm0, (%rdi)
1000*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1001*9880d681SAndroid Build Coastguard Worker;
1002*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v16i8:
1003*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
1004*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
1005*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
1006*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
1007*9880d681SAndroid Build Coastguard Worker  %r = add <16 x i8> %a, %b
1008*9880d681SAndroid Build Coastguard Worker  store <16 x i8> %r, <16 x i8>* %dst, align 16, !nontemporal !1
1009*9880d681SAndroid Build Coastguard Worker  ret void
1010*9880d681SAndroid Build Coastguard Worker}
1011*9880d681SAndroid Build Coastguard Worker
1012*9880d681SAndroid Build Coastguard Worker; And now YMM versions.
1013*9880d681SAndroid Build Coastguard Worker
; test_op_v8f32 adds two <8 x float> vectors and stores the 256-bit sum to
; %dst with 32-byte alignment and !nontemporal metadata.
; Expected lowering:
;   - SSE runs split the value into two xmm halves, giving two addps and two
;     movntps stores, to 16(%rdi) and then (%rdi).
;   - AVX runs use one ymm vaddps and one ymm vmovntps, then vzeroupper
;     before returning.
;   - The VLX run uses the same ymm add and store, with no vzeroupper
;     expected.
1014*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>* %dst) {
1015*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v8f32:
1016*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1017*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addps %xmm2, %xmm0
1018*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addps %xmm3, %xmm1
1019*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm1, 16(%rdi)
1020*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
1021*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1022*9880d681SAndroid Build Coastguard Worker;
1023*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_op_v8f32:
1024*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1025*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
1026*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntps %ymm0, (%rdi)
1027*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
1028*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1029*9880d681SAndroid Build Coastguard Worker;
1030*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v8f32:
1031*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
1032*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
1033*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntps %ymm0, (%rdi)
1034*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
1035*9880d681SAndroid Build Coastguard Worker  %r = fadd <8 x float> %a, %b
1036*9880d681SAndroid Build Coastguard Worker  store <8 x float> %r, <8 x float>* %dst, align 32, !nontemporal !1
1037*9880d681SAndroid Build Coastguard Worker  ret void
1038*9880d681SAndroid Build Coastguard Worker}
1039*9880d681SAndroid Build Coastguard Worker
; test_op_v8i32 adds two <8 x i32> vectors and stores the 256-bit sum to %dst
; with 32-byte alignment and !nontemporal metadata.
; Expected lowering:
;   - SSE runs split the value into two xmm halves, giving two paddd and two
;     movntdq stores, to 16(%rdi) and then (%rdi).
;   - The AVX1 run extracts the upper halves with vextractf128, does two xmm
;     vpaddd, rebuilds the ymm value with vinsertf128, and stores it with
;     vmovntps rather than vmovntdq.
;     NOTE(review): presumably because AVX1 has no 256-bit integer add and
;     the recombined value sits in the FP domain -- confirm.
;   - The AVX2 and VLX runs use one ymm vpaddd and one ymm vmovntdq; only the
;     AVX2 checks expect a trailing vzeroupper.
1040*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32>* %dst) {
1041*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v8i32:
1042*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1043*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddd %xmm2, %xmm0
1044*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddd %xmm3, %xmm1
1045*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm1, 16(%rdi)
1046*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm0, (%rdi)
1047*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1048*9880d681SAndroid Build Coastguard Worker;
1049*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_op_v8i32:
1050*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
1051*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1052*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1053*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
1054*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
1055*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1056*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
1057*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vzeroupper
1058*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
1059*9880d681SAndroid Build Coastguard Worker;
1060*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_op_v8i32:
1061*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
1062*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
1063*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdq %ymm0, (%rdi)
1064*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vzeroupper
1065*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
1066*9880d681SAndroid Build Coastguard Worker;
1067*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v8i32:
1068*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
1069*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
1070*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
1071*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
1072*9880d681SAndroid Build Coastguard Worker  %r = add <8 x i32> %a, %b
1073*9880d681SAndroid Build Coastguard Worker  store <8 x i32> %r, <8 x i32>* %dst, align 32, !nontemporal !1
1074*9880d681SAndroid Build Coastguard Worker  ret void
1075*9880d681SAndroid Build Coastguard Worker}
1076*9880d681SAndroid Build Coastguard Worker
; test_op_v4f64 adds two <4 x double> vectors and stores the 256-bit sum to
; %dst with 32-byte alignment and !nontemporal metadata.
; Expected lowering:
;   - SSE runs split the value into two xmm halves, giving two addpd and two
;     movntpd stores, to 16(%rdi) and then (%rdi).
;   - AVX runs use one ymm vaddpd and one ymm vmovntpd, then vzeroupper
;     before returning.
;   - The VLX run uses the same ymm add and store, with no vzeroupper
;     expected.
1077*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v4f64(<4 x double> %a, <4 x double> %b, <4 x double>* %dst) {
1078*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v4f64:
1079*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1080*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addpd %xmm2, %xmm0
1081*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addpd %xmm3, %xmm1
1082*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntpd %xmm1, 16(%rdi)
1083*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntpd %xmm0, (%rdi)
1084*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1085*9880d681SAndroid Build Coastguard Worker;
1086*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_op_v4f64:
1087*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1088*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
1089*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovntpd %ymm0, (%rdi)
1090*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
1091*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1092*9880d681SAndroid Build Coastguard Worker;
1093*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v4f64:
1094*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
1095*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
1096*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntpd %ymm0, (%rdi)
1097*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
1098*9880d681SAndroid Build Coastguard Worker  %r = fadd <4 x double> %a, %b
1099*9880d681SAndroid Build Coastguard Worker  store <4 x double> %r, <4 x double>* %dst, align 32, !nontemporal !1
1100*9880d681SAndroid Build Coastguard Worker  ret void
1101*9880d681SAndroid Build Coastguard Worker}
1102*9880d681SAndroid Build Coastguard Worker
; test_op_v4i64 adds two <4 x i64> vectors and stores the 256-bit sum to %dst
; with 32-byte alignment and !nontemporal metadata.
; Expected lowering:
;   - SSE runs split the value into two xmm halves, giving two paddq and two
;     movntdq stores, to 16(%rdi) and then (%rdi).
;   - The AVX1 run extracts the upper halves with vextractf128, does two xmm
;     vpaddq, rebuilds the ymm value with vinsertf128, and stores it with
;     vmovntps rather than vmovntdq.
;     NOTE(review): presumably because AVX1 has no 256-bit integer add and
;     the recombined value sits in the FP domain -- confirm.
;   - The AVX2 and VLX runs use one ymm vpaddq and one ymm vmovntdq; only the
;     AVX2 checks expect a trailing vzeroupper.
1103*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %dst) {
1104*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v4i64:
1105*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1106*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm2, %xmm0
1107*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddq %xmm3, %xmm1
1108*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm1, 16(%rdi)
1109*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm0, (%rdi)
1110*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1111*9880d681SAndroid Build Coastguard Worker;
1112*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_op_v4i64:
1113*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
1114*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1115*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1116*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
1117*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
1118*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1119*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
1120*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vzeroupper
1121*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
1122*9880d681SAndroid Build Coastguard Worker;
1123*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_op_v4i64:
1124*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
1125*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
1126*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdq %ymm0, (%rdi)
1127*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vzeroupper
1128*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
1129*9880d681SAndroid Build Coastguard Worker;
1130*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v4i64:
1131*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
1132*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
1133*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
1134*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
1135*9880d681SAndroid Build Coastguard Worker  %r = add <4 x i64> %a, %b
1136*9880d681SAndroid Build Coastguard Worker  store <4 x i64> %r, <4 x i64>* %dst, align 32, !nontemporal !1
1137*9880d681SAndroid Build Coastguard Worker  ret void
1138*9880d681SAndroid Build Coastguard Worker}
1139*9880d681SAndroid Build Coastguard Worker
; test_op_v16i16 adds two <16 x i16> vectors and stores the 256-bit sum to
; %dst with 32-byte alignment and !nontemporal metadata.
; Expected lowering:
;   - SSE runs split the value into two xmm halves, giving two paddw and two
;     movntdq stores, to 16(%rdi) and then (%rdi).
;   - The AVX1 run extracts the upper halves with vextractf128, does two xmm
;     vpaddw, rebuilds the ymm value with vinsertf128, and stores it with
;     vmovntps rather than vmovntdq.
;     NOTE(review): presumably because AVX1 has no 256-bit integer add and
;     the recombined value sits in the FP domain -- confirm.
;   - The AVX2 and VLX runs use one ymm vpaddw and one ymm vmovntdq; only the
;     AVX2 checks expect a trailing vzeroupper.
1140*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16>* %dst) {
1141*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v16i16:
1142*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1143*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddw %xmm2, %xmm0
1144*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddw %xmm3, %xmm1
1145*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm1, 16(%rdi)
1146*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm0, (%rdi)
1147*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1148*9880d681SAndroid Build Coastguard Worker;
1149*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_op_v16i16:
1150*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
1151*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1152*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1153*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm2, %xmm3, %xmm2
1154*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
1155*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1156*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
1157*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vzeroupper
1158*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
1159*9880d681SAndroid Build Coastguard Worker;
1160*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_op_v16i16:
1161*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
1162*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
1163*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdq %ymm0, (%rdi)
1164*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vzeroupper
1165*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
1166*9880d681SAndroid Build Coastguard Worker;
1167*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v16i16:
1168*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
1169*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
1170*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
1171*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
1172*9880d681SAndroid Build Coastguard Worker  %r = add <16 x i16> %a, %b
1173*9880d681SAndroid Build Coastguard Worker  store <16 x i16> %r, <16 x i16>* %dst, align 32, !nontemporal !1
1174*9880d681SAndroid Build Coastguard Worker  ret void
1175*9880d681SAndroid Build Coastguard Worker}
1176*9880d681SAndroid Build Coastguard Worker
; test_op_v32i8 adds two <32 x i8> vectors and stores the 256-bit sum to %dst
; with 32-byte alignment and !nontemporal metadata.
; Expected lowering:
;   - SSE runs split the value into two xmm halves, giving two paddb and two
;     movntdq stores, to 16(%rdi) and then (%rdi).
;   - The AVX1 run extracts the upper halves with vextractf128, does two xmm
;     vpaddb, rebuilds the ymm value with vinsertf128, and stores it with
;     vmovntps rather than vmovntdq.
;     NOTE(review): presumably because AVX1 has no 256-bit integer add and
;     the recombined value sits in the FP domain -- confirm.
;   - The AVX2 and VLX runs use one ymm vpaddb and one ymm vmovntdq; only the
;     AVX2 checks expect a trailing vzeroupper.
1177*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8>* %dst) {
1178*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v32i8:
1179*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1180*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddb %xmm2, %xmm0
1181*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    paddb %xmm3, %xmm1
1182*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm1, 16(%rdi)
1183*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntdq %xmm0, (%rdi)
1184*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1185*9880d681SAndroid Build Coastguard Worker;
1186*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_op_v32i8:
1187*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
1188*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1189*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1190*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
1191*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
1192*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1193*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
1194*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vzeroupper
1195*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
1196*9880d681SAndroid Build Coastguard Worker;
1197*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_op_v32i8:
1198*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
1199*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
1200*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovntdq %ymm0, (%rdi)
1201*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vzeroupper
1202*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
1203*9880d681SAndroid Build Coastguard Worker;
1204*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v32i8:
1205*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
1206*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
1207*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
1208*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
1209*9880d681SAndroid Build Coastguard Worker  %r = add <32 x i8> %a, %b
1210*9880d681SAndroid Build Coastguard Worker  store <32 x i8> %r, <32 x i8>* %dst, align 32, !nontemporal !1
1211*9880d681SAndroid Build Coastguard Worker  ret void
1212*9880d681SAndroid Build Coastguard Worker}
1213*9880d681SAndroid Build Coastguard Worker
1214*9880d681SAndroid Build Coastguard Worker; 256-bit NT stores require 256-bit alignment.
1215*9880d681SAndroid Build Coastguard Worker; FIXME: For AVX, we could lower this to 2x movntps %xmm. Taken further, we
1216*9880d681SAndroid Build Coastguard Worker; could even scalarize to movnti when we have 1-alignment: nontemporal is
1217*9880d681SAndroid Build Coastguard Worker; probably always worth even some 20 instruction scalarization.
; test_unaligned_v8f32 adds two <8 x float> vectors and stores the 256-bit
; sum to %dst with !nontemporal metadata but only 16-byte alignment, which is
; less than the 32-byte size of the stored value.
; Expected lowering:
;   - SSE runs still get non-temporal stores: the value is split into two
;     xmm halves and each half is written with movntps, to 16(%rdi) and then
;     (%rdi).
;   - AVX and VLX runs use one ymm vaddps and then an ordinary unaligned
;     vmovups, so the non-temporal hint is not reflected in the expected
;     store instruction for these configurations.
1218*9880d681SAndroid Build Coastguard Workerdefine void @test_unaligned_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>* %dst) {
1219*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v8f32:
1220*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1221*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addps %xmm2, %xmm0
1222*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addps %xmm3, %xmm1
1223*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm1, 16(%rdi)
1224*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movntps %xmm0, (%rdi)
1225*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1226*9880d681SAndroid Build Coastguard Worker;
1227*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v8f32:
1228*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1229*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
1230*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups %ymm0, (%rdi)
1231*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vzeroupper
1232*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
1233*9880d681SAndroid Build Coastguard Worker;
1234*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_unaligned_v8f32:
1235*9880d681SAndroid Build Coastguard Worker; VLX:       # BB#0:
1236*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
1237*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    vmovups %ymm0, (%rdi)
1238*9880d681SAndroid Build Coastguard Worker; VLX-NEXT:    retq
1239*9880d681SAndroid Build Coastguard Worker  %r = fadd <8 x float> %a, %b
1240*9880d681SAndroid Build Coastguard Worker  store <8 x float> %r, <8 x float>* %dst, align 16, !nontemporal !1
1241*9880d681SAndroid Build Coastguard Worker  ret void
1242*9880d681SAndroid Build Coastguard Worker}
1243*9880d681SAndroid Build Coastguard Worker
; Metadata node referenced by the `!nontemporal !1` attachments on the stores
; above; it holds the single operand `i32 1`.
1244*9880d681SAndroid Build Coastguard Worker!1 = !{i32 1}
1245