; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefix=SSE --check-prefix=SSE4A
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=VLX

; Make sure that we generate non-temporal stores for the test cases below.
; We use xorps for zeroing, so domain information isn't available anymore.

; Scalar versions (zeroing means we can do this even for fp types).
; Nontemporal store of a zero float. Every checked configuration is expected
; to build the zero in %eax and write it with movntil.
define void @test_zero_f32(float* %dst) {
; SSE-LABEL: test_zero_f32:
; SSE:       # BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    movntil %eax, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_f32:
; AVX:       # BB#0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntil %eax, (%rdi)
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_f32:
; VLX:       # BB#0:
; VLX-NEXT:    xorl %eax, %eax
; VLX-NEXT:    movntil %eax, (%rdi)
; VLX-NEXT:    retq
  store float zeroinitializer, float* %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of a zero i32. Same expected sequence as the float case
; (xorl + movntil).
define void @test_zero_i32(i32* %dst) {
; SSE-LABEL: test_zero_i32:
; SSE:       # BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    movntil %eax, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_i32:
; AVX:       # BB#0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntil %eax, (%rdi)
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_i32:
; VLX:       # BB#0:
; VLX-NEXT:    xorl %eax, %eax
; VLX-NEXT:    movntil %eax, (%rdi)
; VLX-NEXT:    retq
  store i32 zeroinitializer, i32* %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of a zero double. The zero is expected in %rax (cleared
; via xorl %eax) and written with the 64-bit movntiq.
define void @test_zero_f64(double* %dst) {
; SSE-LABEL: test_zero_f64:
; SSE:       # BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    movntiq %rax, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_f64:
; AVX:       # BB#0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_f64:
; VLX:       # BB#0:
; VLX-NEXT:    xorl %eax, %eax
; VLX-NEXT:    movntiq %rax, (%rdi)
; VLX-NEXT:    retq
  store double zeroinitializer, double* %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of a zero i64. Same expected sequence as the double case
; (xorl + movntiq).
define void @test_zero_i64(i64* %dst) {
; SSE-LABEL: test_zero_i64:
; SSE:       # BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    movntiq %rax, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_i64:
; AVX:       # BB#0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_i64:
; VLX:       # BB#0:
; VLX-NEXT:    xorl %eax, %eax
; VLX-NEXT:    movntiq %rax, (%rdi)
; VLX-NEXT:    retq
  store i64 zeroinitializer, i64* %dst, align 1, !nontemporal !1
  ret void
}

; And now XMM versions.
; 128-bit zero vectors stored with !nontemporal and 16-byte alignment.
; For every element type in this group the checks expect the same shape:
; xorps + movntps on the SSE runs, vxorps + vmovntps on the AVX runs, and
; vpxord + vmovntdq on the avx512vl run.

; <4 x float> zero vector.
define void @test_zero_v4f32(<4 x float>* %dst) {
; SSE-LABEL: test_zero_v4f32:
; SSE:       # BB#0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_v4f32:
; VLX:       # BB#0:
; VLX-NEXT:    vpxord %xmm0, %xmm0, %xmm0
; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
; VLX-NEXT:    retq
  store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !1
  ret void
}

; <4 x i32> zero vector. A single store, matching the single nontemporal
; store instruction each run is expected to emit.
define void @test_zero_v4i32(<4 x i32>* %dst) {
; SSE-LABEL: test_zero_v4i32:
; SSE:       # BB#0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_v4i32:
; VLX:       # BB#0:
; VLX-NEXT:    vpxord %xmm0, %xmm0, %xmm0
; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
; VLX-NEXT:    retq
  store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
  ret void
}

; <2 x double> zero vector.
define void @test_zero_v2f64(<2 x double>* %dst) {
; SSE-LABEL: test_zero_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_v2f64:
; VLX:       # BB#0:
; VLX-NEXT:    vpxord %xmm0, %xmm0, %xmm0
; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
; VLX-NEXT:    retq
  store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1
  ret void
}

; <2 x i64> zero vector.
define void @test_zero_v2i64(<2 x i64>* %dst) {
; SSE-LABEL: test_zero_v2i64:
; SSE:       # BB#0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_v2i64:
; VLX:       # BB#0:
; VLX-NEXT:    vpxord %xmm0, %xmm0, %xmm0
; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
; VLX-NEXT:    retq
  store <2 x i64> zeroinitializer, <2 x i64>* %dst, align 16, !nontemporal !1
  ret void
}

; <8 x i16> zero vector.
define void @test_zero_v8i16(<8 x i16>* %dst) {
; SSE-LABEL: test_zero_v8i16:
; SSE:       # BB#0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8i16:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_v8i16:
; VLX:       # BB#0:
; VLX-NEXT:    vpxord %xmm0, %xmm0, %xmm0
; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
; VLX-NEXT:    retq
  store <8 x i16> zeroinitializer, <8 x i16>* %dst, align 16, !nontemporal !1
  ret void
}

; <16 x i8> zero vector.
define void @test_zero_v16i8(<16 x i8>* %dst) {
; SSE-LABEL: test_zero_v16i8:
; SSE:       # BB#0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v16i8:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_v16i8:
; VLX:       # BB#0:
; VLX-NEXT:    vpxord %xmm0, %xmm0, %xmm0
; VLX-NEXT:    vmovntdq %xmm0, (%rdi)
; VLX-NEXT:    retq
  store <16 x i8> zeroinitializer, <16 x i8>* %dst, align 16, !nontemporal !1
  ret void
}

; And now YMM versions.
; 256-bit zero vectors stored with !nontemporal and 32-byte alignment.
; Expected shape for every element type in this group:
;   - SSE runs split the store into two 16-byte movntps (offset 16, then 0);
;   - AVX runs use one ymm vmovntps followed by vzeroupper;
;   - the avx512vl run uses vpxord + one ymm vmovntdq.

; <8 x float> zero vector.
define void @test_zero_v8f32(<8 x float>* %dst) {
; SSE-LABEL: test_zero_v8f32:
; SSE:       # BB#0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8f32:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_v8f32:
; VLX:       # BB#0:
; VLX-NEXT:    vpxord %ymm0, %ymm0, %ymm0
; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
; VLX-NEXT:    retq
  store <8 x float> zeroinitializer, <8 x float>* %dst, align 32, !nontemporal !1
  ret void
}

; <8 x i32> zero vector.
define void @test_zero_v8i32(<8 x i32>* %dst) {
; SSE-LABEL: test_zero_v8i32:
; SSE:       # BB#0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8i32:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_v8i32:
; VLX:       # BB#0:
; VLX-NEXT:    vpxord %ymm0, %ymm0, %ymm0
; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
; VLX-NEXT:    retq
  store <8 x i32> zeroinitializer, <8 x i32>* %dst, align 32, !nontemporal !1
  ret void
}

; <4 x double> zero vector.
define void @test_zero_v4f64(<4 x double>* %dst) {
; SSE-LABEL: test_zero_v4f64:
; SSE:       # BB#0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_v4f64:
; VLX:       # BB#0:
; VLX-NEXT:    vpxord %ymm0, %ymm0, %ymm0
; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
; VLX-NEXT:    retq
  store <4 x double> zeroinitializer, <4 x double>* %dst, align 32, !nontemporal !1
  ret void
}

; <4 x i64> zero vector.
define void @test_zero_v4i64(<4 x i64>* %dst) {
; SSE-LABEL: test_zero_v4i64:
; SSE:       # BB#0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_v4i64:
; VLX:       # BB#0:
; VLX-NEXT:    vpxord %ymm0, %ymm0, %ymm0
; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
; VLX-NEXT:    retq
  store <4 x i64> zeroinitializer, <4 x i64>* %dst, align 32, !nontemporal !1
  ret void
}

; <16 x i16> zero vector.
define void @test_zero_v16i16(<16 x i16>* %dst) {
; SSE-LABEL: test_zero_v16i16:
; SSE:       # BB#0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v16i16:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_v16i16:
; VLX:       # BB#0:
; VLX-NEXT:    vpxord %ymm0, %ymm0, %ymm0
; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
; VLX-NEXT:    retq
  store <16 x i16> zeroinitializer, <16 x i16>* %dst, align 32, !nontemporal !1
  ret void
}

; <32 x i8> zero vector.
define void @test_zero_v32i8(<32 x i8>* %dst) {
; SSE-LABEL: test_zero_v32i8:
; SSE:       # BB#0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v32i8:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; VLX-LABEL: test_zero_v32i8:
; VLX:       # BB#0:
; VLX-NEXT:    vpxord %ymm0, %ymm0, %ymm0
; VLX-NEXT:    vmovntdq %ymm0, (%rdi)
; VLX-NEXT:    retq
  store <32 x i8> zeroinitializer, <32 x i8>* %dst, align 32, !nontemporal !1
  ret void
}


; Check that we also handle arguments.  Here the type survives longer.

; Scalar versions.

; Nontemporal store of a float argument. Only the sse4a run is expected to
; use a nontemporal scalar store (movntss); the other runs expect a plain
; movss / vmovss.
define void @test_arg_f32(float %arg, float* %dst) {
; SSE2-LABEL: test_arg_f32:
; SSE2:       # BB#0:
; SSE2-NEXT:    movss %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_arg_f32:
; SSE4A:       # BB#0:
; SSE4A-NEXT:    movntss %xmm0, (%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_arg_f32:
; SSE41:       # BB#0:
; SSE41-NEXT:    movss %xmm0, (%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_arg_f32:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; VLX-LABEL: test_arg_f32:
; VLX:       # BB#0:
; VLX-NEXT:    vmovss %xmm0, (%rdi)
; VLX-NEXT:    retq
  store float %arg, float* %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of an i32 argument. Every run expects movntil directly
; from the incoming %edi.
define void @test_arg_i32(i32 %arg, i32* %dst) {
; SSE-LABEL: test_arg_i32:
; SSE:       # BB#0:
; SSE-NEXT:    movntil %edi, (%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_arg_i32:
; AVX:       # BB#0:
; AVX-NEXT:    movntil %edi, (%rsi)
; AVX-NEXT:    retq
;
; VLX-LABEL: test_arg_i32:
; VLX:       # BB#0:
; VLX-NEXT:    movntil %edi, (%rsi)
; VLX-NEXT:    retq
  store i32 %arg, i32* %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of a double argument. Only the sse4a run is expected to
; use movntsd; the other runs expect a plain movsd / vmovsd.
define void @test_arg_f64(double %arg, double* %dst) {
; SSE2-LABEL: test_arg_f64:
; SSE2:       # BB#0:
; SSE2-NEXT:    movsd %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_arg_f64:
; SSE4A:       # BB#0:
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_arg_f64:
; SSE41:       # BB#0:
; SSE41-NEXT:    movsd %xmm0, (%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_arg_f64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; VLX-LABEL: test_arg_f64:
; VLX:       # BB#0:
; VLX-NEXT:    vmovsd %xmm0, (%rdi)
; VLX-NEXT:    retq
  store double %arg, double* %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of an i64 argument. Every run expects movntiq directly
; from the incoming %rdi.
define void @test_arg_i64(i64 %arg, i64* %dst) {
; SSE-LABEL: test_arg_i64:
; SSE:       # BB#0:
; SSE-NEXT:    movntiq %rdi, (%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_arg_i64:
; AVX:       # BB#0:
; AVX-NEXT:    movntiq %rdi, (%rsi)
; AVX-NEXT:    retq
;
; VLX-LABEL: test_arg_i64:
; VLX:       # BB#0:
; VLX-NEXT:    movntiq %rdi, (%rsi)
; VLX-NEXT:    retq
  store i64 %arg, i64* %dst, align 1, !nontemporal !1
  ret void
}

; Extract versions

; NOTE(review): the definition below runs past the end of the reviewed excerpt,
; so it is left exactly as found (scrape residue included) for a follow-up pass.
486*9880d681SAndroid Build Coastguard Workerdefine void @test_extract_f32(<4 x float> %arg, float* %dst) { 487*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_extract_f32: 488*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 489*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] 490*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movss %xmm0, (%rdi) 491*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 492*9880d681SAndroid Build Coastguard Worker; 493*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_extract_f32: 494*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: 495*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 496*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movntss %xmm0, (%rdi) 497*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 498*9880d681SAndroid Build Coastguard Worker; 499*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_extract_f32: 500*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 501*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: extractps $1, %xmm0, %eax 502*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntil %eax, (%rdi) 503*9880d681SAndroid Build 
Coastguard Worker; SSE41-NEXT: retq 504*9880d681SAndroid Build Coastguard Worker; 505*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_extract_f32: 506*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 507*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vextractps $1, %xmm0, %eax 508*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: movntil %eax, (%rdi) 509*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 510*9880d681SAndroid Build Coastguard Worker; 511*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_extract_f32: 512*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 513*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vextractps $1, %xmm0, %eax 514*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: movntil %eax, (%rdi) 515*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 516*9880d681SAndroid Build Coastguard Worker %1 = extractelement <4 x float> %arg, i32 1 517*9880d681SAndroid Build Coastguard Worker store float %1, float* %dst, align 1, !nontemporal !1 518*9880d681SAndroid Build Coastguard Worker ret void 519*9880d681SAndroid Build Coastguard Worker} 520*9880d681SAndroid Build Coastguard Worker 521*9880d681SAndroid Build Coastguard Workerdefine void @test_extract_i32(<4 x i32> %arg, i32* %dst) { 522*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_extract_i32: 523*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 524*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 525*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movd %xmm0, %eax 526*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movntil %eax, (%rdi) 527*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 528*9880d681SAndroid Build Coastguard Worker; 529*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_extract_i32: 530*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: 531*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 
532*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movd %xmm0, %eax 533*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movntil %eax, (%rdi) 534*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 535*9880d681SAndroid Build Coastguard Worker; 536*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_extract_i32: 537*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 538*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pextrd $1, %xmm0, %eax 539*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntil %eax, (%rdi) 540*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 541*9880d681SAndroid Build Coastguard Worker; 542*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_extract_i32: 543*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 544*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpextrd $1, %xmm0, %eax 545*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: movntil %eax, (%rdi) 546*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 547*9880d681SAndroid Build Coastguard Worker; 548*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_extract_i32: 549*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 550*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vpextrd $1, %xmm0, %eax 551*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: movntil %eax, (%rdi) 552*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 553*9880d681SAndroid Build Coastguard Worker %1 = extractelement <4 x i32> %arg, i32 1 554*9880d681SAndroid Build Coastguard Worker store i32 %1, i32* %dst, align 1, !nontemporal !1 555*9880d681SAndroid Build Coastguard Worker ret void 556*9880d681SAndroid Build Coastguard Worker} 557*9880d681SAndroid Build Coastguard Worker 558*9880d681SAndroid Build Coastguard Workerdefine void @test_extract_f64(<2 x double> %arg, double* %dst) { 559*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_extract_f64: 560*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 
561*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movhpd %xmm0, (%rdi) 562*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 563*9880d681SAndroid Build Coastguard Worker; 564*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_extract_f64: 565*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: 566*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] 567*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movntsd %xmm0, (%rdi) 568*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 569*9880d681SAndroid Build Coastguard Worker; 570*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_extract_f64: 571*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 572*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movhpd %xmm0, (%rdi) 573*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 574*9880d681SAndroid Build Coastguard Worker; 575*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_extract_f64: 576*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 577*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovhpd %xmm0, (%rdi) 578*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 579*9880d681SAndroid Build Coastguard Worker; 580*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_extract_f64: 581*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 582*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovhpd %xmm0, (%rdi) 583*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 584*9880d681SAndroid Build Coastguard Worker %1 = extractelement <2 x double> %arg, i32 1 585*9880d681SAndroid Build Coastguard Worker store double %1, double* %dst, align 1, !nontemporal !1 586*9880d681SAndroid Build Coastguard Worker ret void 587*9880d681SAndroid Build Coastguard Worker} 588*9880d681SAndroid Build Coastguard Worker 589*9880d681SAndroid Build Coastguard Workerdefine void @test_extract_i64(<2 x i64> %arg, i64* %dst) { 590*9880d681SAndroid Build Coastguard 
Worker; SSE2-LABEL: test_extract_i64: 591*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 592*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 593*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movd %xmm0, %rax 594*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movntiq %rax, (%rdi) 595*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 596*9880d681SAndroid Build Coastguard Worker; 597*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_extract_i64: 598*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: 599*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 600*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movd %xmm0, %rax 601*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movntiq %rax, (%rdi) 602*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 603*9880d681SAndroid Build Coastguard Worker; 604*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_extract_i64: 605*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 606*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: pextrq $1, %xmm0, %rax 607*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntiq %rax, (%rdi) 608*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 609*9880d681SAndroid Build Coastguard Worker; 610*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_extract_i64: 611*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 612*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpextrq $1, %xmm0, %rax 613*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: movntiq %rax, (%rdi) 614*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 615*9880d681SAndroid Build Coastguard Worker; 616*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_extract_i64: 617*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 618*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vpextrq $1, %xmm0, %rax 619*9880d681SAndroid Build 
Coastguard Worker; VLX-NEXT: movntiq %rax, (%rdi) 620*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 621*9880d681SAndroid Build Coastguard Worker %1 = extractelement <2 x i64> %arg, i32 1 622*9880d681SAndroid Build Coastguard Worker store i64 %1, i64* %dst, align 1, !nontemporal !1 623*9880d681SAndroid Build Coastguard Worker ret void 624*9880d681SAndroid Build Coastguard Worker} 625*9880d681SAndroid Build Coastguard Worker 626*9880d681SAndroid Build Coastguard Worker; And now XMM versions. 627*9880d681SAndroid Build Coastguard Worker 628*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v4f32(<4 x float> %arg, <4 x float>* %dst) { 629*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v4f32: 630*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 631*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 632*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 633*9880d681SAndroid Build Coastguard Worker; 634*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v4f32: 635*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 636*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %xmm0, (%rdi) 637*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 638*9880d681SAndroid Build Coastguard Worker; 639*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v4f32: 640*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 641*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntps %xmm0, (%rdi) 642*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 643*9880d681SAndroid Build Coastguard Worker store <4 x float> %arg, <4 x float>* %dst, align 16, !nontemporal !1 644*9880d681SAndroid Build Coastguard Worker ret void 645*9880d681SAndroid Build Coastguard Worker} 646*9880d681SAndroid Build Coastguard Worker 647*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v4i32(<4 x i32> %arg, <4 x i32>* %dst) { 648*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: 
test_arg_v4i32: 649*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 650*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 651*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 652*9880d681SAndroid Build Coastguard Worker; 653*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v4i32: 654*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 655*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %xmm0, (%rdi) 656*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 657*9880d681SAndroid Build Coastguard Worker; 658*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v4i32: 659*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 660*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntdq %xmm0, (%rdi) 661*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 662*9880d681SAndroid Build Coastguard Worker store <4 x i32> %arg, <4 x i32>* %dst, align 16, !nontemporal !1 663*9880d681SAndroid Build Coastguard Worker ret void 664*9880d681SAndroid Build Coastguard Worker} 665*9880d681SAndroid Build Coastguard Worker 666*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v2f64(<2 x double> %arg, <2 x double>* %dst) { 667*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v2f64: 668*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 669*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 670*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 671*9880d681SAndroid Build Coastguard Worker; 672*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v2f64: 673*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 674*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %xmm0, (%rdi) 675*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 676*9880d681SAndroid Build Coastguard Worker; 677*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v2f64: 678*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 
679*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntpd %xmm0, (%rdi) 680*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 681*9880d681SAndroid Build Coastguard Worker store <2 x double> %arg, <2 x double>* %dst, align 16, !nontemporal !1 682*9880d681SAndroid Build Coastguard Worker ret void 683*9880d681SAndroid Build Coastguard Worker} 684*9880d681SAndroid Build Coastguard Worker 685*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v2i64(<2 x i64> %arg, <2 x i64>* %dst) { 686*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v2i64: 687*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 688*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 689*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 690*9880d681SAndroid Build Coastguard Worker; 691*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v2i64: 692*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 693*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %xmm0, (%rdi) 694*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 695*9880d681SAndroid Build Coastguard Worker; 696*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v2i64: 697*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 698*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntdq %xmm0, (%rdi) 699*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 700*9880d681SAndroid Build Coastguard Worker store <2 x i64> %arg, <2 x i64>* %dst, align 16, !nontemporal !1 701*9880d681SAndroid Build Coastguard Worker ret void 702*9880d681SAndroid Build Coastguard Worker} 703*9880d681SAndroid Build Coastguard Worker 704*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v8i16(<8 x i16> %arg, <8 x i16>* %dst) { 705*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8i16: 706*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 707*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 
708*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 709*9880d681SAndroid Build Coastguard Worker; 710*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v8i16: 711*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 712*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %xmm0, (%rdi) 713*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 714*9880d681SAndroid Build Coastguard Worker; 715*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v8i16: 716*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 717*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntdq %xmm0, (%rdi) 718*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 719*9880d681SAndroid Build Coastguard Worker store <8 x i16> %arg, <8 x i16>* %dst, align 16, !nontemporal !1 720*9880d681SAndroid Build Coastguard Worker ret void 721*9880d681SAndroid Build Coastguard Worker} 722*9880d681SAndroid Build Coastguard Worker 723*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v16i8(<16 x i8> %arg, <16 x i8>* %dst) { 724*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v16i8: 725*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 726*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 727*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 728*9880d681SAndroid Build Coastguard Worker; 729*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v16i8: 730*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 731*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %xmm0, (%rdi) 732*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 733*9880d681SAndroid Build Coastguard Worker; 734*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v16i8: 735*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 736*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntdq %xmm0, (%rdi) 737*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 738*9880d681SAndroid Build 
Coastguard Worker store <16 x i8> %arg, <16 x i8>* %dst, align 16, !nontemporal !1 739*9880d681SAndroid Build Coastguard Worker ret void 740*9880d681SAndroid Build Coastguard Worker} 741*9880d681SAndroid Build Coastguard Worker 742*9880d681SAndroid Build Coastguard Worker; And now YMM versions. 743*9880d681SAndroid Build Coastguard Worker 744*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v8f32(<8 x float> %arg, <8 x float>* %dst) { 745*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8f32: 746*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 747*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm1, 16(%rdi) 748*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 749*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 750*9880d681SAndroid Build Coastguard Worker; 751*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v8f32: 752*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 753*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %ymm0, (%rdi) 754*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 755*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 756*9880d681SAndroid Build Coastguard Worker; 757*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v8f32: 758*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 759*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntps %ymm0, (%rdi) 760*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 761*9880d681SAndroid Build Coastguard Worker store <8 x float> %arg, <8 x float>* %dst, align 32, !nontemporal !1 762*9880d681SAndroid Build Coastguard Worker ret void 763*9880d681SAndroid Build Coastguard Worker} 764*9880d681SAndroid Build Coastguard Worker 765*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v8i32(<8 x i32> %arg, <8 x i32>* %dst) { 766*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8i32: 767*9880d681SAndroid Build Coastguard Worker; SSE: # 
BB#0: 768*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm1, 16(%rdi) 769*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 770*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 771*9880d681SAndroid Build Coastguard Worker; 772*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v8i32: 773*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 774*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %ymm0, (%rdi) 775*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 776*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 777*9880d681SAndroid Build Coastguard Worker; 778*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v8i32: 779*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 780*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntdq %ymm0, (%rdi) 781*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 782*9880d681SAndroid Build Coastguard Worker store <8 x i32> %arg, <8 x i32>* %dst, align 32, !nontemporal !1 783*9880d681SAndroid Build Coastguard Worker ret void 784*9880d681SAndroid Build Coastguard Worker} 785*9880d681SAndroid Build Coastguard Worker 786*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v4f64(<4 x double> %arg, <4 x double>* %dst) { 787*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v4f64: 788*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 789*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm1, 16(%rdi) 790*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 791*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 792*9880d681SAndroid Build Coastguard Worker; 793*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v4f64: 794*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 795*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %ymm0, (%rdi) 796*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 797*9880d681SAndroid Build 
Coastguard Worker; AVX-NEXT: retq 798*9880d681SAndroid Build Coastguard Worker; 799*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v4f64: 800*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 801*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntpd %ymm0, (%rdi) 802*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 803*9880d681SAndroid Build Coastguard Worker store <4 x double> %arg, <4 x double>* %dst, align 32, !nontemporal !1 804*9880d681SAndroid Build Coastguard Worker ret void 805*9880d681SAndroid Build Coastguard Worker} 806*9880d681SAndroid Build Coastguard Worker 807*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %dst) { 808*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v4i64: 809*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 810*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm1, 16(%rdi) 811*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 812*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 813*9880d681SAndroid Build Coastguard Worker; 814*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v4i64: 815*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 816*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %ymm0, (%rdi) 817*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 818*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 819*9880d681SAndroid Build Coastguard Worker; 820*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v4i64: 821*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 822*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntdq %ymm0, (%rdi) 823*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 824*9880d681SAndroid Build Coastguard Worker store <4 x i64> %arg, <4 x i64>* %dst, align 32, !nontemporal !1 825*9880d681SAndroid Build Coastguard Worker ret void 826*9880d681SAndroid Build Coastguard Worker} 
827*9880d681SAndroid Build Coastguard Worker 828*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %dst) { 829*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v16i16: 830*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 831*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm1, 16(%rdi) 832*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 833*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 834*9880d681SAndroid Build Coastguard Worker; 835*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v16i16: 836*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 837*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %ymm0, (%rdi) 838*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 839*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 840*9880d681SAndroid Build Coastguard Worker; 841*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v16i16: 842*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 843*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntdq %ymm0, (%rdi) 844*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 845*9880d681SAndroid Build Coastguard Worker store <16 x i16> %arg, <16 x i16>* %dst, align 32, !nontemporal !1 846*9880d681SAndroid Build Coastguard Worker ret void 847*9880d681SAndroid Build Coastguard Worker} 848*9880d681SAndroid Build Coastguard Worker 849*9880d681SAndroid Build Coastguard Workerdefine void @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %dst) { 850*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v32i8: 851*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 852*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm1, 16(%rdi) 853*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 854*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 855*9880d681SAndroid Build Coastguard Worker; 856*9880d681SAndroid Build 
Coastguard Worker; AVX-LABEL: test_arg_v32i8: 857*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 858*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %ymm0, (%rdi) 859*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 860*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 861*9880d681SAndroid Build Coastguard Worker; 862*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_arg_v32i8: 863*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 864*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntdq %ymm0, (%rdi) 865*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 866*9880d681SAndroid Build Coastguard Worker store <32 x i8> %arg, <32 x i8>* %dst, align 32, !nontemporal !1 867*9880d681SAndroid Build Coastguard Worker ret void 868*9880d681SAndroid Build Coastguard Worker} 869*9880d681SAndroid Build Coastguard Worker 870*9880d681SAndroid Build Coastguard Worker 871*9880d681SAndroid Build Coastguard Worker; Now check that if the execution domain is trivially visible, we use it. 872*9880d681SAndroid Build Coastguard Worker; We use an add to make the type survive all the way to the MOVNT. 
873*9880d681SAndroid Build Coastguard Worker 874*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v4f32(<4 x float> %a, <4 x float> %b, <4 x float>* %dst) { 875*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v4f32: 876*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 877*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addps %xmm1, %xmm0 878*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 879*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 880*9880d681SAndroid Build Coastguard Worker; 881*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_op_v4f32: 882*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 883*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 884*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %xmm0, (%rdi) 885*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 886*9880d681SAndroid Build Coastguard Worker; 887*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v4f32: 888*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 889*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vaddps %xmm1, %xmm0, %xmm0 890*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntps %xmm0, (%rdi) 891*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 892*9880d681SAndroid Build Coastguard Worker %r = fadd <4 x float> %a, %b 893*9880d681SAndroid Build Coastguard Worker store <4 x float> %r, <4 x float>* %dst, align 16, !nontemporal !1 894*9880d681SAndroid Build Coastguard Worker ret void 895*9880d681SAndroid Build Coastguard Worker} 896*9880d681SAndroid Build Coastguard Worker 897*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32>* %dst) { 898*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v4i32: 899*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 900*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddd %xmm1, %xmm0 901*9880d681SAndroid Build 
Coastguard Worker; SSE-NEXT: movntdq %xmm0, (%rdi) 902*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 903*9880d681SAndroid Build Coastguard Worker; 904*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_op_v4i32: 905*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 906*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 907*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %xmm0, (%rdi) 908*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 909*9880d681SAndroid Build Coastguard Worker; 910*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v4i32: 911*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 912*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 913*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntdq %xmm0, (%rdi) 914*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 915*9880d681SAndroid Build Coastguard Worker %r = add <4 x i32> %a, %b 916*9880d681SAndroid Build Coastguard Worker store <4 x i32> %r, <4 x i32>* %dst, align 16, !nontemporal !1 917*9880d681SAndroid Build Coastguard Worker ret void 918*9880d681SAndroid Build Coastguard Worker} 919*9880d681SAndroid Build Coastguard Worker 920*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v2f64(<2 x double> %a, <2 x double> %b, <2 x double>* %dst) { 921*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v2f64: 922*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 923*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addpd %xmm1, %xmm0 924*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntpd %xmm0, (%rdi) 925*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 926*9880d681SAndroid Build Coastguard Worker; 927*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_op_v2f64: 928*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 929*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 930*9880d681SAndroid Build 
Coastguard Worker; AVX-NEXT: vmovntpd %xmm0, (%rdi) 931*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 932*9880d681SAndroid Build Coastguard Worker; 933*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v2f64: 934*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 935*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 936*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntpd %xmm0, (%rdi) 937*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 938*9880d681SAndroid Build Coastguard Worker %r = fadd <2 x double> %a, %b 939*9880d681SAndroid Build Coastguard Worker store <2 x double> %r, <2 x double>* %dst, align 16, !nontemporal !1 940*9880d681SAndroid Build Coastguard Worker ret void 941*9880d681SAndroid Build Coastguard Worker} 942*9880d681SAndroid Build Coastguard Worker 943*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64>* %dst) { 944*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v2i64: 945*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 946*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq %xmm1, %xmm0 947*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm0, (%rdi) 948*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 949*9880d681SAndroid Build Coastguard Worker; 950*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_op_v2i64: 951*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 952*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 953*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %xmm0, (%rdi) 954*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 955*9880d681SAndroid Build Coastguard Worker; 956*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v2i64: 957*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 958*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 959*9880d681SAndroid Build 
Coastguard Worker; VLX-NEXT: vmovntdq %xmm0, (%rdi) 960*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 961*9880d681SAndroid Build Coastguard Worker %r = add <2 x i64> %a, %b 962*9880d681SAndroid Build Coastguard Worker store <2 x i64> %r, <2 x i64>* %dst, align 16, !nontemporal !1 963*9880d681SAndroid Build Coastguard Worker ret void 964*9880d681SAndroid Build Coastguard Worker} 965*9880d681SAndroid Build Coastguard Worker 966*9880d681SAndroid Build Coastguard Workerdefine void @test_op_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16>* %dst) { 967*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_op_v8i16: 968*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 969*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddw %xmm1, %xmm0 970*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm0, (%rdi) 971*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 972*9880d681SAndroid Build Coastguard Worker; 973*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_op_v8i16: 974*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 975*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 976*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %xmm0, (%rdi) 977*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 978*9880d681SAndroid Build Coastguard Worker; 979*9880d681SAndroid Build Coastguard Worker; VLX-LABEL: test_op_v8i16: 980*9880d681SAndroid Build Coastguard Worker; VLX: # BB#0: 981*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 982*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: vmovntdq %xmm0, (%rdi) 983*9880d681SAndroid Build Coastguard Worker; VLX-NEXT: retq 984*9880d681SAndroid Build Coastguard Worker %r = add <8 x i16> %a, %b 985*9880d681SAndroid Build Coastguard Worker store <8 x i16> %r, <8 x i16>* %dst, align 16, !nontemporal !1 986*9880d681SAndroid Build Coastguard Worker ret void 987*9880d681SAndroid Build Coastguard Worker} 988*9880d681SAndroid 
; 16 x i8 integer add whose result is written to %dst with a 16-byte-aligned
; non-temporal store. All configurations below expect an XMM byte add followed
; by (v)movntdq.
define void @test_op_v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8>* %dst) {
; SSE-LABEL: test_op_v16i8:
; SSE: # BB#0:
; SSE-NEXT: paddb %xmm1, %xmm0
; SSE-NEXT: movntdq %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: test_op_v16i8:
; AVX: # BB#0:
; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovntdq %xmm0, (%rdi)
; AVX-NEXT: retq
;
; VLX-LABEL: test_op_v16i8:
; VLX: # BB#0:
; VLX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; VLX-NEXT: vmovntdq %xmm0, (%rdi)
; VLX-NEXT: retq
  %sum = add <16 x i8> %a, %b
  store <16 x i8> %sum, <16 x i8>* %dst, align 16, !nontemporal !1
  ret void
}

; And now YMM versions.
; 8 x float fadd stored to %dst with a 32-byte-aligned non-temporal store.
; Without AVX the checks expect two XMM adds and two movntps; with AVX or
; AVX512VL a single YMM add and one vmovntps.
define void @test_op_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>* %dst) {
; SSE-LABEL: test_op_v8f32:
; SSE: # BB#0:
; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: addps %xmm3, %xmm1
; SSE-NEXT: movntps %xmm1, 16(%rdi)
; SSE-NEXT: movntps %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: test_op_v8f32:
; AVX: # BB#0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovntps %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; VLX-LABEL: test_op_v8f32:
; VLX: # BB#0:
; VLX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; VLX-NEXT: vmovntps %ymm0, (%rdi)
; VLX-NEXT: retq
  %sum = fadd <8 x float> %a, %b
  store <8 x float> %sum, <8 x float>* %dst, align 32, !nontemporal !1
  ret void
}

; 8 x i32 add stored to %dst with a 32-byte-aligned non-temporal store.
; With only AVX1 the checks expect the add to be done on each 128-bit half and
; the result stored with vmovntps; AVX2 and AVX512VL expect one YMM add and
; vmovntdq.
define void @test_op_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32>* %dst) {
; SSE-LABEL: test_op_v8i32:
; SSE: # BB#0:
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: movntdq %xmm1, 16(%rdi)
; SSE-NEXT: movntdq %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: test_op_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpaddd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vmovntps %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_op_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovntdq %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; VLX-LABEL: test_op_v8i32:
; VLX: # BB#0:
; VLX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
; VLX-NEXT: retq
  %sum = add <8 x i32> %a, %b
  store <8 x i32> %sum, <8 x i32>* %dst, align 32, !nontemporal !1
  ret void
}

; 4 x double fadd stored to %dst with a 32-byte-aligned non-temporal store;
; the checks expect (v)movntpd in every configuration.
define void @test_op_v4f64(<4 x double> %a, <4 x double> %b, <4 x double>* %dst) {
; SSE-LABEL: test_op_v4f64:
; SSE: # BB#0:
; SSE-NEXT: addpd %xmm2, %xmm0
; SSE-NEXT: addpd %xmm3, %xmm1
; SSE-NEXT: movntpd %xmm1, 16(%rdi)
; SSE-NEXT: movntpd %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: test_op_v4f64:
; AVX: # BB#0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovntpd %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; VLX-LABEL: test_op_v4f64:
; VLX: # BB#0:
; VLX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; VLX-NEXT: vmovntpd %ymm0, (%rdi)
; VLX-NEXT: retq
  %sum = fadd <4 x double> %a, %b
  store <4 x double> %sum, <4 x double>* %dst, align 32, !nontemporal !1
  ret void
}

; 4 x i64 add stored to %dst with a 32-byte-aligned non-temporal store.
; Same expected shape as the other 256-bit integer cases: per-half adds and
; vmovntps with only AVX1, a single YMM add and vmovntdq with AVX2 / AVX512VL.
define void @test_op_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %dst) {
; SSE-LABEL: test_op_v4i64:
; SSE: # BB#0:
; SSE-NEXT: paddq %xmm2, %xmm0
; SSE-NEXT: paddq %xmm3, %xmm1
; SSE-NEXT: movntdq %xmm1, 16(%rdi)
; SSE-NEXT: movntdq %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: test_op_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vmovntps %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_op_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovntdq %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; VLX-LABEL: test_op_v4i64:
; VLX: # BB#0:
; VLX-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
; VLX-NEXT: retq
  %sum = add <4 x i64> %a, %b
  store <4 x i64> %sum, <4 x i64>* %dst, align 32, !nontemporal !1
  ret void
}

; 16 x i16 add stored to %dst with a 32-byte-aligned non-temporal store.
define void @test_op_v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16>* %dst) {
; SSE-LABEL: test_op_v16i16:
; SSE: # BB#0:
; SSE-NEXT: paddw %xmm2, %xmm0
; SSE-NEXT: paddw %xmm3, %xmm1
; SSE-NEXT: movntdq %xmm1, 16(%rdi)
; SSE-NEXT: movntdq %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: test_op_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpaddw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vmovntps %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_op_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovntdq %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; VLX-LABEL: test_op_v16i16:
; VLX: # BB#0:
; VLX-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
; VLX-NEXT: retq
  %sum = add <16 x i16> %a, %b
  store <16 x i16> %sum, <16 x i16>* %dst, align 32, !nontemporal !1
  ret void
}

; 32 x i8 add stored to %dst with a 32-byte-aligned non-temporal store.
define void @test_op_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8>* %dst) {
; SSE-LABEL: test_op_v32i8:
; SSE: # BB#0:
; SSE-NEXT: paddb %xmm2, %xmm0
; SSE-NEXT: paddb %xmm3, %xmm1
; SSE-NEXT: movntdq %xmm1, 16(%rdi)
; SSE-NEXT: movntdq %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: test_op_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vmovntps %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_op_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovntdq %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; VLX-LABEL: test_op_v32i8:
; VLX: # BB#0:
; VLX-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
; VLX-NEXT: retq
  %sum = add <32 x i8> %a, %b
  store <32 x i8> %sum, <32 x i8>* %dst, align 32, !nontemporal !1
  ret void
}

; 256-bit NT stores require 256-bit alignment.
; FIXME: For AVX, we could lower this to 2x movntps %xmm. Taken further, we
; could even scalarize to movnti when we only have 1-byte alignment: keeping
; the store nontemporal is probably worth it even at the cost of a
; 20-instruction scalarization.
; The store below is only 16-byte aligned, so the AVX and AVX512VL checks
; expect a plain vmovups, while the split SSE lowering still uses movntps.
define void @test_unaligned_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>* %dst) {
; SSE-LABEL: test_unaligned_v8f32:
; SSE: # BB#0:
; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: addps %xmm3, %xmm1
; SSE-NEXT: movntps %xmm1, 16(%rdi)
; SSE-NEXT: movntps %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: test_unaligned_v8f32:
; AVX: # BB#0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; VLX-LABEL: test_unaligned_v8f32:
; VLX: # BB#0:
; VLX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; VLX-NEXT: vmovups %ymm0, (%rdi)
; VLX-NEXT: retq
  %sum = fadd <8 x float> %a, %b
  store <8 x float> %sum, <8 x float>* %dst, align 16, !nontemporal !1
  ret void
}

; Metadata node attached to every store above via !nontemporal.
!1 = !{i32 1}