1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2*9880d681SAndroid Build Coastguard Worker; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 3*9880d681SAndroid Build Coastguard Worker; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+sse4a -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE4A 4*9880d681SAndroid Build Coastguard Worker; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 5*9880d681SAndroid Build Coastguard Worker; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 6*9880d681SAndroid Build Coastguard Worker; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+avx2 -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 7*9880d681SAndroid Build Coastguard Worker; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F 8*9880d681SAndroid Build Coastguard Worker; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW 9*9880d681SAndroid Build Coastguard Worker 10*9880d681SAndroid Build Coastguard Worker; 11*9880d681SAndroid Build Coastguard Worker; Scalar Stores 12*9880d681SAndroid Build Coastguard Worker; 13*9880d681SAndroid Build Coastguard Worker 14*9880d681SAndroid Build Coastguard Workerdefine void @test_nti32(i32* nocapture %ptr, i32 %X) { 15*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: test_nti32: 16*9880d681SAndroid Build Coastguard Worker; ALL: # BB#0: # %entry 17*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: movntil %esi, (%rdi) 18*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: retq 19*9880d681SAndroid Build Coastguard Workerentry: 20*9880d681SAndroid Build Coastguard Worker store i32 %X, i32* %ptr, align 4, !nontemporal !1 21*9880d681SAndroid Build Coastguard Worker ret void 22*9880d681SAndroid Build Coastguard Worker} 23*9880d681SAndroid Build Coastguard Worker 24*9880d681SAndroid Build Coastguard Workerdefine void @test_nti64(i64* nocapture %ptr, i64 %X) { 25*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: test_nti64: 26*9880d681SAndroid Build Coastguard Worker; ALL: # BB#0: # %entry 27*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: movntiq %rsi, (%rdi) 28*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: retq 29*9880d681SAndroid Build Coastguard Workerentry: 30*9880d681SAndroid Build Coastguard Worker store i64 %X, i64* %ptr, align 8, !nontemporal !1 31*9880d681SAndroid Build Coastguard Worker ret void 32*9880d681SAndroid Build Coastguard Worker} 33*9880d681SAndroid Build Coastguard Worker 34*9880d681SAndroid Build Coastguard Workerdefine void @test_ntfloat(float* nocapture %ptr, float %X) { 35*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_ntfloat: 36*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 37*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movss %xmm0, (%rdi) 38*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 39*9880d681SAndroid Build Coastguard Worker; 40*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_ntfloat: 41*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 42*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movntss %xmm0, (%rdi) 43*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 44*9880d681SAndroid Build Coastguard Worker; 45*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_ntfloat: 46*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 47*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movss %xmm0, (%rdi) 48*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 49*9880d681SAndroid Build Coastguard Worker; 50*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_ntfloat: 51*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 52*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovss %xmm0, (%rdi) 53*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 54*9880d681SAndroid Build Coastguard Worker; 55*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_ntfloat: 56*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 57*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovss %xmm0, (%rdi) 58*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 59*9880d681SAndroid Build Coastguard Workerentry: 60*9880d681SAndroid Build Coastguard Worker store float %X, float* %ptr, align 4, !nontemporal !1 61*9880d681SAndroid Build Coastguard Worker ret void 62*9880d681SAndroid Build Coastguard Worker} 63*9880d681SAndroid Build Coastguard Worker 64*9880d681SAndroid Build Coastguard Workerdefine void @test_ntdouble(double* nocapture %ptr, double %X) { 65*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_ntdouble: 66*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 67*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movsd %xmm0, (%rdi) 68*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 69*9880d681SAndroid Build Coastguard Worker; 70*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_ntdouble: 71*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 72*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movntsd %xmm0, (%rdi) 73*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 74*9880d681SAndroid Build Coastguard Worker; 75*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_ntdouble: 76*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 77*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movsd %xmm0, (%rdi) 78*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 79*9880d681SAndroid Build Coastguard Worker; 80*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_ntdouble: 81*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 82*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovsd %xmm0, (%rdi) 83*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 84*9880d681SAndroid Build Coastguard Worker; 85*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_ntdouble: 86*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 87*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovsd %xmm0, (%rdi) 88*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 89*9880d681SAndroid Build Coastguard Workerentry: 90*9880d681SAndroid Build Coastguard Worker store double %X, double* %ptr, align 8, !nontemporal !1 91*9880d681SAndroid Build Coastguard Worker ret void 92*9880d681SAndroid Build Coastguard Worker} 93*9880d681SAndroid Build Coastguard Worker 94*9880d681SAndroid Build Coastguard Worker; 95*9880d681SAndroid Build Coastguard Worker; 128-bit Vector Stores 96*9880d681SAndroid Build Coastguard Worker; 97*9880d681SAndroid Build Coastguard Worker 98*9880d681SAndroid Build Coastguard Workerdefine void @test_nt4xfloat(<4 x float>* nocapture %ptr, <4 x float> %X) { 99*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt4xfloat: 100*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 101*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 102*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 103*9880d681SAndroid Build Coastguard Worker; 104*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt4xfloat: 105*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 106*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %xmm0, (%rdi) 107*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 108*9880d681SAndroid Build Coastguard Worker; 109*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt4xfloat: 110*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 111*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntps %xmm0, (%rdi) 112*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 113*9880d681SAndroid Build Coastguard Workerentry: 114*9880d681SAndroid Build Coastguard Worker store <4 x float> %X, <4 x float>* %ptr, align 16, !nontemporal !1 115*9880d681SAndroid Build Coastguard Worker ret void 116*9880d681SAndroid Build Coastguard Worker} 117*9880d681SAndroid Build Coastguard Worker 118*9880d681SAndroid Build Coastguard Workerdefine void @test_nt2xdouble(<2 x double>* nocapture %ptr, <2 x double> %X) { 119*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt2xdouble: 120*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 121*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntpd %xmm0, (%rdi) 122*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 123*9880d681SAndroid Build Coastguard Worker; 124*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt2xdouble: 125*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 126*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntpd %xmm0, (%rdi) 127*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 128*9880d681SAndroid Build Coastguard Worker; 129*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt2xdouble: 130*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 131*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntpd %xmm0, (%rdi) 132*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 133*9880d681SAndroid Build Coastguard Workerentry: 134*9880d681SAndroid Build Coastguard Worker store <2 x double> %X, <2 x double>* %ptr, align 16, !nontemporal !1 135*9880d681SAndroid Build Coastguard Worker ret void 136*9880d681SAndroid Build Coastguard Worker} 137*9880d681SAndroid Build Coastguard Worker 138*9880d681SAndroid Build Coastguard Workerdefine void @test_nt16xi8(<16 x i8>* nocapture %ptr, <16 x i8> %X) { 139*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt16xi8: 140*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 141*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm0, (%rdi) 142*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 143*9880d681SAndroid Build Coastguard Worker; 144*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt16xi8: 145*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 146*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %xmm0, (%rdi) 147*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 148*9880d681SAndroid Build Coastguard Worker; 149*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt16xi8: 150*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 151*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdq %xmm0, (%rdi) 152*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 153*9880d681SAndroid Build Coastguard Workerentry: 154*9880d681SAndroid Build Coastguard Worker store <16 x i8> %X, <16 x i8>* %ptr, align 16, !nontemporal !1 155*9880d681SAndroid Build Coastguard Worker ret void 156*9880d681SAndroid Build Coastguard Worker} 157*9880d681SAndroid Build Coastguard Worker 158*9880d681SAndroid Build Coastguard Workerdefine void @test_nt8xi16(<8 x i16>* nocapture %ptr, <8 x i16> %X) { 159*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt8xi16: 160*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 161*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm0, (%rdi) 162*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 163*9880d681SAndroid Build Coastguard Worker; 164*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt8xi16: 165*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 166*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %xmm0, (%rdi) 167*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 168*9880d681SAndroid Build Coastguard Worker; 169*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt8xi16: 170*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 171*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdq %xmm0, (%rdi) 172*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 173*9880d681SAndroid Build Coastguard Workerentry: 174*9880d681SAndroid Build Coastguard Worker store <8 x i16> %X, <8 x i16>* %ptr, align 16, !nontemporal !1 175*9880d681SAndroid Build Coastguard Worker ret void 176*9880d681SAndroid Build Coastguard Worker} 177*9880d681SAndroid Build Coastguard Worker 178*9880d681SAndroid Build Coastguard Workerdefine void @test_nt4xi32(<4 x i32>* nocapture %ptr, <4 x i32> %X) { 179*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt4xi32: 180*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 181*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm0, (%rdi) 182*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 183*9880d681SAndroid Build Coastguard Worker; 184*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt4xi32: 185*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 186*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %xmm0, (%rdi) 187*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 188*9880d681SAndroid Build Coastguard Worker; 189*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt4xi32: 190*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 191*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdq %xmm0, (%rdi) 192*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 193*9880d681SAndroid Build Coastguard Workerentry: 194*9880d681SAndroid Build Coastguard Worker store <4 x i32> %X, <4 x i32>* %ptr, align 16, !nontemporal !1 195*9880d681SAndroid Build Coastguard Worker ret void 196*9880d681SAndroid Build Coastguard Worker} 197*9880d681SAndroid Build Coastguard Worker 198*9880d681SAndroid Build Coastguard Workerdefine void @test_nt2xi64(<2 x i64>* nocapture %ptr, <2 x i64> %X) { 199*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt2xi64: 200*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 201*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm0, (%rdi) 202*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 203*9880d681SAndroid Build Coastguard Worker; 204*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt2xi64: 205*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 206*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %xmm0, (%rdi) 207*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 208*9880d681SAndroid Build Coastguard Worker; 209*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt2xi64: 210*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 211*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdq %xmm0, (%rdi) 212*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 213*9880d681SAndroid Build Coastguard Workerentry: 214*9880d681SAndroid Build Coastguard Worker store <2 x i64> %X, <2 x i64>* %ptr, align 16, !nontemporal !1 215*9880d681SAndroid Build Coastguard Worker ret void 216*9880d681SAndroid Build Coastguard Worker} 217*9880d681SAndroid Build Coastguard Worker 218*9880d681SAndroid Build Coastguard Worker; 219*9880d681SAndroid Build Coastguard Worker; 128-bit Vector Loads 220*9880d681SAndroid Build Coastguard Worker; 221*9880d681SAndroid Build Coastguard Worker 222*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_load_nt4xfloat(<4 x float>* nocapture %ptr) { 223*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt4xfloat: 224*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 225*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 226*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 227*9880d681SAndroid Build Coastguard Worker; 228*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt4xfloat: 229*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 230*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps (%rdi), %xmm0 231*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 232*9880d681SAndroid Build Coastguard Worker; 233*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt4xfloat: 234*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 235*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 236*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 237*9880d681SAndroid Build Coastguard Worker; 238*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_load_nt4xfloat: 239*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 240*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdqa (%rdi), %xmm0 241*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 242*9880d681SAndroid Build Coastguard Worker; 243*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt4xfloat: 244*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 245*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %xmm0 246*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 247*9880d681SAndroid Build Coastguard Workerentry: 248*9880d681SAndroid Build Coastguard Worker %0 = load <4 x float>, <4 x float>* %ptr, align 16, !nontemporal !1 249*9880d681SAndroid Build Coastguard Worker ret <4 x float> %0 250*9880d681SAndroid Build Coastguard Worker} 251*9880d681SAndroid Build Coastguard Worker 252*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_load_nt2xdouble(<2 x double>* nocapture %ptr) { 253*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt2xdouble: 254*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 255*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movapd (%rdi), %xmm0 256*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 257*9880d681SAndroid Build Coastguard Worker; 258*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt2xdouble: 259*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 260*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movapd (%rdi), %xmm0 261*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 262*9880d681SAndroid Build Coastguard Worker; 263*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt2xdouble: 264*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 265*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 266*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 267*9880d681SAndroid Build Coastguard Worker; 268*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_load_nt2xdouble: 269*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 270*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdqa (%rdi), %xmm0 271*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 272*9880d681SAndroid Build Coastguard Worker; 273*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt2xdouble: 274*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 275*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %xmm0 276*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 277*9880d681SAndroid Build Coastguard Workerentry: 278*9880d681SAndroid Build Coastguard Worker %0 = load <2 x double>, <2 x double>* %ptr, align 16, !nontemporal !1 279*9880d681SAndroid Build Coastguard Worker ret <2 x double> %0 280*9880d681SAndroid Build Coastguard Worker} 281*9880d681SAndroid Build Coastguard Worker 282*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @test_load_nt16xi8(<16 x i8>* nocapture %ptr) { 283*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_load_nt16xi8: 284*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 285*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdqa (%rdi), %xmm0 286*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 287*9880d681SAndroid Build Coastguard Worker; 288*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_load_nt16xi8: 289*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 290*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdqa (%rdi), %xmm0 291*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 292*9880d681SAndroid Build Coastguard Worker; 293*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt16xi8: 294*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 295*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %xmm0 296*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 297*9880d681SAndroid Build Coastguard Workerentry: 298*9880d681SAndroid Build Coastguard Worker %0 = load <16 x i8>, <16 x i8>* %ptr, align 16, !nontemporal !1 299*9880d681SAndroid Build Coastguard Worker ret <16 x i8> %0 300*9880d681SAndroid Build Coastguard Worker} 301*9880d681SAndroid Build Coastguard Worker 302*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @test_load_nt8xi16(<8 x i16>* nocapture %ptr) { 303*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_load_nt8xi16: 304*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 305*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdqa (%rdi), %xmm0 306*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 307*9880d681SAndroid Build Coastguard Worker; 308*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_load_nt8xi16: 309*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 310*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdqa (%rdi), %xmm0 311*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 312*9880d681SAndroid Build Coastguard Worker; 313*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt8xi16: 314*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 315*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %xmm0 316*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 317*9880d681SAndroid Build Coastguard Workerentry: 318*9880d681SAndroid Build Coastguard Worker %0 = load <8 x i16>, <8 x i16>* %ptr, align 16, !nontemporal !1 319*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %0 320*9880d681SAndroid Build Coastguard Worker} 321*9880d681SAndroid Build Coastguard Worker 322*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @test_load_nt4xi32(<4 x i32>* nocapture %ptr) { 323*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_load_nt4xi32: 324*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 325*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdqa (%rdi), %xmm0 326*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 327*9880d681SAndroid Build Coastguard Worker; 328*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_load_nt4xi32: 329*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 330*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdqa (%rdi), %xmm0 331*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 332*9880d681SAndroid Build Coastguard Worker; 333*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt4xi32: 334*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 335*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %xmm0 336*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 337*9880d681SAndroid Build Coastguard Workerentry: 338*9880d681SAndroid Build Coastguard Worker %0 = load <4 x i32>, <4 x i32>* %ptr, align 16, !nontemporal !1 339*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %0 340*9880d681SAndroid Build Coastguard Worker} 341*9880d681SAndroid Build Coastguard Worker 342*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_load_nt2xi64(<2 x i64>* nocapture %ptr) { 343*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_load_nt2xi64: 344*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 345*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdqa (%rdi), %xmm0 346*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 347*9880d681SAndroid Build Coastguard Worker; 348*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_load_nt2xi64: 349*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 350*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdqa (%rdi), %xmm0 351*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 352*9880d681SAndroid Build Coastguard Worker; 353*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt2xi64: 354*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 355*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %xmm0 356*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 357*9880d681SAndroid Build Coastguard Workerentry: 358*9880d681SAndroid Build Coastguard Worker %0 = load <2 x i64>, <2 x i64>* %ptr, align 16, !nontemporal !1 359*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %0 360*9880d681SAndroid Build Coastguard Worker} 361*9880d681SAndroid Build Coastguard Worker 362*9880d681SAndroid Build Coastguard Worker; 363*9880d681SAndroid Build Coastguard Worker; 256-bit Vector Stores 364*9880d681SAndroid Build Coastguard Worker; 365*9880d681SAndroid Build Coastguard Worker 366*9880d681SAndroid Build Coastguard Workerdefine void @test_nt8xfloat(<8 x float>* nocapture %ptr, <8 x float> %X) { 367*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt8xfloat: 368*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 369*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 370*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm1, 16(%rdi) 371*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 372*9880d681SAndroid Build Coastguard Worker; 373*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt8xfloat: 374*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 375*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %ymm0, (%rdi) 376*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 377*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 378*9880d681SAndroid Build Coastguard Worker; 379*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt8xfloat: 380*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 381*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntps %ymm0, (%rdi) 382*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 383*9880d681SAndroid Build Coastguard Workerentry: 384*9880d681SAndroid Build Coastguard Worker store <8 x float> %X, <8 x float>* %ptr, align 32, !nontemporal !1 385*9880d681SAndroid Build Coastguard Worker ret void 386*9880d681SAndroid Build Coastguard Worker} 387*9880d681SAndroid Build Coastguard Worker 388*9880d681SAndroid Build Coastguard Workerdefine void @test_nt4xdouble(<4 x double>* nocapture %ptr, <4 x double> %X) { 389*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt4xdouble: 390*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 391*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntpd %xmm0, (%rdi) 392*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntpd %xmm1, 16(%rdi) 393*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 394*9880d681SAndroid Build Coastguard Worker; 395*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt4xdouble: 396*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 397*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntpd %ymm0, (%rdi) 398*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 399*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 400*9880d681SAndroid Build Coastguard Worker; 401*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt4xdouble: 402*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 403*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntpd %ymm0, (%rdi) 404*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 405*9880d681SAndroid Build Coastguard Workerentry: 406*9880d681SAndroid Build Coastguard Worker store <4 x double> %X, <4 x double>* %ptr, align 32, !nontemporal !1 407*9880d681SAndroid Build Coastguard Worker ret void 408*9880d681SAndroid Build Coastguard Worker} 409*9880d681SAndroid Build Coastguard Worker 410*9880d681SAndroid Build Coastguard Workerdefine void @test_nt32xi8(<32 x i8>* nocapture %ptr, <32 x i8> %X) { 411*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt32xi8: 412*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 413*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm0, (%rdi) 414*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm1, 16(%rdi) 415*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 416*9880d681SAndroid Build Coastguard Worker; 417*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt32xi8: 418*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 419*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %ymm0, (%rdi) 420*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 421*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 422*9880d681SAndroid Build Coastguard Worker; 423*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt32xi8: 424*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 425*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdq %ymm0, (%rdi) 426*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 427*9880d681SAndroid Build Coastguard Workerentry: 428*9880d681SAndroid Build Coastguard Worker store <32 x i8> %X, <32 x i8>* %ptr, align 32, !nontemporal !1 429*9880d681SAndroid Build Coastguard Worker ret void 430*9880d681SAndroid Build Coastguard Worker} 431*9880d681SAndroid Build Coastguard Worker 432*9880d681SAndroid Build Coastguard Workerdefine void @test_nt16xi16(<16 x i16>* nocapture %ptr, <16 x i16> %X) { 433*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt16xi16: 434*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 435*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm0, (%rdi) 436*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm1, 16(%rdi) 437*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 438*9880d681SAndroid Build Coastguard Worker; 439*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt16xi16: 440*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 441*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %ymm0, (%rdi) 442*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 443*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 444*9880d681SAndroid Build Coastguard Worker; 445*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt16xi16: 446*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 447*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdq %ymm0, (%rdi) 448*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 449*9880d681SAndroid Build Coastguard Workerentry: 450*9880d681SAndroid Build Coastguard Worker store <16 x i16> %X, <16 x i16>* %ptr, align 32, !nontemporal !1 451*9880d681SAndroid Build Coastguard Worker ret void 452*9880d681SAndroid Build Coastguard Worker} 453*9880d681SAndroid Build Coastguard Worker 454*9880d681SAndroid Build Coastguard Workerdefine void @test_nt8xi32(<8 x i32>* nocapture %ptr, <8 x i32> %X) { 455*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt8xi32: 456*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 457*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm0, (%rdi) 458*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm1, 16(%rdi) 459*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 460*9880d681SAndroid Build Coastguard Worker; 461*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt8xi32: 462*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 463*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %ymm0, (%rdi) 464*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 465*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 466*9880d681SAndroid Build Coastguard Worker; 467*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt8xi32: 468*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 469*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdq %ymm0, (%rdi) 470*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 471*9880d681SAndroid Build Coastguard Workerentry: 472*9880d681SAndroid Build Coastguard Worker store <8 x i32> %X, <8 x i32>* %ptr, align 32, !nontemporal !1 473*9880d681SAndroid Build Coastguard Worker ret void 474*9880d681SAndroid Build Coastguard Worker} 475*9880d681SAndroid Build Coastguard Worker 476*9880d681SAndroid Build Coastguard Workerdefine void @test_nt4xi64(<4 x i64>* nocapture %ptr, <4 x i64> %X) { 477*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt4xi64: 478*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 479*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm0, (%rdi) 480*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm1, 16(%rdi) 481*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 482*9880d681SAndroid Build Coastguard Worker; 483*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt4xi64: 484*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 485*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %ymm0, (%rdi) 486*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 487*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 488*9880d681SAndroid Build Coastguard Worker; 489*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt4xi64: 490*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 491*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdq %ymm0, (%rdi) 492*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 493*9880d681SAndroid Build Coastguard Workerentry: 494*9880d681SAndroid Build Coastguard Worker store <4 x i64> %X, <4 x i64>* %ptr, align 32, !nontemporal !1 495*9880d681SAndroid Build Coastguard Worker ret void 496*9880d681SAndroid Build Coastguard Worker} 497*9880d681SAndroid Build Coastguard Worker 498*9880d681SAndroid Build Coastguard Worker; 499*9880d681SAndroid Build Coastguard Worker; 256-bit Vector Loads 500*9880d681SAndroid Build Coastguard Worker; 501*9880d681SAndroid Build Coastguard Worker 502*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_load_nt8xfloat(<8 x float>* nocapture %ptr) { 503*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt8xfloat: 504*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 505*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 506*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 507*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 508*9880d681SAndroid Build Coastguard Worker; 509*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt8xfloat: 510*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 511*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps (%rdi), %xmm0 512*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 16(%rdi), %xmm1 513*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 514*9880d681SAndroid Build Coastguard Worker; 515*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt8xfloat: 516*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 517*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 518*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 519*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 520*9880d681SAndroid Build Coastguard Worker; 521*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt8xfloat: 522*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: # %entry 523*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 524*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 525*9880d681SAndroid Build Coastguard Worker; 526*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt8xfloat: 527*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 528*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 529*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 530*9880d681SAndroid Build Coastguard Worker; 531*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt8xfloat: 532*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 533*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %ymm0 534*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 535*9880d681SAndroid Build Coastguard Workerentry: 536*9880d681SAndroid Build Coastguard Worker %0 = load <8 x float>, <8 x float>* %ptr, align 32, !nontemporal !1 537*9880d681SAndroid Build Coastguard Worker ret <8 x float> %0 538*9880d681SAndroid Build Coastguard Worker} 539*9880d681SAndroid Build Coastguard Worker 540*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_load_nt4xdouble(<4 x double>* nocapture %ptr) { 541*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt4xdouble: 542*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 543*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movapd (%rdi), %xmm0 544*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movapd 16(%rdi), %xmm1 545*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 546*9880d681SAndroid Build Coastguard Worker; 547*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt4xdouble: 548*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 549*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movapd (%rdi), %xmm0 550*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movapd 16(%rdi), %xmm1 551*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 552*9880d681SAndroid Build Coastguard Worker; 553*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt4xdouble: 554*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 555*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 556*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 557*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 558*9880d681SAndroid Build Coastguard Worker; 559*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt4xdouble: 560*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: # %entry 561*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovapd (%rdi), %ymm0 562*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 563*9880d681SAndroid Build Coastguard Worker; 564*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt4xdouble: 565*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 566*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 567*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 568*9880d681SAndroid Build Coastguard Worker; 569*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt4xdouble: 570*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 571*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %ymm0 572*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 573*9880d681SAndroid Build Coastguard Workerentry: 574*9880d681SAndroid Build Coastguard Worker %0 = load <4 x double>, <4 x double>* %ptr, align 32, !nontemporal !1 575*9880d681SAndroid Build Coastguard Worker ret <4 x double> %0 576*9880d681SAndroid Build Coastguard Worker} 577*9880d681SAndroid Build Coastguard Worker 578*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @test_load_nt32xi8(<32 x i8>* nocapture %ptr) { 579*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt32xi8: 580*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 581*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 582*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 583*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 584*9880d681SAndroid Build Coastguard Worker; 585*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt32xi8: 586*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 587*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps (%rdi), %xmm0 588*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 16(%rdi), %xmm1 589*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 590*9880d681SAndroid Build Coastguard Worker; 591*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt32xi8: 592*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 593*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 594*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 595*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 596*9880d681SAndroid Build Coastguard Worker; 597*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt32xi8: 598*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: # %entry 599*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovdqa (%rdi), %ymm0 600*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 601*9880d681SAndroid Build Coastguard Worker; 602*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt32xi8: 603*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 604*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 605*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 606*9880d681SAndroid Build Coastguard Worker; 607*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt32xi8: 608*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 609*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %ymm0 610*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 611*9880d681SAndroid Build Coastguard Workerentry: 612*9880d681SAndroid Build Coastguard Worker %0 = load <32 x i8>, <32 x i8>* %ptr, align 32, !nontemporal !1 613*9880d681SAndroid Build Coastguard Worker ret <32 x i8> %0 614*9880d681SAndroid Build Coastguard Worker} 615*9880d681SAndroid Build Coastguard Worker 616*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @test_load_nt16xi16(<16 x i16>* nocapture %ptr) { 617*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt16xi16: 618*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 619*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 620*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 621*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 622*9880d681SAndroid Build Coastguard Worker; 623*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt16xi16: 624*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 625*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps (%rdi), %xmm0 626*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 16(%rdi), %xmm1 627*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 628*9880d681SAndroid Build Coastguard Worker; 629*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt16xi16: 630*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 631*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 632*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 633*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 634*9880d681SAndroid Build Coastguard Worker; 635*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt16xi16: 636*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: # %entry 637*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovdqa (%rdi), %ymm0 638*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 639*9880d681SAndroid Build Coastguard Worker; 640*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt16xi16: 641*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 642*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 643*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 644*9880d681SAndroid Build Coastguard Worker; 645*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt16xi16: 646*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 647*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %ymm0 648*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 649*9880d681SAndroid Build Coastguard Workerentry: 650*9880d681SAndroid Build Coastguard Worker %0 = load <16 x i16>, <16 x i16>* %ptr, align 32, !nontemporal !1 651*9880d681SAndroid Build Coastguard Worker ret <16 x i16> %0 652*9880d681SAndroid Build Coastguard Worker} 653*9880d681SAndroid Build Coastguard Worker 654*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test_load_nt8xi32(<8 x i32>* nocapture %ptr) { 655*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt8xi32: 656*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 657*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 658*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 659*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 660*9880d681SAndroid Build Coastguard Worker; 661*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt8xi32: 662*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 663*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps (%rdi), %xmm0 664*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 16(%rdi), %xmm1 665*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 666*9880d681SAndroid Build Coastguard Worker; 667*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt8xi32: 668*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 669*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 670*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 671*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 672*9880d681SAndroid Build Coastguard Worker; 673*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt8xi32: 674*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: # %entry 675*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovdqa (%rdi), %ymm0 676*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 677*9880d681SAndroid Build Coastguard Worker; 678*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt8xi32: 679*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 680*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 681*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 682*9880d681SAndroid Build Coastguard Worker; 683*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt8xi32: 684*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 685*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %ymm0 686*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 687*9880d681SAndroid Build Coastguard Workerentry: 688*9880d681SAndroid Build Coastguard Worker %0 = load <8 x i32>, <8 x i32>* %ptr, align 32, !nontemporal !1 689*9880d681SAndroid Build Coastguard Worker ret <8 x i32> %0 690*9880d681SAndroid Build Coastguard Worker} 691*9880d681SAndroid Build Coastguard Worker 692*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_load_nt4xi64(<4 x i64>* nocapture %ptr) { 693*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt4xi64: 694*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 695*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 696*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 697*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 698*9880d681SAndroid Build Coastguard Worker; 699*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt4xi64: 700*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 701*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps (%rdi), %xmm0 702*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 16(%rdi), %xmm1 703*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 704*9880d681SAndroid Build Coastguard Worker; 705*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt4xi64: 706*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 707*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 708*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 709*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 710*9880d681SAndroid Build Coastguard Worker; 711*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt4xi64: 712*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: # %entry 713*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovdqa (%rdi), %ymm0 714*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 715*9880d681SAndroid Build Coastguard Worker; 716*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt4xi64: 717*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 718*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 719*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 720*9880d681SAndroid Build Coastguard Worker; 721*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt4xi64: 722*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 723*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %ymm0 724*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 725*9880d681SAndroid Build Coastguard Workerentry: 726*9880d681SAndroid Build Coastguard Worker %0 = load <4 x i64>, <4 x i64>* %ptr, align 32, !nontemporal !1 727*9880d681SAndroid Build Coastguard Worker ret <4 x i64> %0 728*9880d681SAndroid Build Coastguard Worker} 729*9880d681SAndroid Build Coastguard Worker 730*9880d681SAndroid Build Coastguard Worker; 731*9880d681SAndroid Build Coastguard Worker; 512-bit Vector Stores 732*9880d681SAndroid Build Coastguard Worker; 733*9880d681SAndroid Build Coastguard Worker 734*9880d681SAndroid Build Coastguard Workerdefine void @test_nt16xfloat(<16 x float>* nocapture %ptr, <16 x float> %X) { 735*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt16xfloat: 736*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 737*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm0, (%rdi) 738*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm1, 16(%rdi) 739*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm2, 32(%rdi) 740*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntps %xmm3, 48(%rdi) 741*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 742*9880d681SAndroid Build Coastguard Worker; 743*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt16xfloat: 744*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 745*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %ymm0, (%rdi) 746*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntps %ymm1, 32(%rdi) 747*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 748*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 749*9880d681SAndroid Build Coastguard Worker; 750*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt16xfloat: 751*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 752*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntps %zmm0, (%rdi) 753*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 754*9880d681SAndroid Build Coastguard Workerentry: 755*9880d681SAndroid Build Coastguard Worker store <16 x float> %X, <16 x float>* %ptr, align 64, !nontemporal !1 756*9880d681SAndroid Build Coastguard Worker ret void 757*9880d681SAndroid Build Coastguard Worker} 758*9880d681SAndroid Build Coastguard Worker 759*9880d681SAndroid Build Coastguard Workerdefine void @test_nt8xdouble(<8 x double>* nocapture %ptr, <8 x double> %X) { 760*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt8xdouble: 761*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 762*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntpd %xmm0, (%rdi) 763*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntpd %xmm1, 16(%rdi) 764*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntpd %xmm2, 32(%rdi) 765*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntpd %xmm3, 48(%rdi) 766*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 767*9880d681SAndroid Build Coastguard Worker; 768*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt8xdouble: 769*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 770*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntpd %ymm0, (%rdi) 771*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntpd %ymm1, 32(%rdi) 772*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 773*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 774*9880d681SAndroid Build Coastguard Worker; 775*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt8xdouble: 776*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 777*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntpd %zmm0, (%rdi) 778*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 779*9880d681SAndroid Build Coastguard Workerentry: 780*9880d681SAndroid Build Coastguard Worker store <8 x double> %X, <8 x double>* %ptr, align 64, !nontemporal !1 781*9880d681SAndroid Build Coastguard Worker ret void 782*9880d681SAndroid Build Coastguard Worker} 783*9880d681SAndroid Build Coastguard Worker 784*9880d681SAndroid Build Coastguard Workerdefine void @test_nt64xi8(<64 x i8>* nocapture %ptr, <64 x i8> %X) { 785*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt64xi8: 786*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 787*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm0, (%rdi) 788*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm1, 16(%rdi) 789*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm2, 32(%rdi) 790*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm3, 48(%rdi) 791*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 792*9880d681SAndroid Build Coastguard Worker; 793*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt64xi8: 794*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 795*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %ymm0, (%rdi) 796*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %ymm1, 32(%rdi) 797*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 798*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 799*9880d681SAndroid Build Coastguard Worker; 800*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_nt64xi8: 801*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: # %entry 802*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovntdq %ymm0, (%rdi) 803*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovntdq %ymm1, 32(%rdi) 804*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 805*9880d681SAndroid Build Coastguard Worker; 806*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_nt64xi8: 807*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: # %entry 808*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovntdq %zmm0, (%rdi) 809*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 810*9880d681SAndroid Build Coastguard Workerentry: 811*9880d681SAndroid Build Coastguard Worker store <64 x i8> %X, <64 x i8>* %ptr, align 64, !nontemporal !1 812*9880d681SAndroid Build Coastguard Worker ret void 813*9880d681SAndroid Build Coastguard Worker} 814*9880d681SAndroid Build Coastguard Worker 815*9880d681SAndroid Build Coastguard Workerdefine void @test_nt32xi16(<32 x i16>* nocapture %ptr, <32 x i16> %X) { 816*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt32xi16: 817*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 818*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm0, (%rdi) 819*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm1, 16(%rdi) 820*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm2, 32(%rdi) 821*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm3, 48(%rdi) 822*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 823*9880d681SAndroid Build Coastguard Worker; 824*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt32xi16: 825*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 826*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %ymm0, (%rdi) 827*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %ymm1, 32(%rdi) 828*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 829*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 830*9880d681SAndroid Build Coastguard Worker; 831*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_nt32xi16: 832*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: # %entry 833*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovntdq %ymm0, (%rdi) 834*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovntdq %ymm1, 32(%rdi) 835*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 836*9880d681SAndroid Build Coastguard Worker; 837*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_nt32xi16: 838*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: # %entry 839*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovntdq %zmm0, (%rdi) 840*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 841*9880d681SAndroid Build Coastguard Workerentry: 842*9880d681SAndroid Build Coastguard Worker store <32 x i16> %X, <32 x i16>* %ptr, align 64, !nontemporal !1 843*9880d681SAndroid Build Coastguard Worker ret void 844*9880d681SAndroid Build Coastguard Worker} 845*9880d681SAndroid Build Coastguard Worker 846*9880d681SAndroid Build Coastguard Workerdefine void @test_nt16xi32(<16 x i32>* nocapture %ptr, <16 x i32> %X) { 847*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt16xi32: 848*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 849*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm0, (%rdi) 850*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm1, 16(%rdi) 851*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm2, 32(%rdi) 852*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm3, 48(%rdi) 853*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 854*9880d681SAndroid Build Coastguard Worker; 855*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt16xi32: 856*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 857*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %ymm0, (%rdi) 858*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %ymm1, 32(%rdi) 859*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 860*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 861*9880d681SAndroid Build Coastguard Worker; 862*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt16xi32: 863*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 864*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdq %zmm0, (%rdi) 865*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 866*9880d681SAndroid Build Coastguard Workerentry: 867*9880d681SAndroid Build Coastguard Worker store <16 x i32> %X, <16 x i32>* %ptr, align 64, !nontemporal !1 868*9880d681SAndroid Build Coastguard Worker ret void 869*9880d681SAndroid Build Coastguard Worker} 870*9880d681SAndroid Build Coastguard Worker 871*9880d681SAndroid Build Coastguard Workerdefine void @test_nt8xi64(<8 x i64>* nocapture %ptr, <8 x i64> %X) { 872*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_nt8xi64: 873*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: # %entry 874*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm0, (%rdi) 875*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm1, 16(%rdi) 876*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm2, 32(%rdi) 877*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movntdq %xmm3, 48(%rdi) 878*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 879*9880d681SAndroid Build Coastguard Worker; 880*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_nt8xi64: 881*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: # %entry 882*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %ymm0, (%rdi) 883*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdq %ymm1, 32(%rdi) 884*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vzeroupper 885*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 886*9880d681SAndroid Build Coastguard Worker; 887*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_nt8xi64: 888*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 889*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdq %zmm0, (%rdi) 890*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 891*9880d681SAndroid Build Coastguard Workerentry: 892*9880d681SAndroid Build Coastguard Worker store <8 x i64> %X, <8 x i64>* %ptr, align 64, !nontemporal !1 893*9880d681SAndroid Build Coastguard Worker ret void 894*9880d681SAndroid Build Coastguard Worker} 895*9880d681SAndroid Build Coastguard Worker 896*9880d681SAndroid Build Coastguard Worker; 897*9880d681SAndroid Build Coastguard Worker; 512-bit Vector Loads 898*9880d681SAndroid Build Coastguard Worker; 899*9880d681SAndroid Build Coastguard Worker 900*9880d681SAndroid Build Coastguard Workerdefine <16 x float> @test_load_nt16xfloat(<16 x float>* nocapture %ptr) { 901*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt16xfloat: 902*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 903*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 904*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 905*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 32(%rdi), %xmm2 906*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 48(%rdi), %xmm3 907*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 908*9880d681SAndroid Build Coastguard Worker; 909*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt16xfloat: 910*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 911*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps (%rdi), %xmm0 912*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 16(%rdi), %xmm1 913*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 32(%rdi), %xmm2 914*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 48(%rdi), %xmm3 915*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 916*9880d681SAndroid Build Coastguard Worker; 917*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt16xfloat: 918*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 919*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 920*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 921*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 32(%rdi), %xmm2 922*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 48(%rdi), %xmm3 923*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 924*9880d681SAndroid Build Coastguard Worker; 925*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt16xfloat: 926*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: # %entry 927*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 928*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 929*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 930*9880d681SAndroid Build Coastguard Worker; 931*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt16xfloat: 932*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 933*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 934*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm1 935*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 936*9880d681SAndroid Build Coastguard Worker; 937*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt16xfloat: 938*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 939*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %zmm0 940*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 941*9880d681SAndroid Build Coastguard Workerentry: 942*9880d681SAndroid Build Coastguard Worker %0 = load <16 x float>, <16 x float>* %ptr, align 64, !nontemporal !1 943*9880d681SAndroid Build Coastguard Worker ret <16 x float> %0 944*9880d681SAndroid Build Coastguard Worker} 945*9880d681SAndroid Build Coastguard Worker 946*9880d681SAndroid Build Coastguard Workerdefine <8 x double> @test_load_nt8xdouble(<8 x double>* nocapture %ptr) { 947*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt8xdouble: 948*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 949*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movapd (%rdi), %xmm0 950*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movapd 16(%rdi), %xmm1 951*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movapd 32(%rdi), %xmm2 952*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movapd 48(%rdi), %xmm3 953*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 954*9880d681SAndroid Build Coastguard Worker; 955*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt8xdouble: 956*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 957*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movapd (%rdi), %xmm0 958*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movapd 16(%rdi), %xmm1 959*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movapd 32(%rdi), %xmm2 960*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movapd 48(%rdi), %xmm3 961*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 962*9880d681SAndroid Build Coastguard Worker; 963*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt8xdouble: 964*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 965*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 966*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 967*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 32(%rdi), %xmm2 968*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 48(%rdi), %xmm3 969*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 970*9880d681SAndroid Build Coastguard Worker; 971*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt8xdouble: 972*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: # %entry 973*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovapd (%rdi), %ymm0 974*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovapd 32(%rdi), %ymm1 975*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 976*9880d681SAndroid Build Coastguard Worker; 977*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt8xdouble: 978*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 979*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 980*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm1 981*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 982*9880d681SAndroid Build Coastguard Worker; 983*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt8xdouble: 984*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 985*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %zmm0 986*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 987*9880d681SAndroid Build Coastguard Workerentry: 988*9880d681SAndroid Build Coastguard Worker %0 = load <8 x double>, <8 x double>* %ptr, align 64, !nontemporal !1 989*9880d681SAndroid Build Coastguard Worker ret <8 x double> %0 990*9880d681SAndroid Build Coastguard Worker} 991*9880d681SAndroid Build Coastguard Worker 992*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @test_load_nt64xi8(<64 x i8>* nocapture %ptr) { 993*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt64xi8: 994*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 995*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 996*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 997*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 32(%rdi), %xmm2 998*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 48(%rdi), %xmm3 999*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 1000*9880d681SAndroid Build Coastguard Worker; 1001*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt64xi8: 1002*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 1003*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps (%rdi), %xmm0 1004*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 16(%rdi), %xmm1 1005*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 32(%rdi), %xmm2 1006*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 48(%rdi), %xmm3 1007*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 1008*9880d681SAndroid Build Coastguard Worker; 1009*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt64xi8: 1010*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 1011*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 1012*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 1013*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 32(%rdi), %xmm2 1014*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 48(%rdi), %xmm3 1015*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 1016*9880d681SAndroid Build Coastguard Worker; 1017*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt64xi8: 1018*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: # %entry 1019*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 1020*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 1021*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 1022*9880d681SAndroid Build Coastguard Worker; 1023*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt64xi8: 1024*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 1025*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 1026*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm1 1027*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 1028*9880d681SAndroid Build Coastguard Worker; 1029*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_load_nt64xi8: 1030*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: # %entry 1031*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovntdqa (%rdi), %ymm0 1032*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovntdqa 32(%rdi), %ymm1 1033*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1034*9880d681SAndroid Build Coastguard Worker; 1035*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_load_nt64xi8: 1036*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: # %entry 1037*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovntdqa (%rdi), %zmm0 1038*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1039*9880d681SAndroid Build Coastguard Workerentry: 1040*9880d681SAndroid Build Coastguard Worker %0 = load <64 x i8>, <64 x i8>* %ptr, align 64, !nontemporal !1 1041*9880d681SAndroid Build Coastguard Worker ret <64 x i8> %0 1042*9880d681SAndroid Build Coastguard Worker} 1043*9880d681SAndroid Build Coastguard Worker 1044*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @test_load_nt32xi16(<32 x i16>* nocapture %ptr) { 1045*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt32xi16: 1046*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 1047*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 1048*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 1049*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 32(%rdi), %xmm2 1050*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 48(%rdi), %xmm3 1051*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 1052*9880d681SAndroid Build Coastguard Worker; 1053*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt32xi16: 1054*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 1055*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps (%rdi), %xmm0 1056*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 16(%rdi), %xmm1 1057*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 32(%rdi), %xmm2 1058*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 48(%rdi), %xmm3 1059*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 1060*9880d681SAndroid Build Coastguard Worker; 1061*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt32xi16: 1062*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 1063*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 1064*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 1065*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 32(%rdi), %xmm2 1066*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 48(%rdi), %xmm3 1067*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 1068*9880d681SAndroid Build Coastguard Worker; 1069*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt32xi16: 1070*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: # %entry 1071*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 1072*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 1073*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 1074*9880d681SAndroid Build Coastguard Worker; 1075*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt32xi16: 1076*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 1077*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 1078*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm1 1079*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 1080*9880d681SAndroid Build Coastguard Worker; 1081*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_load_nt32xi16: 1082*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: # %entry 1083*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovntdqa (%rdi), %ymm0 1084*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovntdqa 32(%rdi), %ymm1 1085*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1086*9880d681SAndroid Build Coastguard Worker; 1087*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_load_nt32xi16: 1088*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: # %entry 1089*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovntdqa (%rdi), %zmm0 1090*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1091*9880d681SAndroid Build Coastguard Workerentry: 1092*9880d681SAndroid Build Coastguard Worker %0 = load <32 x i16>, <32 x i16>* %ptr, align 64, !nontemporal !1 1093*9880d681SAndroid Build Coastguard Worker ret <32 x i16> %0 1094*9880d681SAndroid Build Coastguard Worker} 1095*9880d681SAndroid Build Coastguard Worker 1096*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @test_load_nt16xi32(<16 x i32>* nocapture %ptr) { 1097*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt16xi32: 1098*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 1099*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 1100*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 1101*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 32(%rdi), %xmm2 1102*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 48(%rdi), %xmm3 1103*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 1104*9880d681SAndroid Build Coastguard Worker; 1105*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt16xi32: 1106*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 1107*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps (%rdi), %xmm0 1108*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 16(%rdi), %xmm1 1109*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 32(%rdi), %xmm2 1110*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 48(%rdi), %xmm3 1111*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 1112*9880d681SAndroid Build Coastguard Worker; 1113*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt16xi32: 1114*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 1115*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 1116*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 1117*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 32(%rdi), %xmm2 1118*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 48(%rdi), %xmm3 1119*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 1120*9880d681SAndroid Build Coastguard Worker; 1121*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt16xi32: 1122*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: # %entry 1123*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 1124*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 1125*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 1126*9880d681SAndroid Build Coastguard Worker; 1127*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt16xi32: 1128*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 1129*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 1130*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm1 1131*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 1132*9880d681SAndroid Build Coastguard Worker; 1133*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt16xi32: 1134*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 1135*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %zmm0 1136*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 1137*9880d681SAndroid Build Coastguard Workerentry: 1138*9880d681SAndroid Build Coastguard Worker %0 = load <16 x i32>, <16 x i32>* %ptr, align 64, !nontemporal !1 1139*9880d681SAndroid Build Coastguard Worker ret <16 x i32> %0 1140*9880d681SAndroid Build Coastguard Worker} 1141*9880d681SAndroid Build Coastguard Worker 1142*9880d681SAndroid Build Coastguard Workerdefine <8 x i64> @test_load_nt8xi64(<8 x i64>* nocapture %ptr) { 1143*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_load_nt8xi64: 1144*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: # %entry 1145*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 1146*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 1147*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 32(%rdi), %xmm2 1148*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 48(%rdi), %xmm3 1149*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 1150*9880d681SAndroid Build Coastguard Worker; 1151*9880d681SAndroid Build Coastguard Worker; SSE4A-LABEL: test_load_nt8xi64: 1152*9880d681SAndroid Build Coastguard Worker; SSE4A: # BB#0: # %entry 1153*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps (%rdi), %xmm0 1154*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 16(%rdi), %xmm1 1155*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 32(%rdi), %xmm2 1156*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: movaps 48(%rdi), %xmm3 1157*9880d681SAndroid Build Coastguard Worker; SSE4A-NEXT: retq 1158*9880d681SAndroid Build Coastguard Worker; 1159*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_load_nt8xi64: 1160*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: # %entry 1161*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 1162*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 1163*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 32(%rdi), %xmm2 1164*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 48(%rdi), %xmm3 1165*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 1166*9880d681SAndroid Build Coastguard Worker; 1167*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_load_nt8xi64: 1168*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: # %entry 1169*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 1170*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 1171*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 1172*9880d681SAndroid Build Coastguard Worker; 1173*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_load_nt8xi64: 1174*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: # %entry 1175*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 1176*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm1 1177*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 1178*9880d681SAndroid Build Coastguard Worker; 1179*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_load_nt8xi64: 1180*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: # %entry 1181*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %zmm0 1182*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 1183*9880d681SAndroid Build Coastguard Workerentry: 1184*9880d681SAndroid Build Coastguard Worker %0 = load <8 x i64>, <8 x i64>* %ptr, align 64, !nontemporal !1 1185*9880d681SAndroid Build Coastguard Worker ret <8 x i64> %0 1186*9880d681SAndroid Build Coastguard Worker} 1187*9880d681SAndroid Build Coastguard Worker 1188*9880d681SAndroid Build Coastguard Worker!1 = !{i32 1} 1189