1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 4*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 5*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 6*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F 7*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW 8*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VL 9*9880d681SAndroid Build Coastguard Worker 10*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_v4f32(<4 x float>* %src) { 11*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v4f32: 12*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 13*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 14*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 15*9880d681SAndroid Build Coastguard Worker; 16*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v4f32: 17*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 18*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 19*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 20*9880d681SAndroid Build Coastguard Worker; 21*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_v4f32: 22*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 23*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdqa (%rdi), %xmm0 24*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 25*9880d681SAndroid Build Coastguard Worker; 26*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v4f32: 27*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 28*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %xmm0 29*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 30*9880d681SAndroid Build Coastguard Worker %1 = load <4 x float>, <4 x float>* %src, align 16, !nontemporal !1 31*9880d681SAndroid Build Coastguard Worker ret <4 x float> %1 32*9880d681SAndroid Build Coastguard Worker} 33*9880d681SAndroid Build Coastguard Worker 34*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @test_v4i32(<4 x i32>* %src) { 35*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v4i32: 36*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 37*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 38*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 39*9880d681SAndroid Build Coastguard Worker; 40*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v4i32: 41*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 42*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 43*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 44*9880d681SAndroid Build Coastguard Worker; 45*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_v4i32: 46*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 47*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdqa (%rdi), %xmm0 48*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 49*9880d681SAndroid Build Coastguard Worker; 50*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_v4i32: 51*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 52*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovntdqa (%rdi), %xmm0 53*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 54*9880d681SAndroid Build Coastguard Worker; 55*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_v4i32: 56*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 57*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovntdqa (%rdi), %xmm0 58*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 59*9880d681SAndroid Build Coastguard Worker; 60*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_v4i32: 61*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 62*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovdqa32 (%rdi), %xmm0 63*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 64*9880d681SAndroid Build Coastguard Worker %1 = load <4 x i32>, <4 x i32>* %src, align 16, !nontemporal !1 65*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %1 66*9880d681SAndroid Build Coastguard Worker} 67*9880d681SAndroid Build Coastguard Worker 68*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_v2f64(<2 x double>* %src) { 69*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v2f64: 70*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 71*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 72*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 73*9880d681SAndroid Build Coastguard Worker; 74*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v2f64: 75*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 76*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 77*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 78*9880d681SAndroid Build Coastguard Worker; 79*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_v2f64: 80*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 81*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdqa (%rdi), %xmm0 82*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 83*9880d681SAndroid Build Coastguard Worker; 84*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v2f64: 85*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 86*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %xmm0 87*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 88*9880d681SAndroid Build Coastguard Worker %1 = load <2 x double>, <2 x double>* %src, align 16, !nontemporal !1 89*9880d681SAndroid Build Coastguard Worker ret <2 x double> %1 90*9880d681SAndroid Build Coastguard Worker} 91*9880d681SAndroid Build Coastguard Worker 92*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_v2i64(<2 x i64>* %src) { 93*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v2i64: 94*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 95*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 96*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 97*9880d681SAndroid Build Coastguard Worker; 98*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v2i64: 99*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 100*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 101*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 102*9880d681SAndroid Build Coastguard Worker; 103*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_v2i64: 104*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 105*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdqa (%rdi), %xmm0 106*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 107*9880d681SAndroid Build Coastguard Worker; 108*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v2i64: 109*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 110*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %xmm0 111*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 112*9880d681SAndroid Build Coastguard Worker %1 = load <2 x i64>, <2 x i64>* %src, align 16, !nontemporal !1 113*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %1 114*9880d681SAndroid Build Coastguard Worker} 115*9880d681SAndroid Build Coastguard Worker 116*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @test_v8i16(<8 x i16>* %src) { 117*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v8i16: 118*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 119*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 120*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 121*9880d681SAndroid Build Coastguard Worker; 122*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v8i16: 123*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 124*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 125*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 126*9880d681SAndroid Build Coastguard Worker; 127*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_v8i16: 128*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 129*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdqa (%rdi), %xmm0 130*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 131*9880d681SAndroid Build Coastguard Worker; 132*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v8i16: 133*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 134*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %xmm0 135*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 136*9880d681SAndroid Build Coastguard Worker %1 = load <8 x i16>, <8 x i16>* %src, align 16, !nontemporal !1 137*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %1 138*9880d681SAndroid Build Coastguard Worker} 139*9880d681SAndroid Build Coastguard Worker 140*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @test_v16i8(<16 x i8>* %src) { 141*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v16i8: 142*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 143*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 144*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 145*9880d681SAndroid Build Coastguard Worker; 146*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v16i8: 147*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 148*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 149*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 150*9880d681SAndroid Build Coastguard Worker; 151*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_v16i8: 152*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 153*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovntdqa (%rdi), %xmm0 154*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 155*9880d681SAndroid Build Coastguard Worker; 156*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v16i8: 157*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 158*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %xmm0 159*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 160*9880d681SAndroid Build Coastguard Worker %1 = load <16 x i8>, <16 x i8>* %src, align 16, !nontemporal !1 161*9880d681SAndroid Build Coastguard Worker ret <16 x i8> %1 162*9880d681SAndroid Build Coastguard Worker} 163*9880d681SAndroid Build Coastguard Worker 164*9880d681SAndroid Build Coastguard Worker; And now YMM versions. 165*9880d681SAndroid Build Coastguard Worker 166*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_v8f32(<8 x float>* %src) { 167*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v8f32: 168*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 169*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 170*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 171*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 172*9880d681SAndroid Build Coastguard Worker; 173*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v8f32: 174*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 175*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 176*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 177*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 178*9880d681SAndroid Build Coastguard Worker; 179*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v8f32: 180*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 181*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 182*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 183*9880d681SAndroid Build Coastguard Worker; 184*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v8f32: 185*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 186*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 187*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 188*9880d681SAndroid Build Coastguard Worker; 189*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v8f32: 190*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 191*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %ymm0 192*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 193*9880d681SAndroid Build Coastguard Worker %1 = load <8 x float>, <8 x float>* %src, align 32, !nontemporal !1 194*9880d681SAndroid Build Coastguard Worker ret <8 x float> %1 195*9880d681SAndroid Build Coastguard Worker} 196*9880d681SAndroid Build Coastguard Worker 197*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test_v8i32(<8 x i32>* %src) { 198*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v8i32: 199*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 200*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 201*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 202*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 203*9880d681SAndroid Build Coastguard Worker; 204*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v8i32: 205*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 206*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 207*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 208*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 209*9880d681SAndroid Build Coastguard Worker; 210*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v8i32: 211*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 212*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 213*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 214*9880d681SAndroid Build Coastguard Worker; 215*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v8i32: 216*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 217*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 218*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 219*9880d681SAndroid Build Coastguard Worker; 220*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_v8i32: 221*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 222*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovntdqa (%rdi), %ymm0 223*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 224*9880d681SAndroid Build Coastguard Worker; 225*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_v8i32: 226*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 227*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovntdqa (%rdi), %ymm0 228*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 229*9880d681SAndroid Build Coastguard Worker; 230*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_v8i32: 231*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 232*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovdqa32 (%rdi), %ymm0 233*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 234*9880d681SAndroid Build Coastguard Worker %1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1 235*9880d681SAndroid Build Coastguard Worker ret <8 x i32> %1 236*9880d681SAndroid Build Coastguard Worker} 237*9880d681SAndroid Build Coastguard Worker 238*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_v4f64(<4 x double>* %src) { 239*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v4f64: 240*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 241*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 242*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 243*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 244*9880d681SAndroid Build Coastguard Worker; 245*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v4f64: 246*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 247*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 248*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 249*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 250*9880d681SAndroid Build Coastguard Worker; 251*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v4f64: 252*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 253*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 254*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 255*9880d681SAndroid Build Coastguard Worker; 256*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v4f64: 257*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 258*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 259*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 260*9880d681SAndroid Build Coastguard Worker; 261*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v4f64: 262*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 263*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %ymm0 264*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 265*9880d681SAndroid Build Coastguard Worker %1 = load <4 x double>, <4 x double>* %src, align 32, !nontemporal !1 266*9880d681SAndroid Build Coastguard Worker ret <4 x double> %1 267*9880d681SAndroid Build Coastguard Worker} 268*9880d681SAndroid Build Coastguard Worker 269*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_v4i64(<4 x i64>* %src) { 270*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v4i64: 271*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 272*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 273*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 274*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 275*9880d681SAndroid Build Coastguard Worker; 276*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v4i64: 277*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 278*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 279*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 280*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 281*9880d681SAndroid Build Coastguard Worker; 282*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v4i64: 283*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 284*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 285*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 286*9880d681SAndroid Build Coastguard Worker; 287*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v4i64: 288*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 289*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 290*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 291*9880d681SAndroid Build Coastguard Worker; 292*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v4i64: 293*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 294*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %ymm0 295*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 296*9880d681SAndroid Build Coastguard Worker %1 = load <4 x i64>, <4 x i64>* %src, align 32, !nontemporal !1 297*9880d681SAndroid Build Coastguard Worker ret <4 x i64> %1 298*9880d681SAndroid Build Coastguard Worker} 299*9880d681SAndroid Build Coastguard Worker 300*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @test_v16i16(<16 x i16>* %src) { 301*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v16i16: 302*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 303*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 304*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 305*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 306*9880d681SAndroid Build Coastguard Worker; 307*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v16i16: 308*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 309*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 310*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 311*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 312*9880d681SAndroid Build Coastguard Worker; 313*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v16i16: 314*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 315*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 316*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 317*9880d681SAndroid Build Coastguard Worker; 318*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v16i16: 319*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 320*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 321*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 322*9880d681SAndroid Build Coastguard Worker; 323*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v16i16: 324*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 325*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %ymm0 326*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 327*9880d681SAndroid Build Coastguard Worker %1 = load <16 x i16>, <16 x i16>* %src, align 32, !nontemporal !1 328*9880d681SAndroid Build Coastguard Worker ret <16 x i16> %1 329*9880d681SAndroid Build Coastguard Worker} 330*9880d681SAndroid Build Coastguard Worker 331*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @test_v32i8(<32 x i8>* %src) { 332*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v32i8: 333*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 334*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 335*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 336*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 337*9880d681SAndroid Build Coastguard Worker; 338*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v32i8: 339*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 340*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 341*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 342*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 343*9880d681SAndroid Build Coastguard Worker; 344*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v32i8: 345*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 346*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 347*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 348*9880d681SAndroid Build Coastguard Worker; 349*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v32i8: 350*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 351*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 352*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 353*9880d681SAndroid Build Coastguard Worker; 354*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v32i8: 355*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 356*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %ymm0 357*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 358*9880d681SAndroid Build Coastguard Worker %1 = load <32 x i8>, <32 x i8>* %src, align 32, !nontemporal !1 359*9880d681SAndroid Build Coastguard Worker ret <32 x i8> %1 360*9880d681SAndroid Build Coastguard Worker} 361*9880d681SAndroid Build Coastguard Worker 362*9880d681SAndroid Build Coastguard Worker; And now ZMM versions. 363*9880d681SAndroid Build Coastguard Worker 364*9880d681SAndroid Build Coastguard Workerdefine <16 x float> @test_v16f32(<16 x float>* %src) { 365*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v16f32: 366*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 367*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 368*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 369*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 32(%rdi), %xmm2 370*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 48(%rdi), %xmm3 371*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 372*9880d681SAndroid Build Coastguard Worker; 373*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v16f32: 374*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 375*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 376*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 377*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 32(%rdi), %xmm2 378*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 48(%rdi), %xmm3 379*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 380*9880d681SAndroid Build Coastguard Worker; 381*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v16f32: 382*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 383*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 384*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 385*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 386*9880d681SAndroid Build Coastguard Worker; 387*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v16f32: 388*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 389*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 390*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm1 391*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 392*9880d681SAndroid Build Coastguard Worker; 393*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v16f32: 394*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 395*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %zmm0 396*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 397*9880d681SAndroid Build Coastguard Worker %1 = load <16 x float>, <16 x float>* %src, align 64, !nontemporal !1 398*9880d681SAndroid Build Coastguard Worker ret <16 x float> %1 399*9880d681SAndroid Build Coastguard Worker} 400*9880d681SAndroid Build Coastguard Worker 401*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @test_v16i32(<16 x i32>* %src) { 402*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v16i32: 403*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 404*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 405*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 406*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 32(%rdi), %xmm2 407*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 48(%rdi), %xmm3 408*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 409*9880d681SAndroid Build Coastguard Worker; 410*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v16i32: 411*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 412*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 413*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 414*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 32(%rdi), %xmm2 415*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 48(%rdi), %xmm3 416*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 417*9880d681SAndroid Build Coastguard Worker; 418*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v16i32: 419*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 420*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 421*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 422*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 423*9880d681SAndroid Build Coastguard Worker; 424*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v16i32: 425*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 426*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 427*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm1 428*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 429*9880d681SAndroid Build Coastguard Worker; 430*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v16i32: 431*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 432*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %zmm0 433*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 434*9880d681SAndroid Build Coastguard Worker %1 = load <16 x i32>, <16 x i32>* %src, align 64, !nontemporal !1 435*9880d681SAndroid Build Coastguard Worker ret <16 x i32> %1 436*9880d681SAndroid Build Coastguard Worker} 437*9880d681SAndroid Build Coastguard Worker 438*9880d681SAndroid Build Coastguard Workerdefine <8 x double> @test_v8f64(<8 x double>* %src) { 439*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v8f64: 440*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 441*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 442*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 443*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 32(%rdi), %xmm2 444*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 48(%rdi), %xmm3 445*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 446*9880d681SAndroid Build Coastguard Worker; 447*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v8f64: 448*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 449*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 450*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 451*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 32(%rdi), %xmm2 452*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 48(%rdi), %xmm3 453*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 454*9880d681SAndroid Build Coastguard Worker; 455*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v8f64: 456*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 457*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 458*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 459*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 460*9880d681SAndroid Build Coastguard Worker; 461*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v8f64: 462*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 463*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 464*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm1 465*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 466*9880d681SAndroid Build Coastguard Worker; 467*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v8f64: 468*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 469*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %zmm0 470*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 471*9880d681SAndroid Build Coastguard Worker %1 = load <8 x double>, <8 x double>* %src, align 64, !nontemporal !1 472*9880d681SAndroid Build Coastguard Worker ret <8 x double> %1 473*9880d681SAndroid Build Coastguard Worker} 474*9880d681SAndroid Build Coastguard Worker 475*9880d681SAndroid Build Coastguard Workerdefine <8 x i64> @test_v8i64(<8 x i64>* %src) { 476*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v8i64: 477*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 478*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 479*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 480*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 32(%rdi), %xmm2 481*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 48(%rdi), %xmm3 482*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 483*9880d681SAndroid Build Coastguard Worker; 484*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v8i64: 485*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 486*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 487*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 488*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 32(%rdi), %xmm2 489*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 48(%rdi), %xmm3 490*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 491*9880d681SAndroid Build Coastguard Worker; 492*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v8i64: 493*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 494*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 495*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 496*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 497*9880d681SAndroid Build Coastguard Worker; 498*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v8i64: 499*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 500*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 501*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm1 502*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 503*9880d681SAndroid Build Coastguard Worker; 504*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_v8i64: 505*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 506*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovntdqa (%rdi), %zmm0 507*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 508*9880d681SAndroid Build Coastguard Worker %1 = load <8 x i64>, <8 x i64>* %src, align 64, !nontemporal !1 509*9880d681SAndroid Build Coastguard Worker ret <8 x i64> %1 510*9880d681SAndroid Build Coastguard Worker} 511*9880d681SAndroid Build Coastguard Worker 512*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @test_v32i16(<32 x i16>* %src) { 513*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v32i16: 514*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 515*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 516*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 517*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 32(%rdi), %xmm2 518*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 48(%rdi), %xmm3 519*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 520*9880d681SAndroid Build Coastguard Worker; 521*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v32i16: 522*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 523*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 524*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 525*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 32(%rdi), %xmm2 526*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 48(%rdi), %xmm3 527*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 528*9880d681SAndroid Build Coastguard Worker; 529*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v32i16: 530*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 531*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 532*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 533*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 534*9880d681SAndroid Build Coastguard Worker; 535*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v32i16: 536*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 537*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 538*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm1 539*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 540*9880d681SAndroid Build Coastguard Worker; 541*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_v32i16: 542*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 543*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovntdqa (%rdi), %ymm0 544*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovntdqa 32(%rdi), %ymm1 545*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 546*9880d681SAndroid Build Coastguard Worker; 547*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_v32i16: 548*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 549*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovntdqa (%rdi), %zmm0 550*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 551*9880d681SAndroid Build Coastguard Worker; 552*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_v32i16: 553*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 554*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovntdqa (%rdi), %ymm0 555*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovntdqa 32(%rdi), %ymm1 556*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 557*9880d681SAndroid Build Coastguard Worker %1 = load <32 x i16>, <32 x i16>* %src, align 64, !nontemporal !1 558*9880d681SAndroid Build Coastguard Worker ret <32 x i16> %1 559*9880d681SAndroid Build Coastguard Worker} 560*9880d681SAndroid Build Coastguard Worker 561*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @test_v64i8(<64 x i8>* %src) { 562*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: test_v64i8: 563*9880d681SAndroid Build Coastguard Worker; SSE2: # BB#0: 564*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps (%rdi), %xmm0 565*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 16(%rdi), %xmm1 566*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 32(%rdi), %xmm2 567*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: movaps 48(%rdi), %xmm3 568*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT: retq 569*9880d681SAndroid Build Coastguard Worker; 570*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: test_v64i8: 571*9880d681SAndroid Build Coastguard Worker; SSE41: # BB#0: 572*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa (%rdi), %xmm0 573*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 16(%rdi), %xmm1 574*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 32(%rdi), %xmm2 575*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: movntdqa 48(%rdi), %xmm3 576*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT: retq 577*9880d681SAndroid Build Coastguard Worker; 578*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_v64i8: 579*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 580*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm0 581*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 582*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 583*9880d681SAndroid Build Coastguard Worker; 584*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_v64i8: 585*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 586*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa (%rdi), %ymm0 587*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm1 588*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 589*9880d681SAndroid Build Coastguard Worker; 590*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_v64i8: 591*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 592*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovntdqa (%rdi), %ymm0 593*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovntdqa 32(%rdi), %ymm1 594*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 595*9880d681SAndroid Build Coastguard Worker; 596*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_v64i8: 597*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 598*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovntdqa (%rdi), %zmm0 599*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 600*9880d681SAndroid Build Coastguard Worker; 601*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_v64i8: 602*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 603*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovntdqa (%rdi), %ymm0 604*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovntdqa 32(%rdi), %ymm1 605*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 606*9880d681SAndroid Build Coastguard Worker %1 = load <64 x i8>, <64 x i8>* %src, align 64, !nontemporal !1 607*9880d681SAndroid Build Coastguard Worker ret <64 x i8> %1 608*9880d681SAndroid Build Coastguard Worker} 609*9880d681SAndroid Build Coastguard Worker 610*9880d681SAndroid Build Coastguard Worker 611*9880d681SAndroid Build Coastguard Worker; Check cases where the load would be folded. 612*9880d681SAndroid Build Coastguard Worker 613*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_arg_v4f32(<4 x float> %arg, <4 x float>* %src) { 614*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v4f32: 615*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 616*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addps (%rdi), %xmm0 617*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 618*9880d681SAndroid Build Coastguard Worker; 619*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v4f32: 620*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 621*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vaddps (%rdi), %xmm0, %xmm0 622*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 623*9880d681SAndroid Build Coastguard Worker; 624*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v4f32: 625*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 626*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vaddps (%rdi), %xmm0, %xmm0 627*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 628*9880d681SAndroid Build Coastguard Worker %1 = load <4 x float>, <4 x float>* %src, align 16, !nontemporal !1 629*9880d681SAndroid Build Coastguard Worker %2 = fadd <4 x float> %arg, %1 630*9880d681SAndroid Build Coastguard Worker ret <4 x float> %2 631*9880d681SAndroid Build Coastguard Worker} 632*9880d681SAndroid Build Coastguard Worker 633*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @test_arg_v4i32(<4 x i32> %arg, <4 x i32>* %src) { 634*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v4i32: 635*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 636*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddd (%rdi), %xmm0 637*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 638*9880d681SAndroid Build Coastguard Worker; 639*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v4i32: 640*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 641*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 642*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 643*9880d681SAndroid Build Coastguard Worker; 644*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v4i32: 645*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 646*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vpaddd (%rdi), %xmm0, %xmm0 647*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 648*9880d681SAndroid Build Coastguard Worker %1 = load <4 x i32>, <4 x i32>* %src, align 16, !nontemporal !1 649*9880d681SAndroid Build Coastguard Worker %2 = add <4 x i32> %arg, %1 650*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %2 651*9880d681SAndroid Build Coastguard Worker} 652*9880d681SAndroid Build Coastguard Worker 653*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_arg_v2f64(<2 x double> %arg, <2 x double>* %src) { 654*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v2f64: 655*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 656*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addpd (%rdi), %xmm0 657*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 658*9880d681SAndroid Build Coastguard Worker; 659*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v2f64: 660*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 661*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 662*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 663*9880d681SAndroid Build Coastguard Worker; 664*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v2f64: 665*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 666*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vaddpd (%rdi), %xmm0, %xmm0 667*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 668*9880d681SAndroid Build Coastguard Worker %1 = load <2 x double>, <2 x double>* %src, align 16, !nontemporal !1 669*9880d681SAndroid Build Coastguard Worker %2 = fadd <2 x double> %arg, %1 670*9880d681SAndroid Build Coastguard Worker ret <2 x double> %2 671*9880d681SAndroid Build Coastguard Worker} 672*9880d681SAndroid Build Coastguard Worker 673*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_arg_v2i64(<2 x i64> %arg, <2 x i64>* %src) { 674*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v2i64: 675*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 676*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq (%rdi), %xmm0 677*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 678*9880d681SAndroid Build Coastguard Worker; 679*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v2i64: 680*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 681*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 682*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 683*9880d681SAndroid Build Coastguard Worker; 684*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v2i64: 685*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 686*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vpaddq (%rdi), %xmm0, %xmm0 687*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 688*9880d681SAndroid Build Coastguard Worker %1 = load <2 x i64>, <2 x i64>* %src, align 16, !nontemporal !1 689*9880d681SAndroid Build Coastguard Worker %2 = add <2 x i64> %arg, %1 690*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %2 691*9880d681SAndroid Build Coastguard Worker} 692*9880d681SAndroid Build Coastguard Worker 693*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @test_arg_v8i16(<8 x i16> %arg, <8 x i16>* %src) { 694*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8i16: 695*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 696*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddw (%rdi), %xmm0 697*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 698*9880d681SAndroid Build Coastguard Worker; 699*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v8i16: 700*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 701*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 702*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 703*9880d681SAndroid Build Coastguard Worker; 704*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v8i16: 705*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 706*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vpaddw (%rdi), %xmm0, %xmm0 707*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 708*9880d681SAndroid Build Coastguard Worker %1 = load <8 x i16>, <8 x i16>* %src, align 16, !nontemporal !1 709*9880d681SAndroid Build Coastguard Worker %2 = add <8 x i16> %arg, %1 710*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %2 711*9880d681SAndroid Build Coastguard Worker} 712*9880d681SAndroid Build Coastguard Worker 713*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @test_arg_v16i8(<16 x i8> %arg, <16 x i8>* %src) { 714*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v16i8: 715*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 716*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddb (%rdi), %xmm0 717*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 718*9880d681SAndroid Build Coastguard Worker; 719*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v16i8: 720*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 721*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 722*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 723*9880d681SAndroid Build Coastguard Worker; 724*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v16i8: 725*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 726*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vpaddb (%rdi), %xmm0, %xmm0 727*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 728*9880d681SAndroid Build Coastguard Worker %1 = load <16 x i8>, <16 x i8>* %src, align 16, !nontemporal !1 729*9880d681SAndroid Build Coastguard Worker %2 = add <16 x i8> %arg, %1 730*9880d681SAndroid Build Coastguard Worker ret <16 x i8> %2 731*9880d681SAndroid Build Coastguard Worker} 732*9880d681SAndroid Build Coastguard Worker 733*9880d681SAndroid Build Coastguard Worker; And now YMM versions. 734*9880d681SAndroid Build Coastguard Worker 735*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_arg_v8f32(<8 x float> %arg, <8 x float>* %src) { 736*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8f32: 737*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 738*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addps (%rdi), %xmm0 739*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addps 16(%rdi), %xmm1 740*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 741*9880d681SAndroid Build Coastguard Worker; 742*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v8f32: 743*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 744*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vaddps (%rdi), %ymm0, %ymm0 745*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 746*9880d681SAndroid Build Coastguard Worker; 747*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v8f32: 748*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 749*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vaddps (%rdi), %ymm0, %ymm0 750*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 751*9880d681SAndroid Build Coastguard Worker %1 = load <8 x float>, <8 x float>* %src, align 32, !nontemporal !1 752*9880d681SAndroid Build Coastguard Worker %2 = fadd <8 x float> %arg, %1 753*9880d681SAndroid Build Coastguard Worker ret <8 x float> %2 754*9880d681SAndroid Build Coastguard Worker} 755*9880d681SAndroid Build Coastguard Worker 756*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test_arg_v8i32(<8 x i32> %arg, <8 x i32>* %src) { 757*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8i32: 758*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 759*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddd (%rdi), %xmm0 760*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddd 16(%rdi), %xmm1 761*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 762*9880d681SAndroid Build Coastguard Worker; 763*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v8i32: 764*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 765*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm1 766*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 767*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 768*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 769*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 770*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 771*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 772*9880d681SAndroid Build Coastguard Worker; 773*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v8i32: 774*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 775*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddd (%rdi), %ymm0, %ymm0 776*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 777*9880d681SAndroid Build Coastguard Worker; 778*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v8i32: 779*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 780*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vpaddd (%rdi), %ymm0, %ymm0 781*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 782*9880d681SAndroid Build Coastguard Worker %1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1 783*9880d681SAndroid Build Coastguard Worker %2 = add <8 x i32> %arg, %1 784*9880d681SAndroid Build Coastguard Worker ret <8 x i32> %2 785*9880d681SAndroid Build Coastguard Worker} 786*9880d681SAndroid Build Coastguard Worker 787*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_arg_v4f64(<4 x double> %arg, <4 x double>* %src) { 788*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v4f64: 789*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 790*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addpd (%rdi), %xmm0 791*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addpd 16(%rdi), %xmm1 792*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 793*9880d681SAndroid Build Coastguard Worker; 794*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v4f64: 795*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 796*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 797*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 798*9880d681SAndroid Build Coastguard Worker; 799*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v4f64: 800*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 801*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vaddpd (%rdi), %ymm0, %ymm0 802*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 803*9880d681SAndroid Build Coastguard Worker %1 = load <4 x double>, <4 x double>* %src, align 32, !nontemporal !1 804*9880d681SAndroid Build Coastguard Worker %2 = fadd <4 x double> %arg, %1 805*9880d681SAndroid Build Coastguard Worker ret <4 x double> %2 806*9880d681SAndroid Build Coastguard Worker} 807*9880d681SAndroid Build Coastguard Worker 808*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %src) { 809*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v4i64: 810*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 811*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq (%rdi), %xmm0 812*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq 16(%rdi), %xmm1 813*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 814*9880d681SAndroid Build Coastguard Worker; 815*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v4i64: 816*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 817*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm1 818*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 819*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 820*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddq %xmm3, %xmm2, %xmm2 821*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 822*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 823*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 824*9880d681SAndroid Build Coastguard Worker; 825*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v4i64: 826*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 827*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddq (%rdi), %ymm0, %ymm0 828*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 829*9880d681SAndroid Build Coastguard Worker; 830*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v4i64: 831*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 832*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vpaddq (%rdi), %ymm0, %ymm0 833*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 834*9880d681SAndroid Build Coastguard Worker %1 = load <4 x i64>, <4 x i64>* %src, align 32, !nontemporal !1 835*9880d681SAndroid Build Coastguard Worker %2 = add <4 x i64> %arg, %1 836*9880d681SAndroid Build Coastguard Worker ret <4 x i64> %2 837*9880d681SAndroid Build Coastguard Worker} 838*9880d681SAndroid Build Coastguard Worker 839*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %src) { 840*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v16i16: 841*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 842*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddw (%rdi), %xmm0 843*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddw 16(%rdi), %xmm1 844*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 845*9880d681SAndroid Build Coastguard Worker; 846*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v16i16: 847*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 848*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm1 849*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 850*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 851*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2 852*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 853*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 854*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 855*9880d681SAndroid Build Coastguard Worker; 856*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v16i16: 857*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 858*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddw (%rdi), %ymm0, %ymm0 859*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 860*9880d681SAndroid Build Coastguard Worker; 861*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v16i16: 862*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 863*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vpaddw (%rdi), %ymm0, %ymm0 864*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 865*9880d681SAndroid Build Coastguard Worker %1 = load <16 x i16>, <16 x i16>* %src, align 32, !nontemporal !1 866*9880d681SAndroid Build Coastguard Worker %2 = add <16 x i16> %arg, %1 867*9880d681SAndroid Build Coastguard Worker ret <16 x i16> %2 868*9880d681SAndroid Build Coastguard Worker} 869*9880d681SAndroid Build Coastguard Worker 870*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %src) { 871*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v32i8: 872*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 873*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddb (%rdi), %xmm0 874*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddb 16(%rdi), %xmm1 875*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 876*9880d681SAndroid Build Coastguard Worker; 877*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v32i8: 878*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 879*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm1 880*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 881*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 882*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddb %xmm3, %xmm2, %xmm2 883*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 884*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 885*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 886*9880d681SAndroid Build Coastguard Worker; 887*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v32i8: 888*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 889*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddb (%rdi), %ymm0, %ymm0 890*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 891*9880d681SAndroid Build Coastguard Worker; 892*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v32i8: 893*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 894*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vpaddb (%rdi), %ymm0, %ymm0 895*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 896*9880d681SAndroid Build Coastguard Worker %1 = load <32 x i8>, <32 x i8>* %src, align 32, !nontemporal !1 897*9880d681SAndroid Build Coastguard Worker %2 = add <32 x i8> %arg, %1 898*9880d681SAndroid Build Coastguard Worker ret <32 x i8> %2 899*9880d681SAndroid Build Coastguard Worker} 900*9880d681SAndroid Build Coastguard Worker 901*9880d681SAndroid Build Coastguard Worker; And now ZMM versions. 902*9880d681SAndroid Build Coastguard Worker 903*9880d681SAndroid Build Coastguard Workerdefine <16 x float> @test_arg_v16f32(<16 x float> %arg, <16 x float>* %src) { 904*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v16f32: 905*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 906*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addps (%rdi), %xmm0 907*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addps 16(%rdi), %xmm1 908*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addps 32(%rdi), %xmm2 909*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addps 48(%rdi), %xmm3 910*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 911*9880d681SAndroid Build Coastguard Worker; 912*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v16f32: 913*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 914*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vaddps (%rdi), %ymm0, %ymm0 915*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vaddps 32(%rdi), %ymm1, %ymm1 916*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 917*9880d681SAndroid Build Coastguard Worker; 918*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v16f32: 919*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 920*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vaddps (%rdi), %zmm0, %zmm0 921*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 922*9880d681SAndroid Build Coastguard Worker %1 = load <16 x float>, <16 x float>* %src, align 64, !nontemporal !1 923*9880d681SAndroid Build Coastguard Worker %2 = fadd <16 x float> %arg, %1 924*9880d681SAndroid Build Coastguard Worker ret <16 x float> %2 925*9880d681SAndroid Build Coastguard Worker} 926*9880d681SAndroid Build Coastguard Worker 927*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @test_arg_v16i32(<16 x i32> %arg, <16 x i32>* %src) { 928*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v16i32: 929*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 930*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddd (%rdi), %xmm0 931*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddd 16(%rdi), %xmm1 932*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddd 32(%rdi), %xmm2 933*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddd 48(%rdi), %xmm3 934*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 935*9880d681SAndroid Build Coastguard Worker; 936*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v16i32: 937*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 938*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm2 939*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps 32(%rdi), %ymm3 940*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 941*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 942*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddd %xmm5, %xmm4, %xmm4 943*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0 944*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 945*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 946*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 947*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddd %xmm4, %xmm2, %xmm2 948*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1 949*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 950*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 951*9880d681SAndroid Build Coastguard Worker; 952*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v16i32: 953*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 954*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddd (%rdi), %ymm0, %ymm0 955*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddd 32(%rdi), %ymm1, %ymm1 956*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 957*9880d681SAndroid Build Coastguard Worker; 958*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v16i32: 959*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 960*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vpaddd (%rdi), %zmm0, %zmm0 961*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 962*9880d681SAndroid Build Coastguard Worker %1 = load <16 x i32>, <16 x i32>* %src, align 64, !nontemporal !1 963*9880d681SAndroid Build Coastguard Worker %2 = add <16 x i32> %arg, %1 964*9880d681SAndroid Build Coastguard Worker ret <16 x i32> %2 965*9880d681SAndroid Build Coastguard Worker} 966*9880d681SAndroid Build Coastguard Worker 967*9880d681SAndroid Build Coastguard Workerdefine <8 x double> @test_arg_v8f64(<8 x double> %arg, <8 x double>* %src) { 968*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8f64: 969*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 970*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addpd (%rdi), %xmm0 971*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addpd 16(%rdi), %xmm1 972*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addpd 32(%rdi), %xmm2 973*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: addpd 48(%rdi), %xmm3 974*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 975*9880d681SAndroid Build Coastguard Worker; 976*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_arg_v8f64: 977*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 978*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 979*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vaddpd 32(%rdi), %ymm1, %ymm1 980*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 981*9880d681SAndroid Build Coastguard Worker; 982*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v8f64: 983*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 984*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vaddpd (%rdi), %zmm0, %zmm0 985*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 986*9880d681SAndroid Build Coastguard Worker %1 = load <8 x double>, <8 x double>* %src, align 64, !nontemporal !1 987*9880d681SAndroid Build Coastguard Worker %2 = fadd <8 x double> %arg, %1 988*9880d681SAndroid Build Coastguard Worker ret <8 x double> %2 989*9880d681SAndroid Build Coastguard Worker} 990*9880d681SAndroid Build Coastguard Worker 991*9880d681SAndroid Build Coastguard Workerdefine <8 x i64> @test_arg_v8i64(<8 x i64> %arg, <8 x i64>* %src) { 992*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v8i64: 993*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 994*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq (%rdi), %xmm0 995*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq 16(%rdi), %xmm1 996*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq 32(%rdi), %xmm2 997*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddq 48(%rdi), %xmm3 998*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 999*9880d681SAndroid Build Coastguard Worker; 1000*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v8i64: 1001*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 1002*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm2 1003*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps 32(%rdi), %ymm3 1004*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 1005*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 1006*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddq %xmm5, %xmm4, %xmm4 1007*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0 1008*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 1009*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1010*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 1011*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddq %xmm4, %xmm2, %xmm2 1012*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm1 1013*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1014*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 1015*9880d681SAndroid Build Coastguard Worker; 1016*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v8i64: 1017*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 1018*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddq (%rdi), %ymm0, %ymm0 1019*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddq 32(%rdi), %ymm1, %ymm1 1020*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 1021*9880d681SAndroid Build Coastguard Worker; 1022*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_arg_v8i64: 1023*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 1024*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vpaddq (%rdi), %zmm0, %zmm0 1025*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 1026*9880d681SAndroid Build Coastguard Worker %1 = load <8 x i64>, <8 x i64>* %src, align 64, !nontemporal !1 1027*9880d681SAndroid Build Coastguard Worker %2 = add <8 x i64> %arg, %1 1028*9880d681SAndroid Build Coastguard Worker ret <8 x i64> %2 1029*9880d681SAndroid Build Coastguard Worker} 1030*9880d681SAndroid Build Coastguard Worker 1031*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @test_arg_v32i16(<32 x i16> %arg, <32 x i16>* %src) { 1032*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v32i16: 1033*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1034*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddw (%rdi), %xmm0 1035*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddw 16(%rdi), %xmm1 1036*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddw 32(%rdi), %xmm2 1037*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddw 48(%rdi), %xmm3 1038*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1039*9880d681SAndroid Build Coastguard Worker; 1040*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v32i16: 1041*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 1042*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm2 1043*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps 32(%rdi), %ymm3 1044*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 1045*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 1046*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddw %xmm5, %xmm4, %xmm4 1047*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm0 1048*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 1049*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1050*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 1051*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddw %xmm4, %xmm2, %xmm2 1052*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddw %xmm3, %xmm1, %xmm1 1053*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1054*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 1055*9880d681SAndroid Build Coastguard Worker; 1056*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v32i16: 1057*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 1058*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddw (%rdi), %ymm0, %ymm0 1059*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddw 32(%rdi), %ymm1, %ymm1 1060*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 1061*9880d681SAndroid Build Coastguard Worker; 1062*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_arg_v32i16: 1063*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 1064*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpaddw (%rdi), %ymm0, %ymm0 1065*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpaddw 32(%rdi), %ymm1, %ymm1 1066*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1067*9880d681SAndroid Build Coastguard Worker; 1068*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_arg_v32i16: 1069*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 1070*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpaddw (%rdi), %zmm0, %zmm0 1071*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1072*9880d681SAndroid Build Coastguard Worker; 1073*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_arg_v32i16: 1074*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 1075*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 1076*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vpaddw 32(%rdi), %ymm1, %ymm1 1077*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 1078*9880d681SAndroid Build Coastguard Worker %1 = load <32 x i16>, <32 x i16>* %src, align 64, !nontemporal !1 1079*9880d681SAndroid Build Coastguard Worker %2 = add <32 x i16> %arg, %1 1080*9880d681SAndroid Build Coastguard Worker ret <32 x i16> %2 1081*9880d681SAndroid Build Coastguard Worker} 1082*9880d681SAndroid Build Coastguard Worker 1083*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @test_arg_v64i8(<64 x i8> %arg, <64 x i8>* %src) { 1084*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_arg_v64i8: 1085*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1086*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddb (%rdi), %xmm0 1087*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddb 16(%rdi), %xmm1 1088*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddb 32(%rdi), %xmm2 1089*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: paddb 48(%rdi), %xmm3 1090*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1091*9880d681SAndroid Build Coastguard Worker; 1092*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test_arg_v64i8: 1093*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 1094*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps (%rdi), %ymm2 1095*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovaps 32(%rdi), %ymm3 1096*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 1097*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 1098*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddb %xmm5, %xmm4, %xmm4 1099*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 1100*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 1101*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1102*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 1103*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2 1104*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1 1105*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1106*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 1107*9880d681SAndroid Build Coastguard Worker; 1108*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test_arg_v64i8: 1109*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 1110*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddb (%rdi), %ymm0, %ymm0 1111*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddb 32(%rdi), %ymm1, %ymm1 1112*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 1113*9880d681SAndroid Build Coastguard Worker; 1114*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_arg_v64i8: 1115*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 1116*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpaddb (%rdi), %ymm0, %ymm0 1117*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vpaddb 32(%rdi), %ymm1, %ymm1 1118*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1119*9880d681SAndroid Build Coastguard Worker; 1120*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_arg_v64i8: 1121*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 1122*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vpaddb (%rdi), %zmm0, %zmm0 1123*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1124*9880d681SAndroid Build Coastguard Worker; 1125*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_arg_v64i8: 1126*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 1127*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 1128*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vpaddb 32(%rdi), %ymm1, %ymm1 1129*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 1130*9880d681SAndroid Build Coastguard Worker %1 = load <64 x i8>, <64 x i8>* %src, align 64, !nontemporal !1 1131*9880d681SAndroid Build Coastguard Worker %2 = add <64 x i8> %arg, %1 1132*9880d681SAndroid Build Coastguard Worker ret <64 x i8> %2 1133*9880d681SAndroid Build Coastguard Worker} 1134*9880d681SAndroid Build Coastguard Worker 1135*9880d681SAndroid Build Coastguard Worker 1136*9880d681SAndroid Build Coastguard Worker; Unaligned non-temporal loads (not supported) 1137*9880d681SAndroid Build Coastguard Worker 1138*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_unaligned_v4f32(<4 x float>* %src) { 1139*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v4f32: 1140*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1141*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1142*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1143*9880d681SAndroid Build Coastguard Worker; 1144*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v4f32: 1145*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1146*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %xmm0 1147*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1148*9880d681SAndroid Build Coastguard Worker; 1149*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_unaligned_v4f32: 1150*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 1151*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovups (%rdi), %xmm0 1152*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 1153*9880d681SAndroid Build Coastguard Worker %1 = load <4 x float>, <4 x float>* %src, align 1, !nontemporal !1 1154*9880d681SAndroid Build Coastguard Worker ret <4 x float> %1 1155*9880d681SAndroid Build Coastguard Worker} 1156*9880d681SAndroid Build Coastguard Worker 1157*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @test_unaligned_v4i32(<4 x i32>* %src) { 1158*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v4i32: 1159*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1160*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1161*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1162*9880d681SAndroid Build Coastguard Worker; 1163*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v4i32: 1164*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1165*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %xmm0 1166*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1167*9880d681SAndroid Build Coastguard Worker; 1168*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v4i32: 1169*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 1170*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovups (%rdi), %xmm0 1171*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1172*9880d681SAndroid Build Coastguard Worker; 1173*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v4i32: 1174*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 1175*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovups (%rdi), %xmm0 1176*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1177*9880d681SAndroid Build Coastguard Worker; 1178*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v4i32: 1179*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 1180*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovdqu32 (%rdi), %xmm0 1181*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 1182*9880d681SAndroid Build Coastguard Worker %1 = load <4 x i32>, <4 x i32>* %src, align 1, !nontemporal !1 1183*9880d681SAndroid Build Coastguard Worker ret <4 x i32> %1 1184*9880d681SAndroid Build Coastguard Worker} 1185*9880d681SAndroid Build Coastguard Worker 1186*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_unaligned_v2f64(<2 x double>* %src) { 1187*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v2f64: 1188*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1189*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1190*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1191*9880d681SAndroid Build Coastguard Worker; 1192*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v2f64: 1193*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1194*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %xmm0 1195*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1196*9880d681SAndroid Build Coastguard Worker; 1197*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v2f64: 1198*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 1199*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovups (%rdi), %xmm0 1200*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1201*9880d681SAndroid Build Coastguard Worker; 1202*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v2f64: 1203*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 1204*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovups (%rdi), %xmm0 1205*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1206*9880d681SAndroid Build Coastguard Worker; 1207*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v2f64: 1208*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 1209*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovupd (%rdi), %xmm0 1210*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 1211*9880d681SAndroid Build Coastguard Worker %1 = load <2 x double>, <2 x double>* %src, align 1, !nontemporal !1 1212*9880d681SAndroid Build Coastguard Worker ret <2 x double> %1 1213*9880d681SAndroid Build Coastguard Worker} 1214*9880d681SAndroid Build Coastguard Worker 1215*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_unaligned_v2i64(<2 x i64>* %src) { 1216*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v2i64: 1217*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1218*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1219*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1220*9880d681SAndroid Build Coastguard Worker; 1221*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v2i64: 1222*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1223*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %xmm0 1224*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1225*9880d681SAndroid Build Coastguard Worker; 1226*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v2i64: 1227*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 1228*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovups (%rdi), %xmm0 1229*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1230*9880d681SAndroid Build Coastguard Worker; 1231*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v2i64: 1232*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 1233*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovups (%rdi), %xmm0 1234*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1235*9880d681SAndroid Build Coastguard Worker; 1236*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v2i64: 1237*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 1238*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovdqu64 (%rdi), %xmm0 1239*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 1240*9880d681SAndroid Build Coastguard Worker %1 = load <2 x i64>, <2 x i64>* %src, align 1, !nontemporal !1 1241*9880d681SAndroid Build Coastguard Worker ret <2 x i64> %1 1242*9880d681SAndroid Build Coastguard Worker} 1243*9880d681SAndroid Build Coastguard Worker 1244*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @test_unaligned_v8i16(<8 x i16>* %src) { 1245*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v8i16: 1246*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1247*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1248*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1249*9880d681SAndroid Build Coastguard Worker; 1250*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v8i16: 1251*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1252*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %xmm0 1253*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1254*9880d681SAndroid Build Coastguard Worker; 1255*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v8i16: 1256*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 1257*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovups (%rdi), %xmm0 1258*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1259*9880d681SAndroid Build Coastguard Worker; 1260*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v8i16: 1261*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 1262*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovups (%rdi), %xmm0 1263*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1264*9880d681SAndroid Build Coastguard Worker; 1265*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v8i16: 1266*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 1267*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovdqu64 (%rdi), %xmm0 1268*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 1269*9880d681SAndroid Build Coastguard Worker %1 = load <8 x i16>, <8 x i16>* %src, align 1, !nontemporal !1 1270*9880d681SAndroid Build Coastguard Worker ret <8 x i16> %1 1271*9880d681SAndroid Build Coastguard Worker} 1272*9880d681SAndroid Build Coastguard Worker 1273*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @test_unaligned_v16i8(<16 x i8>* %src) { 1274*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v16i8: 1275*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1276*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1277*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1278*9880d681SAndroid Build Coastguard Worker; 1279*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v16i8: 1280*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1281*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %xmm0 1282*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1283*9880d681SAndroid Build Coastguard Worker; 1284*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v16i8: 1285*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 1286*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovups (%rdi), %xmm0 1287*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1288*9880d681SAndroid Build Coastguard Worker; 1289*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v16i8: 1290*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 1291*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovups (%rdi), %xmm0 1292*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1293*9880d681SAndroid Build Coastguard Worker; 1294*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v16i8: 1295*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 1296*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovdqu64 (%rdi), %xmm0 1297*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 1298*9880d681SAndroid Build Coastguard Worker %1 = load <16 x i8>, <16 x i8>* %src, align 1, !nontemporal !1 1299*9880d681SAndroid Build Coastguard Worker ret <16 x i8> %1 1300*9880d681SAndroid Build Coastguard Worker} 1301*9880d681SAndroid Build Coastguard Worker 1302*9880d681SAndroid Build Coastguard Worker; And now YMM versions. 1303*9880d681SAndroid Build Coastguard Worker 1304*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_unaligned_v8f32(<8 x float>* %src) { 1305*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v8f32: 1306*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1307*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1308*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 16(%rdi), %xmm1 1309*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1310*9880d681SAndroid Build Coastguard Worker; 1311*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v8f32: 1312*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1313*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %ymm0 1314*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1315*9880d681SAndroid Build Coastguard Worker; 1316*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_unaligned_v8f32: 1317*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 1318*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovups (%rdi), %ymm0 1319*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 1320*9880d681SAndroid Build Coastguard Worker %1 = load <8 x float>, <8 x float>* %src, align 1, !nontemporal !1 1321*9880d681SAndroid Build Coastguard Worker ret <8 x float> %1 1322*9880d681SAndroid Build Coastguard Worker} 1323*9880d681SAndroid Build Coastguard Worker 1324*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test_unaligned_v8i32(<8 x i32>* %src) { 1325*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v8i32: 1326*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1327*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1328*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 16(%rdi), %xmm1 1329*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1330*9880d681SAndroid Build Coastguard Worker; 1331*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v8i32: 1332*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1333*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %ymm0 1334*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1335*9880d681SAndroid Build Coastguard Worker; 1336*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v8i32: 1337*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 1338*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovups (%rdi), %ymm0 1339*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1340*9880d681SAndroid Build Coastguard Worker; 1341*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v8i32: 1342*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 1343*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovups (%rdi), %ymm0 1344*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1345*9880d681SAndroid Build Coastguard Worker; 1346*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v8i32: 1347*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 1348*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovdqu32 (%rdi), %ymm0 1349*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 1350*9880d681SAndroid Build Coastguard Worker %1 = load <8 x i32>, <8 x i32>* %src, align 1, !nontemporal !1 1351*9880d681SAndroid Build Coastguard Worker ret <8 x i32> %1 1352*9880d681SAndroid Build Coastguard Worker} 1353*9880d681SAndroid Build Coastguard Worker 1354*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_unaligned_v4f64(<4 x double>* %src) { 1355*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v4f64: 1356*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1357*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1358*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 16(%rdi), %xmm1 1359*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1360*9880d681SAndroid Build Coastguard Worker; 1361*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v4f64: 1362*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1363*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %ymm0 1364*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1365*9880d681SAndroid Build Coastguard Worker; 1366*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v4f64: 1367*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 1368*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovups (%rdi), %ymm0 1369*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1370*9880d681SAndroid Build Coastguard Worker; 1371*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v4f64: 1372*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 1373*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovups (%rdi), %ymm0 1374*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1375*9880d681SAndroid Build Coastguard Worker; 1376*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v4f64: 1377*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 1378*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovupd (%rdi), %ymm0 1379*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 1380*9880d681SAndroid Build Coastguard Worker %1 = load <4 x double>, <4 x double>* %src, align 1, !nontemporal !1 1381*9880d681SAndroid Build Coastguard Worker ret <4 x double> %1 1382*9880d681SAndroid Build Coastguard Worker} 1383*9880d681SAndroid Build Coastguard Worker 1384*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_unaligned_v4i64(<4 x i64>* %src) { 1385*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v4i64: 1386*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1387*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1388*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 16(%rdi), %xmm1 1389*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1390*9880d681SAndroid Build Coastguard Worker; 1391*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v4i64: 1392*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1393*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %ymm0 1394*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1395*9880d681SAndroid Build Coastguard Worker; 1396*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v4i64: 1397*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 1398*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovups (%rdi), %ymm0 1399*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1400*9880d681SAndroid Build Coastguard Worker; 1401*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v4i64: 1402*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 1403*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovups (%rdi), %ymm0 1404*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1405*9880d681SAndroid Build Coastguard Worker; 1406*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v4i64: 1407*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 1408*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0 1409*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 1410*9880d681SAndroid Build Coastguard Worker %1 = load <4 x i64>, <4 x i64>* %src, align 1, !nontemporal !1 1411*9880d681SAndroid Build Coastguard Worker ret <4 x i64> %1 1412*9880d681SAndroid Build Coastguard Worker} 1413*9880d681SAndroid Build Coastguard Worker 1414*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @test_unaligned_v16i16(<16 x i16>* %src) { 1415*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v16i16: 1416*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1417*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1418*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 16(%rdi), %xmm1 1419*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1420*9880d681SAndroid Build Coastguard Worker; 1421*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v16i16: 1422*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1423*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %ymm0 1424*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1425*9880d681SAndroid Build Coastguard Worker; 1426*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v16i16: 1427*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 1428*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovups (%rdi), %ymm0 1429*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1430*9880d681SAndroid Build Coastguard Worker; 1431*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v16i16: 1432*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 1433*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovups (%rdi), %ymm0 1434*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1435*9880d681SAndroid Build Coastguard Worker; 1436*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v16i16: 1437*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 1438*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0 1439*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 1440*9880d681SAndroid Build Coastguard Worker %1 = load <16 x i16>, <16 x i16>* %src, align 1, !nontemporal !1 1441*9880d681SAndroid Build Coastguard Worker ret <16 x i16> %1 1442*9880d681SAndroid Build Coastguard Worker} 1443*9880d681SAndroid Build Coastguard Worker 1444*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @test_unaligned_v32i8(<32 x i8>* %src) { 1445*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v32i8: 1446*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1447*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1448*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 16(%rdi), %xmm1 1449*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1450*9880d681SAndroid Build Coastguard Worker; 1451*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v32i8: 1452*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1453*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %ymm0 1454*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1455*9880d681SAndroid Build Coastguard Worker; 1456*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v32i8: 1457*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 1458*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovups (%rdi), %ymm0 1459*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1460*9880d681SAndroid Build Coastguard Worker; 1461*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v32i8: 1462*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 1463*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovups (%rdi), %ymm0 1464*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1465*9880d681SAndroid Build Coastguard Worker; 1466*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v32i8: 1467*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 1468*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0 1469*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 1470*9880d681SAndroid Build Coastguard Worker %1 = load <32 x i8>, <32 x i8>* %src, align 1, !nontemporal !1 1471*9880d681SAndroid Build Coastguard Worker ret <32 x i8> %1 1472*9880d681SAndroid Build Coastguard Worker} 1473*9880d681SAndroid Build Coastguard Worker 1474*9880d681SAndroid Build Coastguard Worker; And now ZMM versions. 1475*9880d681SAndroid Build Coastguard Worker 1476*9880d681SAndroid Build Coastguard Workerdefine <16 x float> @test_unaligned_v16f32(<16 x float>* %src) { 1477*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v16f32: 1478*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1479*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1480*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 16(%rdi), %xmm1 1481*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 32(%rdi), %xmm2 1482*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 48(%rdi), %xmm3 1483*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1484*9880d681SAndroid Build Coastguard Worker; 1485*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v16f32: 1486*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1487*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %ymm0 1488*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups 32(%rdi), %ymm1 1489*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1490*9880d681SAndroid Build Coastguard Worker; 1491*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_unaligned_v16f32: 1492*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 1493*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovups (%rdi), %zmm0 1494*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 1495*9880d681SAndroid Build Coastguard Worker %1 = load <16 x float>, <16 x float>* %src, align 1, !nontemporal !1 1496*9880d681SAndroid Build Coastguard Worker ret <16 x float> %1 1497*9880d681SAndroid Build Coastguard Worker} 1498*9880d681SAndroid Build Coastguard Worker 1499*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @test_unaligned_v16i32(<16 x i32>* %src) { 1500*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v16i32: 1501*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1502*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1503*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 16(%rdi), %xmm1 1504*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 32(%rdi), %xmm2 1505*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 48(%rdi), %xmm3 1506*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1507*9880d681SAndroid Build Coastguard Worker; 1508*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v16i32: 1509*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1510*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %ymm0 1511*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups 32(%rdi), %ymm1 1512*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1513*9880d681SAndroid Build Coastguard Worker; 1514*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_unaligned_v16i32: 1515*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 1516*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovdqu32 (%rdi), %zmm0 1517*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 1518*9880d681SAndroid Build Coastguard Worker %1 = load <16 x i32>, <16 x i32>* %src, align 1, !nontemporal !1 1519*9880d681SAndroid Build Coastguard Worker ret <16 x i32> %1 1520*9880d681SAndroid Build Coastguard Worker} 1521*9880d681SAndroid Build Coastguard Worker 1522*9880d681SAndroid Build Coastguard Workerdefine <8 x double> @test_unaligned_v8f64(<8 x double>* %src) { 1523*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v8f64: 1524*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1525*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1526*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 16(%rdi), %xmm1 1527*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 32(%rdi), %xmm2 1528*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 48(%rdi), %xmm3 1529*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1530*9880d681SAndroid Build Coastguard Worker; 1531*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v8f64: 1532*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1533*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %ymm0 1534*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups 32(%rdi), %ymm1 1535*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1536*9880d681SAndroid Build Coastguard Worker; 1537*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_unaligned_v8f64: 1538*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 1539*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovupd (%rdi), %zmm0 1540*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 1541*9880d681SAndroid Build Coastguard Worker %1 = load <8 x double>, <8 x double>* %src, align 1, !nontemporal !1 1542*9880d681SAndroid Build Coastguard Worker ret <8 x double> %1 1543*9880d681SAndroid Build Coastguard Worker} 1544*9880d681SAndroid Build Coastguard Worker 1545*9880d681SAndroid Build Coastguard Workerdefine <8 x i64> @test_unaligned_v8i64(<8 x i64>* %src) { 1546*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v8i64: 1547*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1548*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1549*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 16(%rdi), %xmm1 1550*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 32(%rdi), %xmm2 1551*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 48(%rdi), %xmm3 1552*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1553*9880d681SAndroid Build Coastguard Worker; 1554*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v8i64: 1555*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1556*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %ymm0 1557*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups 32(%rdi), %ymm1 1558*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1559*9880d681SAndroid Build Coastguard Worker; 1560*9880d681SAndroid Build Coastguard Worker; AVX512-LABEL: test_unaligned_v8i64: 1561*9880d681SAndroid Build Coastguard Worker; AVX512: # BB#0: 1562*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 1563*9880d681SAndroid Build Coastguard Worker; AVX512-NEXT: retq 1564*9880d681SAndroid Build Coastguard Worker %1 = load <8 x i64>, <8 x i64>* %src, align 1, !nontemporal !1 1565*9880d681SAndroid Build Coastguard Worker ret <8 x i64> %1 1566*9880d681SAndroid Build Coastguard Worker} 1567*9880d681SAndroid Build Coastguard Worker 1568*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @test_unaligned_v32i16(<32 x i16>* %src) { 1569*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v32i16: 1570*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1571*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1572*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 16(%rdi), %xmm1 1573*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 32(%rdi), %xmm2 1574*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 48(%rdi), %xmm3 1575*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1576*9880d681SAndroid Build Coastguard Worker; 1577*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v32i16: 1578*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1579*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %ymm0 1580*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups 32(%rdi), %ymm1 1581*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1582*9880d681SAndroid Build Coastguard Worker; 1583*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v32i16: 1584*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 1585*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovups (%rdi), %ymm0 1586*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovups 32(%rdi), %ymm1 1587*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1588*9880d681SAndroid Build Coastguard Worker; 1589*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v32i16: 1590*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 1591*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0 1592*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1593*9880d681SAndroid Build Coastguard Worker; 1594*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v32i16: 1595*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 1596*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0 1597*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovdqu64 32(%rdi), %ymm1 1598*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 1599*9880d681SAndroid Build Coastguard Worker %1 = load <32 x i16>, <32 x i16>* %src, align 1, !nontemporal !1 1600*9880d681SAndroid Build Coastguard Worker ret <32 x i16> %1 1601*9880d681SAndroid Build Coastguard Worker} 1602*9880d681SAndroid Build Coastguard Worker 1603*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @test_unaligned_v64i8(<64 x i8>* %src) { 1604*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test_unaligned_v64i8: 1605*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 1606*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups (%rdi), %xmm0 1607*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 16(%rdi), %xmm1 1608*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 32(%rdi), %xmm2 1609*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movups 48(%rdi), %xmm3 1610*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 1611*9880d681SAndroid Build Coastguard Worker; 1612*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test_unaligned_v64i8: 1613*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 1614*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups (%rdi), %ymm0 1615*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vmovups 32(%rdi), %ymm1 1616*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 1617*9880d681SAndroid Build Coastguard Worker; 1618*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: test_unaligned_v64i8: 1619*9880d681SAndroid Build Coastguard Worker; AVX512F: # BB#0: 1620*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovups (%rdi), %ymm0 1621*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: vmovups 32(%rdi), %ymm1 1622*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT: retq 1623*9880d681SAndroid Build Coastguard Worker; 1624*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: test_unaligned_v64i8: 1625*9880d681SAndroid Build Coastguard Worker; AVX512BW: # BB#0: 1626*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: vmovdqu8 (%rdi), %zmm0 1627*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT: retq 1628*9880d681SAndroid Build Coastguard Worker; 1629*9880d681SAndroid Build Coastguard Worker; AVX512VL-LABEL: test_unaligned_v64i8: 1630*9880d681SAndroid Build Coastguard Worker; AVX512VL: # BB#0: 1631*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0 1632*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: vmovdqu64 32(%rdi), %ymm1 1633*9880d681SAndroid Build Coastguard Worker; AVX512VL-NEXT: retq 1634*9880d681SAndroid Build Coastguard Worker %1 = load <64 x i8>, <64 x i8>* %src, align 1, !nontemporal !1 1635*9880d681SAndroid Build Coastguard Worker ret <64 x i8> %1 1636*9880d681SAndroid Build Coastguard Worker} 1637*9880d681SAndroid Build Coastguard Worker 1638*9880d681SAndroid Build Coastguard Worker!1 = !{i32 1} 1639