; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=X64-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Extract element 2 of the <4 x i32> view of a 16-byte-aligned <2 x i64> load.
; The checks expect this to become a single 32-bit scalar load at offset 8.
define i32 @t(<2 x i64>* %val) nounwind  {
; X32-SSE2-LABEL: t:
; X32-SSE2:       # BB#0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    movl 8(%eax), %eax
; X32-SSE2-NEXT:    retl
;
; X64-SSSE3-LABEL: t:
; X64-SSSE3:       # BB#0:
; X64-SSSE3-NEXT:    movl 8(%rdi), %eax
; X64-SSSE3-NEXT:    retq
;
; X64-AVX-LABEL: t:
; X64-AVX:       # BB#0:
; X64-AVX-NEXT:    movl 8(%rdi), %eax
; X64-AVX-NEXT:    retq
  %tmp2 = load <2 x i64>, <2 x i64>* %val, align 16		; <<2 x i64>> [#uses=1]
  %tmp3 = bitcast <2 x i64> %tmp2 to <4 x i32>		; <<4 x i32>> [#uses=1]
  %tmp4 = extractelement <4 x i32> %tmp3, i32 2		; <i32> [#uses=1]
  ret i32 %tmp4
}

; Case where extractelement of load ends up as undef.
; (Making sure this doesn't crash.)
define i32 @t2(<8 x i32>* %xp) {
; X32-SSE2-LABEL: t2:
; X32-SSE2:       # BB#0:
; X32-SSE2-NEXT:    retl
;
; X64-SSSE3-LABEL: t2:
; X64-SSSE3:       # BB#0:
; X64-SSSE3-NEXT:    retq
;
; X64-AVX-LABEL: t2:
; X64-AVX:       # BB#0:
; X64-AVX-NEXT:    retq
  %x = load <8 x i32>, <8 x i32>* %xp
  %Shuff68 = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 undef, i32 7, i32 9, i32 undef, i32 13, i32 15, i32 1, i32 3>
  %y = extractelement <8 x i32> %Shuff68, i32 0
  ret i32 %y
}

; This case could easily end up inf-looping in the DAG combiner due to a
; low-alignment load of the vector which prevents us from reliably forming a
; narrow load.

; The expected AVX codegen matches the SSSE3 codegen except that the
; load/store instructions have a leading 'v'; it is checked under its own
; X64-AVX prefix below.

define void @t3() {
; X32-SSE2-LABEL: t3:
; X32-SSE2:       # BB#0: # %bb
; X32-SSE2-NEXT:    movupd (%eax), %xmm0
; X32-SSE2-NEXT:    movhpd %xmm0, (%eax)
;
; X64-SSSE3-LABEL: t3:
; X64-SSSE3:       # BB#0: # %bb
; X64-SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; X64-SSSE3-NEXT:    movlpd %xmm0, (%rax)
;
; X64-AVX-LABEL: t3:
; X64-AVX:       # BB#0: # %bb
; X64-AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-AVX-NEXT:    vmovlpd %xmm0, (%rax)
bb:
  %tmp13 = load <2 x double>, <2 x double>* undef, align 1
  %.sroa.3.24.vec.extract = extractelement <2 x double> %tmp13, i32 1
  store double %.sroa.3.24.vec.extract, double* undef, align 8
  unreachable
}

; Case where a load is unary shuffled, then bitcast (to a type with the same
; number of elements) before extractelement.
; This is testing for an assertion - the extraction was assuming that the undef
; second shuffle operand was a post-bitcast type instead of a pre-bitcast type.
define i64 @t4(<2 x double>* %a) {
; X32-SSE2-LABEL: t4:
; X32-SSE2:       # BB#0:
; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT:    movapd (%eax), %xmm0
; X32-SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X32-SSE2-NEXT:    movd %xmm1, %eax
; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; X32-SSE2-NEXT:    movd %xmm0, %edx
; X32-SSE2-NEXT:    retl
;
; X64-SSSE3-LABEL: t4:
; X64-SSSE3:       # BB#0:
; X64-SSSE3-NEXT:    movq (%rdi), %rax
; X64-SSSE3-NEXT:    retq
;
; X64-AVX-LABEL: t4:
; X64-AVX:       # BB#0:
; X64-AVX-NEXT:    movq (%rdi), %rax
; X64-AVX-NEXT:    retq
  %b = load <2 x double>, <2 x double>* %a, align 16
  %c = shufflevector <2 x double> %b, <2 x double> %b, <2 x i32> <i32 1, i32 0>
  %d = bitcast <2 x double> %c to <2 x i64>
  %e = extractelement <2 x i64> %d, i32 1
  ret i64 %e
}