; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2

; Checks that 256-bit shuffles which move whole 128-bit halves are lowered to
; the lane instructions named in the assertions below (vperm2f128/vperm2i128,
; vinsertf128/vinserti128, vblendpd, vpermpd) for both -mattr=+avx and
; -mattr=+avx2. Function names encode the shuffle mask in hex, with 'u' for an
; undef index.

; Swap the two 128-bit halves of %a.
define <8 x float> @shuffle_v8f32_45670123(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_45670123:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %shuffle
}

; Same half swap, with the source loaded from memory: the load is expected to
; fold into the shuffle as a memory operand.
; NOTE(review): the *_mem tests in this file are marked readnone although they
; load through their pointer arguments; confirm that attribute is intended.
define <8 x float> @shuffle_v8f32_45670123_mem(<8 x float>* %pa, <8 x float>* %pb) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_45670123_mem:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3,0,1]
; ALL-NEXT:    retq
entry:
  %a = load <8 x float>, <8 x float>* %pa
  %b = load <8 x float>, <8 x float>* %pb
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %shuffle
}

; Low half of %a with high half of %b: no lane crossing, so a blend is expected.
define <8 x float> @shuffle_v8f32_0123cdef(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_0123cdef:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x float> %shuffle
}

; Duplicate the low half of %a into both halves.
define <8 x float> @shuffle_v8f32_01230123(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v8f32_01230123:
; AVX1:       ## BB#0: ## %entry
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_01230123:
; AVX2:       ## BB#0: ## %entry
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %shuffle
}

; Low-half duplication with the source loaded from memory.
define <8 x float> @shuffle_v8f32_01230123_mem(<8 x float>* %pa, <8 x float>* %pb) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v8f32_01230123_mem:
; AVX1:       ## BB#0: ## %entry
; AVX1-NEXT:    vmovaps (%rdi), %ymm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8f32_01230123_mem:
; AVX2:       ## BB#0: ## %entry
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = mem[0,1,0,1]
; AVX2-NEXT:    retq
entry:
  %a = load <8 x float>, <8 x float>* %pa
  %b = load <8 x float>, <8 x float>* %pb
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %shuffle
}

; Duplicate the high half of %a into both halves.
define <8 x float> @shuffle_v8f32_45674567(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_45674567:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; High-half duplication with the source loaded from memory.
define <8 x float> @shuffle_v8f32_45674567_mem(<8 x float>* %pa, <8 x float>* %pb) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_45674567_mem:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3,2,3]
; ALL-NEXT:    retq
entry:
  %a = load <8 x float>, <8 x float>* %pa
  %b = load <8 x float>, <8 x float>* %pb
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuffle
}

; High-half duplication on a byte vector; with no integer arithmetic feeding
; the shuffle, the floating-point form vperm2f128 is expected on both targets.
define <32 x i8> @shuffle_v32i8_2323(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v32i8_2323:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %shuffle
}

; Same shuffle fed by an integer add: with AVX2 the integer form vperm2i128 is
; expected, while AVX1 keeps vperm2f128.
define <32 x i8> @shuffle_v32i8_2323_domain(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v32i8_2323_domain:
; AVX1:       ## BB#0: ## %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v32i8_2323_domain:
; AVX2:       ## BB#0: ## %entry
; AVX2-NEXT:    vpaddb {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX2-NEXT:    retq
entry:
  ; add forces execution domain
  %a2 = add <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %shuffle
}

; High half of %b followed by low half of %a.
define <4 x i64> @shuffle_v4i64_6701(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v4i64_6701:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

; Same two-input shuffle fed by an integer add (vperm2i128 expected with AVX2).
define <4 x i64> @shuffle_v4i64_6701_domain(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v4i64_6701_domain:
; AVX1:       ## BB#0: ## %entry
; AVX1-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_6701_domain:
; AVX2:       ## BB#0: ## %entry
; AVX2-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm2
; AVX2-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; AVX2-NEXT:    retq
entry:
  ; add forces execution domain
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

; High half of (%a + 1) with undef holes, followed by the high half of %b.
define <8 x i32> @shuffle_v8i32_u5u7cdef(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v8i32_u5u7cdef:
; AVX1:       ## BB#0: ## %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v8i32_u5u7cdef:
; AVX2:       ## BB#0: ## %entry
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT:    retq
entry:
  ; add forces execution domain
  %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> <i32 undef, i32 5, i32 undef, i32 7, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i32> %shuffle
}

; Low half of %b followed by low half of (%a + 1): a 128-bit insert is expected.
define <16 x i16> @shuffle_v16i16_4501(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v16i16_4501:
; AVX1:       ## BB#0: ## %entry
; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_4501:
; AVX2:       ## BB#0: ## %entry
; AVX2-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
entry:
  ; add forces execution domain
  %a2 = add <16 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i16> %shuffle
}

; Same insert pattern with both inputs loaded from memory.
define <16 x i16> @shuffle_v16i16_4501_mem(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v16i16_4501_mem:
; AVX1:       ## BB#0: ## %entry
; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
; AVX1-NEXT:    vmovaps (%rsi), %ymm1
; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v16i16_4501_mem:
; AVX2:       ## BB#0: ## %entry
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vmovdqa (%rsi), %ymm1
; AVX2-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
entry:
  %c = load <16 x i16>, <16 x i16>* %a
  %d = load <16 x i16>, <16 x i16>* %b
  %c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i16> %shuffle
}

;;;; Cases with undef indices mixed in the mask

define <8 x float> @shuffle_v8f32_uu67u9ub(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_uu67u9ub:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 9, i32 undef, i32 11>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_uu67uu67(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_uu67uu67:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_uu67uuab(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_uu67uuab:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 10, i32 11>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_uu67uuef(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_uu67uuef:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 14, i32 15>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_uu674567(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_uu674567:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_uu6789ab(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_uu6789ab:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_4567uu67(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_4567uu67:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7>
  ret <8 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_4567uuef(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_4567uuef:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 14, i32 15>
  ret <8 x float> %shuffle
}

;;;; Cases we must not select vperm2f128 alone

; The mask asks for element 12 in slot 5, which is not where a whole-lane move
; of %b's high half would put it, so a lane permute by itself cannot produce
; the result: an in-lane vpermilps is expected after the vperm2f128.
define <8 x float> @shuffle_v8f32_uu67ucuf(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: shuffle_v8f32_uu67ucuf:
; ALL:       ## BB#0: ## %entry
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
; ALL-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 12, i32 undef, i32 15>
  ret <8 x float> %shuffle
}

;; Test zero mask generation.
;; PR22984: https://llvm.org/bugs/show_bug.cgi?id=22984
;; Prefer xor+vblendpd over vperm2f128 because that has better performance.
;; TODO: When building for optsize we should use vperm2f128.

; Function names give the result layout: 'zz' is a zeroed 128-bit half and the
; digits are shuffle mask indices. Each complete test below has an optsize twin
; whose expected code is currently identical to the non-optsize version.

; Zero low half, low half of %a moved to the high half (needs a lane crossing).
define <4 x double> @shuffle_v4f64_zz01(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_zz01:
; ALL:       ## BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %s
}
define <4 x double> @shuffle_v4f64_zz01_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_zz01_optsize:
; ALL:       ## BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %s
}

; Zero low half, high half of %a kept in place (xor + blend expected).
define <4 x double> @shuffle_v4f64_zz23(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_zz23:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x double> %s
}
define <4 x double> @shuffle_v4f64_zz23_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_zz23_optsize:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x double> %s
}

; Same result as zz01, with the zero vector as the first shuffle operand.
define <4 x double> @shuffle_v4f64_zz45(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_zz45:
; ALL:       ## BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %s
}
define <4 x double> @shuffle_v4f64_zz45_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_zz45_optsize:
; ALL:       ## BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %s
}

; Same result as zz23, with the zero vector as the first shuffle operand.
define <4 x double> @shuffle_v4f64_zz67(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_zz67:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}
define <4 x double> @shuffle_v4f64_zz67_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_zz67_optsize:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}

; Low half of %a kept in place, zero high half (xor + blend expected).
define <4 x double> @shuffle_v4f64_01zz(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_01zz:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %s
}
define <4 x double> @shuffle_v4f64_01zz_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_01zz_optsize:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %s
}

; High half of %a moved to the low half, zero high half (needs a lane crossing).
define <4 x double> @shuffle_v4f64_23zz(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_23zz:
; ALL:       ## BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  ret <4 x double> %s
}
define <4 x double> @shuffle_v4f64_23zz_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_23zz_optsize:
; ALL:       ## BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  ret <4 x double> %s
}

define <4 x double> @shuffle_v4f64_45zz(<4 x double> %a) {
; ALL-LABEL: shuffle_v4f64_45zz:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    retq
  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x
double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1> 433*9880d681SAndroid Build Coastguard Worker ret <4 x double> %s 434*9880d681SAndroid Build Coastguard Worker} 435*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_45zz_optsize(<4 x double> %a) optsize { 436*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_45zz_optsize: 437*9880d681SAndroid Build Coastguard Worker; ALL: ## BB#0: 438*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1 439*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 440*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: retq 441*9880d681SAndroid Build Coastguard Worker %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1> 442*9880d681SAndroid Build Coastguard Worker ret <4 x double> %s 443*9880d681SAndroid Build Coastguard Worker} 444*9880d681SAndroid Build Coastguard Worker 445*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_67zz(<4 x double> %a) { 446*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_67zz: 447*9880d681SAndroid Build Coastguard Worker; ALL: ## BB#0: 448*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero 449*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: retq 450*9880d681SAndroid Build Coastguard Worker %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1> 451*9880d681SAndroid Build Coastguard Worker ret <4 x double> %s 452*9880d681SAndroid Build Coastguard Worker} 453*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_67zz_optsize(<4 x double> %a) optsize { 454*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_67zz_optsize: 455*9880d681SAndroid Build Coastguard Worker; ALL: ## BB#0: 
456*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero 457*9880d681SAndroid Build Coastguard Worker; ALL-NEXT: retq 458*9880d681SAndroid Build Coastguard Worker %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1> 459*9880d681SAndroid Build Coastguard Worker ret <4 x double> %s 460*9880d681SAndroid Build Coastguard Worker} 461*9880d681SAndroid Build Coastguard Worker 462*9880d681SAndroid Build Coastguard Worker;; With AVX2 select the integer version of the instruction. Use an add to force the domain selection. 463*9880d681SAndroid Build Coastguard Worker 464*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @shuffle_v4i64_67zz(<4 x i64> %a, <4 x i64> %b) { 465*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: shuffle_v4i64_67zz: 466*9880d681SAndroid Build Coastguard Worker; AVX1: ## BB#0: 467*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero 468*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 469*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 470*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2 471*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 472*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 473*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 474*9880d681SAndroid Build Coastguard Worker; 475*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: shuffle_v4i64_67zz: 476*9880d681SAndroid Build Coastguard Worker; AVX2: ## BB#0: 477*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero 478*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddq %ymm0, %ymm1, %ymm0 479*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 
480*9880d681SAndroid Build Coastguard Worker %s = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1> 481*9880d681SAndroid Build Coastguard Worker %c = add <4 x i64> %b, %s 482*9880d681SAndroid Build Coastguard Worker ret <4 x i64> %c 483*9880d681SAndroid Build Coastguard Worker} 484*9880d681SAndroid Build Coastguard Worker 485*9880d681SAndroid Build Coastguard Worker;;; Memory folding cases 486*9880d681SAndroid Build Coastguard Worker 487*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @ld0_hi0_lo1_4f64(<4 x double> * %pa, <4 x double> %b) nounwind uwtable readnone ssp { 488*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: ld0_hi0_lo1_4f64: 489*9880d681SAndroid Build Coastguard Worker; AVX1: ## BB#0: ## %entry 490*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1] 491*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0 492*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 493*9880d681SAndroid Build Coastguard Worker; 494*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld0_hi0_lo1_4f64: 495*9880d681SAndroid Build Coastguard Worker; AVX2: ## BB#0: ## %entry 496*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1] 497*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1 498*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 499*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 500*9880d681SAndroid Build Coastguard Workerentry: 501*9880d681SAndroid Build Coastguard Worker %a = load <4 x double>, <4 x double> * %pa 502*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 503*9880d681SAndroid Build Coastguard Worker %res = fadd <4 x double> %shuffle, <double 1.0, double 1.0, double 1.0, double 1.0> 
504*9880d681SAndroid Build Coastguard Worker ret <4 x double> %res 505*9880d681SAndroid Build Coastguard Worker} 506*9880d681SAndroid Build Coastguard Worker 507*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @ld1_hi0_hi1_4f64(<4 x double> %a, <4 x double> * %pb) nounwind uwtable readnone ssp { 508*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: ld1_hi0_hi1_4f64: 509*9880d681SAndroid Build Coastguard Worker; AVX1: ## BB#0: ## %entry 510*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] 511*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0 512*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 513*9880d681SAndroid Build Coastguard Worker; 514*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld1_hi0_hi1_4f64: 515*9880d681SAndroid Build Coastguard Worker; AVX2: ## BB#0: ## %entry 516*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] 517*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1 518*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 519*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 520*9880d681SAndroid Build Coastguard Workerentry: 521*9880d681SAndroid Build Coastguard Worker %b = load <4 x double>, <4 x double> * %pb 522*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 523*9880d681SAndroid Build Coastguard Worker %res = fadd <4 x double> %shuffle, <double 1.0, double 1.0, double 1.0, double 1.0> 524*9880d681SAndroid Build Coastguard Worker ret <4 x double> %res 525*9880d681SAndroid Build Coastguard Worker} 526*9880d681SAndroid Build Coastguard Worker 527*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @ld0_hi0_lo1_8f32(<8 x float> * %pa, <8 x float> %b) nounwind uwtable readnone ssp { 528*9880d681SAndroid Build Coastguard 
Worker; AVX1-LABEL: ld0_hi0_lo1_8f32: 529*9880d681SAndroid Build Coastguard Worker; AVX1: ## BB#0: ## %entry 530*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1] 531*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vaddps {{.*}}(%rip), %ymm0, %ymm0 532*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 533*9880d681SAndroid Build Coastguard Worker; 534*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld0_hi0_lo1_8f32: 535*9880d681SAndroid Build Coastguard Worker; AVX2: ## BB#0: ## %entry 536*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1] 537*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 538*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0 539*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 540*9880d681SAndroid Build Coastguard Workerentry: 541*9880d681SAndroid Build Coastguard Worker %a = load <8 x float>, <8 x float> * %pa 542*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 543*9880d681SAndroid Build Coastguard Worker %res = fadd <8 x float> %shuffle, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0> 544*9880d681SAndroid Build Coastguard Worker ret <8 x float> %res 545*9880d681SAndroid Build Coastguard Worker} 546*9880d681SAndroid Build Coastguard Worker 547*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @ld1_hi0_hi1_8f32(<8 x float> %a, <8 x float> * %pb) nounwind uwtable readnone ssp { 548*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: ld1_hi0_hi1_8f32: 549*9880d681SAndroid Build Coastguard Worker; AVX1: ## BB#0: ## %entry 550*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] 551*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vaddps 
{{.*}}(%rip), %ymm0, %ymm0 552*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 553*9880d681SAndroid Build Coastguard Worker; 554*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld1_hi0_hi1_8f32: 555*9880d681SAndroid Build Coastguard Worker; AVX2: ## BB#0: ## %entry 556*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] 557*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 558*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0 559*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 560*9880d681SAndroid Build Coastguard Workerentry: 561*9880d681SAndroid Build Coastguard Worker %b = load <8 x float>, <8 x float> * %pb 562*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 563*9880d681SAndroid Build Coastguard Worker %res = fadd <8 x float> %shuffle, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0> 564*9880d681SAndroid Build Coastguard Worker ret <8 x float> %res 565*9880d681SAndroid Build Coastguard Worker} 566*9880d681SAndroid Build Coastguard Worker 567*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @ld0_hi0_lo1_4i64(<4 x i64> * %pa, <4 x i64> %b) nounwind uwtable readnone ssp { 568*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: ld0_hi0_lo1_4i64: 569*9880d681SAndroid Build Coastguard Worker; AVX1: ## BB#0: ## %entry 570*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1] 571*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm1 572*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 573*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0 574*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm0, 
%ymm1, %ymm0 575*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 576*9880d681SAndroid Build Coastguard Worker; 577*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld0_hi0_lo1_4i64: 578*9880d681SAndroid Build Coastguard Worker; AVX2: ## BB#0: ## %entry 579*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1] 580*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0 581*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 582*9880d681SAndroid Build Coastguard Workerentry: 583*9880d681SAndroid Build Coastguard Worker %a = load <4 x i64>, <4 x i64> * %pa 584*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 585*9880d681SAndroid Build Coastguard Worker %res = add <4 x i64> %shuffle, <i64 1, i64 2, i64 3, i64 4> 586*9880d681SAndroid Build Coastguard Worker ret <4 x i64> %res 587*9880d681SAndroid Build Coastguard Worker} 588*9880d681SAndroid Build Coastguard Worker 589*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @ld1_hi0_hi1_4i64(<4 x i64> %a, <4 x i64> * %pb) nounwind uwtable readnone ssp { 590*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: ld1_hi0_hi1_4i64: 591*9880d681SAndroid Build Coastguard Worker; AVX1: ## BB#0: ## %entry 592*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] 593*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm1 594*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 595*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0 596*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 597*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 598*9880d681SAndroid Build Coastguard Worker; 599*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld1_hi0_hi1_4i64: 
600*9880d681SAndroid Build Coastguard Worker; AVX2: ## BB#0: ## %entry 601*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] 602*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0 603*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 604*9880d681SAndroid Build Coastguard Workerentry: 605*9880d681SAndroid Build Coastguard Worker %b = load <4 x i64>, <4 x i64> * %pb 606*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7> 607*9880d681SAndroid Build Coastguard Worker %res = add <4 x i64> %shuffle, <i64 1, i64 2, i64 3, i64 4> 608*9880d681SAndroid Build Coastguard Worker ret <4 x i64> %res 609*9880d681SAndroid Build Coastguard Worker} 610*9880d681SAndroid Build Coastguard Worker 611*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @ld0_hi0_lo1_8i32(<8 x i32> * %pa, <8 x i32> %b) nounwind uwtable readnone ssp { 612*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: ld0_hi0_lo1_8i32: 613*9880d681SAndroid Build Coastguard Worker; AVX1: ## BB#0: ## %entry 614*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1] 615*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 616*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,2,3,4] 617*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 618*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0 619*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 620*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 621*9880d681SAndroid Build Coastguard Worker; 622*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld0_hi0_lo1_8i32: 623*9880d681SAndroid Build Coastguard Worker; AVX2: ## BB#0: ## %entry 624*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: 
vperm2i128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1] 625*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 626*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 627*9880d681SAndroid Build Coastguard Workerentry: 628*9880d681SAndroid Build Coastguard Worker %a = load <8 x i32>, <8 x i32> * %pa 629*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 630*9880d681SAndroid Build Coastguard Worker %res = add <8 x i32> %shuffle, <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4> 631*9880d681SAndroid Build Coastguard Worker ret <8 x i32> %res 632*9880d681SAndroid Build Coastguard Worker} 633*9880d681SAndroid Build Coastguard Worker 634*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @ld1_hi0_hi1_8i32(<8 x i32> %a, <8 x i32> * %pb) nounwind uwtable readnone ssp { 635*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: ld1_hi0_hi1_8i32: 636*9880d681SAndroid Build Coastguard Worker; AVX1: ## BB#0: ## %entry 637*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] 638*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 639*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,2,3,4] 640*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 641*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0 642*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 643*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 644*9880d681SAndroid Build Coastguard Worker; 645*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld1_hi0_hi1_8i32: 646*9880d681SAndroid Build Coastguard Worker; AVX2: ## BB#0: ## %entry 647*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] 648*9880d681SAndroid 
Build Coastguard Worker; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 649*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 650*9880d681SAndroid Build Coastguard Workerentry: 651*9880d681SAndroid Build Coastguard Worker %b = load <8 x i32>, <8 x i32> * %pb 652*9880d681SAndroid Build Coastguard Worker %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> 653*9880d681SAndroid Build Coastguard Worker %res = add <8 x i32> %shuffle, <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4> 654*9880d681SAndroid Build Coastguard Worker ret <8 x i32> %res 655*9880d681SAndroid Build Coastguard Worker} 656