; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s

; Tests that de-interleaving shufflevector masks are lowered to the NEON VUZP
; instruction. Functions named *_Qres / *_QQres return both VUZP results
; concatenated into one double-width vector instead of adding them together.

;-----------------------------------------------------------------------------
; 64-bit (D register) operands.
;-----------------------------------------------------------------------------

define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpi8:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vuzp.8 d17, d16
; CHECK-NEXT:    vadd.i8 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <16 x i8> @vuzpi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpi8_Qres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT:    vuzp.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret <16 x i8> %tmp3
}

define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vuzpi16:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vuzp.16 d17, d16
; CHECK-NEXT:    vadd.i16 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <8 x i16> @vuzpi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vuzpi16_Qres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT:    vuzp.16 [[LDR0]], [[LDR1]]
; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  ret <8 x i16> %tmp3
}

; VUZP.32 is equivalent to VTRN.32 for 64-bit vectors.

;-----------------------------------------------------------------------------
; 128-bit (Q register) operands.
;-----------------------------------------------------------------------------

define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpQi8:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vuzp.8 q9, q8
; CHECK-NEXT:    vadd.i8 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <32 x i8> @vuzpQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpQi8_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vuzp.8 q9, q8
; CHECK-NEXT:    vst1.8 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  ret <32 x i8> %tmp3
}

define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vuzpQi16:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vuzp.16 q9, q8
; CHECK-NEXT:    vadd.i16 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <16 x i16> @vuzpQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vuzpQi16_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vuzp.16 q9, q8
; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret <16 x i16> %tmp3
}

define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vuzpQi32:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vuzp.32 q9, q8
; CHECK-NEXT:    vadd.i32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <8 x i32> @vuzpQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vuzpQi32_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vuzp.32 q9, q8
; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  ret <8 x i32> %tmp3
}

define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vuzpQf:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vuzp.32 q9, q8
; CHECK-NEXT:    vadd.f32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}

define <8 x float> @vuzpQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vuzpQf_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vuzp.32 q9, q8
; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  ret <8 x float> %tmp3
}

;-----------------------------------------------------------------------------
; Undef shuffle indices should not prevent matching to VUZP:
;-----------------------------------------------------------------------------

define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpi8_undef:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vuzp.8 d17, d16
; CHECK-NEXT:    vadd.i8 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <16 x i8> @vuzpi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpi8_undef_Qres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT:    vuzp.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
  ret <16 x i8> %tmp3
}

define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vuzpQi16_undef:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vuzp.16 q9, q8
; CHECK-NEXT:    vadd.i16 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <16 x i16> @vuzpQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vuzpQi16_undef_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vuzp.16 q9, q8
; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
  ret <16 x i16> %tmp3
}

;-----------------------------------------------------------------------------
; Irregular masks: only the instruction selection is checked, not the exact
; instruction sequence.
;-----------------------------------------------------------------------------

; The whole lower half of the result is undef; the upper half is the odd lanes.
define <8 x i16> @vuzp_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
entry:
  ; CHECK-LABEL: vuzp_lower_shufflemask_undef:
  ; CHECK: vuzp
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 3, i32 5, i32 7>
  ret <8 x i16> %0
}

; The lower half of the mask is all zeros; VUZP (and no VTRN) is expected.
define <4 x i32> @vuzp_lower_shufflemask_zeroed(<2 x i32>* %A, <2 x i32>* %B) {
entry:
  ; CHECK-LABEL: vuzp_lower_shufflemask_zeroed:
  ; CHECK-NOT: vtrn
  ; CHECK: vuzp
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 0, i32 1, i32 3>
  ret <4 x i32> %0
}

; Odd lanes first, then even lanes; VUZP (and no VTRN) is expected.
define void @vuzp_rev_shufflemask_vtrn(<2 x i32>* %A, <2 x i32>* %B, <4 x i32>* %C) {
entry:
  ; CHECK-LABEL: vuzp_rev_shufflemask_vtrn:
  ; CHECK-NOT: vtrn
  ; CHECK: vuzp
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
  store <4 x i32> %0, <4 x i32>* %C
  ret void
}

;-----------------------------------------------------------------------------
; VUZP used to truncate a compare result into a select (VBSL) mask.
;-----------------------------------------------------------------------------

define <8 x i8> @vuzp_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 x i32> %cmp1) {
; In order to create the select we need to truncate the vcgt result from a vector of i32 to a vector of i8.
; This results in a build_vector with mismatched types. We will generate two vmovn.i32 instructions to
; truncate from i32 to i16 and one vuzp to perform the final truncation for i8.
; CHECK-LABEL: vuzp_trunc:
; CHECK: vmovn.i32
; CHECK: vmovn.i32
; CHECK: vuzp
; CHECK: vbsl
  %c = icmp ult <8 x i32> %cmp0, %cmp1
  %res = select <8 x i1> %c, <8 x i8> %in0, <8 x i8> %in1
  ret <8 x i8> %res
}

; Shuffle the result from the compare with a <4 x i8>.
; We need to extend the loaded <4 x i8> to <4 x i16>. Otherwise we wouldn't be able
; to perform the vuzp and get the vbsl mask.
define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,
                         <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8>* %cmp2_ptr) {
; CHECK-LABEL: vuzp_trunc_and_shuffle:
; CHECK: vmovl
; CHECK: vuzp
; CHECK: vbsl
  %cmp2_load = load <4 x i8>, <4 x i8>* %cmp2_ptr, align 4
  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
  %c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
  ret <8 x i8> %rv
}

; Use an undef value for the <4 x i8> that is being shuffled with the compare result.
; This produces a build_vector with some of the operands undefs.
define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1,
                         <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8>* %cmp2_ptr) {
; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_right:
; CHECK: vuzp
; CHECK: vbsl
  %cmp2_load = load <4 x i8>, <4 x i8>* %cmp2_ptr, align 4
  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
  %c = shufflevector <4 x i1> %c0, <4 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
  ret <8 x i8> %rv
}

; Same as above, but the undef vector is the first shuffle operand.
define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,
                         <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8>* %cmp2_ptr) {
; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_left:
; CHECK: vuzp
; CHECK: vbsl
  %cmp2_load = load <4 x i8>, <4 x i8>* %cmp2_ptr, align 4
  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
  %c = shufflevector <4 x i1> undef, <4 x i1> %c0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
  ret <8 x i8> %rv
}

; We're using large data types here, and we have to fill with undef values until we
; get some vector size that we can represent.
define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,
                         <5 x i32> %cmp0, <5 x i32> %cmp1, <5 x i8>* %cmp2_ptr) {
; CHECK-LABEL: vuzp_wide_type:
; CHECK: vbsl
  %cmp2_load = load <5 x i8>, <5 x i8>* %cmp2_ptr, align 4
  %cmp2 = trunc <5 x i8> %cmp2_load to <5 x i1>
  %c0 = icmp ult <5 x i32> %cmp0, %cmp1
  %c = shufflevector <5 x i1> %c0, <5 x i1> %cmp2, <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
  %rv = select <10 x i1> %c, <10 x i8> %tr0, <10 x i8> %tr1
  ret <10 x i8> %rv
}