; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s

; Codegen tests for shufflevector masks that are expected (or, for the negative
; tests at the end, expected not) to be selected as the NEON vtrn instruction.
;
; Naming used below:
;   *_Qres   - two 64-bit inputs shuffled into one 128-bit result.
;   *_QQres  - two 128-bit inputs shuffled into one 256-bit result.
;   *_undef* - same masks with some elements replaced by undef.

; 64-bit inputs. The plain variants add an "even lanes of both inputs" shuffle
; to an "odd lanes of both inputs" shuffle and expect a single vtrn feeding the
; add. The _Qres variants concatenate both shuffles into one wide mask and
; expect the two vtrn results to be moved out to r0-r3.

define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vtrn.8 d17, d16
; CHECK-NEXT: vadd.i8 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <16 x i8> @vtrni8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vtrn.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <16 x i8> %tmp3
}

define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vtrni16:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vtrn.16 d17, d16
; CHECK-NEXT: vadd.i16 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <8 x i16> @vtrni16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vtrni16_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vtrn.16 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
  ret <8 x i16> %tmp3
}

define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: vtrni32:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vtrn.32 d17, d16
; CHECK-NEXT: vadd.i32 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
  %tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <4 x i32> @vtrni32_Qres(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: vtrni32_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vtrn.32 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x i32> %tmp3
}

define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: vtrnf:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vtrn.32 d17, d16
; CHECK-NEXT: vadd.f32 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
  %tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
  %tmp5 = fadd <2 x float> %tmp3, %tmp4
  ret <2 x float> %tmp5
}

define <4 x float> @vtrnf_Qres(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: vtrnf_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vtrn.32 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %tmp3
}

; 128-bit inputs. Same even/odd patterns as above on Q registers. For the
; _QQres variants the expected code loads the inputs through r1 and r2 and
; stores both vtrn results through r0.

define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vtrnQi8:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vtrn.8 q9, q8
; CHECK-NEXT: vadd.i8 q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <32 x i8> @vtrnQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vtrnQi8_QQres:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vtrn.8 q9, q8
; CHECK-NEXT: vst1.8 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30, i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  ret <32 x i8> %tmp3
}

define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vtrnQi16:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vtrn.16 q9, q8
; CHECK-NEXT: vadd.i16 q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <16 x i16> @vtrnQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vtrnQi16_QQres:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vtrn.16 q9, q8
; CHECK-NEXT: vst1.16 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <16 x i16> %tmp3
}

define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vtrnQi32:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vtrn.32 q9, q8
; CHECK-NEXT: vadd.i32 q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <8 x i32> @vtrnQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vtrnQi32_QQres:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vtrn.32 q9, q8
; CHECK-NEXT: vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
  ret <8 x i32> %tmp3
}

define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vtrnQf:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vtrn.32 q9, q8
; CHECK-NEXT: vadd.f32 q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}

define <8 x float> @vtrnQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vtrnQf_QQres:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vtrn.32 q9, q8
; CHECK-NEXT: vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
  ret <8 x float> %tmp3
}

; Masks with some elements left undef. The expected output is the same as for
; the corresponding fully-specified masks above.

define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8_undef:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vtrn.8 d17, d16
; CHECK-NEXT: vadd.i8 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <16 x i8> @vtrni8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8_undef_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vtrn.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
  ret <16 x i8> %tmp3
}

define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vtrnQi16_undef:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vtrn.16 q9, q8
; CHECK-NEXT: vadd.i16 q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <16 x i16> @vtrnQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vtrnQi16_undef_QQres:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vtrn.16 q9, q8
; CHECK-NEXT: vst1.16 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14, i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
  ret <16 x i16> %tmp3
}

; The whole lower half of the wide mask is undef; only the odd-lane half is
; specified. The pattern below names the .16 form of the instruction so that it
; cannot be satisfied by the function's own name, which also contains "vtrn".
define <8 x i16> @vtrn_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
entry:
  ; CHECK-LABEL: vtrn_lower_shufflemask_undef:
  ; CHECK: vtrn.16
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 5, i32 3, i32 7>
  ret <8 x i16> %0
}

; Here we get a build_vector node, where all the incoming extract_element
; values do modify the type. However, we get different input types, as some of
; them get truncated from i32 to i8 (from comparing cmp0 with cmp1) and some of
; them get truncated from i16 to i8 (from comparing cmp2 with cmp3).
define <8 x i8> @vtrn_mismatched_builvector0(<8 x i8> %tr0, <8 x i8> %tr1,
                                             <4 x i32> %cmp0, <4 x i32> %cmp1,
                                             <4 x i16> %cmp2, <4 x i16> %cmp3) {
  ; CHECK-LABEL: vtrn_mismatched_builvector0:
  ; CHECK: vmovn.i32
  ; CHECK: vtrn
  ; CHECK: vbsl
  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
  %c1 = icmp ult <4 x i16> %cmp2, %cmp3
  %c = shufflevector <4 x i1> %c0, <4 x i1> %c1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
  ret <8 x i8> %rv
}

; Here we get a build_vector node, where half the incoming extract_element
; values do not modify the type (the values from cmp2), but half of them do
; (from the icmp operation).
define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1,
                           <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
  ; CHECK-LABEL: vtrn_mismatched_builvector1:
  ; We need to extend the 4 x i8 to 4 x i16 in order to perform the vtrn
  ; CHECK: vmovl
  ; CHECK: vtrn.8
  ; CHECK: vbsl
  %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
  %c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
  ret <8 x i8> %rv
}

; Negative test that should not generate a vtrn
; (both halves of the wide mask select the odd-lane pattern).
define void @lower_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) {
entry:
  ; CHECK-LABEL: lower_twice_no_vtrn:
  ; CHECK: @ BB#0:
  ; CHECK-NOT: vtrn
  ; CHECK: mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 5, i32 3, i32 7, i32 1, i32 5, i32 3, i32 7>
  store <8 x i16> %0, <8 x i16>* %C
  ret void
}

; Negative test that should not generate a vtrn
; (both halves of the wide mask select the even-lane pattern).
define void @upper_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) {
entry:
  ; CHECK-LABEL: upper_twice_no_vtrn:
  ; CHECK: @ BB#0:
  ; CHECK-NOT: vtrn
  ; CHECK: mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 6, i32 0, i32 4, i32 2, i32 6>
  store <8 x i16> %0, <8 x i16>* %C
  ret void
}