; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s -o -| FileCheck %s

; Each test below multiplies two vectors that were both sign-extended (or both
; zero-extended) from a half-width element type and checks, via FileCheck, that
; the widening NEON instruction is emitted. When the product is then added to
; or subtracted from a full-width accumulator, the accumulating form
; (smlal/umlal or smlsl/umlsl) is expected.

; sext * sext -> smull

define <8 x i16> @smull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: smull_v8i8_v8i16:
; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
  %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
  %tmp5 = mul <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @smull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: smull_v4i16_v4i32:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
  %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
  %tmp5 = mul <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @smull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: smull_v2i32_v2i64:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
  %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
  %tmp5 = mul <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

; zext * zext -> umull

define <8 x i16> @umull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: umull_v8i8_v8i16:
; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
  %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
  %tmp5 = mul <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @umull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: umull_v4i16_v4i32:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
  %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
  %tmp5 = mul <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @umull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: umull_v2i32_v2i64:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
  %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
  %tmp5 = mul <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

; acc + (sext * sext) -> smlal

define <8 x i16> @smlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: smlal_v8i8_v8i16:
; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = load <8 x i8>, <8 x i8>* %C
  %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
  %tmp5 = sext <8 x i8> %tmp3 to <8 x i16>
  %tmp6 = mul <8 x i16> %tmp4, %tmp5
  %tmp7 = add <8 x i16> %tmp1, %tmp6
  ret <8 x i16> %tmp7
}

define <4 x i32> @smlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: smlal_v4i16_v4i32:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i16>, <4 x i16>* %C
  %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
  %tmp5 = sext <4 x i16> %tmp3 to <4 x i32>
  %tmp6 = mul <4 x i32> %tmp4, %tmp5
  %tmp7 = add <4 x i32> %tmp1, %tmp6
  ret <4 x i32> %tmp7
}

define <2 x i64> @smlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: smlal_v2i32_v2i64:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i32>, <2 x i32>* %C
  %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
  %tmp5 = sext <2 x i32> %tmp3 to <2 x i64>
  %tmp6 = mul <2 x i64> %tmp4, %tmp5
  %tmp7 = add <2 x i64> %tmp1, %tmp6
  ret <2 x i64> %tmp7
}

; acc + (zext * zext) -> umlal

define <8 x i16> @umlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: umlal_v8i8_v8i16:
; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = load <8 x i8>, <8 x i8>* %C
  %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
  %tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
  %tmp6 = mul <8 x i16> %tmp4, %tmp5
  %tmp7 = add <8 x i16> %tmp1, %tmp6
  ret <8 x i16> %tmp7
}

define <4 x i32> @umlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: umlal_v4i16_v4i32:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i16>, <4 x i16>* %C
  %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
  %tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
  %tmp6 = mul <4 x i32> %tmp4, %tmp5
  %tmp7 = add <4 x i32> %tmp1, %tmp6
  ret <4 x i32> %tmp7
}

define <2 x i64> @umlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: umlal_v2i32_v2i64:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i32>, <2 x i32>* %C
  %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
  %tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
  %tmp6 = mul <2 x i64> %tmp4, %tmp5
  %tmp7 = add <2 x i64> %tmp1, %tmp6
  ret <2 x i64> %tmp7
}

; acc - (sext * sext) -> smlsl

define <8 x i16> @smlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: smlsl_v8i8_v8i16:
; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = load <8 x i8>, <8 x i8>* %C
  %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
  %tmp5 = sext <8 x i8> %tmp3 to <8 x i16>
  %tmp6 = mul <8 x i16> %tmp4, %tmp5
  %tmp7 = sub <8 x i16> %tmp1, %tmp6
  ret <8 x i16> %tmp7
}

define <4 x i32> @smlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: smlsl_v4i16_v4i32:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i16>, <4 x i16>* %C
  %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
  %tmp5 = sext <4 x i16> %tmp3 to <4 x i32>
  %tmp6 = mul <4 x i32> %tmp4, %tmp5
  %tmp7 = sub <4 x i32> %tmp1, %tmp6
  ret <4 x i32> %tmp7
}

define <2 x i64> @smlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: smlsl_v2i32_v2i64:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i32>, <2 x i32>* %C
  %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
  %tmp5 = sext <2 x i32> %tmp3 to <2 x i64>
  %tmp6 = mul <2 x i64> %tmp4, %tmp5
  %tmp7 = sub <2 x i64> %tmp1, %tmp6
  ret <2 x i64> %tmp7
}

; acc - (zext * zext) -> umlsl

define <8 x i16> @umlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: umlsl_v8i8_v8i16:
; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = load <8 x i8>, <8 x i8>* %C
  %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
  %tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
  %tmp6 = mul <8 x i16> %tmp4, %tmp5
  %tmp7 = sub <8 x i16> %tmp1, %tmp6
  ret <8 x i16> %tmp7
}

define <4 x i32> @umlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: umlsl_v4i16_v4i32:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i16>, <4 x i16>* %C
  %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
  %tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
  %tmp6 = mul <4 x i32> %tmp4, %tmp5
  %tmp7 = sub <4 x i32> %tmp1, %tmp6
  ret <4 x i32> %tmp7
}

define <2 x i64> @umlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: umlsl_v2i32_v2i64:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i32>, <2 x i32>* %C
  %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
  %tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
  %tmp6 = mul <2 x i64> %tmp4, %tmp5
  %tmp7 = sub <2 x i64> %tmp1, %tmp6
  ret <2 x i64> %tmp7
}
; SMULL/UMULL recognizing BUILD_VECTORs with sign/zero-extended elements:
; one multiplicand is an extended vector, the other a constant splat. The
; widening multiply is expected only when every constant element fits in the
; narrow element type under the same (signed or unsigned) extension.

define <8 x i16> @smull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; -12 fits in a signed i8, so the constant can be treated as sign-extended.
; CHECK-LABEL: smull_extvec_v8i8_v8i16:
; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %tmp3 = sext <8 x i8> %arg to <8 x i16>
  %tmp4 = mul <8 x i16> %tmp3, <i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12>
  ret <8 x i16> %tmp4
}

define <8 x i16> @smull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; Do not use SMULL if the BUILD_VECTOR element values are too big
; (-999 is outside the signed i8 range); a plain 8h multiply is expected.
; CHECK-LABEL: smull_noextvec_v8i8_v8i16:
; CHECK: mov
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
  %tmp3 = sext <8 x i8> %arg to <8 x i16>
  %tmp4 = mul <8 x i16> %tmp3, <i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999>
  ret <8 x i16> %tmp4
}

define <4 x i32> @smull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
; CHECK-LABEL: smull_extvec_v4i16_v4i32:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %tmp3 = sext <4 x i16> %arg to <4 x i32>
  %tmp4 = mul <4 x i32> %tmp3, <i32 -12, i32 -12, i32 -12, i32 -12>
  ret <4 x i32> %tmp4
}

define <2 x i64> @smull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
; CHECK-LABEL (with the trailing colon), as in every other test in this file,
; so the smull pattern below is matched only within this function's output.
; CHECK-LABEL: smull_extvec_v2i32_v2i64:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %tmp3 = sext <2 x i32> %arg to <2 x i64>
  %tmp4 = mul <2 x i64> %tmp3, <i64 -1234, i64 -1234>
  ret <2 x i64> %tmp4
}

define <8 x i16> @umull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; 12 fits in an unsigned i8, so the constant can be treated as zero-extended.
; CHECK-LABEL: umull_extvec_v8i8_v8i16:
; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %tmp3 = zext <8 x i8> %arg to <8 x i16>
  %tmp4 = mul <8 x i16> %tmp3, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
  ret <8 x i16> %tmp4
}

define <8 x i16> @umull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; Do not use UMULL if the BUILD_VECTOR element values are too big
; (999 is outside the unsigned i8 range); a plain 8h multiply is expected.
; CHECK-LABEL: umull_noextvec_v8i8_v8i16:
; CHECK: mov
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
  %tmp3 = zext <8 x i8> %arg to <8 x i16>
  %tmp4 = mul <8 x i16> %tmp3, <i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999>
  ret <8 x i16> %tmp4
}

define <4 x i32> @umull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
; CHECK-LABEL: umull_extvec_v4i16_v4i32:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %tmp3 = zext <4 x i16> %arg to <4 x i32>
  %tmp4 = mul <4 x i32> %tmp3, <i32 1234, i32 1234, i32 1234, i32 1234>
  ret <4 x i32> %tmp4
}

define <2 x i64> @umull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
; CHECK-LABEL: umull_extvec_v2i32_v2i64:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %tmp3 = zext <2 x i32> %arg to <2 x i64>
  %tmp4 = mul <2 x i64> %tmp3, <i64 1234, i64 1234>
  ret <2 x i64> %tmp4
}

define i16 @smullWithInconsistentExtensions(<8 x i8> %vec) {
; If one operand has a zero-extend and the other a sign-extend, smull
; cannot be used. Here %vec is sign-extended, while the constant 255 is only
; representable in 8 bits as an unsigned (zero-extended) value.
; CHECK-LABEL: smullWithInconsistentExtensions:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
  %1 = sext <8 x i8> %vec to <8 x i16>
  %2 = mul <8 x i16> %1, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %3 = extractelement <8 x i16> %2, i32 0
  ret i16 %3
}

define void @distribute(i16* %dst, i8* %src, i32 %mul) nounwind {
entry:
; The IR computes (zext(hi) + zext(lo)) * zext(splat). The CHECK lines expect
; that multiply-of-a-sum to be emitted as a umull followed by a umlal that
; accumulates into the same destination register and reuses the same splat
; operand register.
; CHECK-LABEL: distribute:
; CHECK: umull [[REG1:(v[0-9]+.8h)]], {{v[0-9]+}}.8b, [[REG2:(v[0-9]+.8b)]]
; CHECK: umlal [[REG1]], {{v[0-9]+}}.8b, [[REG2]]
  %0 = trunc i32 %mul to i8
  %1 = insertelement <8 x i8> undef, i8 %0, i32 0
  %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
  %3 = tail call <16 x i8> @llvm.aarch64.neon.vld1.v16i8(i8* %src, i32 1)
  %4 = bitcast <16 x i8> %3 to <2 x double>
  %5 = extractelement <2 x double> %4, i32 1
  %6 = bitcast double %5 to <8 x i8>
  %7 = zext <8 x i8> %6 to <8 x i16>
  %8 = zext <8 x i8> %2 to <8 x i16>
  %9 = extractelement <2 x double> %4, i32 0
  %10 = bitcast double %9 to <8 x i8>
  %11 = zext <8 x i8> %10 to <8 x i16>
  %12 = add <8 x i16> %7, %11
  %13 = mul <8 x i16> %12, %8
  %14 = bitcast i16* %dst to i8*
  tail call void @llvm.aarch64.neon.vst1.v8i16(i8* %14, <8 x i16> %13, i32 2)
  ret void
}

declare <16 x i8> @llvm.aarch64.neon.vld1.v16i8(i8*, i32) nounwind readonly

declare void @llvm.aarch64.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind