; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s

; Each test loads two narrow integer vectors at %a+%index and %b+%index,
; extends them to i32 lanes, multiplies them, and stores the product to the
; i32 buffer whose address is read from @c. The CHECK lines pin the SSE2 code
; llc is expected to emit; regenerate them with update_llc_test_checks.py
; instead of editing them by hand.

; Pointer to the destination i32 buffer shared by the tests.
@c = external global i32*, align 8

; %val1 = load <2 x i8>
; %op1 = zext<2 x i32> %val1
; %val2 = load <2 x i8>
; %op2 = zext<2 x i32> %val2
; %rst = mul <2 x i32> %op1, %op2
;
; Expected lowering: bytes are unpacked against a zeroed register, multiplied
; with a single 16-bit pmullw, and the product is widened by interleaving zeros.
define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_2xi8:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movzwl (%rdi,%rdx), %ecx
; CHECK-NEXT:    movd %ecx, %xmm0
; CHECK-NEXT:    movzwl (%rsi,%rdx), %ecx
; CHECK-NEXT:    movd %ecx, %xmm1
; CHECK-NEXT:    pxor %xmm2, %xmm2
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; CHECK-NEXT:    movq %xmm1, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <2 x i8>*
  %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1
  %tmp8 = zext <2 x i8> %wide.load to <2 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <2 x i8>*
  %wide.load17 = load <2 x i8>, <2 x i8>* %tmp11, align 1
  %tmp12 = zext <2 x i8> %wide.load17 to <2 x i32>
  %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
  ret void
}

; %val1 = load <4 x i8>
; %op1 = zext<4 x i32> %val1
; %val2 = load <4 x i8>
; %op2 = zext<4 x i32> %val2
; %rst = mul <4 x i32> %op1, %op2
;
; Expected lowering: bytes unpacked against a zeroed register, one 16-bit
; pmullw, product widened to i32 lanes by interleaving zeros.
define void @mul_4xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_4xi8:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    pxor %xmm2, %xmm2
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; CHECK-NEXT:    movdqu %xmm1, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <4 x i8>*
  %wide.load = load <4 x i8>, <4 x i8>* %tmp7, align 1
  %tmp8 = zext <4 x i8> %wide.load to <4 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <4 x i8>*
  %wide.load17 = load <4 x i8>, <4 x i8>* %tmp11, align 1
  %tmp12 = zext <4 x i8> %wide.load17 to <4 x i32>
  %tmp13 = mul nuw nsw <4 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <4 x i32>*
  store <4 x i32> %tmp13, <4 x i32>* %tmp15, align 4
  ret void
}

; %val1 = load <8 x i8>
; %op1 = zext<8 x i32> %val1
; %val2 = load <8 x i8>
; %op2 = zext<8 x i32> %val2
; %rst = mul <8 x i32> %op1, %op2
;
; Expected lowering: one pmullw on zero-extended bytes; the eight products are
; widened with zeros into two registers (punpcklwd/punpckhwd) and stored.
define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_8xi8:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    pxor %xmm2, %xmm2
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; CHECK-NEXT:    movdqu %xmm1, 16(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm0, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <8 x i8>*
  %wide.load = load <8 x i8>, <8 x i8>* %tmp7, align 1
  %tmp8 = zext <8 x i8> %wide.load to <8 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <8 x i8>*
  %wide.load17 = load <8 x i8>, <8 x i8>* %tmp11, align 1
  %tmp12 = zext <8 x i8> %wide.load17 to <8 x i32>
  %tmp13 = mul nuw nsw <8 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <8 x i32>*
  store <8 x i32> %tmp13, <8 x i32>* %tmp15, align 4
  ret void
}

; %val1 = load <16 x i8>
; %op1 = zext<16 x i32> %val1
; %val2 = load <16 x i8>
; %op2 = zext<16 x i32> %val2
; %rst = mul <16 x i32> %op1, %op2
;
; Expected lowering: the low and high eight bytes are handled separately
; (punpcklbw/punpckhbw), each with one pmullw and zero-widening of the product.
define void @mul_16xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_16xi8:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movdqu (%rdi,%rdx), %xmm0
; CHECK-NEXT:    movdqu (%rsi,%rdx), %xmm1
; CHECK-NEXT:    pxor %xmm2, %xmm2
; CHECK-NEXT:    movdqa %xmm0, %xmm3
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; CHECK-NEXT:    movdqa %xmm1, %xmm4
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; CHECK-NEXT:    pmullw %xmm3, %xmm4
; CHECK-NEXT:    movdqa %xmm4, %xmm3
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; CHECK-NEXT:    punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; CHECK-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
; CHECK-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; CHECK-NEXT:    movdqu %xmm1, 48(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm0, 32(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm4, 16(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm3, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <16 x i8>*
  %wide.load = load <16 x i8>, <16 x i8>* %tmp7, align 1
  %tmp8 = zext <16 x i8> %wide.load to <16 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <16 x i8>*
  %wide.load17 = load <16 x i8>, <16 x i8>* %tmp11, align 1
  %tmp12 = zext <16 x i8> %wide.load17 to <16 x i32>
  %tmp13 = mul nuw nsw <16 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <16 x i32>*
  store <16 x i32> %tmp13, <16 x i32>* %tmp15, align 4
  ret void
}

; %val1 = load <2 x i16>
; %op1 = zext<2 x i32> %val1
; %val2 = load <2 x i16>
; %op2 = zext<2 x i32> %val2
; %rst = mul <2 x i32> %op1, %op2
;
; Expected lowering: pmullw (low halves) and pmulhuw (unsigned high halves),
; interleaved with punpcklwd into i32 lanes.
define void @mul_2xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_2xi16:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    movdqa %xmm1, %xmm2
; CHECK-NEXT:    pmulhuw %xmm0, %xmm2
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; CHECK-NEXT:    movq %xmm1, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <2 x i16>*
  %wide.load = load <2 x i16>, <2 x i16>* %tmp7, align 1
  %tmp8 = zext <2 x i16> %wide.load to <2 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <2 x i16>*
  %wide.load17 = load <2 x i16>, <2 x i16>* %tmp11, align 1
  %tmp12 = zext <2 x i16> %wide.load17 to <2 x i32>
  %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
  ret void
}

; %val1 = load <4 x i16>
; %op1 = zext<4 x i32> %val1
; %val2 = load <4 x i16>
; %op2 = zext<4 x i32> %val2
; %rst = mul <4 x i32> %op1, %op2
;
; Expected lowering: pmullw + pmulhuw interleaved with punpcklwd.
define void @mul_4xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_4xi16:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    movdqa %xmm1, %xmm2
; CHECK-NEXT:    pmulhuw %xmm0, %xmm2
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; CHECK-NEXT:    movdqu %xmm1, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <4 x i16>*
  %wide.load = load <4 x i16>, <4 x i16>* %tmp7, align 1
  %tmp8 = zext <4 x i16> %wide.load to <4 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <4 x i16>*
  %wide.load17 = load <4 x i16>, <4 x i16>* %tmp11, align 1
  %tmp12 = zext <4 x i16> %wide.load17 to <4 x i32>
  %tmp13 = mul nuw nsw <4 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <4 x i32>*
  store <4 x i32> %tmp13, <4 x i32>* %tmp15, align 4
  ret void
}

; %val1 = load <8 x i16>
; %op1 = zext<8 x i32> %val1
; %val2 = load <8 x i16>
; %op2 = zext<8 x i32> %val2
; %rst = mul <8 x i32> %op1, %op2
;
; Expected lowering: pmullw + pmulhuw, interleaved into two result registers
; with punpcklwd/punpckhwd.
define void @mul_8xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_8xi16:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movdqu (%rdi,%rdx), %xmm0
; CHECK-NEXT:    movdqu (%rsi,%rdx), %xmm1
; CHECK-NEXT:    movdqa %xmm1, %xmm2
; CHECK-NEXT:    pmulhuw %xmm0, %xmm2
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; CHECK-NEXT:    movdqu %xmm1, 16(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm0, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <8 x i16>*
  %wide.load = load <8 x i16>, <8 x i16>* %tmp7, align 1
  %tmp8 = zext <8 x i16> %wide.load to <8 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <8 x i16>*
  %wide.load17 = load <8 x i16>, <8 x i16>* %tmp11, align 1
  %tmp12 = zext <8 x i16> %wide.load17 to <8 x i32>
  %tmp13 = mul nuw nsw <8 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <8 x i32>*
  store <8 x i32> %tmp13, <8 x i32>* %tmp15, align 4
  ret void
}

; %val1 = load <16 x i16>
; %op1 = zext<16 x i32> %val1
; %val2 = load <16 x i16>
; %op2 = zext<16 x i32> %val2
; %rst = mul <16 x i32> %op1, %op2
;
; Expected lowering: the two 128-bit halves are each multiplied with
; pmullw + pmulhuw and interleaved with punpcklwd/punpckhwd.
define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_16xi16:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movdqu (%rdi,%rdx), %xmm0
; CHECK-NEXT:    movdqu 16(%rdi,%rdx), %xmm1
; CHECK-NEXT:    movdqu (%rsi,%rdx), %xmm2
; CHECK-NEXT:    movdqu 16(%rsi,%rdx), %xmm3
; CHECK-NEXT:    movdqa %xmm2, %xmm4
; CHECK-NEXT:    pmulhuw %xmm0, %xmm4
; CHECK-NEXT:    pmullw %xmm0, %xmm2
; CHECK-NEXT:    movdqa %xmm2, %xmm0
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; CHECK-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; CHECK-NEXT:    movdqa %xmm3, %xmm4
; CHECK-NEXT:    pmulhuw %xmm1, %xmm4
; CHECK-NEXT:    pmullw %xmm1, %xmm3
; CHECK-NEXT:    movdqa %xmm3, %xmm1
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; CHECK-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; CHECK-NEXT:    movdqu %xmm3, 48(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm1, 32(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm2, 16(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm0, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <16 x i16>*
  %wide.load = load <16 x i16>, <16 x i16>* %tmp7, align 1
  %tmp8 = zext <16 x i16> %wide.load to <16 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <16 x i16>*
  %wide.load17 = load <16 x i16>, <16 x i16>* %tmp11, align 1
  %tmp12 = zext <16 x i16> %wide.load17 to <16 x i32>
  %tmp13 = mul nuw nsw <16 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <16 x i32>*
  store <16 x i32> %tmp13, <16 x i32>* %tmp15, align 4
  ret void
}

; %val1 = load <2 x i8>
; %op1 = sext<2 x i32> %val1
; %val2 = load <2 x i8>
; %op2 = sext<2 x i32> %val2
; %rst = mul <2 x i32> %op1, %op2
;
; Expected lowering: bytes are sign-extended to 16 bits (punpcklbw + psraw $8),
; multiplied with pmullw, and the product is sign-extended to i32 lanes
; (punpcklwd + psrad $16).
define void @mul_2xi8_sext(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_2xi8_sext:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movzwl (%rdi,%rdx), %ecx
; CHECK-NEXT:    movd %ecx, %xmm0
; CHECK-NEXT:    movzwl (%rsi,%rdx), %ecx
; CHECK-NEXT:    movd %ecx, %xmm1
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT:    psraw $8, %xmm0
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT:    psraw $8, %xmm1
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT:    psrad $16, %xmm0
; CHECK-NEXT:    movq %xmm0, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <2 x i8>*
  %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1
  %tmp8 = sext <2 x i8> %wide.load to <2 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <2 x i8>*
  %wide.load17 = load <2 x i8>, <2 x i8>* %tmp11, align 1
  %tmp12 = sext <2 x i8> %wide.load17 to <2 x i32>
  %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
  ret void
}

; %val1 = load <2 x i8>
; %op1 = sext<2 x i32> %val1
; %val2 = load <2 x i8>
; %op2 = zext<2 x i32> %val2
; %rst = mul <2 x i32> %op1, %op2
;
; Expected lowering: %a's bytes are sign-extended (punpcklbw + psraw $8) and
; %b's bytes are zero-extended (punpcklbw against a zeroed register); the
; product is formed from pmullw and pmulhw, interleaved with punpcklwd.
define void @mul_2xi8_sext_zext(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_2xi8_sext_zext:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movzwl (%rdi,%rdx), %ecx
; CHECK-NEXT:    movd %ecx, %xmm0
; CHECK-NEXT:    movzwl (%rsi,%rdx), %ecx
; CHECK-NEXT:    movd %ecx, %xmm1
; CHECK-NEXT:    pxor %xmm2, %xmm2
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT:    psraw $8, %xmm0
; CHECK-NEXT:    movdqa %xmm1, %xmm2
; CHECK-NEXT:    pmulhw %xmm0, %xmm2
; CHECK-NEXT:    pmullw %xmm1, %xmm0
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT:    movq %xmm0, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <2 x i8>*
  %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1
  %tmp8 = sext <2 x i8> %wide.load to <2 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <2 x i8>*
  %wide.load17 = load <2 x i8>, <2 x i8>* %tmp11, align 1
  %tmp12 = zext <2 x i8> %wide.load17 to <2 x i32>
  %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
  ret void
}

; %val1 = load <2 x i16>
; %op1 = sext<2 x i32> %val1
; %val2 = load <2 x i16>
; %op2 = sext<2 x i32> %val2
; %rst = mul <2 x i32> %op1, %op2
;
define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_2xi16_sext:
413*9880d681SAndroid Build Coastguard Worker; CHECK: # BB#0: # %entry 414*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq {{.*}}(%rip), %rax 415*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 416*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 417*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa %xmm1, %xmm2 418*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmulhw %xmm0, %xmm2 419*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmullw %xmm0, %xmm1 420*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 421*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq %xmm1, (%rax,%rdx,4) 422*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: retq 423*9880d681SAndroid Build Coastguard Workerentry: 424*9880d681SAndroid Build Coastguard Worker %pre = load i32*, i32** @c 425*9880d681SAndroid Build Coastguard Worker %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index 426*9880d681SAndroid Build Coastguard Worker %tmp7 = bitcast i8* %tmp6 to <2 x i16>* 427*9880d681SAndroid Build Coastguard Worker %wide.load = load <2 x i16>, <2 x i16>* %tmp7, align 1 428*9880d681SAndroid Build Coastguard Worker %tmp8 = sext <2 x i16> %wide.load to <2 x i32> 429*9880d681SAndroid Build Coastguard Worker %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index 430*9880d681SAndroid Build Coastguard Worker %tmp11 = bitcast i8* %tmp10 to <2 x i16>* 431*9880d681SAndroid Build Coastguard Worker %wide.load17 = load <2 x i16>, <2 x i16>* %tmp11, align 1 432*9880d681SAndroid Build Coastguard Worker %tmp12 = sext <2 x i16> %wide.load17 to <2 x i32> 433*9880d681SAndroid Build Coastguard Worker %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8 434*9880d681SAndroid Build Coastguard Worker %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index 435*9880d681SAndroid Build Coastguard 
Worker %tmp15 = bitcast i32* %tmp14 to <2 x i32>* 436*9880d681SAndroid Build Coastguard Worker store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 437*9880d681SAndroid Build Coastguard Worker ret void 438*9880d681SAndroid Build Coastguard Worker} 439*9880d681SAndroid Build Coastguard Worker 440*9880d681SAndroid Build Coastguard Worker; %val1 = load <2 x i16> 441*9880d681SAndroid Build Coastguard Worker; %op1 = sext<2 x i32> %val1 442*9880d681SAndroid Build Coastguard Worker; %val2 = load <2 x i16> 443*9880d681SAndroid Build Coastguard Worker; %op2 = zext<2 x i32> %val2 444*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2 445*9880d681SAndroid Build Coastguard Worker; 446*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) { 447*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi16_sext_zext: 448*9880d681SAndroid Build Coastguard Worker; CHECK: # BB#0: # %entry 449*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq {{.*}}(%rip), %rax 450*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 451*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 452*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: psrad $16, %xmm0 453*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] 454*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 455*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pxor %xmm2, %xmm2 456*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 457*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3] 458*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa %xmm1, %xmm2 459*9880d681SAndroid Build Coastguard Worker; 
CHECK-NEXT: pmuludq %xmm0, %xmm2 460*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa %xmm0, %xmm3 461*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: psrlq $32, %xmm3 462*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmuludq %xmm1, %xmm3 463*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: psllq $32, %xmm3 464*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: paddq %xmm2, %xmm3 465*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: psrlq $32, %xmm1 466*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmuludq %xmm0, %xmm1 467*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: psllq $32, %xmm1 468*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: paddq %xmm3, %xmm1 469*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 470*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq %xmm0, (%rax,%rdx,4) 471*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: retq 472*9880d681SAndroid Build Coastguard Workerentry: 473*9880d681SAndroid Build Coastguard Worker %pre = load i32*, i32** @c 474*9880d681SAndroid Build Coastguard Worker %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index 475*9880d681SAndroid Build Coastguard Worker %tmp7 = bitcast i8* %tmp6 to <2 x i16>* 476*9880d681SAndroid Build Coastguard Worker %wide.load = load <2 x i16>, <2 x i16>* %tmp7, align 1 477*9880d681SAndroid Build Coastguard Worker %tmp8 = sext <2 x i16> %wide.load to <2 x i32> 478*9880d681SAndroid Build Coastguard Worker %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index 479*9880d681SAndroid Build Coastguard Worker %tmp11 = bitcast i8* %tmp10 to <2 x i16>* 480*9880d681SAndroid Build Coastguard Worker %wide.load17 = load <2 x i16>, <2 x i16>* %tmp11, align 1 481*9880d681SAndroid Build Coastguard Worker %tmp12 = zext <2 x i16> %wide.load17 to <2 x i32> 482*9880d681SAndroid Build Coastguard Worker %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8 483*9880d681SAndroid Build Coastguard Worker %tmp14 = 
getelementptr inbounds i32, i32* %pre, i64 %index 484*9880d681SAndroid Build Coastguard Worker %tmp15 = bitcast i32* %tmp14 to <2 x i32>* 485*9880d681SAndroid Build Coastguard Worker store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 486*9880d681SAndroid Build Coastguard Worker ret void 487*9880d681SAndroid Build Coastguard Worker} 488*9880d681SAndroid Build Coastguard Worker 489*9880d681SAndroid Build Coastguard Worker; %val1 = load <16 x i16> 490*9880d681SAndroid Build Coastguard Worker; %op1 = sext<16 x i32> %val1 491*9880d681SAndroid Build Coastguard Worker; %val2 = load <16 x i16> 492*9880d681SAndroid Build Coastguard Worker; %op2 = sext<16 x i32> %val2 493*9880d681SAndroid Build Coastguard Worker; %rst = mul <16 x i32> %op1, %op2 494*9880d681SAndroid Build Coastguard Worker; 495*9880d681SAndroid Build Coastguard Workerdefine void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) { 496*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_16xi16_sext: 497*9880d681SAndroid Build Coastguard Worker; CHECK: # BB#0: # %entry 498*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq {{.*}}(%rip), %rax 499*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqu (%rdi,%rdx), %xmm0 500*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqu 16(%rdi,%rdx), %xmm1 501*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqu (%rsi,%rdx), %xmm2 502*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqu 16(%rsi,%rdx), %xmm3 503*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa %xmm2, %xmm4 504*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmulhw %xmm0, %xmm4 505*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmullw %xmm0, %xmm2 506*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa %xmm2, %xmm0 507*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 508*9880d681SAndroid Build Coastguard 
Worker; CHECK-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 509*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa %xmm3, %xmm4 510*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmulhw %xmm1, %xmm4 511*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmullw %xmm1, %xmm3 512*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa %xmm3, %xmm1 513*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 514*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 515*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqu %xmm3, 48(%rax,%rdx,4) 516*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqu %xmm1, 32(%rax,%rdx,4) 517*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqu %xmm2, 16(%rax,%rdx,4) 518*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqu %xmm0, (%rax,%rdx,4) 519*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: retq 520*9880d681SAndroid Build Coastguard Workerentry: 521*9880d681SAndroid Build Coastguard Worker %pre = load i32*, i32** @c 522*9880d681SAndroid Build Coastguard Worker %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index 523*9880d681SAndroid Build Coastguard Worker %tmp7 = bitcast i8* %tmp6 to <16 x i16>* 524*9880d681SAndroid Build Coastguard Worker %wide.load = load <16 x i16>, <16 x i16>* %tmp7, align 1 525*9880d681SAndroid Build Coastguard Worker %tmp8 = sext <16 x i16> %wide.load to <16 x i32> 526*9880d681SAndroid Build Coastguard Worker %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index 527*9880d681SAndroid Build Coastguard Worker %tmp11 = bitcast i8* %tmp10 to <16 x i16>* 528*9880d681SAndroid Build Coastguard Worker %wide.load17 = load <16 x i16>, <16 x i16>* %tmp11, align 1 529*9880d681SAndroid Build Coastguard Worker %tmp12 = sext <16 x i16> 
%wide.load17 to <16 x i32> 530*9880d681SAndroid Build Coastguard Worker %tmp13 = mul nuw nsw <16 x i32> %tmp12, %tmp8 531*9880d681SAndroid Build Coastguard Worker %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index 532*9880d681SAndroid Build Coastguard Worker %tmp15 = bitcast i32* %tmp14 to <16 x i32>* 533*9880d681SAndroid Build Coastguard Worker store <16 x i32> %tmp13, <16 x i32>* %tmp15, align 4 534*9880d681SAndroid Build Coastguard Worker ret void 535*9880d681SAndroid Build Coastguard Worker} 536*9880d681SAndroid Build Coastguard Worker 537*9880d681SAndroid Build Coastguard Worker; %val = load <2 x i8> 538*9880d681SAndroid Build Coastguard Worker; %op1 = zext<2 x i32> %val 539*9880d681SAndroid Build Coastguard Worker; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (0 ~ 255) 540*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2 541*9880d681SAndroid Build Coastguard Worker; 542*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) { 543*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi8_varconst1: 544*9880d681SAndroid Build Coastguard Worker; CHECK: # BB#0: # %entry 545*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq {{.*}}(%rip), %rax 546*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movzwl (%rdi,%rsi), %ecx 547*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movd %ecx, %xmm0 548*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pxor %xmm1, %xmm1 549*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 550*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmullw {{.*}}(%rip), %xmm0 551*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 552*9880d681SAndroid Build Coastguard 
Worker; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4) 553*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: retq 554*9880d681SAndroid Build Coastguard Workerentry: 555*9880d681SAndroid Build Coastguard Worker %pre = load i32*, i32** @c 556*9880d681SAndroid Build Coastguard Worker %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index 557*9880d681SAndroid Build Coastguard Worker %tmp7 = bitcast i8* %tmp6 to <2 x i8>* 558*9880d681SAndroid Build Coastguard Worker %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1 559*9880d681SAndroid Build Coastguard Worker %tmp8 = zext <2 x i8> %wide.load to <2 x i32> 560*9880d681SAndroid Build Coastguard Worker %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 0, i32 255> 561*9880d681SAndroid Build Coastguard Worker %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index 562*9880d681SAndroid Build Coastguard Worker %tmp15 = bitcast i32* %tmp14 to <2 x i32>* 563*9880d681SAndroid Build Coastguard Worker store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 564*9880d681SAndroid Build Coastguard Worker ret void 565*9880d681SAndroid Build Coastguard Worker} 566*9880d681SAndroid Build Coastguard Worker 567*9880d681SAndroid Build Coastguard Worker; %val = load <2 x i8> 568*9880d681SAndroid Build Coastguard Worker; %op1 = sext<2 x i32> %val 569*9880d681SAndroid Build Coastguard Worker; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (-128 ~ 127) 570*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2 571*9880d681SAndroid Build Coastguard Worker; 572*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi8_varconst2(i8* nocapture readonly %a, i64 %index) { 573*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi8_varconst2: 574*9880d681SAndroid Build Coastguard Worker; CHECK: # BB#0: # %entry 575*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq {{.*}}(%rip), %rax 576*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movzwl (%rdi,%rsi), %ecx 577*9880d681SAndroid Build Coastguard Worker; 
CHECK-NEXT: movd %ecx, %xmm0 578*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 579*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: psraw $8, %xmm0 580*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmullw {{.*}}(%rip), %xmm0 581*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 582*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: psrad $16, %xmm0 583*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4) 584*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: retq 585*9880d681SAndroid Build Coastguard Workerentry: 586*9880d681SAndroid Build Coastguard Worker %pre = load i32*, i32** @c 587*9880d681SAndroid Build Coastguard Worker %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index 588*9880d681SAndroid Build Coastguard Worker %tmp7 = bitcast i8* %tmp6 to <2 x i8>* 589*9880d681SAndroid Build Coastguard Worker %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1 590*9880d681SAndroid Build Coastguard Worker %tmp8 = sext <2 x i8> %wide.load to <2 x i32> 591*9880d681SAndroid Build Coastguard Worker %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 -128, i32 127> 592*9880d681SAndroid Build Coastguard Worker %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index 593*9880d681SAndroid Build Coastguard Worker %tmp15 = bitcast i32* %tmp14 to <2 x i32>* 594*9880d681SAndroid Build Coastguard Worker store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 595*9880d681SAndroid Build Coastguard Worker ret void 596*9880d681SAndroid Build Coastguard Worker} 597*9880d681SAndroid Build Coastguard Worker 598*9880d681SAndroid Build Coastguard Worker; %val = load <2 x i8> 599*9880d681SAndroid Build Coastguard Worker; %op1 = zext<2 x i32> %val 600*9880d681SAndroid Build Coastguard Worker; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (0 ~ 256) 601*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2 
602*9880d681SAndroid Build Coastguard Worker; 603*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) { 604*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi8_varconst3: 605*9880d681SAndroid Build Coastguard Worker; CHECK: # BB#0: # %entry 606*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq {{.*}}(%rip), %rax 607*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movzwl (%rdi,%rsi), %ecx 608*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movd %ecx, %xmm0 609*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pxor %xmm1, %xmm1 610*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 611*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <0,256,u,u,u,u,u,u> 612*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa %xmm0, %xmm2 613*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmulhw %xmm1, %xmm2 614*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmullw %xmm1, %xmm0 615*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 616*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4) 617*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: retq 618*9880d681SAndroid Build Coastguard Workerentry: 619*9880d681SAndroid Build Coastguard Worker %pre = load i32*, i32** @c 620*9880d681SAndroid Build Coastguard Worker %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index 621*9880d681SAndroid Build Coastguard Worker %tmp7 = bitcast i8* %tmp6 to <2 x i8>* 622*9880d681SAndroid Build Coastguard Worker %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1 623*9880d681SAndroid Build Coastguard Worker %tmp8 = zext <2 x i8> %wide.load to <2 x i32> 624*9880d681SAndroid Build 
Coastguard Worker %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 0, i32 256> 625*9880d681SAndroid Build Coastguard Worker %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index 626*9880d681SAndroid Build Coastguard Worker %tmp15 = bitcast i32* %tmp14 to <2 x i32>* 627*9880d681SAndroid Build Coastguard Worker store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 628*9880d681SAndroid Build Coastguard Worker ret void 629*9880d681SAndroid Build Coastguard Worker} 630*9880d681SAndroid Build Coastguard Worker 631*9880d681SAndroid Build Coastguard Worker; %val = load <2 x i8> 632*9880d681SAndroid Build Coastguard Worker; %op1 = zext<2 x i32> %val 633*9880d681SAndroid Build Coastguard Worker; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (-1 ~ 255) 634*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2 635*9880d681SAndroid Build Coastguard Worker; 636*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi8_varconst4(i8* nocapture readonly %a, i64 %index) { 637*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi8_varconst4: 638*9880d681SAndroid Build Coastguard Worker; CHECK: # BB#0: # %entry 639*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq {{.*}}(%rip), %rax 640*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movzwl (%rdi,%rsi), %ecx 641*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movd %ecx, %xmm0 642*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pxor %xmm1, %xmm1 643*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 644*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <65535,255,u,u,u,u,u,u> 645*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa %xmm0, %xmm2 646*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmulhw %xmm1, %xmm2 647*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: 
pmullw %xmm1, %xmm0 648*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 649*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4) 650*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: retq 651*9880d681SAndroid Build Coastguard Workerentry: 652*9880d681SAndroid Build Coastguard Worker %pre = load i32*, i32** @c 653*9880d681SAndroid Build Coastguard Worker %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index 654*9880d681SAndroid Build Coastguard Worker %tmp7 = bitcast i8* %tmp6 to <2 x i8>* 655*9880d681SAndroid Build Coastguard Worker %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1 656*9880d681SAndroid Build Coastguard Worker %tmp8 = zext <2 x i8> %wide.load to <2 x i32> 657*9880d681SAndroid Build Coastguard Worker %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 -1, i32 255> 658*9880d681SAndroid Build Coastguard Worker %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index 659*9880d681SAndroid Build Coastguard Worker %tmp15 = bitcast i32* %tmp14 to <2 x i32>* 660*9880d681SAndroid Build Coastguard Worker store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 661*9880d681SAndroid Build Coastguard Worker ret void 662*9880d681SAndroid Build Coastguard Worker} 663*9880d681SAndroid Build Coastguard Worker 664*9880d681SAndroid Build Coastguard Worker; %val = load <2 x i8> 665*9880d681SAndroid Build Coastguard Worker; %op1 = sext<2 x i32> %val 666*9880d681SAndroid Build Coastguard Worker; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (-129 ~ 127) 667*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2 668*9880d681SAndroid Build Coastguard Worker; 669*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi8_varconst5(i8* nocapture readonly %a, i64 %index) { 670*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi8_varconst5: 671*9880d681SAndroid Build Coastguard Worker; CHECK: # BB#0: # %entry 
672*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq {{.*}}(%rip), %rax 673*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movzwl (%rdi,%rsi), %ecx 674*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movd %ecx, %xmm0 675*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 676*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: psraw $8, %xmm0 677*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <65407,127,u,u,u,u,u,u> 678*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa %xmm0, %xmm2 679*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmulhw %xmm1, %xmm2 680*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmullw %xmm1, %xmm0 681*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 682*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4) 683*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: retq 684*9880d681SAndroid Build Coastguard Workerentry: 685*9880d681SAndroid Build Coastguard Worker %pre = load i32*, i32** @c 686*9880d681SAndroid Build Coastguard Worker %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index 687*9880d681SAndroid Build Coastguard Worker %tmp7 = bitcast i8* %tmp6 to <2 x i8>* 688*9880d681SAndroid Build Coastguard Worker %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1 689*9880d681SAndroid Build Coastguard Worker %tmp8 = sext <2 x i8> %wide.load to <2 x i32> 690*9880d681SAndroid Build Coastguard Worker %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 -129, i32 127> 691*9880d681SAndroid Build Coastguard Worker %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index 692*9880d681SAndroid Build Coastguard Worker %tmp15 = bitcast i32* %tmp14 to <2 x i32>* 693*9880d681SAndroid Build Coastguard Worker store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 694*9880d681SAndroid Build Coastguard 
Worker ret void 695*9880d681SAndroid Build Coastguard Worker} 696*9880d681SAndroid Build Coastguard Worker 697*9880d681SAndroid Build Coastguard Worker; %val = load <2 x i8> 698*9880d681SAndroid Build Coastguard Worker; %op1 = sext<2 x i32> %val 699*9880d681SAndroid Build Coastguard Worker; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (-128 ~ 128) 700*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2 701*9880d681SAndroid Build Coastguard Worker; 702*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi8_varconst6(i8* nocapture readonly %a, i64 %index) { 703*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi8_varconst6: 704*9880d681SAndroid Build Coastguard Worker; CHECK: # BB#0: # %entry 705*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq {{.*}}(%rip), %rax 706*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movzwl (%rdi,%rsi), %ecx 707*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movd %ecx, %xmm0 708*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 709*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: psraw $8, %xmm0 710*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <65408,128,u,u,u,u,u,u> 711*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa %xmm0, %xmm2 712*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmulhw %xmm1, %xmm2 713*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmullw %xmm1, %xmm0 714*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 715*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4) 716*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: retq 717*9880d681SAndroid Build Coastguard Workerentry: 718*9880d681SAndroid Build Coastguard Worker %pre = load i32*, i32** @c 719*9880d681SAndroid Build Coastguard Worker %tmp6 = 
getelementptr inbounds i8, i8* %a, i64 %index 720*9880d681SAndroid Build Coastguard Worker %tmp7 = bitcast i8* %tmp6 to <2 x i8>* 721*9880d681SAndroid Build Coastguard Worker %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1 722*9880d681SAndroid Build Coastguard Worker %tmp8 = sext <2 x i8> %wide.load to <2 x i32> 723*9880d681SAndroid Build Coastguard Worker %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 -128, i32 128> 724*9880d681SAndroid Build Coastguard Worker %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index 725*9880d681SAndroid Build Coastguard Worker %tmp15 = bitcast i32* %tmp14 to <2 x i32>* 726*9880d681SAndroid Build Coastguard Worker store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 727*9880d681SAndroid Build Coastguard Worker ret void 728*9880d681SAndroid Build Coastguard Worker} 729*9880d681SAndroid Build Coastguard Worker 730*9880d681SAndroid Build Coastguard Worker; %val = load <2 x i16> 731*9880d681SAndroid Build Coastguard Worker; %op1 = zext<2 x i32> %val 732*9880d681SAndroid Build Coastguard Worker; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (0 ~ 65535) 733*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2 734*9880d681SAndroid Build Coastguard Worker; 735*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi16_varconst1(i8* nocapture readonly %a, i64 %index) { 736*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi16_varconst1: 737*9880d681SAndroid Build Coastguard Worker; CHECK: # BB#0: # %entry 738*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq {{.*}}(%rip), %rax 739*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 740*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <0,65535,u,u,u,u,u,u> 741*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movdqa %xmm0, %xmm2 742*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: pmulhuw %xmm1, %xmm2 743*9880d681SAndroid Build Coastguard Worker; 
CHECK-NEXT: pmullw %xmm1, %xmm0
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4)
; CHECK-NEXT: retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <2 x i16>*
  %wide.load = load <2 x i16>, <2 x i16>* %tmp7, align 1
  %tmp8 = zext <2 x i16> %wide.load to <2 x i32>
  %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 0, i32 65535>
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
  ret void
}

; %val = load <2 x i16>
; %op1 = sext<2 x i32> %val
; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (-32768 ~ 32767)
; %rst = mul <2 x i32> %op1, %op2
;
; Both constants are representable as signed 16-bit values. The expected code
; multiplies in 16-bit lanes: pmullw yields the low halves, pmulhw the signed
; high halves, and punpcklwd interleaves them into the two i32 results that are
; stored with a single movq. In the expected constant-pool operand -32768
; appears as 32768 (same 16-bit pattern, printed unsigned).
;
; NOTE(review): the multiply carries 'nuw' although %op1 is sign-extended and
; c1 is negative, so the unsigned product wraps for most inputs. The flags are
; left untouched because the assertions below were generated from this exact
; IR; confirm whether 'nuw' is intentional.
;
define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) {
; CHECK-LABEL: mul_2xi16_varconst2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movq {{.*}}(%rip), %rax
; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <32768,32767,u,u,u,u,u,u>
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: pmulhw %xmm1, %xmm2
; CHECK-NEXT: pmullw %xmm1, %xmm0
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4)
; CHECK-NEXT: retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <2 x i16>*
  %wide.load = load <2 x i16>, <2 x i16>* %tmp7, align 1
  %tmp8 = sext <2 x i16> %wide.load to <2 x i32>
  %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 -32768, i32 32767>
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
  ret void
}

; %val = load <2 x i16>
; %op1 = zext<2 x i32> %val
; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (0 ~ 65536)
; %rst = mul <2 x i32> %op1, %op2
;
; The upper bound is inclusive: c2 = 65536 (0x10000) is one past the largest
; unsigned 16-bit value. The expected code does not use the 16-bit
; pmullw/pmulhw pair checked in mul_2xi16_varconst2; it zero-extends the input
; with pxor + punpcklwd and performs the multiply with the
; pmuludq / psrlq / pmuludq / psllq / paddq sequence.
;
; NOTE(review): with a zero-extended input of 32768 or more, %val * 65536 no
; longer fits in a signed i32, which conflicts with the 'nsw' flag. The flags
; are left untouched because the assertions below were generated from this
; exact IR; confirm whether 'nsw' is intentional.
;
define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
; CHECK-LABEL: mul_2xi16_varconst3:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movq {{.*}}(%rip), %rax
; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; CHECK-NEXT: movl $65536, %ecx # imm = 0x10000
; CHECK-NEXT: movd %rcx, %xmm1
; CHECK-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: pmuludq %xmm1, %xmm2
; CHECK-NEXT: psrlq $32, %xmm0
; CHECK-NEXT: pmuludq %xmm1, %xmm0
; CHECK-NEXT: psllq $32, %xmm0
; CHECK-NEXT: paddq %xmm2, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4)
; CHECK-NEXT: retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <2 x i16>*
  %wide.load = load <2 x i16>, <2 x i16>* %tmp7, align 1
  %tmp8 = zext <2 x i16> %wide.load to <2 x i32>
  %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 0, i32 65536>
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
  ret void
}

; %val = load <2 x i16>
; %op1 = sext<2 x i32> %val
; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (0 ~ 32768)
; %rst = mul <2 x i32> %op1, %op2
;
; The upper bound is inclusive: c2 = 32768 (0x8000) is one past the largest
; signed 16-bit value. The expected code does not use the 16-bit
; pmullw/pmulhw pair checked in mul_2xi16_varconst2; it sign-extends the input
; with punpcklwd + psrad $16 and performs the multiply with the
; pmuludq / psrlq / pmuludq / psllq / paddq sequence.
;
; NOTE(review): the multiply carries 'nuw' although %op1 is sign-extended, so
; the unsigned product wraps for negative inputs in the c2 lane. The flags are
; left untouched because the assertions below were generated from this exact
; IR; confirm whether 'nuw' is intentional.
;
define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
; CHECK-LABEL: mul_2xi16_varconst4:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movq {{.*}}(%rip), %rax
; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; CHECK-NEXT: psrad $16, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; CHECK-NEXT: movl $32768, %ecx # imm = 0x8000
; CHECK-NEXT: movd %rcx, %xmm1
; CHECK-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: pmuludq %xmm1, %xmm2
; CHECK-NEXT: psrlq $32, %xmm0
; CHECK-NEXT: pmuludq %xmm1, %xmm0
; CHECK-NEXT: psllq $32, %xmm0
; CHECK-NEXT: paddq %xmm2, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT: movq %xmm0, (%rax,%rsi,4)
; CHECK-NEXT: retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <2 x i16>*
  %wide.load = load <2 x i16>, <2 x i16>* %tmp7, align 1
  %tmp8 = sext <2 x i16> %wide.load to <2 x i32>
  %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 0, i32 32768>
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
  ret void
}