; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s

; Origin: /aosp_15_r20/external/llvm/test/CodeGen/X86/shrink_vmul.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
;
; Each function in this file multiplies two narrow integer vectors that were
; extended to i32 and compares the SSE2 code llc emits against the recorded
; assembly.
;
; @c holds the destination base pointer: every test function loads it and
; stores its i32 vector result at element %index of the array it points to.
@c = external global i32*, align 8

; Zero-extended <2 x i8> multiply: loads two bytes from %a + %index and from
; %b + %index, widens both to <2 x i32>, multiplies them, and stores the
; product at element %index of the i32 array that @c points to.
; The expected assembly does the multiply as a 16-bit pmullw and widens the
; result to 32 bits by interleaving it with a zeroed register.
;
; %val1 = load <2 x i8>
; %op1 = zext<2 x i32> %val1
; %val2 = load <2 x i8>
; %op2 = zext<2 x i32> %val2
; %rst = mul <2 x i32> %op1, %op2
;
define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_2xi8:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movzwl (%rdi,%rdx), %ecx
; CHECK-NEXT:    movd %ecx, %xmm0
; CHECK-NEXT:    movzwl (%rsi,%rdx), %ecx
; CHECK-NEXT:    movd %ecx, %xmm1
; CHECK-NEXT:    pxor %xmm2, %xmm2
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; CHECK-NEXT:    movq %xmm1, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <2 x i8>*
  %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1
  %tmp8 = zext <2 x i8> %wide.load to <2 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <2 x i8>*
  %wide.load17 = load <2 x i8>, <2 x i8>* %tmp11, align 1
  %tmp12 = zext <2 x i8> %wide.load17 to <2 x i32>
  %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
  ret void
}

; Zero-extended <4 x i8> multiply: loads four bytes from %a + %index and from
; %b + %index, widens both to <4 x i32>, multiplies them, and stores the
; product at element %index of the i32 array that @c points to.
; The expected assembly does the multiply as a 16-bit pmullw and widens the
; result with a single punpcklwd against a zeroed register.
;
; %val1 = load <4 x i8>
; %op1 = zext<4 x i32> %val1
; %val2 = load <4 x i8>
; %op2 = zext<4 x i32> %val2
; %rst = mul <4 x i32> %op1, %op2
;
define void @mul_4xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_4xi8:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    pxor %xmm2, %xmm2
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; CHECK-NEXT:    movdqu %xmm1, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <4 x i8>*
  %wide.load = load <4 x i8>, <4 x i8>* %tmp7, align 1
  %tmp8 = zext <4 x i8> %wide.load to <4 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <4 x i8>*
  %wide.load17 = load <4 x i8>, <4 x i8>* %tmp11, align 1
  %tmp12 = zext <4 x i8> %wide.load17 to <4 x i32>
  %tmp13 = mul nuw nsw <4 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <4 x i32>*
  store <4 x i32> %tmp13, <4 x i32>* %tmp15, align 4
  ret void
}

; Zero-extended <8 x i8> multiply: loads eight bytes from %a + %index and from
; %b + %index, widens both to <8 x i32>, multiplies them, and stores the
; product at element %index of the i32 array that @c points to.
; The expected assembly uses one 16-bit pmullw, then splits the result into
; low and high halves (punpcklwd / punpckhwd against zero) and writes them
; with two 16-byte stores.
;
; %val1 = load <8 x i8>
; %op1 = zext<8 x i32> %val1
; %val2 = load <8 x i8>
; %op2 = zext<8 x i32> %val2
; %rst = mul <8 x i32> %op1, %op2
;
define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_8xi8:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    pxor %xmm2, %xmm2
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; CHECK-NEXT:    movdqu %xmm1, 16(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm0, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <8 x i8>*
  %wide.load = load <8 x i8>, <8 x i8>* %tmp7, align 1
  %tmp8 = zext <8 x i8> %wide.load to <8 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <8 x i8>*
  %wide.load17 = load <8 x i8>, <8 x i8>* %tmp11, align 1
  %tmp12 = zext <8 x i8> %wide.load17 to <8 x i32>
  %tmp13 = mul nuw nsw <8 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <8 x i32>*
  store <8 x i32> %tmp13, <8 x i32>* %tmp15, align 4
  ret void
}

; Zero-extended <16 x i8> multiply: loads sixteen bytes from %a + %index and
; from %b + %index, widens both to <16 x i32>, multiplies them, and stores the
; product at element %index of the i32 array that @c points to.
; The expected assembly handles the low and high eight bytes separately: each
; half is unpacked to words, multiplied with pmullw, widened to dwords against
; a zeroed register, and the four resulting vectors are written with four
; 16-byte stores.
;
; %val1 = load <16 x i8>
; %op1 = zext<16 x i32> %val1
; %val2 = load <16 x i8>
; %op2 = zext<16 x i32> %val2
; %rst = mul <16 x i32> %op1, %op2
;
define void @mul_16xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_16xi8:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movdqu (%rdi,%rdx), %xmm0
; CHECK-NEXT:    movdqu (%rsi,%rdx), %xmm1
; CHECK-NEXT:    pxor %xmm2, %xmm2
; CHECK-NEXT:    movdqa %xmm0, %xmm3
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; CHECK-NEXT:    movdqa %xmm1, %xmm4
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; CHECK-NEXT:    pmullw %xmm3, %xmm4
; CHECK-NEXT:    movdqa %xmm4, %xmm3
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; CHECK-NEXT:    punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; CHECK-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
; CHECK-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; CHECK-NEXT:    movdqu %xmm1, 48(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm0, 32(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm4, 16(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm3, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <16 x i8>*
  %wide.load = load <16 x i8>, <16 x i8>* %tmp7, align 1
  %tmp8 = zext <16 x i8> %wide.load to <16 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <16 x i8>*
  %wide.load17 = load <16 x i8>, <16 x i8>* %tmp11, align 1
  %tmp12 = zext <16 x i8> %wide.load17 to <16 x i32>
  %tmp13 = mul nuw nsw <16 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <16 x i32>*
  store <16 x i32> %tmp13, <16 x i32>* %tmp15, align 4
  ret void
}

; Zero-extended <2 x i16> multiply: loads two 16-bit elements from
; %a + %index and from %b + %index (byte offsets, align 1), widens both to
; <2 x i32>, multiplies them, and stores the product at element %index of the
; i32 array that @c points to.
; The expected assembly computes the low 16 bits of each product with pmullw
; and the high 16 bits with pmulhuw, then interleaves the two with punpcklwd.
;
; %val1 = load <2 x i16>
; %op1 = zext<2 x i32> %val1
; %val2 = load <2 x i16>
; %op2 = zext<2 x i32> %val2
; %rst = mul <2 x i32> %op1, %op2
;
define void @mul_2xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_2xi16:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    movdqa %xmm1, %xmm2
; CHECK-NEXT:    pmulhuw %xmm0, %xmm2
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; CHECK-NEXT:    movq %xmm1, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <2 x i16>*
  %wide.load = load <2 x i16>, <2 x i16>* %tmp7, align 1
  %tmp8 = zext <2 x i16> %wide.load to <2 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <2 x i16>*
  %wide.load17 = load <2 x i16>, <2 x i16>* %tmp11, align 1
  %tmp12 = zext <2 x i16> %wide.load17 to <2 x i32>
  %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
  ret void
}

; Zero-extended <4 x i16> multiply: loads four 16-bit elements from
; %a + %index and from %b + %index (byte offsets, align 1), widens both to
; <4 x i32>, multiplies them, and stores the product at element %index of the
; i32 array that @c points to.
; The expected assembly computes the low 16 bits of each product with pmullw
; and the high 16 bits with pmulhuw, then interleaves the two with punpcklwd
; and writes one 16-byte store.
;
; %val1 = load <4 x i16>
; %op1 = zext<4 x i32> %val1
; %val2 = load <4 x i16>
; %op2 = zext<4 x i32> %val2
; %rst = mul <4 x i32> %op1, %op2
;
define void @mul_4xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_4xi16:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    movdqa %xmm1, %xmm2
; CHECK-NEXT:    pmulhuw %xmm0, %xmm2
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; CHECK-NEXT:    movdqu %xmm1, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <4 x i16>*
  %wide.load = load <4 x i16>, <4 x i16>* %tmp7, align 1
  %tmp8 = zext <4 x i16> %wide.load to <4 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <4 x i16>*
  %wide.load17 = load <4 x i16>, <4 x i16>* %tmp11, align 1
  %tmp12 = zext <4 x i16> %wide.load17 to <4 x i32>
  %tmp13 = mul nuw nsw <4 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <4 x i32>*
  store <4 x i32> %tmp13, <4 x i32>* %tmp15, align 4
  ret void
}

; Zero-extended <8 x i16> multiply: loads eight 16-bit elements from
; %a + %index and from %b + %index (byte offsets, align 1), widens both to
; <8 x i32>, multiplies them, and stores the product at element %index of the
; i32 array that @c points to.
; The expected assembly computes low halves with pmullw and high halves with
; pmulhuw, interleaves them with punpcklwd / punpckhwd, and writes the result
; with two 16-byte stores.
;
; %val1 = load <8 x i16>
; %op1 = zext<8 x i32> %val1
; %val2 = load <8 x i16>
; %op2 = zext<8 x i32> %val2
; %rst = mul <8 x i32> %op1, %op2
;
define void @mul_8xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_8xi16:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movdqu (%rdi,%rdx), %xmm0
; CHECK-NEXT:    movdqu (%rsi,%rdx), %xmm1
; CHECK-NEXT:    movdqa %xmm1, %xmm2
; CHECK-NEXT:    pmulhuw %xmm0, %xmm2
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; CHECK-NEXT:    movdqu %xmm1, 16(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm0, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <8 x i16>*
  %wide.load = load <8 x i16>, <8 x i16>* %tmp7, align 1
  %tmp8 = zext <8 x i16> %wide.load to <8 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <8 x i16>*
  %wide.load17 = load <8 x i16>, <8 x i16>* %tmp11, align 1
  %tmp12 = zext <8 x i16> %wide.load17 to <8 x i32>
  %tmp13 = mul nuw nsw <8 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <8 x i32>*
  store <8 x i32> %tmp13, <8 x i32>* %tmp15, align 4
  ret void
}

; Zero-extended <16 x i16> multiply: loads sixteen 16-bit elements from
; %a + %index and from %b + %index (byte offsets, align 1), widens both to
; <16 x i32>, multiplies them, and stores the product at element %index of
; the i32 array that @c points to.
; The expected assembly processes the input as two 16-byte halves; for each
; half it computes low words with pmullw and high words with pmulhuw,
; interleaves them with punpcklwd / punpckhwd, and the four resulting vectors
; are written with four 16-byte stores.
;
; %val1 = load <16 x i16>
; %op1 = zext<16 x i32> %val1
; %val2 = load <16 x i16>
; %op2 = zext<16 x i32> %val2
; %rst = mul <16 x i32> %op1, %op2
;
define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_16xi16:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movdqu (%rdi,%rdx), %xmm0
; CHECK-NEXT:    movdqu 16(%rdi,%rdx), %xmm1
; CHECK-NEXT:    movdqu (%rsi,%rdx), %xmm2
; CHECK-NEXT:    movdqu 16(%rsi,%rdx), %xmm3
; CHECK-NEXT:    movdqa %xmm2, %xmm4
; CHECK-NEXT:    pmulhuw %xmm0, %xmm4
; CHECK-NEXT:    pmullw %xmm0, %xmm2
; CHECK-NEXT:    movdqa %xmm2, %xmm0
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; CHECK-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; CHECK-NEXT:    movdqa %xmm3, %xmm4
; CHECK-NEXT:    pmulhuw %xmm1, %xmm4
; CHECK-NEXT:    pmullw %xmm1, %xmm3
; CHECK-NEXT:    movdqa %xmm3, %xmm1
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; CHECK-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; CHECK-NEXT:    movdqu %xmm3, 48(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm1, 32(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm2, 16(%rax,%rdx,4)
; CHECK-NEXT:    movdqu %xmm0, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <16 x i16>*
  %wide.load = load <16 x i16>, <16 x i16>* %tmp7, align 1
  %tmp8 = zext <16 x i16> %wide.load to <16 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <16 x i16>*
  %wide.load17 = load <16 x i16>, <16 x i16>* %tmp11, align 1
  %tmp12 = zext <16 x i16> %wide.load17 to <16 x i32>
  %tmp13 = mul nuw nsw <16 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <16 x i32>*
  store <16 x i32> %tmp13, <16 x i32>* %tmp15, align 4
  ret void
}

; Sign-extended <2 x i8> multiply: loads two bytes from %a + %index and from
; %b + %index, sign-extends both to <2 x i32>, multiplies them, and stores the
; product at element %index of the i32 array that @c points to.
; The expected assembly widens each byte to a word with punpcklbw + psraw $8,
; multiplies with a 16-bit pmullw, and widens the product to 32 bits with
; punpcklwd + psrad $16.
;
; NOTE(review): the IR multiply carries `nuw` even though both operands are
; sign-extended, so it can be poison for negative inputs. It is left as-is
; because the expected assembly was generated from this exact IR -- confirm
; before changing the flags.
;
; %val1 = load <2 x i8>
; %op1 = sext<2 x i32> %val1
; %val2 = load <2 x i8>
; %op2 = sext<2 x i32> %val2
; %rst = mul <2 x i32> %op1, %op2
;
define void @mul_2xi8_sext(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
; CHECK-LABEL: mul_2xi8_sext:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movzwl (%rdi,%rdx), %ecx
; CHECK-NEXT:    movd %ecx, %xmm0
; CHECK-NEXT:    movzwl (%rsi,%rdx), %ecx
; CHECK-NEXT:    movd %ecx, %xmm1
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT:    psraw $8, %xmm0
; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT:    psraw $8, %xmm1
; CHECK-NEXT:    pmullw %xmm0, %xmm1
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT:    psrad $16, %xmm0
; CHECK-NEXT:    movq %xmm0, (%rax,%rdx,4)
; CHECK-NEXT:    retq
entry:
  %pre = load i32*, i32** @c
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <2 x i8>*
  %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1
  %tmp8 = sext <2 x i8> %wide.load to <2 x i32>
  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
  %tmp11 = bitcast i8* %tmp10 to <2 x i8>*
  %wide.load17 = load <2 x i8>, <2 x i8>* %tmp11, align 1
  %tmp12 = sext <2 x i8> %wide.load17 to <2 x i32>
  %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
  ret void
}

364*9880d681SAndroid Build Coastguard Worker; %val1 = load <2 x i8>
365*9880d681SAndroid Build Coastguard Worker; %op1 = sext<2 x i32> %val1
366*9880d681SAndroid Build Coastguard Worker; %val2 = load <2 x i8>
367*9880d681SAndroid Build Coastguard Worker; %op2 = zext<2 x i32> %val2
368*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2
369*9880d681SAndroid Build Coastguard Worker; Mixed sign/zero extension of i8 inputs: the expected assembly multiplies in 16-bit lanes (pmullw for the low halves, pmulhw for the high halves) and interleaves the halves with punpcklwd.
370*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi8_sext_zext(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
371*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi8_sext_zext:
372*9880d681SAndroid Build Coastguard Worker; CHECK:       # BB#0: # %entry
373*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq {{.*}}(%rip), %rax
374*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movzwl (%rdi,%rdx), %ecx
375*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movd %ecx, %xmm0
376*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movzwl (%rsi,%rdx), %ecx
377*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movd %ecx, %xmm1
378*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pxor %xmm2, %xmm2
379*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
380*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
381*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    psraw $8, %xmm0
382*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa %xmm1, %xmm2
383*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmulhw %xmm0, %xmm2
384*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmullw %xmm1, %xmm0
385*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
386*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq %xmm0, (%rax,%rdx,4)
387*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retq
388*9880d681SAndroid Build Coastguard Workerentry:
389*9880d681SAndroid Build Coastguard Worker  %pre = load i32*, i32** @c ; output pointer read from global @c
390*9880d681SAndroid Build Coastguard Worker  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index ; %a advanced by %index bytes
391*9880d681SAndroid Build Coastguard Worker  %tmp7 = bitcast i8* %tmp6 to <2 x i8>*
392*9880d681SAndroid Build Coastguard Worker  %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1 ; byte-aligned vector load
393*9880d681SAndroid Build Coastguard Worker  %tmp8 = sext <2 x i8> %wide.load to <2 x i32> ; lanes of %a are sign-extended
394*9880d681SAndroid Build Coastguard Worker  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index ; %b advanced by %index bytes
395*9880d681SAndroid Build Coastguard Worker  %tmp11 = bitcast i8* %tmp10 to <2 x i8>*
396*9880d681SAndroid Build Coastguard Worker  %wide.load17 = load <2 x i8>, <2 x i8>* %tmp11, align 1 ; byte-aligned vector load
397*9880d681SAndroid Build Coastguard Worker  %tmp12 = zext <2 x i8> %wide.load17 to <2 x i32> ; lanes of %b are zero-extended
398*9880d681SAndroid Build Coastguard Worker  %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8 ; NOTE(review): nuw with a sign-extended operand looks suspect for negative lanes - confirm the flags are intended
399*9880d681SAndroid Build Coastguard Worker  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index ; output slot: %index counts i32 elements here
400*9880d681SAndroid Build Coastguard Worker  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
401*9880d681SAndroid Build Coastguard Worker  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 ; write both products, 4-byte aligned
402*9880d681SAndroid Build Coastguard Worker  ret void
403*9880d681SAndroid Build Coastguard Worker}
404*9880d681SAndroid Build Coastguard Worker
405*9880d681SAndroid Build Coastguard Worker; %val1 = load <2 x i16>
406*9880d681SAndroid Build Coastguard Worker; %op1 = sext<2 x i32> %val1
407*9880d681SAndroid Build Coastguard Worker; %val2 = load <2 x i16>
408*9880d681SAndroid Build Coastguard Worker; %op2 = sext<2 x i32> %val2
409*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2
410*9880d681SAndroid Build Coastguard Worker; Both i16 inputs are sign-extended: the expected assembly loads each pair with movd and builds the i32 products from pmullw (low halves) and pmulhw (high halves), joined by punpcklwd.
411*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
412*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi16_sext:
413*9880d681SAndroid Build Coastguard Worker; CHECK:       # BB#0: # %entry
414*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq {{.*}}(%rip), %rax
415*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
416*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
417*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa %xmm1, %xmm2
418*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmulhw %xmm0, %xmm2
419*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmullw %xmm0, %xmm1
420*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
421*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq %xmm1, (%rax,%rdx,4)
422*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retq
423*9880d681SAndroid Build Coastguard Workerentry:
424*9880d681SAndroid Build Coastguard Worker  %pre = load i32*, i32** @c ; output pointer read from global @c
425*9880d681SAndroid Build Coastguard Worker  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index ; %a advanced by %index bytes (not i16 elements)
426*9880d681SAndroid Build Coastguard Worker  %tmp7 = bitcast i8* %tmp6 to <2 x i16>*
427*9880d681SAndroid Build Coastguard Worker  %wide.load = load <2 x i16>, <2 x i16>* %tmp7, align 1 ; byte-aligned vector load
428*9880d681SAndroid Build Coastguard Worker  %tmp8 = sext <2 x i16> %wide.load to <2 x i32> ; lanes of %a are sign-extended
429*9880d681SAndroid Build Coastguard Worker  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index ; %b advanced by %index bytes
430*9880d681SAndroid Build Coastguard Worker  %tmp11 = bitcast i8* %tmp10 to <2 x i16>*
431*9880d681SAndroid Build Coastguard Worker  %wide.load17 = load <2 x i16>, <2 x i16>* %tmp11, align 1 ; byte-aligned vector load
432*9880d681SAndroid Build Coastguard Worker  %tmp12 = sext <2 x i16> %wide.load17 to <2 x i32> ; lanes of %b are sign-extended
433*9880d681SAndroid Build Coastguard Worker  %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8 ; NOTE(review): nuw with sign-extended operands looks suspect for negative lanes - confirm the flags are intended
434*9880d681SAndroid Build Coastguard Worker  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index ; output slot: %index counts i32 elements here
435*9880d681SAndroid Build Coastguard Worker  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
436*9880d681SAndroid Build Coastguard Worker  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 ; write both products, 4-byte aligned
437*9880d681SAndroid Build Coastguard Worker  ret void
438*9880d681SAndroid Build Coastguard Worker}
439*9880d681SAndroid Build Coastguard Worker
440*9880d681SAndroid Build Coastguard Worker; %val1 = load <2 x i16>
441*9880d681SAndroid Build Coastguard Worker; %op1 = sext<2 x i32> %val1
442*9880d681SAndroid Build Coastguard Worker; %val2 = load <2 x i16>
443*9880d681SAndroid Build Coastguard Worker; %op2 = zext<2 x i32> %val2
444*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2
445*9880d681SAndroid Build Coastguard Worker; Mixed sign/zero extension of i16 inputs: the expected assembly widens both operands first (punpcklwd + psrad $16 for one, punpcklwd with a zeroed register for the other) and multiplies with a pmuludq/psllq/paddq sequence instead of a pmullw/pmulhw pair.
446*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
447*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi16_sext_zext:
448*9880d681SAndroid Build Coastguard Worker; CHECK:       # BB#0: # %entry
449*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq {{.*}}(%rip), %rax
450*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
451*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
452*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    psrad $16, %xmm0
453*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
454*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
455*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pxor %xmm2, %xmm2
456*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
457*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
458*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa %xmm1, %xmm2
459*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmuludq %xmm0, %xmm2
460*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa %xmm0, %xmm3
461*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    psrlq $32, %xmm3
462*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmuludq %xmm1, %xmm3
463*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    psllq $32, %xmm3
464*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    paddq %xmm2, %xmm3
465*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    psrlq $32, %xmm1
466*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmuludq %xmm0, %xmm1
467*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    psllq $32, %xmm1
468*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    paddq %xmm3, %xmm1
469*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
470*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq %xmm0, (%rax,%rdx,4)
471*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retq
472*9880d681SAndroid Build Coastguard Workerentry:
473*9880d681SAndroid Build Coastguard Worker  %pre = load i32*, i32** @c ; output pointer read from global @c
474*9880d681SAndroid Build Coastguard Worker  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index ; %a advanced by %index bytes (not i16 elements)
475*9880d681SAndroid Build Coastguard Worker  %tmp7 = bitcast i8* %tmp6 to <2 x i16>*
476*9880d681SAndroid Build Coastguard Worker  %wide.load = load <2 x i16>, <2 x i16>* %tmp7, align 1 ; byte-aligned vector load
477*9880d681SAndroid Build Coastguard Worker  %tmp8 = sext <2 x i16> %wide.load to <2 x i32> ; lanes of %a are sign-extended
478*9880d681SAndroid Build Coastguard Worker  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index ; %b advanced by %index bytes
479*9880d681SAndroid Build Coastguard Worker  %tmp11 = bitcast i8* %tmp10 to <2 x i16>*
480*9880d681SAndroid Build Coastguard Worker  %wide.load17 = load <2 x i16>, <2 x i16>* %tmp11, align 1 ; byte-aligned vector load
481*9880d681SAndroid Build Coastguard Worker  %tmp12 = zext <2 x i16> %wide.load17 to <2 x i32> ; lanes of %b are zero-extended
482*9880d681SAndroid Build Coastguard Worker  %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8 ; NOTE(review): nuw with a sign-extended operand looks suspect for negative lanes - confirm the flags are intended
483*9880d681SAndroid Build Coastguard Worker  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index ; output slot: %index counts i32 elements here
484*9880d681SAndroid Build Coastguard Worker  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
485*9880d681SAndroid Build Coastguard Worker  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 ; write both products, 4-byte aligned
486*9880d681SAndroid Build Coastguard Worker  ret void
487*9880d681SAndroid Build Coastguard Worker}
488*9880d681SAndroid Build Coastguard Worker
489*9880d681SAndroid Build Coastguard Worker; %val1 = load <16 x i16>
490*9880d681SAndroid Build Coastguard Worker; %op1 = sext<16 x i32> %val1
491*9880d681SAndroid Build Coastguard Worker; %val2 = load <16 x i16>
492*9880d681SAndroid Build Coastguard Worker; %op2 = sext<16 x i32> %val2
493*9880d681SAndroid Build Coastguard Worker; %rst = mul <16 x i32> %op1, %op2
494*9880d681SAndroid Build Coastguard Worker; 16-lane sign-extended variant: the expected assembly handles each input as two 16-byte halves, pairs pmullw with pmulhw per half, and splits the results with punpcklwd/punpckhwd into four 16-byte stores.
495*9880d681SAndroid Build Coastguard Workerdefine void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) {
496*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_16xi16_sext:
497*9880d681SAndroid Build Coastguard Worker; CHECK:       # BB#0: # %entry
498*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq {{.*}}(%rip), %rax
499*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqu (%rdi,%rdx), %xmm0
500*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqu 16(%rdi,%rdx), %xmm1
501*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqu (%rsi,%rdx), %xmm2
502*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqu 16(%rsi,%rdx), %xmm3
503*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa %xmm2, %xmm4
504*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmulhw %xmm0, %xmm4
505*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmullw %xmm0, %xmm2
506*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa %xmm2, %xmm0
507*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
508*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
509*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa %xmm3, %xmm4
510*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmulhw %xmm1, %xmm4
511*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmullw %xmm1, %xmm3
512*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa %xmm3, %xmm1
513*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
514*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
515*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqu %xmm3, 48(%rax,%rdx,4)
516*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqu %xmm1, 32(%rax,%rdx,4)
517*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqu %xmm2, 16(%rax,%rdx,4)
518*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqu %xmm0, (%rax,%rdx,4)
519*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retq
520*9880d681SAndroid Build Coastguard Workerentry:
521*9880d681SAndroid Build Coastguard Worker  %pre = load i32*, i32** @c ; output pointer read from global @c
522*9880d681SAndroid Build Coastguard Worker  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index ; %a advanced by %index bytes (not i16 elements)
523*9880d681SAndroid Build Coastguard Worker  %tmp7 = bitcast i8* %tmp6 to <16 x i16>*
524*9880d681SAndroid Build Coastguard Worker  %wide.load = load <16 x i16>, <16 x i16>* %tmp7, align 1 ; 32-byte vector load, byte-aligned only
525*9880d681SAndroid Build Coastguard Worker  %tmp8 = sext <16 x i16> %wide.load to <16 x i32> ; lanes of %a are sign-extended
526*9880d681SAndroid Build Coastguard Worker  %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index ; %b advanced by %index bytes
527*9880d681SAndroid Build Coastguard Worker  %tmp11 = bitcast i8* %tmp10 to <16 x i16>*
528*9880d681SAndroid Build Coastguard Worker  %wide.load17 = load <16 x i16>, <16 x i16>* %tmp11, align 1 ; 32-byte vector load, byte-aligned only
529*9880d681SAndroid Build Coastguard Worker  %tmp12 = sext <16 x i16> %wide.load17 to <16 x i32> ; lanes of %b are sign-extended
530*9880d681SAndroid Build Coastguard Worker  %tmp13 = mul nuw nsw <16 x i32> %tmp12, %tmp8 ; NOTE(review): nuw with sign-extended operands looks suspect for negative lanes - confirm the flags are intended
531*9880d681SAndroid Build Coastguard Worker  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index ; output slot: %index counts i32 elements here
532*9880d681SAndroid Build Coastguard Worker  %tmp15 = bitcast i32* %tmp14 to <16 x i32>*
533*9880d681SAndroid Build Coastguard Worker  store <16 x i32> %tmp13, <16 x i32>* %tmp15, align 4 ; write all sixteen products, 4-byte aligned
534*9880d681SAndroid Build Coastguard Worker  ret void
535*9880d681SAndroid Build Coastguard Worker}
536*9880d681SAndroid Build Coastguard Worker
537*9880d681SAndroid Build Coastguard Worker; %val = load <2 x i8>
538*9880d681SAndroid Build Coastguard Worker; %op1 = zext<2 x i32> %val
539*9880d681SAndroid Build Coastguard Worker; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (0 ~ 255)
540*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2
541*9880d681SAndroid Build Coastguard Worker; Constants <0, 255> sit at the two ends of the unsigned-byte range: the expected assembly needs only one pmullw against a constant-pool operand, then widens the products by interleaving with the pxor-zeroed register (punpcklwd).
542*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi8_varconst1(i8* nocapture readonly %a, i64 %index) {
543*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi8_varconst1:
544*9880d681SAndroid Build Coastguard Worker; CHECK:       # BB#0: # %entry
545*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq {{.*}}(%rip), %rax
546*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movzwl (%rdi,%rsi), %ecx
547*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movd %ecx, %xmm0
548*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pxor %xmm1, %xmm1
549*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
550*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmullw {{.*}}(%rip), %xmm0
551*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
552*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq %xmm0, (%rax,%rsi,4)
553*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retq
554*9880d681SAndroid Build Coastguard Workerentry:
555*9880d681SAndroid Build Coastguard Worker  %pre = load i32*, i32** @c ; output pointer read from global @c
556*9880d681SAndroid Build Coastguard Worker  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index ; %a advanced by %index bytes
557*9880d681SAndroid Build Coastguard Worker  %tmp7 = bitcast i8* %tmp6 to <2 x i8>*
558*9880d681SAndroid Build Coastguard Worker  %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1 ; byte-aligned vector load
559*9880d681SAndroid Build Coastguard Worker  %tmp8 = zext <2 x i8> %wide.load to <2 x i32> ; lanes are zero-extended
560*9880d681SAndroid Build Coastguard Worker  %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 0, i32 255> ; multiplier lanes are the minimum and maximum unsigned-byte values
561*9880d681SAndroid Build Coastguard Worker  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index ; output slot: %index counts i32 elements here
562*9880d681SAndroid Build Coastguard Worker  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
563*9880d681SAndroid Build Coastguard Worker  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 ; write both products, 4-byte aligned
564*9880d681SAndroid Build Coastguard Worker  ret void
565*9880d681SAndroid Build Coastguard Worker}
566*9880d681SAndroid Build Coastguard Worker
567*9880d681SAndroid Build Coastguard Worker; %val = load <2 x i8>
568*9880d681SAndroid Build Coastguard Worker; %op1 = sext<2 x i32> %val
569*9880d681SAndroid Build Coastguard Worker; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (-128 ~ 127)
570*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2
571*9880d681SAndroid Build Coastguard Worker; Constants <-128, 127> sit at the two ends of the signed-byte range: the expected assembly sign-extends the bytes (punpcklbw + psraw $8), uses one pmullw against a constant-pool operand, then sign-extends the 16-bit products (punpcklwd + psrad $16).
572*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi8_varconst2(i8* nocapture readonly %a, i64 %index) {
573*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi8_varconst2:
574*9880d681SAndroid Build Coastguard Worker; CHECK:       # BB#0: # %entry
575*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq {{.*}}(%rip), %rax
576*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movzwl (%rdi,%rsi), %ecx
577*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movd %ecx, %xmm0
578*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
579*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    psraw $8, %xmm0
580*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmullw {{.*}}(%rip), %xmm0
581*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
582*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    psrad $16, %xmm0
583*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq %xmm0, (%rax,%rsi,4)
584*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retq
585*9880d681SAndroid Build Coastguard Workerentry:
586*9880d681SAndroid Build Coastguard Worker  %pre = load i32*, i32** @c ; output pointer read from global @c
587*9880d681SAndroid Build Coastguard Worker  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index ; %a advanced by %index bytes
588*9880d681SAndroid Build Coastguard Worker  %tmp7 = bitcast i8* %tmp6 to <2 x i8>*
589*9880d681SAndroid Build Coastguard Worker  %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1 ; byte-aligned vector load
590*9880d681SAndroid Build Coastguard Worker  %tmp8 = sext <2 x i8> %wide.load to <2 x i32> ; lanes are sign-extended
591*9880d681SAndroid Build Coastguard Worker  %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 -128, i32 127> ; NOTE(review): nuw with signed operands and a negative multiplier looks suspect - confirm the flags are intended
592*9880d681SAndroid Build Coastguard Worker  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index ; output slot: %index counts i32 elements here
593*9880d681SAndroid Build Coastguard Worker  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
594*9880d681SAndroid Build Coastguard Worker  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 ; write both products, 4-byte aligned
595*9880d681SAndroid Build Coastguard Worker  ret void
596*9880d681SAndroid Build Coastguard Worker}
597*9880d681SAndroid Build Coastguard Worker
598*9880d681SAndroid Build Coastguard Worker; %val = load <2 x i8>
599*9880d681SAndroid Build Coastguard Worker; %op1 = zext<2 x i32> %val
600*9880d681SAndroid Build Coastguard Worker; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (0 ~ 256)
601*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2
602*9880d681SAndroid Build Coastguard Worker; 256 is one past the unsigned-byte maximum: the expected assembly materializes <0,256,...> in a register and needs both pmulhw and pmullw, with punpcklwd joining the low and high halves.
603*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi8_varconst3(i8* nocapture readonly %a, i64 %index) {
604*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi8_varconst3:
605*9880d681SAndroid Build Coastguard Worker; CHECK:       # BB#0: # %entry
606*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq {{.*}}(%rip), %rax
607*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movzwl (%rdi,%rsi), %ecx
608*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movd %ecx, %xmm0
609*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pxor %xmm1, %xmm1
610*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
611*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = <0,256,u,u,u,u,u,u>
612*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa %xmm0, %xmm2
613*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmulhw %xmm1, %xmm2
614*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmullw %xmm1, %xmm0
615*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
616*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq %xmm0, (%rax,%rsi,4)
617*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retq
618*9880d681SAndroid Build Coastguard Workerentry:
619*9880d681SAndroid Build Coastguard Worker  %pre = load i32*, i32** @c ; output pointer read from global @c
620*9880d681SAndroid Build Coastguard Worker  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index ; %a advanced by %index bytes
621*9880d681SAndroid Build Coastguard Worker  %tmp7 = bitcast i8* %tmp6 to <2 x i8>*
622*9880d681SAndroid Build Coastguard Worker  %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1 ; byte-aligned vector load
623*9880d681SAndroid Build Coastguard Worker  %tmp8 = zext <2 x i8> %wide.load to <2 x i32> ; lanes are zero-extended
624*9880d681SAndroid Build Coastguard Worker  %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 0, i32 256> ; 256 does not fit in an unsigned byte
625*9880d681SAndroid Build Coastguard Worker  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index ; output slot: %index counts i32 elements here
626*9880d681SAndroid Build Coastguard Worker  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
627*9880d681SAndroid Build Coastguard Worker  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 ; write both products, 4-byte aligned
628*9880d681SAndroid Build Coastguard Worker  ret void
629*9880d681SAndroid Build Coastguard Worker}
630*9880d681SAndroid Build Coastguard Worker
631*9880d681SAndroid Build Coastguard Worker; %val = load <2 x i8>
632*9880d681SAndroid Build Coastguard Worker; %op1 = zext<2 x i32> %val
633*9880d681SAndroid Build Coastguard Worker; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (-1 ~ 255)
634*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2
635*9880d681SAndroid Build Coastguard Worker; -1 is one below the unsigned-byte minimum: the expected constant vector shows it as 65535 in a 16-bit lane, and the assembly needs both pmulhw and pmullw, with punpcklwd joining the low and high halves.
636*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi8_varconst4(i8* nocapture readonly %a, i64 %index) {
637*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi8_varconst4:
638*9880d681SAndroid Build Coastguard Worker; CHECK:       # BB#0: # %entry
639*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq {{.*}}(%rip), %rax
640*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movzwl (%rdi,%rsi), %ecx
641*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movd %ecx, %xmm0
642*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pxor %xmm1, %xmm1
643*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
644*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = <65535,255,u,u,u,u,u,u>
645*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa %xmm0, %xmm2
646*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmulhw %xmm1, %xmm2
647*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmullw %xmm1, %xmm0
648*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
649*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq %xmm0, (%rax,%rsi,4)
650*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retq
651*9880d681SAndroid Build Coastguard Workerentry:
652*9880d681SAndroid Build Coastguard Worker  %pre = load i32*, i32** @c ; output pointer read from global @c
653*9880d681SAndroid Build Coastguard Worker  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index ; %a advanced by %index bytes
654*9880d681SAndroid Build Coastguard Worker  %tmp7 = bitcast i8* %tmp6 to <2 x i8>*
655*9880d681SAndroid Build Coastguard Worker  %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1 ; byte-aligned vector load
656*9880d681SAndroid Build Coastguard Worker  %tmp8 = zext <2 x i8> %wide.load to <2 x i32> ; lanes are zero-extended
657*9880d681SAndroid Build Coastguard Worker  %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 -1, i32 255> ; NOTE(review): nuw with a -1 multiplier wraps unsigned for any nonzero lane - confirm the flags are intended
658*9880d681SAndroid Build Coastguard Worker  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index ; output slot: %index counts i32 elements here
659*9880d681SAndroid Build Coastguard Worker  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
660*9880d681SAndroid Build Coastguard Worker  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 ; write both products, 4-byte aligned
661*9880d681SAndroid Build Coastguard Worker  ret void
662*9880d681SAndroid Build Coastguard Worker}
663*9880d681SAndroid Build Coastguard Worker
664*9880d681SAndroid Build Coastguard Worker; %val = load <2 x i8>
665*9880d681SAndroid Build Coastguard Worker; %op1 = sext<2 x i32> %val
666*9880d681SAndroid Build Coastguard Worker; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (-129 ~ 127)
667*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2
668*9880d681SAndroid Build Coastguard Worker; -129 is one below the signed-byte minimum: the expected constant vector shows it as 65407 in a 16-bit lane, and the assembly needs both pmulhw and pmullw, with punpcklwd joining the low and high halves.
669*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi8_varconst5(i8* nocapture readonly %a, i64 %index) {
670*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi8_varconst5:
671*9880d681SAndroid Build Coastguard Worker; CHECK:       # BB#0: # %entry
672*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq {{.*}}(%rip), %rax
673*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movzwl (%rdi,%rsi), %ecx
674*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movd %ecx, %xmm0
675*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
676*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    psraw $8, %xmm0
677*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = <65407,127,u,u,u,u,u,u>
678*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa %xmm0, %xmm2
679*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmulhw %xmm1, %xmm2
680*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmullw %xmm1, %xmm0
681*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
682*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq %xmm0, (%rax,%rsi,4)
683*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retq
684*9880d681SAndroid Build Coastguard Workerentry:
685*9880d681SAndroid Build Coastguard Worker  %pre = load i32*, i32** @c ; output pointer read from global @c
686*9880d681SAndroid Build Coastguard Worker  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index ; %a advanced by %index bytes
687*9880d681SAndroid Build Coastguard Worker  %tmp7 = bitcast i8* %tmp6 to <2 x i8>*
688*9880d681SAndroid Build Coastguard Worker  %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1 ; byte-aligned vector load
689*9880d681SAndroid Build Coastguard Worker  %tmp8 = sext <2 x i8> %wide.load to <2 x i32> ; lanes are sign-extended
690*9880d681SAndroid Build Coastguard Worker  %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 -129, i32 127> ; NOTE(review): nuw with signed operands and a negative multiplier looks suspect - confirm the flags are intended
691*9880d681SAndroid Build Coastguard Worker  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index ; output slot: %index counts i32 elements here
692*9880d681SAndroid Build Coastguard Worker  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
693*9880d681SAndroid Build Coastguard Worker  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 ; write both products, 4-byte aligned
694*9880d681SAndroid Build Coastguard Worker  ret void
695*9880d681SAndroid Build Coastguard Worker}
696*9880d681SAndroid Build Coastguard Worker
697*9880d681SAndroid Build Coastguard Worker; %val = load <2 x i8>
698*9880d681SAndroid Build Coastguard Worker; %op1 = sext<2 x i32> %val
699*9880d681SAndroid Build Coastguard Worker; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within (-128 ~ 128)
700*9880d681SAndroid Build Coastguard Worker; %rst = mul <2 x i32> %op1, %op2
701*9880d681SAndroid Build Coastguard Worker; 128 is one past the signed-byte maximum: the expected constant vector is <65408,128,...> (-128 shown as 65408 in a 16-bit lane), and the assembly needs both pmulhw and pmullw, with punpcklwd joining the low and high halves.
702*9880d681SAndroid Build Coastguard Workerdefine void @mul_2xi8_varconst6(i8* nocapture readonly %a, i64 %index) {
703*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: mul_2xi8_varconst6:
704*9880d681SAndroid Build Coastguard Worker; CHECK:       # BB#0: # %entry
705*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq {{.*}}(%rip), %rax
706*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movzwl (%rdi,%rsi), %ecx
707*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movd %ecx, %xmm0
708*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
709*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    psraw $8, %xmm0
710*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = <65408,128,u,u,u,u,u,u>
711*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movdqa %xmm0, %xmm2
712*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmulhw %xmm1, %xmm2
713*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    pmullw %xmm1, %xmm0
714*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
715*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movq %xmm0, (%rax,%rsi,4)
716*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retq
717*9880d681SAndroid Build Coastguard Workerentry:
718*9880d681SAndroid Build Coastguard Worker  %pre = load i32*, i32** @c ; output pointer read from global @c
719*9880d681SAndroid Build Coastguard Worker  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index ; %a advanced by %index bytes
720*9880d681SAndroid Build Coastguard Worker  %tmp7 = bitcast i8* %tmp6 to <2 x i8>*
721*9880d681SAndroid Build Coastguard Worker  %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1 ; byte-aligned vector load
722*9880d681SAndroid Build Coastguard Worker  %tmp8 = sext <2 x i8> %wide.load to <2 x i32> ; lanes are sign-extended
723*9880d681SAndroid Build Coastguard Worker  %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 -128, i32 128> ; NOTE(review): nuw with signed operands and a negative multiplier looks suspect - confirm the flags are intended
724*9880d681SAndroid Build Coastguard Worker  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index ; output slot: %index counts i32 elements here
725*9880d681SAndroid Build Coastguard Worker  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
726*9880d681SAndroid Build Coastguard Worker  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 ; write both products, 4-byte aligned
727*9880d681SAndroid Build Coastguard Worker  ret void
728*9880d681SAndroid Build Coastguard Worker}
729*9880d681SAndroid Build Coastguard Worker
; Multiply by a constant vector: unsigned 16-bit case.
;
; %val = load <2 x i16>
; %op1 = zext<2 x i32> %val
; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within [0, 65535]
; %rst = mul <2 x i32> %op1, %op2
;
; Both constants (0 and 65535) fit in an unsigned 16-bit lane, so the
; assertions below expect the multiply to be carried out on 16-bit lanes:
; pmullw / pmulhuw give the low / high halves of each product and punpcklwd
; interleaves them into the two 32-bit results, which are stored with movq.
;
; Arguments:
;   %a     - base address of the input bytes
;   %index - byte offset added to %a for the load; also used as the i32
;            element index into the pointer loaded from @c for the store
;
; The assertion lines are autogenerated by update_llc_test_checks.py (see the
; NOTE at the top of this file); regenerate them rather than editing by hand.
define void @mul_2xi16_varconst1(i8* nocapture readonly %a, i64 %index) {
; CHECK-LABEL: mul_2xi16_varconst1:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = <0,65535,u,u,u,u,u,u>
; CHECK-NEXT:    movdqa %xmm0, %xmm2
; CHECK-NEXT:    pmulhuw %xmm1, %xmm2
; CHECK-NEXT:    pmullw %xmm1, %xmm0
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT:    movq %xmm0, (%rax,%rsi,4)
; CHECK-NEXT:    retq
entry:
  ; Destination array: the i32* currently held in the global @c.
  %pre = load i32*, i32** @c
  ; Load two i16 values from %a + %index (byte-granular offset, align 1).
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <2 x i16>*
  %wide.load = load <2 x i16>, <2 x i16>* %tmp7, align 1
  ; Zero-extend to i32 and multiply by constants that still fit in 16 bits.
  %tmp8 = zext <2 x i16> %wide.load to <2 x i32>
  %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 0, i32 65535>
  ; Store both 32-bit products at element %index of the destination array.
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
  ret void
}

; Multiply by a constant vector: signed 16-bit case.
;
; %val = load <2 x i16>
; %op1 = sext<2 x i32> %val
; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within [-32768, 32767]
; %rst = mul <2 x i32> %op1, %op2
;
; Both constants (-32768 and 32767) fit in a signed 16-bit lane, so the
; assertions below expect the multiply to be carried out on 16-bit lanes:
; pmullw / pmulhw give the low / high halves of each signed product and
; punpcklwd interleaves them into the two 32-bit results.
; In the expected constant-pool comment -32768 shows up as 32768, which is
; the same 16-bit pattern (0x8000).
;
; Arguments:
;   %a     - base address of the input bytes
;   %index - byte offset added to %a for the load; also used as the i32
;            element index into the pointer loaded from @c for the store
;
; The assertion lines are autogenerated by update_llc_test_checks.py (see the
; NOTE at the top of this file); regenerate them rather than editing by hand.
define void @mul_2xi16_varconst2(i8* nocapture readonly %a, i64 %index) {
; CHECK-LABEL: mul_2xi16_varconst2:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = <32768,32767,u,u,u,u,u,u>
; CHECK-NEXT:    movdqa %xmm0, %xmm2
; CHECK-NEXT:    pmulhw %xmm1, %xmm2
; CHECK-NEXT:    pmullw %xmm1, %xmm0
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT:    movq %xmm0, (%rax,%rsi,4)
; CHECK-NEXT:    retq
entry:
  ; Destination array: the i32* currently held in the global @c.
  %pre = load i32*, i32** @c
  ; Load two i16 values from %a + %index (byte-granular offset, align 1).
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <2 x i16>*
  %wide.load = load <2 x i16>, <2 x i16>* %tmp7, align 1
  ; Sign-extend to i32 and multiply by constants at both ends of the signed
  ; 16-bit range.
  %tmp8 = sext <2 x i16> %wide.load to <2 x i32>
  %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 -32768, i32 32767>
  ; Store both 32-bit products at element %index of the destination array.
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
  ret void
}

; Multiply by a constant vector: constant just outside the unsigned 16-bit
; range.
;
; %val = load <2 x i16>
; %op1 = zext<2 x i32> %val
; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within [0, 65536]
; %rst = mul <2 x i32> %op1, %op2
;
; 65536 (0x10000) does not fit in 16 bits, so the assertions below expect
; no 16-bit pmullw / pmulhuw pair here. Instead the input is zero-extended
; (punpcklwd against a zeroed register) and the product is built from
; pmuludq / psrlq / psllq / paddq.
;
; Arguments:
;   %a     - base address of the input bytes
;   %index - byte offset added to %a for the load; also used as the i32
;            element index into the pointer loaded from @c for the store
;
; The assertion lines are autogenerated by update_llc_test_checks.py (see the
; NOTE at the top of this file); regenerate them rather than editing by hand.
define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
; CHECK-LABEL: mul_2xi16_varconst3:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; CHECK-NEXT:    movl $65536, %ecx # imm = 0x10000
; CHECK-NEXT:    movd %rcx, %xmm1
; CHECK-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; CHECK-NEXT:    movdqa %xmm0, %xmm2
; CHECK-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-NEXT:    psrlq $32, %xmm0
; CHECK-NEXT:    pmuludq %xmm1, %xmm0
; CHECK-NEXT:    psllq $32, %xmm0
; CHECK-NEXT:    paddq %xmm2, %xmm0
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT:    movq %xmm0, (%rax,%rsi,4)
; CHECK-NEXT:    retq
entry:
  ; Destination array: the i32* currently held in the global @c.
  %pre = load i32*, i32** @c
  ; Load two i16 values from %a + %index (byte-granular offset, align 1).
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <2 x i16>*
  %wide.load = load <2 x i16>, <2 x i16>* %tmp7, align 1
  ; Zero-extend to i32; the second constant (65536) needs 17 bits.
  %tmp8 = zext <2 x i16> %wide.load to <2 x i32>
  %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 0, i32 65536>
  ; Store both 32-bit products at element %index of the destination array.
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
  ret void
}

; Multiply by a constant vector: constant just outside the signed 16-bit
; range.
;
; %val = load <2 x i16>
; %op1 = sext<2 x i32> %val
; %op2 = const <2 x i32> {c1, c2} // c1 and c2 are within [0, 32768]
; %rst = mul <2 x i32> %op1, %op2
;
; 32768 (0x8000) is one past the largest signed 16-bit value (32767), so the
; assertions below expect no 16-bit pmullw / pmulhw pair here. Instead the
; input is sign-extended (punpcklwd + psrad $16) and the product is built
; from pmuludq / psrlq / psllq / paddq.
;
; Arguments:
;   %a     - base address of the input bytes
;   %index - byte offset added to %a for the load; also used as the i32
;            element index into the pointer loaded from @c for the store
;
; The assertion lines are autogenerated by update_llc_test_checks.py (see the
; NOTE at the top of this file); regenerate them rather than editing by hand.
define void @mul_2xi16_varconst4(i8* nocapture readonly %a, i64 %index) {
; CHECK-LABEL: mul_2xi16_varconst4:
; CHECK:       # BB#0: # %entry
; CHECK-NEXT:    movq {{.*}}(%rip), %rax
; CHECK-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; CHECK-NEXT:    psrad $16, %xmm0
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; CHECK-NEXT:    movl $32768, %ecx # imm = 0x8000
; CHECK-NEXT:    movd %rcx, %xmm1
; CHECK-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; CHECK-NEXT:    movdqa %xmm0, %xmm2
; CHECK-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-NEXT:    psrlq $32, %xmm0
; CHECK-NEXT:    pmuludq %xmm1, %xmm0
; CHECK-NEXT:    psllq $32, %xmm0
; CHECK-NEXT:    paddq %xmm2, %xmm0
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT:    movq %xmm0, (%rax,%rsi,4)
; CHECK-NEXT:    retq
entry:
  ; Destination array: the i32* currently held in the global @c.
  %pre = load i32*, i32** @c
  ; Load two i16 values from %a + %index (byte-granular offset, align 1).
  %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
  %tmp7 = bitcast i8* %tmp6 to <2 x i16>*
  %wide.load = load <2 x i16>, <2 x i16>* %tmp7, align 1
  ; Sign-extend to i32; the second constant (32768) exceeds the signed
  ; 16-bit maximum.
  %tmp8 = sext <2 x i16> %wide.load to <2 x i32>
  %tmp13 = mul nuw nsw <2 x i32> %tmp8, <i32 0, i32 32768>
  ; Store both 32-bit products at element %index of the destination array.
  %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
  %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
  store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
  ret void
}