; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/combine-multiplies.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; RUN: llc < %s -mattr=sse2 -mtriple=i386-unknown-linux-gnu | FileCheck %s

; Source file looks something like this:
;
; typedef int AAA[100][100];
;
; void testCombineMultiplies(AAA a, int lll)
; {
;   int LOC = lll + 5;
;
;   a[LOC][LOC] = 11;
;
;   a[LOC][20] = 22;
;   a[LOC+20][20] = 33;
; }
;
; We want to make sure we don't generate two multiply instructions,
; one for a[LOC][...] and one for a[LOC+20][...]. visitMUL in DAGCombiner.cpp
; should combine the instructions in such a way as to avoid the extra
; multiply.
;
; Output looks roughly like this:
;
;	movl	8(%esp), %eax
;	movl	12(%esp), %ecx
;	imull	$400, %ecx, %edx        # imm = 0x190
;	leal	(%eax,%edx), %esi
;	movl	$11, 2020(%esi,%ecx,4)
;	movl	$22, 2080(%eax,%edx)
;	movl	$33, 10080(%eax,%edx)
;
; CHECK-LABEL: testCombineMultiplies
; CHECK: imull $400, [[ARG1:%[a-z]+]], [[MUL:%[a-z]+]] # imm = 0x190
; CHECK-NEXT: leal ([[ARG2:%[a-z]+]],[[MUL]]), [[LEA:%[a-z]+]]
; CHECK-NEXT: movl $11, {{[0-9]+}}([[LEA]],[[ARG1]],4)
; CHECK-NEXT: movl $22, {{[0-9]+}}([[ARG2]],[[MUL]])
; CHECK-NEXT: movl $33, {{[0-9]+}}([[ARG2]],[[MUL]])
; CHECK: retl
;

; Scalar case. %a points at rows of [100 x i32] (400 bytes per row), and the
; three stores below address rows lll+5, lll+5 and lll+25. The directives
; above expect one imull by 400 immediately followed by a lea and the three
; stores, all of which reuse the register holding that single product.
;
; Function Attrs: nounwind
; (annotation only: no attribute is attached to the definition below)
define void @testCombineMultiplies([100 x i32]* nocapture %a, i32 %lll) {
entry:
  ; LOC = lll + 5
  %add = add nsw i32 %lll, 5
  ; a[LOC][LOC] = 11  (row index and column index are both %add)
  %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %a, i32 %add, i32 %add
  store i32 11, i32* %arrayidx1, align 4
  ; a[LOC][20] = 22
  %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* %a, i32 %add, i32 20
  store i32 22, i32* %arrayidx3, align 4
  ; LOC + 20, expressed directly in terms of %lll: (lll + 5) + 20 = lll + 25
  %add4 = add nsw i32 %lll, 25
  ; a[LOC+20][20] = 33
  %arrayidx6 = getelementptr inbounds [100 x i32], [100 x i32]* %a, i32 %add4, i32 20
  store i32 33, i32* %arrayidx6, align 4
  ret void
}


; Test for the same optimization on vector multiplies.
;
; Source looks something like this:
;
; typedef int v4int __attribute__((__vector_size__(16)));
;
; v4int x;
; v4int v2, v3;
; void testCombineMultiplies_splat(v4int v1) {
;   v2 = (v1 + (v4int){ 11, 11, 11, 11 }) * (v4int) {22, 22, 22, 22};
;   v3 = (v1 + (v4int){ 33, 33, 33, 33 }) * (v4int) {22, 22, 22, 22};
;   x = (v1 + (v4int){ 11, 11, 11, 11 });
; }
;
; Output looks something like this:
;
; testCombineMultiplies_splat:                              # @testCombineMultiplies_splat
; # BB#0:                                 # %entry
;	movdqa	.LCPI1_0, %xmm1         # xmm1 = [11,11,11,11]
;	paddd	%xmm0, %xmm1
;	movdqa	.LCPI1_1, %xmm2         # xmm2 = [22,22,22,22]
;	pshufd	$245, %xmm0, %xmm3      # xmm3 = xmm0[1,1,3,3]
;	pmuludq	%xmm2, %xmm0
;	pshufd	$232, %xmm0, %xmm0      # xmm0 = xmm0[0,2,2,3]
;	pmuludq	%xmm2, %xmm3
;	pshufd	$232, %xmm3, %xmm2      # xmm2 = xmm3[0,2,2,3]
;	punpckldq	%xmm2, %xmm0    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
;	movdqa	.LCPI1_2, %xmm2         # xmm2 = [242,242,242,242]
;	paddd	%xmm0, %xmm2
;	paddd	.LCPI1_3, %xmm0
;	movdqa	%xmm2, v2
;	movdqa	%xmm0, v3
;	movdqa	%xmm1, x
;	retl
;
; Again, we want to make sure we don't generate two different multiplies.
; We should have a single multiply for "v1 * {22, 22, 22, 22}" (made up of two
; pmuludq instructions), followed by two adds. Without this optimization, we'd
; do 2 adds, followed by 2 multiplies (i.e. 4 pmuludq instructions).
;
; The three trailing stores are matched together with their movdqa mnemonic,
; so an unrelated instruction that merely uses the same operands cannot
; satisfy them.
;
; CHECK-LABEL: testCombineMultiplies_splat
; CHECK:       movdqa .LCPI1_0, [[C11:%xmm[0-9]]]
; CHECK-NEXT:  paddd %xmm0, [[C11]]
; CHECK-NEXT:  movdqa .LCPI1_1, [[C22:%xmm[0-9]]]
; CHECK-NEXT:  pshufd $245, %xmm0, [[T1:%xmm[0-9]]]
; CHECK-NEXT:  pmuludq [[C22]], [[T2:%xmm[0-9]]]
; CHECK-NEXT:  pshufd $232, [[T2]], [[T3:%xmm[0-9]]]
; CHECK-NEXT:  pmuludq [[C22]], [[T4:%xmm[0-9]]]
; CHECK-NEXT:  pshufd $232, [[T4]], [[T5:%xmm[0-9]]]
; CHECK-NEXT:  punpckldq [[T5]], [[T6:%xmm[0-9]]]
; CHECK-NEXT:  movdqa .LCPI1_2, [[C242:%xmm[0-9]]]
; CHECK-NEXT:  paddd [[T6]], [[C242]]
; CHECK-NEXT:  paddd .LCPI1_3, [[C726:%xmm[0-9]]]
; CHECK-NEXT:  movdqa [[C242]], v2
; CHECK-NEXT:  movdqa [[C726]], v3
; CHECK-NEXT:  movdqa [[C11]], x
; CHECK-NEXT:  retl

; Destination globals for the stores performed by the vector test functions
; in this file.
@v2 = common global <4 x i32> zeroinitializer, align 16
@v3 = common global <4 x i32> zeroinitializer, align 16
@x = common global <4 x i32> zeroinitializer, align 16

; Splat-vector case: two products share the multiplier <22, 22, 22, 22> and
; differ only in the constant added to %v1 beforehand. The directives above
; expect one vector multiply of %v1 (two pmuludq) followed by paddd of the
; folded constants (11 * 22 = 242 and 33 * 22 = 726).
;
; Function Attrs: nounwind
; (annotation only: no attribute is attached to the definition below)
define void @testCombineMultiplies_splat(<4 x i32> %v1) {
entry:
  ; %add1 is also stored to @x below, so the plain add has a second use
  ; besides the multiply.
  %add1 = add <4 x i32> %v1, <i32 11, i32 11, i32 11, i32 11>
  %mul1 = mul <4 x i32> %add1, <i32 22, i32 22, i32 22, i32 22>
  %add2 = add <4 x i32> %v1, <i32 33, i32 33, i32 33, i32 33>
  %mul2 = mul <4 x i32> %add2, <i32 22, i32 22, i32 22, i32 22>
  store <4 x i32> %mul1, <4 x i32>* @v2, align 16
  store <4 x i32> %mul2, <4 x i32>* @v3, align 16
  store <4 x i32> %add1, <4 x i32>* @x, align 16
  ret void
}

; Finally, check the non-splatted vector case. This is very similar
; to the previous test case, except for the vector values.
;
; The capture names (C11, C22, C242, C726) are carried over from the splat
; test; here every lane holds a different constant. The three trailing stores
; are matched together with their movdqa mnemonic, so an unrelated instruction
; that merely uses the same operands cannot satisfy them.
;
; CHECK-LABEL: testCombineMultiplies_non_splat
; CHECK:       movdqa .LCPI2_0, [[C11:%xmm[0-9]]]
; CHECK-NEXT:  paddd %xmm0, [[C11]]
; CHECK-NEXT:  movdqa .LCPI2_1, [[C22:%xmm[0-9]]]
; CHECK-NEXT:  pshufd $245, %xmm0, [[T1:%xmm[0-9]]]
; CHECK-NEXT:  pmuludq [[C22]], [[T2:%xmm[0-9]]]
; CHECK-NEXT:  pshufd $232, [[T2]], [[T3:%xmm[0-9]]]
; CHECK-NEXT:  pshufd $245, [[C22]], [[T7:%xmm[0-9]]]
; CHECK-NEXT:  pmuludq [[T1]], [[T7]]
; CHECK-NEXT:  pshufd $232, [[T7]], [[T5:%xmm[0-9]]]
; CHECK-NEXT:  punpckldq [[T5]], [[T6:%xmm[0-9]]]
; CHECK-NEXT:  movdqa .LCPI2_2, [[C242:%xmm[0-9]]]
; CHECK-NEXT:  paddd [[T6]], [[C242]]
; CHECK-NEXT:  paddd .LCPI2_3, [[C726:%xmm[0-9]]]
; CHECK-NEXT:  movdqa [[C242]], v2
; CHECK-NEXT:  movdqa [[C726]], v3
; CHECK-NEXT:  movdqa [[C11]], x
; CHECK-NEXT:  retl

; Non-splat case: both products use the multiplier <22, 33, 44, 55> and differ
; only in the per-lane constant added to %v1 beforehand. Results are stored to
; the globals @v2, @v3 and @x, which are defined elsewhere in this file.
;
; Function Attrs: nounwind
; (annotation only: no attribute is attached to the definition below)
define void @testCombineMultiplies_non_splat(<4 x i32> %v1) {
entry:
  ; %add1 is also stored to @x below, so the plain add has a second use
  ; besides the multiply.
  %add1 = add <4 x i32> %v1, <i32 11, i32 22, i32 33, i32 44>
  %mul1 = mul <4 x i32> %add1, <i32 22, i32 33, i32 44, i32 55>
  %add2 = add <4 x i32> %v1, <i32 33, i32 44, i32 55, i32 66>
  %mul2 = mul <4 x i32> %add2, <i32 22, i32 33, i32 44, i32 55>
  store <4 x i32> %mul1, <4 x i32>* @v2, align 16
  store <4 x i32> %mul2, <4 x i32>* @v3, align 16
  store <4 x i32> %add1, <4 x i32>* @x, align 16
  ret void
}
164