xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/movtopush.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
2; RUN: llc < %s -mtriple=i686-windows -no-x86-call-frame-opt | FileCheck %s -check-prefix=NOPUSH
3; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64
4; RUN: llc < %s -mtriple=i686-windows -stackrealign -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
5; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=LINUX
6
; Aggregate types referenced by the declarations and functions below.
7%class.Class = type { i32 }
8%struct.s = type { i64 }
9
; External callees. They vary in parameter count, parameter attributes
; (inreg, byval, inalloca) and calling convention (x86_thiscallcc).
10declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
11declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
12declare x86_thiscallcc void @thiscall(%class.Class* %class, i32 %a, i32 %b, i32 %c, i32 %d)
13declare void @oneparam(i32 %a)
14declare void @eightparams(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
15declare void @struct(%struct.s* byval %a, i32 %b, i32 %c, i32 %d)
; NOTE(review): no function visible in this file calls @inalloca.
16declare void @inalloca(<{ %struct.s }>* inalloca)
17
; NOTE(review): the stack save/restore intrinsics are declared, but no function
; visible in this file calls them.
18declare i8* @llvm.stacksave()
19declare void @llvm.stackrestore(i8*)
20
21; We should get pushes for x86, even though there is a reserved call frame.
22; Make sure we don't touch x86-64, and that turning it off works.
23; NORMAL-LABEL: test1:
24; NORMAL-NOT: subl {{.*}} %esp
25; NORMAL: pushl   $4
26; NORMAL-NEXT: pushl   $3
27; NORMAL-NEXT: pushl   $2
28; NORMAL-NEXT: pushl   $1
29; NORMAL-NEXT: call
30; NORMAL-NEXT: addl $16, %esp
31; X64-LABEL: test1:
32; X64: movl    $1, %ecx
33; X64-NEXT: movl    $2, %edx
34; X64-NEXT: movl    $3, %r8d
35; X64-NEXT: movl    $4, %r9d
36; X64-NEXT: callq   good
37; NOPUSH-LABEL: test1:
38; NOPUSH: subl    $16, %esp
39; NOPUSH-NEXT: movl    $4, 12(%esp)
40; NOPUSH-NEXT: movl    $3, 8(%esp)
41; NOPUSH-NEXT: movl    $2, 4(%esp)
42; NOPUSH-NEXT: movl    $1, (%esp)
43; NOPUSH-NEXT: call
44; NOPUSH-NEXT: addl $16, %esp
; Calls @good with four constant i32 arguments (1, 2, 3, 4). The function has
; no locals and no optsize attribute.
45define void @test1() {
46entry:
47  call void @good(i32 1, i32 2, i32 3, i32 4)
48  ret void
49}
50
51; If we don't have a reserved frame, we should still have pushes
52; NORMAL-LABEL: test2:
53; NORMAL-NOT: subl {{.*}} %esp
54; NORMAL: pushl   $4
55; NORMAL-NEXT: pushl   $3
56; NORMAL-NEXT: pushl   $2
57; NORMAL-NEXT: pushl   $1
58; NORMAL-NEXT: call
; Same call as @test1, but preceded by a variable-sized alloca.
59define void @test2(i32 %k) {
60entry:
  ; %a has a run-time element count (%k) and is never used afterwards.
61  %a = alloca i32, i32 %k
62  call void @good(i32 1, i32 2, i32 3, i32 4)
63  ret void
64}
65
66; Again, we expect a sequence of 4 immediate pushes
67; Checks that we generate the right pushes for >8bit immediates
68; NORMAL-LABEL: test2b:
69; NORMAL-NOT: subl {{.*}} %esp
70; NORMAL: pushl   $4096
71; NORMAL-NEXT: pushl   $3072
72; NORMAL-NEXT: pushl   $2048
73; NORMAL-NEXT: pushl   $1024
74; NORMAL-NEXT: call
75; NORMAL-NEXT: addl $16, %esp
; Calls @good with four constants (1024..4096), none of which fits in a signed
; 8-bit immediate. Marked optsize.
76define void @test2b() optsize {
77entry:
78  call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
79  ret void
80}
81
82; The first argument is computed at run time, so the last push should push a register
83; NORMAL-LABEL: test3:
84; NORMAL-NOT: subl {{.*}} %esp
85; NORMAL: pushl   $4
86; NORMAL-NEXT: pushl   $3
87; NORMAL-NEXT: pushl   $2
88; NORMAL-NEXT: pushl   %e{{..}}
89; NORMAL-NEXT: call
90; NORMAL-NEXT: addl $16, %esp
; Calls @good with a computed first argument (%k + 1) followed by three
; constants.
91define void @test3(i32 %k) optsize {
92entry:
  ; %f is the only non-constant argument of the call below.
93  %f = add i32 %k, 1
94  call void @good(i32 %f, i32 2, i32 3, i32 4)
95  ret void
96}
97
98; We support weird calling conventions
99; NORMAL-LABEL: test4:
100; NORMAL: movl    $2, %eax
101; NORMAL-NEXT: pushl   $4
102; NORMAL-NEXT: pushl   $3
103; NORMAL-NEXT: pushl   $1
104; NORMAL-NEXT: call
105; NORMAL-NEXT: addl $12, %esp
; Calls @inreg, whose second parameter is declared inreg, with four constants.
106define void @test4() optsize {
107entry:
108  call void @inreg(i32 1, i32 2, i32 3, i32 4)
109  ret void
110}
111
112; NORMAL-LABEL: test4b:
113; NORMAL: movl 4(%esp), %ecx
114; NORMAL-NEXT: pushl   $4
115; NORMAL-NEXT: pushl   $3
116; NORMAL-NEXT: pushl   $2
117; NORMAL-NEXT: pushl   $1
118; NORMAL-NEXT: call
119; NORMAL-NEXT: ret
; Calls @thiscall (x86_thiscallcc) with the incoming %f as the object pointer
; and four constant i32 arguments.
120define void @test4b(%class.Class* %f) optsize {
121entry:
122  call x86_thiscallcc void @thiscall(%class.Class* %f, i32 1, i32 2, i32 3, i32 4)
123  ret void
124}
125
126; When there is no reserved call frame, check that additional alignment
127; is added when the pushes don't add up to the required alignment.
128; ALIGNED-LABEL: test5:
129; ALIGNED: subl    $16, %esp
130; ALIGNED-NEXT: pushl   $4
131; ALIGNED-NEXT: pushl   $3
132; ALIGNED-NEXT: pushl   $2
133; ALIGNED-NEXT: pushl   $1
134; ALIGNED-NEXT: call
; Variable-sized alloca followed by a four-constant call to @good; the IR body
; matches @test2. No optsize attribute.
135define void @test5(i32 %k) {
136entry:
  ; %a has a run-time element count (%k) and is never used afterwards.
137  %a = alloca i32, i32 %k
138  call void @good(i32 1, i32 2, i32 3, i32 4)
139  ret void
140}
141
142; When the alignment adds up, do the transformation
143; ALIGNED-LABEL: test5b:
144; ALIGNED: pushl   $8
145; ALIGNED-NEXT: pushl   $7
146; ALIGNED-NEXT: pushl   $6
147; ALIGNED-NEXT: pushl   $5
148; ALIGNED-NEXT: pushl   $4
149; ALIGNED-NEXT: pushl   $3
150; ALIGNED-NEXT: pushl   $2
151; ALIGNED-NEXT: pushl   $1
152; ALIGNED-NEXT: call
; Calls @eightparams with the constants 1 through 8 (eight i32 arguments,
; 32 bytes in total).
153define void @test5b() optsize {
154entry:
155  call void @eightparams(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8)
156  ret void
157}
158
159; When having to compensate for the alignment isn't worth it,
160; don't use pushes.
161; ALIGNED-LABEL: test5c:
162; ALIGNED: movl $1, (%esp)
163; ALIGNED-NEXT: call
; Calls @oneparam with a single constant i32 argument.
164define void @test5c() optsize {
165entry:
166  call void @oneparam(i32 1)
167  ret void
168}
169
170; Check that pushing the addresses of globals (Or generally, things that
171; aren't exactly immediates) isn't broken.
172; Fixes PR21878.
173; NORMAL-LABEL: test6:
174; NORMAL: pushl    $_ext
175; NORMAL-NEXT: call
; Callee taking a single i8* and an external constant whose address is passed
; to it.
176declare void @f(i8*)
177@ext = external constant i8
178
; Passes the address of @ext to @f, then branches to a second block that
; contains an alloca.
179define void @test6() {
180  call void @f(i8* @ext)
181  br label %bb
182bb:
  ; This alloca is outside the entry block and its result is unnamed and unused.
183  alloca i32
184  ret void
185}
186
187; Check that we fold simple cases into the push
188; NORMAL-LABEL: test7:
189; NORMAL-NOT: subl {{.*}} %esp
190; NORMAL: movl 4(%esp), [[EAX:%e..]]
191; NORMAL-NEXT: pushl   $4
192; NORMAL-NEXT: pushl   ([[EAX]])
193; NORMAL-NEXT: pushl   $2
194; NORMAL-NEXT: pushl   $1
195; NORMAL-NEXT: call
196; NORMAL-NEXT: addl $16, %esp
; Loads an i32 through %ptr and passes it as the third argument to @good; the
; other three arguments are constants.
197define void @test7(i32* %ptr) optsize {
198entry:
  ; %val has a single use: the call below.
199  %val = load i32, i32* %ptr
200  call void @good(i32 1, i32 2, i32 %val, i32 4)
201  ret void
202}
203
204; Fold stack-relative loads into the push, with the correct offset.
205; In particular, at the second push, %b was at 12(%esp) and
206; %a was at 8(%esp), but the second push bumped %esp, so %a
207; is now at 12(%esp).
208; NORMAL-LABEL: test8:
209; NORMAL: pushl   $4
210; NORMAL-NEXT: pushl   12(%esp)
211; NORMAL-NEXT: pushl   12(%esp)
212; NORMAL-NEXT: pushl   $1
213; NORMAL-NEXT: call
214; NORMAL-NEXT: addl $16, %esp
; Forwards its own two i32 parameters as the second and third arguments of
; @good, between the constants 1 and 4.
215define void @test8(i32 %a, i32 %b) optsize {
216entry:
217  call void @good(i32 1, i32 %a, i32 %b, i32 4)
218  ret void
219}
220
221; If one call is using push instructions, and the other isn't
222; (because it has frame-index references), then we must resolve
223; these references correctly.
224; NORMAL-LABEL: test9:
225; NORMAL-NOT: leal (%esp),
226; NORMAL: pushl $4
227; NORMAL-NEXT: pushl $3
228; NORMAL-NEXT: pushl $2
229; NORMAL-NEXT: pushl $1
230; NORMAL-NEXT: call
231; NORMAL-NEXT: subl $4, %esp
232; NORMAL-NEXT: movl 20(%esp), [[E1:%e..]]
233; NORMAL-NEXT: movl 24(%esp), [[E2:%e..]]
234; NORMAL-NEXT: movl    [[E2]], 4(%esp)
235; NORMAL-NEXT: movl    [[E1]], (%esp)
236; NORMAL-NEXT: leal 32(%esp), [[E3:%e..]]
237; NORMAL-NEXT: movl    [[E3]], 16(%esp)
238; NORMAL-NEXT: leal 28(%esp), [[E4:%e..]]
239; NORMAL-NEXT: movl    [[E4]], 12(%esp)
240; NORMAL-NEXT: movl    $6, 8(%esp)
241; NORMAL-NEXT: call
242; NORMAL-NEXT: addl $20, %esp
; Makes two calls in one function: an all-constant call to @good, then a call
; to @struct that passes a byval struct and the addresses of two locals.
243define void @test9() optsize {
244entry:
  ; Three fixed-size stack objects: two i32 slots and one %struct.s.
245  %p = alloca i32, align 4
246  %q = alloca i32, align 4
247  %s = alloca %struct.s, align 4
248  call void @good(i32 1, i32 2, i32 3, i32 4)
  ; The addresses of %p and %q are converted to integers so they can be passed
  ; as i32 arguments.
249  %pv = ptrtoint i32* %p to i32
250  %qv = ptrtoint i32* %q to i32
  ; %s is passed byval; note the argument order is %qv, then %pv.
251  call void @struct(%struct.s* byval %s, i32 6, i32 %qv, i32 %pv)
252  ret void
253}
254
255; We can end up with an indirect call which gets reloaded on the spot.
256; Make sure we reference the correct stack slot - we spill into (%esp)
257; and reload from 16(%esp) due to the pushes.
258; NORMAL-LABEL: test10:
259; NORMAL: movl $_good, [[ALLOC:.*]]
260; NORMAL-NEXT: movl [[ALLOC]], [[EAX:%e..]]
261; NORMAL-NEXT: movl [[EAX]], (%esp) # 4-byte Spill
262; NORMAL: nop
263; NORMAL: pushl $4
264; NORMAL-NEXT: pushl $3
265; NORMAL-NEXT: pushl $2
266; NORMAL-NEXT: pushl $1
267; NORMAL-NEXT: calll *16(%esp)
268; NORMAL-NEXT: addl $24, %esp
; Stores the address of @good into a stack slot, reloads it with a volatile
; load, executes an inline-asm "nop" with a register clobber list, and then
; calls through the reloaded pointer with four constant arguments.
269define void @test10() optsize {
270  %stack_fptr = alloca void (i32, i32, i32, i32)*
271  store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr
  ; The load is volatile, and %good_ptr is defined before the inline asm but
  ; used after it.
272  %good_ptr = load volatile void (i32, i32, i32, i32)*, void (i32, i32, i32, i32)** %stack_fptr
  ; The asm lists ax, bx, cx, dx, bp, si and di as clobbered.
273  call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
  ; Indirect call through the loaded function pointer.
274  call void (i32, i32, i32, i32) %good_ptr(i32 1, i32 2, i32 3, i32 4)
275  ret void
276}
277
278; We can't fold the load from the global into the push because of
279; interference from the store
280; NORMAL-LABEL: test11:
281; NORMAL: movl    _the_global, [[EAX:%e..]]
282; NORMAL-NEXT: movl    $42, _the_global
283; NORMAL-NEXT: pushl $4
284; NORMAL-NEXT: pushl $3
285; NORMAL-NEXT: pushl $2
286; NORMAL-NEXT: pushl [[EAX]]
287; NORMAL-NEXT: call
288; NORMAL-NEXT: addl $16, %esp
; External i32 global that @test11 both reads and writes.
289@the_global = external global i32
; Loads @the_global, overwrites it with 42, then passes the previously loaded
; value as the first argument to @good.
290define void @test11() optsize {
291  %myload = load i32, i32* @the_global
  ; This store to the same global sits between the load and the load's only use.
292  store i32 42, i32* @the_global
293  call void @good(i32 %myload, i32 2, i32 3, i32 4)
294  ret void
295}
296
297; Converting one mov into a push isn't worth it when
298; doing so forces too much overhead for other calls.
299; NORMAL-LABEL: test12:
300; NORMAL: movl    $8, 12(%esp)
301; NORMAL-NEXT: movl    $7, 8(%esp)
302; NORMAL-NEXT: movl    $6, 4(%esp)
303; NORMAL-NEXT: movl    $5, (%esp)
304; NORMAL-NEXT: calll _good
; Three calls in sequence: @struct, @good, @struct. Both @struct calls pass
; the address of the same local %s.
305define void @test12() optsize {
306entry:
307  %s = alloca %struct.s, align 4
  ; These call sites pass %s without a byval attribute, although the
  ; declaration of @struct marks its first parameter byval.
308  call void @struct(%struct.s* %s, i32 2, i32 3, i32 4)
309  call void @good(i32 5, i32 6, i32 7, i32 8)
310  call void @struct(%struct.s* %s, i32 10, i32 11, i32 12)
311  ret void
312}
313
314; But if the gains outweigh the overhead, we should do it
315; NORMAL-LABEL: test12b:
316; NORMAL: pushl    $4
317; NORMAL-NEXT: pushl    $3
318; NORMAL-NEXT: pushl    $2
319; NORMAL-NEXT: pushl    $1
320; NORMAL-NEXT: calll _good
321; NORMAL-NEXT: subl    $4, %esp
322; NORMAL: movl    $8, 16(%esp)
323; NORMAL-NEXT: movl    $7, 12(%esp)
324; NORMAL-NEXT: movl    $6, 8(%esp)
325; NORMAL-NEXT: calll _struct
326; NORMAL-NEXT: addl    $20, %esp
327; NORMAL-NEXT: pushl    $12
328; NORMAL-NEXT: pushl    $11
329; NORMAL-NEXT: pushl    $10
330; NORMAL-NEXT: pushl    $9
331; NORMAL-NEXT: calll _good
332; NORMAL-NEXT: addl $16, %esp
; Three calls in sequence: @good, @struct, @good. This is the reverse
; callee pattern of @test12, so two of the three calls are all-constant.
333define void @test12b() optsize {
334entry:
335  %s = alloca %struct.s, align 4
336  call void @good(i32 1, i32 2, i32 3, i32 4)
  ; This call site passes %s without a byval attribute, although the
  ; declaration of @struct marks its first parameter byval.
337  call void @struct(%struct.s* %s, i32 6, i32 7, i32 8)
338  call void @good(i32 9, i32 10, i32 11, i32 12)
339  ret void
340}
341
342; Make sure the add does not prevent folding loads into pushes.
343; val1 and val2 will not be folded into pushes since they have
344; an additional use, but val3 should be.
345; NORMAL-LABEL: test13:
346; NORMAL: movl ([[P1:%e..]]), [[V1:%e..]]
347; NORMAL-NEXT: movl ([[P2:%e..]]), [[V2:%e..]]
348; NORMAL-NEXT: , [[ADD:%e..]]
349; NORMAL-NEXT: pushl [[ADD]]
350; NORMAL-NEXT: pushl ([[P3:%e..]])
351; NORMAL-NEXT: pushl [[V2]]
352; NORMAL-NEXT: pushl [[V1]]
353; NORMAL-NEXT: calll _good
354; NORMAL: movl [[P3]], %eax
; Loads one i32 through each of three inreg pointer parameters, passes the
; three values plus the sum of the first two to @good, and returns %ptr3.
355define i32* @test13(i32* inreg %ptr1, i32* inreg %ptr2, i32* inreg %ptr3) optsize {
356entry:
  ; %val1 and %val2 are each used twice (by the add and by the call);
  ; %val3 is used only by the call.
357  %val1 = load i32, i32* %ptr1
358  %val2 = load i32, i32* %ptr2
359  %val3 = load i32, i32* %ptr3
360  %add = add i32 %val1, %val2
361  call void @good(i32 %val1, i32 %val2, i32 %val3, i32 %add)
  ; %ptr3 is also the return value, so it is used after the call.
362  ret i32* %ptr3
363}
364
365; Make sure to fold adjacent stack adjustments.
366; LINUX-LABEL: pr27140:
367; LINUX: subl    $12, %esp
368; LINUX: .cfi_def_cfa_offset 16
369; LINUX-NOT: sub
370; LINUX: pushl   $4
371; LINUX: .cfi_adjust_cfa_offset 4
372; LINUX: pushl   $3
373; LINUX: .cfi_adjust_cfa_offset 4
374; LINUX: pushl   $2
375; LINUX: .cfi_adjust_cfa_offset 4
376; LINUX: pushl   $1
377; LINUX: .cfi_adjust_cfa_offset 4
378; LINUX: calll   good
379; LINUX: addl    $28, %esp
380; LINUX: .cfi_adjust_cfa_offset -16
381; LINUX-NOT: add
382; LINUX: retl
; Calls @good with four constant arguments; the call carries the `tail`
; marker and is immediately followed by the return.
383define void @pr27140() optsize {
384entry:
385  tail call void @good(i32 1, i32 2, i32 3, i32 4)
386  ret void
387}
388
389; Check that a stack restore (leal -4(%ebp), %esp) doesn't get merged with a
390; stack adjustment (addl $12, %esp). Just because it's a lea doesn't mean it's
391; simply decreasing the stack pointer.
392; NORMAL-LABEL: test14:
393; NORMAL: calll _B_func
394; NORMAL: leal -4(%ebp), %esp
395; NORMAL-NOT: %esp
396; NORMAL: retl
; Types and external callees used only by @test14 within this file.
397%struct.A = type { i32, i32 }
398%struct.B = type { i8 }
; @B_ctor uses x86_thiscallcc, returns its first argument (`returned`) and
; takes a %struct.A byval.
399declare x86_thiscallcc %struct.B* @B_ctor(%struct.B* returned, %struct.A* byval)
; @B_func takes an sret %struct.B*, a second %struct.B* and an i32.
400declare void @B_func(%struct.B* sret, %struct.B*, i32)
; Copies *%a into a local temporary, calls @B_ctor on %ref.tmp with that
; temporary passed byval, then calls @B_func with %tmp as the sret slot.
401define void @test14(%struct.A* %a) {
402entry:
403  %ref.tmp = alloca %struct.B, align 1
  ; %agg.tmp is an i64 slot that is also viewed as a %struct.A via %tmpcast.
404  %agg.tmp = alloca i64, align 4
405  %tmpcast = bitcast i64* %agg.tmp to %struct.A*
406  %tmp = alloca %struct.B, align 1
  ; The incoming struct is copied with a single i64 load/store pair.
407  %0 = bitcast %struct.A* %a to i64*
408  %1 = load i64, i64* %0, align 4
409  store i64 %1, i64* %agg.tmp, align 4
  ; The result %call is not used afterwards.
410  %call = call x86_thiscallcc %struct.B* @B_ctor(%struct.B* %ref.tmp, %struct.A* byval %tmpcast)
  ; %2 is computed but never used.
411  %2 = getelementptr inbounds %struct.B, %struct.B* %tmp, i32 0, i32 0
412  call void @B_func(%struct.B* sret %tmp, %struct.B* %ref.tmp, i32 1)
413  ret void
414}
415