xref: /aosp_15_r20/art/runtime/arch/x86_64/quick_entrypoints_x86_64.S (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "asm_support_x86_64.S"
18#include "interpreter/cfi_asm_support.h"
19
20#include "arch/quick_alloc_entrypoints.S"
21#include "arch/quick_field_entrypoints.S"
22
// Compile-time guard: when this file is built without USE_READ_BARRIER,
// expand to int3 breakpoints so that any read-barrier-only entrypoint
// faults immediately instead of running without the barrier.
MACRO0(ASSERT_USE_READ_BARRIER)
#if !defined(USE_READ_BARRIER)
    int3
    int3
#endif
END_MACRO
29
// For x86-64, the CFA is rsp+8, the address above the pushed return address on the stack.
31
32
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
     * Loads the kSaveRefsAndArgs ArtMethod* from Runtime::Current(), stores it
     * at the bottom of the new frame, and publishes RSP as the thread's top
     * quick frame so the runtime can walk this stack.
     */
MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME)
#if defined(__APPLE__)
    // %gs-based thread-local access below is not available here; trap.
    int3
    int3
#else
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    // R10 := Runtime::Current()
    LOAD_RUNTIME_INSTANCE r10
    // R10 := ArtMethod* for ref and args callee save frame method.
    movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
#endif  // __APPLE__
END_MACRO
53
// Variant of SETUP_SAVE_REFS_AND_ARGS_FRAME that uses the ArtMethod* already
// held in RDI instead of loading the runtime's kSaveRefsAndArgs method.
MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI)
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    // Store ArtMethod to bottom of stack.
    movq %rdi, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
END_MACRO
61
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when R14 and R15 are already saved.
     * The optional \runtime_method_offset selects which runtime method slot
     * (default: the kSaveEverything method) is pushed as the frame's ArtMethod*.
     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
#if defined(__APPLE__)
    // %gs-based thread-local access below is not available here; trap.
    int3
    int3
#else
    // Save core registers from highest to lowest to agree with core spills bitmap.
    // R14 and R15, or at least placeholders for them, are already on the stack.
    PUSH r13
    PUSH r12
    PUSH r11
    PUSH r10
    PUSH r9
    PUSH r8
    PUSH rdi
    PUSH rsi
    PUSH rbp
    PUSH rbx
    PUSH rdx
    PUSH rcx
    PUSH rax
    // Create space for FPRs and stack alignment padding.
    // Layout: 8 bytes of padding at 0(%rsp), then xmm0..xmm15 at 8(%rsp)..128(%rsp).
    INCREASE_FRAME 8 + 16 * 8
    // R10 := Runtime::Current()
    LOAD_RUNTIME_INSTANCE r10
    // Save FPRs.
    movq %xmm0, 8(%rsp)
    movq %xmm1, 16(%rsp)
    movq %xmm2, 24(%rsp)
    movq %xmm3, 32(%rsp)
    movq %xmm4, 40(%rsp)
    movq %xmm5, 48(%rsp)
    movq %xmm6, 56(%rsp)
    movq %xmm7, 64(%rsp)
    movq %xmm8, 72(%rsp)
    movq %xmm9, 80(%rsp)
    movq %xmm10, 88(%rsp)
    movq %xmm11, 96(%rsp)
    movq %xmm12, 104(%rsp)
    movq %xmm13, 112(%rsp)
    movq %xmm14, 120(%rsp)
    movq %xmm15, 128(%rsp)
    // Push ArtMethod* for save everything frame method.
    pushq \runtime_method_offset(%r10)
    CFI_ADJUST_CFA_OFFSET(8)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // 15 GPRs + 16 FPRs + (padding + method) + implicit return address.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_EVERYTHING != 15 * 8 + 16 * 8 + 16 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
121
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when R15 is already saved.
     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    // Push R14, then delegate with both R14 and R15 on the stack.
    PUSH r14
    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED \runtime_method_offset
END_MACRO
131
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
     * Saves all GPRs and FPRs; entry point of the SETUP_SAVE_EVERYTHING_* chain.
     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    PUSH r15
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED \runtime_method_offset
END_MACRO
140
// Restores xmm0..xmm15 from a save-everything frame.
// NOTE: the name's "FRPS" is a historical typo for "FPRS"; kept for compatibility.
// Offsets are +16 relative to the save offsets because the ArtMethod* slot (8)
// and the alignment padding (8) are still below the FPRs at this point.
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS)
    // Restore FPRs. Method and padding is still on the stack.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm8
    movq 88(%rsp), %xmm9
    movq 96(%rsp), %xmm10
    movq 104(%rsp), %xmm11
    movq 112(%rsp), %xmm12
    movq 120(%rsp), %xmm13
    movq 128(%rsp), %xmm14
    movq 136(%rsp), %xmm15
END_MACRO
160
// Pops all GPRs of a save-everything frame except RAX, in the reverse order
// of the pushes in SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED.
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX)
    // Restore callee and GPR args (except RAX), mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP rdi
    POP r8
    POP r9
    POP r10
    POP r11
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO
178
// Fully unwinds a save-everything frame: FPRs, then the method/padding area,
// then all GPRs including RAX.
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
    RESTORE_SAVE_EVERYTHING_FRAME_FRPS

    // Remove save everything callee save method, stack alignment padding and FPRs.
    DECREASE_FRAME 16 + 16 * 8

    POP rax
    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO
188
// Like RESTORE_SAVE_EVERYTHING_FRAME but preserves the current value of RAX
// (e.g. a return value): the saved RAX slot is skipped instead of popped.
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX)
    RESTORE_SAVE_EVERYTHING_FRAME_FRPS

    // Remove save everything callee save method, stack alignment padding and FPRs, skip RAX.
    DECREASE_FRAME 16 + 16 * 8 + 8

    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO
197
// Defines a no-argument throw entrypoint: sets up a kSaveAllCalleeSaves frame,
// calls cxx_name(Thread*), then long-jumps using the returned Context*.
// Never returns to the caller.
MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    movq %rax, %rdi                    // pass Context*
    call SYMBOL(art_quick_do_long_jump)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO
209
// Like NO_ARG_RUNTIME_EXCEPTION but saves a full kSaveEverything frame, so all
// register values are available to the long jump context. Never returns.
MACRO2(NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    movq %rax, %rdi                    // pass Context*
    call SYMBOL(art_quick_do_long_jump)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO
221
// Defines a one-argument throw entrypoint: the managed caller's arg stays in
// RDI (arg0), Thread::Current() goes in RSI, then long-jump on the returned
// Context*. Never returns.
MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, Thread*)
    movq %rax, %rdi                    // pass Context*
    call SYMBOL(art_quick_do_long_jump)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO
233
// Defines a two-argument throw entrypoint with a kSaveEverything frame: the
// managed caller's args stay in RDI/RSI, Thread::Current() goes in RDX, then
// long-jump on the returned Context*. Never returns.
MACRO2(TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, arg2, Thread*)
    movq %rax, %rdi                    // pass Context*
    call SYMBOL(art_quick_do_long_jump)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO
    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
250
    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     * On entry the fault handler has pushed the fault address and a return
     * address, hence the custom CFA of 2 * __SIZEOF_POINTER__.
     */
DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
    // Fault address and return address were saved by the fault handler.
    // Save all registers as basis for long jump context; R15 will replace fault address later.
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
    // Retrieve fault address and save R15: the fault-address slot becomes the
    // frame's R15 slot once the address has been read out into RDI.
    movq (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp), %rdi
    movq %r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp)
    CFI_REL_OFFSET(%r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
    // Outgoing argument set up; RDI already contains the fault address.
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call SYMBOL(artThrowNullPointerExceptionFromSignal)  // (addr, self)
    movq %rax, %rdi                    // pass Context*
    call SYMBOL(art_quick_do_long_jump)
    UNREACHABLE
END_FUNCTION art_quick_throw_null_pointer_exception_from_signal
269
    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     * Uses the save-all-callee-saves variant (no save-everything frame here).
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode
297
298    /*
299     * All generated callsites for interface invokes and invocation slow paths will load arguments
300     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
301     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
302     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/rsi.
303     *
304     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
305     * of the target Method* in rax and method->code_ in rdx.
306     *
307     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
308     * thread and we branch to another stub to deliver it.
309     *
310     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
311     * location on the stack.
312     *
313     * Adapted from x86 code.
314     */
// Body shared by all invoke trampolines; see the block comment above for the
// full calling convention. cxx_name returns the resolved ArtMethod* in RAX and
// its entry point in RDX (128-bit return).
MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
    SETUP_SAVE_REFS_AND_ARGS_FRAME  // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, *this_object, *caller_method, *self, sp)

    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread
    movq %rsp, %rcx                                        // pass SP

    call CALLVAR(cxx_name)                                 // cxx_name(arg1, arg2, Thread*, SP)
                                                           // RDI := method, RAX := code pointer
    movq %rax, %rdi
    movq %rdx, %rax
    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    // Null method means resolution failed and an exception is pending.
    testq %rdi, %rdi
    jz 1f

    // Tail call to intended method; RDI holds the ArtMethod* the callee expects.
    jmp *%rax
1:
    DELIVER_PENDING_EXCEPTION
END_MACRO
// Wraps INVOKE_TRAMPOLINE_BODY in a named, exported function.
MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    INVOKE_TRAMPOLINE_BODY RAW_VAR(cxx_name)
    END_FUNCTION VAR(c_name)
END_MACRO
342
// One trampoline per invoke kind (interface/static/direct/super/virtual),
// each dispatching to the matching access-check helper.
INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
349
350
351    /*
352     * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
353     * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
354     * the end of the shorty.
355     */
// Scans the shorty for the next FP argument and loads it into \xmm_reg.
// ASCII literals: 68 = 'D' (double), 70 = 'F' (float), 74 = 'J' (long).
// Non-FP entries only advance r11 (4 bytes, 8 for 'J'); loading a GPR value
// is done separately by LOOP_OVER_SHORTY_LOADING_GPRS.
MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
    je VAR(finished)
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
    je 3f
    addq MACRO_LITERAL(4), %r11   // arg_array++
    //  Handle extra space in arg array taken by a long.
    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
    jne 1b
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b                        // goto LOOP
2:  // FOUND_DOUBLE
    movsd (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2 (double occupies two 32-bit slots)
    jmp 4f
3:  // FOUND_FLOAT
    movss (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(4), %r11   // arg_array++
4:
END_MACRO
381
382    /*
383     * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
384     * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
385     * the end of the shorty.
386     */
// Scans the shorty for the next integer argument and loads it into the given
// GPR (64-bit reg for 'J'/long, 32-bit reg otherwise). FP entries ('F'/'D')
// only advance r11; they were consumed by LOOP_OVER_SHORTY_LOADING_XMMS.
// ASCII literals: 68 = 'D', 70 = 'F', 74 = 'J'.
MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
    je  VAR(finished)
    cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
    je 3f
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
    je 4f
    movl (%r11), REG_VAR(gpr_reg32)
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 5f
2:  // FOUND_LONG
    movq (%r11), REG_VAR(gpr_reg64)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2 (long occupies two 32-bit slots)
    jmp 5f
3:  // SKIP_FLOAT
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b
4:  // SKIP_DOUBLE
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 1b
5:
END_MACRO
414
415    /*
416     * Quick invocation stub.
417     * On entry:
418     *   [sp] = return address
419     *   rdi = method pointer
420     *   rsi = argument array that must at least contain the this pointer.
421     *   rdx = size of argument array in bytes
422     *   rcx = (managed) thread pointer
423     *   r8 = JValue* result
424     *   r9 = char* shorty
425     */
DEFINE_FUNCTION art_quick_invoke_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character.
    leaq 4(%rsi), %r11            // R11 := arg_array + 4 ; ie skip this pointer.
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
    .balign 16
.Lxmm_setup_finished:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save native callee save rbx
    PUSH r12                      // Save native callee save r12
    PUSH r13                      // Save native callee save r13
    PUSH r14                      // Save native callee save r14
    PUSH r15                      // Save native callee save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d              // r10d := size of args (kept for the copy below).
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)       // Store null for method*

    movl %r10d, %ecx              // Place size of args in rcx.
    movq %rdi, %rax               // rax := method to be called
    movq %rsi, %r11               // r11 := arg_array (rsi itself is consumed by rep movsb)
    leaq 8(%rsp), %rdi            // rdi is pointing just above the ArtMethod* in the stack
                                  // arguments.
    // Copy arg array into stack.
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10             // r10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi               // rdi := method to be called
    movl (%r11), %esi             // rsi := this pointer (r11 still points at arg_array[0])
    addq LITERAL(4), %r11         // arg_array++
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
.Lgpr_setup_finished:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp               // Restore stack pointer.
    POP r15                       // Pop r15
    POP r14                       // Pop r14
    POP r13                       // Pop r13
    POP r12                       // Pop r12
    POP rbx                       // Pop rbx
    POP r9                        // Pop r9 - shorty*
    POP r8                        // Pop r8 - result*.
    POP rbp                       // Pop rbp
    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D' (ASCII 68).
    je .Lreturn_double_quick
    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F' (ASCII 70).
    je .Lreturn_float_quick
    movq %rax, (%r8)              // Store the result assuming its a long, int or Object*
    ret
.Lreturn_double_quick:
    movsd %xmm0, (%r8)            // Store the double floating point result.
    ret
.Lreturn_float_quick:
    movss %xmm0, (%r8)            // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_stub
508
509    /*
510     * Quick invocation stub.
511     * On entry:
512     *   [sp] = return address
513     *   rdi = method pointer
514     *   rsi = argument array or null if no arguments.
515     *   rdx = size of argument array in bytes
516     *   rcx = (managed) thread pointer
517     *   r8 = JValue* result
518     *   r9 = char* shorty
519     */
DEFINE_FUNCTION art_quick_invoke_static_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers. Unlike art_quick_invoke_stub there is no
    // implicit 'this' pointer, so the scan starts at arg_array[0].
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
    movq %rsi, %r11               // R11 := arg_array
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
    .balign 16
.Lxmm_setup_finished2:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save rbx
    PUSH r12                      // Save r12
    PUSH r13                      // Save r13
    PUSH r14                      // Save r14
    PUSH r15                      // Save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d              // r10d := size of args (kept for the copy below).
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15.
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)        // Store null for method*

    movl %r10d, %ecx               // Place size of args in rcx.
    movq %rdi, %rax                // rax := method to be called
    movq %rsi, %r11                // r11 := arg_array (rsi itself is consumed by rep movsb)
    leaq 8(%rsp), %rdi             // rdi is pointing just above the ArtMethod* in the
                                   // stack arguments.
    // Copy arg array into stack.
    rep movsb                      // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10              // r10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi                // rdi := method to be called
    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
.Lgpr_setup_finished2:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp                // Restore stack pointer.
    POP r15                        // Pop r15
    POP r14                        // Pop r14
    POP r13                        // Pop r13
    POP r12                        // Pop r12
    POP rbx                        // Pop rbx
    POP r9                         // Pop r9 - shorty*.
    POP r8                         // Pop r8 - result*.
    POP rbp                        // Pop rbp
    cmpb LITERAL(68), (%r9)        // Test if result type char == 'D' (ASCII 68).
    je .Lreturn_double_quick2
    cmpb LITERAL(70), (%r9)        // Test if result type char == 'F' (ASCII 70).
    je .Lreturn_float_quick2
    movq %rax, (%r8)               // Store the result assuming its a long, int or Object*
    ret
.Lreturn_double_quick2:
    movsd %xmm0, (%r8)             // Store the double floating point result.
    ret
.Lreturn_float_quick2:
    movss %xmm0, (%r8)             // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_static_stub
601
602    /*
603     * Long jump stub.
604     * On entry rdi is the long jump context. This is expected to be returned from a previous
605     * entrypoint call which threw an exception or deoptimized.
606     */
DEFINE_FUNCTION art_quick_do_long_jump
#if defined(__APPLE__)
    int3
    int3
#else
    // Reserve space for the gprs + fprs;
    INCREASE_FRAME X86_64_LONG_JUMP_CONTEXT_SIZE

    // RDI (arg0) already holds the Context* on entry.
    leaq 0(%rsp), %rsi                              // GPRS
    leaq X86_64_LONG_JUMP_GPRS_SIZE(%rsp), %rdx     // FPRS

    call SYMBOL(artContextCopyForLongJump)          // Context* context,
                                                    // uintptr_t* gprs,
                                                    // uintptr_t* fprs

    // Restore FPRs.
    movq (X86_64_LONG_JUMP_GPRS_SIZE+0)(%rsp), %xmm0
    movq (X86_64_LONG_JUMP_GPRS_SIZE+8)(%rsp), %xmm1
    movq (X86_64_LONG_JUMP_GPRS_SIZE+16)(%rsp), %xmm2
    movq (X86_64_LONG_JUMP_GPRS_SIZE+24)(%rsp), %xmm3
    movq (X86_64_LONG_JUMP_GPRS_SIZE+32)(%rsp), %xmm4
    movq (X86_64_LONG_JUMP_GPRS_SIZE+40)(%rsp), %xmm5
    movq (X86_64_LONG_JUMP_GPRS_SIZE+48)(%rsp), %xmm6
    movq (X86_64_LONG_JUMP_GPRS_SIZE+56)(%rsp), %xmm7
    movq (X86_64_LONG_JUMP_GPRS_SIZE+64)(%rsp), %xmm8
    movq (X86_64_LONG_JUMP_GPRS_SIZE+72)(%rsp), %xmm9
    movq (X86_64_LONG_JUMP_GPRS_SIZE+80)(%rsp), %xmm10
    movq (X86_64_LONG_JUMP_GPRS_SIZE+88)(%rsp), %xmm11
    movq (X86_64_LONG_JUMP_GPRS_SIZE+96)(%rsp), %xmm12
    movq (X86_64_LONG_JUMP_GPRS_SIZE+104)(%rsp), %xmm13
    movq (X86_64_LONG_JUMP_GPRS_SIZE+112)(%rsp), %xmm14
    movq (X86_64_LONG_JUMP_GPRS_SIZE+120)(%rsp), %xmm15
    // Restore GPRs.
    // Load all registers except RSP and RIP with values in gprs.
    POP_ARG r15
    POP_ARG r14
    POP_ARG r13
    POP_ARG r12
    POP_ARG r11
    POP_ARG r10
    POP_ARG r9
    POP_ARG r8
    POP_ARG rdi
    POP_ARG rsi
    POP_ARG rbp
    addq LITERAL(8), %rsp   // Skip rsp
    POP_ARG rbx
    POP_ARG rdx
    POP_ARG rcx
    POP_ARG rax
    popq %rsp      // Load stack pointer.
    CFI_DEF_CFA(rsp, 8)
    ret            // From higher in the stack pop rip.
#endif  // __APPLE__
END_FUNCTION art_quick_do_long_jump
662
// Defines a one-argument runtime downcall: arg0 stays in RDI, Thread::Current()
// goes in RSI; \return_macro decides how to return or deliver an exception.
MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
    call CALLVAR(cxx_name)               // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
673
// Defines a two-argument runtime downcall: args stay in RDI/RSI,
// Thread::Current() goes in RDX.
MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
    call CALLVAR(cxx_name)               // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
684
// Defines a three-argument runtime downcall: args stay in RDI/RSI/RDX,
// Thread::Current() goes in RCX.
MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
695
// Defines a four-argument runtime downcall: args stay in RDI/RSI/RDX/RCX,
// Thread::Current() goes in R8.
MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, arg3, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
706
707    /*
708     * Macro for resolution and initialization of indexed DEX file
709     * constants such as classes and strings.
710     */
// Save-everything downcall used by resolution/initialization entrypoints.
// The constant's index arrives in EAX (not RDI); a null result means a
// pending exception, otherwise the result in RAX is returned (with a
// possible deoptimization check).
MACRO3(ONE_ARG_SAVE_EVERYTHING_DOWNCALL, c_name, cxx_name, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset  // save everything for GC
    // Outgoing argument set up
    movl %eax, %edi                               // pass the index of the constant as arg0
    movq %gs:THREAD_SELF_OFFSET, %rsi             // pass Thread::Current()
    call CALLVAR(cxx_name)                        // cxx_name(arg0, Thread*)
    testl %eax, %eax                              // If result is null, deliver pending exception.
    jz 1f
    DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_RAX /*is_ref=*/1
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
    END_FUNCTION VAR(c_name)
END_MACRO
725
// Same as ONE_ARG_SAVE_EVERYTHING_DOWNCALL but uses the for-clinit runtime
// method slot for the save-everything frame.
MACRO2(ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT, c_name, cxx_name)
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \c_name, \cxx_name, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
END_MACRO
729
// Return macro for reference-returning downcalls: a non-null RAX is returned
// (after a deopt check); a null RAX means an exception is pending.
MACRO0(RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER)
    CFI_REMEMBER_STATE
    testq %rax, %rax               // rax == 0 ?
    jz  1f                         // if rax == 0 goto 1
    DEOPT_OR_RETURN /*is_ref=*/1   // Check if deopt is required
1:                                 // deliver exception on current thread
    CFI_RESTORE_STATE_AND_DEF_CFA rsp, 8
    DELIVER_PENDING_EXCEPTION
END_MACRO
739
740
// Return macro that checks the thread's pending-exception slot rather than the
// call's return value: no pending exception => return (after deopt check),
// otherwise deliver it. \is_ref tells the deopt path whether RAX is a reference.
MACRO1(RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION, is_ref = 0)
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx // get exception field
    CFI_REMEMBER_STATE
    testq %rcx, %rcx               // rcx == 0 ?
    jnz 1f                         // if rcx != 0 goto 1
    DEOPT_OR_RETURN \is_ref        // Check if deopt is required
1:                                 // deliver exception on current thread
    CFI_RESTORE_STATE_AND_DEF_CFA rsp, 8
    DELIVER_PENDING_EXCEPTION
END_MACRO
751
// Reference-result variant of RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION.
MACRO0(RETURN_REF_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION)
    RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION /*is_ref=*/1
END_MACRO
755
// Return to the caller unless the thread requests a deopt check; in that case
// set up a save-everything frame, ask the runtime whether to deoptimize, and
// either return normally or long-jump into the interpreter with the Context*
// returned by artDeoptimizeIfNeeded.
MACRO1(DEOPT_OR_RETURN, is_ref = 0)
  cmpl LITERAL(0), %gs:THREAD_DEOPT_CHECK_REQUIRED_OFFSET
  jne 2f
  ret
2:
  SETUP_SAVE_EVERYTHING_FRAME
  movq LITERAL(\is_ref), %rdx          // pass if result is a reference
  movq %rax, %rsi                      // pass the result
  movq %gs:THREAD_SELF_OFFSET, %rdi    // pass Thread::Current
  call SYMBOL(artDeoptimizeIfNeeded)

  CFI_REMEMBER_STATE
  testq %rax, %rax                     // Non-null RAX means: deoptimize with this Context*.
  jnz 3f

  RESTORE_SAVE_EVERYTHING_FRAME
  ret

3:
  // Deoptimize.
  CFI_RESTORE_STATE_AND_DEF_CFA rsp, FRAME_SIZE_SAVE_EVERYTHING
  movq %rax, %rdi                      // pass Context*
  call SYMBOL(art_quick_do_long_jump)
  UNREACHABLE
END_MACRO
781
// Like DEOPT_OR_RETURN but for code that still has a save-everything frame on
// the stack: tears the frame down and returns RAX, unless a deopt check is
// requested, in which case the (possibly reference) result is first stored back
// into the frame so the GC can see it during artDeoptimizeIfNeeded.
MACRO1(DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_RAX, is_ref = 0)
  cmpl LITERAL(0), %gs:THREAD_DEOPT_CHECK_REQUIRED_OFFSET
  CFI_REMEMBER_STATE
  jne 2f
  RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX
  ret
2:
  CFI_RESTORE_STATE_AND_DEF_CFA rsp, FRAME_SIZE_SAVE_EVERYTHING
  movq %rax, SAVE_EVERYTHING_FRAME_RAX_OFFSET(%rsp) // update result in the frame
  movq LITERAL(\is_ref), %rdx                       // pass if result is a reference
  movq %rax, %rsi                                   // pass the result
  movq %gs:THREAD_SELF_OFFSET, %rdi                 // pass Thread::Current
  call SYMBOL(artDeoptimizeIfNeeded)

  CFI_REMEMBER_STATE
  testq %rax, %rax                                  // Non-null RAX means: deoptimize with this Context*.
  jnz 3f

  RESTORE_SAVE_EVERYTHING_FRAME
  ret

3:
  // Deoptimize.
  CFI_RESTORE_STATE_AND_DEF_CFA rsp, FRAME_SIZE_SAVE_EVERYTHING
  movq %rax, %rdi                                   // pass Context*
  call SYMBOL(art_quick_do_long_jump)
  UNREACHABLE
END_MACRO
810
811
812
// After a runtime call returning an int status: zero means success (return,
// possibly deoptimizing); non-zero means an exception is pending — deliver it.
MACRO0(RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER)
    CFI_REMEMBER_STATE
    testl %eax, %eax               // eax == 0 ?
    jnz  1f                        // if eax != 0 goto 1
    DEOPT_OR_RETURN                // Check if we need a deopt
1:                                 // deliver exception on current thread
    CFI_RESTORE_STATE_AND_DEF_CFA rsp, 8
    DELIVER_PENDING_EXCEPTION
END_MACRO
822
// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS

// Comment out allocators that have x86_64 specific asm.
// (Hand-written overrides for the commented-out generators appear further below.)
// Region TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
// Normal TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
853
854
855// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
MACRO2(ART_QUICK_ALLOC_OBJECT_ROSALLOC, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    // Fast path rosalloc allocation.
    // RDI: mirror::Class*, RAX: return value
    // RSI, RDX, RCX, R8, R9: free.
    // RAX is reused: object size, then bracket index, then the allocated object.
                                                           // Check if the thread local
                                                           // allocation stack has room.
    movq   %gs:THREAD_SELF_OFFSET, %r8                     // r8 = thread
    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx  // rcx = alloc stack top.
    cmpq   THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
    jae    .Lslow_path\c_name
                                                           // Load the object size
    movl   MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %eax
                                                           // Check if the size is for a thread
                                                           // local allocation. Also does the
                                                           // initialized and finalizable checks.
    cmpl   LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
    ja     .Lslow_path\c_name
                                                           // Compute the rosalloc bracket index
                                                           // from the size.
    shrq   LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
                                                           // Load the rosalloc run (r9)
                                                           // Subtract __SIZEOF_POINTER__ to
                                                           // subtract one from the index (rax)
                                                           // as there is no 0 byte run and the
                                                           // size is already aligned.
    movq   (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%r8, %rax, __SIZEOF_POINTER__), %r9
                                                           // Load the free list head (rax). This
                                                           // will be the return val.
    movq   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
    testq  %rax, %rax
    jz     .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
                                                           // Push the new object onto the thread
                                                           // local allocation stack and
                                                           // increment the thread local
                                                           // allocation stack top.
    movl   %eax, (%rcx)
    addq   LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
    movq   %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
                                                           // Load the next pointer of the head
                                                           // and update the list head with the
                                                           // next pointer.
    movq   ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
    movq   %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
                                                           // Store the class pointer in the
                                                           // header. This also overwrites the
                                                           // next pointer. The offsets are
                                                           // asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF edi
    movl   %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
                                                           // Decrement the size of the free list
    decl   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
                                                           // No fence necessary for x86.
    ret
.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER       // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
923
// Instantiate the rosalloc fast-path entrypoints (resolved and initialized class variants).
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc
926
927// The common fast path code for art_quick_alloc_object_resolved_region_tlab.
928// TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as
929// ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH.
930//
931// RDI: the class, RAX: return value.
932// RCX, RSI, RDX: scratch, r8: Thread::Current().
// Forwarder: the resolved and initialized fast paths are currently identical.
MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
END_MACRO
936
937// The fast path code for art_quick_alloc_object_initialized_region_tlab.
938//
// RDI: the class, RAX: return value.
// RCX, RDX: scratch, r8: Thread::Current(). RSI is not touched by the fast path
// (only the slow path uses it).
MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %ecx // Load the object size.
    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax
    addq %rax, %rcx                                            // Add size to pos, note that these
                                                               // are both 32 bit ints, overflow
                                                               // will cause the add to be past the
                                                               // end of the thread local region.
    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
    ja   RAW_VAR(slowPathLabel)
    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
                                                               // Store the class pointer in the
                                                               // header.
                                                               // No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    ret                                                        // Fast path succeeded.
                                                               // RAX = new object.
END_MACRO
959
960// The fast path code for art_quick_alloc_array_region_tlab.
961// Inputs: RDI: the class, RSI: int32_t component_count, R9: total_size
962// Free temps: RCX, RDX, R8
963// Output: RAX: return value.
MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %rcx                          // rcx = thread
    // Mask out the unaligned part to make sure we are 8 byte aligned.
    andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %r9
    cmpq LITERAL(MIN_LARGE_OBJECT_THRESHOLD), %r9              // Possibly a large object.
    jae  RAW_VAR(slowPathLabel)                                // Go to slow path if large object
    movq THREAD_LOCAL_POS_OFFSET(%rcx), %rax
    addq %rax, %r9                                             // r9 = new thread_local_pos.
    cmpq THREAD_LOCAL_END_OFFSET(%rcx), %r9                    // Check if it fits.
    ja   RAW_VAR(slowPathLabel)
    movq %r9, THREAD_LOCAL_POS_OFFSET(%rcx)                    // Update thread_local_pos.
                                                               // Store the class pointer in the
                                                               // header.
                                                               // No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    movl %esi, MIRROR_ARRAY_LENGTH_OFFSET(%rax)                // Store the array length.
    ret                                                        // Fast path succeeded.
END_MACRO
983
984// The common slow path code for art_quick_alloc_object_{resolved, initialized}_tlab
985// and art_quick_alloc_object_{resolved, initialized}_region_tlab.
// Shared slow path: calls cxx_name(klass /* already in RDI */, Thread*).
MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER       // return or deliver exception
END_MACRO
994
995// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
996// called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
.Lart_quick_alloc_object_resolved_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
END_FUNCTION art_quick_alloc_object_resolved_tlab
1004
1005// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
1006// May be called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
.Lart_quick_alloc_object_initialized_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
END_FUNCTION art_quick_alloc_object_initialized_tlab
1014
// Compute total allocation size (into R9) for an array whose component size is
// read from the class at runtime. In: RDI = klass, RSI = component_count.
MACRO0(COMPUTE_ARRAY_SIZE_UNKNOWN)
    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rdi), %ecx        // Load component type.
    UNPOISON_HEAP_REF ecx
    movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx // Load primitive type.
    shrq MACRO_LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx  // Get component size shift.
    movq %rsi, %r9
    salq %cl, %r9                                              // Calculate array count shifted.
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
    // Add 4 extra bytes if we are doing a long array.
    addq MACRO_LITERAL(1), %rcx
    andq MACRO_LITERAL(4), %rcx                                // rcx = 4 iff shift was 3 (64-bit).
#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif
    addq %rcx, %r9
END_MACRO
1032
// Compute total allocation size (into R9) for a 1-byte-component array.
MACRO0(COMPUTE_ARRAY_SIZE_8)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
1040
// Compute total allocation size (into R9) for a 2-byte-component array.
MACRO0(COMPUTE_ARRAY_SIZE_16)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(1), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
1049
// Compute total allocation size (into R9) for a 4-byte-component array.
MACRO0(COMPUTE_ARRAY_SIZE_32)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(2), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
1058
// Compute total allocation size (into R9) for an 8-byte-component array
// (uses the wide-array data offset).
MACRO0(COMPUTE_ARRAY_SIZE_64)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(3), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
1067
// Emit a TLAB array-allocation entrypoint: size_setup computes the total size
// into R9, then the fast path attempts a bump-pointer allocation, falling back
// to cxx_name(klass, component_count, Thread*).
MACRO3(GENERATE_ALLOC_ARRAY_TLAB, c_entrypoint, cxx_name, size_setup)
    DEFINE_FUNCTION VAR(c_entrypoint)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    CALL_MACRO(size_setup)
    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\c_entrypoint
.Lslow_path\c_entrypoint:
    SETUP_SAVE_REFS_ONLY_FRAME                                 // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx                          // pass Thread::Current()
    call CALLVAR(cxx_name)                                     // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                               // restore frame up to return address
    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER           // return or deliver exception
    END_FUNCTION VAR(c_entrypoint)
END_MACRO
1083
1084
// Region TLAB array-allocation entrypoints.
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64

// Normal TLAB array-allocation entrypoints.
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
1096
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
END_FUNCTION art_quick_alloc_object_resolved_region_tlab
1107
1108// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER
    // No read barrier since the caller is responsible for that.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
.Lart_quick_alloc_object_initialized_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
END_FUNCTION art_quick_alloc_object_initialized_region_tlab
1119
// Resolution/initialization entrypoints (index in EAX, see the downcall macro above).
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

TWO_ARG_DOWNCALL art_quick_handle_fill_data, \
                 artHandleFillArrayDataFromCode, \
                 RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
1130
1131    /*
1132     * Entry from managed code that tries to lock the object in a fast path and
1133     * calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
1134     * RDI holds the possibly null object to lock.
1135     */
DEFINE_FUNCTION art_quick_lock_object
    testq %rdi, %rdi                      // Null check object.
    jz   art_quick_lock_object_no_inline  // Null: slow path throws the NPE.
    LOCK_OBJECT_FAST_PATH rdi, ecx, art_quick_lock_object_no_inline
END_FUNCTION art_quick_lock_object
1141
1142    /*
1143     * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC.
1144     * RDI holds the possibly null object to lock.
1145     */
DEFINE_FUNCTION art_quick_lock_object_no_inline
    // This is also the slow path for art_quick_lock_object.
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
END_FUNCTION art_quick_lock_object_no_inline
1154
1155    /*
1156     * Entry from managed code that tries to unlock the object in a fast path and calls
1157     * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
1158     * RDI holds the possibly null object to unlock.
1159     */
DEFINE_FUNCTION art_quick_unlock_object
    testq %rdi, %rdi                      // Null check object.
    // BUG FIX: was jumping to art_quick_lock_object_no_inline (the LOCK slow
    // path), which would call artLockObjectFromCode on a null monitor-exit.
    // A null unlock must go through the UNLOCK slow path so
    // artUnlockObjectFromCode raises the correct monitor-exit NPE.
    jz   art_quick_unlock_object_no_inline
    UNLOCK_OBJECT_FAST_PATH rdi, ecx, /*saved_rax*/ none, art_quick_unlock_object_no_inline
END_FUNCTION art_quick_unlock_object
1165
1166    /*
1167     * Entry from managed code that calls `artUnlockObjectFromCode()`
1168     * and delivers exception on failure.
1169     * RDI holds the possibly null object to unlock.
1170     */
DEFINE_FUNCTION art_quick_unlock_object_no_inline
    // This is also the slow path for art_quick_unlock_object.
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
END_FUNCTION art_quick_unlock_object_no_inline
1179
// Instance-of check that throws ClassCastException on failure.
// RDI: the object, RSI: the target class (null for a failed bitstring check).
DEFINE_FUNCTION art_quick_check_instance_of
    // Type check using the bit string passes null as the target class. In that case just throw.
    testl %esi, %esi
    jz .Lthrow_class_cast_exception_for_bitstring_check

    // We could check the super classes here but that is usually already checked in the caller.
    PUSH rdi                          // Save args for exc
    PUSH rsi
    subq LITERAL(8), %rsp             // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    SETUP_FP_CALLEE_SAVE_FRAME
    call SYMBOL(artInstanceOfFromCode)  // (Object* obj, Class* ref_klass)
    CFI_REMEMBER_STATE
    testq %rax, %rax
    jz .Lthrow_class_cast_exception   // jump forward if not assignable
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(24), %rsp            // pop arguments (2 pushes + padding; regs not needed on success)
    CFI_ADJUST_CFA_OFFSET(-24)
    ret

.Lthrow_class_cast_exception:
    CFI_RESTORE_STATE_AND_DEF_CFA rsp, 64  // Reset unwind info so following code unwinds.
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(8), %rsp             // pop padding
    CFI_ADJUST_CFA_OFFSET(-8)
    POP rsi                           // Pop arguments
    POP rdi

.Lthrow_class_cast_exception_for_bitstring_check:
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
    movq %rax, %rdi                   // pass Context*
    call SYMBOL(art_quick_do_long_jump)
    UNREACHABLE
END_FUNCTION art_quick_check_instance_of
1216
1217
1218// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
MACRO2(POP_REG_NE, reg, exclude_reg)
    .ifc RAW_VAR(reg), RAW_VAR(exclude_reg)
      DECREASE_FRAME 8                // Skip the slot; keep exclude_reg's current value.
    .else
      POP RAW_VAR(reg)
    .endif
END_MACRO
1226
// Reference array store with type check and card marking.
// RDI: the array, RSI: the index, RDX: the value to store.
DEFINE_FUNCTION art_quick_aput_obj
    test %edx, %edx              // store of null
    jz .Laput_obj_null
    movl MIRROR_OBJECT_CLASS_OFFSET(%rdi), %ecx
    UNPOISON_HEAP_REF ecx
#ifdef USE_READ_BARRIER
    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
    CFI_REMEMBER_STATE
    jnz .Laput_obj_gc_marking
#endif  // USE_READ_BARRIER
    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %ecx
    cmpl MIRROR_OBJECT_CLASS_OFFSET(%rdx), %ecx  // Both poisoned if heap poisoning is enabled.
    jne .Laput_obj_check_assignability
.Laput_obj_store:
    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx      // Mark the GC card for the array.
    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
    movb %dl, (%rdx, %rdi)
    ret

.Laput_obj_null:
    // Storing null needs no type check and no card mark.
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    ret

.Laput_obj_check_assignability:
    UNPOISON_HEAP_REF ecx         // Unpoison array component type if poisoning is enabled.
    PUSH_ARG rdi                  // Save arguments.
    PUSH_ARG rsi
    PUSH_ARG rdx
    movl MIRROR_OBJECT_CLASS_OFFSET(%rdx), %esi  // Pass arg2 = value's class.
    UNPOISON_HEAP_REF esi
.Laput_obj_check_assignability_call:
    movl %ecx, %edi               // Pass arg1 = array's component type.
    SETUP_FP_CALLEE_SAVE_FRAME
    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
    RESTORE_FP_CALLEE_SAVE_FRAME  // Restore FP registers.
    POP_ARG rdx                   // Restore arguments.
    POP_ARG rsi
    POP_ARG rdi
    testq %rax, %rax              // Check for exception.
    jz   .Laput_obj_throw_array_store_exception
    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx      // Mark the GC card for the array.
    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
    movb %dl, (%rdx, %rdi)
    ret

.Laput_obj_throw_array_store_exception:
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // Save all registers as basis for long jump context.
    // Outgoing argument set up.
    movq %rdx, %rsi                          // Pass arg 2 = value.
    movq %gs:THREAD_SELF_OFFSET, %rdx        // Pass arg 3 = Thread::Current().
                                             // Pass arg 1 = array.
    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
    movq %rax, %rdi                          // pass Context*
    call SYMBOL(art_quick_do_long_jump)
    UNREACHABLE

#ifdef USE_READ_BARRIER
.Laput_obj_gc_marking:
    CFI_RESTORE_STATE_AND_DEF_CFA rsp, 8
    // We need to align stack for `art_quick_read_barrier_mark_regNN`.
    INCREASE_FRAME 8                        // Stack alignment.
    call SYMBOL(art_quick_read_barrier_mark_reg01)  // Mark ECX (array's class).
    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %ecx
    UNPOISON_HEAP_REF ecx
    call SYMBOL(art_quick_read_barrier_mark_reg01)  // Mark ECX (component type).
    movl MIRROR_OBJECT_CLASS_OFFSET(%rdx), %eax
    UNPOISON_HEAP_REF eax
    call SYMBOL(art_quick_read_barrier_mark_reg00)  // Mark EAX (value's class).
    DECREASE_FRAME 8                        // Remove stack alignment.
    cmpl %eax, %ecx
    je .Laput_obj_store
    // Prepare arguments in line with `.Laput_obj_check_assignability_call` and jump there.
    PUSH_ARG rdi                  // Save arguments.
    PUSH_ARG rsi
    PUSH_ARG rdx
    movl %eax, %esi               // Pass arg2 - type of the value to be stored.
    // The arg1 shall be moved at `.Laput_obj_check_assignability_call`.
    jmp .Laput_obj_check_assignability_call
#endif  // USE_READ_BARRIER
END_FUNCTION art_quick_aput_obj
1311
// TODO: This is quite silly on X86_64 now.
// Thin wrapper so managed code can call libc memcpy via the PLT. The call (not
// a tail-jmp) also restores the 16-byte stack alignment memcpy expects.
DEFINE_FUNCTION art_quick_memcpy
    call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
    ret
END_FUNCTION art_quick_memcpy
1317
// Suspend check entrypoint: calls into the runtime and either returns normally
// or deoptimizes via the Context* returned by artTestSuspendFromCode.
DEFINE_FUNCTION art_quick_test_suspend
    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  // save everything for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi           // pass Thread::Current()
    call SYMBOL(artTestSuspendFromCode)         // (Thread*)

    CFI_REMEMBER_STATE
    testq %rax, %rax                            // Non-null RAX means: deoptimize.
    jnz .Ltest_suspend_deoptimize

    // Normal return.
    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
    ret

.Ltest_suspend_deoptimize:
    // Deoptimize.
    CFI_RESTORE_STATE_AND_DEF_CFA rsp, FRAME_SIZE_SAVE_EVERYTHING
    movq %rax, %rdi                             // pass Context*
    call SYMBOL(art_quick_do_long_jump)
    UNREACHABLE
END_FUNCTION art_quick_test_suspend
1339
// 64-bit arithmetic helper entrypoints — presumably not needed on x86-64 since
// the compiler emits these operations inline; TODO confirm. They trap if called.
UNIMPLEMENTED art_quick_ldiv
UNIMPLEMENTED art_quick_lmod
UNIMPLEMENTED art_quick_lmul
UNIMPLEMENTED art_quick_lshl
UNIMPLEMENTED art_quick_lshr
UNIMPLEMENTED art_quick_lushr

GENERATE_FIELD_ENTRYPOINTS
1348
// Proxy method invocation: RDI holds the proxy ArtMethod* on entry.
DEFINE_FUNCTION art_quick_proxy_invoke_handler
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI

    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
    movq %rsp, %rcx                         // Pass SP.
    call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                        // Copy return value in case of float returns.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_proxy_invoke_handler
1359
1360    /*
1361     * Called to resolve an imt conflict.
1362     * rdi is the conflict ArtMethod.
1363     * rax is a hidden argument that holds the target interface method.
1364     *
1365     * Note that this stub writes to rdi.
1366     */
1367DEFINE_FUNCTION art_quick_imt_conflict_trampoline
1368#if defined(__APPLE__)
1369    int3
1370    int3
1371#else
1372    movq ART_METHOD_JNI_OFFSET_64(%rdi), %rdi  // Load ImtConflictTable
1373.Limt_table_iterate:
1374    cmpq %rax, 0(%rdi)
1375    jne .Limt_table_next_entry
1376    // We successfully hit an entry in the table. Load the target method
1377    // and jump to it.
1378    movq __SIZEOF_POINTER__(%rdi), %rdi
1379    jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
1380.Limt_table_next_entry:
1381    // If the entry is null, the interface method is not in the ImtConflictTable.
1382    cmpq LITERAL(0), 0(%rdi)
1383    jz .Lconflict_trampoline
1384    // Iterate over the entries of the ImtConflictTable.
1385    addq LITERAL(2 * __SIZEOF_POINTER__), %rdi
1386    jmp .Limt_table_iterate
1387.Lconflict_trampoline:
1388    // Call the runtime stub to populate the ImtConflictTable and jump to the
1389    // resolved method.
1390    movq %rax, %rdi  // Load interface method
1391    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
1392#endif  // __APPLE__
1393END_FUNCTION art_quick_imt_conflict_trampoline
1394
    /*
     * Lazy method resolution trampoline. Calls artQuickResolutionTrampoline to
     * resolve the called method and install its code; on success tail-calls
     * the resolved code with the method reloaded into RDI, otherwise delivers
     * the pending exception.
     */
DEFINE_FUNCTION art_quick_resolution_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rdx
    movq %rsp, %rcx
    call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
    movq %rax, %r10               // Remember returned code pointer in R10.
    movq (%rsp), %rdi             // Load called method into RDI.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    testq %r10, %r10              // If code pointer is null goto deliver pending exception.
    jz 1f
    jmp *%r10                     // Tail call into method.
1:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_resolution_trampoline
1409
1410/* Generic JNI frame layout:
1411 *
1412 * #-------------------#
1413 * |                   |
1414 * | caller method...  |
1415 * #-------------------#    <--- SP on entry
1416 *
1417 *          |
1418 *          V
1419 *
1420 * #-------------------#
1421 * | caller method...  |
1422 * #-------------------#
1423 * | Return            |
1424 * | R15               |    callee save
1425 * | R14               |    callee save
1426 * | R13               |    callee save
1427 * | R12               |    callee save
1428 * | R9                |    arg5
1429 * | R8                |    arg4
1430 * | RSI/R6            |    arg1
1431 * | RBP/R5            |    callee save
1432 * | RBX/R3            |    callee save
1433 * | RDX/R2            |    arg2
1434 * | RCX/R1            |    arg3
1435 * | XMM7              |    float arg 8
1436 * | XMM6              |    float arg 7
1437 * | XMM5              |    float arg 6
1438 * | XMM4              |    float arg 5
1439 * | XMM3              |    float arg 4
1440 * | XMM2              |    float arg 3
1441 * | XMM1              |    float arg 2
1442 * | XMM0              |    float arg 1
1443 * | RDI/Method*       |  <- sp
1444 * #-------------------#
1445 * | Scratch Alloca    |    5K scratch space
1446 * #---------#---------#
1447 * |         | sp*     |
1448 * | Tramp.  #---------#
1449 * | args    | thread  |
1450 * | Tramp.  #---------#
1451 * |         | method  |
1452 * #-------------------#    <--- SP on artQuickGenericJniTrampoline
1453 *
1454 *           |
1455 *           v              artQuickGenericJniTrampoline
1456 *
1457 * #-------------------#
1458 * | caller method...  |
1459 * #-------------------#
1460 * | Return PC         |
1461 * | Callee-Saves      |
1462 * | padding           | // 8B
1463 * | Method*           |    <--- (1)
1464 * #-------------------#
1465 * | local ref cookie  | // 4B
1466 * | padding           | // 0B or 4B to align handle scope on 8B address
1467 * | handle scope      | // Size depends on number of references; multiple of 4B.
1468 * #-------------------#
1469 * | JNI Stack Args    | // Empty if all args fit into registers.
1470 * #-------------------#    <--- SP on native call (1)
1471 * | Free scratch      |
1472 * #-------------------#
1473 * | SP for JNI call   | // Pointer to (1).
1474 * #-------------------#
1475 * | Hidden arg        | // For @CriticalNative
1476 * #-------------------#
1477 * |                   |
1478 * | Stack for Regs    |    The trampoline assembly will pop these values
1479 * |                   |    into registers for native call
1480 * #-------------------#
1481 */
1482    /*
1483     * Called to do a generic JNI down-call
1484     */
1485DEFINE_FUNCTION art_quick_generic_jni_trampoline
1486    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI
1487
1488    movq %rsp, %rbp                 // save SP at (old) callee-save frame
1489    CFI_DEF_CFA_REGISTER(rbp)
1490
1491    subq LITERAL(GENERIC_JNI_TRAMPOLINE_RESERVED_AREA), %rsp
1492    // prepare for artQuickGenericJniTrampoline call
1493    // (Thread*, managed_sp, reserved_area)
1494    //    rdi       rsi           rdx   <= C calling convention
1495    //  gs:...      rbp           rsp   <= where they are
1496    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread::Current().
1497    movq %rbp, %rsi                    // Pass managed frame SP.
1498    movq %rsp, %rdx                    // Pass reserved area.
1499    call SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)
1500
1501    // The C call will have registered the complete save-frame on success.
1502    // The result of the call is:
1503    //     %rax: pointer to native code, 0 on error.
1504    //     The bottom of the reserved area contains values for arg registers,
1505    //     hidden arg register and SP for out args for the call.
1506
1507    // Check for error (class init check or locking for synchronized native method can throw).
1508    test %rax, %rax
1509    jz .Lexception_in_native
1510
1511    // pop from the register-passing alloca region
1512    // what's the right layout?
1513    popq %rdi
1514    popq %rsi
1515    popq %rdx
1516    popq %rcx
1517    popq %r8
1518    popq %r9
1519    // TODO: skip floating point if unused, some flag.
1520    movq 0(%rsp), %xmm0
1521    movq 8(%rsp), %xmm1
1522    movq 16(%rsp), %xmm2
1523    movq 24(%rsp), %xmm3
1524    movq 32(%rsp), %xmm4
1525    movq 40(%rsp), %xmm5
1526    movq 48(%rsp), %xmm6
1527    movq 56(%rsp), %xmm7
1528
1529    // Save call target in scratch register.
1530    movq %rax, %r11
1531
1532    // Load hidden arg (rax) for @CriticalNative.
1533    movq 64(%rsp), %rax
1534    // Load SP for out args, releasing unneeded reserved area.
1535    movq 72(%rsp), %rsp
1536
1537    // native call
1538    call *%r11
1539
1540    // result sign extension is handled in C code
1541    // prepare for artQuickGenericJniEndTrampoline call
1542    // (Thread*,  result, result_f)
1543    //   rdi      rsi   rdx       <= C calling convention
1544    //  gs:...    rax   xmm0      <= where they are
1545    movq %gs:THREAD_SELF_OFFSET, %rdi
1546    movq %rax, %rsi
1547    movq %xmm0, %rdx
1548    call SYMBOL(artQuickGenericJniEndTrampoline)
1549
1550    // Pending exceptions possible.
1551    // TODO: use cmpq, needs direct encoding because of gas bug
1552    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
1553    test %rcx, %rcx
1554    jnz .Lexception_in_native
1555
1556    // Tear down the alloca.
1557    movq %rbp, %rsp
1558
1559    // store into fpr, for when it's a fpr return...
1560    movq %rax, %xmm0
1561
1562    LOAD_RUNTIME_INSTANCE rcx
1563    cmpb MACRO_LITERAL(0), RUN_EXIT_HOOKS_OFFSET_FROM_RUNTIME_INSTANCE(%rcx)
1564    CFI_REMEMBER_STATE
1565    jne .Lcall_method_exit_hook
1566.Lcall_method_exit_hook_done:
1567
1568    // Tear down the callee-save frame.
1569    CFI_DEF_CFA_REGISTER(rsp)
1570    // Load callee-save FPRs. Skip FP args.
1571    movq 80(%rsp), %xmm12
1572    movq 88(%rsp), %xmm13
1573    movq 96(%rsp), %xmm14
1574    movq 104(%rsp), %xmm15
1575    // Pop method, padding, FP args and two GRP args (rcx, rdx).
1576    DECREASE_FRAME 16 + 12*8 + 2*8
1577    // Load callee-save GPRs and skip args, mixed together to agree with core spills bitmap.
1578    POP rbx  // Callee save.
1579    POP rbp  // Callee save.
1580    DECREASE_FRAME 3*8  // Skip three args (RSI, R8, R9).
1581    POP r12  // Callee save.
1582    POP r13  // Callee save.
1583    POP r14  // Callee save.
1584    POP r15  // Callee save.
1585    ret
1586
1587.Lcall_method_exit_hook:
1588    CFI_RESTORE_STATE_AND_DEF_CFA rbp, 208
1589    movq LITERAL(FRAME_SIZE_SAVE_REFS_AND_ARGS), %r8
1590    call art_quick_method_exit_hook
1591    jmp .Lcall_method_exit_hook_done
1592
1593.Lexception_in_native:
1594    pushq %gs:THREAD_TOP_QUICK_FRAME_OFFSET
1595    addq LITERAL(-1), (%rsp)  // Remove the GenericJNI tag.
1596    movq (%rsp), %rsp
1597    call art_deliver_pending_exception
1598END_FUNCTION art_quick_generic_jni_trampoline
1599
    /*
     * Out-of-line helper that delivers the thread's pending exception.
     * Callable (rather than a macro expansion) so other stubs can `call` it.
     */
DEFINE_FUNCTION art_deliver_pending_exception
    // This will create a new save-all frame, required by the runtime.
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_deliver_pending_exception
1604
1605    /*
1606     * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
1607     * of a quick call:
1608     * RDI = method being called / to bridge to.
1609     * RSI, RDX, RCX, R8, R9 are arguments to that method.
1610     */
1611DEFINE_FUNCTION art_quick_to_interpreter_bridge
1612    SETUP_SAVE_REFS_AND_ARGS_FRAME     // Set up frame and save arguments.
1613    movq %gs:THREAD_SELF_OFFSET, %rsi  // RSI := Thread::Current()
1614    movq %rsp, %rdx                    // RDX := sp
1615    call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
1616    RESTORE_SAVE_REFS_AND_ARGS_FRAME   // TODO: no need to restore arguments in this case.
1617    movq %rax, %xmm0                   // Place return value also into floating point return value.
1618    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
1619END_FUNCTION art_quick_to_interpreter_bridge
1620
1621    /*
1622     * Called to catch an attempt to invoke an obsolete method.
1623     * RDI = method being called.
1624     */
1625ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
1626
1627    /*
1628     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
1629     * will long jump to the interpreter bridge.
1630     */
1631DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
1632    SETUP_SAVE_EVERYTHING_FRAME
1633                                                // Stack should be aligned now.
1634    movq %gs:THREAD_SELF_OFFSET, %rsi           // Pass Thread.
1635    call SYMBOL(artDeoptimizeFromCompiledCode)  // (DeoptimizationKind, Thread*)
1636    movq %rax, %rdi                             // pass Context*
1637    call SYMBOL(art_quick_do_long_jump)
1638    UNREACHABLE
1639END_FUNCTION art_quick_deoptimize_from_compiled_code
1640
1641    /*
1642     * String's compareTo.
1643     *
1644     * On entry:
1645     *    rdi:   this string object (known non-null)
1646     *    rsi:   comp string object (known non-null)
1647     */
1648DEFINE_FUNCTION art_quick_string_compareto
1649    movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
1650    movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
1651    /* Build pointers to the start of string data */
1652    leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
1653    leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
1654#if (STRING_COMPRESSION_FEATURE)
1655    /* Differ cases */
1656    shrl    LITERAL(1), %r8d
1657    jnc     .Lstring_compareto_this_is_compressed
1658    shrl    LITERAL(1), %r9d
1659    jnc     .Lstring_compareto_that_is_compressed
1660    jmp     .Lstring_compareto_both_not_compressed
1661.Lstring_compareto_this_is_compressed:
1662    shrl    LITERAL(1), %r9d
1663    jnc     .Lstring_compareto_both_compressed
1664    /* Comparison this (8-bit) and that (16-bit) */
1665    mov     %r8d, %eax
1666    subl    %r9d, %eax
1667    mov     %r8d, %ecx
1668    cmovg   %r9d, %ecx
1669    /* Going into loop to compare each character */
1670    jecxz   .Lstring_compareto_keep_length1     // check loop counter (if 0 then stop)
1671.Lstring_compareto_loop_comparison_this_compressed:
1672    movzbl  (%edi), %r8d                        // move *(this_cur_char) byte to long
1673    movzwl  (%esi), %r9d                        // move *(that_cur_char) word to long
1674    addl    LITERAL(1), %edi                    // ++this_cur_char (8-bit)
1675    addl    LITERAL(2), %esi                    // ++that_cur_char (16-bit)
1676    subl    %r9d, %r8d
1677    loope   .Lstring_compareto_loop_comparison_this_compressed
1678    cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
1679.Lstring_compareto_keep_length1:
1680    ret
1681.Lstring_compareto_that_is_compressed:
1682    movl    %r8d, %eax
1683    subl    %r9d, %eax
1684    mov     %r8d, %ecx
1685    cmovg   %r9d, %ecx
1686    /* Comparison this (8-bit) and that (16-bit) */
1687    jecxz   .Lstring_compareto_keep_length2     // check loop counter (if 0, don't compare)
1688.Lstring_compareto_loop_comparison_that_compressed:
1689    movzwl  (%edi), %r8d                        // move *(this_cur_char) word to long
1690    movzbl  (%esi), %r9d                        // move *(that_cur_chat) byte to long
1691    addl    LITERAL(2), %edi                    // ++this_cur_char (16-bit)
1692    addl    LITERAL(1), %esi                    // ++that_cur_char (8-bit)
1693    subl    %r9d, %r8d
1694    loope   .Lstring_compareto_loop_comparison_that_compressed
1695    cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
1696.Lstring_compareto_keep_length2:
1697    ret
1698.Lstring_compareto_both_compressed:
1699    /* Calculate min length and count diff */
1700    movl    %r8d, %ecx
1701    movl    %r8d, %eax
1702    subl    %r9d, %eax
1703    cmovg   %r9d, %ecx
1704    jecxz   .Lstring_compareto_keep_length3
1705    repe    cmpsb
1706    je      .Lstring_compareto_keep_length3
1707    movzbl  -1(%edi), %eax        // get last compared char from this string (8-bit)
1708    movzbl  -1(%esi), %ecx        // get last compared char from comp string (8-bit)
1709    jmp     .Lstring_compareto_count_difference
1710#endif // STRING_COMPRESSION_FEATURE
1711.Lstring_compareto_both_not_compressed:
1712    /* Calculate min length and count diff */
1713    movl    %r8d, %ecx
1714    movl    %r8d, %eax
1715    subl    %r9d, %eax
1716    cmovg   %r9d, %ecx
1717    /*
1718     * At this point we have:
1719     *   eax: value to return if first part of strings are equal
1720     *   ecx: minimum among the lengths of the two strings
1721     *   esi: pointer to comp string data
1722     *   edi: pointer to this string data
1723     */
1724    jecxz .Lstring_compareto_keep_length3
1725    repe  cmpsw                   // find nonmatching chars in [%esi] and [%edi], up to length %ecx
1726    je    .Lstring_compareto_keep_length3
1727    movzwl  -2(%edi), %eax        // get last compared char from this string (16-bit)
1728    movzwl  -2(%esi), %ecx        // get last compared char from comp string (16-bit)
1729.Lstring_compareto_count_difference:
1730    subl  %ecx, %eax              // return the difference
1731.Lstring_compareto_keep_length3:
1732    ret
1733END_FUNCTION art_quick_string_compareto
1734
// 16-bit memcmp entrypoint; not implemented on x86-64 (traps if reached).
UNIMPLEMENTED art_quick_memcmp16
1736
    /*
     * instanceof check. Args (object in RDI, class in RSI) are already in C
     * positions; saves caller-save FPRs, realigns the stack with 8 bytes of
     * padding, and forwards to artInstanceOfFromCode. Result in RAX.
     */
DEFINE_FUNCTION art_quick_instance_of
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                      // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artInstanceOfFromCode)         // (mirror::Object*, mirror::Class*)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_instance_of
1747
    /*
     * StringBuilder append intrinsic slow path. The format word is passed by
     * the caller (arg1); RSI is pointed at the caller's outgoing-args area
     * just above the save-refs-only frame and return address.
     */
DEFINE_FUNCTION art_quick_string_builder_append
    SETUP_SAVE_REFS_ONLY_FRAME                // save ref containing registers for GC
    // Outgoing argument set up
    leaq FRAME_SIZE_SAVE_REFS_ONLY + __SIZEOF_POINTER__(%rsp), %rsi  // pass args
    movq %gs:THREAD_SELF_OFFSET, %rdx         // pass Thread::Current()
    call artStringBuilderAppend               // (uint32_t, const uint32_t*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME              // restore frame up to return address
    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER  // return or deopt or deliver exception
END_FUNCTION art_quick_string_builder_append
1757
// Create a function `name` calling the ReadBarrier::Mark routine,
// getting its argument and returning its result through register
// `reg`, saving and restoring all caller-save registers.
//
// The generated function follows a non-standard runtime calling
// convention:
// - register `reg` (which may be different from RDI) is used to pass the (sole) argument,
// - register `reg` (which may be different from RAX) is used to return the result,
// - all other registers are callee-save (the values they hold are preserved).
//
// Fast paths (no C call): null reference, already-marked object (mark bit set
// in the lock word), and lock word holding a forwarding address.
MACRO2(READ_BARRIER_MARK_REG, name, reg)
    DEFINE_FUNCTION VAR(name)
    // Null check so that we can load the lock word.
    testq REG_VAR(reg), REG_VAR(reg)
    jz .Lret_rb_\name
.Lnot_null_\name:
    // Check the mark bit, if it is 1 return.
    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
    jz .Lslow_rb_\name
    ret
.Lslow_rb_\name:
    PUSH rax                      // Save RAX; it is clobbered by the lock-word check below.
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
    addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
    // Jump if the addl caused eax to unsigned overflow. The only case where it overflows is the
    // forwarding address one.
    // Taken ~25% of the time.
    CFI_REMEMBER_STATE
    jnae .Lret_forwarding_address\name

    // Save all potentially live caller-save core registers.
    movq 0(%rsp), %rax            // Reload original RAX from its slot (copy stays on the stack).
    PUSH rcx
    PUSH rdx
    PUSH rsi
    PUSH rdi
    PUSH r8
    PUSH r9
    PUSH r10
    PUSH r11
    // Create space for caller-save floating-point registers.
    INCREASE_FRAME 12 * 8
    // Save all potentially live caller-save floating-point registers.
    movq %xmm0, 0(%rsp)
    movq %xmm1, 8(%rsp)
    movq %xmm2, 16(%rsp)
    movq %xmm3, 24(%rsp)
    movq %xmm4, 32(%rsp)
    movq %xmm5, 40(%rsp)
    movq %xmm6, 48(%rsp)
    movq %xmm7, 56(%rsp)
    movq %xmm8, 64(%rsp)
    movq %xmm9, 72(%rsp)
    movq %xmm10, 80(%rsp)
    movq %xmm11, 88(%rsp)
    SETUP_FP_CALLEE_SAVE_FRAME

    .ifnc RAW_VAR(reg), rdi
      movq REG_VAR(reg), %rdi       // Pass arg1 - obj from `reg`.
    .endif
    call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
    .ifnc RAW_VAR(reg), rax
      movq %rax, REG_VAR(reg)       // Return result into `reg`.
    .endif

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore floating-point registers.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    movq 64(%rsp), %xmm8
    movq 72(%rsp), %xmm9
    movq 80(%rsp), %xmm10
    movq 88(%rsp), %xmm11
    // Remove floating-point registers.
    DECREASE_FRAME 12 * 8
    // Restore core regs, except `reg`, as it is used to return the
    // result of this function (simply remove it from the stack instead).
    POP_REG_NE r11, RAW_VAR(reg)
    POP_REG_NE r10, RAW_VAR(reg)
    POP_REG_NE r9, RAW_VAR(reg)
    POP_REG_NE r8, RAW_VAR(reg)
    POP_REG_NE rdi, RAW_VAR(reg)
    POP_REG_NE rsi, RAW_VAR(reg)
    POP_REG_NE rdx, RAW_VAR(reg)
    POP_REG_NE rcx, RAW_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
.Lret_rb_\name:
    ret
.Lret_forwarding_address\name:
    CFI_RESTORE_STATE_AND_DEF_CFA rsp, 16
    // The overflow cleared the top bits.
    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
    movq %rax, REG_VAR(reg)         // Return the forwarding address in `reg`.
    POP_REG_NE rax, RAW_VAR(reg)
    ret
    END_FUNCTION VAR(name)
END_MACRO
1860
// Instantiate one mark entrypoint per GPR that can hold the reference; the
// register number is encoded in the entrypoint name.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (RSP)
// cannot be used to pass arguments.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, r13
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, r14
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, r15
1878
    /*
     * Read barrier slow path. Args (ref, obj, offset) are already in the C
     * positions; saves caller-save FPRs, realigns the stack, and forwards to
     * artReadBarrierSlow. Result in RAX.
     */
DEFINE_FUNCTION art_quick_read_barrier_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp           // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_slow
1889
    /*
     * GC-root read barrier slow path; same shape as art_quick_read_barrier_slow
     * but forwards the single root argument to artReadBarrierForRootSlow.
     */
DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                  // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierForRootSlow) // artReadBarrierForRootSlow(root)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_for_root_slow
1900
1901    /*
1902     * On stack replacement stub.
1903     * On entry:
1904     *   [sp] = return address
1905     *   rdi = stack to copy
1906     *   rsi = size of stack
1907     *   rdx = pc to call
1908     *   rcx = JValue* result
1909     *   r8 = shorty
1910     *   r9 = thread
1911     *
1912     * Note that the native C ABI already aligned the stack to 16-byte.
1913     */
1914DEFINE_FUNCTION art_quick_osr_stub
1915    // Save the non-volatiles.
1916    PUSH rbp                      // Save rbp.
1917    PUSH rcx                      // Save rcx/result*.
1918    PUSH r8                       // Save r8/shorty*.
1919
1920    // Save callee saves.
1921    PUSH rbx
1922    PUSH r12
1923    PUSH r13
1924    PUSH r14
1925    PUSH r15
1926
1927    pushq LITERAL(0)              // Push null for ArtMethod*.
1928    CFI_ADJUST_CFA_OFFSET(8)
1929    movl %esi, %ecx               // rcx := size of stack
1930    movq %rdi, %rsi               // rsi := stack to copy
1931    movq %rsp, %rbp               // Save stack pointer to RBP for CFI use in .Losr_entry.
1932    CFI_REMEMBER_STATE
1933    call .Losr_entry
1934
1935    // Restore stack and callee-saves.
1936    addq LITERAL(8), %rsp
1937    CFI_ADJUST_CFA_OFFSET(-8)
1938    POP r15
1939    POP r14
1940    POP r13
1941    POP r12
1942    POP rbx
1943    POP r8
1944    POP rcx
1945    POP rbp
1946    movq %rax, (%rcx)              // Store the result.
1947    ret
1948.Losr_entry:
1949    CFI_RESTORE_STATE_AND_DEF_CFA rsp, 80
1950    // Since the call has pushed the return address we need to switch the CFA register to RBP.
1951    CFI_DEF_CFA_REGISTER(rbp)
1952
1953    subl LITERAL(8), %ecx         // Given stack size contains pushed frame pointer, substract it.
1954    subq %rcx, %rsp
1955    movq %rsp, %rdi               // rdi := beginning of stack
1956    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
1957    jmp *%rdx
1958END_FUNCTION art_quick_osr_stub
1959
    /*
     * MethodHandle.invoke/invokeExact bridge. Forwards the receiver plus the
     * managed frame SP to artInvokePolymorphic; the GPR result is mirrored
     * into XMM0 for FP-returning call sites.
     */
DEFINE_FUNCTION art_quick_invoke_polymorphic
                                                   // On entry: RDI := unused, RSI := receiver
    SETUP_SAVE_REFS_AND_ARGS_FRAME                 // save callee saves
    movq %rsi, %rdi                                // RDI := receiver
    movq %gs:THREAD_SELF_OFFSET, %rsi              // RSI := Thread (self)
    movq %rsp, %rdx                                // RDX := pass SP
    call SYMBOL(artInvokePolymorphic)              // invoke with (receiver, self, SP)
                                                   // save the code pointer
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                               // Result is in RAX. Copy to FP result register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_polymorphic
1972
1973    /*
1974     * Slow path for MethodHandle.invokeExact intrinsic.
1975     * That intrinsic has a custom calling convention: the argument allocation doesn't start from
1976     * the receiver (MethodHandle) object, but from the argument following it. That's done to match
1977     * expectation of the underlying method when MethodHandle targets a method. That also affects
1978     * the way arguments are spilled onto the stack.
1979     */
1980DEFINE_FUNCTION art_quick_invoke_polymorphic_with_hidden_receiver
1981                                                        // On entry: RDI := receiver
1982    SETUP_SAVE_REFS_AND_ARGS_FRAME                      // save callee saves
1983    movq %gs:THREAD_SELF_OFFSET, %rsi                   // RSI := Thread (self)
1984    movq %rsp, %rdx                                     // RDX := pass SP
1985    call SYMBOL(artInvokePolymorphicWithHiddenReceiver) // invoke with (receiver, self, SP)
1986                                                        // save the code pointer
1987    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1988    movq %rax, %xmm0                               // Result is in RAX. Copy to FP result register.
1989    RETURN_OR_DELIVER_PENDING_EXCEPTION
1990END_FUNCTION art_quick_invoke_polymorphic_with_hidden_receiver
1991
    /*
     * invoke-custom bridge. The call-site index arrives in RDI (arg1, left in
     * place); self and the managed frame SP are appended and the call is
     * forwarded to artInvokeCustom. The GPR result is mirrored into XMM0.
     */
DEFINE_FUNCTION art_quick_invoke_custom
    SETUP_SAVE_REFS_AND_ARGS_FRAME                 // save callee saves
                                                   // RDI := call_site_index
    movq %gs:THREAD_SELF_OFFSET, %rsi              // RSI := Thread::Current()
    movq %rsp, %rdx                                // RDX := SP
    call SYMBOL(artInvokeCustom)                   // artInvokeCustom(Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                               // Result is in RAX. Copy to FP result register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_custom
2002
// On entry: edi is the class, r11 is the inline cache. r10 and rax are available.
// Records the observed class in the first free (null) slot of the 5-entry
// inline cache, using lock cmpxchg (expected value in EAX) so concurrent
// updaters race safely; a failed cmpxchg re-reads the slot and retries.
// The last slot is stored unconditionally: the cache is megamorphic.
DEFINE_FUNCTION art_quick_update_inline_cache
#if (INLINE_CACHE_SIZE != 5)
#error "INLINE_CACHE_SIZE not as expected."
#endif
    // Don't update the cache if we are marking.
    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
    jnz .Ldone
.Lentry1:
    movl INLINE_CACHE_CLASSES_OFFSET(%r11), %eax
    cmpl %edi, %eax               // Class already cached in slot 1?
    je .Ldone
    cmpl LITERAL(0), %eax         // Slot occupied by a different class?
    jne .Lentry2
    lock cmpxchg %edi, INLINE_CACHE_CLASSES_OFFSET(%r11)   // Claim empty slot (expected EAX == 0).
    jz .Ldone
    jmp .Lentry1
.Lentry2:
    movl (INLINE_CACHE_CLASSES_OFFSET+4)(%r11), %eax
    cmpl %edi, %eax
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry3
    lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+4)(%r11)
    jz .Ldone
    jmp .Lentry2
.Lentry3:
    movl (INLINE_CACHE_CLASSES_OFFSET+8)(%r11), %eax
    cmpl %edi, %eax
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry4
    lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+8)(%r11)
    jz .Ldone
    jmp .Lentry3
.Lentry4:
    movl (INLINE_CACHE_CLASSES_OFFSET+12)(%r11), %eax
    cmpl %edi, %eax
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry5
    lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+12)(%r11)
    jz .Ldone
    jmp .Lentry4
.Lentry5:
    // Unconditionally store, the cache is megamorphic.
    movl %edi, (INLINE_CACHE_CLASSES_OFFSET+16)(%r11)
.Ldone:
    ret
END_FUNCTION art_quick_update_inline_cache
2053
// On entry, method is at the bottom of the stack.
// Requests JIT compilation of the method via artCompileOptimized while a
// kSaveEverything frame keeps all registers visible, then resumes execution.
DEFINE_FUNCTION art_quick_compile_optimized
    SETUP_SAVE_EVERYTHING_FRAME
    movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi // pass ArtMethod
    movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
    call SYMBOL(artCompileOptimized)            // (ArtMethod*, Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
    ret
END_FUNCTION art_quick_compile_optimized
2063
// On entry, method is at the bottom of the stack.
// Instrumentation entry hook: calls artMethodEntryHook and either returns
// normally or deoptimizes when the hook returns a non-null Context* in RAX.
DEFINE_FUNCTION art_quick_method_entry_hook
    SETUP_SAVE_EVERYTHING_FRAME

    movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi // pass ArtMethod
    movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
    movq %rsp, %rdx                             // SP
    call SYMBOL(artMethodEntryHook)             // (ArtMethod*, Thread*, sp)

    CFI_REMEMBER_STATE
    testq %rax, %rax                            // RAX != 0 => deoptimization Context*.
    jnz .Lentryhook_deopt

    // Normal return.
    RESTORE_SAVE_EVERYTHING_FRAME
    ret

.Lentryhook_deopt:
    // Deoptimize.
    CFI_RESTORE_STATE_AND_DEF_CFA rsp, FRAME_SIZE_SAVE_EVERYTHING
    movq %rax, %rdi                             // pass Context*
    call SYMBOL(art_quick_do_long_jump)
    UNREACHABLE
END_FUNCTION art_quick_method_entry_hook
2088
// On entry, method is at the bottom of the stack.
// Instrumentation exit hook: passes pointers into the kSaveEverything frame to
// the GPR/FPR result slots so artMethodExitHook can read (or modify) the
// method's return value. A non-null Context* in RAX triggers deoptimization
// or exception delivery via art_quick_do_long_jump.
DEFINE_FUNCTION art_quick_method_exit_hook
    SETUP_SAVE_EVERYTHING_FRAME

    // R8 passed from JITed code contains frame_size
    leaq 16(%rsp), %rcx                         // floating-point result pointer in kSaveEverything
                                                // frame
    leaq 144(%rsp), %rdx                        // integer result pointer in kSaveEverything frame
    leaq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rsi // ArtMethod**
    movq %gs:THREAD_SELF_OFFSET, %rdi           // Thread::Current
    call SYMBOL(artMethodExitHook)              // (Thread*, ArtMethod**, gpr_res*, fpr_res*,
                                                //  frame_size)

    CFI_REMEMBER_STATE
    testq %rax, %rax                            // RAX != 0 => deoptimization Context*.
    jnz .Lexithook_deopt_or_exception

    // Normal return.
    RESTORE_SAVE_EVERYTHING_FRAME
    ret

.Lexithook_deopt_or_exception:
    // Deoptimize or exception thrown.
    CFI_RESTORE_STATE_AND_DEF_CFA rsp, FRAME_SIZE_SAVE_EVERYTHING
    movq %rax, %rdi                             // pass Context*
    call SYMBOL(art_quick_do_long_jump)
    UNREACHABLE
END_FUNCTION art_quick_method_exit_hook
2117