/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_arm.S"
#include "interpreter/cfi_asm_support.h"

#include "arch/quick_alloc_entrypoints.S"
#include "arch/quick_field_entrypoints.S"

    /* Deliver the given exception */
    .extern artDeliverExceptionFromCode
    /* Deliver an exception pending on a thread */
    .extern artDeliverPendingException

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    LOAD_RUNTIME_INSTANCE \rTemp                  @ Load Runtime::Current into rTemp.
    @ Load kSaveRefsAndArgs Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    str r0, [sp, #0]                              @ Store ArtMethod* to bottom of stack.
    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when core registers are already saved.
     */
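    /*
     * Sketch of the resulting frame layout; the total matches the compile-time
     * check below (FRAME_SIZE_SAVE_EVERYTHING = 56 + 128 + 8 = 192 bytes):
     *   [sp + 136, sp + 192)  r0-r12, lr  (14 words, pushed before this macro runs)
     *   [sp +   8, sp + 136)  d0-d15      (32 words)
     *   [sp +   4, sp +   8)  alignment padding
     *   [sp +   0, sp +   4)  ArtMethod*  (the kSaveEverything runtime method)
     */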
.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
                                        @ 14 words of callee saves and args already saved.
    vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
    .cfi_adjust_cfa_offset 128
    sub sp, #8                          @ 2 words of space, alignment padding and Method*
    .cfi_adjust_cfa_offset 8
    LOAD_RUNTIME_INSTANCE \rTemp        @ Load Runtime::Current into rTemp.
    @ Load kSaveEverything Method* into rTemp.
    ldr \rTemp, [\rTemp, #\runtime_method_offset]
    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
#endif
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
.macro SETUP_SAVE_EVERYTHING_FRAME rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    push {r0-r12, lr}                   @ 14 words of callee saves and args.
    .cfi_adjust_cfa_offset 56
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r5, 20
    .cfi_rel_offset r6, 24
    .cfi_rel_offset r7, 28
    .cfi_rel_offset r8, 32
    .cfi_rel_offset r9, 36
    .cfi_rel_offset r10, 40
    .cfi_rel_offset r11, 44
    .cfi_rel_offset ip, 48
    .cfi_rel_offset lr, 52
    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp, \runtime_method_offset
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME
    add  sp, #8                         @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {d0-d15}
    .cfi_adjust_cfa_offset -128
    pop {r0-r12, lr}                    @ 14 words of callee saves
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r9
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore r12
    .cfi_restore lr
    .cfi_adjust_cfa_offset -56
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
    add  sp, #8                         @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {d0-d15}
    .cfi_adjust_cfa_offset -128
    add  sp, #4                         @ skip r0
    .cfi_adjust_cfa_offset -4
    .cfi_restore r0                     @ debugger can no longer restore caller's r0
    pop {r1-r12, lr}                    @ 13 words of callee saves
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r9
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore r12
    .cfi_restore lr
    .cfi_adjust_cfa_offset -52
.endm

.macro RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION is_ref = 0
    // Use R2 to allow returning 64-bit values in R0-R1.
    ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET]   // Get exception field.
    CFI_REMEMBER_STATE
    cbnz r2, 1f
    DEOPT_OR_RETURN r2, \is_ref                 // Check if deopt is required
1:
    CFI_RESTORE_STATE_AND_DEF_CFA sp, 0
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_REF_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
    RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION /* is_ref= */ 1
.endm

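    /*
     * Check the thread-local "deopt check required" flag and return to the caller if it is
     * clear. Otherwise set up a save-everything frame, call artDeoptimizeIfNeeded() and
     * long-jump if that call returns a deoptimization Context*.
     */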
.macro DEOPT_OR_RETURN temp, is_ref = 0
  ldr \temp, [rSELF, #THREAD_DEOPT_CHECK_REQUIRED_OFFSET]
  cbnz \temp, 2f
  bx     lr
2:
  SETUP_SAVE_EVERYTHING_FRAME \temp
  mov r2, \is_ref                      // pass if result is a reference
  mov r1, r0                           // pass the result
  mov r0, rSELF                        // Thread::Current
  bl artDeoptimizeIfNeeded

  CFI_REMEMBER_STATE
  cbnz r0, 3f

  RESTORE_SAVE_EVERYTHING_FRAME
  REFRESH_MARKING_REGISTER
  bx     lr

3:
  // Deoptimize
  CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
  bl art_quick_do_long_jump            // (Context*)
  bkpt  // Unreached
.endm

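    /*
     * Variant of DEOPT_OR_RETURN for use while the save-everything frame is still set up:
     * if no deopt check is required, restore the frame (keeping r0) and return; otherwise
     * store r0 back into the frame, call artDeoptimizeIfNeeded() and long-jump if needed.
     */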
.macro DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_R0 temp, is_ref
  ldr \temp, [rSELF, #THREAD_DEOPT_CHECK_REQUIRED_OFFSET]
  cbnz \temp, 2f
  CFI_REMEMBER_STATE
  RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
  REFRESH_MARKING_REGISTER
  bx     lr
  CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
2:
  str    r0, [sp, SAVE_EVERYTHING_FRAME_R0_OFFSET] // update result in the frame
  mov r2, \is_ref                                  // pass if result is a reference
  mov r1, r0                                       // pass the result
  mov r0, rSELF                                    // Thread::Current
  bl artDeoptimizeIfNeeded

  CFI_REMEMBER_STATE
  cbnz r0, 3f

  RESTORE_SAVE_EVERYTHING_FRAME
  REFRESH_MARKING_REGISTER
  bx     lr

3:
  // Deoptimize
  CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
  bl art_quick_do_long_jump                        // (Context*)
  bkpt  // Unreached
.endm

.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save all registers as basis for long jump context
    mov r0, rSELF                   @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(Thread*)
    bl  art_quick_do_long_jump      @ (Context*)
    bkpt  // Unreached
END \c_name
.endm

.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME r0  @ save all registers as basis for long jump context
    mov r0, rSELF                   @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(Thread*)
    bl  art_quick_do_long_jump      @ (Context*)
    bkpt  // Unreached
END \c_name
.endm

.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r1       @ save all registers as basis for long jump context
    mov r1, rSELF                   @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(arg1, Thread*)
    bl  art_quick_do_long_jump      @ (Context*)
    bkpt  // Unreached
END \c_name
.endm

.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME r2  @ save all registers as basis for long jump context
    mov r2, rSELF                   @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(arg1, arg2, Thread*)
    bl  art_quick_do_long_jump      @ (Context*)
    bkpt  // Unreached
END \c_name
.endm

.macro RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
    CFI_REMEMBER_STATE
    cbnz   r0, 1f              @ result non-zero branch over
    DEOPT_OR_RETURN r1
1:
    CFI_RESTORE_STATE_AND_DEF_CFA sp, 0
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
    CFI_REMEMBER_STATE
    cbz    r0, 1f              @ result zero branch over
    DEOPT_OR_RETURN r1, /* is_ref= */ 1
1:
    CFI_RESTORE_STATE_AND_DEF_CFA sp, 0
    DELIVER_PENDING_EXCEPTION
.endm

// Macros that take advantage of code similarities between downcalls.
// Used for field and allocation entrypoints.
.macro N_ARG_DOWNCALL n, name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r\n        @ save callee saves in case of GC
    mov    r\n, rSELF                     @ pass Thread::Current
    bl     \entrypoint                    @ (<args>, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm
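// For example, `TWO_ARG_DOWNCALL art_quick_foo, artFooFromCode, <return macro>` (placeholder
// names for illustration) expands to an entrypoint that keeps the two managed-code arguments
// in r0-r1, passes Thread::Current in r2 and then calls artFooFromCode.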

.macro ONE_ARG_DOWNCALL name, entrypoint, return
    N_ARG_DOWNCALL 1, \name, \entrypoint, \return
.endm

.macro TWO_ARG_DOWNCALL name, entrypoint, return
    N_ARG_DOWNCALL 2, \name, \entrypoint, \return
.endm

.macro THREE_ARG_DOWNCALL name, entrypoint, return
    N_ARG_DOWNCALL 3, \name, \entrypoint, \return
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro FOUR_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r12        @ save callee saves in case of GC
    str    rSELF, [sp, #-16]!             @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl     \entrypoint                    @ (<args>, Thread*)
    DECREASE_FRAME 16                     @ strip the extra frame
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
    .extern art_quick_throw_null_pointer_exception_from_signal
ENTRY art_quick_throw_null_pointer_exception_from_signal
    // The fault handler pushes the gc map address, i.e. the "return address", onto the stack
    // and passes the fault address in LR, so we need to set up the CFI info accordingly.
    .cfi_def_cfa_offset __SIZEOF_POINTER__
    .cfi_rel_offset lr, 0
    push {r0-r12}                   @ 13 words of callee saves and args; LR already saved.
    .cfi_adjust_cfa_offset 52
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r5, 20
    .cfi_rel_offset r6, 24
    .cfi_rel_offset r7, 28
    .cfi_rel_offset r8, 32
    .cfi_rel_offset r9, 36
    .cfi_rel_offset r10, 40
    .cfi_rel_offset r11, 44
    .cfi_rel_offset ip, 48

    @ save all registers as basis for long jump context
    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1
    mov r0, lr                      @ pass the fault address stored in LR by the fault handler.
    mov r1, rSELF                   @ pass Thread::Current
    bl  artThrowNullPointerExceptionFromSignal  @ (fault_address, Thread*)
    bl  art_quick_do_long_jump                  @ (Context*)
    bkpt  // Unreached
END art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/r1.
     *
     * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
     * of the target Method* in r0 and method->code_ in r1.
     *
     * If unsuccessful, the helper will return null/null and there will be a pending exception
     * in the thread; we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
     * pointing back to the original caller.
     *
     * Clobbers IP (R12).
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME r2     @ save callee saves in case allocation triggers GC
    mov    r2, rSELF                      @ pass Thread::Current
    mov    r3, sp
    bl     \cxx_name                      @ (method_idx, this, Thread*, SP)
    mov    r12, r1                        @ save Method*->code_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    cbz    r0, 1f                         @ did we find the target? if not go to exception delivery
    bx     r12                            @ tail call to target
1:
    DELIVER_PENDING_EXCEPTION
.endm
.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck

    /*
     * Quick invocation stub internal.
     * On entry:
     *   r0 = method pointer
     *   r1 = argument array or null for no argument methods
     *   r2 = size of argument array in bytes
     *   r3 = (managed) thread pointer
     *   [sp] = JValue* result
     *   [sp + 4] = result_in_float
     *   [sp + 8] = core register argument array
     *   [sp + 12] = fp register argument array
     *  +-------------------------+
     *  | uint32_t* fp_reg_args   |
     *  | uint32_t* core_reg_args |
     *  |   result_in_float       | <- Caller frame
     *  |   JValue* result        |
     *  +-------------------------+
     *  |          lr             |
     *  |          r11            |
     *  |          r9             |
     *  |          r4             | <- r11
     *  +-------------------------+
     *  | uint32_t out[n-1]       |
     *  |    :      :             |        Outs
     *  | uint32_t out[0]         |
     *  | StackRef<ArtMethod>     | <- SP  value=null
     *  +-------------------------+
     */
ENTRY art_quick_invoke_stub_internal
    SPILL_ALL_CALLEE_SAVE_GPRS             @ spill regs (9)
    mov    r11, sp                         @ save the stack pointer
    .cfi_def_cfa_register r11

    mov    r9, r3                          @ move managed thread pointer into r9

    add    r4, r2, #4                      @ create space for method pointer in frame
    sub    r4, sp, r4                      @ reserve & align *stack* to 16 bytes: native calling
    and    r4, #0xFFFFFFF0                 @ convention only aligns to 8B, so we have to ensure ART
    mov    sp, r4                          @ 16B alignment ourselves.

    mov    r4, r0                          @ save method*
    add    r0, sp, #4                      @ pass stack pointer + method ptr as dest for memcpy
    bl     memcpy                          @ memcpy (dest, src, bytes)
    mov    ip, #0                          @ set ip to 0
    str    ip, [sp]                        @ store null for method* at bottom of frame

    ldr    ip, [r11, #48]                  @ load fp register argument array pointer
    vldm   ip, {s0-s15}                    @ copy s0 - s15

    ldr    ip, [r11, #44]                  @ load core register argument array pointer
    mov    r0, r4                          @ restore method*
    add    ip, ip, #4                      @ skip r0
    ldm    ip, {r1-r3}                     @ copy r1 - r3

    REFRESH_MARKING_REGISTER

    ldr    ip, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]  @ get pointer to the code
    blx    ip                              @ call the method

    mov    sp, r11                         @ restore the stack pointer
    .cfi_def_cfa_register sp

    ldr    r4, [sp, #40]                   @ load result_in_float
    ldr    r9, [sp, #36]                   @ load the result pointer
    cmp    r4, #0
    ite    eq
    strdeq r0, [r9]                        @ store r0/r1 into result pointer
    vstrne d0, [r9]                        @ store s0-s1/d0 into result pointer

    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}               @ restore spill regs
END art_quick_invoke_stub_internal

    /*
     * On stack replacement stub.
     * On entry:
     *   r0 = stack to copy
     *   r1 = size of stack
     *   r2 = pc to call
     *   r3 = JValue* result
     *   [sp] = shorty
     *   [sp + 4] = thread
     */
ENTRY art_quick_osr_stub
    SPILL_ALL_CALLEE_SAVE_GPRS             @ Spill regs (9)
    vpush  {s16-s31}                       @ Spill fp-regs (16)
    .cfi_adjust_cfa_offset 64
    SAVE_SIZE=(9*4+16*4)
    mov    r11, sp                         @ Save the stack pointer
    .cfi_def_cfa r11, SAVE_SIZE            @ CFA = r11 + SAVE_SIZE
    CFI_REMEMBER_STATE
    mov    r10, r1                         @ Save size of stack
    ldr    r9, [r11, #(SAVE_SIZE+4)]       @ Move managed thread pointer into r9
    REFRESH_MARKING_REGISTER
    mov    r6, r2                          @ Save the pc to call
    sub    r7, sp, #12                     @ Reserve space for stack pointer,
                                           @    JValue* result, and ArtMethod* slot.
    and    r7, #0xFFFFFFF0                 @ Align stack pointer
    mov    sp, r7                          @ Update stack pointer
    str    r11, [sp, #4]                   @ Save old stack pointer
    str    r3, [sp, #8]                    @ Save JValue* result
    mov    ip, #0
    str    ip, [sp]                        @ Store null for ArtMethod* at bottom of frame
    // r11 isn't properly spilled in the OSR method, so we need to use a DWARF expression.
    // NB: the CFI must be before the call since this is the address gdb will look up.
    // NB: gdb expects that cfa_expression returns the CFA value (not an address pointing to it).
    .cfi_escape                            /* CFA = [sp + 4] + SAVE_SIZE */ \
      0x0f, 6,                             /* DW_CFA_def_cfa_expression(len) */ \
      0x92, 13, 4,                         /* DW_OP_bregx(reg,offset) */ \
      0x06,                                /* DW_OP_deref */ \
      0x23, SAVE_SIZE                      /* DW_OP_plus_uconst(val) */
    bl     .Losr_entry                     @ Call the method
    ldr    r10, [sp, #8]                   @ Restore JValue* result
    ldr    sp, [sp, #4]                    @ Restore saved stack pointer
    .cfi_def_cfa sp, SAVE_SIZE             @ CFA = sp + SAVE_SIZE
    strd r0, [r10]                         @ Store r0/r1 into result pointer
    vpop   {s16-s31}
    .cfi_adjust_cfa_offset -64
    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.Losr_entry:
    CFI_RESTORE_STATE_AND_DEF_CFA r11, SAVE_SIZE  @ CFA = r11 + SAVE_SIZE
    sub sp, sp, r10                        @ Reserve space for callee stack
    sub r10, r10, #4
    str lr, [sp, r10]                      @ Store link register per the compiler ABI
    mov r2, r10
    mov r1, r0
    mov r0, sp
    bl  memcpy                             @ memcpy (dest r0, src r1, bytes r2)
    bx r6
END art_quick_osr_stub

    /*
     * On entry r0 is the long jump context, as returned by a previous entrypoint call that
     * threw an exception or deoptimized.
     * Note that r12 (IP) is clobbered rather than restored from gprs_.
     */
ARM_ENTRY art_quick_do_long_jump
    // Reserve space for the gprs + fprs.
    INCREASE_FRAME ARM_LONG_JUMP_CONTEXT_SIZE

    mov r1, sp
    add r2, sp, #ARM_LONG_JUMP_GPRS_SIZE

    bl artContextCopyForLongJump  // (Context* context, uintptr_t* gprs, uintptr_t* fprs)

    add r0, sp, #ARM_LONG_JUMP_GPRS_SIZE

    vldm r0, {s0-s31}     @ Load all fprs from argument fprs_.
                          @ Do not access fprs_ from now on; they may be below SP.
    ldm  sp, {r0-r11}     @ Load r0-r11 from gprs_.
    ldr  r12, [sp, #60]   @ Load the value of PC (r15) from gprs_ (60 = 4 * 15) into IP (r12).
    ldr  lr, [sp, #56]    @ Load LR from gprs_ (56 = 4 * 14).
    ldr  sp, [sp, #52]    @ Load SP from gprs_ (52 = 4 * 13).
                          @ Do not access gprs_ from now on; they are below SP.
    .cfi_def_cfa_offset 0
    REFRESH_MARKING_REGISTER
    bx   r12              @ Do long jump.
END art_quick_do_long_jump

    /*
     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
     * failure.
     */
TWO_ARG_DOWNCALL art_quick_handle_fill_data, \
                 artHandleFillArrayDataFromCode, \
                 RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Entry from managed code that tries to lock the object in a fast path and
     * calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
     * r0 holds the possibly null object to lock.
     */
ENTRY art_quick_lock_object
    // Note: the slow path is actually art_quick_lock_object_no_inline (tail call).
    LOCK_OBJECT_FAST_PATH r0, r1, r2, r3, .Llock_object_slow, /*can_be_null*/ 1
END art_quick_lock_object

    /*
     * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC.
     * r0 holds the possibly null object to lock.
     */
    .extern artLockObjectFromCode
ENTRY art_quick_lock_object_no_inline
    // This is also the slow path for art_quick_lock_object.
    // Note that we need a local label as the assembler emits bad instructions
    // for CBZ/CBNZ if we try to jump to `art_quick_lock_object_no_inline`.
.Llock_object_slow:
    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
    mov    r1, rSELF                  @ pass Thread::Current
    bl     artLockObjectFromCode      @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
END art_quick_lock_object_no_inline

    /*
     * Entry from managed code that tries to unlock the object in a fast path and calls
     * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
     * r0 holds the possibly null object to unlock.
     */
ENTRY art_quick_unlock_object
    // Note: the slow path is actually art_quick_unlock_object_no_inline (tail call).
    UNLOCK_OBJECT_FAST_PATH r0, r1, r2, r3, .Lunlock_object_slow, /*can_be_null*/ 1
END art_quick_unlock_object

    /*
     * Entry from managed code that calls `artUnlockObjectFromCode()`
     * and delivers exception on failure.
     * r0 holds the possibly null object to unlock.
     */
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object_no_inline
    // This is also the slow path for art_quick_unlock_object.
    // Note that we need a local label as the assembler emits bad instructions
    // for CBZ/CBNZ if we try to jump to `art_quick_unlock_object_no_inline`.
.Lunlock_object_slow:
    @ save callee saves in case exception allocation triggers GC
    SETUP_SAVE_REFS_ONLY_FRAME r1
    mov    r1, rSELF                  @ pass Thread::Current
    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
END art_quick_unlock_object_no_inline

    /*
     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
     * artThrowClassCastExceptionForObject.
     */
    .extern artInstanceOfFromCode
    .extern artThrowClassCastExceptionForObject
ENTRY art_quick_check_instance_of
    // Type check using the bit string passes null as the target class. In that case just throw.
    cbz r1, .Lthrow_class_cast_exception_for_bitstring_check

    push {r0-r2, lr}                    @ save arguments, padding (r2) and link register
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset lr, 12
    bl artInstanceOfFromCode
    cbz    r0, .Lthrow_class_cast_exception
    pop {r0-r2, pc}

.Lthrow_class_cast_exception:
    pop {r0-r2, lr}
    .cfi_adjust_cfa_offset -16
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore lr

.Lthrow_class_cast_exception_for_bitstring_check:
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
    mov r2, rSELF                   @ pass Thread::Current
    bl  artThrowClassCastExceptionForObject  @ (Object*, Class*, Thread*)
    bl  art_quick_do_long_jump               @ (Context*)
    bkpt  // Unreached
END art_quick_check_instance_of

// Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude.
.macro POP_REG_NE rReg, offset, rExclude
    .ifnc \rReg, \rExclude
        ldr \rReg, [sp, #\offset]   @ restore rReg
        .cfi_restore \rReg
    .endif
.endm

// Save rReg's value to [sp, #offset].
.macro PUSH_REG rReg, offset
    str \rReg, [sp, #\offset]       @ save rReg
    .cfi_rel_offset \rReg, \offset
.endm

    // Helper macros for `art_quick_aput_obj`.
#ifdef USE_READ_BARRIER
#ifdef USE_BAKER_READ_BARRIER
.macro BAKER_RB_CHECK_GRAY_BIT_AND_LOAD rDest, rObj, offset, gray_slow_path_label
    ldr ip, [\rObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tst ip, #LOCK_WORD_READ_BARRIER_STATE_MASK_SHIFTED
    bne \gray_slow_path_label
    // False dependency to avoid needing load/load fence.
    add \rObj, \rObj, ip, lsr #32
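    // (An immediate shift of #32 yields 0 on AArch32, so this adds nothing to \rObj but
    // makes the address of the load below depend on the lock word load above.)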
    ldr \rDest, [\rObj, #\offset]
    UNPOISON_HEAP_REF \rDest
.endm

.macro BAKER_RB_LOAD_AND_MARK rDest, rObj, offset, mark_function
    ldr \rDest, [\rObj, #\offset]
    UNPOISON_HEAP_REF \rDest
    str lr, [sp, #-8]!             @ Save LR with correct stack alignment.
    .cfi_rel_offset lr, 0
    .cfi_adjust_cfa_offset 8
    bl \mark_function
    ldr lr, [sp], #8               @ Restore LR.
    .cfi_restore lr
    .cfi_adjust_cfa_offset -8
.endm
#else  // USE_BAKER_READ_BARRIER
    .extern artReadBarrierSlow
.macro READ_BARRIER_SLOW rDest, rObj, offset
    push {r0-r3, ip, lr}            @ 6 words for saved registers (used in art_quick_aput_obj)
    .cfi_adjust_cfa_offset 24
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset ip, 16
    .cfi_rel_offset lr, 20
    sub sp, #8                      @ push padding
    .cfi_adjust_cfa_offset 8
    @ mov r0, \rRef                 @ pass ref in r0 (no-op for now since parameter ref is unused)
    .ifnc \rObj, r1
        mov r1, \rObj               @ pass rObj
    .endif
    mov r2, #\offset                @ pass offset
    bl artReadBarrierSlow           @ artReadBarrierSlow(ref, rObj, offset)
    @ No need to unpoison return value in r0, artReadBarrierSlow() would do the unpoisoning.
    .ifnc \rDest, r0
        mov \rDest, r0              @ save return value in rDest
    .endif
    add sp, #8                      @ pop padding
    .cfi_adjust_cfa_offset -8
    POP_REG_NE r0, 0, \rDest        @ conditionally restore saved registers
    POP_REG_NE r1, 4, \rDest
    POP_REG_NE r2, 8, \rDest
    POP_REG_NE r3, 12, \rDest
    POP_REG_NE ip, 16, \rDest
    add sp, #20
    .cfi_adjust_cfa_offset -20
    pop {lr}                        @ restore lr
    .cfi_adjust_cfa_offset -4
    .cfi_restore lr
.endm
#endif  // USE_BAKER_READ_BARRIER
#endif  // USE_READ_BARRIER

    .hidden art_quick_aput_obj
ENTRY art_quick_aput_obj
#if defined(USE_READ_BARRIER) && !defined(USE_BAKER_READ_BARRIER)
    @ The offset to .Laput_obj_null is too large to use cbz due to expansion from `READ_BARRIER_SLOW`.
    tst r2, r2
    beq .Laput_obj_null
    READ_BARRIER_SLOW r3, r0, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER_SLOW r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
    READ_BARRIER_SLOW r4, r2, MIRROR_OBJECT_CLASS_OFFSET
#else  // !defined(USE_READ_BARRIER) || defined(USE_BAKER_READ_BARRIER)
    cbz r2, .Laput_obj_null
#ifdef USE_READ_BARRIER
    cmp rMR, #0
    bne .Laput_obj_gc_marking
#endif  // USE_READ_BARRIER
    ldr r3, [r0, #MIRROR_OBJECT_CLASS_OFFSET]
    UNPOISON_HEAP_REF r3
    // R4 is a scratch register in managed ARM ABI.
    ldr r4, [r2, #MIRROR_OBJECT_CLASS_OFFSET]
    UNPOISON_HEAP_REF r4
    ldr r3, [r3, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]
    UNPOISON_HEAP_REF r3
#endif  // !defined(USE_READ_BARRIER) || defined(USE_BAKER_READ_BARRIER)
    cmp r3, r4  @ value's type == array's component type - trivial assignability
    bne .Laput_obj_check_assignability
.Laput_obj_store:
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
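    // Mark the GC card for the written array. The byte stored is the low byte of the card
    // table base itself; the card table is mapped so that this value equals the dirty-card
    // marker, which avoids materializing a separate constant here.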
    ldr r3, [rSELF, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
    strb r3, [r3, r0]
    blx lr

.Laput_obj_null:
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    str r2, [r3, r1, lsl #2]
    blx lr

.Laput_obj_check_assignability:
    push {r0-r2, lr}             @ save arguments
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset lr, 12
    mov r1, r4
    mov r0, r3
    bl artIsAssignableFromCode
    CFI_REMEMBER_STATE
    cbz r0, .Lthrow_array_store_exception
    pop {r0-r2, lr}
    .cfi_restore lr
    .cfi_adjust_cfa_offset -16
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
    ldr r3, [rSELF, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
    strb r3, [r3, r0]
    blx lr

.Lthrow_array_store_exception:
    CFI_RESTORE_STATE_AND_DEF_CFA sp, 16
    pop {r0-r2, lr}
    .cfi_restore lr
    .cfi_adjust_cfa_offset -16
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    CFI_REMEMBER_STATE
#endif  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r3
    mov r1, r2
    mov r2, rSELF                  @ Pass Thread::Current.
    bl artThrowArrayStoreException @ (Class*, Class*, Thread*)
    bl art_quick_do_long_jump      @ (Context*)
    bkpt  // Unreached

#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    CFI_RESTORE_STATE_AND_DEF_CFA sp, 0
.Laput_obj_gc_marking:
    BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
        r3, r0, MIRROR_OBJECT_CLASS_OFFSET, .Laput_obj_mark_array_class
.Laput_obj_mark_array_class_continue:
    BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
        r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, .Laput_obj_mark_array_element
.Laput_obj_mark_array_element_continue:
    BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
        r4, r2, MIRROR_OBJECT_CLASS_OFFSET, .Laput_obj_mark_object_class
.Laput_obj_mark_object_class_continue:

    cmp r3, r4  @ value's type == array's component type - trivial assignability
    // All registers are correctly set up for `.Laput_obj_check_assignability`.
    bne .Laput_obj_check_assignability
    b   .Laput_obj_store

.Laput_obj_mark_array_class:
    BAKER_RB_LOAD_AND_MARK r3, r0, MIRROR_OBJECT_CLASS_OFFSET, art_quick_read_barrier_mark_reg03
    b .Laput_obj_mark_array_class_continue

.Laput_obj_mark_array_element:
    BAKER_RB_LOAD_AND_MARK \
        r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, art_quick_read_barrier_mark_reg03
    b .Laput_obj_mark_array_element_continue

.Laput_obj_mark_object_class:
    BAKER_RB_LOAD_AND_MARK r4, r2, MIRROR_OBJECT_CLASS_OFFSET, art_quick_read_barrier_mark_reg04
    b .Laput_obj_mark_object_class_continue
#endif  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
END art_quick_aput_obj

    /*
     * Macro for resolution and initialization of indexed DEX file
     * constants such as classes and strings.
     */
.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_EVERYTHING_FRAME r1, \runtime_method_offset    @ save everything in case of GC
    mov    r1, rSELF                  @ pass Thread::Current
    bl     \entrypoint                @ (uint32_t index, Thread*)
    cbz    r0, 1f                     @ If result is null, deliver the OOME.
    str    r0, [sp, #136]             @ store result in the frame (the r0 slot, SAVE_EVERYTHING_FRAME_R0_OFFSET)
    DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_R0 r1, /* is_ref= */ 1
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END \name
.endm

.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
.endm

ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.

GENERATE_STATIC_FIELD_GETTERS

GENERATE_INSTANCE_FIELD_GETTERS

GENERATE_STATIC_FIELD_SETTERS /* emit64= */ 0

GENERATE_INSTANCE_FIELD_SETTERS /* emit64= */ 0

    /*
     * Called by managed code to resolve an instance field and store a wide value.
     */
    .extern artSet64InstanceFromCompiledCode
ENTRY art_quick_set64_instance
    SETUP_SAVE_REFS_ONLY_FRAME r12       @ save callee saves in case of GC
                                         @ r2:r3 contain the wide argument
    str    rSELF, [sp, #-16]!            @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl     artSet64InstanceFromCompiledCode      @ (field_idx, Object*, new_val, Thread*)
    add    sp, #16                       @ release out args
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
END art_quick_set64_instance

    .extern artSet64StaticFromCompiledCode
ENTRY art_quick_set64_static
    SETUP_SAVE_REFS_ONLY_FRAME r12        @ save callee saves in case of GC
                                          @ r2:r3 contain the wide argument
    str    rSELF, [sp, #-16]!             @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl     artSet64StaticFromCompiledCode @ (field_idx, new_val, Thread*)
    add    sp, #16                        @ release out args
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME          @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
END art_quick_set64_static

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Comment out allocators that have ARM-specific asm.
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)

// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_rosalloc, RosAlloc).
//
// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
ENTRY \c_name
    // Fast path rosalloc allocation.
    // r0: type/return value, rSELF (r9): Thread::Current
    // r1, r2, r3, r12: free.
    ldr    r3, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
                                                              // allocation stack has room.
                                                              // TODO: consider using ldrd.
    ldr    r12, [rSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
    cmp    r3, r12
    bhs    .Lslow_path\c_name

    ldr    r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3)
    cmp    r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
                                                              // local allocation.
    // If the class is not yet visibly initialized, or it is finalizable,
    // the object size will be very large to force the branch below to be taken.
    //
    // See Class::SetStatus() in class.cc for more details.
    bhs    .Lslow_path\c_name
                                                              // Compute the rosalloc bracket index
                                                              // from the size. Since the size is
                                                              // already aligned we can combine the
                                                              // two shifts together.
    add    r12, rSELF, r3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
                                                              // Subtract pointer size since there
                                                              // are no runs for 0 byte allocations
                                                              // and the size is already aligned.
                                                              // Load the rosalloc run (r12)
    ldr    r12, [r12, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
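    // (The combined shift above computes (size >> ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT)
    // << POINTER_SIZE_SHIFT, i.e. the bracket index scaled to a pointer-sized array
    // offset, in a single operation.)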
                                                              // Load the free list head (r3). This
                                                              // will be the return val.
    ldr    r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
    cbz    r3, .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
    ldr    r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
                                                              // and update the list head with the
                                                              // next pointer.
    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
                                                              // Store the class pointer in the
                                                              // header. This also overwrites the
                                                              // next pointer. The offsets are
                                                              // asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF r0
    str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
                                                              // Push the new object onto the thread
                                                              // local allocation stack and
                                                              // increment the thread local
                                                              // allocation stack top.
    ldr    r1, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
    str    r3, [r1], #COMPRESSED_REFERENCE_SIZE               // (Increment r1 as a side effect.)
    str    r1, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
                                                              // Decrement the size of the free list

1026    // After this "STR" the object is published to the thread local allocation stack,
1027    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
1028    // It is not yet visible to the running (user) compiled code until after the return.
1029    //
1030    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
1031    // the state of the allocation stack slot. It can be a pointer to one of:
1032    // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
1033    //       (The stack initial state is "null" pointers).
1034    // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
1035    // 2) A fully valid object, with a valid class pointer pointing to a real class.
1036    // Other states are not allowed.
1037    //
1038    // An object that is invalid only temporarily, and will eventually become valid.
1039    // The internal runtime code simply checks if the object is not null or is partial and then
1040    // ignores it.
1041    //
1042    // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
1043    // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
1044    // "next" pointer is not-cyclic.)
1045    //
1046    // See also b/28790624 for a listing of CLs dealing with this race.
    ldr    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
    sub    r1, #1
                                                              // TODO: consider combining this store
                                                              // and the list head store above using
                                                              // strd.
    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]

    mov    r0, r3                                             // Set the return value and return.
    // No barrier. The class is already observably initialized (otherwise the fast
    // path size check above would fail) and new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    bx     lr

.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
    mov    r1, rSELF                  @ pass Thread::Current
    bl     \cxx_name                  @ (mirror::Class* cls, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
END \c_name
.endm

ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1

// The common fast path code for art_quick_alloc_object_resolved/initialized_tlab
// and art_quick_alloc_object_resolved/initialized_region_tlab.
//
// r0: type, rSELF (r9): Thread::Current, r1, r2, r3, r12: free.
// Need to preserve r0 to the slow path.
//
// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH slowPathLabel isInitialized
                                                             // Load thread_local_pos (r12) and
                                                             // thread_local_end (r3) with ldrd.
                                                             // Check constraints for ldrd.
#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
1086#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
#endif
    ldrd   r12, r3, [rSELF, #THREAD_LOCAL_POS_OFFSET]
    sub    r12, r3, r12                                       // Compute the remaining buf size.
    ldr    r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3).
    cmp    r3, r12                                            // Check if it fits.
    // If the class is not yet visibly initialized, or it is finalizable,
    // the object size will be very large to force the branch below to be taken.
    //
    // See Class::SetStatus() in class.cc for more details.
    bhi    \slowPathLabel
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
                                                              // Reload old thread_local_pos (r0)
                                                              // for the return value.
    ldr    r2, [rSELF, #THREAD_LOCAL_POS_OFFSET]
    add    r1, r2, r3
    str    r1, [rSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
1103    // After this "STR" the object is published to the thread local allocation stack,
1104    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
1105    // It is not yet visible to the running (user) compiled code until after the return.
1106    //
1107    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
1108    // the state of the object. It can be either:
1109    // 1) A partially valid object, with a null class pointer
1110    //       (because the initial state of TLAB buffers is all 0s/nulls).
1111    // 2) A fully valid object, with a valid class pointer pointing to a real class.
1112    // Other states are not allowed.
1113    //
1114    // An object that is invalid only temporarily, and will eventually become valid.
1115    // The internal runtime code simply checks if the object is not null or is partial and then
1116    // ignores it.
1117    //
1118    // (Note: The actual check is done by checking that the object's class pointer is non-null.
1119    // Also, unlike rosalloc, the object can never be observed as null).
    POISON_HEAP_REF r0
    str    r0, [r2, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
    mov    r0, r2
    // No barrier. The class is already observably initialized (otherwise the fast
    // path size check above would fail) and new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    bx     lr
.endm

// The common code for art_quick_alloc_object_*region_tlab
// Currently the implementation ignores isInitialized. TODO(b/172087402): clean this up.
// Caller must execute a constructor fence after this.
.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
ENTRY \name
    // Fast path tlab allocation.
    // r0: type, rSELF (r9): Thread::Current
    // r1, r2, r3, r12: free.
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lslow_path\name, \isInitialized
.Lslow_path\name:
    SETUP_SAVE_REFS_ONLY_FRAME r2                             // Save callee saves in case of GC.
    mov    r1, rSELF                                          // Pass Thread::Current.
    bl     \entrypoint                                        // (mirror::Class* klass, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
END \name
.endm

GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1


// The common fast path code for art_quick_alloc_array_resolved/initialized_tlab
// and art_quick_alloc_array_resolved/initialized_region_tlab.
//
// r0: type, r1: component_count, r2: total_size, rSELF (r9): Thread::Current, r3, r12: free.
// Need to preserve r0 and r1 to the slow path.
.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel
    and    r2, r2, #OBJECT_ALIGNMENT_MASK_TOGGLED             // Apply alignment mask
                                                              // (addr + 7) & ~7.

                                                              // Load thread_local_pos (r3) and
                                                              // thread_local_end (r12) with ldrd.
                                                              // Check constraints for ldrd.
#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
1167#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
1168#endif
1169    ldrd   r3, r12, [rSELF, #THREAD_LOCAL_POS_OFFSET]
1170    sub    r12, r12, r3                                       // Compute the remaining buf size.
1171    cmp    r2, r12                                            // Check if the total_size fits.
1172    // The array class is always initialized here. Unlike new-instance,
1173    // this does not act as a double test.
1174    bhi    \slowPathLabel
1175    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
1176    add    r2, r2, r3
1177    str    r2, [rSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
1178    POISON_HEAP_REF r0
1179    str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
1180    str    r1, [r3, #MIRROR_ARRAY_LENGTH_OFFSET]              // Store the array length.
1181    mov    r0, r3
1182// new-array is special. The class is loaded and immediately goes to the Initialized state
1183// before it is published. Therefore the only fence needed is for the publication of the object.
1184// See ClassLinker::CreateArrayClass() for more details.
1185
1186// For publication of the new array, we don't need a 'dmb ishst' here.
1187// The compiler generates 'dmb ishst' for all new-array instructions.
1188    bx     lr
1189.endm
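
    /*
     * A hedged C sketch of the macro above (illustrative names only):
     *
     *   static Array* AllocArrayTlabFastPath(Class* klass, int32_t count,
     *                                        uint32_t size, Thread* self) {
     *     size &= ~(uint32_t)OBJECT_ALIGNMENT_MASK;   // Caller pre-added +7.
     *     uintptr_t pos = self->tlab_pos_;
     *     if (size > self->tlab_end_ - pos) {
     *       return NULL;                              // Take the slow path.
     *     }
     *     self->tlab_pos_ = pos + size;
     *     Array* array = (Array*)pos;
     *     array->klass_ = klass;                      // Poisoned if enabled.
     *     array->length_ = count;
     *     return array;                               // Publication fence is
     *   }                                             // emitted by the compiler.
     */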
1190
1191// Caller must execute a constructor fence after this.
1192.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
1193ENTRY \name
1194    // Fast path array allocation for TLAB and region TLAB allocation.
1195    // r0: mirror::Class* type
1196    // r1: int32_t component_count
1197    // rSELF (r9): thread
1198    // r2, r3, r12: free.
1199    \size_setup .Lslow_path\name
1200    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name
1201.Lslow_path\name:
1202    // r0: mirror::Class* klass
1203    // r1: int32_t component_count
1204    // r2: Thread* self
1205    SETUP_SAVE_REFS_ONLY_FRAME r2  // save callee saves in case of GC
1206    mov    r2, rSELF               // pass Thread::Current
1207    bl     \entrypoint
1208    RESTORE_SAVE_REFS_ONLY_FRAME
1209    REFRESH_MARKING_REGISTER
1210    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
1211END \name
1212.endm
1213
1214.macro COMPUTE_ARRAY_SIZE_UNKNOWN slow_path
1215    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8)
1216    cmp r1, r2
1217    bhi \slow_path
1218                                                            // Array classes are never finalizable
1219                                                            // or uninitialized, no need to check.
1220    ldr    r3, [r0, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]    // Load component type
1221    UNPOISON_HEAP_REF r3
1222    ldr    r3, [r3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
1223    lsr    r3, r3, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT         // Component size shift is in high 16
1224                                                            // bits.
1225    lsl    r2, r1, r3                                       // Calculate data size
1226                                                            // Add array data offset and alignment.
1227    add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1228#if MIRROR_WIDE_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
1229#error Long array data offset must be 4 greater than int array data offset.
1230#endif
1231
1232    add    r3, r3, #1                                       // Add 4 to the size only if the
1233                                                            // component size shift is 3
1234                                                            // (for 64-bit alignment).
1235    and    r3, r3, #4
1236    add    r2, r2, r3
1237.endm
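
    // Worked example for the macro above: for a long[] with count = 3 the
    // component size shift is 3, so the data size is 3 << 3 = 24 bytes;
    // MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK pre-biases the sum
    // for the final alignment mask, and (shift + 1) & 4 = 4 bumps the offset
    // to MIRROR_WIDE_ARRAY_DATA_OFFSET so the 64-bit elements start 8-byte
    // aligned. For shifts 0..2, (shift + 1) & 4 = 0 and the int array data
    // offset is used unchanged.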
1238
1239.macro COMPUTE_ARRAY_SIZE_8 slow_path
1240    // Possibly a large object, go slow.
1241    // Also does negative array size check.
1242    movw r2, #(MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET)
1243    cmp r1, r2
1244    bhi \slow_path
1245    // Add array data offset and alignment.
1246    add    r2, r1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1247.endm
1248
1249.macro COMPUTE_ARRAY_SIZE_16 slow_path
1250    // Possibly a large object, go slow.
1251    // Also does negative array size check.
1252    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 2)
1253    cmp r1, r2
1254    bhi \slow_path
1255    lsl    r2, r1, #1
1256    // Add array data offset and alignment.
1257    add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1258.endm
1259
1260.macro COMPUTE_ARRAY_SIZE_32 slow_path
1261    // Possibly a large object, go slow.
1262    // Also does negative array size check.
1263    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 4)
1264    cmp r1, r2
1265    bhi \slow_path
1266    lsl    r2, r1, #2
1267    // Add array data offset and alignment.
1268    add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1269.endm
1270
1271.macro COMPUTE_ARRAY_SIZE_64 slow_path
1272    // Possibly a large object, go slow.
1273    // Also does negative array size check.
1274    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_LONG_ARRAY_DATA_OFFSET) / 8)
1275    cmp r1, r2
1276    bhi \slow_path
1277    lsl    r2, r1, #3
1278    // Add array data offset and alignment.
1279    add    r2, r2, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1280.endm
1281
1282GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
1283GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
1284GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
1285GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
1286GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
1287GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
1288GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
1289GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
1290GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
1291GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
1292
1293    /*
1294     * Called by managed code when the value in rSUSPEND has been decremented to 0.
1295     */
1296    .extern artTestSuspendFromCode
1297ENTRY art_quick_test_suspend
1298    SETUP_SAVE_EVERYTHING_FRAME r0, RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET @ save everything for GC stack crawl
1299    mov    r0, rSELF
1300    bl     artTestSuspendFromCode               @ (Thread*)
1301
1302    CFI_REMEMBER_STATE
1303    cbnz   r0, .Ltest_suspend_deoptimize
1304
1305    RESTORE_SAVE_EVERYTHING_FRAME
1306    REFRESH_MARKING_REGISTER
1307    bx     lr
1308
1309.Ltest_suspend_deoptimize:
1310    // Deoptimize
1311    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
1312    bl     art_quick_do_long_jump               @ (Context*)
1313    bkpt  // Unreached
1314END art_quick_test_suspend
1315
1316    .extern artImplicitSuspendFromCode
1317ENTRY art_quick_implicit_suspend
1318    mov    r0, rSELF
1319    SETUP_SAVE_REFS_ONLY_FRAME r1             @ save callee saves for stack crawl
1320    bl     artImplicitSuspendFromCode         @ (Thread*)
1321
1322    CFI_REMEMBER_STATE
1323    cbnz   r0, .Limplicit_suspend_deopt
1324
1325    RESTORE_SAVE_REFS_ONLY_FRAME
1326    REFRESH_MARKING_REGISTER
1327    bx     lr
1328
1329.Limplicit_suspend_deopt:
1330    // Deoptimize
1331    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_REFS_ONLY
1332    bl     art_quick_do_long_jump             @ (Context*)
1333    bkpt  // Unreached
1334END art_quick_implicit_suspend
1335
1336    /*
1337     * Called by managed code that is attempting to call a method on a proxy class. On entry
1338     * r0 holds the proxy method and r1 holds the receiver; r2 and r3 may contain arguments. The
1339     * frame size of the invoked proxy method agrees with a ref and args callee save frame.
1340     */
1341     .extern artQuickProxyInvokeHandler
1342ENTRY art_quick_proxy_invoke_handler
1343    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
1344    mov     r2, rSELF              @ pass Thread::Current
1345    mov     r3, sp                 @ pass SP
1346    blx     artQuickProxyInvokeHandler  @ (Method* proxy method, receiver, Thread*, SP)
1347    ldr     r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1348    // Tear down the callee-save frame. Skip arg registers.
1349    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1350    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1351    RESTORE_SAVE_REFS_ONLY_FRAME
1352    REFRESH_MARKING_REGISTER
1353    cbnz    r2, 1f                 @ deliver exception if one is pending
1354    vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
1355    bx      lr                     @ return on success
13561:
1357    DELIVER_PENDING_EXCEPTION
1358END art_quick_proxy_invoke_handler
1359
1360    /*
1361     * Called to resolve an imt conflict.
1362     * r0 is the conflict ArtMethod.
1363     * r12 is a hidden argument that holds the target interface method.
1364     *
1365     * Note that this stub writes to r0, r4, and r12.
1366     */
1367ENTRY art_quick_imt_conflict_trampoline
1368    ldr     r0, [r0, #ART_METHOD_JNI_OFFSET_32]  // Load ImtConflictTable
1369    ldr     r4, [r0]  // Load first entry in ImtConflictTable.
1370.Limt_table_iterate:
1371    cmp     r4, r12
1372    // Branch if found. Benchmarks have shown doing a branch here is better.
1373    beq     .Limt_table_found
1374    // If the entry is null, the interface method is not in the ImtConflictTable.
1375    cbz     r4, .Lconflict_trampoline
1376    // Iterate over the entries of the ImtConflictTable.
1377    ldr     r4, [r0, #(2 * __SIZEOF_POINTER__)]!
1378    b .Limt_table_iterate
1379.Limt_table_found:
1380    // We successfully hit an entry in the table. Load the target method
1381    // and jump to it.
1382    ldr     r0, [r0, #__SIZEOF_POINTER__]
1383    ldr     pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
1384.Lconflict_trampoline:
1385    // Pass interface method to the trampoline.
1386    mov r0, r12
1387    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
1388END art_quick_imt_conflict_trampoline
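
    /*
     * A hedged C sketch of the scan above: the ImtConflictTable behaves as a
     * null-terminated array of (interface method, implementation) pointer
     * pairs (illustrative types, not the real runtime layout).
     *
     *   static void* ImtConflictLookup(void** table, void* interface_method) {
     *     for (; table[0] != NULL; table += 2) {
     *       if (table[0] == interface_method) {
     *         return table[1];  // Target ArtMethod*; jump to its quick code.
     *       }
     *     }
     *     return NULL;  // Fall back to artInvokeInterfaceTrampoline.
     *   }
     */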
1389
1390    .extern artQuickResolutionTrampoline
1391ENTRY art_quick_resolution_trampoline
1392    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
1393    mov     r2, rSELF              @ pass Thread::Current
1394    mov     r3, sp                 @ pass SP
1395    blx     artQuickResolutionTrampoline  @ (Method* called, receiver, Thread*, SP)
1396    CFI_REMEMBER_STATE
1397    cbz     r0, 1f                 @ if code pointer is null, deliver the pending exception
1398    mov     r12, r0
1399    ldr     r0, [sp, #0]           @ load resolved method in r0
1400    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1401    REFRESH_MARKING_REGISTER
1402    bx      r12                    @ tail-call into actual code
14031:
1404    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_REFS_AND_ARGS
1405    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1406    DELIVER_PENDING_EXCEPTION
1407END art_quick_resolution_trampoline
1408
1409    /*
1410     * Called to do a generic JNI down-call
1411     */
1412ENTRY art_quick_generic_jni_trampoline
1413    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
1414
1415    // Save rSELF
1416    mov r11, rSELF
1417    // Save SP, so we can have static CFI info. r10 is saved in ref_and_args.
1418    mov r10, sp
1419    .cfi_def_cfa_register r10
1420
1421    sub sp, sp, #GENERIC_JNI_TRAMPOLINE_RESERVED_AREA
1422
1423    // prepare for artQuickGenericJniTrampoline call
1424    // (Thread*, managed_sp, reserved_area)
1425    //    r0         r1            r2   <= C calling convention
1426    //  rSELF       r10            sp   <= where they are
1427
1428    mov r0, rSELF   // Thread*
1429    mov r1, r10     // SP for the managed frame.
1430    mov r2, sp      // reserved area for arguments and other saved data (up to managed frame)
1431    blx artQuickGenericJniTrampoline  // (Thread*, managed_sp, reserved_area)
1432
1433    // The C call will have registered the complete save-frame on success.
1434    // The result of the call is:
1435    //     r0: pointer to native code, 0 on error.
1436    //     The bottom of the reserved area contains values for arg registers,
1437    //     hidden arg register and SP for out args for the call.
1438
1439    // Check for error (class init check or locking for synchronized native method can throw).
1440    cbz r0, .Lexception_in_native
1441
1442    // Save the code pointer
1443    mov lr, r0
1444
1445    // Load parameters from frame into registers r0-r3 (soft-float),
1446    // hidden arg (r4) for @CriticalNative and SP for out args.
1447    pop {r0-r3, r4, ip}
1448
1449    // Apply the new SP for out args, releasing unneeded reserved area.
1450    mov sp, ip
1451
1452    // Softfloat.
1453    // TODO: Change to hardfloat when supported.
1454
1455    blx lr            // native call.
1456
1457    // result sign extension is handled in C code
1458    // prepare for artQuickGenericJniEndTrampoline call
1459    // (Thread*, result, result_f)
1460    //    r0      r2,r3    stack       <= C calling convention
1461    //    r11     r0,r1    r0,r1       <= where they are
1462    sub sp, sp, #8 // Stack alignment.
1463
1464    push {r0-r1}
1465    mov r3, r1
1466    mov r2, r0
1467    mov r0, r11
1468
1469    blx artQuickGenericJniEndTrampoline
1470
1471    // Restore self pointer.
1472    mov rSELF, r11
1473
1474    // Pending exceptions possible.
1475    ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1476    cbnz r2, .Lexception_in_native
1477
1478    // Tear down the alloca.
1479    mov sp, r10
1480
1481    // store into fpr, for when it's a fpr return...
1482    vmov d0, r0, r1
1483
1484    LOAD_RUNTIME_INSTANCE r2
1485    ldrb r2, [r2,  #RUN_EXIT_HOOKS_OFFSET_FROM_RUNTIME_INSTANCE]
1486    CFI_REMEMBER_STATE
1487    cbnz r2, .Lcall_method_exit_hook
1488.Lcall_method_exit_hook_done:
1489
1490    // Tear down the callee-save frame. Skip arg registers.
1491    .cfi_def_cfa_register sp
1492    add sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - 7 * 4)
1493    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - 7 * 4)
1494    pop {r5-r8, r10-r11, lr}  @ This must match the non-args registers restored by
1495    .cfi_restore r5           @ `RESTORE_SAVE_REFS_AND_ARGS_FRAME`.
1496    .cfi_restore r6
1497    .cfi_restore r7
1498    .cfi_restore r8
1499    .cfi_restore r10
1500    .cfi_restore r11
1501    .cfi_restore lr
1502    .cfi_adjust_cfa_offset -(7 * 4)
1503    REFRESH_MARKING_REGISTER
1504    bx lr      // ret
1505
1506.Lcall_method_exit_hook:
1507    CFI_RESTORE_STATE_AND_DEF_CFA r10, FRAME_SIZE_SAVE_REFS_AND_ARGS
1508    mov r2, #FRAME_SIZE_SAVE_REFS_AND_ARGS
1509    bl art_quick_method_exit_hook
1510    b .Lcall_method_exit_hook_done
1511
1512.Lexception_in_native:
1513    ldr ip, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
1514    add ip, ip, #-1  // Remove the GenericJNI tag. ADD/SUB writing directly to SP is UNPREDICTABLE.
1515    mov sp, ip
1516    bl art_deliver_pending_exception
1517END art_quick_generic_jni_trampoline
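
    /*
     * Hedged pseudocode of the round trip above (illustrative signatures):
     *
     *   // Build the native frame in the reserved area; 0 means an exception
     *   // was thrown (class init or locking for synchronized natives).
     *   code = artQuickGenericJniTrampoline(self, managed_sp, reserved_area);
     *   if (code == 0) goto exception_in_native;
     *   // The bottom of the reserved area holds r0-r3 argument values, the
     *   // hidden r4 argument and the SP to use for out-args; then call.
     *   result = code(args...);
     *   result = artQuickGenericJniEndTrampoline(self, result, result_f);
     *   if (self->exception_ != NULL) goto exception_in_native;
     */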
1518
1519ENTRY art_deliver_pending_exception
1520    # This will create a new save-all frame, required by the runtime.
1521    DELIVER_PENDING_EXCEPTION
1522END art_deliver_pending_exception
1523
1524    .extern artQuickToInterpreterBridge
1525ENTRY art_quick_to_interpreter_bridge
1526    SETUP_SAVE_REFS_AND_ARGS_FRAME r1
1527    mov     r1, rSELF              @ pass Thread::Current
1528    mov     r2, sp                 @ pass SP
1529    blx     artQuickToInterpreterBridge    @ (Method* method, Thread*, SP)
1530    ldr     r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1531    // Tear down the callee-save frame. Skip arg registers.
1532    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1533    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1534    RESTORE_SAVE_REFS_ONLY_FRAME
1535    REFRESH_MARKING_REGISTER
1536    cbnz    r2, 1f                 @ deliver exception if one is pending
1537    vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
1538    bx      lr                     @ return on success
15391:
1540    DELIVER_PENDING_EXCEPTION
1541END art_quick_to_interpreter_bridge
1542
1543/*
1544 * Called to attempt to execute an obsolete method.
1545 */
1546ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
1547
1548    /*
1549     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
1550     * will long jump to the interpreter bridge.
1551     */
1552    .extern artDeoptimizeFromCompiledCode
1553ENTRY art_quick_deoptimize_from_compiled_code
1554    SETUP_SAVE_EVERYTHING_FRAME r1
1555    mov    r1, rSELF                      @ pass Thread::Current
1556    blx    artDeoptimizeFromCompiledCode  @ (DeoptimizationKind, Thread*)
1557    bl     art_quick_do_long_jump         @ (Context*)
1558    bkpt  // Unreached
1559END art_quick_deoptimize_from_compiled_code
1560
1561    /*
1562     * Signed 64-bit integer multiply.
1563     *
1564     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
1565     *        WX
1566     *      x YZ
1567     *  --------
1568     *     ZW ZX
1569     *  YW YX
1570     *
1571     * The low word of the result holds ZX, the high word holds
1572     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
1573     * it doesn't fit in the low 64 bits.
1574     *
1575     * Unlike most ARM math operations, multiply instructions have
1576     * restrictions on using the same register more than once (Rd and Rm
1577     * cannot be the same).
1578     */
1579    /* mul-long vAA, vBB, vCC */
1580ENTRY art_quick_mul_long
1581    push    {r9-r10}
1582    .cfi_adjust_cfa_offset 8
1583    .cfi_rel_offset r9, 0
1584    .cfi_rel_offset r10, 4
1585    mul     ip, r2, r1                  @  ip<- ZxW
1586    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
1587    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
1588    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
1589    mov     r0, r9
1590    mov     r1, r10
1591    pop     {r9-r10}
1592    .cfi_adjust_cfa_offset -8
1593    .cfi_restore r9
1594    .cfi_restore r10
1595    bx      lr
1596END art_quick_mul_long
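
    // Worked decomposition for the code above: with W = r1, X = r0, Y = r3,
    // Z = r2, the product is
    //   (W*2^32 + X) * (Y*2^32 + Z) = W*Y*2^64 + (W*Z + X*Y)*2^32 + X*Z.
    // W*Y overflows 64 bits entirely and is dropped; X*Z is computed in full
    // by UMULL; W*Z and X*Y only need their low 32 bits (MUL and MLA), which
    // are added into the high word of the result.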
1597
1598    /*
1599     * Long integer shift.  This is different from the generic 32/64-bit
1600     * binary operations because vAA/vBB are 64-bit but vCC (the shift
1601     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
1602     * 6 bits.
1603     * On entry:
1604     *   r0: low word
1605     *   r1: high word
1606     *   r2: shift count
1607     */
1608    /* shl-long vAA, vBB, vCC */
1609ARM_ENTRY art_quick_shl_long            @ ARM code as thumb code requires spills
1610    and     r2, r2, #63                 @ r2<- r2 & 0x3f
1611    mov     r1, r1, asl r2              @  r1<- r1 << r2
1612    rsb     r3, r2, #32                 @  r3<- 32 - r2
1613    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
1614    subs    ip, r2, #32                 @  ip<- r2 - 32
1615    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
1616    mov     r0, r0, asl r2              @  r0<- r0 << r2
1617    bx      lr
1618END art_quick_shl_long
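
    /*
     * A hedged C sketch of the branch-free shift above (the shr/ushr variants
     * below are analogous with arithmetic/logical right shifts):
     *
     *   static uint64_t ShlLong(uint32_t lo, uint32_t hi, uint32_t n) {
     *     n &= 63;
     *     if (n >= 32) return (uint64_t)(lo << (n - 32)) << 32;
     *     if (n == 0) return ((uint64_t)hi << 32) | lo;
     *     hi = (hi << n) | (lo >> (32 - n));
     *     return ((uint64_t)hi << 32) | (lo << n);
     *   }
     *
     * The assembly needs none of these branches: ARM register-specified LSL
     * and LSR shifts by 32 or more yield 0, so the ORR term vanishes for
     * n == 0, and the MOVPL patches in the n >= 32 case.
     */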
1619
1620    /*
1621     * Long integer shift.  This is different from the generic 32/64-bit
1622     * binary operations because vAA/vBB are 64-bit but vCC (the shift
1623     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
1624     * 6 bits.
1625     * On entry:
1626     *   r0: low word
1627     *   r1: high word
1628     *   r2: shift count
1629     */
1630    /* shr-long vAA, vBB, vCC */
1631ARM_ENTRY art_quick_shr_long            @ ARM code as thumb code requires spills
1632    and     r2, r2, #63                 @ r2<- r2 & 0x3f
1633    mov     r0, r0, lsr r2              @  r0<- r0 >> r2
1634    rsb     r3, r2, #32                 @  r3<- 32 - r2
1635    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
1636    subs    ip, r2, #32                 @  ip<- r2 - 32
1637    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
1638    mov     r1, r1, asr r2              @  r1<- r1 >> r2
1639    bx      lr
1640END art_quick_shr_long
1641
1642    /*
1643     * Long integer shift.  This is different from the generic 32/64-bit
1644     * binary operations because vAA/vBB are 64-bit but vCC (the shift
1645     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
1646     * 6 bits.
1647     * On entry:
1648     *   r0: low word
1649     *   r1: high word
1650     *   r2: shift count
1651     */
1652    /* ushr-long vAA, vBB, vCC */
1653ARM_ENTRY art_quick_ushr_long           @ ARM code as thumb code requires spills
1654    and     r2, r2, #63                 @ r2<- r2 & 0x3f
1655    mov     r0, r0, lsr r2              @  r0<- r0 >> r2
1656    rsb     r3, r2, #32                 @  r3<- 32 - r2
1657    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
1658    subs    ip, r2, #32                 @  ip<- r2 - 32
1659    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
1660    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
1661    bx      lr
1662END art_quick_ushr_long
1663
1664    /*
1665     * String's indexOf.
1666     *
1667     * On entry:
1668     *    r0:   string object (known non-null)
1669     *    r1:   char to match (known <= 0xFFFF)
1670     *    r2:   Starting offset in string data
1671     */
1672ENTRY art_quick_indexof
1673    push {r4, r10-r11, lr} @ 4 words of callee saves
1674    .cfi_adjust_cfa_offset 16
1675    .cfi_rel_offset r4, 0
1676    .cfi_rel_offset r10, 4
1677    .cfi_rel_offset r11, 8
1678    .cfi_rel_offset lr, 12
1679#if (STRING_COMPRESSION_FEATURE)
1680    ldr   r4, [r0, #MIRROR_STRING_COUNT_OFFSET]
1681#else
1682    ldr   r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
1683#endif
1684    add   r0, #MIRROR_STRING_VALUE_OFFSET
1685#if (STRING_COMPRESSION_FEATURE)
1686    /* r4 holds the count (with compression flag) and r3 holds the actual length */
1687    lsr   r3, r4, #1
1688#endif
1689    /* Clamp start to [0..count] */
1690    cmp   r2, #0
1691    it    lt
1692    movlt r2, #0
1693    cmp   r2, r3
1694    it    gt
1695    movgt r2, r3
1696
1697    /* Save a copy in r12 to later compute result */
1698    mov   r12, r0
1699
1700    /* Build pointer to start of data to compare and pre-bias */
1701#if (STRING_COMPRESSION_FEATURE)
1702    lsrs  r4, r4, #1
1703    bcc   .Lstring_indexof_compressed
1704#endif
1705    add   r0, r0, r2, lsl #1
1706    sub   r0, #2
1707
1708    /* Compute iteration count */
1709    sub   r2, r3, r2
1710
1711    /*
1712     * At this point we have:
1713     *   r0: start of data to test
1714     *   r1: char to compare
1715     *   r2: iteration count
1716     *   r4: compression style (used temporarily)
1717     *   r12: original start of string data
1718     *   r3, r4, r10, r11 available for loading string data
1719     */
1720
1721    subs  r2, #4
1722    blt   .Lindexof_remainder
1723
1724.Lindexof_loop4:
1725    ldrh  r3, [r0, #2]!
1726    ldrh  r4, [r0, #2]!
1727    ldrh  r10, [r0, #2]!
1728    ldrh  r11, [r0, #2]!
1729    cmp   r3, r1
1730    beq   .Lmatch_0
1731    cmp   r4, r1
1732    beq   .Lmatch_1
1733    cmp   r10, r1
1734    beq   .Lmatch_2
1735    cmp   r11, r1
1736    beq   .Lmatch_3
1737    subs  r2, #4
1738    bge   .Lindexof_loop4
1739
1740.Lindexof_remainder:
1741    adds  r2, #4
1742    beq   .Lindexof_nomatch
1743
1744.Lindexof_loop1:
1745    ldrh  r3, [r0, #2]!
1746    cmp   r3, r1
1747    beq   .Lmatch_3
1748    subs  r2, #1
1749    bne   .Lindexof_loop1
1750
1751.Lindexof_nomatch:
1752    mov   r0, #-1
1753    pop {r4, r10-r11, pc}
1754
1755.Lmatch_0:
1756    sub   r0, #6
1757    sub   r0, r12
1758    asr   r0, r0, #1
1759    pop {r4, r10-r11, pc}
1760.Lmatch_1:
1761    sub   r0, #4
1762    sub   r0, r12
1763    asr   r0, r0, #1
1764    pop {r4, r10-r11, pc}
1765.Lmatch_2:
1766    sub   r0, #2
1767    sub   r0, r12
1768    asr   r0, r0, #1
1769    pop {r4, r10-r11, pc}
1770.Lmatch_3:
1771    sub   r0, r12
1772    asr   r0, r0, #1
1773    pop {r4, r10-r11, pc}
1774#if (STRING_COMPRESSION_FEATURE)
1775.Lstring_indexof_compressed:
1776    add   r0, r0, r2
1777    sub   r0, #1
1778    sub   r2, r3, r2
1779.Lstring_indexof_compressed_loop:
1780    subs  r2, #1
1781    blt   .Lindexof_nomatch
1782    ldrb  r3, [r0, #1]!
1783    cmp   r3, r1
1784    beq   .Lstring_indexof_compressed_matched
1785    b     .Lstring_indexof_compressed_loop
1786.Lstring_indexof_compressed_matched:
1787    sub   r0, r12
1788    pop {r4, r10-r11, pc}
1789#endif
1790END art_quick_indexof
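
    /*
     * A hedged C sketch of the search above (the compressed-string path is
     * identical over uint8_t data):
     *
     *   static int32_t IndexOf(const uint16_t* chars, int32_t count,
     *                          uint16_t ch, int32_t start) {
     *     if (start < 0) start = 0;
     *     if (start > count) start = count;
     *     for (int32_t i = start; i < count; ++i) {  // 4x unrolled above.
     *       if (chars[i] == ch) return i;
     *     }
     *     return -1;
     *   }
     */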
1791
1792    /* Assembly routines used to handle ABI differences. */
1793
1794    /* double fmod(double a, double b) */
1795    .extern fmod
1796ENTRY art_quick_fmod
1797    push  {lr}
1798    .cfi_adjust_cfa_offset 4
1799    .cfi_rel_offset lr, 0
1800    sub   sp, #4
1801    .cfi_adjust_cfa_offset 4
1802    vmov  r0, r1, d0
1803    vmov  r2, r3, d1
1804    bl    fmod
1805    vmov  d0, r0, r1
1806    add   sp, #4
1807    .cfi_adjust_cfa_offset -4
1808    pop   {pc}
1809END art_quick_fmod
1810
1811    /* float fmodf(float a, float b) */
1812     .extern fmodf
1813ENTRY art_quick_fmodf
1814    push  {lr}
1815    .cfi_adjust_cfa_offset 4
1816    .cfi_rel_offset lr, 0
1817    sub   sp, #4
1818    .cfi_adjust_cfa_offset 4
1819    vmov  r0, r1, d0
1820    bl    fmodf
1821    vmov  s0, r0
1822    add   sp, #4
1823    .cfi_adjust_cfa_offset -4
1824    pop   {pc}
1825END art_quick_fmodf
1826
1827    /* int64_t art_d2l(double d) */
1828    .extern art_d2l
1829ENTRY art_quick_d2l
1830    vmov  r0, r1, d0
1831    b     art_d2l
1832END art_quick_d2l
1833
1834    /* int64_t art_f2l(float f) */
1835    .extern art_f2l
1836ENTRY art_quick_f2l
1837    vmov  r0, s0
1838    b     art_f2l
1839END art_quick_f2l
1840
1841    /* float art_l2f(int64_t l) */
1842    .extern art_l2f
1843ENTRY art_quick_l2f
1844    push  {lr}
1845    .cfi_adjust_cfa_offset 4
1846    .cfi_rel_offset lr, 0
1847    sub   sp, #4
1848    .cfi_adjust_cfa_offset 4
1849    bl    art_l2f
1850    vmov  s0, r0
1851    add   sp, #4
1852    .cfi_adjust_cfa_offset -4
1853    pop   {pc}
1854END art_quick_l2f
1855
1856    .extern artStringBuilderAppend
1857ENTRY art_quick_string_builder_append
1858    SETUP_SAVE_REFS_ONLY_FRAME r2       @ save callee saves in case of GC
1859    add    r1, sp, #(FRAME_SIZE_SAVE_REFS_ONLY + __SIZEOF_POINTER__)  @ pass args
1860    mov    r2, rSELF                    @ pass Thread::Current
1861    bl     artStringBuilderAppend       @ (uint32_t, const uint32_t*, Thread*)
1862    RESTORE_SAVE_REFS_ONLY_FRAME
1863    REFRESH_MARKING_REGISTER
1864    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
1865END art_quick_string_builder_append
1866
1867    /*
1868     * Create a function `name` calling the ReadBarrier::Mark routine,
1869     * getting its argument and returning its result through register
1870     * `reg`, saving and restoring all caller-save registers.
1871     *
1872     * IP is clobbered; `reg` must not be IP.
1873     *
1874     * If `reg` is different from `r0`, the generated function follows a
1875     * non-standard runtime calling convention:
1876     * - register `reg` (which may be different from R0) is used to pass the (sole) argument,
1877     * - register `reg` (which may be different from R0) is used to return the result,
1878     * - all other registers are callee-save (the values they hold are preserved).
1879     */
1880.macro READ_BARRIER_MARK_REG name, reg
1881ENTRY \name
1882    // Null check so that we can load the lock word.
1883    SMART_CBZ \reg, .Lret_rb_\name
1884    // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked.
1885    ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
1886    tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
1887    beq .Lnot_marked_rb_\name
1888    // Already marked, return right away.
1889.Lret_rb_\name:
1890    bx lr
1891
1892.Lnot_marked_rb_\name:
1893    // Test that both the forwarding state bits are 1.
1894#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
1895    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
1896    // the highest bits and the "forwarding address" state to have all bits set.
1897#error "Unexpected lock word state shift or forwarding address state value."
1898#endif
1899    cmp ip, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
1900    bhs .Lret_forwarding_address\name
1901
1902.Lslow_rb_\name:
1903    // Save IP: The kSaveEverything entrypoint art_quick_resolve_string used to
1904    // make a tail call here. Currently, it serves only for stack alignment but
1905    // we may reintroduce kSaveEverything calls here in the future.
1906    push  {r0-r4, r9, ip, lr}           @ save return address, core caller-save registers and ip
1907    .cfi_adjust_cfa_offset 32
1908    .cfi_rel_offset r0, 0
1909    .cfi_rel_offset r1, 4
1910    .cfi_rel_offset r2, 8
1911    .cfi_rel_offset r3, 12
1912    .cfi_rel_offset r4, 16
1913    .cfi_rel_offset r9, 20
1914    .cfi_rel_offset ip, 24
1915    .cfi_rel_offset lr, 28
1916
1917    .ifnc \reg, r0
1918      mov   r0, \reg                    @ pass arg1 - obj from `reg`
1919    .endif
1920
1921    vpush {s0-s15}                      @ save floating-point caller-save registers
1922    .cfi_adjust_cfa_offset 64
1923    bl    artReadBarrierMark            @ r0 <- artReadBarrierMark(obj)
1924    vpop {s0-s15}                       @ restore floating-point registers
1925    .cfi_adjust_cfa_offset -64
1926
1927    .ifc \reg, r0                       @ Save result to the stack slot or destination register.
1928      str r0, [sp, #0]
1929    .else
1930      .ifc \reg, r1
1931        str r0, [sp, #4]
1932      .else
1933        .ifc \reg, r2
1934          str r0, [sp, #8]
1935        .else
1936          .ifc \reg, r3
1937            str r0, [sp, #12]
1938          .else
1939            .ifc \reg, r4
1940              str r0, [sp, #16]
1941            .else
1942              .ifc \reg, r9
1943                str r0, [sp, #20]
1944              .else
1945                mov \reg, r0
1946              .endif
1947            .endif
1948          .endif
1949        .endif
1950      .endif
1951    .endif
1952
1953    pop   {r0-r4, r9, ip, lr}           @ restore caller-save registers
1954    .cfi_adjust_cfa_offset -32
1955    .cfi_restore r0
1956    .cfi_restore r1
1957    .cfi_restore r2
1958    .cfi_restore r3
1959    .cfi_restore r4
1960    .cfi_restore r9
1961    .cfi_restore ip
1962    .cfi_restore lr
1963    bx lr
1964.Lret_forwarding_address\name:
1965    // Shift left by the forwarding address shift. This clears out the state bits since they are
1966    // in the top 2 bits of the lock word.
1967    lsl \reg, ip, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
1968    bx lr
1969END \name
1970.endm
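
    /*
     * A hedged C sketch of the fast-path checks above (illustrative field
     * names; the constants are the real assembler defines):
     *
     *   static Object* ReadBarrierMarkReg(Object* ref) {
     *     if (ref == NULL) return NULL;
     *     uint32_t lw = ref->lock_word_;
     *     if (lw & LOCK_WORD_MARK_BIT_MASK_SHIFTED) {
     *       return ref;                     // Already marked.
     *     }
     *     if ((lw >> LOCK_WORD_STATE_SHIFT) == LOCK_WORD_STATE_FORWARDING_ADDRESS) {
     *       // State bits sit in the top 2 bits; shifting left drops them.
     *       return (Object*)(lw << LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT);
     *     }
     *     return artReadBarrierMark(ref);   // Slow path; all registers saved.
     *   }
     */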
1971
1972READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0
1973READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1
1974READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2
1975READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3
1976READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, r4
1977READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, r5
1978READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, r6
1979READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, r7
1980READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
1981READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
1982READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
1983READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
1984
1985// Helper macros for Baker CC read barrier mark introspection (BRBMI).
1986.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register
1987    \macro_for_register r0
1988    \macro_for_register r1
1989    \macro_for_register r2
1990    \macro_for_register r3
1991    \macro_for_register r4
1992    \macro_for_register r5
1993    \macro_for_register r6
1994    \macro_for_register r7
1995    \macro_for_reserved_register  // r8 (rMR) is the marking register.
1996    \macro_for_register r9
1997    \macro_for_register r10
1998    \macro_for_register r11
1999    \macro_for_reserved_register  // IP is reserved.
2000    \macro_for_reserved_register  // SP is reserved.
2001    \macro_for_reserved_register  // LR is reserved.
2002    \macro_for_reserved_register  // PC is reserved.
2003.endm
2004
2005.macro BRBMI_RETURN_SWITCH_CASE reg
2006    .balign 8
2007.Lmark_introspection_return_switch_case_\reg:
2008    mov     rMR, #1
2009    mov     \reg, ip
2010    bx      lr
2011.endm
2012
2013.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg
2014    .byte   (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2
2015.endm
2016
2017.macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
2018    .byte   (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2
2019.endm
2020
2021#if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
2022#error "Array and field introspection code sharing requires same LDR offset."
2023#endif
2024.macro BRBMI_ARRAY_LOAD index_reg
2025    ldr     ip, [ip, \index_reg, lsl #2]                // 4 bytes.
2026    b       art_quick_read_barrier_mark_introspection   // Should be 2 bytes, encoding T2.
2027    .balign 8                                           // Add padding to 8 bytes.
2028.endm
2029
2030.macro BRBMI_BKPT_FILL_4B
2031    bkpt    0
2032    bkpt    0
2033.endm
2034
2035.macro BRBMI_BKPT_FILL_8B
2036    BRBMI_BKPT_FILL_4B
2037    BRBMI_BKPT_FILL_4B
2038.endm
2039
2040.macro BRBMI_RUNTIME_CALL
2041    // Note: This macro generates exactly 22 bytes of code. The core register
2042    // PUSH and the MOVs are 16-bit instructions; the rest are 32-bit instructions.
2043
2044    push   {r0-r3, r7, lr}            // Save return address and caller-save registers.
2045    .cfi_adjust_cfa_offset 24
2046    .cfi_rel_offset r0, 0
2047    .cfi_rel_offset r1, 4
2048    .cfi_rel_offset r2, 8
2049    .cfi_rel_offset r3, 12
2050    .cfi_rel_offset r7, 16
2051    .cfi_rel_offset lr, 20
2052
2053    mov     r0, ip                    // Pass the reference.
2054    vpush {s0-s15}                    // save floating-point caller-save registers
2055    .cfi_adjust_cfa_offset 64
2056    bl      artReadBarrierMark        // r0 <- artReadBarrierMark(obj)
2057    vpop    {s0-s15}                  // restore floating-point registers
2058    .cfi_adjust_cfa_offset -64
2059    mov     ip, r0                    // Move reference to ip in preparation for return switch.
2060
2061    pop     {r0-r3, r7, lr}           // Restore registers.
2062    .cfi_adjust_cfa_offset -24
2063    .cfi_restore r0
2064    .cfi_restore r1
2065    .cfi_restore r2
2066    .cfi_restore r3
2067    .cfi_restore r7
2068    .cfi_restore lr
2069.endm
2070
2071.macro BRBMI_CHECK_NULL_AND_MARKED label_suffix
2072    // If reference is null, just return it in the right register.
2073    cmp     ip, #0
2074    beq     .Lmark_introspection_return\label_suffix
2075    // Use rMR as temp and check the mark bit of the reference.
2076    ldr     rMR, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
2077    tst     rMR, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
2078    beq     .Lmark_introspection_unmarked\label_suffix
2079.Lmark_introspection_return\label_suffix:
2080.endm
2081
2082.macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix
2083.Lmark_introspection_unmarked\label_suffix:
2084    // Check if the top two bits are one, if this is the case it is a forwarding address.
2085#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
2086    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
2087    // the highest bits and the "forwarding address" state to have all bits set.
2088#error "Unexpected lock word state shift or forwarding address state value."
2089#endif
2090    cmp     rMR, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
2091    bhs     .Lmark_introspection_forwarding_address\label_suffix
2092.endm
2093
2094.macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix
2095.Lmark_introspection_forwarding_address\label_suffix:
2096    // Note: This macro generates exactly 22 bytes of code, the branch is near.
2097
2098    // Shift left by the forwarding address shift. This clears out the state bits since they are
2099    // in the top 2 bits of the lock word.
2100    lsl     ip, rMR, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
2101    b       .Lmark_introspection_return\label_suffix
2102.endm
2103
2104.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset
2105    // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR.
2106    ldrh    rMR, [lr, #(-1 + \ldr_offset + 2)]
2107.endm
2108
2109.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset
2110    // Load the 16-bit instruction. Adjust for the thumb state in LR.
2111    ldrh    rMR, [lr, #(-1 + \ldr_offset)]
2112.endm
2113
2114.macro BRBMI_EXTRACT_RETURN_REG_wide
2115    lsr     rMR, rMR, #12             // Extract `ref_reg`.
2116.endm
2117
2118.macro BRBMI_EXTRACT_RETURN_REG_narrow
2119    and     rMR, rMR, #7              // Extract `ref_reg`.
2120.endm
2121
2122.macro BRBMI_LOAD_AND_EXTRACT_RETURN_REG ldr_offset, label_suffix
2123    BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \ldr_offset
2124    BRBMI_EXTRACT_RETURN_REG\label_suffix
2125.endm
2126
2127.macro BRBMI_GC_ROOT gc_root_ldr_offset, label_suffix
2128    .balign 32
2129    .thumb_func
2130    .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function
2131    .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
2132    .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
2133art_quick_read_barrier_mark_introspection_gc_roots\label_suffix:
2134    BRBMI_LOAD_AND_EXTRACT_RETURN_REG \gc_root_ldr_offset, \label_suffix
2135.endm
2136
2137.macro BRBMI_FIELD_SLOW_PATH ldr_offset, label_suffix
2138    .balign 16
2139    // Note: Generates exactly 16 bytes of code.
2140    BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix
2141    BRBMI_LOAD_AND_EXTRACT_RETURN_REG \ldr_offset, \label_suffix
2142    b .Lmark_introspection_runtime_call
2143.endm
2144
2145    /*
2146     * Use introspection to load a reference from the same address as the LDR
2147     * instruction in generated code would load (unless loaded by the thunk,
2148     * see below), call ReadBarrier::Mark() with that reference if needed
2149     * and return it in the same register as the LDR instruction would load.
2150     *
2151     * The entrypoint is called through a thunk that differs across load kinds.
2152     * For field and array loads the LDR instruction in generated code follows
2153     * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning)
2154     * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where
2155     * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk
2156     * knows the holder and performs the gray bit check, returning to the LDR
2157     * instruction if the object is not gray, so this entrypoint no longer
2158     * needs to know anything about the holder. For GC root loads, the LDR
2159     * instruction in generated code precedes the branch to the thunk, i.e. the
2160     * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1)
2161     * where the -1 is again the Thumb mode bit adjustment, and the thunk does
2162     * not do the gray bit check.
2163     *
2164     * For field accesses and array loads with a constant index the thunk loads
2165     * the reference into IP using introspection and calls the main entrypoint
2166     * ("wide", for 32-bit LDR) art_quick_read_barrier_mark_introspection or
2167     * the "narrow" entrypoint (for 16-bit LDR). The latter is at a known
2168     * offset (BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)
2169     * from the main entrypoint and the thunk adjusts the entrypoint pointer.
2170     * With heap poisoning enabled, the passed reference is poisoned.
2171     *
2172     * For array accesses with non-constant index, the thunk inserts the bits
2173     * 0-5 of the LDR instruction to the entrypoint address, effectively
2174     * calculating a switch case label based on the index register (bits 0-3)
2175     * and adding an extra offset (bits 4-5 hold the shift which is always 2
2176     * for reference loads) to differentiate from the main entrypoint, then
2177     * moves the base register to IP and jumps to the switch case. Therefore
2178     * we need to align the main entrypoint to 512 bytes, accounting for
2179     * a 256-byte offset followed by 16 array entrypoints starting at
2180     * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR
2181     * (register) and a branch to the main entrypoint.
2182     *
2183     * For GC root accesses we cannot use the main entrypoint because of the
2184     * different offset where the LDR instruction in generated code is located.
2185     * (And even with heap poisoning enabled, GC roots are not poisoned.)
2186     * To re-use the same entrypoint pointer in generated code, we make sure
2187     * that the gc root entrypoint (a copy of the entrypoint with a different
2188     * offset for introspection loads) is located at a known offset (0xc0/0xe0
2189     * bytes, or BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET/
2190     * BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET) from the
2191     * main entrypoint and the GC root thunk adjusts the entrypoint pointer,
2192     * moves the root register to IP and jumps to the customized entrypoint,
2193     * art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}.
2194     * The thunk also performs all the fast-path checks, so we need just the
2195     * slow path.
2196     *
2197     * Intrinsic CAS operations (VarHandle*CompareAnd{Set,Exchange}* and
2198     * UnsafeCASObject) use similar code to the GC roots wide load but using
2199     * MOV (register, T3) instead of the LDR (immediate, T3), with destination
2200     * register in bits 8-11 rather than 12-15. Therefore they have their own
2201     * entrypoint, art_quick_read_barrier_mark_introspection_intrinsic_cas
2202     * at the offset BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_ENTRYPOINT_OFFSET.
2203     * This is used only for high registers, low registers reuse the GC roots
2204     * narrow load entrypoint as the low 3 bits of the destination register
2205     * for MOV (register) encoding T1 match the LDR (immediate) encoding T1.
2206     *
2207     * The code structure is
2208     *   art_quick_read_barrier_mark_introspection:                   // @0x00
2209     *     Up to 32 bytes code for main entrypoint fast-path code for fields
2210     *     (and array elements with constant offset) with LDR encoding T3;
2211     *     jumps to the switch in the "narrow" entrypoint.
2212     *   art_quick_read_barrier_mark_introspection_narrow:            // @0x20
2213     *     Up to 48 bytes code for fast path code for fields (and array
2214     *     elements with constant offset) with LDR encoding T1, ending in the
2215     *     return switch instruction TBB and the table with switch offsets.
2216     *   .Lmark_introspection_return_switch_case_r0:                  // @0x50
2217     *     Exactly 88 bytes of code for the return switch cases (8 bytes per
2218     *     case, 11 cases; no code for reserved registers).
2219     *   .Lmark_introspection_forwarding_address_narrow:              // @0xa8
2220     *     Exactly 6 bytes to extract the forwarding address and jump to the
2221     *     "narrow" entrypoint fast path.
2222     *   .Lmark_introspection_return_switch_case_bad:                 // @0xae
2223     *     Exactly 2 bytes, bkpt for unexpected return register.
2224     *   .Lmark_introspection_unmarked_narrow:                        // @0xb0
2225     *     Exactly 16 bytes for "narrow" entrypoint slow path.
2226     *   art_quick_read_barrier_mark_introspection_gc_roots_wide:     // @0xc0
2227     *     GC root entrypoint code for LDR encoding T3 (10 bytes); loads and
2228     *     extracts the return register and jumps to the runtime call.
2229     *   .Lmark_introspection_forwarding_address_wide:                // @0xca
2230     *     Exactly 6 bytes to extract the forwarding address and jump to the
2231     *     "wide" entrypoint fast path.
2232     *   .Lmark_introspection_unmarked_wide:                          // @0xd0
2233     *     Exactly 16 bytes for "wide" entrypoint slow path.
2234     *   art_quick_read_barrier_mark_introspection_gc_roots_narrow:   // @0xe0
2235     *     GC root entrypoint code for LDR encoding T1 (8 bytes); loads and
2236     *     extracts the return register and falls through to the runtime call.
2237     *   .Lmark_introspection_runtime_call:                           // @0xe8
2238     *     Exactly 24 bytes for the runtime call to MarkReg() and jump to the
2239     *     return switch.
2240     *   art_quick_read_barrier_mark_introspection_arrays:            // @0x100
2241     *     Exactly 128 bytes for array load switch cases (16x2 instructions).
2242     *   art_quick_read_barrier_mark_introspection_intrinsic_cas:     // @0x180
2243     *     Intrinsic CAS entrypoint for MOV (register) encoding T3 (6 bytes).
2244     *     Loads the return register and jumps to the runtime call.
2245     */
2246#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
2247ENTRY_ALIGNED art_quick_read_barrier_mark_introspection, 512
2248    // At this point, IP contains the reference, rMR is clobbered by the thunk
2249    // and can be freely used as it will be set back to 1 before returning.
2250    // For heap poisoning, the reference is poisoned, so unpoison it first.
2251    UNPOISON_HEAP_REF ip
2252    // Check for null or marked, lock word is loaded into rMR.
2253    BRBMI_CHECK_NULL_AND_MARKED _wide
2254    // Load and extract the return register from the instruction.
2255    BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide
2256    b       .Lmark_introspection_return_switch
2257
2258    .balign 32
2259    .thumb_func
2260    .type art_quick_read_barrier_mark_introspection_narrow, #function
2261    .hidden art_quick_read_barrier_mark_introspection_narrow
2262    .global art_quick_read_barrier_mark_introspection_narrow
2263art_quick_read_barrier_mark_introspection_narrow:
2264    // At this point, IP contains the reference, rMR is clobbered by the thunk
2265    // and can be freely used as it will be set back to 1 before returning.
2266    // For heap poisoning, the reference is poisoned, so unpoison it first.
2267    UNPOISON_HEAP_REF ip
2268    // Check for null or marked, lock word is loaded into rMR.
2269    BRBMI_CHECK_NULL_AND_MARKED _narrow
2270    // Load and extract the return register from the instruction.
2271    BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow
2272.Lmark_introspection_return_switch:
2273    tbb     [pc, rMR]                 // Jump to the switch case.
2274.Lmark_introspection_return_table:
2275    BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
2276    BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE, /* no code */
2277
2278    .balign 8
2279    BRBMI_EXTRACT_FORWARDING_ADDRESS _narrow  // 6 bytes
2280.Lmark_introspection_return_switch_case_bad:
2281    bkpt                              // 2 bytes
2282
2283    BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow
2284
2285    // 8 bytes for the loading and extracting of the return register.
2286    BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide
2287    // 2 bytes for near branch to the runtime call.
2288    b .Lmark_introspection_runtime_call
2289
2290    BRBMI_EXTRACT_FORWARDING_ADDRESS _wide  // Not even 4-byte aligned.
2291
2292    BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide
2293
2294    // 8 bytes for the loading and extracting of the return register.
2295    BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow
2296    // And the runtime call and branch to the switch taking exactly 24 bytes
2297    // (22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near branch)
2298    // shall take the rest of the 32-byte section (within a cache line).
2299.Lmark_introspection_runtime_call:
2300    BRBMI_RUNTIME_CALL
2301    b       .Lmark_introspection_return_switch
2302
2303    .balign 256
2304    .thumb_func
2305    .type art_quick_read_barrier_mark_introspection_arrays, #function
2306    .hidden art_quick_read_barrier_mark_introspection_arrays
2307    .global art_quick_read_barrier_mark_introspection_arrays
2308art_quick_read_barrier_mark_introspection_arrays:
2309    BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B
2310
2311    .balign 8
2312    .thumb_func
2313    .type art_quick_read_barrier_mark_introspection_intrinsic_cas, #function
2314    .hidden art_quick_read_barrier_mark_introspection_intrinsic_cas
2315    .global art_quick_read_barrier_mark_introspection_intrinsic_cas
2316art_quick_read_barrier_mark_introspection_intrinsic_cas:
2317    // Load the byte of the MOV instruction that contains Rd. Adjust for the thumb state in LR.
2318    // The MOV (register, T3) is |11101010010|S|1111|(0)000|Rd|0000|Rm|, so the byte we read
2319    // here, i.e. |(0)000|Rd|, contains only the register number, the top 4 bits are 0.
2320    ldrb    rMR, [lr, #(-1 + BAKER_MARK_INTROSPECTION_INTRINSIC_CAS_MOV_OFFSET + 3)]
2321    b .Lmark_introspection_runtime_call
2322END art_quick_read_barrier_mark_introspection
2323#else  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
2324ENTRY art_quick_read_barrier_mark_introspection
2325    bkpt                              // Unreachable.
2326END art_quick_read_barrier_mark_introspection
2327#endif  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
2328
2329.extern artInvokePolymorphic
2330ENTRY art_quick_invoke_polymorphic
2331    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
2332    mov     r0, r1                 @ r0 := receiver
2333    mov     r1, rSELF              @ r1 := Thread::Current
2334    mov     r2, sp                 @ r2 := SP
2335    bl      artInvokePolymorphic   @ artInvokePolymorphic(receiver, Thread*, SP)
2336    str     r1, [sp, #72]          @ Save r1 to context (r0:r1 := result)
2337    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2338    REFRESH_MARKING_REGISTER
2339    vmov    d0, r0, r1             @ Put result r0:r1 into floating point return register.
2340    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2
2341END art_quick_invoke_polymorphic
2342
2343.extern artInvokeCustom
2344ENTRY art_quick_invoke_custom
2345    SETUP_SAVE_REFS_AND_ARGS_FRAME r1
2346                                   @ r0 := call_site_idx
2347    mov     r1, rSELF              @ r1 := Thread::Current
2348    mov     r2, sp                 @ r2 := SP
2349    bl      artInvokeCustom        @ artInvokeCustom(call_site_idx, Thread*, SP)
2350    str     r1, [sp, #72]          @ Save r1 to context (r0:r1 = result)
2351    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2352    REFRESH_MARKING_REGISTER
2353    vmov    d0, r0, r1             @ Put result r0:r1 into floating point return register.
2354    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2
2355END art_quick_invoke_custom
2356
2357// r0 contains the class, r4 contains the inline cache. We can use ip as a temporary.
2358ENTRY art_quick_update_inline_cache
2359#if (INLINE_CACHE_SIZE != 5)
2360#error "INLINE_CACHE_SIZE not as expected."
2361#endif
2362#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
2363    // Don't update the cache if we are marking.
2364    cmp rMR, #0
2365    bne .Ldone
2366#endif
2367.Lentry1:
2368    ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET]
2369    cmp ip, r0
2370    beq .Ldone
2371    cmp ip, #0
2372    bne .Lentry2
2373    ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET]
2374    cmp ip, #0
2375    bne .Lentry1
2376    strex  ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET]
2377    cmp ip, #0
2378    bne .Ldone
2379    b .Lentry1
2380.Lentry2:
2381    ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+4]
2382    cmp ip, r0
2383    beq .Ldone
2384    cmp ip, #0
2385    bne .Lentry3
2386    ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+4]
2387    cmp ip, #0
2388    bne .Lentry2
2389    strex  ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+4]
2390    cmp ip, #0
2391    bne .Ldone
2392    b .Lentry2
2393.Lentry3:
2394    ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+8]
2395    cmp ip, r0
2396    beq .Ldone
2397    cmp ip, #0
2398    bne .Lentry4
2399    ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+8]
2400    cmp ip, #0
2401    bne .Lentry3
2402    strex  ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+8]
2403    cmp ip, #0
2404    bne .Ldone
2405    b .Lentry3
2406.Lentry4:
2407    ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+12]
2408    cmp ip, r0
2409    beq .Ldone
2410    cmp ip, #0
2411    bne .Lentry5
2412    ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+12]
2413    cmp ip, #0
2414    bne .Lentry4
2415    strex  ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+12]
2416    cmp ip, #0
2417    bne .Ldone
2418    b .Lentry4
2419.Lentry5:
2420    // Unconditionally store, the inline cache is megamorphic.
2421    str  r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+16]
2422.Ldone:
2423    blx lr
2424END art_quick_update_inline_cache
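
    /*
     * A hedged C sketch of the update above (the CAS helper is hypothetical;
     * the code uses raw LDREX/STREX): each of the first four slots is claimed
     * at most once; a failed STREX means another thread is racing, and the
     * update is simply abandoned. The fifth slot is written unconditionally
     * to mark the cache megamorphic.
     *
     *   static void UpdateInlineCache(Class* cls, Class* volatile cache[5]) {
     *     for (int i = 0; i < 4; ++i) {
     *       Class* seen = cache[i];
     *       if (seen == cls) return;        // Already recorded.
     *       if (seen == NULL) {
     *         CompareAndSwapWeak(&cache[i], NULL, cls);  // Hypothetical CAS.
     *         return;                       // Claimed, or contended: give up.
     *       }
     *       // Slot holds another class: fall through to the next slot.
     *     }
     *     cache[4] = cls;                   // Megamorphic marker.
     *   }
     */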
2425
2426// On entry, method is at the bottom of the stack.
2427ENTRY art_quick_compile_optimized
2428    SETUP_SAVE_EVERYTHING_FRAME r0
2429    ldr r0, [sp, FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod
2430    mov r1, rSELF                            @ pass Thread::Current
2431    bl     artCompileOptimized               @ (ArtMethod*, Thread*)
2432    RESTORE_SAVE_EVERYTHING_FRAME
2433    // We don't need to restore the marking register here, as
2434    // artCompileOptimized doesn't allow thread suspension.
2435    blx lr
2436END art_quick_compile_optimized
2437
2438// On entry, method is at the bottom of the stack.
2439ENTRY art_quick_method_entry_hook
2440    SETUP_SAVE_EVERYTHING_FRAME r0
2441    ldr r0, [sp, FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod
2442    mov r1, rSELF                            @ pass Thread::Current
2443    mov r2, sp                               @ pass SP
2444    bl  artMethodEntryHook                   @ (ArtMethod*, Thread*, SP)
2445
2446    CFI_REMEMBER_STATE
2447    cbnz r0, .Lentryhook_deopt
2448
2449    RESTORE_SAVE_EVERYTHING_FRAME
2450    REFRESH_MARKING_REGISTER
2451    blx lr
2452
2453.Lentryhook_deopt:
2454    // Deoptimize
2455    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
2456    bl  art_quick_do_long_jump               @ (Context*)
2457    bkpt  // Unreached
2458END art_quick_method_entry_hook
2459
2460ENTRY art_quick_method_exit_hook
2461    SETUP_SAVE_EVERYTHING_FRAME r5
2462
2463    INCREASE_FRAME 4                          @ align stack
2464    push {r2}                                 @ pass frame_size stack
2465    .cfi_adjust_cfa_offset 4
2466    add r3, sp, #(8 + 8)                      @ pass fpr_res pointer, in kSaveEverything frame
2467    add r2, sp, #(136 + 8)                    @ pass gpr_res pointer, in kSaveEverything frame
2468    add r1, sp, #(FRAME_SIZE_SAVE_EVERYTHING + 8)   @ pass ArtMethod**
2469    mov r0, rSELF                             @ pass Thread::Current
2470    blx artMethodExitHook                     @ (Thread*, ArtMethod**, gpr_res*, fpr_res*,
2471                                              @ frame_size)
2472    DECREASE_FRAME 8                          @ pop arguments on stack
2473
2474    CFI_REMEMBER_STATE
2475    cbnz r0, .Lexithook_deopt_or_exception
2476
2477    RESTORE_SAVE_EVERYTHING_FRAME
2478    REFRESH_MARKING_REGISTER
2479    blx lr
2480
2481.Lexithook_deopt_or_exception:
2482    // Deoptimize or exception thrown.
2483    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
2484    bl  art_quick_do_long_jump                @ (Context*)
2485    bkpt  // Unreached
2486END art_quick_method_exit_hook
2487