/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_riscv64.S"
#include "interpreter/cfi_asm_support.h"

#include "arch/quick_alloc_entrypoints.S"
#include "arch/quick_field_entrypoints.S"


.macro INVOKE_STUB_CREATE_FRAME
    // Save RA, FP, xSELF (current thread), A4, A5 (they will be needed in the invoke stub return).
    INCREASE_FRAME 48
    // Slot (8*0) is used for `ArtMethod*` (if no args), args or padding, see below.
    SAVE_GPR xSELF, (8*1)
    SAVE_GPR a4,    (8*2)
    SAVE_GPR a5,    (8*3)
    SAVE_GPR fp,    (8*4)  // Store FP just under the return address.
    SAVE_GPR ra,    (8*5)

    // Make the new FP point to the location where we stored the old FP.
    // Some stack-walking tools may rely on this singly-linked list of saved FPs.
    addi fp, sp, (8*4)  // save frame pointer
    .cfi_def_cfa fp, 48 - (8*4)

    // We already have space for `ArtMethod*` on the stack but we need space for args above
    // the `ArtMethod*`, so add sufficient space now, pushing the `ArtMethod*` slot down.
    addi t0, a2, 0xf    // Reserve space for arguments and
    andi t0, t0, ~0xf   // round up for 16-byte stack alignment.
    sub  sp, sp, t0

    mv xSELF, a3

    // Copy arguments on stack (4 bytes per slot):
    //   A1: source address
    //   A2: arguments length
    //   T0: destination address if there are any args.

    beqz a2, 2f      // loop through 4-byte arguments from the last to the first
    addi t0, sp, 8   // destination address is bottom of the stack + 8 bytes for ArtMethod* (null)
1:
    addi a2, a2, -4
    add  t1, a1, a2  // T1 is the source address of the next copied argument
    lw   t2, (t1)    // T2 is the 4 bytes at address T1
    add  t1, t0, a2  // T1 is the destination address of the next copied argument
    sw   t2, (t1)    // save T2 at the destination address T1
    bnez a2, 1b
2:
    sd zero, (sp)  // Store null into ArtMethod* at bottom of frame.
.endm
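
// A sketch of the frame built by `INVOKE_STUB_CREATE_FRAME` (offsets relative to the new SP;
// <args> is A2 rounded up to a multiple of 16):
//   sp + <args> + 8*5 : saved RA
//   sp + <args> + 8*4 : saved FP              <- the new FP points here
//   sp + <args> + 8*3 : saved A5 (shorty)
//   sp + <args> + 8*2 : saved A4 (result pointer)
//   sp + <args> + 8*1 : saved xSELF
//   sp + 8 ..         : 4-byte argument slots copied from A1 (padding above, if any)
//   sp                : null `ArtMethod*` marking the transition frame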


.macro INVOKE_STUB_CALL_AND_RETURN
    // Call the method.
    ld   t0, ART_METHOD_QUICK_CODE_OFFSET_64(a0)
    jalr t0

    addi sp, fp, -(8*4)  // restore SP (see `INVOKE_STUB_CREATE_FRAME`)
    .cfi_def_cfa sp, 48

    // Restore RA, FP, xSELF (current thread), A4 (result pointer) and A5 (shorty) from the stack.
    RESTORE_GPR xSELF, (8*1)
    RESTORE_GPR a4,    (8*2)
    RESTORE_GPR a5,    (8*3)
    RESTORE_GPR fp,    (8*4)
    RESTORE_GPR ra,    (8*5)
    DECREASE_FRAME 48

    // Load the result type (the first character of the shorty) from the address in A5.
    // Check the result type and store the correct register into the jvalue in memory at address A4.
    lbu t0, (a5)

    li t1, 'V'  // void (do not store result at all)
    beq t1, t0, 1f

    li t1, 'D'  // double
    beq t1, t0, 2f

    li t1, 'F'  // float
    beq t1, t0, 3f

    // Otherwise, result is in a0 (either 8 or 4 bytes, but it is fine to store 8 bytes as the
    // upper bytes in a0 in that case are zero, and jvalue has enough space).
    sd a0, (a4)
1:
    ret

2:  // double: result in fa0 (8 bytes)
    fsd fa0, (a4)
    ret

3:  // float: result in fa0 (4 bytes)
    fsw fa0, (a4)
    ret
.endm
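
// For example, with shorty "D" the 'D' check above matches and the 8-byte double in FA0 is
// stored to the jvalue at A4; with shorty "I" none of the checks match and the `sd` stores A0
// (safe for 4-byte results, as noted above).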


ENTRY art_deliver_pending_exception
    DELIVER_PENDING_EXCEPTION
END art_deliver_pending_exception


// The size of the handler emitted by `INVOKE_STUB_LOAD_REG` below.
#define INVOKE_STUB_LOAD_REG_SIZE 8

// The offset within `INVOKE_STUB_LOAD_REG` for skipping arguments.
#define INVOKE_STUB_LOAD_REG_SKIP_OFFSET 6

// Macro for loading an argument into a register.
//  load - instruction used for loading,
//  reg - the register to load,
//  args - pointer to next argument,
//  size - the size of the register - 4 or 8 bytes, used as an offset for the load,
//  handler_reg - the register with the address of the handler (points to this handler on entry),
//  handler_diff - the difference in bytes from the current to the next handler,
//  cont - the base name of the label for continuing the shorty processing loop,
//  sfx - suffix added to all labels to make labels unique for different users.
.macro INVOKE_STUB_LOAD_REG load, reg, args, size, handler_reg, handler_diff, cont, sfx
.Linvoke_stub_\load\reg\sfx:
    \load  \reg, -\size(\args)
    c.addi \handler_reg, \handler_diff
.org .Linvoke_stub_\load\reg\sfx + INVOKE_STUB_LOAD_REG_SKIP_OFFSET  // Enforce skip offset.
    c.j    \cont\sfx
.org .Linvoke_stub_\load\reg\sfx + INVOKE_STUB_LOAD_REG_SIZE  // Enforce handler size.
.endm
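
// A sketch of one expansion, e.g. `INVOKE_STUB_LOAD_REG lw, a2, t0, 4, t4,
// INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, _static`:
//
//   .Linvoke_stub_lwa2_static:
//       lw     a2, -4(t0)           // 4 bytes: load the argument T0 was already advanced past.
//       c.addi t4, 8                // 2 bytes: advance the handler pointer to the next handler.
//       c.j    .Lfill_regs_static   // 2 bytes, at offset 6: back to the shorty loop.
//
// Every handler is exactly 8 bytes (enforced by `.org`), so the handlers can be indexed like an
// array. The last handler of each register class passes `INVOKE_STUB_LOAD_REG_SKIP_OFFSET` (6)
// as `handler_diff`, which parks the handler pointer on that handler's own `c.j`, so any further
// arguments of that class are skipped without loading.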


// Fill registers a1 to a7 and fa0 to fa7 with parameters.
// Parse the passed shorty to determine which register to load.
//  a5 - shorty,
//  t0 - points to arguments on the stack if any (undefined for static method without args),
//  sfx - suffix added to all labels to make labels unique for different users.
.macro INVOKE_STUB_LOAD_ALL_ARGS sfx
    addi t1, a5, 1                     // Load shorty address, plus one to skip the return type.

    // Load this (if instance method) and record the number of GPRs to fill.
    .ifc \sfx, _instance
        lwu  a1, (t0)                  // Load "this" parameter,
        addi t0, t0, 4                 // and increment arg pointer.
        .equ NUM_GPRS_TO_FILL, 6
    .else
        .equ NUM_GPRS_TO_FILL, 7
    .endif
    .equ NUM_FPRS_TO_FILL, 8

    // Load addresses for routines that load argument GPRs and FPRs.
    lla  t4, .Lreg_handlers_start\sfx  // First handler for non-FP args.
    addi t5, t4, (3 * NUM_GPRS_TO_FILL * INVOKE_STUB_LOAD_REG_SIZE)  // First handler for FP args.

    // Loop to fill registers.
.Lfill_regs\sfx:
    lb   t2, (t1)                      // Load next character in signature,
    addi t1, t1, 1                     // and increment.
    beqz t2, .Lcall_method\sfx         // Exit at end of signature. Shorty 0 terminated.

    li   t3, 'L'
    beq  t2, t3, .Lload_reference\sfx  // Is this a reference?

    li   t3, 'J'
    beq  t2, t3, .Lload_long\sfx       // Is this a long?

    li   t3, 'F'
    beq  t2, t3, .Lload_float\sfx      // Is this a float?

    li   t3, 'D'
    beq  t2, t3, .Lload_double\sfx     // Is this a double?

    // Everything else uses a 4-byte value sign-extended to a 64-bit GPR.
    addi t0, t0, 4
    jalr x0, 0(t4)

.Lload_reference\sfx:
    addi t0, t0, 4
    jalr x0, (NUM_GPRS_TO_FILL * INVOKE_STUB_LOAD_REG_SIZE)(t4)

.Lload_long\sfx:
    addi t0, t0, 8
    jalr x0, (2 * NUM_GPRS_TO_FILL * INVOKE_STUB_LOAD_REG_SIZE)(t4)

.Lload_float\sfx:
    addi t0, t0, 4
    jalr x0, 0(t5)

.Lload_double\sfx:
    addi t0, t0, 8
    jalr x0, (NUM_FPRS_TO_FILL * INVOKE_STUB_LOAD_REG_SIZE)(t5)

.Lreg_handlers_start\sfx:

// Handlers for loading other args (not reference/long/float/double) into GPRs.
    .ifnc \sfx, _instance
        INVOKE_STUB_LOAD_REG lw, a1, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    .endif
    INVOKE_STUB_LOAD_REG lw, a2, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG lw, a3, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG lw, a4, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG lw, a5, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG lw, a6, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG lw, a7, t0, 4, t4, INVOKE_STUB_LOAD_REG_SKIP_OFFSET, .Lfill_regs, \sfx

// Handlers for loading reference args into GPRs.
    .ifnc \sfx, _instance
        INVOKE_STUB_LOAD_REG lwu, a1, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    .endif
    INVOKE_STUB_LOAD_REG lwu, a2, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG lwu, a3, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG lwu, a4, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG lwu, a5, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG lwu, a6, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG lwu, a7, t0, 4, t4, INVOKE_STUB_LOAD_REG_SKIP_OFFSET, .Lfill_regs, \sfx

// Handlers for loading long args into GPRs.
    .ifnc \sfx, _instance
        INVOKE_STUB_LOAD_REG ld, a1, t0, 8, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    .endif
    INVOKE_STUB_LOAD_REG ld, a2, t0, 8, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG ld, a3, t0, 8, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG ld, a4, t0, 8, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG ld, a5, t0, 8, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG ld, a6, t0, 8, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG ld, a7, t0, 8, t4, INVOKE_STUB_LOAD_REG_SKIP_OFFSET, .Lfill_regs, \sfx

// Handlers for loading floats into FPRs.
    INVOKE_STUB_LOAD_REG flw, fa0, t0, 4, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG flw, fa1, t0, 4, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG flw, fa2, t0, 4, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG flw, fa3, t0, 4, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG flw, fa4, t0, 4, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG flw, fa5, t0, 4, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG flw, fa6, t0, 4, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG flw, fa7, t0, 4, t5, INVOKE_STUB_LOAD_REG_SKIP_OFFSET, .Lfill_regs, \sfx

// Handlers for loading doubles into FPRs.
    INVOKE_STUB_LOAD_REG fld, fa0, t0, 8, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG fld, fa1, t0, 8, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG fld, fa2, t0, 8, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG fld, fa3, t0, 8, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG fld, fa4, t0, 8, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG fld, fa5, t0, 8, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG fld, fa6, t0, 8, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx
    INVOKE_STUB_LOAD_REG fld, fa7, t0, 8, t5, INVOKE_STUB_LOAD_REG_SKIP_OFFSET, .Lfill_regs, \sfx

.Lcall_method\sfx:
.endm
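
// Dispatch arithmetic sketch for the static stub (`NUM_GPRS_TO_FILL` = 7, handler size 8):
// the handler table holds 3 * 7 GPR handlers (plain/reference/long) followed by 2 * 8 FPR
// handlers (float/double). A 'J' shorty character thus jumps to T4 + 2*7*8 = T4 + 112, the
// long handler for the next free GPR, while 'D' jumps to T5 + 8*8 = T5 + 64, the double
// handler for the next free FPR.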


// void art_quick_invoke_stub(ArtMethod* method,   // a0
//                            uint32_t*  args,     // a1
//                            uint32_t   argsize,  // a2
//                            Thread*    self,     // a3
//                            JValue*    result,   // a4
//                            char*      shorty)   // a5
ENTRY art_quick_invoke_stub
    INVOKE_STUB_CREATE_FRAME

    // Load args into registers.
    INVOKE_STUB_LOAD_ALL_ARGS _instance

    // Call the method and return.
    INVOKE_STUB_CALL_AND_RETURN
END art_quick_invoke_stub


// void art_quick_invoke_static_stub(ArtMethod* method,   // a0
//                                   uint32_t*  args,     // a1
//                                   uint32_t   argsize,  // a2
//                                   Thread*    self,     // a3
//                                   JValue*    result,   // a4
//                                   char*      shorty)   // a5
ENTRY art_quick_invoke_static_stub
    INVOKE_STUB_CREATE_FRAME

    // Load args into registers.
    INVOKE_STUB_LOAD_ALL_ARGS _static

    // Call the method and return.
    INVOKE_STUB_CALL_AND_RETURN
END art_quick_invoke_static_stub


ENTRY art_quick_generic_jni_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0

    // Save sp, so we can have static CFI info.
    mv   fp, sp
    .cfi_def_cfa_register fp

    li   t0, GENERIC_JNI_TRAMPOLINE_RESERVED_AREA
    sub  sp, sp, t0

    mv   a0, xSELF    // Thread*
    mv   a1, fp       // SP for the managed frame.
    mv   a2, sp       // reserved area for arguments and other saved data (up to managed frame)
    call artQuickGenericJniTrampoline

    // Check for error (class init check or locking for synchronized native method can throw).
    beqz a0, .Lexception_in_native

    mv   t2, a0       // save pointer to native method code into temporary

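    // Layout of the reserved area filled by `artQuickGenericJniTrampoline` (inferred from the
    // loads below): slots 8*0..8*7 hold the native argument GPRs (with the JNIEnv* in the A0
    // slot), slots 8*8..8*15 the argument FPRs, slot 8*16 the @CriticalNative hidden argument,
    // and slot 8*17 the SP to use for the native call.
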
    // Load argument GPRs from stack (saved there by artQuickGenericJniTrampoline).
    ld  a0, 8*0(sp)   // JniEnv* for the native method
    ld  a1, 8*1(sp)
    ld  a2, 8*2(sp)
    ld  a3, 8*3(sp)
    ld  a4, 8*4(sp)
    ld  a5, 8*5(sp)
    ld  a6, 8*6(sp)
    ld  a7, 8*7(sp)

    // Load argument FPRs from stack (saved there by artQuickGenericJniTrampoline).
    fld  fa0, 8*8(sp)
    fld  fa1, 8*9(sp)
    fld  fa2, 8*10(sp)
    fld  fa3, 8*11(sp)
    fld  fa4, 8*12(sp)
    fld  fa5, 8*13(sp)
    fld  fa6, 8*14(sp)
    fld  fa7, 8*15(sp)

    ld  t0, 8*16(sp)  // @CriticalNative arg, used by art_jni_dlsym_lookup_critical_stub

    ld  t1, 8*17(sp)  // restore stack
    mv  sp, t1

    jalr  t2  // call native method

    // Result sign extension is handled in C code; prepare for the artQuickGenericJniEndTrampoline
    // call:
    // uint64_t artQuickGenericJniEndTrampoline(Thread* self,       // a0
    //                                          jvalue result,      // a1 (need to move from a0)
    //                                          uint64_t result_f)  // a2 (need to move from fa0)
    mv  a1, a0
    mv  a0, xSELF
    fmv.x.d  a2, fa0
    call artQuickGenericJniEndTrampoline

    // Pending exceptions possible.
    ld   t0, THREAD_EXCEPTION_OFFSET(xSELF)
    bnez t0, .Lexception_in_native

    // Tear down the alloca.
    mv   sp, fp
    CFI_REMEMBER_STATE
    .cfi_def_cfa_register sp

    LOAD_RUNTIME_INSTANCE a1
    lb   a1, RUN_EXIT_HOOKS_OFFSET_FROM_RUNTIME_INSTANCE(a1)
    bnez a1, .Lcall_method_exit_hook

.Lcall_method_exit_hook_done:
    // This does not clobber the result register a0. a1 is not used for the result as the managed
    // code does not have a 128-bit type. Alternatively we could restore a subset of these
    // registers.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    fmv.d.x  fa0, a0
    ret
    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_REFS_AND_ARGS

.Lcall_method_exit_hook:
    fmv.d.x  fa0, a0
    li   a4, FRAME_SIZE_SAVE_REFS_AND_ARGS
    call  art_quick_method_exit_hook
    j    .Lcall_method_exit_hook_done

.Lexception_in_native:
    // Move to a1 and then to sp to please the assembler.
    ld   a1, THREAD_TOP_QUICK_FRAME_OFFSET(xSELF)
    addi sp, a1, -1  // Remove the GenericJNI tag.
    call art_deliver_pending_exception
END art_quick_generic_jni_trampoline


ENTRY art_quick_to_interpreter_bridge
    SETUP_SAVE_REFS_AND_ARGS_FRAME

    // uint64_t artQuickToInterpreterBridge(ArtMethod* method, Thread* self, ArtMethod** sp)
    // a0 will contain ArtMethod*
    mv   a1, xSELF
    mv   a2, sp
    call artQuickToInterpreterBridge

    // TODO: no need to restore arguments in this case.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    fmv.d.x  fa0, a0  // copy the result to FP result register

    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG t0
END art_quick_to_interpreter_bridge


    .extern artMethodEntryHook
ENTRY art_quick_method_entry_hook
    SETUP_SAVE_EVERYTHING_FRAME

    ld   a0, FRAME_SIZE_SAVE_EVERYTHING(sp)   // Pass ArtMethod*.
    mv   a1, xSELF                            // Pass Thread::Current().
    mv   a2, sp                               // pass SP
    call artMethodEntryHook                   // (ArtMethod*, Thread*, SP)

    CFI_REMEMBER_STATE
    bnez a0, .Lentryhook_deopt

    RESTORE_SAVE_EVERYTHING_FRAME
    ret

.Lentryhook_deopt:
    // Deoptimize.
    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
    call art_quick_do_long_jump               // (Context*)
    unimp  // Unreached
END art_quick_method_entry_hook


    .extern artMethodExitHook
ENTRY art_quick_method_exit_hook
    SETUP_SAVE_EVERYTHING_FRAME

    // frame_size is passed in A4 from JITed code and `art_quick_generic_jni_trampoline`.
    addi a3, sp, SAVE_EVERYTHING_FRAME_OFFSET_FA0  // FP result ptr in kSaveEverything frame
    addi a2, sp, SAVE_EVERYTHING_FRAME_OFFSET_A0   // integer result ptr in kSaveEverything frame
    addi a1, sp, FRAME_SIZE_SAVE_EVERYTHING        // ArtMethod**
    mv   a0, xSELF                                 // Thread::Current
    call artMethodExitHook                         // (Thread*, ArtMethod**, gpr_res*, fpr_res*,
                                                   // frame_size)

    CFI_REMEMBER_STATE
    bnez a0, .Lexithook_deopt_or_exception

    // Normal return.
    RESTORE_SAVE_EVERYTHING_FRAME
    ret

.Lexithook_deopt_or_exception:
    // Deoptimize or exception thrown.
    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
    call art_quick_do_long_jump                    // (Context*)
    unimp  // Unreached
END art_quick_method_exit_hook

// On entry a0 is the long jump context. This is expected to be returned from a previous entrypoint
// call which threw an exception or deoptimized.
ENTRY art_quick_do_long_jump
    // Reserve space for the gprs + fprs.
    INCREASE_FRAME RISCV64_LONG_JUMP_CONTEXT_SIZE

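    // Context layout on the stack (a sketch inferred from the loads below): GPR slots for
    // x0..x31 at 8*0..8*31(sp), the target PC in the extra slot at 8*32(sp), and FPR slots for
    // f0..f31 starting at sp + RISCV64_LONG_JUMP_GPRS_SIZE.
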
    mv a1, sp
    add a2, sp, RISCV64_LONG_JUMP_GPRS_SIZE

    call artContextCopyForLongJump  // Context* context, uintptr_t* gprs, uintptr_t* fprs

    add a0, sp, RISCV64_LONG_JUMP_GPRS_SIZE

    // Load FPRs
    fld  ft0,  8*0(a0)   // f0
    fld  ft1,  8*1(a0)   // f1
    fld  ft2,  8*2(a0)   // f2
    fld  ft3,  8*3(a0)   // f3
    fld  ft4,  8*4(a0)   // f4
    fld  ft5,  8*5(a0)   // f5
    fld  ft6,  8*6(a0)   // f6
    fld  ft7,  8*7(a0)   // f7
    fld  fs0,  8*8(a0)   // f8
    fld  fs1,  8*9(a0)   // f9
    fld  fa0,  8*10(a0)  // f10
    fld  fa1,  8*11(a0)  // f11
    fld  fa2,  8*12(a0)  // f12
    fld  fa3,  8*13(a0)  // f13
    fld  fa4,  8*14(a0)  // f14
    fld  fa5,  8*15(a0)  // f15
    fld  fa6,  8*16(a0)  // f16
    fld  fa7,  8*17(a0)  // f17
    fld  fs2,  8*18(a0)  // f18
    fld  fs3,  8*19(a0)  // f19
    fld  fs4,  8*20(a0)  // f20
    fld  fs5,  8*21(a0)  // f21
    fld  fs6,  8*22(a0)  // f22
    fld  fs7,  8*23(a0)  // f23
    fld  fs8,  8*24(a0)  // f24
    fld  fs9,  8*25(a0)  // f25
    fld  fs10, 8*26(a0)  // f26
    fld  fs11, 8*27(a0)  // f27
    fld  ft8,  8*28(a0)  // f28
    fld  ft9,  8*29(a0)  // f29
    fld  ft10, 8*30(a0)  // f30
    fld  ft11, 8*31(a0)  // f31

    // Load GPRs.
    // Skip slot 8*0(sp) for zero/x0 as it is hard-wired zero.
    ld  ra,   8*1(sp)   // x1
    // Skip slot 8*2(sp) for sp/x2 as it is set below.
    // Skip slot 8*3(sp) for the platform-specific global pointer gp/x3.
    // Skip slot 8*4(sp) for the platform-specific thread pointer tp/x4.
    // Skip slot 8*5(sp) for t0/x5 as it is clobbered below.
    // Skip slot 8*6(sp) for t1/x6 as it is clobbered below.
    ld  t2,   8*7(sp)   // x7
    ld  s0,   8*8(sp)   // x8
    ld  s1,   8*9(sp)   // x9
    ld  a0,   8*10(sp)  // x10
    ld  a1,   8*11(sp)  // x11
    ld  a2,   8*12(sp)  // x12
    ld  a3,   8*13(sp)  // x13
    ld  a4,   8*14(sp)  // x14
    ld  a5,   8*15(sp)  // x15
    ld  a6,   8*16(sp)  // x16
    ld  a7,   8*17(sp)  // x17
    ld  s2,   8*18(sp)  // x18
    ld  s3,   8*19(sp)  // x19
    ld  s4,   8*20(sp)  // x20
    ld  s5,   8*21(sp)  // x21
    ld  s6,   8*22(sp)  // x22
    ld  s7,   8*23(sp)  // x23
    ld  s8,   8*24(sp)  // x24
    ld  s9,   8*25(sp)  // x25
    ld  s10,  8*26(sp)  // x26
    ld  s11,  8*27(sp)  // x27
    ld  t3,   8*28(sp)  // x28
    ld  t4,   8*29(sp)  // x29
    ld  t5,   8*30(sp)  // x30
    ld  t6,   8*31(sp)  // x31

    // Load sp to t0.
    ld  t0, 8*2(sp)

    // Load the PC to t1; it is in the last stack slot.
    ld  t1, 8*32(sp)

    // Set sp. Do not access fprs_ and gprs_ from now on, they are below sp.
    mv sp, t0
    .cfi_def_cfa_offset 0

    jr  t1
END art_quick_do_long_jump


.macro DEOPT_OR_RETURN temp, is_ref = 0
    lwu   \temp, THREAD_DEOPT_CHECK_REQUIRED_OFFSET(xSELF)
    bnez  \temp, 2f
    ret
2:
    SETUP_SAVE_EVERYTHING_FRAME
    li    a2, \is_ref                 // pass if result is a reference
    mv    a1, a0                      // pass the result
    mv    a0, xSELF                   // pass Thread::Current
    call  artDeoptimizeIfNeeded       // (Thread*, uintptr_t, bool)

    CFI_REMEMBER_STATE
    bnez  a0, 3f

    RESTORE_SAVE_EVERYTHING_FRAME
    ret
3:
    // Deoptimize.
    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
    call  art_quick_do_long_jump      // (Context*)
    unimp  // Unreached
.endm


.macro RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
    CFI_REMEMBER_STATE
    beqz  a0, 1f
    DEOPT_OR_RETURN a1, /*is_ref=*/ 1
1:
    CFI_RESTORE_STATE_AND_DEF_CFA sp, 0
    DELIVER_PENDING_EXCEPTION
.endm


.macro RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
    CFI_REMEMBER_STATE
    bnez  a0, 1f
    DEOPT_OR_RETURN a1
1:
    CFI_RESTORE_STATE_AND_DEF_CFA sp, 0
    DELIVER_PENDING_EXCEPTION
.endm


.macro RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION is_ref = 0
    lwu   a1, THREAD_EXCEPTION_OFFSET(xSELF)   // Get exception field.
    CFI_REMEMBER_STATE
    bnez  a1, 1f
    DEOPT_OR_RETURN a1, \is_ref                // Check if deopt is required.
1:
    CFI_RESTORE_STATE_AND_DEF_CFA sp, 0
    DELIVER_PENDING_EXCEPTION                  // Deliver exception on current thread.
.endm


.macro RETURN_REF_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
    RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION /* is_ref= */ 1
.endm


.macro DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_A0 temp, is_ref
    ld      \temp, THREAD_DEOPT_CHECK_REQUIRED_OFFSET(xSELF)
    CFI_REMEMBER_STATE
    bnez    \temp, 2f
    RESTORE_SAVE_EVERYTHING_FRAME /* load_a0= */ 0
    ret
2:
    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
    sd      a0, SAVE_EVERYTHING_FRAME_OFFSET_A0(sp)    // update result in the frame
    li      a2, \is_ref                                // pass if result is a reference
    mv      a1, a0                                     // pass the result
    mv      a0, xSELF                                  // Thread::Current
    call    artDeoptimizeIfNeeded

    CFI_REMEMBER_STATE
    bnez    a0, 3f

    RESTORE_SAVE_EVERYTHING_FRAME
    ret

3:
    // Deoptimize.
    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
    call    art_quick_do_long_jump                     // (Context*)
    unimp  // Unreached
.endm


// Entry from managed code that tries to lock the object in a fast path and
// calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
// A0 holds the possibly null object to lock.
ENTRY art_quick_lock_object
    LOCK_OBJECT_FAST_PATH a0, art_quick_lock_object_no_inline, /*can_be_null*/ 1
END art_quick_lock_object


// Entry from managed code that calls `artLockObjectFromCode()`, may block for GC.
// A0 holds the possibly null object to lock.
    .extern artLockObjectFromCode
ENTRY art_quick_lock_object_no_inline
    // This is also the slow path for `art_quick_lock_object`.
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
    mv    a1, xSELF                   // pass Thread::Current
    call  artLockObjectFromCode       // (Object*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
END art_quick_lock_object_no_inline


// Entry from managed code that tries to unlock the object in a fast path and calls
// `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
// A0 holds the possibly null object to unlock.
ENTRY art_quick_unlock_object
    UNLOCK_OBJECT_FAST_PATH a0, art_quick_unlock_object_no_inline, /*can_be_null*/ 1
END art_quick_unlock_object


// Entry from managed code that calls `artUnlockObjectFromCode()`
// and delivers exception on failure.
// A0 holds the possibly null object to unlock.
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object_no_inline
    // This is also the slow path for `art_quick_unlock_object`.
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
    mv    a1, xSELF                   // pass Thread::Current
    call  artUnlockObjectFromCode     // (Object*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
END art_quick_unlock_object_no_inline


// Called by managed code that is attempting to call a method on a proxy class. On entry a0 holds
// the proxy method and a1 holds the receiver. The frame size of the invoked proxy method agrees
// with kSaveRefsAndArgs frame.
.extern artQuickProxyInvokeHandler
ENTRY art_quick_proxy_invoke_handler
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0

    // uint64_t artQuickProxyInvokeHandler(ArtMethod* proxy_method,   // a0
    //                                     mirror::Object* receiver,  // a1
    //                                     Thread* self,              // a2
    //                                     ArtMethod** sp)            // a3
    mv    a2, xSELF                   // pass Thread::Current
    mv    a3, sp                      // pass sp
    call  artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, sp)

    ld    a2, THREAD_EXCEPTION_OFFSET(xSELF)
    bnez  a2, .Lexception_in_proxy    // success if no exception is pending
    CFI_REMEMBER_STATE
    RESTORE_SAVE_REFS_AND_ARGS_FRAME  // Restore frame
    fmv.d.x  fa0, a0                  // Store result in fa0 in case it was float or double
    ret                               // return on success

.Lexception_in_proxy:
    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_REFS_AND_ARGS
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    DELIVER_PENDING_EXCEPTION
END art_quick_proxy_invoke_handler


// Compiled code has requested that we deoptimize into the interpreter. The deoptimization
// will long jump to the upcall with a special exception of -1.
    .extern artDeoptimizeFromCompiledCode
ENTRY art_quick_deoptimize_from_compiled_code
    SETUP_SAVE_EVERYTHING_FRAME
    mv    a1, xSELF                           // Pass Thread::Current().
    call  artDeoptimizeFromCompiledCode       // (DeoptimizationKind, Thread*)
    call  art_quick_do_long_jump              // (Context*)
    unimp  // Unreached
END art_quick_deoptimize_from_compiled_code


    .extern artStringBuilderAppend
ENTRY art_quick_string_builder_append
    SETUP_SAVE_REFS_ONLY_FRAME                // Save callee saves in case of GC.
    addi  a1, sp, (FRAME_SIZE_SAVE_REFS_ONLY + __SIZEOF_POINTER__)  // Pass args.
    mv    a2, xSELF                           // Pass Thread::Current().
    call  artStringBuilderAppend              // (uint32_t, const uint32_t*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
END art_quick_string_builder_append


// Entry from managed code that calls artInstanceOfFromCode and on failure calls
// artThrowClassCastExceptionForObject.
    .extern artInstanceOfFromCode
    .extern artThrowClassCastExceptionForObject
ENTRY art_quick_check_instance_of
    // Type check using the bit string passes null as the target class. In that case just throw.
    beqz  a1, .Lthrow_class_cast_exception_for_bitstring_check

    // Store arguments and return address register.
    // Stack needs to be 16B aligned on calls.
    INCREASE_FRAME 32
    sd    a0, 0*8(sp)
    sd    a1, 1*8(sp)
    SAVE_GPR ra, 3*8

    // Call runtime code.
    call  artInstanceOfFromCode

    // Restore RA.
    RESTORE_GPR ra, 3*8

    // Check for exception.
    CFI_REMEMBER_STATE
    beqz a0, .Lthrow_class_cast_exception

    // Remove spill area and return (no need to restore A0 and A1).
    DECREASE_FRAME 32
    ret

.Lthrow_class_cast_exception:
    CFI_RESTORE_STATE_AND_DEF_CFA sp, 32
    // Restore A0 and A1 and remove the spill area.
    ld    a0, 0*8(sp)
    ld    a1, 1*8(sp)
    DECREASE_FRAME 32

.Lthrow_class_cast_exception_for_bitstring_check:
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME         // Save all registers as basis for long jump context.
    mv    a2, xSELF                           // Pass Thread::Current().
    call  artThrowClassCastExceptionForObject // (Object*, Class*, Thread*)
    call  art_quick_do_long_jump              // (Context*)
    unimp  // Unreached
END art_quick_check_instance_of


.macro N_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING n, c_name, cxx_name
.extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME       // save all registers as basis for long jump context.
    mv    a\n, xSELF                  // pass Thread::Current.
    call  \cxx_name                   // \cxx_name(args..., Thread*).
    call  art_quick_do_long_jump      // (Context*)
    unimp  // Unreached
END \c_name
.endm


.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    N_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING 0, \c_name, \cxx_name
.endm


.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    N_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING 2, \c_name, \cxx_name
.endm


.macro N_ARG_RUNTIME_EXCEPTION n, c_name, cxx_name
.extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context.
    mv    a\n, xSELF                  // pass Thread::Current.
    call  \cxx_name                   // \cxx_name(args..., Thread*).
    call  art_quick_do_long_jump      // (Context*)
    unimp  // Unreached
END \c_name
.endm

.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    N_ARG_RUNTIME_EXCEPTION 0, \c_name, \cxx_name
.endm


.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    N_ARG_RUNTIME_EXCEPTION 1, \c_name, \cxx_name
.endm


// Called by managed code to create and deliver a NullPointerException.
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING \
        art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode


// Call installed by a signal handler to create and deliver a NullPointerException.
.extern artThrowNullPointerExceptionFromSignal
ENTRY art_quick_throw_null_pointer_exception_from_signal
    // The fault handler pushes the gc map address, i.e. "return address", to stack
    // and passes the fault address in RA. So we need to set up the CFI info accordingly.
    .cfi_def_cfa_offset __SIZEOF_POINTER__
    .cfi_rel_offset ra, 0
    // Save all registers as basis for long jump context.
    INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - __SIZEOF_POINTER__)
    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_RA
    mv    a0, ra                      // pass the fault address stored in RA by the fault handler.
    mv    a1, xSELF                   // pass Thread::Current.
    call  artThrowNullPointerExceptionFromSignal  // (arg, Thread*).
    call  art_quick_do_long_jump                  // (Context*)
    unimp  // Unreached
END art_quick_throw_null_pointer_exception_from_signal


// Called by managed code to deliver an ArithmeticException.
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode


// Called by managed code to create and deliver an ArrayIndexOutOfBoundsException.
// Arg0 holds index, arg1 holds limit.
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode


// Called by managed code to create and deliver a StringIndexOutOfBoundsException
// as if thrown from a call to String.charAt(). Arg0 holds index, arg1 holds limit.
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING \
        art_quick_throw_string_bounds, artThrowStringBoundsFromCode

// Called by managed code to create and deliver a StackOverflowError.
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

// Called by managed code to deliver an exception.
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode


// Called to attempt to execute an obsolete method.
ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod


ENTRY art_quick_resolution_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME

    // const void* artQuickResolutionTrampoline(ArtMethod* called,         // a0
    //                                          mirror::Object* receiver,  // a1
    //                                          Thread* self,              // a2
    //                                          ArtMethod** sp)            // a3
    mv   a2, xSELF
    mv   a3, sp
    call artQuickResolutionTrampoline
    CFI_REMEMBER_STATE
    beqz a0, 1f
    mv   t0, a0    // Remember returned code pointer in t0.
    ld   a0, (sp)  // artQuickResolutionTrampoline puts called method in *sp.

    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    jr   t0
1:
    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_REFS_AND_ARGS
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    DELIVER_PENDING_EXCEPTION
END art_quick_resolution_trampoline


ENTRY art_quick_test_suspend
    SETUP_SAVE_EVERYTHING_FRAME \
        RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET
    mv   a0, xSELF
    call artTestSuspendFromCode

    CFI_REMEMBER_STATE
    bnez a0, .Ltest_suspend_deoptimize

    RESTORE_SAVE_EVERYTHING_FRAME
    ret

.Ltest_suspend_deoptimize:
    // Deoptimize.
    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
    call art_quick_do_long_jump   // (Context*)
    unimp  // Unreached
END art_quick_test_suspend


ENTRY art_quick_compile_optimized
    SETUP_SAVE_EVERYTHING_FRAME
    ld   a0, FRAME_SIZE_SAVE_EVERYTHING(sp)   // pass ArtMethod
    mv   a1, xSELF                            // pass Thread::Current
    call artCompileOptimized                  // (ArtMethod*, Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME
    // Note: If we implement implicit suspend checks or a marking register for GC, we don't need
    // to restore such registers here, as artCompileOptimized doesn't allow thread suspension.
    ret
END art_quick_compile_optimized


/*  extern "C" void art_quick_osr_stub(void*          stack,                 A0
 *                                     size_t         stack_size_in_bytes,   A1
 *                                     const uint8_t* native_pc,             A2
 *                                     JValue*        result,                A3
 *                                     char*          shorty,                A4
 *                                     Thread*        self)                  A5
 */
ENTRY art_quick_osr_stub
    // Save all callee-save registers (we do not fill the spill area in the OSR frame, so we
    // need to preserve them here) and A3 (it will be needed after the OSR method returns).
    // Also add space for the `ArtMethod*` slot (null to indicate transition) and padding.
    SAVE_SIZE=(12 + 12 + /* RA */ 1 + /* A3 */ 1 + /* ArtMethod* */ 1 + /* padding */ 1) * 8
    INCREASE_FRAME SAVE_SIZE
    sd zero, 0*8(sp)          // Store null to the `ArtMethod*` slot to indicate transition.
                              // Skip padding.
    SAVE_GPR a3, 2*8          // Save `result`.
    SAVE_FPR fs0, 3*8
    SAVE_FPR fs1, 4*8
    SAVE_FPR fs2, 5*8
    SAVE_FPR fs3, 6*8
    SAVE_FPR fs4, 7*8
    SAVE_FPR fs5, 8*8
    SAVE_FPR fs6, 9*8
    SAVE_FPR fs7, 10*8
    SAVE_FPR fs8, 11*8
    SAVE_FPR fs9, 12*8
    SAVE_FPR fs10, 13*8
    SAVE_FPR fs11, 14*8
    SAVE_GPR s2, 15*8
    SAVE_GPR s3, 16*8
    SAVE_GPR s4, 17*8
    SAVE_GPR s5, 18*8
    SAVE_GPR s6, 19*8
    SAVE_GPR s7, 20*8
    SAVE_GPR s8, 21*8
    SAVE_GPR s9, 22*8
    SAVE_GPR s10, 23*8
    SAVE_GPR s11, 24*8
    SAVE_GPR xSELF, 25*8       // Save xSELF/S1.
    SAVE_GPR fp, 26*8          // Save FP/S0.
    SAVE_GPR ra, 27*8          // Save return address.

    // Make the new FP point to the location where we stored the old FP.
    // Some stack-walking tools may rely on this singly-linked list of saved FPs.
    addi fp, sp, (26*8)  // save frame pointer
    .cfi_def_cfa fp, SAVE_SIZE - (26*8)

    mv   xSELF, a5

    CFI_REMEMBER_STATE
    jal .Losr_entry

    // The called method removes the stack frame created in `.Losr_entry`.
    // The SP is already correctly restored, we do not need to restore it from FP.
    .cfi_def_cfa sp, SAVE_SIZE

    // Restore saved registers including the result address.
    RESTORE_GPR a3, 2*8       // Restore `result`.
    RESTORE_FPR fs0, 3*8
    RESTORE_FPR fs1, 4*8
    RESTORE_FPR fs2, 5*8
    RESTORE_FPR fs3, 6*8
    RESTORE_FPR fs4, 7*8
    RESTORE_FPR fs5, 8*8
    RESTORE_FPR fs6, 9*8
    RESTORE_FPR fs7, 10*8
    RESTORE_FPR fs8, 11*8
    RESTORE_FPR fs9, 12*8
    RESTORE_FPR fs10, 13*8
    RESTORE_FPR fs11, 14*8
    RESTORE_GPR s2, 15*8
    RESTORE_GPR s3, 16*8
    RESTORE_GPR s4, 17*8
    RESTORE_GPR s5, 18*8
    RESTORE_GPR s6, 19*8
    RESTORE_GPR s7, 20*8
    RESTORE_GPR s8, 21*8
    RESTORE_GPR s9, 22*8
    RESTORE_GPR s10, 23*8
    RESTORE_GPR s11, 24*8
    RESTORE_GPR xSELF, 25*8   // Restore xSELF/S1.
    RESTORE_GPR fp, 26*8      // Restore FP/S0.
    RESTORE_GPR ra, 27*8      // Restore return address.
    DECREASE_FRAME SAVE_SIZE

    // The compiler put the result in A0. Doesn't matter if it is 64 or 32 bits.
    sd   a0, (a3)
    ret

.Losr_entry:
    CFI_RESTORE_STATE_AND_DEF_CFA fp, SAVE_SIZE - (26*8)

    // Prepare the destination register for backward copy of arguments.
    addi t1, sp, -8

    // Update stack pointer for the callee frame.
    sub  sp, sp, a1

    // Subtract the return address slot size from args size.
    addi a1, a1, -8

    // Update return address slot expected by the callee.
    sd   ra, (t1)

    // Prepare the source register for backward copy of arguments.
    add  t0, a0, a1

    // Copy arguments into stack frame. Use simple backward-copy routine for now.
    // There is always at least the `ArtMethod*` to copy.
    // A0 - source address
    // A1 - args length
    // SP - destination address.
    // T0 - loop variable initialized to A0 + A1 for backward copy
    // T1 - loop variable initialized to SP + A1 for backward copy
    // T2 - temporary for holding the copied value
.Losr_loop:
    addi t0, t0, -8
    ld   t2, (t0)
    addi t1, t1, -8
    sd   t2, (t1)
    bne  t1, sp, .Losr_loop

    // Branch to the OSR entry point.
    jr   a2

END art_quick_osr_stub


    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/A0 with the target Method*, arg0/A0 will contain
     * the method_idx. This wrapper will call the appropriate C++ helper while preserving arguments
     * and allowing a moving GC to update references in callee-save registers.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/A1.
     *
     * The helper will attempt to locate the target and return a 128-bit result consisting of the
     * target `ArtMethod*` in A0 and its `entry_point_from_quick_compiled_code_` in A1.
     *
     * If unsuccessful, the helper will return null/null. There will be a pending exception
     * to deliver in the thread.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the RA
     * pointing back to the original caller.
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    mv    a2, xSELF                           // Pass Thread::Current().
    mv    a3, sp                              // Pass pointer to the saved frame context.
    call  \cxx_name                           // (method_idx, this, Thread*, $sp)
    mv    t0, a1                              // Save method's code pointer in T0.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    beq   a0, zero, 1f
    jr    t0
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, \
                  artInvokeInterfaceTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, \
                  artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, \
                  artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, \
                  artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, \
                  artInvokeVirtualTrampolineWithAccessCheck

    /*
     * Polymorphic method invocation.
     * On entry:
     *   A0 = unused
     *   A1 = receiver
     */
.extern artInvokePolymorphic
ENTRY art_quick_invoke_polymorphic
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    mv      a0, a1                            // Pass the receiver.
    mv      a1, xSELF                         // Pass Thread::Current().
    mv      a2, sp                            // Pass pointer to the saved frame context.
    call    artInvokePolymorphic              // artInvokePolymorphic(receiver, Thread*, context)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    fmv.d.x fa0, a0                           // Copy the result also to the FP return register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG t0
END art_quick_invoke_polymorphic

    /*
     * InvokeCustom invocation.
     * On entry:
     *   A0 = call_site_idx
     */
.extern artInvokeCustom
ENTRY art_quick_invoke_custom
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    mv      a1, xSELF                         // Pass Thread::Current().
    mv      a2, sp                            // Pass pointer to the saved frame context.
    call    artInvokeCustom                   // artInvokeCustom(call_site_idx, Thread*, context)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    fmv.d.x fa0, a0                           // Copy the result also to the FP return register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG t0
END art_quick_invoke_custom

    /*
     * Called to resolve an imt conflict.
     * On entry:
     *   A0 is the conflict ArtMethod.
     *   T0 is a hidden argument that holds the target interface method.
     */
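// A note on the layout (matching the iteration below): the ImtConflictTable pointed to by the
// conflict method's JNI field is a sequence of (interface method, implementation method)
// pointer pairs terminated by a null interface method, hence the stride of
// 2 * __SIZEOF_POINTER__ and the load from __SIZEOF_POINTER__(t1) on a hit.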
ENTRY art_quick_imt_conflict_trampoline
    ld      t1, ART_METHOD_JNI_OFFSET_64(a0)  // Load ImtConflictTable
    ld      a0, 0(t1)                         // Load first entry in ImtConflictTable.
.Limt_table_iterate:
    // Branch if found.
    beq     a0, t0, .Limt_table_found

    // If the entry is null, the interface method is not in the ImtConflictTable.
    beqz    a0, .Lconflict_trampoline
    // Iterate over the entries of the ImtConflictTable.
    addi    t1, t1, (2 * __SIZEOF_POINTER__)
    ld      a0, 0(t1)
    j       .Limt_table_iterate
.Limt_table_found:
    // We successfully hit an entry in the table. Load the target method and jump to it.
    ld      a0, __SIZEOF_POINTER__(t1)
    ld      t1, ART_METHOD_QUICK_CODE_OFFSET_64(a0)
    jr      t1
.Lconflict_trampoline:
    // Call the runtime stub to populate the ImtConflictTable and jump to the
    // resolved method.
    mv      a0, t0  // Load interface method.
    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
END art_quick_imt_conflict_trampoline


.macro UPDATE_INLINE_CACHE_ENTRY class, entry, temp, loop_label, done_label, next_label
\loop_label:
    lwu     \temp, (\entry)
    beq     \class, \temp, \done_label
    bnez    \temp, \next_label
    lr.w    \temp, (\entry)
    bnez    \temp, \loop_label
    sc.w    \temp, \class, (\entry)
    beqz    \temp, \done_label
    j       \loop_label
.endm
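
// A note on the LR/SC sequence above: the plain `lwu` first checks the entry without acquiring
// a reservation; only an entry that still looks empty is claimed with `lr.w`/`sc.w`. On success
// `sc.w` leaves zero in \temp; otherwise (lost reservation) the loop restarts at \loop_label.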

// A0 contains the class, T5 contains the inline cache. T6 can be used, T5 can be clobbered.
ENTRY art_quick_update_inline_cache
#if (INLINE_CACHE_SIZE != 5)
#error "INLINE_CACHE_SIZE not as expected."
#endif
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    // Don't update the cache if we are marking.
    lwu     t6, THREAD_IS_GC_MARKING_OFFSET(xSELF)
    bnez    t6, .Ldone
#endif
    addi    t5, t5, INLINE_CACHE_CLASSES_OFFSET
    UPDATE_INLINE_CACHE_ENTRY a0, t5, t6, .Lentry1_loop, .Ldone, .Lentry2
.Lentry2:
    addi    t5, t5, 4
    UPDATE_INLINE_CACHE_ENTRY a0, t5, t6, .Lentry2_loop, .Ldone, .Lentry3
.Lentry3:
    addi    t5, t5, 4
    UPDATE_INLINE_CACHE_ENTRY a0, t5, t6, .Lentry3_loop, .Ldone, .Lentry4
.Lentry4:
    addi    t5, t5, 4
    UPDATE_INLINE_CACHE_ENTRY a0, t5, t6, .Lentry4_loop, .Ldone, .Lentry5
.Lentry5:
    // Unconditionally store, the inline cache is megamorphic.
    sw      a0, 4(t5)
.Ldone:
    ret
END art_quick_update_inline_cache


.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL \
        name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset  // Save everything for stack crawl.
    mv    a1, xSELF                   // Pass Thread::Current().
    call  \entrypoint                 // (uint32_t/Class* index/klass, Thread* self)
    beqz  a0, 1f                      // If result is null, deliver the exception.
    DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_A0 /* temp= */ a1, /* is_ref= */ 1
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END \name
.endm


.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \
            \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
.endm


ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT \
        art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL \
        art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode


// Helper macros for `art_quick_aput_obj`.
#ifdef USE_READ_BARRIER
#ifdef USE_BAKER_READ_BARRIER
.macro BAKER_RB_CHECK_GRAY_BIT_AND_LOAD dest, obj, offset, gray_slow_path_label
    lw    t6, MIRROR_OBJECT_LOCK_WORD_OFFSET(\obj)
    slliw t6, t6, 31 - LOCK_WORD_READ_BARRIER_STATE_SHIFT  // Shift the state bit to sign bit.
    bltz  t6, \gray_slow_path_label
    // False dependency to avoid needing load/load fence.
    xor   t6, t6, t6
    add   \obj, \obj, t6
    lwu   \dest, \offset(\obj)                     // Heap reference = 32b; zero-extends to `dest`.
    UNPOISON_HEAP_REF \dest
.endm

.macro BAKER_RB_LOAD_AND_MARK dest, obj, offset, mark_function
    lwu   \dest, \offset(\obj)                     // Heap reference = 32b; zero-extends to `dest`.
    UNPOISON_HEAP_REF \dest
    // Save RA in a register preserved by `art_quick_read_barrier_mark_regNN`
    // and unused by the `art_quick_aput_obj`.
    mv    t2, ra
    call  \mark_function
    mv    ra, t2                                   // Restore RA.
.endm
#else  // USE_BAKER_READ_BARRIER
    .extern artReadBarrierSlow
.macro READ_BARRIER_SLOW dest, obj, offset
    // Store registers used in art_quick_aput_obj (a0-a4, RA), stack is 16B aligned.
    INCREASE_FRAME 48
    SAVE_GPR a0, 0*8
    SAVE_GPR a1, 1*8
    SAVE_GPR a2, 2*8
    SAVE_GPR a3, 3*8
    SAVE_GPR a4, 4*8
    SAVE_GPR ra, 5*8

    // mv a0, \ref                  // Pass ref in A0 (no-op for now since parameter ref is unused).
    .ifnc \obj, a1
        mv a1, \obj                 // Pass `obj`.
    .endif
    li    a2, \offset               // Pass offset.
    call  artReadBarrierSlow        // artReadBarrierSlow(ref, obj, offset)
    // No need to unpoison return value in A0, `artReadBarrierSlow()` would do the unpoisoning.
    .ifnc \dest, a0
        mv \dest, a0                // save return value in dest
    .endif

    // Conditionally restore saved registers
    RESTORE_GPR_NE a0, 0*8, \dest
    RESTORE_GPR_NE a1, 1*8, \dest
    RESTORE_GPR_NE a2, 2*8, \dest
    RESTORE_GPR_NE a3, 3*8, \dest
    RESTORE_GPR_NE a4, 4*8, \dest
    RESTORE_GPR ra, 5*8
    DECREASE_FRAME 48
.endm
#endif  // USE_BAKER_READ_BARRIER
#endif  // USE_READ_BARRIER

ENTRY art_quick_aput_obj
    beqz  a2, .Laput_obj_null
#if defined(USE_READ_BARRIER) && !defined(USE_BAKER_READ_BARRIER)
    READ_BARRIER_SLOW a3, a0, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER_SLOW a3, a3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
    READ_BARRIER_SLOW a4, a2, MIRROR_OBJECT_CLASS_OFFSET
#else  // !defined(USE_READ_BARRIER) || defined(USE_BAKER_READ_BARRIER)
#ifdef USE_READ_BARRIER
    // TODO(riscv64): Define marking register to avoid this load.
    lw    t6, THREAD_IS_GC_MARKING_OFFSET(xSELF)
    bnez  t6, .Laput_obj_gc_marking
#endif  // USE_READ_BARRIER
    lwu   a3, MIRROR_OBJECT_CLASS_OFFSET(a0)           // Heap reference = 32b; zero-extends to a3.
    UNPOISON_HEAP_REF a3
    lwu   a3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET(a3)   // Heap reference = 32b; zero-extends to a3.
    UNPOISON_HEAP_REF a3
    lwu   a4, MIRROR_OBJECT_CLASS_OFFSET(a2)           // Heap reference = 32b; zero-extends to a4.
    UNPOISON_HEAP_REF a4
#endif  // !defined(USE_READ_BARRIER) || defined(USE_BAKER_READ_BARRIER)
    // value's type == array's component type - trivial assignability
    bne   a3, a4, .Laput_obj_check_assignability
.Laput_obj_store:
    sh2add a3, a1, a0
    POISON_HEAP_REF a2
    sw    a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET(a3)      // Heap reference = 32b.
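    // Mark the GC card for the stored-into array (a hedged note: ART biases the card table base
    // kept in the thread so that its low byte equals the dirty-card value, so storing the low
    // byte of A3 below dirties the card without loading a separate constant).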
1318    ld    a3, THREAD_CARD_TABLE_OFFSET(xSELF)
1319    srli  a0, a0, CARD_TABLE_CARD_SHIFT
1320    add   a0, a0, a3
1321    sb    a3, (a0)
1322    ret
1323
1324.Laput_obj_null:
1325    sh2add a3, a1, a0
1326    sw    a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET(a3)      // Heap reference = 32b.
1327    ret
1328
1329.Laput_obj_check_assignability:
1330    // Store arguments and return register
1331    INCREASE_FRAME 32
1332    SAVE_GPR a0, 0*8
1333    SAVE_GPR a1, 1*8
1334    SAVE_GPR a2, 2*8
1335    SAVE_GPR ra, 3*8
1336
1337    // Call runtime code
1338    mv    a0, a3          // Heap reference, 32b, "uncompress" = do nothing, already zero-extended.
1339    mv    a1, a4          // Heap reference, 32b, "uncompress" = do nothing, already zero-extended.
1340    call  artIsAssignableFromCode
1341
1342    // Check for exception
    CFI_REMEMBER_STATE
    beqz a0, .Laput_obj_throw_array_store_exception

    // Restore
    RESTORE_GPR a0, 0*8
    RESTORE_GPR a1, 1*8
    RESTORE_GPR a2, 2*8
    RESTORE_GPR ra, 3*8
    DECREASE_FRAME 32

    sh2add a3, a1, a0
    POISON_HEAP_REF a2
    sw    a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET(a3)      // Heap reference = 32b.
    ld    a3, THREAD_CARD_TABLE_OFFSET(xSELF)
    srli  a0, a0, CARD_TABLE_CARD_SHIFT
    add   a0, a0, a3
    sb    a3, (a0)
    ret

.Laput_obj_throw_array_store_exception:
    CFI_RESTORE_STATE_AND_DEF_CFA sp, 32
    RESTORE_GPR a0, 0*8
    RESTORE_GPR a1, 1*8
    RESTORE_GPR a2, 2*8
    RESTORE_GPR ra, 3*8
    DECREASE_FRAME 32

#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    CFI_REMEMBER_STATE
#endif  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    mv    a1, a2                        // Pass value.
    mv    a2, xSELF                     // Pass Thread::Current().
    call  artThrowArrayStoreException   // (Object*, Object*, Thread*).
    call  art_quick_do_long_jump        // (Context*)
    unimp  // Unreached

#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    CFI_RESTORE_STATE_AND_DEF_CFA sp, 0
.Laput_obj_gc_marking:
    BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
        a3, a0, MIRROR_OBJECT_CLASS_OFFSET, .Laput_obj_mark_array_class
.Laput_obj_mark_array_class_continue:
    BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
        a3, a3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, .Laput_obj_mark_array_element
.Laput_obj_mark_array_element_continue:
    BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
        a4, a2, MIRROR_OBJECT_CLASS_OFFSET, .Laput_obj_mark_object_class
.Laput_obj_mark_object_class_continue:
    // value's type == array's component type - trivial assignability
    bne   a3, a4, .Laput_obj_check_assignability
    j     .Laput_obj_store

.Laput_obj_mark_array_class:
    BAKER_RB_LOAD_AND_MARK a3, a0, MIRROR_OBJECT_CLASS_OFFSET, art_quick_read_barrier_mark_reg13
    j     .Laput_obj_mark_array_class_continue

.Laput_obj_mark_array_element:
    BAKER_RB_LOAD_AND_MARK \
        a3, a3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, art_quick_read_barrier_mark_reg13
    j     .Laput_obj_mark_array_element_continue

.Laput_obj_mark_object_class:
    BAKER_RB_LOAD_AND_MARK a4, a2, MIRROR_OBJECT_CLASS_OFFSET, art_quick_read_barrier_mark_reg14
    j     .Laput_obj_mark_object_class_continue
#endif  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
END art_quick_aput_obj


// Create a function `name` calling the art::ReadBarrier::Mark routine, getting its argument and
// returning its result through \reg, saving and restoring all caller-save registers.
//
// The generated function follows a non-standard calling convention:
// - register `reg` is used to pass the singleton argument,
// - register `reg` is used to return the result,
// - all other registers are callee-save (the values they hold are preserved).
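//
// For example (a sketch, not literal compiler output): a Baker read barrier on a
// reference held in A3 can be emitted as a bare
//     call art_quick_read_barrier_mark_reg13
// with the possibly updated reference returned in A3 and all other registers
// preserved, so the caller needs no spills around the call.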
.macro READ_BARRIER_MARK_REG name, reg
ENTRY \name
    beqz \reg, .Lrb_return_\name  // early return if null

    // Save t5 and t6 onto stack to honor caller-save calling convention.
    INCREASE_FRAME 16
    SAVE_GPR t5, (8*0)
    SAVE_GPR t6, (8*1)

    lw t5, MIRROR_OBJECT_LOCK_WORD_OFFSET(\reg)  // t5 := lock word
    slliw t6, t5, 31-LOCK_WORD_MARK_BIT_SHIFT    // mark bit into MSB
    bltz t6, .Lrb_tmp_restore_\name
    // Check if the top two bits are set. If so, it is a forwarding address.
    slliw t6, t5, 1
    and t6, t6, t5
    CFI_REMEMBER_STATE
    bgez t6, .Lrb_full_\name
    // Extract and zero-extend the forwarding address.
    slli \reg, t5, (LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT + 32)
    srli \reg, \reg, 32
    .ifc \reg, t5
      sd t5, (8*0)(sp)
    .endif
    .ifc \reg, t6
      sd t6, (8*1)(sp)
    .endif
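    // Sketch of the extraction above: the `slli`/`srli` pair computes
    //   \reg = (lock_word & ((1 << (32 - SHIFT)) - 1)) << SHIFT
    // with SHIFT = LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT, dropping the state
    // bits and zero-extending the result. The `.ifc` stores then re-save T5/T6
    // when `\reg` is one of them, so the restore below yields the forwarding
    // address rather than the clobbered temporary.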
.Lrb_tmp_restore_\name:
    RESTORE_GPR t5, (8*0)
    RESTORE_GPR t6, (8*1)
    DECREASE_FRAME 16
.Lrb_return_\name:
    ret
.Lrb_full_\name:
    CFI_RESTORE_STATE_AND_DEF_CFA sp, 16
    // Save remaining caller-save registers on stack. t5 and t6 already saved.
    // fa0-fa7, ft0-ft11, a0-a7, t0-t4, ra: 8 * (8 + 12 + 8 + 5 + 1) = 8 * 34 = 272 bytes
    INCREASE_FRAME 272
    SAVE_FPR fa0,  (8*0)
    SAVE_FPR fa1,  (8*1)
    SAVE_FPR fa2,  (8*2)
    SAVE_FPR fa3,  (8*3)
    SAVE_FPR fa4,  (8*4)
    SAVE_FPR fa5,  (8*5)
    SAVE_FPR fa6,  (8*6)
    SAVE_FPR fa7,  (8*7)
    SAVE_FPR ft0,  (8*8)
    SAVE_FPR ft1,  (8*9)
    SAVE_FPR ft2,  (8*10)
    SAVE_FPR ft3,  (8*11)
    SAVE_FPR ft4,  (8*12)
    SAVE_FPR ft5,  (8*13)
    SAVE_FPR ft6,  (8*14)
    SAVE_FPR ft7,  (8*15)
    SAVE_FPR ft8,  (8*16)
    SAVE_FPR ft9,  (8*17)
    SAVE_FPR ft10, (8*18)
    SAVE_FPR ft11, (8*19)

    SAVE_GPR a0,   (8*20)
    SAVE_GPR a1,   (8*21)
    SAVE_GPR a2,   (8*22)
    SAVE_GPR a3,   (8*23)
    SAVE_GPR a4,   (8*24)
    SAVE_GPR a5,   (8*25)
    SAVE_GPR a6,   (8*26)
    SAVE_GPR a7,   (8*27)
    SAVE_GPR t0,   (8*28)
    SAVE_GPR t1,   (8*29)
    SAVE_GPR t2,   (8*30)
    SAVE_GPR t3,   (8*31)
    SAVE_GPR t4,   (8*32)
    SAVE_GPR ra,   (8*33)

    .ifc \reg, t5
      ld a0, (8*34)(sp)
    .else
      .ifc \reg, t6
        ld a0, (8*35)(sp)
      .else
        .ifnc \reg, a0
          mv a0, \reg
        .endif
      .endif
    .endif
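    // Note: offsets (8*34) and (8*35) reach into the 16-byte frame pushed earlier,
    // i.e. the T5/T6 save slots now sitting above this 272-byte frame. When `\reg`
    // is T5 or T6, the original reference was clobbered by the lock word inspection
    // above, so the argument must be reloaded from its save slot.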
    call artReadBarrierMark
    .ifnc \reg, a0
      mv \reg, a0
    .endif

    // Restore all caller-save registers from stack, including t5 and t6.
    // fa0-fa7, ft0-ft11, ra, a0-a7, t0-t6: 8 * (8 + 12 + 1 + 8 + 7) = 8 * 36 = 288 bytes
    RESTORE_FPR fa0,  (8*0)
    RESTORE_FPR fa1,  (8*1)
    RESTORE_FPR fa2,  (8*2)
    RESTORE_FPR fa3,  (8*3)
    RESTORE_FPR fa4,  (8*4)
    RESTORE_FPR fa5,  (8*5)
    RESTORE_FPR fa6,  (8*6)
    RESTORE_FPR fa7,  (8*7)
    RESTORE_FPR ft0,  (8*8)
    RESTORE_FPR ft1,  (8*9)
    RESTORE_FPR ft2,  (8*10)
    RESTORE_FPR ft3,  (8*11)
    RESTORE_FPR ft4,  (8*12)
    RESTORE_FPR ft5,  (8*13)
    RESTORE_FPR ft6,  (8*14)
    RESTORE_FPR ft7,  (8*15)
    RESTORE_FPR ft8,  (8*16)
    RESTORE_FPR ft9,  (8*17)
    RESTORE_FPR ft10, (8*18)
    RESTORE_FPR ft11, (8*19)
    RESTORE_GPR_NE \reg, a0, (8*20)
    RESTORE_GPR_NE \reg, a1, (8*21)
    RESTORE_GPR_NE \reg, a2, (8*22)
    RESTORE_GPR_NE \reg, a3, (8*23)
    RESTORE_GPR_NE \reg, a4, (8*24)
    RESTORE_GPR_NE \reg, a5, (8*25)
    RESTORE_GPR_NE \reg, a6, (8*26)
    RESTORE_GPR_NE \reg, a7, (8*27)
    RESTORE_GPR_NE \reg, t0, (8*28)
    RESTORE_GPR_NE \reg, t1, (8*29)
    RESTORE_GPR_NE \reg, t2, (8*30)
    RESTORE_GPR_NE \reg, t3, (8*31)
    RESTORE_GPR_NE \reg, t4, (8*32)
    RESTORE_GPR_NE \reg, ra, (8*33)
    RESTORE_GPR_NE \reg, t5, (8*34)
    RESTORE_GPR_NE \reg, t6, (8*35)
    DECREASE_FRAME 288
    ret
END \name
.endm


// No read barrier for X0 (Zero), X1 (RA), X2 (SP), X3 (GP) and X4 (TP).
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, t0
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, t1
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, t2
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, s0
// No read barrier for X9 (S1/xSELF).
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, a0
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, a1
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, a2
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, a3
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, a4
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, a5
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, a6
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, a7
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, s2
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, s3
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, s4
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, s5
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, s6
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, s7
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, s8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, s9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, s10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, s11
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, t3
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, t4
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg30, t5
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg31, t6


.macro N_ARG_DOWNCALL n, name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        // Save callee saves in case of GC.
    mv    a\n, xSELF                  // Pass Thread::Current().
    call  \entrypoint                 // (<n args>, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    \return
END \name
.endm
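
// For example, `TWO_ARG_DOWNCALL name, entrypoint, return` below expands with
// n=2 to `mv a2, xSELF`: the two managed-code arguments stay in A0 and A1 and
// Thread::Current() is appended as the entrypoint's final argument.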


.macro ONE_ARG_DOWNCALL name, entrypoint, return
    N_ARG_DOWNCALL 1, \name, \entrypoint, \return
.endm


.macro TWO_ARG_DOWNCALL name, entrypoint, return
    N_ARG_DOWNCALL 2, \name, \entrypoint, \return
.endm


.macro THREE_ARG_DOWNCALL name, entrypoint, return
    N_ARG_DOWNCALL 3, \name, \entrypoint, \return
.endm


.macro FOUR_ARG_DOWNCALL name, entrypoint, return
    N_ARG_DOWNCALL 4, \name, \entrypoint, \return
.endm


// Entry from managed code that calls artHandleFillArrayDataFromCode and
// delivers exception on failure.
TWO_ARG_DOWNCALL art_quick_handle_fill_data, \
                 artHandleFillArrayDataFromCode, \
                 RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER


// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Comment out allocators that have riscv64 specific asm.
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)

// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)

// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
ENTRY \c_name
    // Fast path rosalloc allocation.
    // a0: type, xSELF(s1): Thread::Current
    // a1-a7: free.
    ld    a3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(xSELF)    // Check if the thread local
                                                            // allocation stack has room.
                                                            // (Note: RISC-V has no load-pair
                                                            // instruction, so two loads are
                                                            // needed here.)
    ld    a4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET(xSELF)
    bgeu  a3, a4, .Lslow_path\c_name
    lwu   a3, MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(a0)  // Load the object size (a3)
    li    a5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
                                                            // local allocation.
    // If the class is not yet visibly initialized, or it is finalizable,
    // the object size will be very large to force the branch below to be taken.
    //
    // See Class::SetStatus() in class.cc for more details.
    bgeu  a3, a5, .Lslow_path\c_name
                                                            // Compute the rosalloc bracket index
                                                            // from the size. Since the size is
                                                            // already aligned we can combine the
                                                            // two shifts together.
#if ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT != POINTER_SIZE_SHIFT
#error "Unexpected ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT != POINTER_SIZE_SHIFT"
#endif
    // No-op: srli  a3, a3, (ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
    add   a4, xSELF, a3
                                                            // Subtract pointer size since there
                                                            // are no runs for 0 byte allocations
                                                            // and the size is already aligned.
    ld    a4, (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(a4)
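    // In effect (a sketch, relying on the equal-shifts assertion above):
    //   a4 = self->rosalloc_runs[(size >> POINTER_SIZE_SHIFT) - 1]
    // Adding the byte size directly to xSELF works because the bracket-index
    // shift and the pointer-size scaling cancel each other out.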
                                                            // Load the free list head (a3). This
                                                            // will be the return val.
    ld    a3, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(a4)
    beqz  a3, .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber a0 and a1.
    ld    a1, ROSALLOC_SLOT_NEXT_OFFSET(a3)                 // Load the next pointer of the head
                                                            // and update the list head with the
                                                            // next pointer.
    sd    a1, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(a4)
                                                            // Store the class pointer in the
                                                            // header. This also overwrites the
                                                            // next pointer. The offsets are
                                                            // asserted to match.

#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF a0
    sw    a0, MIRROR_OBJECT_CLASS_OFFSET(a3)
                                                            // Push the new object onto the thread
                                                            // local allocation stack and
                                                            // increment the thread local
                                                            // allocation stack top.
    ld    a1, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(xSELF)
    sw    a3, (a1)
    addi  a1, a1, COMPRESSED_REFERENCE_SIZE                 // Increment A1 to point to next slot.
    sd    a1, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(xSELF)    // Store the new alloc stack top.

    // After this "SD" the object is published to the thread local allocation stack,
    // and it will be observable by runtime-internal code (e.g. Heap::VisitObjects).
    // It is not yet visible to the running (user) compiled code until after the return.
    //
    // To avoid a memory barrier prior to the "SD", the state of the allocation stack slot
    // is differentiated. A slot can point to one of:
    // 0) A null entry, because the stack was bumped but the new pointer wasn't written yet.
    //       (The stack's initial state is all-null pointers.)
    // 1) A partially valid object, whose class pointer still holds the next free rosalloc slot.
    // 2) A fully valid object, with a valid class pointer pointing to a real class.
    // Other states are not allowed.
    //
    // A partially valid object is invalid only temporarily and will eventually become valid.
    // Runtime-internal code simply checks whether an object is null or partial and, if so,
    // ignores it.
    //
    // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
    // to ClassClass, and that ClassClass's class pointer is self-cyclic. A rosalloc free slot's
    // "next" pointer is not cyclic.)
    //
    // See also b/28790624 for a listing of CLs dealing with this race.
    lwu   a1, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(a4)
    addi  a1, a1, -1                                        // Decrement the size of the free list.
    sw    a1, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(a4)
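
    // The fast path above, as a sketch (ignoring heap reference poisoning and
    // the slow-path checks):
    //   slot = run->free_list.head;
    //   run->free_list.head = slot->next;
    //   slot->klass = klass;          // overwrites slot->next (asserted same offset)
    //   *thread_local_alloc_stack_top++ = slot;
    //   run->free_list.size--;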

    mv    a0, a3                                            // Set the return value and return.
    // No barrier. The class is already observably initialized (otherwise the fast
    // path size check above would fail) and new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    ret
.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME                              // Save callee saves in case of GC.
    mv    a1, xSELF                                         // Pass Thread::Current().
    call  \cxx_name
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
END \c_name
.endm

ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, \
                                artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, \
                                artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1

// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
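//
// The fast path below amounts to (a sketch; `pos` and `end` are the TLAB bump
// pointer and limit stored in the Thread object):
//   if (pos + size > end) goto slow_path;
//   obj = pos; pos += size; obj->klass = klass; return obj;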
.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel isInitialized
    ld    a4, THREAD_LOCAL_POS_OFFSET(xSELF)
    ld    a5, THREAD_LOCAL_END_OFFSET(xSELF)
    lwu   a7, MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(a0)  // Load the object size (a7).
    add   a6, a4, a7                                        // Add object size to tlab pos.
                                                            // (The 64-bit sum cannot
                                                            // realistically overflow: the size
                                                            // is a zero-extended 32-bit value
                                                            // added to a valid pointer.)

    // If the class is not yet visibly initialized, or it is finalizable,
    // the object size will be very large to force the branch below to be taken.
    //
    // See Class::SetStatus() in class.cc for more details.
    bgtu  a6, a5, \slowPathLabel
    sd    a6, THREAD_LOCAL_POS_OFFSET(xSELF)                // Store new thread_local_pos.
    POISON_HEAP_REF a0
    sw    a0, MIRROR_OBJECT_CLASS_OFFSET(a4)                // Store the class pointer.
    mv    a0, a4
    // No barrier. The class is already observably initialized (otherwise the fast
    // path size check above would fail) and new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    ret
.endm

// The common code for art_quick_alloc_object_*_tlab and art_quick_alloc_object_*_region_tlab.
// Currently the implementation ignores isInitialized. TODO(b/172087402): clean this up.
// Caller must execute a constructor fence after this.
.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
ENTRY \name
    // Fast path TLAB / region TLAB allocation.
    // a0: type, xSELF(s1): Thread::Current
    // a1-a7: free.
    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lslow_path\name, \isInitialized
.Lslow_path\name:
    SETUP_SAVE_REFS_ONLY_FRAME                              // Save callee saves in case of GC.
    mv    a1, xSELF                                         // Pass Thread::Current().
    call  \entrypoint                                       // (mirror::Class*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
END \name
.endm

GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \
    art_quick_alloc_object_resolved_region_tlab, \
    artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \
    art_quick_alloc_object_initialized_region_tlab, \
    artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \
    art_quick_alloc_object_resolved_tlab, \
    artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \
    art_quick_alloc_object_initialized_tlab, \
    artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1

.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE \
    slowPathLabel, class, count, temp0, temp1, temp2
    andi  \temp1, \temp1, OBJECT_ALIGNMENT_MASK_TOGGLED64   // Apply alignment mask
                                                            // (addr + 7) & ~7. The mask must
                                                            // be 64 bits to keep high bits in
                                                            // case of overflow.
    // Negative-sized arrays are handled here since the size was computed from the zero-extended
    // 32-bit `\count`. Negative ints become large 64-bit unsigned ints which are always larger
    // than the max signed 32-bit int. Since the max shift for arrays is 3, the size cannot
    // become a negative 64-bit int.
    li    \temp2, MIN_LARGE_OBJECT_THRESHOLD                // Possibly a large object, go slow
    bgeu  \temp1, \temp2, \slowPathLabel                    // path.
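    // Worked example (a sketch): for a 4-byte component type and count == -1,
    // the zero-extended count is 0xffffffff and the computed size is roughly
    // (0xffffffffULL << 2) plus the header, far above MIN_LARGE_OBJECT_THRESHOLD,
    // so the bogus allocation is correctly diverted to the slow path.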

    ld    \temp0, THREAD_LOCAL_POS_OFFSET(xSELF)            // Check tlab for space, note that
                                                            // we use (end - begin) to handle
                                                            // negative size arrays. It is
                                                            // assumed that a negative size will
                                                            // always be greater unsigned than
                                                            // region size.
    ld    \temp2, THREAD_LOCAL_END_OFFSET(xSELF)
    sub   \temp2, \temp2, \temp0

    // The array class is always initialized here. Unlike new-instance,
    // this does not act as a double test.
    bgtu  \temp1, \temp2, \slowPathLabel
1836    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
1837                                                            // Move old thread_local_pos to x0
1838                                                            // for the return value.
    mv    a0, \temp0
    add   \temp0, \temp0, \temp1
    sd    \temp0, THREAD_LOCAL_POS_OFFSET(xSELF)            // Store new thread_local_pos.
    POISON_HEAP_REF \class
    sw    \class, MIRROR_OBJECT_CLASS_OFFSET(a0)            // Store the class pointer.
    sw    \count, MIRROR_ARRAY_LENGTH_OFFSET(a0)            // Store the array length.
// new-array is special. The class is loaded and immediately goes to the Initialized state
// before it is published. Therefore the only fence needed is for the publication of the object.
// See ClassLinker::CreateArrayClass() for more details.

// For publication of the new array, we don't need a 'fence w, w' here.
// The compiler generates 'fence w, w' for all new-array insts.
    ret
.endm

// Caller must execute a constructor fence after this.
.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
ENTRY \name
    // Fast path array allocation for TLAB / region TLAB allocation.
    // a0: mirror::Class* type
    // a1: int32_t component_count
    // a2-a7: free.
    mv    a3, a0
    \size_setup a3, a1, a4, a5, a6
    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name, a3, a1, a4, a5, a6
.Lslow_path\name:
    // a0: mirror::Class* klass
    // a1: int32_t component_count
    // a2: Thread* self
    SETUP_SAVE_REFS_ONLY_FRAME                              // Save callee saves in case of GC.
    mv    a2, xSELF                                         // Pass Thread::Current().
    call  \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
END \name
.endm

.macro COMPUTE_ARRAY_SIZE_UNKNOWN class, count, temp0, temp1, temp2
    // Array classes are never finalizable or uninitialized, no need to check.
    lwu   \temp0, MIRROR_CLASS_COMPONENT_TYPE_OFFSET(\class) // Load component type.
    UNPOISON_HEAP_REF \temp0
    lwu   \temp0, MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(\temp0)
    srli  \temp0, \temp0, PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT   // Component size shift is in high 16
                                                            // bits.
    zext.w \temp1, \count                                   // From \count we use a 32-bit value;
                                                            // it cannot overflow.
    sll   \temp1, \temp1, \temp0                            // Calculate data size.
    // Add array data offset and alignment.
    addi   \temp1, \temp1, (MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif

    addi  \temp0, \temp0, 1                                 // Add 4 to the size only if the
                                                            // component size shift is 3
                                                            // (for 64 bit alignment).
    andi  \temp0, \temp0, 4
    add   \temp1, \temp1, \temp0
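    // E.g. for a long[] (component size shift 3): (3 + 1) & 4 == 4, matching the
    // 4-byte difference between the wide and int array data offsets asserted
    // above; for shifts 0-2 the `andi` yields 0 and nothing is added.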
.endm

.macro COMPUTE_ARRAY_SIZE_8 class, count, temp0, temp1, temp2
    // Add array data offset and alignment adjustment to the `\count`.
    li     \temp1, (MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
    add.uw \temp1, \count, \temp1
.endm

.macro COMPUTE_ARRAY_SIZE_16 class, count, temp0, temp1, temp2
    // Add array data offset and alignment adjustment to the shifted `\count`.
    li     \temp1, (MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
    sh1add.uw \temp1, \count, \temp1
.endm

.macro COMPUTE_ARRAY_SIZE_32 class, count, temp0, temp1, temp2
    // Add array data offset and alignment adjustment to the shifted `\count`.
    li     \temp1, (MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
    sh2add.uw \temp1, \count, \temp1
.endm

.macro COMPUTE_ARRAY_SIZE_64 class, count, temp0, temp1, temp2
    // Add array data offset and alignment adjustment to the shifted `\count`.
    li     \temp1, (MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
    sh3add.uw \temp1, \count, \temp1
.endm
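
// These helpers rely on the Zba extension: `shNadd.uw rd, rs1, rs2` computes
// rs2 + (zero_extend32(rs1) << N). E.g. `sh2add.uw \temp1, \count, \temp1`
// yields header + (uint32_t)count * 4 in one instruction; the zero-extension is
// also what turns a negative count into a huge size caught by the fast-path
// size check above.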

// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for riscv64, remove
// the entrypoint once all backends have been updated to use the size variants.
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, \
                          artAllocArrayFromCodeResolvedRegionTLAB, \
                          COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, \
                          artAllocArrayFromCodeResolvedRegionTLAB, \
                          COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, \
                          artAllocArrayFromCodeResolvedRegionTLAB, \
                          COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, \
                          artAllocArrayFromCodeResolvedRegionTLAB, \
                          COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, \
                          artAllocArrayFromCodeResolvedRegionTLAB, \
                          COMPUTE_ARRAY_SIZE_64
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, \
                          artAllocArrayFromCodeResolvedTLAB, \
                          COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, \
                          artAllocArrayFromCodeResolvedTLAB, \
                          COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, \
                          artAllocArrayFromCodeResolvedTLAB, \
                          COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, \
                          artAllocArrayFromCodeResolvedTLAB, \
                          COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, \
                          artAllocArrayFromCodeResolvedTLAB, \
                          COMPUTE_ARRAY_SIZE_64


GENERATE_FIELD_ENTRYPOINTS


// String's indexOf.
//
// TODO: Not very optimized. We should use the vector extension.
// On entry:
//    a0:   string object (known non-null)
//    a1:   char to match (known <= 0xFFFF)
//    a2:   Starting offset in string data
ENTRY art_quick_indexof
#if (STRING_COMPRESSION_FEATURE)
    lwu   a4, MIRROR_STRING_COUNT_OFFSET(a0)
#else
    lwu   a3, MIRROR_STRING_COUNT_OFFSET(a0)
#endif
    addi  a0, a0, MIRROR_STRING_VALUE_OFFSET
#if (STRING_COMPRESSION_FEATURE)
    /* Split the count into length (a3) and compression flag (a4) */
    srliw a3, a4, 1
    andi  a4, a4, 1
#endif
    /* Clamp start to [0..count) */
    sraiw a5, a2, 31
    andn  a2, a2, a5
    bge   a2, a3, .Lstring_indexof_nomatch
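
    /*
     * The clamp above is branch-free: `sraiw` makes a5 all ones exactly when a2
     * is negative (zero otherwise), so `andn a2, a2, a5` computes max(a2, 0).
     * Start offsets past the end are then caught by the `bge` bounds check.
     */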

#if (STRING_COMPRESSION_FEATURE)
    beqz  a4, .Lstring_indexof_compressed
#endif
    /* Build pointers to start and end of the data to compare */
    sh1add a2, a2, a0
    sh1add a3, a3, a0

    /*
     * At this point we have:
     *  a0: original start of string data
     *  a1: char to compare
     *  a2: start of the data to test
     *  a3: end of the data to test
     */

.Lstring_indexof_loop:
    lhu   a4, 0(a2)
    beq   a4, a1, .Lstring_indexof_match
    addi  a2, a2, 2
    bne   a2, a3, .Lstring_indexof_loop
.Lstring_indexof_nomatch:
    li    a0, -1
    ret
.Lstring_indexof_match:
    sub   a0, a2, a0
    srli  a0, a0, 1
    ret

#if (STRING_COMPRESSION_FEATURE)
    // Compare the compressed string one character at a time with the input character.
.Lstring_indexof_compressed:
    add   a2, a2, a0
    add   a3, a3, a0
.Lstring_indexof_compressed_loop:
    lbu   a4, (a2)
    beq   a4, a1, .Lstring_indexof_compressed_match
    addi  a2, a2, 1
    bne   a2, a3, .Lstring_indexof_compressed_loop
    li    a0, -1
    ret
.Lstring_indexof_compressed_match:
    sub   a0, a2, a0
    ret
#endif
END art_quick_indexof
