// xref: /aosp_15_r20/art/runtime/interpreter/mterp/riscv64/main.S (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
%def header():
/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This is a #include, not a %include, because we want the C pre-processor
 * to expand the macros into assembler assignment statements.
 */
#include "asm_support.h"
#include "arch/riscv64/asm_support_riscv64.S"

/**
 * RISC-V 64 ABI general notes
 *
 * References
 * - https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
 * - runtime/arch/riscv64/registers_riscv64.h
 *
 * 32 general purposes registers
 * - fixed purpose: zero, ra, sp, gp, tp, s1
 *     gp/scs: shadow call stack - do not clobber!
 *     s1/tr: ART thread register - do not clobber!
 * - temporaries: t0-t6
 * - arguments: a0-a7
 * - callee saved: ra, s0/fp, s2-s11
 *     s0 is flexible, available to use as a frame pointer if needed.
 *
 * 32 floating point registers
 * - temporaries: ft0-ft11
 * - arguments: fa0-fa7
 * - callee saved: fs0-fs11
 */

// Android references
//   Bytecodes: https://source.android.com/docs/core/runtime/dalvik-bytecode
//   Instruction formats: https://source.android.com/docs/core/runtime/instruction-formats
//   Shorty: https://source.android.com/docs/core/runtime/dex-format#shortydescriptor

// Fixed register usages in Nterp.
//    nickname  ABI    reg   purpose
#define xSELF    s1  // x9,   Thread* self pointer
#define xFP      s2  // x18,  interpreted frame pointer: to access locals and args
#define xPC      s3  // x19,  interpreted program counter: to fetch instructions
#define xINST    s4  // x20,  first 16-bit code unit of current instruction
#define xIBASE   s5  // x21,  interpreted instruction base pointer: for computed goto
#define xREFS    s6  // x22,  base of object references of dex registers

// DWARF registers reference
// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-dwarf.adoc
#define CFI_TMP  10  // DWARF register number for       a0/x10
#define CFI_DEX  19  // DWARF register number for xPC  /s3/x19
#define CFI_REFS 22  // DWARF register number for xREFS/s6/x22

// Synchronization
// This code follows the RISC-V atomics ABI specification [1].
//
// Object publication.
// new-instance and new-array operations must first perform a `fence w,w` "constructor fence" to
// ensure their new object references are correctly published with a subsequent SET_VREG_OBJECT.
//
// Volatile load/store.
// A volatile load is implemented as: fence rw,rw ; load ; fence r,rw.
// A 32-bit or 64-bit volatile store is implemented as: amoswap.{w,d}.rl
// A volatile store for a narrower type is implemented as: fence rw,w ; store ; fence rw,rw
//
// [1] https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-atomic.adoc

// An assembly entry for nterp: declares \name as a hidden global function
// symbol, 16-byte aligned, and opens its body at the label.
.macro OAT_ENTRY name
    .type \name, @function
    .hidden \name
    .global \name
    .balign 16
\name:
.endm

// Close a symbol opened by OAT_ENTRY/NAME_START by recording its ELF size.
.macro SIZE name
    .size \name, .-\name
.endm

// Similar to ENTRY but without the CFI directives.
.macro NAME_START name
    .type \name, @function
    .hidden \name  // Hide this as a global symbol, so we do not incur plt calls.
    .global \name
    /* Cache alignment for function entry */
    .balign 16
\name:
.endm

.macro NAME_END name
  SIZE \name
.endm

// Macro for defining entrypoints into runtime. We don't need to save registers (we're not holding
// references there), but there is no kDontSave runtime method. So just use the kSaveRefsOnly
// runtime method.
.macro NTERP_TRAMPOLINE name, helper
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME
    call \helper
    RESTORE_SAVE_REFS_ONLY_FRAME
    ld t0, THREAD_EXCEPTION_OFFSET(xSELF)  // t0 := self->exception
    bnez t0, nterp_deliver_pending_exception
    ret
END \name
.endm

// Unpack code items from dex format.
// Input: \code_item
// Output:
//   - \regs: register count
//   - \outs: out count
//   - \ins: in count. If set to register "zero" (x0), load is skipped.
//   - \code_item: holds instruction array on exit
.macro FETCH_CODE_ITEM_INFO code_item, regs, outs, ins
    // Unpack values from regular dex format.
    lhu \regs, CODE_ITEM_REGISTERS_SIZE_OFFSET(\code_item)
    lhu \outs, CODE_ITEM_OUTS_SIZE_OFFSET(\code_item)
    .ifnc \ins, zero  // assemble the load only when a real destination was given
      lhu \ins, CODE_ITEM_INS_SIZE_OFFSET(\code_item)
    .endif
    addi \code_item, \code_item, CODE_ITEM_INSNS_OFFSET
.endm

// Spill the interpreted PC to the frame slot just below the refs array, so
// the runtime (stack walking, exception delivery) can read the current dex PC.
.macro EXPORT_PC
    sd xPC, -16(xREFS)
.endm

// Branch to \label if the GC is currently marking.
// Clobbers: \reg
.macro TEST_IF_MARKING reg, label
    lb \reg, THREAD_IS_GC_MARKING_OFFSET(xSELF)
    bnez \reg, \label
.endm

// Branch to \continue when no suspend/checkpoint request is pending;
// otherwise export the PC and call the runtime suspend check.
// Clobbers: t0
.macro DO_SUSPEND_CHECK continue
    lwu t0, THREAD_FLAGS_OFFSET(xSELF)
    andi t0, t0, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
    beqz t0, \continue
    EXPORT_PC
    call art_quick_test_suspend
.endm

// Fetch one or more half-word units from an offset past the current PC.
// The offset is specified in 16-bit code units.
//
// A \width flag allows reading 32 bits (2 units) or 64 bits (4 units) from the offset.
// The RISC-V ISA supports unaligned accesses for these wider loads.
//
// If \width=8, \byte={0,1} indexes into the code unit at the offset.
//
// Default behavior loads one code unit with unsigned zext.
// The \signed flag is for signed sext, for shorter loads.
//
// Does not advance xPC.
.macro FETCH reg, count, signed=0, width=16, byte=0
    .if \width == 8
      .if \signed
        lb  \reg, (\count*2 + \byte)(xPC)
      .else
        lbu \reg, (\count*2 + \byte)(xPC)
      .endif
    .elseif \width == 16
      .if \signed
        lh  \reg, (\count*2)(xPC)
      .else
        lhu \reg, (\count*2)(xPC)
      .endif
    .elseif \width == 32
      .if \signed
        lw  \reg, (\count*2)(xPC)
      .else
        lwu \reg, (\count*2)(xPC)
      .endif
    .elseif \width == 64
      ld  \reg, (\count*2)(xPC)
    .else
      unimp  // impossible
    .endif
.endm

// Fetch the next instruction, from xPC into xINST.
// Does not advance xPC.
.macro FETCH_INST
    lhu xINST, (xPC)  // zero in upper 48 bits
.endm

// Fetch the next instruction, from xPC into xINST. Advance xPC by \count units, each 2 bytes.
//
// Immediates have a 12-bit offset range from xPC. Thus, \count can range from -1024 to 1023.
//
// Note: Must be placed AFTER anything that can throw an exception, or the exception catch may miss.
// Thus, this macro must be placed after EXPORT_PC.
.macro FETCH_ADVANCE_INST count
    lhu xINST, (\count*2)(xPC)  // zero in upper 48 bits
    addi xPC, xPC, (\count*2)
.endm

// Extract the 8-bit opcode from the current instruction word.
// Clobbers: \reg
.macro GET_INST_OPCODE reg
    and \reg, xINST, 0xFF
.endm

// Computed goto: jump to the handler for opcode \reg via the handler table
// base in xIBASE (each handler is 2^handler_size_bits bytes).
// Clobbers: \reg
.macro GOTO_OPCODE reg
    slliw \reg, \reg, ${handler_size_bits}
    add \reg, xIBASE, \reg
    jr \reg
.endm

// Look up the current dex PC in the per-thread interpreter cache; on a hit,
// \reg receives the cached value, otherwise control jumps to \miss_label.
// Clobbers: \z0, \z1
.macro FETCH_FROM_THREAD_CACHE reg, miss_label, z0, z1
    // See art::InterpreterCache::IndexOf() for computing index of key within cache array.
    // Entry address:
    //   xSELF + OFFSET + ((xPC>>2 & xFF) << 4)
    // = xSELF + OFFSET + ((xPC & xFF<<2) << 2)
    // = xSELF + ((OFFSET>>2 + (xPC & xFF<<2)) << 2)
    // => ANDI, ADD, SH2ADD
#if (THREAD_INTERPRETER_CACHE_SIZE_LOG2 != 8)
#error Expected interpreter cache array size = 256 elements
#endif
#if (THREAD_INTERPRETER_CACHE_SIZE_SHIFT != 2)
#error Expected interpreter cache entry size = 16 bytes
#endif
#if ((THREAD_INTERPRETER_CACHE_OFFSET & 0x3) != 0)
#error Expected interpreter cache offset to be 4-byte aligned
#endif
    andi \z0, xPC, 0xFF << 2
    addi \z0, \z0, THREAD_INTERPRETER_CACHE_OFFSET >> 2
    sh2add \z0, \z0, xSELF  // z0 := entry's address
    ld \z1, (\z0)           // z1 := dex PC
    bne xPC, \z1, \miss_label
    ld \reg, 8(\z0)         // value: depends on context; see call site
.endm

// Inputs:
//   - a0
//   - xSELF
// Clobbers: t0
.macro CHECK_AND_UPDATE_SHARED_MEMORY_METHOD if_hot, if_not_hot
    // TODO(solanes): Figure out if there's a way to load t0 only once.
    lwu t0, ART_METHOD_ACCESS_FLAGS_OFFSET(a0)
    BRANCH_IF_BIT_CLEAR t0, t0, ART_METHOD_IS_MEMORY_SHARED_FLAG_BIT, \if_hot
    lwu t0, ART_METHOD_ACCESS_FLAGS_OFFSET(a0)
    // Intrinsics are always in the boot image and considered hot.
    BRANCH_IF_BIT_SET t0, t0, ART_METHOD_IS_INTRINSIC_FLAG_BIT, \if_hot
    lwu t0, THREAD_SHARED_METHOD_HOTNESS_OFFSET(xSELF)  // t0 := hotness
    beqz t0, \if_hot

    addi t0, t0, -1  // increase hotness (counter counts down towards zero = hot)
    sw t0,  THREAD_SHARED_METHOD_HOTNESS_OFFSET(xSELF)
    j \if_not_hot
.endm

// Update xPC by \units code units. On back edges, perform hotness and suspend.
.macro BRANCH units
    sh1add xPC, \units, xPC
    blez \units, 2f  // If branch is <= 0, increase hotness and do a suspend check.
1:
    FETCH_INST
    GET_INST_OPCODE t0
    GOTO_OPCODE t0
2:
    ld a0, (sp)  // a0 := ArtMethod*, stored at the bottom of the frame
    lhu t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)  // t0 := hotness
#if (NTERP_HOTNESS_VALUE != 0)
#error Expected 0 for hotness value
#endif
    // If the counter is at zero (hot), handle it in the runtime.
    beqz t0, 3f
    addi t0, t0, -1  // increase hotness (counter counts down towards zero = hot)
    sh t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)
    DO_SUSPEND_CHECK continue=1b
    j 1b
3:
    tail NterpHandleHotnessOverflow  // arg a0 (ArtMethod*)
.endm

// Increase method hotness before starting the method.
// Hardcoded:
// - a0: ArtMethod*
// Clobbers: t0
.macro START_EXECUTING_INSTRUCTIONS
    ld a0, (sp)
    lhu t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)  // t0 := hotness
#if (NTERP_HOTNESS_VALUE != 0)
#error Expected 0 for hotness value
#endif
    // If the counter is at zero (hot), handle it in the runtime.
    beqz t0, 3f
    addi t0, t0, -1  // increase hotness (counter counts down towards zero = hot)
    sh t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)
1:
    DO_SUSPEND_CHECK continue=2f
2:
    FETCH_INST
    GET_INST_OPCODE t0
    GOTO_OPCODE t0
3:
    CHECK_AND_UPDATE_SHARED_MEMORY_METHOD if_hot=4f, if_not_hot=1b
4:
    mv a1, zero  // dex_pc_ptr=nullptr
    mv a2, zero  // vregs=nullptr
    call nterp_hot_method
    j 2b
.endm

// 64 bit read
// Clobbers: \reg
// Safe if \reg == \vreg.
.macro GET_VREG_WIDE reg, vreg
    sh2add \reg, \vreg, xFP  // vreg addr in register array
    ld \reg, (\reg)          // reg := fp[vreg](lo) | fp[vreg+1](hi)
.endm

// 64 bit write
// Clobbers: z0
.macro SET_VREG_WIDE reg, vreg, z0
    sh2add \z0, \vreg, xFP    // vreg addr in register array
    sd \reg, (\z0)            // fp[vreg] := reg(lo) ; fp[vreg+1] := reg(hi)
    sh2add \z0, \vreg, xREFS  // vreg addr in reference array
    sd zero, (\z0)            // refs[vreg] := null ; refs[vreg+1] := null
.endm

// Object read
// Clobbers: \reg
// Safe if \reg == \vreg.
.macro GET_VREG_OBJECT reg, vreg
    sh2add \reg, \vreg, xREFS  // vreg addr in reference array
    lwu \reg, (\reg)           // reg := refs[vreg]
.endm

// Object write
// Clobbers: z0
.macro SET_VREG_OBJECT reg, vreg, z0
    sh2add \z0, \vreg, xFP    // vreg addr in register array
    sw \reg, (\z0)            // fp[vreg] := reg
    sh2add \z0, \vreg, xREFS  // vreg addr in reference array
    sw \reg, (\z0)            // refs[vreg] := reg
.endm

// Floating-point 64 bit read
// Clobbers: \reg, \vreg
.macro GET_VREG_DOUBLE reg, vreg
    sh2add \vreg, \vreg, xFP  // vreg addr in register array
    fld \reg, (\vreg)         // reg := fp[vreg](lo) | fp[vreg+1](hi)
.endm

// Floating-point 64 bit write
// Clobbers: \reg, z0
.macro SET_VREG_DOUBLE reg, vreg, z0
    sh2add \z0, \vreg, xFP    // vreg addr in register array
    fsd \reg, (\z0)           // fp[vreg] := reg(lo) ; fp[vreg+1] := reg(hi)
    sh2add \z0, \vreg, xREFS  // vreg addr in reference array
    sd zero, (\z0)            // refs[vreg] := null ; refs[vreg+1] := null
.endm

// Put "%def" definitions after ".macro" definitions for proper expansion. %def is greedy.

// Typed read, defaults to 32-bit read
// Note: An object ref requires LWU, or LW;ZEXT.W.
// Clobbers: \reg
// Safe if \reg == \vreg.
%def get_vreg(reg, vreg, width=32, is_wide=False, is_unsigned=False):
%  if is_wide or width == 64:
     GET_VREG_WIDE $reg, $vreg
%  elif is_unsigned:
     sh2add $reg, $vreg, xFP  // vreg addr in register array
     lwu $reg, ($reg)         // reg := fp[vreg], zext
%  else:
     sh2add $reg, $vreg, xFP  // vreg addr in register array
     lw $reg, ($reg)          // reg := fp[vreg]
%#:

// Typed write, defaults to 32-bit write.
// Note: Incorrect for an object ref; it requires 2nd SW into xREFS.
// Clobbers: z0
%def set_vreg(reg, vreg, z0, width=32, is_wide=False):
%  if is_wide or width == 64:
     SET_VREG_WIDE $reg, $vreg, $z0
%  else:
     sh2add $z0, $vreg, xFP    // vreg addr in register array
     sw $reg, ($z0)            // fp[vreg] := reg
     sh2add $z0, $vreg, xREFS  // vreg addr in reference array
     sw zero, ($z0)            // refs[vreg] := null
%#:

// Floating-point read, defaults to 32-bit read.
// Clobbers: reg, vreg
%def get_vreg_float(reg, vreg, is_double=False):
%  if is_double:
     GET_VREG_DOUBLE $reg, $vreg
%  else:
     sh2add $vreg, $vreg, xFP  // vreg addr in register array
     flw $reg, ($vreg)         // reg := fp[vreg]
%#:

// Floating-point write, defaults to 32-bit write.
// Clobbers: reg, z0
%def set_vreg_float(reg, vreg, z0, is_double=False):
%  if is_double:
     SET_VREG_DOUBLE $reg, $vreg, $z0
%  else:
     sh2add $z0, $vreg, xFP    // vreg addr in register array
     fsw $reg, ($z0)           // fp[vreg] := reg
     sh2add $z0, $vreg, xREFS  // vreg addr in reference array
     sw zero, ($z0)            // refs[vreg] := null
%#:

%def entry():
/*
 * ArtMethod entry point.
 *
 * On entry:
 *  a0     ArtMethod* callee
 *  a1-a7  method parameters
 */
OAT_ENTRY ExecuteNterpWithClinitImpl
#if MIRROR_CLASS_STATUS_SHIFT < 12
#error mirror class status bits cannot use LUI load technique
#endif
    .cfi_startproc
    // For simplicity, we don't do a read barrier here, but instead rely
    // on art_quick_resolution_trampoline to always have a suspend point before
    // calling back here.
    lwu t0, ART_METHOD_DECLARING_CLASS_OFFSET(a0)
    lw t1, MIRROR_CLASS_STATUS_OFFSET(t0)  // t1 := status word, sext
    lui t2, MIRROR_CLASS_STATUS_VISIBLY_INITIALIZED << (MIRROR_CLASS_STATUS_SHIFT - 12)
    // The unsigned comparison works in tandem with the 64-bit sign-extension of
    // the status bits at the top of the 32-bit word. The order of the status
    // constants (sign extended from LUI) is unchanged with unsigned comparison.
    bgeu t1, t2, ExecuteNterpImpl
    lui t2, MIRROR_CLASS_STATUS_INITIALIZED << (MIRROR_CLASS_STATUS_SHIFT - 12)
    bltu t1, t2, .Linitializing_check
    fence w, w  // constructor fence before running the initialized class
    j ExecuteNterpImpl
.Linitializing_check:
    lui t2, MIRROR_CLASS_STATUS_INITIALIZING << (MIRROR_CLASS_STATUS_SHIFT - 12)
    bltu t1, t2, .Lresolution_trampoline
    lwu t1, MIRROR_CLASS_CLINIT_THREAD_ID_OFFSET(t0)
    lwu t0, THREAD_TID_OFFSET(xSELF)
    beq t0, t1, ExecuteNterpImpl  // this thread is running <clinit>; proceed
.Lresolution_trampoline:
    tail art_quick_resolution_trampoline
    .cfi_endproc
    .type EndExecuteNterpWithClinitImpl, @function
    .hidden EndExecuteNterpWithClinitImpl
    .global EndExecuteNterpWithClinitImpl
EndExecuteNterpWithClinitImpl:

OAT_ENTRY ExecuteNterpImpl
   .cfi_startproc
%  setup_nterp_frame(cfi_refs="CFI_REFS", refs="xREFS", fp="xFP", pc="xPC", regs="s7", ins="s8", spills_sp="s9", z0="t0", z1="t1", z2="t2", z3="t3", uniq="entry")
                            // xREFS := callee refs array
                            // xFP   := callee fp array
                            // xPC   := callee dex array
                            // s7    := refs/fp vreg count
                            // s8    := ins count
                            // s9    := post-spills pre-frame sp
                            // sp    := post-frame sp
   CFI_DEFINE_DEX_PC_WITH_OFFSET(/*tmpReg*/CFI_TMP, /*dexReg*/CFI_DEX, /*dexOffset*/0)

   // Fast path: zero args.
   beqz s8, .Lentry_go

   sub s7, s7, s8           // s7 := a1 index in fp/refs
   lwu s10, ART_METHOD_ACCESS_FLAGS_OFFSET(a0)
                            // s10 := method flags

   // Fast path: all reference args.
   sh2add t0, s7, xFP       // t0 := &xFP[a1]
   sh2add t1, s7, xREFS     // t1 := &xREFS[a1]
   BRANCH_IF_BIT_CLEAR t2, s10, ART_METHOD_NTERP_ENTRY_POINT_FAST_PATH_FLAG_BIT, .Lentry_a1
%  setup_ref_args_and_go(fp="t0", refs="t1", refs_end="xFP", spills_sp="s9", z0="t2", z1="t3", done=".Lentry_go")

   // Fast path: instance with zero args.
.Lentry_a1:
   bexti s10, s10, ART_METHOD_IS_STATIC_FLAG_BIT
                            // s10 := 1 if static, 0 if instance
   bnez s10, .Lentry_shorty
   sw a1, (t0)              // fp[first] := this
   sw a1, (t1)              // refs[first] := this
   li t2, 1
   beq s8, t2, .Lentry_go   // only arg was "this"

   // Slow path: runtime call to obtain shorty, full setup from managed ABI.
.Lentry_shorty:
   SPILL_ALL_ARGUMENTS
   // TODO: Better way to get shorty
   call NterpGetShorty      // arg a0
   mv s11, a0               // s11 := shorty
   RESTORE_ALL_ARGUMENTS

   // temporaries are trashed, recompute some values
   sh2add t0, s7, xFP       // t0 := &xFP[a1]
   sh2add t1, s7, xREFS     // t1 := &xREFS[a1]
   addi t2, s11, 1          // t2 := shorty arg (skip return type)
   xori s10, s10, 1         // s10 := 0 if static, 1 if instance
   slliw t3, s10, 2         // t3 := (static) 0, (instance) 4: fp/refs/outs byte offset
   // constant setup for gpr/fpr shorty comparisons
   li s0, 'D'               // s0 := double char (unused fp)
   li s4, 'F'               // s4 := float char (unused xINST)
   li s5, 'J'               // s5 := long char (unused xIBASE)
   li s8, 'L'               // s8 := ref char (unused ins count)
   bnez s10, .Lentry_args   // instance a1 already stored into callee's xFP and xREFS

%  store_gpr_to_vreg(gpr="a1", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")

.Lentry_args:
   // linear scan through shorty: extract non-float args
%  store_gpr_to_vreg(gpr="a2", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
%  store_gpr_to_vreg(gpr="a3", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
%  store_gpr_to_vreg(gpr="a4", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
%  store_gpr_to_vreg(gpr="a5", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
%  store_gpr_to_vreg(gpr="a6", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
%  store_gpr_to_vreg(gpr="a7", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
   // We drained arg registers, so continue from caller stack's out array. Unlike the reference-only
   // fast-path, the continuation offset in the out array can vary, depending on the presence of
   // 64-bit values in the arg registers. \offset tracks this value as a byte offset.
   addi t5, s9, (NTERP_SIZE_SAVE_CALLEE_SAVES + 8)
                            // t5 := (caller) outs array base address
   add t4, t3, t0           // t4 := (callee) &FP[next]
   add t1, t3, t1           // t1 := (callee) &REFS[next]
   add t3, t3, t5           // t3 := (caller) &OUTS[next]
%  store_outs_to_vregs(outs="t3", shorty="t2", fp="t4", refs="t1", z0="t5", z1="t6", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
                            // t0 = &xFP[a1], unclobbered
.Lentry_fargs:
   addi t1, s11, 1          // t1 := shorty arg (skip return type)
   slliw t2, s10, 2         // t2 := starting byte offset for fp/outs, static and instance
   // linear scan through shorty: extract float args
%  store_fpr_to_vreg(fpr="fa0", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
%  store_fpr_to_vreg(fpr="fa1", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
%  store_fpr_to_vreg(fpr="fa2", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
%  store_fpr_to_vreg(fpr="fa3", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
%  store_fpr_to_vreg(fpr="fa4", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
%  store_fpr_to_vreg(fpr="fa5", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
%  store_fpr_to_vreg(fpr="fa6", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
%  store_fpr_to_vreg(fpr="fa7", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
   addi t3, s9, (NTERP_SIZE_SAVE_CALLEE_SAVES + 8)
                            // t3 := (caller) outs array base address
   add t0, t2, t0           // t0 := (callee) &FP[next]
   add t2, t2, t3           // t2 := (caller) &OUTS[next]
%  store_float_outs_to_vregs(outs="t2", shorty="t1", fp="t0", z0="t3", D="s0", F="s4", J="s5", next=".Lentry_go")

.Lentry_go:
    la xIBASE, artNterpAsmInstructionStart
    START_EXECUTING_INSTRUCTIONS
    // NOTE: no fallthrough
    // cfi info continues, and covers the whole nterp implementation.
    SIZE ExecuteNterpImpl

%def footer():
/*
 * ===========================================================================
 *  Common subroutines and data
 * ===========================================================================
 */

    .text
    .align  2


// Enclose all code below in a symbol (which gets printed in backtraces).
NAME_START nterp_helper

common_errArrayIndex:
    EXPORT_PC
    // CALL preserves RA for stack walking.
    call art_quick_throw_array_bounds  // args a0 (index), a1 (length)

common_errDivideByZero:
    EXPORT_PC
    // CALL preserves RA for stack walking.
    call art_quick_throw_div_zero

common_errNullObject:
    EXPORT_PC
    // CALL preserves RA for stack walking.
    call art_quick_throw_null_pointer_exception

NterpInvokeVirtual:
%  nterp_invoke_virtual()
NterpInvokeSuper:
%  nterp_invoke_super()
NterpInvokeDirect:
%  nterp_invoke_direct()
NterpInvokeStringInit:
%  nterp_invoke_string_init()
NterpInvokeStatic:
%  nterp_invoke_static()
NterpInvokeInterface:
%  nterp_invoke_interface()
NterpInvokePolymorphic:
%  nterp_invoke_polymorphic()
NterpInvokeCustom:
%  nterp_invoke_custom()
NterpInvokeVirtualRange:
%  nterp_invoke_virtual_range()
NterpInvokeSuperRange:
%  nterp_invoke_super_range()
NterpInvokeDirectRange:
%  nterp_invoke_direct_range()
NterpInvokeStringInitRange:
%  nterp_invoke_string_init_range()
NterpInvokeStaticRange:
%  nterp_invoke_static_range()
NterpInvokeInterfaceRange:
%  nterp_invoke_interface_range()
NterpInvokePolymorphicRange:
%  nterp_invoke_polymorphic_range()
NterpInvokeCustomRange:
%  nterp_invoke_custom_range()

// Arg a0: ArtMethod*
NterpHandleHotnessOverflow:
   CHECK_AND_UPDATE_SHARED_MEMORY_METHOD if_hot=.Lhotspill_hot, if_not_hot=.Lhotspill_suspend
.Lhotspill_hot:
   mv a1, xPC
   mv a2, xFP
   call nterp_hot_method  // args a0, a1, a2
   bnez a0, .Lhotspill_osr
.Lhotspill_advance:
   FETCH_INST
   GET_INST_OPCODE t0
   GOTO_OPCODE t0
.Lhotspill_osr:
   // a0 = OsrData*
   // Drop most of the current nterp frame, but keep the callee-saves.
   // The nterp callee-saves (count and layout) match the OSR frame's callee-saves.
   ld sp, -8(xREFS)  // caller's interpreted frame pointer
   .cfi_def_cfa sp, NTERP_SIZE_SAVE_CALLEE_SAVES
   lwu t0, OSR_DATA_FRAME_SIZE(a0)
   addi t0, t0, -NTERP_SIZE_SAVE_CALLEE_SAVES  // t0 := osr frame - callee saves, in bytes
   mv s7, sp         // Remember CFA in a callee-save register.
   .cfi_def_cfa_register s7
   sub sp, sp, t0    // OSR size guaranteed to be stack aligned (16 bytes).

   addi t1, a0, OSR_DATA_MEMORY  // t1 := read start
   add t1, t1, t0                // t1 := read end (exclusive)
   mv t2, s7                     // t2 := write end (exclusive)
   // t0 >= 8 (OSR places ArtMethod* at bottom of frame), so loop will terminate.
.Lhotspill_osr_copy_loop:
   addi t1, t1, -8
   ld t3, (t1)
   addi t2, t2, -8
   sd t3, (t2)
   bne t2, sp, .Lhotspill_osr_copy_loop

   ld s8, OSR_DATA_NATIVE_PC(a0)  // s8 := native PC; jump after free
   call free  // arg a0; release OsrData*
   jr s8      // Jump to the compiled code.
.Lhotspill_suspend:
   DO_SUSPEND_CHECK continue=.Lhotspill_advance
   j .Lhotspill_advance

// This is the logical end of ExecuteNterpImpl, where the frame info applies.
.cfi_endproc

NterpToNterpInstance:
%  nterp_to_nterp_instance()
NterpToNterpStringInit:
%  nterp_to_nterp_string_init()
NterpToNterpStatic:
%  nterp_to_nterp_static()
NterpToNterpInstanceRange:
%  nterp_to_nterp_instance_range()
NterpToNterpStringInitRange:
%  nterp_to_nterp_string_init_range()
NterpToNterpStaticRange:
%  nterp_to_nterp_static_range()

NAME_END nterp_helper

// EndExecuteNterpImpl includes the methods after .cfi_endproc, as we want the runtime to see them
// as part of the Nterp PCs. This label marks the end of PCs contained by the OatQuickMethodHeader
// created for the interpreter entry point.
    .type EndExecuteNterpImpl, @function
    .hidden EndExecuteNterpImpl
    .global EndExecuteNterpImpl
EndExecuteNterpImpl:

// Entrypoints into runtime.
NTERP_TRAMPOLINE nterp_allocate_object, NterpAllocateObject
NTERP_TRAMPOLINE nterp_filled_new_array, NterpFilledNewArray
NTERP_TRAMPOLINE nterp_filled_new_array_range, NterpFilledNewArrayRange
NTERP_TRAMPOLINE nterp_get_class, NterpGetClass
NTERP_TRAMPOLINE nterp_get_instance_field_offset, NterpGetInstanceFieldOffset
NTERP_TRAMPOLINE nterp_get_method, NterpGetMethod
NTERP_TRAMPOLINE nterp_get_static_field, NterpGetStaticField
NTERP_TRAMPOLINE nterp_hot_method, NterpHotMethod
NTERP_TRAMPOLINE nterp_load_object, NterpLoadObject

ENTRY nterp_deliver_pending_exception
    DELIVER_PENDING_EXCEPTION
END nterp_deliver_pending_exception

// gen_mterp.py will inline the following definitions
// within [ExecuteNterpImpl, EndExecuteNterpImpl).
%def instruction_start():
    .type artNterpAsmInstructionStart, @function
    .hidden artNterpAsmInstructionStart
    .global artNterpAsmInstructionStart
artNterpAsmInstructionStart = .L_op_nop
    .text

%def instruction_end():
    .type artNterpAsmInstructionEnd, @function
    .hidden artNterpAsmInstructionEnd
    .global artNterpAsmInstructionEnd
artNterpAsmInstructionEnd:
    // artNterpAsmInstructionEnd is used as landing pad for exception handling.
    // xPC (S3) for the exception handler was set just prior to the long jump coming here.
    FETCH_INST
    GET_INST_OPCODE t0
    GOTO_OPCODE t0

%def opcode_pre():
%   pass
%def opcode_name_prefix():
%   return "nterp_"
%def opcode_start():
    NAME_START nterp_${opcode}
%def opcode_end():
    NAME_END nterp_${opcode}
%def opcode_slow_path_start(name):
    NAME_START ${name}
%def opcode_slow_path_end(name):
    NAME_END ${name}
740