1 /*
2  * Copyright (C) 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include "berberis/runtime_primitives/runtime_library.h"

#include <cstddef>
#include <cstdlib>

#include "berberis/guest_state/guest_state.h"
#include "berberis/runtime_primitives/host_code.h"
23 
24 namespace berberis {
25 
// "Calling conventions" among generated code and trampolines
// ==========================================================
//
// Introduction
// ------------
//
// To ensure the high performance of our generated code, we employ a couple of
// techniques:
//
// - We allow generated regions to jump directly to one another without
//   transferring control back to the Berberis runtime.
//
// - We use custom "calling conventions" that are different from the standard
//   aapcs64 calling conventions, with some items passed in registers.
40 //
// Entry and exit
// --------------
//
// Upon entry into generated code and trampoline adapters, we must have:
//
// - x29 pointing to ThreadState,
//
// - every field in ThreadState up to date, except insn_addr, and
//
// - x0 containing the up-to-date value for the potentially stale
//   ThreadState::insn_addr.
//
// Since we jump among generated code and trampolines, each region must adhere
// to the "calling conventions" above as it exits.
//
// Each region is allowed to use the stack pointed to by sp. However, it must
// restore sp before exiting.
//
// x19-x30 and the lower 64 bits of v8-v15 are callee saved. All other registers,
// and the upper 64 bits of v8-v15, are caller saved. That is, regions are
// allowed to use them without restoring their original values.
61 //
// Berberis -> generated code
// --------------------------
//
// If we are transferring control to generated code and trampolines from the
// Berberis runtime, such as ExecuteGuest, then we must do so via
// berberis_RunGeneratedCode, which is responsible for setting up registers for
// the "calling conventions".
69 //
// Generated code -> Berberis
// --------------------------
//
// When we are exiting generated code, we must do so via the END_GENERATED_CODE
// macro defined in this file. The macro ensures that ThreadState is fully up
// to date, including insn_addr, before transferring control back to the
// Berberis runtime.
77 
namespace {

// Number of bytes reserved on the stack for callee-saved registers when
// entering and exiting generated code. Twenty 64-bit registers are spilled:
// x19-x30 (twelve general-purpose registers, including x29 and lr) and
// d8-d15 (the low 64 bits of v8-v15).
constexpr size_t kCalleeSavedFrameSize = 8 * 20;

// aapcs64 requires sp to be 16-byte aligned at every call, and the prologue
// and epilogue in this file move sp by exactly this amount.
static_assert(kCalleeSavedFrameSize % 16 == 0,
              "callee-saved frame must preserve 16-byte stack alignment");

// The prologue/epilogue assembly in this file addresses the save slots with
// literal offsets 0..144, i.e. ten 16-byte register pairs.
static_assert(kCalleeSavedFrameSize == 10 * 16,
              "save-slot offsets in the assembly assume a 160-byte frame");

}  // namespace
86 
87 extern "C" {
88 
// END_GENERATED_CODE(EXIT_INSN)
//
// Perform all the steps needed to exit generated code except the final control
// transfer, which is up to the users of this macro: EXIT_INSN is a string
// literal appended to the assembly (e.g. "ret", or "b <symbol>" to perform a
// sibling call).
//
// Expected on entry: x29 points to ThreadState, x0 holds the current
// insn_addr, and sp has the value left by the prologue of
// berberis_RunGeneratedCode (the save slots are addressed relative to sp).
//
// The sequence:
//   1. stores x0 into ThreadState::cpu.insn_addr,
//   2. sets ThreadState::residence to kOutsideGeneratedCode, using w28 as
//      scratch (x28 is reloaded from the stack in step 4),
//   3. copies the ThreadState pointer into x0 so that it becomes the first
//      argument of a sibling call,
//   4. reloads d8-d15, x19-x29 and lr from the stack and releases the frame,
//   5. executes EXIT_INSN.
//
// All operand names are fully qualified so that the expansion does not depend
// on the scope it is used in.
// clang-format off
#define END_GENERATED_CODE(EXIT_INSN)                                   \
  asm(                                                                  \
      /* Sync insn_addr. */                                             \
      "str x0, [x29, %[InsnAddr]]\n"                                    \
      /* Set kOutsideGeneratedCode residence (w28 is scratch). */       \
      "mov w28, %[OutsideGeneratedCode]\n"                              \
      "strb w28, [x29, %[Residence]]\n"                                 \
                                                                        \
      /* Set x0 to the pointer to the guest state so that               \
       * we can perform a sibling call to functions like                \
       * berberis_HandleNotTranslated.                                  \
       */                                                               \
      "mov x0, x29\n"                                                   \
                                                                        \
      /* Epilogue: mirror of the berberis_RunGeneratedCode prologue. */ \
      "ldp d15, d14, [sp]\n"                                            \
      "ldp d13, d12, [sp, 16]\n"                                        \
      "ldp d11, d10, [sp, 32]\n"                                        \
      "ldp d9, d8, [sp, 48]\n"                                          \
      "ldp x29, x28, [sp, 64]\n"                                        \
      "ldp x27, x26, [sp, 80]\n"                                        \
      "ldp x25, x24, [sp, 96]\n"                                        \
      "ldp x23, x22, [sp, 112]\n"                                       \
      "ldp x21, x20, [sp, 128]\n"                                       \
      "ldp x19, lr, [sp, 144]\n"                                        \
      "add sp, sp, %[CalleeSavedFrameSize]\n"                           \
                                                                        \
      EXIT_INSN                                                         \
      ::[InsnAddr] "p"(offsetof(berberis::ThreadState, cpu.insn_addr)), \
      [Residence] "p"(offsetof(berberis::ThreadState, residence)),      \
      [OutsideGeneratedCode] "M"(berberis::kOutsideGeneratedCode),      \
      [CalleeSavedFrameSize] "I"(berberis::kCalleeSavedFrameSize))
// clang-format on
126 
berberis_RunGeneratedCode(ThreadState * state,HostCode code)127 [[gnu::naked]] [[gnu::noinline]] void berberis_RunGeneratedCode(ThreadState* state, HostCode code) {
128   // Parameters are in x0 - state and x1 - code
129   //
130   // In aapcs64, the stack must be aligned on 16 at every call instruction (sp mod 16 = 0).
131   // See https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst (6.4.5.1)
132 
133   // clang-format off
134   asm(
135     // Prologue
136     "sub sp, sp, %[CalleeSavedFrameSize]\n"
137     "stp x19, lr, [sp, 144]\n"
138     "stp x21, x20, [sp, 128]\n"
139     "stp x23, x22, [sp, 112]\n"
140     "stp x25, x24, [sp, 96]\n"
141     "stp x27, x26, [sp, 80]\n"
142     "stp x29, x28, [sp, 64]\n"
143     "stp d9, d8, [sp, 48]\n"
144     "stp d11, d10, [sp, 32]\n"
145     "stp d13, d12, [sp, 16]\n"
146     "stp d15, d14, [sp]\n"
147 
148     // Set state pointer
149     "mov x29, x0\n"
150 
151     // Set insn_addr.
152     "ldr x0, [x29, %[InsnAddr]]\n"
153     // Set kInsideGeneratedCode residence.
154     "mov w28, %[InsideGeneratedCode]\n"
155     "strb w28, [x29, %[Residence]]\n"
156 
157     // Jump to entry
158     "br x1"
159     ::[InsnAddr] "p"(offsetof(ThreadState, cpu.insn_addr)),
160     [Residence] "p"(offsetof(ThreadState, residence)),
161     [InsideGeneratedCode] "M"(kInsideGeneratedCode),
162     [CalleeSavedFrameSize] "I"(kCalleeSavedFrameSize));
163   // clang-format on
164 }
165 
berberis_entry_Interpret()166 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_Interpret() {
167   // clang-format off
168   asm(
169     // Sync insn_addr.
170     "str x0, [x29, %[InsnAddr]]\n"
171     // Set kOutsideGeneratedCode residence. */
172     "mov w28, %[OutsideGeneratedCode]\n"
173     "strb w28, [x29, %[Residence]]\n"
174 
175     // x29 holds the pointer to state which is the argument to the call.
176     "mov x0, x29\n"
177     "bl berberis_HandleInterpret\n"
178 
179     // x0 may be clobbered by the call above, so init it again.
180     "mov x0, x29\n"
181     "bl berberis_GetDispatchAddress\n"
182     "mov x1, x0\n"
183 
184     // Set insn_addr.
185     "ldr x0, [x29, %[InsnAddr]]\n"
186     // Set kInsideGeneratedCode residence.
187     "mov w28, %[InsideGeneratedCode]\n"
188     "strb w28, [x29, %[Residence]]\n"
189 
190     "br x1\n"
191     ::[InsnAddr] "p"(offsetof(berberis::ThreadState, cpu.insn_addr)),
192     [Residence] "p"(offsetof(berberis::ThreadState, residence)),
193     [OutsideGeneratedCode] "M"(berberis::kOutsideGeneratedCode),
194     [InsideGeneratedCode] "M"(berberis::kInsideGeneratedCode));
195   // clang-format on
196 }
197 
berberis_entry_ExitGeneratedCode()198 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_ExitGeneratedCode() {
199   END_GENERATED_CODE("ret");
200 }
201 
berberis_entry_Stop()202 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_Stop() {
203   END_GENERATED_CODE("ret");
204 }
205 
berberis_entry_NoExec()206 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_NoExec() {
207   END_GENERATED_CODE("b berberis_HandleNoExec");
208   // void berberis_HandleNoExec(ThreadState*);
209   // Perform a sibling call to berberis_HandleNoExec. The only parameter
210   // is state which is saved in x0 by END_GENERATED_CODE.
211   // TODO(b/232598137): Remove state from HandleNoExec parameters. Get it from
212   // the guest thread instead.
213 }
214 
berberis_entry_NotTranslated()215 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_NotTranslated() {
216   END_GENERATED_CODE("b berberis_HandleNotTranslated");
217   // void berberis_HandleNotTranslated(ThreadState*);
218   // See the comment above about the sibling call.
219 }
220 
berberis_entry_Translating()221 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_Translating() {
222   // TODO(b/232598137): Run interpreter while translation is in progress.
223   END_GENERATED_CODE("ret");
224 }
225 
berberis_entry_Invalidating()226 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_Invalidating() {
227   // TODO(b/232598137): maybe call sched_yield() here.
228   END_GENERATED_CODE("ret");
229 }
230 
berberis_entry_Wrapping()231 [[gnu::naked]] [[gnu::noinline]] void berberis_entry_Wrapping() {
232   // TODO(b/232598137): maybe call sched_yield() here.
233   END_GENERATED_CODE("ret");
234 }
235 
236 }  // extern "C"
237 
238 }  // namespace berberis
239