/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "berberis/code_gen_lib/code_gen_lib.h"

#include <cstddef>  // offsetof, size_t
#include <cstdint>  // fixed-width integer types, intptr_t

#include "berberis/assembler/machine_code.h"
#include "berberis/assembler/x86_64.h"
#include "berberis/base/checks.h"
#include "berberis/base/config.h"
#include "berberis/calling_conventions/calling_conventions_x86_64.h"
#include "berberis/code_gen_lib/code_gen_lib_arch.h"
#include "berberis/code_gen_lib/gen_adaptor.h"
#include "berberis/guest_state/guest_addr.h"
#include "berberis/guest_state/guest_state.h"
#include "berberis/instrument/trampolines.h"
#include "berberis/kernel_api/run_guest_syscall.h"
#include "berberis/runtime_primitives/host_code.h"
#include "berberis/runtime_primitives/translation_cache.h"

namespace berberis {

namespace {

// Emits a check for pending emulated signals. If pending signals are present,
// the generated code returns to the main dispatcher loop to handle them.
// To ensure we don't loop endlessly in generated code, this must be called on
// every region exit (loops within regions must be taken care of separately).
// Thus we call it in EmitDirectDispatch and EmitIndirectDispatch.
void EmitCheckSignalsAndMaybeReturn(x86_64::Assembler* as) {
  // C++:
  //   std::atomic_int_least8_t pending_signals_status;
  //   uint8_t status = pending_signals_status.load(std::memory_order_acquire);
  //   if (status == kPendingSignalsPresent) { ... }
  // x86_64 asm:
  //   cmpb pending_signals_status, kPendingSignalsPresent
  const size_t offset = offsetof(ThreadState, pending_signals_status);
  as->Cmpb({.base = x86_64::Assembler::rbp, .disp = offset}, kPendingSignalsPresent);
  as->Jcc(x86_64::Assembler::Condition::kEqual, kEntryExitGeneratedCode);
}

}  // namespace

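// Generates an adaptor that calls a host trampoline on behalf of guest code:
// syncs insn_addr to the guest pc, marks the thread as running outside
// generated code, optionally calls instrumentation hooks, invokes the
// marshalling trampoline with the callee, and finally dispatches to the guest
// return address.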
void GenTrampolineAdaptor(MachineCode* mc,
                          GuestAddr pc,
                          HostCode marshall,
                          const void* callee,
                          const char* name) {
  x86_64::Assembler as(mc);

  // Update insn_addr to the current PC.  This way, code generated by this
  // function does not require insn_addr to be up to date upon entry.  Note that
  // the trampoline that we call requires insn_addr to be up to date.
  as.Movq(as.rdi, pc);
  as.Movq({.base = as.rbp, .disp = offsetof(ThreadState, cpu.insn_addr)}, as.rdi);
  as.Movq({.base = as.rbp, .disp = offsetof(ThreadState, residence)}, kOutsideGeneratedCode);

  if (kInstrumentTrampolines) {
    if (auto instrument = GetOnTrampolineCall(name)) {
      as.Movq(as.rdi, as.rbp);
      as.Movq(as.rsi, reinterpret_cast<intptr_t>(name));
      as.Call(AsHostCode(instrument));
    }
  }

  // void Trampoline(void*, ThreadState*);
  as.Movq(as.rdi, reinterpret_cast<intptr_t>(callee));
  as.Movq(as.rsi, as.rbp);
  as.Call(marshall);

  if (kInstrumentTrampolines) {
    if (auto instrument = GetOnTrampolineReturn(name)) {
      as.Movq(as.rdi, as.rbp);
      as.Movq(as.rsi, reinterpret_cast<intptr_t>(name));
      as.Call(AsHostCode(instrument));
    }
  }

  // Jump to the guest return address.
  // Prefer rdx, since rax/rcx will result in extra moves inside EmitIndirectDispatch.
  as.Movq(as.rdx, {.base = as.rbp, .disp = kReturnAddressRegisterOffset});
  // We are returning to generated code.
  as.Movq({.base = as.rbp, .disp = offsetof(ThreadState, residence)}, kInsideGeneratedCode);
  EmitIndirectDispatch(&as, as.rdx);
  as.Finalize();
}

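// Emits a guest syscall: syncs insn_addr, calls RunGuestSyscall on the host,
// and then dispatches to the next guest instruction.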
void EmitSyscall(x86_64::Assembler* as, GuestAddr pc) {
  // We may run a guest signal handler during the syscall, so insn_addr needs to be synced.
  as->Movq(as->rdi, pc);
  as->Movq({.base = as->rbp, .disp = offsetof(ThreadState, cpu.insn_addr)}, as->rdi);
  as->Movq({.base = as->rbp, .disp = offsetof(ThreadState, residence)}, kOutsideGeneratedCode);

  // void RunGuestSyscall(ThreadState*);
  as->Movq(as->rdi, as->rbp);
  as->Call(AsHostCode(RunGuestSyscall));

  // We are returning to generated code.
  as->Movq({.base = as->rbp, .disp = offsetof(ThreadState, residence)}, kInsideGeneratedCode);
  // Advance to the next instruction.
  // TODO(b/161722184): if the syscall is interrupted by a signal, the signal handler might
  // overwrite insn_addr, so incrementing insn_addr here may be incorrect. This problem also
  // exists in the interpreter. On the other hand, syscalls can only be interrupted by
  // asynchronous signals, which are unlikely to overwrite insn_addr.
  EmitDirectDispatch(as, pc + 4, /*check_pending_signals=*/true);
}

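// Emits a direct jump to the translated code for the statically known guest
// address pc, optionally checking for pending signals first. Falls back to
// exiting generated code when jumps between regions are not linked.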
void EmitDirectDispatch(x86_64::Assembler* as, GuestAddr pc, bool check_pending_signals) {
  // insn_addr is passed between regions in rax.
  as->Movq(as->rax, pc);

  if (!config::kLinkJumpsBetweenRegions) {
    as->Jmp(kEntryExitGeneratedCode);
    return;
  }

  if (check_pending_signals) {
    EmitCheckSignalsAndMaybeReturn(as);
  }

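  // The translation cache only covers 48-bit guest addresses (see the two-level
  // 24+24-bit table lookup in EmitIndirectDispatch), so the upper 16 bits of pc
  // must be zero.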
  CHECK_EQ(pc & 0xffff'0000'0000'0000, 0);
  as->Movq(as->rcx,
           reinterpret_cast<uint64_t>(TranslationCache::GetInstance()->GetHostCodePtr(pc)));
  as->Movl(as->rcx, {.base = as->rcx});
  as->Jmp(as->rcx);
}

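// Emits an exit from generated code to the dispatcher: moves the target guest
// address into rax (the inter-region insn_addr register) and jumps to the
// entry/exit stub.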
void EmitExitGeneratedCode(x86_64::Assembler* as, x86_64::Assembler::Register target) {
  // insn_addr is passed between regions in rax.
  if (target != as->rax) {
    as->Movq(as->rax, target);
  }

  as->Jmp(kEntryExitGeneratedCode);
}

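// Emits an indirect jump to the translated code for the guest address held in
// `target`, which is only known at run time. Looks the host code pointer up in
// the translation cache via a two-level table indexed by the guest address.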
void EmitIndirectDispatch(x86_64::Assembler* as, x86_64::Assembler::Register target) {
  // insn_addr is passed between regions in rax.
  if (target != as->rax) {
    as->Movq(as->rax, target);
  }

  if (!config::kLinkJumpsBetweenRegions) {
    as->Jmp(kEntryExitGeneratedCode);
    return;
  }

  EmitCheckSignalsAndMaybeReturn(as);

  auto main_table_ptr = TranslationCache::GetInstance()->main_table_ptr();

  // Rax holds insn_addr. We use target and/or rcx/rdx for scratches.
  x86_64::Assembler::Register scratch1 = target;
  x86_64::Assembler::Register scratch2 = as->rcx;
  if (target == as->rax) {
    as->Movq(as->rdx, target);
    scratch1 = as->rdx;
  } else if (target == as->rcx) {
    scratch1 = as->rcx;
    scratch2 = as->rdx;
  }
  // scratch1 always holds insn_addr at this point.
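  // Index the first-level table with bits [47:24] of insn_addr.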
  as->Shrq(scratch1, int8_t{24});
  as->Andl(scratch1, 0xff'ffff);
  as->Movq(scratch2, reinterpret_cast<uint64_t>(main_table_ptr));
  as->Movq(scratch2,
           {.base = scratch2, .index = scratch1, .scale = x86_64::Assembler::kTimesEight});

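  // Index the second-level table with bits [23:0] of insn_addr; its entries
  // are 32-bit host code pointers.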
  as->Movq(scratch1, as->rax);
  as->Andl(scratch1, 0xff'ffff);
  as->Movl(scratch2, {.base = scratch2, .index = scratch1, .scale = x86_64::Assembler::kTimesFour});

  as->Jmp(scratch2);
}

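// Emits allocation of a stack frame of `frame_size` bytes for translated code.
// Only the part exceeding the frame already reserved at translated-code entry
// is allocated, rounded up to the required stack alignment.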
void EmitAllocStackFrame(x86_64::Assembler* as, uint32_t frame_size) {
  if (frame_size > config::kFrameSizeAtTranslatedCode) {
    uint32_t extra_size = AlignUp(frame_size - config::kFrameSizeAtTranslatedCode,
                                  x86_64::CallingConventions::kStackAlignmentBeforeCall);
    as->Subq(as->rsp, extra_size);
  }
}

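// Emits deallocation of the stack frame allocated by EmitAllocStackFrame.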
void EmitFreeStackFrame(x86_64::Assembler* as, uint32_t frame_size) {
  if (frame_size > config::kFrameSizeAtTranslatedCode) {
    uint32_t extra_size = AlignUp(frame_size - config::kFrameSizeAtTranslatedCode,
                                  x86_64::CallingConventions::kStackAlignmentBeforeCall);
    as->Addq(as->rsp, extra_size);
  }
}

}  // namespace berberis