/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_CALL_INTRINSIC_H_
#define BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_CALL_INTRINSIC_H_

#include <array>
#include <cstddef>
#include <cstdint>
#include <tuple>
#include <type_traits>

#include "berberis/assembler/x86_64.h"
#include "berberis/base/bit_util.h"
#include "berberis/base/dependent_false.h"
#include "berberis/intrinsics/macro_assembler.h"
#include "berberis/runtime_primitives/platform.h"

namespace berberis::call_intrinsic {

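// General-purpose registers that the host x86-64 calling convention treats as caller-saved, and
// which must therefore be spilled to the save area around an intrinsic call.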
constexpr x86_64::Assembler::Register kCallerSavedRegs[] = {
    x86_64::Assembler::rax,
    x86_64::Assembler::rcx,
    x86_64::Assembler::rdx,
    x86_64::Assembler::rdi,
    x86_64::Assembler::rsi,
    x86_64::Assembler::r8,
    x86_64::Assembler::r9,
    x86_64::Assembler::r10,
    x86_64::Assembler::r11,
};

constexpr int8_t kRegIsNotOnStack = -1;

// Map from register number to offset in CallIntrinsic save area. Counted in 8-byte slots.
inline constexpr auto kRegOffsetsOnStack = []() {
  std::array<int8_t, 16> regs_on_stack = {};
  // regs_on_stack.fill(kRegIsNotOnStack); - needs C++20
  for (auto& num : regs_on_stack) {
    num = kRegIsNotOnStack;
  }

  int8_t stack_allocation_size = 0;
  for (auto reg : kCallerSavedRegs) {
    regs_on_stack[reg.GetPhysicalIndex()] = stack_allocation_size;
    ++stack_allocation_size;
  }
  return regs_on_stack;
}();
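// For example, rax (the first entry in kCallerSavedRegs) maps to slot 0 and r11 (the last) to
// slot 8, while registers that are not caller-saved, e.g. rbx, keep kRegIsNotOnStack.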

constexpr x86_64::Assembler::XMMRegister kCallerSavedXMMRegs[] = {
    x86_64::Assembler::xmm0,
    x86_64::Assembler::xmm1,
    x86_64::Assembler::xmm2,
    x86_64::Assembler::xmm3,
    x86_64::Assembler::xmm4,
    x86_64::Assembler::xmm5,
    x86_64::Assembler::xmm6,
    x86_64::Assembler::xmm7,
    x86_64::Assembler::xmm8,
    x86_64::Assembler::xmm9,
    x86_64::Assembler::xmm10,
    x86_64::Assembler::xmm11,
    x86_64::Assembler::xmm12,
    x86_64::Assembler::xmm13,
    x86_64::Assembler::xmm14,
    x86_64::Assembler::xmm15,
};

// Map from register number to offset in CallIntrinsic save area. Counted in 8-byte slots.
inline constexpr auto kSimdRegOffsetsOnStack = []() {
  std::array<int8_t, 16> simd_regs_on_stack = {};
  // simd_regs_on_stack.fill(kRegIsNotOnStack); - needs C++20
  for (auto& num : simd_regs_on_stack) {
    num = kRegIsNotOnStack;
  }

  int8_t stack_allocation_size = AlignUp(std::size(kCallerSavedRegs), 2);
  for (auto reg : kCallerSavedXMMRegs) {
    simd_regs_on_stack[reg.GetPhysicalIndex()] = stack_allocation_size;
    stack_allocation_size += 2;
  }
  return simd_regs_on_stack;
}();

// Size of the CallIntrinsic save area. Counted in 8-byte slots.
inline constexpr int8_t kSaveAreaSize =
    AlignUp(std::size(kCallerSavedRegs), 2) + std::size(kCallerSavedXMMRegs) * 2;
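// With the lists above this is AlignUp(9, 2) + 16 * 2 = 42 slots, i.e. 336 bytes. The GP slots
// are rounded up to an even count so that, assuming RSP is 16-byte aligned when the area is
// allocated, every XMM slot keeps the 16-byte alignment Movdqa requires.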

struct StoredRegsInfo {
  std::decay_t<decltype(kRegOffsetsOnStack)> regs_on_stack;
  std::decay_t<decltype(kSimdRegOffsetsOnStack)> simd_regs_on_stack;
};

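// Allocates the save area and spills every caller-saved GP and XMM register into its assigned
// slot below the new RSP.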
inline void PushCallerSaved(MacroAssembler<x86_64::Assembler>& as) {
  as.Subq(as.rsp, kSaveAreaSize * 8);

  for (auto reg : kCallerSavedRegs) {
    as.Movq({.base = as.rsp, .disp = kRegOffsetsOnStack[reg.GetPhysicalIndex()] * 8}, reg);
  }

  for (auto reg : kCallerSavedXMMRegs) {
    as.Movdqa({.base = as.rsp, .disp = kSimdRegOffsetsOnStack[reg.GetPhysicalIndex()] * 8}, reg);
  }
}

// Note: regs_on_stack is usually a copy of kRegOffsetsOnStack with some registers marked off as
// kRegIsNotOnStack, and simd_regs_on_stack is kSimdRegOffsetsOnStack with some registers marked
// as kRegIsNotOnStack. These registers are skipped during the restoration process.
inline void PopCallerSaved(MacroAssembler<x86_64::Assembler>& as, const StoredRegsInfo regs_info) {
  for (auto reg : kCallerSavedRegs) {
    if (regs_info.regs_on_stack[reg.GetPhysicalIndex()] != kRegIsNotOnStack) {
      as.Movq(reg, {.base = as.rsp, .disp = regs_info.regs_on_stack[reg.GetPhysicalIndex()] * 8});
    }
  }
  for (auto reg : kCallerSavedXMMRegs) {
    if (regs_info.simd_regs_on_stack[reg.GetPhysicalIndex()] != kRegIsNotOnStack) {
      as.Movdqa(reg,
                {.base = as.rsp, .disp = regs_info.simd_regs_on_stack[reg.GetPhysicalIndex()] * 8});
    }
  }

  as.Addq(as.rsp, kSaveAreaSize * 8);
}

// Nonfunctional assembler used in static_assert expressions. It doesn't do anything, but it
// allows us to call InitArgs at compile time with the same argument types as would occur during
// execution.
//
// This turns a runtime check into a compile-time check and thus allows us to catch weird corner
// cases faster.
class ConstExprCheckAssembler {
 public:
  using Operand = MacroAssembler<x86_64::Assembler>::Operand;
  using Register = MacroAssembler<x86_64::Assembler>::Register;
  using XMMRegister = MacroAssembler<x86_64::Assembler>::XMMRegister;
  static constexpr auto rsp = MacroAssembler<x86_64::Assembler>::rsp;

  constexpr ConstExprCheckAssembler() = default;

  template <typename U, typename V>
  constexpr void Expand(Register, Operand) const {}
  template <typename U, typename V>
  constexpr void Expand(Register, Register) const {}

  template <typename U>
  constexpr void Mov(Operand, Register) const {}
  template <typename U>
  constexpr void Mov(Register, Operand) const {}
  template <typename U>
  constexpr void Mov(Register, Register) const {}

  constexpr void Movl(Register, int32_t) const {}

  template <typename U>
  constexpr void Movs(Operand, XMMRegister) const {}
  template <typename U>
  constexpr void Movs(XMMRegister, Operand) const {}
  template <typename U>
  constexpr void Movs(XMMRegister, XMMRegister) const {}

  template <typename U>
  constexpr void Vmovs(Operand, XMMRegister) const {}
  template <typename U>
  constexpr void Vmovs(XMMRegister, Operand) const {}
  template <typename U>
  constexpr void Vmovs(XMMRegister, XMMRegister, XMMRegister) const {}
};
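
// InitArgsVerify below passes ConstExprCheckAssembler to InitArgs inside a static_assert to
// perform this compile-time check.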

// Helper wrapper to pass the intrinsic type down the generic lambda.
template <typename T, typename U>
struct ArgWrap {
  using AssemblerType = T;
  using IntrinsicType = U;
  AssemblerType value;
};

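// Integer argument registers of the host x86-64 calling convention, in order.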
static constexpr x86_64::Assembler::Register kAbiArgs[] = {
    x86_64::Assembler::rdi,
    x86_64::Assembler::rsi,
    x86_64::Assembler::rdx,
    x86_64::Assembler::rcx,
    x86_64::Assembler::r8,
    x86_64::Assembler::r9,
};

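// Floating-point (SSE) argument registers of the same calling convention, in order.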
static constexpr x86_64::Assembler::XMMRegister kAbiSimdArgs[] = {
    x86_64::Assembler::xmm0,
    x86_64::Assembler::xmm1,
    x86_64::Assembler::xmm2,
    x86_64::Assembler::xmm3,
    x86_64::Assembler::xmm4,
    x86_64::Assembler::xmm5,
    x86_64::Assembler::xmm6,
    x86_64::Assembler::xmm7,
};

// Assumes RSP points to the preallocated stack args area.
template <typename IntrinsicResType,
          typename... IntrinsicArgType,
          typename MacroAssembler,
          typename... AssemblerArgType>
constexpr bool InitArgs(MacroAssembler&& as, bool has_avx, AssemblerArgType... args) {
  using Assembler = std::decay_t<MacroAssembler>;
  using Register = typename Assembler::Register;
  using XMMRegister = typename Assembler::XMMRegister;
  using Float32 = intrinsics::Float32;
  using Float64 = intrinsics::Float64;

  // All ABI argument registers are saved among the caller-saved registers, so we can safely
  // initialize them now. When an intrinsic receives its argument from such a register we'll read
  // it from the stack, so there is no early-clobbering problem. Callee-saved regs are never ABI
  // arguments, so we can move them to an ABI reg directly.

  size_t gp_index = 0;
  size_t simd_index = 0;
  bool success = ([&as, &gp_index, &simd_index, has_avx](auto arg) -> bool {
    using AssemblerType = typename decltype(arg)::AssemblerType;
    using IntrinsicType = typename decltype(arg)::IntrinsicType;

    if constexpr (std::is_integral_v<IntrinsicType>) {
      if (gp_index == std::size(kAbiArgs)) {
        return false;
      }
    } else if constexpr (std::is_same_v<IntrinsicType, Float32> ||
                         std::is_same_v<IntrinsicType, Float64>) {
      if (simd_index == std::size(kAbiSimdArgs)) {
        return false;
      }
    } else {
      return false;
    }

    // Note: the ABI mandates extension up to 32 bits and zero-filling the upper half.
    if constexpr (std::is_integral_v<IntrinsicType> && sizeof(IntrinsicType) <= sizeof(int32_t) &&
                  std::is_integral_v<AssemblerType> && sizeof(AssemblerType) <= sizeof(int32_t)) {
      as.Movl(kAbiArgs[gp_index++], static_cast<int32_t>(arg.value));
    } else if constexpr (std::is_integral_v<IntrinsicType> &&
                         sizeof(IntrinsicType) == sizeof(int64_t) &&
                         std::is_integral_v<AssemblerType> &&
                         sizeof(AssemblerType) == sizeof(int64_t)) {
      as.template Expand<int64_t, IntrinsicType>(kAbiArgs[gp_index++],
                                                 static_cast<int64_t>(arg.value));
    } else if constexpr (std::is_integral_v<IntrinsicType> &&
                         sizeof(IntrinsicType) <= sizeof(int32_t) &&
                         std::is_same_v<AssemblerType, Register>) {
      if (kRegOffsetsOnStack[arg.value.GetPhysicalIndex()] == kRegIsNotOnStack) {
        as.template Expand<int32_t, IntrinsicType>(kAbiArgs[gp_index++], arg.value);
      } else {
        as.template Expand<int32_t, IntrinsicType>(
            kAbiArgs[gp_index++],
            {.base = Assembler::rsp, .disp = kRegOffsetsOnStack[arg.value.GetPhysicalIndex()] * 8});
      }
    } else if constexpr (std::is_integral_v<IntrinsicType> &&
                         sizeof(IntrinsicType) == sizeof(int64_t) &&
                         std::is_same_v<AssemblerType, Register>) {
      if (kRegOffsetsOnStack[arg.value.GetPhysicalIndex()] == kRegIsNotOnStack) {
        as.template Expand<int64_t, IntrinsicType>(kAbiArgs[gp_index++], arg.value);
      } else {
        as.template Expand<int64_t, IntrinsicType>(
            kAbiArgs[gp_index++],
            {.base = Assembler::rsp, .disp = kRegOffsetsOnStack[arg.value.GetPhysicalIndex()] * 8});
      }
    } else if constexpr ((std::is_same_v<IntrinsicType, Float32> ||
                          std::is_same_v<IntrinsicType, Float64>) &&
                         std::is_same_v<AssemblerType, XMMRegister>) {
      if (kSimdRegOffsetsOnStack[arg.value.GetPhysicalIndex()] == kRegIsNotOnStack) {
        if (has_avx) {
          as.template Vmovs<IntrinsicType>(
              kAbiSimdArgs[simd_index], kAbiSimdArgs[simd_index], arg.value);
          simd_index++;
        } else {
          as.template Movs<IntrinsicType>(kAbiSimdArgs[simd_index++], arg.value);
        }
      } else {
        if (has_avx) {
          as.template Vmovs<IntrinsicType>(
              kAbiSimdArgs[simd_index++],
              {.base = as.rsp, .disp = kSimdRegOffsetsOnStack[arg.value.GetPhysicalIndex()] * 8});
        } else {
          as.template Movs<IntrinsicType>(
              kAbiSimdArgs[simd_index++],
              {.base = as.rsp, .disp = kSimdRegOffsetsOnStack[arg.value.GetPhysicalIndex()] * 8});
        }
      }
    } else {
      static_assert(kDependentTypeFalse<std::tuple<IntrinsicType, AssemblerType>>,
                    "Unknown parameter type, please add support to CallIntrinsic");
    }
    return true;
  }(ArgWrap<AssemblerArgType, IntrinsicArgType>{.value = args}) && ...);
  return success;
}

// Forward results from ABI registers to the result-specified registers and mark those registers
// in the returned StoredRegsInfo with kRegIsNotOnStack to prevent them from being restored from
// the stack.
template <typename IntrinsicResType, typename AssemblerResType>
StoredRegsInfo ForwardResults(MacroAssembler<x86_64::Assembler>& as, AssemblerResType result) {
  using Assembler = MacroAssembler<x86_64::Assembler>;
  using Register = Assembler::Register;
  using XMMRegister = Assembler::XMMRegister;
  using Float32 = intrinsics::Float32;
  using Float64 = intrinsics::Float64;

  StoredRegsInfo regs_info = {.regs_on_stack = kRegOffsetsOnStack,
                              .simd_regs_on_stack = kSimdRegOffsetsOnStack};

  if constexpr (Assembler::kFormatIs<IntrinsicResType, std::tuple<int32_t>, std::tuple<uint32_t>> &&
                std::is_same_v<AssemblerResType, Register>) {
    // Note: even unsigned 32-bit results are sign-extended to a 64-bit register on RV64.
    regs_info.regs_on_stack[result.GetPhysicalIndex()] = kRegIsNotOnStack;
    as.Expand<int64_t, int32_t>(result, Assembler::rax);
  } else if constexpr (Assembler::
                           kFormatIs<IntrinsicResType, std::tuple<int64_t>, std::tuple<uint64_t>> &&
                       std::is_same_v<AssemblerResType, Register>) {
    regs_info.regs_on_stack[result.GetPhysicalIndex()] = kRegIsNotOnStack;
    as.Mov<int64_t>(result, Assembler::rax);
  } else if constexpr (Assembler::
                           kFormatIs<IntrinsicResType, std::tuple<Float32>, std::tuple<Float64>> &&
                       std::is_same_v<AssemblerResType, XMMRegister>) {
    using ResType0 = std::tuple_element_t<0, IntrinsicResType>;
    regs_info.simd_regs_on_stack[result.GetPhysicalIndex()] = kRegIsNotOnStack;
    if (host_platform::kHasAVX) {
      as.Vmovs<ResType0>(result, result, Assembler::xmm0);
    } else {
      as.Movs<ResType0>(result, Assembler::xmm0);
    }
  } else if constexpr (std::tuple_size_v<IntrinsicResType> == 2) {
    using ResType0 = std::tuple_element_t<0, IntrinsicResType>;
    using ResType1 = std::tuple_element_t<1, IntrinsicResType>;
    auto [result0, result1] = result;
    if constexpr (Assembler::kFormatIs<ResType0, int32_t, uint32_t> &&
                  std::is_same_v<std::tuple_element_t<0, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result0.GetPhysicalIndex()] = kRegIsNotOnStack;
      as.Expand<int64_t, int32_t>(result0, Assembler::rax);
    } else if constexpr (Assembler::kFormatIs<ResType0, int64_t, uint64_t> &&
                         std::is_same_v<std::tuple_element_t<0, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result0.GetPhysicalIndex()] = kRegIsNotOnStack;
      as.Mov<int64_t>(result0, Assembler::rax);
    } else {
      static_assert(kDependentTypeFalse<std::tuple<IntrinsicResType, AssemblerResType>>,
                    "Unknown result type, please add support to CallIntrinsic");
    }
    if constexpr (Assembler::kFormatIs<ResType1, int32_t, uint32_t> &&
                  std::is_same_v<std::tuple_element_t<1, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result1.GetPhysicalIndex()] = kRegIsNotOnStack;
      as.Expand<int64_t, int32_t>(result1, Assembler::rdx);
    } else if constexpr (Assembler::kFormatIs<ResType1, int64_t, uint64_t> &&
                         std::is_same_v<std::tuple_element_t<1, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result1.GetPhysicalIndex()] = kRegIsNotOnStack;
      as.Mov<int64_t>(result1, Assembler::rdx);
    } else {
      static_assert(kDependentTypeFalse<std::tuple<IntrinsicResType, AssemblerResType>>,
                    "Unknown result type, please add support to CallIntrinsic");
    }
  } else {
    static_assert(kDependentTypeFalse<std::tuple<IntrinsicResType, AssemblerResType>>,
                  "Unknown result type, please add support to CallIntrinsic");
  }
  return regs_info;
}

// Note: we can ignore the status of the actual InitArgs call because we know that InitArgs will
// succeed if the call in the static_assert succeeded.
//
// The AVX flag shouldn't change the outcome, but better safe than sorry.

template <typename IntrinsicResType, typename... IntrinsicArgType, typename... AssemblerArgType>
void InitArgsVerify(AssemblerArgType...) {
  constexpr auto MakeDummyAssemblerType = []<typename AssemblerType>() {
    if constexpr (std::is_same_v<AssemblerType, x86_64::Assembler::Register>) {
      // Note: we can't use no_register here, but any "real" register should work.
      return x86_64::Assembler::rax;
    } else if constexpr (std::is_same_v<AssemblerType, x86_64::Assembler::XMMRegister>) {
      // Note: we can't use no_xmm_register here, but any "real" register should work.
      return x86_64::Assembler::xmm0;
    } else {
      return AssemblerType{0};
    }
  };
  static_assert(InitArgs<IntrinsicResType, IntrinsicArgType...>(
      ConstExprCheckAssembler(),
      true,
      MakeDummyAssemblerType.template operator()<AssemblerArgType>()...));
  static_assert(InitArgs<IntrinsicResType, IntrinsicArgType...>(
      ConstExprCheckAssembler(),
      false,
      MakeDummyAssemblerType.template operator()<AssemblerArgType>()...));
}

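// Spills caller-saved registers, marshals args into ABI registers, calls the intrinsic, forwards
// its results into `result`, and restores the remaining registers.
//
// Usage sketch (a hypothetical intrinsic; `as`, `res_reg` and `arg_reg` are assumed to exist;
// all template arguments are deduced):
//
//   std::tuple<uint64_t> SomeIntrinsic(uint64_t);
//   ...
//   CallIntrinsic(as, SomeIntrinsic, res_reg, arg_reg);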
template <typename AssemblerResType,
          typename IntrinsicResType,
          typename... IntrinsicArgType,
          typename... AssemblerArgType>
void CallIntrinsic(MacroAssembler<x86_64::Assembler>& as,
                   IntrinsicResType (*function)(IntrinsicArgType...),
                   AssemblerResType result,
                   AssemblerArgType... args) {
  PushCallerSaved(as);

  InitArgsVerify<IntrinsicResType, IntrinsicArgType...>(args...);
  InitArgs<IntrinsicResType, IntrinsicArgType...>(as, host_platform::kHasAVX, args...);

  as.Call(reinterpret_cast<void*>(function));

  auto regs_info = ForwardResults<IntrinsicResType>(as, result);

  PopCallerSaved(as, regs_info);
}

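// Overload for intrinsics that return void: there are no results to forward, so all saved
// registers are restored.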
template <typename AssemblerResType, typename... IntrinsicArgType, typename... AssemblerArgType>
void CallIntrinsic(MacroAssembler<x86_64::Assembler>& as,
                   void (*function)(IntrinsicArgType...),
                   AssemblerArgType... args) {
  PushCallerSaved(as);

  InitArgsVerify<void, IntrinsicArgType...>(args...);
  InitArgs<void, IntrinsicArgType...>(as, host_platform::kHasAVX, args...);

  as.Call(reinterpret_cast<void*>(function));

  PopCallerSaved(
      as, {.regs_on_stack = kRegOffsetsOnStack, .simd_regs_on_stack = kSimdRegOffsetsOnStack});
}

}  // namespace berberis::call_intrinsic

#endif  // BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_CALL_INTRINSIC_H_