/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef BERBERIS_HEAVY_OPTIMIZER_RISCV64_INLINE_INTRINSIC_H_
#define BERBERIS_HEAVY_OPTIMIZER_RISCV64_INLINE_INTRINSIC_H_

#include <cfenv>
#include <cstdint>
#include <tuple>
#include <type_traits>
#include <utility>
#include <variant>

#include "berberis/assembler/x86_64.h"
#include "berberis/backend/common/machine_ir.h"
#include "berberis/backend/x86_64/machine_insn_intrinsics.h"
#include "berberis/backend/x86_64/machine_ir.h"
#include "berberis/backend/x86_64/machine_ir_builder.h"
#include "berberis/base/checks.h"
#include "berberis/base/config.h"
#include "berberis/base/dependent_false.h"
#include "berberis/intrinsics/all_to_x86_32_or_x86_64/intrinsics_bindings.h"
#include "berberis/intrinsics/intrinsics.h"
#include "berberis/intrinsics/intrinsics_args.h"
#include "berberis/intrinsics/intrinsics_process_bindings.h"
#include "berberis/intrinsics/macro_assembler.h"
#include "berberis/runtime_primitives/platform.h"

#include "simd_register.h"

namespace berberis {

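// Tries to inline the intrinsic identified by kFunction directly into the machine IR
// instead of emitting a call to it. Returns true on success; when it returns false no
// instructions have been emitted and the caller must handle the intrinsic another way.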
template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
bool TryInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
                                         ResType result,
                                         FlagRegister flag_register,
                                         ArgType... args);

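// Special-cases intrinsics that take an explicit rounding mode argument: when the guest
// requests the dynamic rounding mode (FPFlags::DYN), the operation can be performed with
// the host's current rounding mode, so we inline the HostRounding variant instead.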
template <auto kFunc>
class InlineIntrinsic {
 public:
  template <typename ResType, typename FlagRegister, typename... ArgType>
  static bool TryInlineWithHostRounding(x86_64::MachineIRBuilder* builder,
                                        ResType result,
                                        FlagRegister flag_register,
                                        ArgType... args) {
    std::tuple args_tuple = std::make_tuple(args...);
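    // Every branch below follows the same pattern: if kFunc is the matching
    // rounding-mode-aware intrinsic and the guest requested the dynamic rounding
    // mode, inline the HostRounding variant; for static rounding modes return
    // false and let the caller handle the original intrinsic.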
    if constexpr (IsTagEq<&intrinsics::FMul<intrinsics::Float64>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FMulHostRounding<intrinsics::Float64>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FMul<intrinsics::Float32>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FMulHostRounding<intrinsics::Float32>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FAdd<intrinsics::Float64>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FAddHostRounding<intrinsics::Float64>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FAdd<intrinsics::Float32>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FAddHostRounding<intrinsics::Float32>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FSub<intrinsics::Float64>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FSubHostRounding<intrinsics::Float64>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FSub<intrinsics::Float32>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FSubHostRounding<intrinsics::Float32>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FDiv<intrinsics::Float64>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FDivHostRounding<intrinsics::Float64>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FDiv<intrinsics::Float32>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FDivHostRounding<intrinsics::Float32>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int64_t, intrinsics::Float64>>()) {
      auto [rm, frm, src] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FCvtFloatToIntegerHostRounding<int64_t, intrinsics::Float64>>(
          builder, result, flag_register, src);
    } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int64_t, intrinsics::Float32>>()) {
      auto [rm, frm, src] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FCvtFloatToIntegerHostRounding<int64_t, intrinsics::Float32>>(
          builder, result, flag_register, src);
    } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int32_t, intrinsics::Float64>>()) {
      auto [rm, frm, src] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FCvtFloatToIntegerHostRounding<int32_t, intrinsics::Float64>>(
          builder, result, flag_register, src);
    } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int32_t, intrinsics::Float32>>()) {
      auto [rm, frm, src] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FCvtFloatToIntegerHostRounding<int32_t, intrinsics::Float32>>(
          builder, result, flag_register, src);
    }
    return false;
  }

 private:
  // Comparison of pointers that point to different functions is generally not constexpr,
  // since such functions may be merged in the object code (comparing pointers to the same
  // function is constexpr). This helper compares them via template tags instead, making it
  // explicit that we are not worried about such subtleties here.
  template <auto kFunction>
  class FunctionCompareTag;

  // Note: if we define this as a variable, clang doesn't consider it constexpr in the
  // TryInline functions.
  template <auto kOtherFunction>
  static constexpr bool IsTagEq() {
    return std::is_same_v<FunctionCompareTag<kFunc>, FunctionCompareTag<kOtherFunction>>;
  }
};

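// Emits a move between virtual registers, picking instructions based on the destination
// and source register classes: a PseudoCopy for same-kind moves, Movd/Movq (or their AVX
// forms when available) for transfers between general-purpose and XMM registers.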
template <typename DestRegClass, typename SrcRegClass>
void Mov(x86_64::MachineIRBuilder* builder, MachineReg dest, MachineReg src) {
  using DestType = typename DestRegClass::Type;
  using SrcType = typename SrcRegClass::Type;
  constexpr const auto src_reg_class = SrcRegClass::template kRegClass<x86_64::MachineInsnX86_64>;
  if constexpr (std::is_integral_v<DestType>) {
    if constexpr (std::is_integral_v<SrcType>) {
      builder->Gen<PseudoCopy>(dest, src, src_reg_class.RegSize());
    } else if constexpr (SrcRegClass::kAsRegister == 'x') {
      if constexpr (src_reg_class.RegSize() == 4) {
        if (host_platform::kHasAVX) {
          builder->Gen<x86_64::VmovdRegXReg>(dest, src);
        } else {
          builder->Gen<x86_64::MovdRegXReg>(dest, src);
        }
      } else {
        static_assert(src_reg_class.RegSize() >= 8);
        if (host_platform::kHasAVX) {
          builder->Gen<x86_64::VmovqRegXReg>(dest, src);
        } else {
          builder->Gen<x86_64::MovqRegXReg>(dest, src);
        }
      }
    } else {
      static_assert(kDependentTypeFalse<std::tuple<DestRegClass, SrcRegClass>>);
    }
  } else if constexpr (DestRegClass::kAsRegister == 'x') {
    if constexpr (src_reg_class.RegSize() == 4) {
      if constexpr (std::is_integral_v<SrcType>) {
        if (host_platform::kHasAVX) {
          builder->Gen<x86_64::VmovdXRegReg>(dest, src);
        } else {
          builder->Gen<x86_64::MovdXRegReg>(dest, src);
        }
      } else if constexpr (SrcRegClass::kAsRegister == 'x') {
        builder->Gen<PseudoCopy>(dest, src, 16);
      } else {
        static_assert(kDependentTypeFalse<std::tuple<DestRegClass, SrcRegClass>>);
      }
    } else {
      static_assert(src_reg_class.RegSize() >= 8);
      if constexpr (std::is_integral_v<SrcType>) {
        if (host_platform::kHasAVX) {
          builder->Gen<x86_64::VmovqXRegReg>(dest, src);
        } else {
          builder->Gen<x86_64::MovqXRegReg>(dest, src);
        }
      } else if constexpr (SrcRegClass::kAsRegister == 'x') {
        builder->Gen<PseudoCopy>(dest, src, 16);
      } else {
        static_assert(kDependentTypeFalse<std::tuple<DestRegClass, SrcRegClass>>);
      }
    }
  }
}

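// Copies an input argument into the register class expected by the intrinsic binding,
// unwrapping a SimdReg input into its underlying machine register first.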
template <typename DestRegClass, typename SrcReg>
void MovFromInput(x86_64::MachineIRBuilder* builder, MachineReg dest, SrcReg src) {
  if constexpr (std::is_same_v<SrcReg, SimdReg>) {
    Mov<DestRegClass, intrinsics::bindings::XmmReg>(builder, dest, src.machine_reg());
  } else {
    Mov<DestRegClass, intrinsics::bindings::GeneralReg64>(builder, dest, src);
  }
}
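
// Copies an intrinsic output register back into the result, unwrapping a SimdReg
// result into its underlying machine register first.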
template <typename SrcRegClass, typename DestReg>
void MovToResult(x86_64::MachineIRBuilder* builder, DestReg dest, MachineReg src) {
  if constexpr (std::is_same_v<DestReg, SimdReg>) {
    Mov<intrinsics::bindings::XmmReg, SrcRegClass>(builder, dest.machine_reg(), src);
  } else {
    Mov<intrinsics::bindings::GeneralReg64, SrcRegClass>(builder, dest, src);
  }
}

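// Inlines one intrinsic by walking its assembler bindings. ProcessBindings looks up the
// binding matching kFunction and invokes the call operators below, which turn each bound
// argument into a machine operand and emit the corresponding MachineInsn into the builder.
// Constructing an instance runs the whole process; the bool conversion reports success.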
template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
class TryBindingBasedInlineIntrinsicForHeavyOptimizer {
  template <auto kFunctionForFriend,
            typename ResTypeForFriend,
            typename FlagRegisterForFriend,
            typename... ArgTypeForFriend>
  friend bool TryInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
                                                  ResTypeForFriend result,
                                                  FlagRegisterForFriend flag_register,
                                                  ArgTypeForFriend... args);
  template <auto kFunctionForFriend, typename FlagRegisterForFriend, typename... ArgTypeForFriend>
  friend bool TryInlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder* builder,
                                                      FlagRegisterForFriend flag_register,
                                                      ArgTypeForFriend... args);

  template <auto kFunc,
            typename MacroAssembler,
            typename Result,
            typename Callback,
            typename... Args>
  friend Result intrinsics::bindings::ProcessBindings(Callback callback,
                                                      Result def_result,
                                                      Args&&... args);

  template <auto kIntrinsicTemplateName,
            auto kMacroInstructionTemplateName,
            auto kMnemo,
            typename GetOpcode,
            typename CPUIDRestrictionTemplateValue,
            typename PreciseNanOperationsHandlingTemplateValue,
            bool kSideEffectsTemplateValue,
            typename... Types>
  friend class intrinsics::bindings::AsmCallInfo;

  TryBindingBasedInlineIntrinsicForHeavyOptimizer() = delete;
  TryBindingBasedInlineIntrinsicForHeavyOptimizer(
      const TryBindingBasedInlineIntrinsicForHeavyOptimizer&) = delete;
  TryBindingBasedInlineIntrinsicForHeavyOptimizer(
      TryBindingBasedInlineIntrinsicForHeavyOptimizer&&) = delete;
  TryBindingBasedInlineIntrinsicForHeavyOptimizer& operator=(
      const TryBindingBasedInlineIntrinsicForHeavyOptimizer&) = delete;
  TryBindingBasedInlineIntrinsicForHeavyOptimizer& operator=(
      TryBindingBasedInlineIntrinsicForHeavyOptimizer&&) = delete;

  TryBindingBasedInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
                                                  ResType result,
                                                  FlagRegister flag_register,
                                                  ArgType... args)
      : builder_(builder),
        result_{result},
        xmm_result_reg_{},
        implicit_result_reg_{},
        flag_register_{flag_register},
        input_args_(std::tuple{args...}),
        success_(intrinsics::bindings::ProcessBindings<
                 kFunction,
                 typename MacroAssembler<x86_64::Assembler>::MacroAssemblers,
                 bool,
                 TryBindingBasedInlineIntrinsicForHeavyOptimizer&>(*this, false)) {}

  operator bool() { return success_; }

  // TODO(b/232598137): The MachineIR bindings for some macros can't be instantiated yet.
  // This should be removed once they're supported.
  template <typename AsmCallInfo,
            std::enable_if_t<AsmCallInfo::template kOpcode<MachineOpcode> ==
                                 MachineOpcode::kMachineOpUndefined,
                             bool> = true>
  std::optional<bool> /*ProcessBindingsClient*/ operator()(AsmCallInfo /* asm_call_info */) {
    return false;
  }

  template <typename AsmCallInfo,
            std::enable_if_t<AsmCallInfo::template kOpcode<MachineOpcode> !=
                                 MachineOpcode::kMachineOpUndefined,
                             bool> = true>
  std::optional<bool> /*ProcessBindingsClient*/ operator()(AsmCallInfo asm_call_info) {
    static_assert(std::is_same_v<decltype(kFunction), typename AsmCallInfo::IntrinsicType>);
    static_assert(std::is_same_v<typename AsmCallInfo::PreciseNanOperationsHandling,
                                 intrinsics::bindings::NoNansOperation>);
    using CPUIDRestriction = AsmCallInfo::CPUIDRestriction;
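    // Bail out if the binding requires a CPU feature the host doesn't have.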
    if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasAVX>) {
      if (!host_platform::kHasAVX) {
        return false;
      }
    } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasBMI>) {
      if (!host_platform::kHasBMI) {
        return false;
      }
    } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasLZCNT>) {
      if (!host_platform::kHasLZCNT) {
        return false;
      }
    } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasPOPCNT>) {
      if (!host_platform::kHasPOPCNT) {
        return false;
      }
    } else if constexpr (std::is_same_v<CPUIDRestriction,
                                        intrinsics::bindings::NoCPUIDRestriction>) {
      // No restrictions. Do nothing.
    } else {
      static_assert(berberis::kDependentValueFalse<AsmCallInfo::kCPUIDRestriction>);
    }

    // constructor_args_t here is used to generate a tuple of constructor args from the AsmCallInfo
    // bindings. The tuple parameter pack will be expanded by the tuple specialization on the
    // MachineInsn in machine_insn_intrinsics.h.
    using MachineInsn = typename AsmCallInfo::template MachineInsn<berberis::x86_64::MachineInsn,
                                                                   x86_64::constructor_args_t,
                                                                   MachineOpcode>;
    std::apply(MachineInsn::kGenFunc,
               std::tuple_cat(std::tuple<x86_64::MachineIRBuilder&>{*builder_},
                              UnwrapSimdReg(AsmCallInfo::template MakeTuplefromBindings<
                                            TryBindingBasedInlineIntrinsicForHeavyOptimizer&>(
                                  *this, asm_call_info))));
    ProcessBindingsResults<AsmCallInfo>(type_wrapper<typename AsmCallInfo::Bindings>());
    return true;
  }

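  // MakeTuplefromBindings client: turns one argument binding into a tuple of machine
  // operands. Immediates are passed through as-is; register arguments go through
  // ProcessArgInput.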
  template <typename ArgBinding, typename AsmCallInfo>
  auto /*MakeTuplefromBindingsClient*/ operator()(ArgTraits<ArgBinding>, AsmCallInfo) {
    static constexpr const auto& arg_info = ArgTraits<ArgBinding>::arg_info;
    if constexpr (arg_info.arg_type == ArgInfo::IMM_ARG) {
      auto imm = std::get<arg_info.from>(input_args_);
      return std::tuple{imm};
    } else {
      return ProcessArgInput<ArgBinding, AsmCallInfo>();
    }
  }

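  // Materializes the operands for one register binding depending on how the intrinsic
  // uses it (input, output, in/out, or temporary), emitting moves between input
  // registers, result registers, and freshly allocated virtual registers as needed.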
  template <typename ArgBinding, typename AsmCallInfo>
  auto ProcessArgInput() {
    static constexpr const auto& arg_info = ArgTraits<ArgBinding>::arg_info;
    using RegisterClass = typename ArgTraits<ArgBinding>::RegisterClass;
    using Usage = typename ArgTraits<ArgBinding>::Usage;
    static constexpr const auto kNumOut = std::tuple_size_v<typename AsmCallInfo::OutputArguments>;

    if constexpr (arg_info.arg_type == ArgInfo::IN_ARG) {
      static_assert(std::is_same_v<Usage, intrinsics::bindings::Use>);
      static_assert(!RegisterClass::kIsImplicitReg);
      if constexpr (RegisterClass::kAsRegister == 'x' &&
                    std::is_same_v<std::tuple_element_t<arg_info.from, std::tuple<ArgType...>>,
                                   MachineReg>) {
        auto xmm_reg = AllocVReg();
        MovFromInput<RegisterClass>(builder_, xmm_reg, std::get<arg_info.from>(input_args_));
        return std::tuple{xmm_reg};
      } else {
        return std::tuple{std::get<arg_info.from>(input_args_)};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::IN_OUT_ARG) {
      static_assert(!std::is_same_v<ResType, std::monostate>);
      static_assert(std::is_same_v<Usage, intrinsics::bindings::UseDef>);
      static_assert(!RegisterClass::kIsImplicitReg);
      if constexpr (RegisterClass::kAsRegister == 'x') {
        if constexpr (kNumOut > 1) {
          static_assert(kDependentTypeFalse<ArgTraits<ArgBinding>>);
        } else {
          CHECK(xmm_result_reg_.IsInvalidReg());
          xmm_result_reg_ = AllocVReg();
          MovFromInput<RegisterClass>(
              builder_, xmm_result_reg_, std::get<arg_info.from>(input_args_));
          return std::tuple{xmm_result_reg_};
        }
      } else if constexpr (kNumOut > 1) {
        auto res = std::get<arg_info.to>(result_);
        MovFromInput<RegisterClass>(builder_, res, std::get<arg_info.from>(input_args_));
        return std::tuple{res};
      } else {
        MovFromInput<RegisterClass>(builder_, result_, std::get<arg_info.from>(input_args_));
        return std::tuple{result_};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::IN_OUT_TMP_ARG) {
      static_assert(!std::is_same_v<ResType, std::monostate>);
      static_assert(std::is_same_v<Usage, intrinsics::bindings::UseDef>);
      static_assert(RegisterClass::kIsImplicitReg);
      if constexpr (kNumOut > 1) {
        static_assert(kDependentTypeFalse<ArgTraits<ArgBinding>>);
      } else {
        CHECK(implicit_result_reg_.IsInvalidReg());
        implicit_result_reg_ = AllocVReg();
        MovFromInput<RegisterClass>(
            builder_, implicit_result_reg_, std::get<arg_info.from>(input_args_));
        return std::tuple{implicit_result_reg_};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::IN_TMP_ARG) {
      if constexpr (RegisterClass::kIsImplicitReg) {
        auto implicit_reg = AllocVReg();
        MovFromInput<RegisterClass>(builder_, implicit_reg, std::get<arg_info.from>(input_args_));
        return std::tuple{implicit_reg};
      } else {
        static_assert(std::is_same_v<Usage, intrinsics::bindings::UseDef>);
        return std::tuple{std::get<arg_info.from>(input_args_)};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::OUT_TMP_ARG) {
      if constexpr (kNumOut > 1) {
        static_assert(kDependentTypeFalse<ArgTraits<ArgBinding>>);
      } else {
        CHECK(implicit_result_reg_.IsInvalidReg());
        implicit_result_reg_ = AllocVReg();
        return std::tuple{implicit_result_reg_};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::OUT_ARG) {
      static_assert(!std::is_same_v<ResType, std::monostate>);
      static_assert(std::is_same_v<Usage, intrinsics::bindings::Def> ||
                    std::is_same_v<Usage, intrinsics::bindings::DefEarlyClobber>);
      if constexpr (RegisterClass::kAsRegister == 'x') {
        CHECK(xmm_result_reg_.IsInvalidReg());
        xmm_result_reg_ = AllocVReg();
        return std::tuple{xmm_result_reg_};
      } else if constexpr (kNumOut > 1) {
        return std::tuple{std::get<arg_info.to>(result_)};
      } else if constexpr (RegisterClass::kIsImplicitReg) {
        if constexpr (RegisterClass::kAsRegister == 0) {
          return std::tuple{flag_register_};
        } else {
          CHECK(implicit_result_reg_.IsInvalidReg());
          implicit_result_reg_ = AllocVReg();
          return std::tuple{implicit_result_reg_};
        }
      } else {
        return std::tuple{result_};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::TMP_ARG) {
      static_assert(std::is_same_v<Usage, intrinsics::bindings::Def> ||
                    std::is_same_v<Usage, intrinsics::bindings::DefEarlyClobber>);
      if constexpr (RegisterClass::kAsRegister == 'm') {
        static_assert(std::is_same_v<Usage, intrinsics::bindings::DefEarlyClobber>);
        if (scratch_arg_ >= 2) {
          FATAL("Only two scratch registers are supported for now");
        }
        return std::tuple{x86_64::kMachineRegRBP,
                          static_cast<int32_t>(offsetof(ThreadState, intrinsics_scratch_area) +
                                               config::kScratchAreaSlotSize * scratch_arg_++)};
      } else if constexpr (RegisterClass::kIsImplicitReg) {
        if constexpr (RegisterClass::kAsRegister == 0) {
          return std::tuple{flag_register_};
        } else {
          auto implicit_reg = AllocVReg();
          return std::tuple{implicit_reg};
        }
      } else {
        auto reg = AllocVReg();
        return std::tuple{reg};
      }
    } else {
      static_assert(berberis::kDependentValueFalse<arg_info.arg_type>);
    }
  }

  template <typename T>
  struct type_wrapper {
    using type = T;
  };

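  // Runs after the intrinsic instruction has been emitted: copies output registers back
  // into the result and sign-extends 32-bit integer results to 64 bits.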
  template <typename AsmCallInfo, typename... ArgBinding>
  void ProcessBindingsResults(type_wrapper<std::tuple<ArgBinding...>>) {
    (ProcessBindingResult<ArgBinding, AsmCallInfo>(), ...);
    if constexpr (std::tuple_size_v<typename AsmCallInfo::OutputArguments> == 0) {
      // No return value. Do nothing.
    } else if constexpr (std::tuple_size_v<typename AsmCallInfo::OutputArguments> == 1) {
      using ReturnType = std::tuple_element_t<0, typename AsmCallInfo::OutputArguments>;
      if constexpr (std::is_integral_v<ReturnType> && sizeof(ReturnType) < sizeof(int32_t)) {
        // Don't handle these types just yet. We are not sure how to expand them and there
        // are no examples.
        static_assert(kDependentTypeFalse<ReturnType>);
      }
      if constexpr (std::is_same_v<ReturnType, int32_t> || std::is_same_v<ReturnType, uint32_t>) {
        // Expands 32-bit values as signed, even if the actual results are processed as unsigned!
        // TODO(b/308951522): Replace with an Expand node once it's created.
        builder_->Gen<x86_64::MovsxlqRegReg>(result_, result_);
      } else if constexpr (std::is_integral_v<ReturnType> &&
                           sizeof(ReturnType) == sizeof(int64_t)) {
        // Do nothing, we have already produced an expanded value.
      } else if constexpr (std::is_same_v<ReturnType, intrinsics::Float32> ||
                           std::is_same_v<ReturnType, intrinsics::Float64>) {
        // Do nothing, NaN boxing is handled by the semantics player.
      } else {
        static_assert(kDependentTypeFalse<ReturnType>);
      }
    } else {
      static_assert(kDependentTypeFalse<typename AsmCallInfo::OutputArguments>);
    }
  }

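  // Copies one output binding into the result register. Immediates, memory operands,
  // and flag registers need no post-processing.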
  template <typename ArgBinding, typename AsmCallInfo>
  void ProcessBindingResult() {
    if constexpr (ArgTraits<ArgBinding>::Class::kIsImmediate) {
      return;
    } else {
      using RegisterClass = typename ArgTraits<ArgBinding>::RegisterClass;
      static constexpr const auto& arg_info = ArgTraits<ArgBinding>::arg_info;
      if constexpr (RegisterClass::kAsRegister == 'm' || RegisterClass::kAsRegister == 0) {
        return;
      } else if constexpr ((arg_info.arg_type == ArgInfo::IN_OUT_ARG ||
                            arg_info.arg_type == ArgInfo::OUT_ARG) &&
                           RegisterClass::kAsRegister == 'x') {
        CHECK(!xmm_result_reg_.IsInvalidReg());
        MovToResult<RegisterClass>(builder_, result_, xmm_result_reg_);
      } else if constexpr ((arg_info.arg_type == ArgInfo::OUT_ARG ||
                            arg_info.arg_type == ArgInfo::IN_OUT_TMP_ARG ||
                            arg_info.arg_type == ArgInfo::OUT_TMP_ARG) &&
                           RegisterClass::kIsImplicitReg) {
        CHECK(!implicit_result_reg_.IsInvalidReg());
        MovToResult<RegisterClass>(builder_, result_, implicit_result_reg_);
      }
    }
  }

  MachineReg AllocVReg() { return builder_->ir()->AllocVReg(); }

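  // SimdReg wraps a MachineReg; these overloads strip the wrapper from a single value
  // or, element-wise, from a tuple of operands.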
  template <typename T>
  static constexpr auto UnwrapSimdReg(T r) {
    if constexpr (std::is_same_v<T, SimdReg>) {
      return r.machine_reg();
    } else {
      return r;
    }
  }

  template <typename... T>
  static constexpr auto UnwrapSimdReg(std::tuple<T...> regs) {
    constexpr const auto num_args = std::tuple_size<std::tuple<T...>>::value;
    return UnwrapSimdReg(std::make_index_sequence<num_args>(), regs);
  }

  template <typename... T, auto... I>
  static constexpr auto UnwrapSimdReg(std::index_sequence<I...>, std::tuple<T...> regs) {
    return std::make_tuple(UnwrapSimdReg(std::get<I>(regs))...);
  }

 private:
  x86_64::MachineIRBuilder* builder_;
  ResType result_;
  MachineReg xmm_result_reg_;
  MachineReg implicit_result_reg_;
  FlagRegister flag_register_;
  std::tuple<ArgType...> input_args_;
  uint32_t scratch_arg_ = 0;
  bool success_;
};

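// First tries the host-rounding shortcuts, then falls back to generic binding-based
// inlining.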
template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
bool TryInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
                                         ResType result,
                                         FlagRegister flag_register,
                                         ArgType... args) {
  if (InlineIntrinsic<kFunction>::TryInlineWithHostRounding(
          builder, result, flag_register, args...)) {
    return true;
  }

  return TryBindingBasedInlineIntrinsicForHeavyOptimizer<kFunction,
                                                         ResType,
                                                         FlagRegister,
                                                         ArgType...>(
      builder, result, flag_register, args...);
}

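// Same as above, but inlining is required to succeed: CHECK-fails if the intrinsic
// cannot be inlined.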
template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
void InlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
                                      ResType result,
                                      FlagRegister flag_register,
                                      ArgType... args) {
  bool success = TryInlineIntrinsicForHeavyOptimizer<kFunction, ResType, FlagRegister, ArgType...>(
      builder, result, flag_register, args...);
  CHECK(success);
}

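// Variants for intrinsics that produce no result; std::monostate stands in for the
// missing result register.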
template <auto kFunction, typename FlagRegister, typename... ArgType>
bool TryInlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder* builder,
                                             FlagRegister flag_register,
                                             ArgType... args) {
  return TryBindingBasedInlineIntrinsicForHeavyOptimizer<kFunction,
                                                         std::monostate,
                                                         FlagRegister,
                                                         ArgType...>(
      builder, std::monostate{}, flag_register, args...);
}

template <auto kFunction, typename FlagRegister, typename... ArgType>
void InlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder* builder,
                                          FlagRegister flag_register,
                                          ArgType... args) {
  bool success = TryInlineIntrinsicForHeavyOptimizerVoid<kFunction, FlagRegister, ArgType...>(
      builder, flag_register, args...);
  CHECK(success);
}

}  // namespace berberis

#endif  // BERBERIS_HEAVY_OPTIMIZER_RISCV64_INLINE_INTRINSIC_H_