/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef BERBERIS_HEAVY_OPTIMIZER_RISCV64_INLINE_INTRINSIC_H_
#define BERBERIS_HEAVY_OPTIMIZER_RISCV64_INLINE_INTRINSIC_H_

#include <cfenv>
#include <cstdint>
#include <tuple>
#include <type_traits>
#include <utility>
#include <variant>

#include "berberis/assembler/x86_64.h"
#include "berberis/backend/common/machine_ir.h"
#include "berberis/backend/x86_64/machine_insn_intrinsics.h"
#include "berberis/backend/x86_64/machine_ir.h"
#include "berberis/backend/x86_64/machine_ir_builder.h"
#include "berberis/base/checks.h"
#include "berberis/base/config.h"
#include "berberis/base/dependent_false.h"
#include "berberis/intrinsics/all_to_x86_32_or_x86_64/intrinsics_bindings.h"
#include "berberis/intrinsics/intrinsics.h"
#include "berberis/intrinsics/intrinsics_args.h"
#include "berberis/intrinsics/intrinsics_process_bindings.h"
#include "berberis/intrinsics/macro_assembler.h"
#include "berberis/runtime_primitives/platform.h"

#include "simd_register.h"

namespace berberis {

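// Attempts to emit an inline expansion of the intrinsic kFunction directly into the MachineIR
// instead of generating a call to its C++ implementation. Returns true on success.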
template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
bool TryInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
                                         ResType result,
                                         FlagRegister flag_register,
                                         ArgType... args);

template <auto kFunc>
class InlineIntrinsic {
 public:
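  // Fast path for floating-point intrinsics that take an explicit rounding mode: when the
  // requested mode is FPFlags::DYN, the guest's dynamic rounding mode is assumed to already
  // be reflected in the host FPU state, so the cheaper HostRounding variant of the intrinsic
  // can be substituted. Returns false if no such substitution applies.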
  template <typename ResType, typename FlagRegister, typename... ArgType>
  static bool TryInlineWithHostRounding(x86_64::MachineIRBuilder* builder,
                                        ResType result,
                                        FlagRegister flag_register,
                                        ArgType... args) {
    std::tuple args_tuple = std::make_tuple(args...);
    if constexpr (IsTagEq<&intrinsics::FMul<intrinsics::Float64>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FMulHostRounding<intrinsics::Float64>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FMul<intrinsics::Float32>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FMulHostRounding<intrinsics::Float32>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FAdd<intrinsics::Float64>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FAddHostRounding<intrinsics::Float64>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FAdd<intrinsics::Float32>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FAddHostRounding<intrinsics::Float32>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FSub<intrinsics::Float64>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FSubHostRounding<intrinsics::Float64>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FSub<intrinsics::Float32>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FSubHostRounding<intrinsics::Float32>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FDiv<intrinsics::Float64>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FDivHostRounding<intrinsics::Float64>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FDiv<intrinsics::Float32>>()) {
      auto [rm, frm, src1, src2] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FDivHostRounding<intrinsics::Float32>>(
          builder, result, flag_register, src1, src2);
    } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int64_t, intrinsics::Float64>>()) {
      auto [rm, frm, src] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FCvtFloatToIntegerHostRounding<int64_t, intrinsics::Float64>>(
          builder, result, flag_register, src);
    } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int64_t, intrinsics::Float32>>()) {
      auto [rm, frm, src] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FCvtFloatToIntegerHostRounding<int64_t, intrinsics::Float32>>(
          builder, result, flag_register, src);
    } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int32_t, intrinsics::Float64>>()) {
      auto [rm, frm, src] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FCvtFloatToIntegerHostRounding<int32_t, intrinsics::Float64>>(
          builder, result, flag_register, src);
    } else if constexpr (IsTagEq<&intrinsics::FCvtFloatToInteger<int32_t, intrinsics::Float32>>()) {
      auto [rm, frm, src] = args_tuple;
      if (rm != FPFlags::DYN) {
        return false;
      }
      return TryInlineIntrinsicForHeavyOptimizer<
          &intrinsics::FCvtFloatToIntegerHostRounding<int32_t, intrinsics::Float32>>(
          builder, result, flag_register, src);
    }
    return false;
  }

 private:
  // Comparing pointers that point to two different functions is generally not a constant
  // expression, since such functions can be merged in the object code (comparing pointers to
  // the same function is a constant expression). This helper compares them via template tags
  // instead, making it explicit that we are not worried about such subtleties here.
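  // For example, under identical-code folding the linker may give two distinct intrinsic
  // instantiations the same address, so comparing their pointers directly need not be usable
  // in a constant expression, while the tag types below always compare at compile time.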
  template <auto kFunction>
  class FunctionCompareTag;

  // Note: if we define this as a variable, clang doesn't consider it a constexpr in the
  // TryInline functions.
  template <auto kOtherFunction>
  static constexpr bool IsTagEq() {
    return std::is_same_v<FunctionCompareTag<kFunc>, FunctionCompareTag<kOtherFunction>>;
  }
};

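// Emits a register-to-register move between the given source and destination register classes,
// picking MOVD/MOVQ (or their AVX forms, when available) for moves between general-purpose and
// XMM registers and PseudoCopy for same-kind moves.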
176 template <typename DestRegClass, typename SrcRegClass>
Mov(x86_64::MachineIRBuilder * builder,MachineReg dest,MachineReg src)177 void Mov(x86_64::MachineIRBuilder* builder, MachineReg dest, MachineReg src) {
178   using DestType = typename DestRegClass::Type;
179   using SrcType = typename SrcRegClass::Type;
180   constexpr const auto src_reg_class = SrcRegClass::template kRegClass<x86_64::MachineInsnX86_64>;
181   if constexpr (std::is_integral_v<DestType>) {
182     if constexpr (std::is_integral_v<SrcType>) {
183       builder->Gen<PseudoCopy>(dest, src, src_reg_class.RegSize());
184     } else if constexpr (SrcRegClass::kAsRegister == 'x') {
185       if constexpr (src_reg_class.RegSize() == 4) {
186         if (host_platform::kHasAVX) {
187           builder->Gen<x86_64::VmovdRegXReg>(dest, src);
188         } else {
189           builder->Gen<x86_64::MovdRegXReg>(dest, src);
190         }
191       } else {
192         static_assert(src_reg_class.RegSize() >= 8);
193         if (host_platform::kHasAVX) {
194           builder->Gen<x86_64::VmovqRegXReg>(dest, src);
195         } else {
196           builder->Gen<x86_64::MovqRegXReg>(dest, src);
197         }
198       }
199     } else {
200       static_assert(kDependentTypeFalse<std::tuple<DestRegClass, SrcRegClass>>);
201     }
202   } else if (DestRegClass::kAsRegister == 'x') {
203     if constexpr (src_reg_class.RegSize() == 4) {
204       if constexpr (std::is_integral_v<SrcType>) {
205         if (host_platform::kHasAVX) {
206           builder->Gen<x86_64::VmovdXRegReg>(dest, src);
207         } else {
208           builder->Gen<x86_64::MovdXRegReg>(dest, src);
209         }
210       } else if constexpr (SrcRegClass::kAsRegister == 'x') {
211         builder->Gen<PseudoCopy>(dest, src, 16);
212       } else {
213         static_assert(kDependentTypeFalse<std::tuple<DestRegClass, SrcRegClass>>);
214       }
215     } else {
216       static_assert(src_reg_class.RegSize() >= 8);
217       if constexpr (std::is_integral_v<SrcType>) {
218         if (host_platform::kHasAVX) {
219           builder->Gen<x86_64::VmovqXRegReg>(dest, src);
220         } else {
221           builder->Gen<x86_64::MovqXRegReg>(dest, src);
222         }
223       } else if constexpr (SrcRegClass::kAsRegister == 'x') {
224         builder->Gen<PseudoCopy>(dest, src, 16);
225       } else {
226         static_assert(kDependentTypeFalse<std::tuple<DestRegClass, SrcRegClass>>);
227       }
228     }
229   }
230 }
231 
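// Copies an input argument into the register expected by the intrinsic binding, unwrapping
// SimdReg inputs into their underlying XMM machine register.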
template <typename DestRegClass, typename SrcReg>
void MovFromInput(x86_64::MachineIRBuilder* builder, MachineReg dest, SrcReg src) {
  if constexpr (std::is_same_v<SrcReg, SimdReg>) {
    Mov<DestRegClass, intrinsics::bindings::XmmReg>(builder, dest, src.machine_reg());
  } else {
    Mov<DestRegClass, intrinsics::bindings::GeneralReg64>(builder, dest, src);
  }
}
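
// Copies the intrinsic's output register back into the declared result, unwrapping SimdReg
// results into their underlying XMM machine register.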
template <typename SrcRegClass, typename DestReg>
void MovToResult(x86_64::MachineIRBuilder* builder, DestReg dest, MachineReg src) {
  if constexpr (std::is_same_v<DestReg, SimdReg>) {
    Mov<intrinsics::bindings::XmmReg, SrcRegClass>(builder, dest.machine_reg(), src);
  } else {
    Mov<intrinsics::bindings::GeneralReg64, SrcRegClass>(builder, dest, src);
  }
}

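// Helper that drives intrinsics::bindings::ProcessBindings for kFunction: the constructor
// walks the assembler bindings, allocates and copies registers as required, emits the
// corresponding MachineInsn, and records whether inlining succeeded (see operator bool).
// It is constructed only by the TryInlineIntrinsicForHeavyOptimizer* friends below.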
template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
class TryBindingBasedInlineIntrinsicForHeavyOptimizer {
  template <auto kFunctionForFriend,
            typename ResTypeForFriend,
            typename FlagRegisterForFriend,
            typename... ArgTypeForFriend>
  friend bool TryInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
                                                  ResTypeForFriend result,
                                                  FlagRegisterForFriend flag_register,
                                                  ArgTypeForFriend... args);
  template <auto kFunctionForFriend, typename FlagRegisterForFriend, typename... ArgTypeForFriend>
  friend bool TryInlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder* builder,
                                                      FlagRegisterForFriend flag_register,
                                                      ArgTypeForFriend... args);

  template <auto kFunc,
            typename MacroAssembler,
            typename Result,
            typename Callback,
            typename... Args>
  friend Result intrinsics::bindings::ProcessBindings(Callback callback,
                                                      Result def_result,
                                                      Args&&... args);

  template <auto kIntrinsicTemplateName,
            auto kMacroInstructionTemplateName,
            auto kMnemo,
            typename GetOpcode,
            typename CPUIDRestrictionTemplateValue,
            typename PreciseNanOperationsHandlingTemplateValue,
            bool kSideEffectsTemplateValue,
            typename... Types>
  friend class intrinsics::bindings::AsmCallInfo;

  TryBindingBasedInlineIntrinsicForHeavyOptimizer() = delete;
  TryBindingBasedInlineIntrinsicForHeavyOptimizer(
      const TryBindingBasedInlineIntrinsicForHeavyOptimizer&) = delete;
  TryBindingBasedInlineIntrinsicForHeavyOptimizer(
      TryBindingBasedInlineIntrinsicForHeavyOptimizer&&) = delete;
  TryBindingBasedInlineIntrinsicForHeavyOptimizer& operator=(
      const TryBindingBasedInlineIntrinsicForHeavyOptimizer&) = delete;
  TryBindingBasedInlineIntrinsicForHeavyOptimizer& operator=(
      TryBindingBasedInlineIntrinsicForHeavyOptimizer&&) = delete;

  TryBindingBasedInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
                                                  ResType result,
                                                  FlagRegister flag_register,
                                                  ArgType... args)
      : builder_(builder),
        result_{result},
        xmm_result_reg_{},
        flag_register_{flag_register},
        input_args_(std::tuple{args...}),
        success_(intrinsics::bindings::ProcessBindings<
                 kFunction,
                 typename MacroAssembler<x86_64::Assembler>::MacroAssemblers,
                 bool,
                 TryBindingBasedInlineIntrinsicForHeavyOptimizer&>(*this, false)) {}

  operator bool() { return success_; }

  // TODO(b/232598137) The MachineIR bindings for some macros can't be instantiated yet. This
  // should be removed once they're supported.
  template <typename AsmCallInfo,
            std::enable_if_t<AsmCallInfo::template kOpcode<MachineOpcode> ==
                                 MachineOpcode::kMachineOpUndefined,
                             bool> = true>
  std::optional<bool> /*ProcessBindingsClient*/ operator()(AsmCallInfo /* asm_call_info */) {
    return false;
  }

  template <typename AsmCallInfo,
            std::enable_if_t<AsmCallInfo::template kOpcode<MachineOpcode> !=
                                 MachineOpcode::kMachineOpUndefined,
                             bool> = true>
  std::optional<bool> /*ProcessBindingsClient*/ operator()(AsmCallInfo asm_call_info) {
    static_assert(std::is_same_v<decltype(kFunction), typename AsmCallInfo::IntrinsicType>);
    static_assert(std::is_same_v<typename AsmCallInfo::PreciseNanOperationsHandling,
                                 intrinsics::bindings::NoNansOperation>);
    using CPUIDRestriction = AsmCallInfo::CPUIDRestriction;
    if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasAVX>) {
      if (!host_platform::kHasAVX) {
        return false;
      }
    } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasBMI>) {
      if (!host_platform::kHasBMI) {
        return false;
      }
    } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasLZCNT>) {
      if (!host_platform::kHasLZCNT) {
        return false;
      }
    } else if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::HasPOPCNT>) {
      if (!host_platform::kHasPOPCNT) {
        return false;
      }
    } else if constexpr (std::is_same_v<CPUIDRestriction,
                                        intrinsics::bindings::NoCPUIDRestriction>) {
      // No restrictions. Do nothing.
    } else {
      static_assert(berberis::kDependentValueFalse<AsmCallInfo::kCPUIDRestriction>);
    }

    // constructor_args_t here is used to generate a tuple of constructor args from the
    // AsmCallInfo bindings. The tuple parameter pack will be expanded by the tuple
    // specialization on the MachineInsn in machine_insn_intrinsics.h.
    using MachineInsn = typename AsmCallInfo::template MachineInsn<berberis::x86_64::MachineInsn,
                                                                   x86_64::constructor_args_t,
                                                                   MachineOpcode>;
    std::apply(MachineInsn::kGenFunc,
               std::tuple_cat(std::tuple<x86_64::MachineIRBuilder&>{*builder_},
                              UnwrapSimdReg(AsmCallInfo::template MakeTuplefromBindings<
                                            TryBindingBasedInlineIntrinsicForHeavyOptimizer&>(
                                  *this, asm_call_info))));
    ProcessBindingsResults<AsmCallInfo>(type_wrapper<typename AsmCallInfo::Bindings>());
    return true;
  }

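  // MakeTuplefromBindings client: translates one argument binding into the tuple of MachineInsn
  // constructor arguments; immediates are passed through as-is, while register arguments go
  // through ProcessArgInput.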
  template <typename ArgBinding, typename AsmCallInfo>
  auto /*MakeTuplefromBindingsClient*/ operator()(ArgTraits<ArgBinding>, AsmCallInfo) {
    static constexpr const auto& arg_info = ArgTraits<ArgBinding>::arg_info;
    if constexpr (arg_info.arg_type == ArgInfo::IMM_ARG) {
      auto imm = std::get<arg_info.from>(input_args_);
      return std::tuple{imm};
    } else {
      return ProcessArgInput<ArgBinding, AsmCallInfo>();
    }
  }

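  // Prepares the machine register(s) for one register binding depending on its argument type
  // (IN, IN_OUT, IN_TMP, OUT, TMP, ...): allocates virtual registers, emits the copies from the
  // input arguments, and remembers XMM/implicit result registers so that ProcessBindingResult
  // can copy them back after the instruction is emitted.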
  template <typename ArgBinding, typename AsmCallInfo>
  auto ProcessArgInput() {
    static constexpr const auto& arg_info = ArgTraits<ArgBinding>::arg_info;
    using RegisterClass = typename ArgTraits<ArgBinding>::RegisterClass;
    using Usage = typename ArgTraits<ArgBinding>::Usage;
    static constexpr const auto kNumOut = std::tuple_size_v<typename AsmCallInfo::OutputArguments>;

    if constexpr (arg_info.arg_type == ArgInfo::IN_ARG) {
      static_assert(std::is_same_v<Usage, intrinsics::bindings::Use>);
      static_assert(!RegisterClass::kIsImplicitReg);
      if constexpr (RegisterClass::kAsRegister == 'x' &&
                    std::is_same_v<std::tuple_element_t<arg_info.from, std::tuple<ArgType...>>,
                                   MachineReg>) {
        auto xmm_reg = AllocVReg();
        MovFromInput<RegisterClass>(builder_, xmm_reg, std::get<arg_info.from>(input_args_));
        return std::tuple{xmm_reg};
      } else {
        return std::tuple{std::get<arg_info.from>(input_args_)};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::IN_OUT_ARG) {
      static_assert(!std::is_same_v<ResType, std::monostate>);
      static_assert(std::is_same_v<Usage, intrinsics::bindings::UseDef>);
      static_assert(!RegisterClass::kIsImplicitReg);
      if constexpr (RegisterClass::kAsRegister == 'x') {
        if constexpr (kNumOut > 1) {
          static_assert(kDependentTypeFalse<ArgTraits<ArgBinding>>);
        } else {
          CHECK(xmm_result_reg_.IsInvalidReg());
          xmm_result_reg_ = AllocVReg();
          MovFromInput<RegisterClass>(
              builder_, xmm_result_reg_, std::get<arg_info.from>(input_args_));
          return std::tuple{xmm_result_reg_};
        }
      } else if constexpr (kNumOut > 1) {
        auto res = std::get<arg_info.to>(result_);
        MovFromInput<RegisterClass>(builder_, res, std::get<arg_info.from>(input_args_));
        return std::tuple{res};
      } else {
        MovFromInput<RegisterClass>(builder_, result_, std::get<arg_info.from>(input_args_));
        return std::tuple{result_};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::IN_OUT_TMP_ARG) {
      static_assert(!std::is_same_v<ResType, std::monostate>);
      static_assert(std::is_same_v<Usage, intrinsics::bindings::UseDef>);
      static_assert(RegisterClass::kIsImplicitReg);
      if constexpr (kNumOut > 1) {
        static_assert(kDependentTypeFalse<ArgTraits<ArgBinding>>);
      } else {
        CHECK(implicit_result_reg_.IsInvalidReg());
        implicit_result_reg_ = AllocVReg();
        MovFromInput<RegisterClass>(
            builder_, implicit_result_reg_, std::get<arg_info.from>(input_args_));
        return std::tuple{implicit_result_reg_};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::IN_TMP_ARG) {
      if constexpr (RegisterClass::kIsImplicitReg) {
        auto implicit_reg = AllocVReg();
        MovFromInput<RegisterClass>(builder_, implicit_reg, std::get<arg_info.from>(input_args_));
        return std::tuple{implicit_reg};
      } else {
        static_assert(std::is_same_v<Usage, intrinsics::bindings::UseDef>);
        return std::tuple{std::get<arg_info.from>(input_args_)};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::OUT_TMP_ARG) {
      if constexpr (kNumOut > 1) {
        static_assert(kDependentTypeFalse<ArgTraits<ArgBinding>>);
      } else {
        CHECK(implicit_result_reg_.IsInvalidReg());
        implicit_result_reg_ = AllocVReg();
        return std::tuple{implicit_result_reg_};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::OUT_ARG) {
      static_assert(!std::is_same_v<ResType, std::monostate>);
      static_assert(std::is_same_v<Usage, intrinsics::bindings::Def> ||
                    std::is_same_v<Usage, intrinsics::bindings::DefEarlyClobber>);
      if constexpr (RegisterClass::kAsRegister == 'x') {
        CHECK(xmm_result_reg_.IsInvalidReg());
        xmm_result_reg_ = AllocVReg();
        return std::tuple{xmm_result_reg_};
      } else if constexpr (kNumOut > 1) {
        return std::tuple{std::get<arg_info.to>(result_)};
      } else if constexpr (RegisterClass::kIsImplicitReg) {
        if constexpr (RegisterClass::kAsRegister == 0) {
          return std::tuple{flag_register_};
        } else {
          CHECK(implicit_result_reg_.IsInvalidReg());
          implicit_result_reg_ = AllocVReg();
          return std::tuple{implicit_result_reg_};
        }
      } else {
        return std::tuple{result_};
      }
    } else if constexpr (arg_info.arg_type == ArgInfo::TMP_ARG) {
      static_assert(std::is_same_v<Usage, intrinsics::bindings::Def> ||
                    std::is_same_v<Usage, intrinsics::bindings::DefEarlyClobber>);
      if constexpr (RegisterClass::kAsRegister == 'm') {
        static_assert(std::is_same_v<Usage, intrinsics::bindings::DefEarlyClobber>);
        if (scratch_arg_ >= 2) {
          FATAL("Only two scratch registers are supported for now");
        }
        return std::tuple{x86_64::kMachineRegRBP,
                          static_cast<int32_t>(offsetof(ThreadState, intrinsics_scratch_area) +
                                               config::kScratchAreaSlotSize * scratch_arg_++)};
      } else if constexpr (RegisterClass::kIsImplicitReg) {
        if constexpr (RegisterClass::kAsRegister == 0) {
          return std::tuple{flag_register_};
        } else {
          auto implicit_reg = AllocVReg();
          return std::tuple{implicit_reg};
        }
      } else {
        auto reg = AllocVReg();
        return std::tuple{reg};
      }
    } else {
      static_assert(berberis::kDependentValueFalse<arg_info.arg_type>);
    }
  }

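  // Post-processes the results after the MachineInsn has been emitted: copies register bindings
  // back (ProcessBindingResult) and widens a single 32-bit integer result to the 64-bit register
  // expected by the rest of the translator.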
  template <typename T>
  struct type_wrapper {
    using type = T;
  };

  template <typename AsmCallInfo, typename... ArgBinding>
  void ProcessBindingsResults(type_wrapper<std::tuple<ArgBinding...>>) {
    (ProcessBindingResult<ArgBinding, AsmCallInfo>(), ...);
    if constexpr (std::tuple_size_v<typename AsmCallInfo::OutputArguments> == 0) {
      // No return value. Do nothing.
    } else if constexpr (std::tuple_size_v<typename AsmCallInfo::OutputArguments> == 1) {
      using ReturnType = std::tuple_element_t<0, typename AsmCallInfo::OutputArguments>;
      if constexpr (std::is_integral_v<ReturnType> && sizeof(ReturnType) < sizeof(int32_t)) {
        // We don't handle these types just yet: it's not clear how to expand them and there
        // are no examples.
        static_assert(kDependentTypeFalse<ReturnType>);
      }
      if constexpr (std::is_same_v<ReturnType, int32_t> || std::is_same_v<ReturnType, uint32_t>) {
        // Expand 32-bit values as signed, even if the actual results are processed as unsigned!
        // TODO(b/308951522) replace with an Expand node when it's created.
        builder_->Gen<x86_64::MovsxlqRegReg>(result_, result_);
      } else if constexpr (std::is_integral_v<ReturnType> &&
                           sizeof(ReturnType) == sizeof(int64_t)) {
        // Do nothing, we have already produced an expanded value.
      } else if constexpr (std::is_same_v<ReturnType, intrinsics::Float32> ||
                           std::is_same_v<ReturnType, intrinsics::Float64>) {
        // Do nothing, NaN boxing is handled by the semantics player.
      } else {
        static_assert(kDependentTypeFalse<ReturnType>);
      }
    } else {
      static_assert(kDependentTypeFalse<typename AsmCallInfo::OutputArguments>);
    }
  }

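  // Copies one binding's result back into the declared result register: XMM results come from
  // xmm_result_reg_, implicit-register results from implicit_result_reg_; memory operands and
  // the flag register need no copy.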
  template <typename ArgBinding, typename AsmCallInfo>
  void ProcessBindingResult() {
    if constexpr (ArgTraits<ArgBinding>::Class::kIsImmediate) {
      return;
    } else {
      using RegisterClass = typename ArgTraits<ArgBinding>::RegisterClass;
      static constexpr const auto& arg_info = ArgTraits<ArgBinding>::arg_info;
      if constexpr (RegisterClass::kAsRegister == 'm' || RegisterClass::kAsRegister == 0) {
        return;
      } else if constexpr ((arg_info.arg_type == ArgInfo::IN_OUT_ARG ||
                            arg_info.arg_type == ArgInfo::OUT_ARG) &&
                           RegisterClass::kAsRegister == 'x') {
        CHECK(!xmm_result_reg_.IsInvalidReg());
        MovToResult<RegisterClass>(builder_, result_, xmm_result_reg_);
      } else if constexpr ((arg_info.arg_type == ArgInfo::OUT_ARG ||
                            arg_info.arg_type == ArgInfo::IN_OUT_TMP_ARG ||
                            arg_info.arg_type == ArgInfo::OUT_TMP_ARG) &&
                           RegisterClass::kIsImplicitReg) {
        CHECK(!implicit_result_reg_.IsInvalidReg());
        MovToResult<RegisterClass>(builder_, result_, implicit_result_reg_);
      }
    }
  }

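  // Allocates a fresh virtual register in the underlying MachineIR.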
  MachineReg AllocVReg() { return builder_->ir()->AllocVReg(); }

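  // Strips the SimdReg wrapper down to the underlying MachineReg (elementwise for tuples);
  // other argument types are passed through unchanged.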
  template <typename T>
  static constexpr auto UnwrapSimdReg(T r) {
    if constexpr (std::is_same_v<T, SimdReg>) {
      return r.machine_reg();
    } else {
      return r;
    }
  }

  template <typename... T>
  static constexpr auto UnwrapSimdReg(std::tuple<T...> regs) {
    constexpr const auto num_args = std::tuple_size<std::tuple<T...>>::value;
    return UnwrapSimdReg(std::make_index_sequence<num_args>(), regs);
  }

  template <typename... T, auto... I>
  static constexpr auto UnwrapSimdReg(std::index_sequence<I...>, std::tuple<T...> regs) {
    return std::make_tuple(UnwrapSimdReg(std::get<I>(regs))...);
  }

 private:
  x86_64::MachineIRBuilder* builder_;
  ResType result_;
  MachineReg xmm_result_reg_;
  MachineReg implicit_result_reg_;
  FlagRegister flag_register_;
  std::tuple<ArgType...> input_args_;
  uint32_t scratch_arg_ = 0;
  bool success_;
};

template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
bool TryInlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
                                         ResType result,
                                         FlagRegister flag_register,
                                         ArgType... args) {
  if (InlineIntrinsic<kFunction>::TryInlineWithHostRounding(
          builder, result, flag_register, args...)) {
    return true;
  }

  return TryBindingBasedInlineIntrinsicForHeavyOptimizer<kFunction,
                                                         ResType,
                                                         FlagRegister,
                                                         ArgType...>(
      builder, result, flag_register, args...);
}

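// Same as TryInlineIntrinsicForHeavyOptimizer, but inlining is required to succeed.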
template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
void InlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
                                      ResType result,
                                      FlagRegister flag_register,
                                      ArgType... args) {
  bool success = TryInlineIntrinsicForHeavyOptimizer<kFunction, ResType, FlagRegister, ArgType...>(
      builder, result, flag_register, args...);
  CHECK(success);
}

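// Variant for intrinsics that produce no result: inlines kFunction with a std::monostate
// placeholder in place of the result register.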
template <auto kFunction, typename FlagRegister, typename... ArgType>
bool TryInlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder* builder,
                                             FlagRegister flag_register,
                                             ArgType... args) {
  return TryBindingBasedInlineIntrinsicForHeavyOptimizer<kFunction,
                                                         std::monostate,
                                                         FlagRegister,
                                                         ArgType...>(
      builder, std::monostate{}, flag_register, args...);
}

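// Same as TryInlineIntrinsicForHeavyOptimizerVoid, but inlining is required to succeed.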
template <auto kFunction, typename FlagRegister, typename... ArgType>
void InlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder* builder,
                                          FlagRegister flag_register,
                                          ArgType... args) {
  bool success = TryInlineIntrinsicForHeavyOptimizerVoid<kFunction, FlagRegister, ArgType...>(
      builder, flag_register, args...);
  CHECK(success);
}

}  // namespace berberis

#endif  // BERBERIS_HEAVY_OPTIMIZER_RISCV64_INLINE_INTRINSIC_H_