1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <stdio.h>
18 
19 #include <algorithm>
20 #include <iterator>
21 #include <memory>
22 #include <optional>
23 #include <string>
24 #include <tuple>
25 #include <type_traits>
26 #include <vector>
27 
28 #include "berberis/base/checks.h"
29 #include "berberis/base/config.h"
30 #include "berberis/intrinsics/common/intrinsics_bindings.h"
31 #include "berberis/intrinsics/common/intrinsics_float.h"
32 #include "berberis/intrinsics/intrinsics_args.h"
33 #include "berberis/intrinsics/macro_assembler.h"
34 #include "berberis/intrinsics/simd_register.h"
35 #include "berberis/intrinsics/type_traits.h"
36 
37 #include "text_assembler.h"
38 
39 namespace berberis {
40 
// Forward declarations of the helpers used by GenerateFunctionHeader/GenerateFunctionBody.
// All of them are defined later in this file.

// Prints one "<type> outN;" declaration per entry of AsmCallInfo::OutputArgumentsTypeNames.
template <typename AsmCallInfo>
void GenerateOutputVariables(FILE* out, int indent);
// Prints one "<type> tmpN;" declaration per non-FLAGS binding without input and output.
template <typename AsmCallInfo>
void GenerateTemporaries(FILE* out, int indent);
// Prints declarations (and, for inputs, initialization) of "_shadow"/"_expanded" variables
// for bindings that use 'r' or 'x' register classes.
template <typename AsmCallInfo>
void GenerateInShadows(FILE* out, int indent);
// Fills register_numbers (one slot per non-FLAGS binding): bindings whose usage is not Use
// are numbered first, bindings with Use usage are numbered after them.
template <typename AsmCallInfo>
void AssignRegisterNumbers(int* register_numbers);
// Runs AsmCallInfo::kMacroInstruction on MacroAssembler<TextAssembler> that prints to out.
// Returns tuple of two flags: need_gpr_macroassembler_scratch, need_gpr_macroassembler_constants.
template <typename AsmCallInfo>
auto CallTextAssembler(FILE* out, int indent, int* register_numbers);
// Prints the "  : ..." line with output operands of the asm statement.
template <typename AsmCallInfo>
void GenerateAssemblerOuts(FILE* out, int indent);
// Prints the "  : ..." line with input operands of the asm statement.
template <typename AsmCallInfo>
void GenerateAssemblerIns(FILE* out,
                          int indent,
                          int* register_numbers,
                          bool need_gpr_macroassembler_scratch,
                          bool need_gpr_macroassembler_constants);
// Prints copies from "outN_shadow" variables back to "outN" variables.
template <typename AsmCallInfo>
void GenerateOutShadows(FILE* out, int indent);
// Prints prefix, comma-separated elements and suffix; puts every element on its own line
// when the whole list is too long for a single line.
template <typename AsmCallInfo>
void GenerateElementsList(FILE* out,
                          int indent,
                          const std::string& prefix,
                          const std::string& suffix,
                          const std::vector<std::string>& elements);
// Tells whether input of the binding has to go through an "inN_shadow" variable.
template <typename AsmCallInfo, typename Arg>
constexpr bool NeedInputShadow(Arg arg);
// Tells whether output of the binding has to go through an "outN_shadow" variable.
template <typename AsmCallInfo, typename Arg>
constexpr bool NeedOutputShadow(Arg arg);
71 
72 template <typename AsmCallInfo>
GenerateFunctionHeader(FILE * out,int indent)73 void GenerateFunctionHeader(FILE* out, int indent) {
74   if (strchr(AsmCallInfo::kIntrinsic, '<')) {
75     fprintf(out, "template <>\n");
76   }
77   std::string prefix;
78   if constexpr (std::tuple_size_v<typename AsmCallInfo::OutputArguments> == 0) {
79     prefix = "inline void " + std::string(AsmCallInfo::kIntrinsic) + "(";
80   } else {
81     const char* prefix_of_prefix = "inline std::tuple<";
82     for (const char* type_name : AsmCallInfo::OutputArgumentsTypeNames) {
83       prefix += prefix_of_prefix + std::string(type_name);
84       prefix_of_prefix = ", ";
85     }
86     prefix += "> " + std::string(AsmCallInfo::kIntrinsic) + "(";
87   }
88   std::vector<std::string> ins;
89   for (const char* type_name : AsmCallInfo::InputArgumentsTypeNames) {
90     ins.push_back("[[maybe_unused]] " + std::string(type_name) + " in" +
91                   std::to_string(ins.size()));
92   }
93   GenerateElementsList<AsmCallInfo>(out, indent, prefix, ") {", ins);
94   fprintf(out,
95           "  [[maybe_unused]]  alignas(berberis::config::kScratchAreaAlign)"
96           " uint8_t scratch[berberis::config::kScratchAreaSize];\n");
97   fprintf(out,
98           "  [[maybe_unused]] auto& scratch2 ="
99           " scratch[berberis::config::kScratchAreaSlotSize];\n");
100 }
101 
102 template <typename AsmCallInfo>
GenerateFunctionBody(FILE * out,int indent)103 void GenerateFunctionBody(FILE* out, int indent) {
104   // Declare out variables.
105   GenerateOutputVariables<AsmCallInfo>(out, indent);
106   // Declare temporary variables.
107   GenerateTemporaries<AsmCallInfo>(out, indent);
108   // We need "shadow variables" for ins of types: Float32, Float64 and SIMD128Register.
109   // This is because assembler does not accept these arguments for XMMRegisters and
110   // we couldn't use "float"/"double" function arguments because if ABI issues.
111   GenerateInShadows<AsmCallInfo>(out, indent);
112   // Even if we don't pass any registers we need to allocate at least one element.
113   int register_numbers[std::tuple_size_v<typename AsmCallInfo::Bindings> == 0
114                            ? 1
115                            : std::tuple_size_v<typename AsmCallInfo::Bindings>];
116   // Assign numbers to registers - we need to pass them to assembler and then, later,
117   // to Generator of Input Variable line.
118   AssignRegisterNumbers<AsmCallInfo>(register_numbers);
119   // Print opening line for asm call.
120   if constexpr (AsmCallInfo::kSideEffects) {
121     fprintf(out, "%*s__asm__ __volatile__(\n", indent, "");
122   } else {
123     fprintf(out, "%*s__asm__(\n", indent, "");
124   }
125   // Call text assembler to produce the body of an asm call.
126   auto [need_gpr_macroassembler_scratch, need_gpr_macroassembler_constants] =
127       CallTextAssembler<AsmCallInfo>(out, indent, register_numbers);
128   // Assembler instruction outs.
129   GenerateAssemblerOuts<AsmCallInfo>(out, indent);
130   // Assembler instruction ins.
131   GenerateAssemblerIns<AsmCallInfo>(out,
132                                     indent,
133                                     register_numbers,
134                                     need_gpr_macroassembler_scratch,
135                                     need_gpr_macroassembler_constants);
136   // Close asm call.
137   fprintf(out, "%*s);\n", indent, "");
138   // Generate copies from shadows to outputs.
139   GenerateOutShadows<AsmCallInfo>(out, indent);
140   // Return value from function.
141   if constexpr (std::tuple_size_v<typename AsmCallInfo::OutputArguments> > 0) {
142     std::vector<std::string> outs;
143     for (std::size_t id = 0; id < std::tuple_size_v<typename AsmCallInfo::OutputArguments>; ++id) {
144       outs.push_back("out" + std::to_string(id));
145     }
146     GenerateElementsList<AsmCallInfo>(out, indent, "return {", "};", outs);
147   }
148 }
149 
// Prints declarations of output variables: one "<type> outN;" line for every entry of
// AsmCallInfo::OutputArgumentsTypeNames, numbered from zero in the order of that list.
//   out    - stream which receives generated code.
//   indent - number of spaces printed before each declaration.
template <typename AsmCallInfo>
void GenerateOutputVariables(FILE* out, int indent) {
  std::size_t id = 0;
  for (const char* type_name : AsmCallInfo::OutputArgumentsTypeNames) {
    // Note: id is std::size_t thus %zu is used (%zd is conversion for the signed type).
    fprintf(out, "%*s%s out%zu;\n", indent, "", type_name, id++);
  }
}
157 
158 template <typename AsmCallInfo>
GenerateTemporaries(FILE * out,int indent)159 void GenerateTemporaries(FILE* out, int indent) {
160   std::size_t id = 0;
161   AsmCallInfo::ProcessBindings([out, &id, indent](auto arg) {
162     using RegisterClass = typename decltype(arg)::RegisterClass;
163     if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS>) {
164       if constexpr (!HaveInput(arg.arg_info) && !HaveOutput(arg.arg_info)) {
165         static_assert(
166             std::is_same_v<typename decltype(arg)::Usage, intrinsics::bindings::Def> ||
167             std::is_same_v<typename decltype(arg)::Usage, intrinsics::bindings::DefEarlyClobber>);
168         fprintf(out,
169                 "%*s%s tmp%zd;\n",
170                 indent,
171                 "",
172                 TypeTraits<typename RegisterClass::Type>::kName,
173                 id++);
174       }
175     }
176   });
177 }
178 
179 template <typename AsmCallInfo>
GenerateInShadows(FILE * out,int indent)180 void GenerateInShadows(FILE* out, int indent) {
181   AsmCallInfo::ProcessBindings([out, indent](auto arg) {
182     using RegisterClass = typename decltype(arg)::RegisterClass;
183     if constexpr (RegisterClass::kAsRegister == 'm') {
184       // Only temporary memory scratch area is supported.
185       static_assert(!HaveInput(arg.arg_info) && !HaveOutput(arg.arg_info));
186     } else if constexpr (RegisterClass::kAsRegister == 'r') {
187       // TODO(b/138439904): remove when clang handling of 'r' constraint would be fixed.
188       if constexpr (NeedInputShadow<AsmCallInfo>(arg)) {
189         fprintf(out, "%2$*1$suint32_t in%3$d_shadow = in%3$d;\n", indent, "", arg.arg_info.from);
190       }
191       if constexpr (NeedOutputShadow<AsmCallInfo>(arg)) {
192         fprintf(out, "%*suint32_t out%d_shadow;\n", indent, "", arg.arg_info.to);
193       }
194     } else if constexpr (RegisterClass::kAsRegister == 'x') {
195       if constexpr (HaveInput(arg.arg_info)) {
196         using Type = std::tuple_element_t<arg.arg_info.from, typename AsmCallInfo::InputArguments>;
197         const char* type_name = TypeTraits<Type>::kName;
198         const char* xmm_type_name;
199         const char* expanded = "";
200         // Types allowed for 'x' restriction are float, double and __m128/__m128i/__m128d
201         // First two work for {,u}int32_t and {,u}int64_t, but small integer types must be expanded.
202         if constexpr (std::is_integral_v<Type> && sizeof(Type) < sizeof(int32_t)) {
203           fprintf(
204               out, "%2$*1$suint32_t in%3$d_expanded = in%3$d;\n", indent, "", arg.arg_info.from);
205           type_name = TypeTraits<uint32_t>::kName;
206           xmm_type_name =
207               TypeTraits<typename TypeTraits<typename TypeTraits<uint32_t>::Float>::Raw>::kName;
208           expanded = "_expanded";
209         } else if constexpr (std::is_integral_v<Type>) {
210           // {,u}int32_t and {,u}int64_t have to be converted to float/double.
211           xmm_type_name =
212               TypeTraits<typename TypeTraits<typename TypeTraits<Type>::Float>::Raw>::kName;
213         } else {
214           // Float32/Float64 can not be used, we need to use raw float/double.
215           xmm_type_name = TypeTraits<typename TypeTraits<Type>::Raw>::kName;
216         }
217         fprintf(out, "%*s%s in%d_shadow;\n", indent, "", xmm_type_name, arg.arg_info.from);
218         fprintf(out,
219                 "%*sstatic_assert(sizeof(%s) == sizeof(%s));\n",
220                 indent,
221                 "",
222                 type_name,
223                 xmm_type_name);
224         // Note: it's not safe to use bit_cast here till we have std::bit_cast from C++20.
225         // If optimizer wouldn't be enabled (e.g. if code is compiled with -O0) then bit_cast
226         // would use %st on 32-bit platform which destroys NaNs.
227         fprintf(out,
228                 "%2$*1$smemcpy(&in%3$d_shadow, &in%3$d%4$s, sizeof(%5$s));\n",
229                 indent,
230                 "",
231                 arg.arg_info.from,
232                 expanded,
233                 xmm_type_name);
234       }
235       if constexpr (HaveOutput(arg.arg_info)) {
236         using Type = std::tuple_element_t<arg.arg_info.to, typename AsmCallInfo::OutputArguments>;
237         const char* xmm_type_name;
238         // {,u}int32_t and {,u}int64_t have to be converted to float/double.
239         if constexpr (std::is_integral_v<Type>) {
240           xmm_type_name =
241               TypeTraits<typename TypeTraits<typename TypeTraits<Type>::Float>::Raw>::kName;
242         } else {
243           // Float32/Float64 can not be used, we need to use raw float/double.
244           xmm_type_name = TypeTraits<typename TypeTraits<Type>::Raw>::kName;
245         }
246         fprintf(out, "%*s%s out%d_shadow;\n", indent, "", xmm_type_name, arg.arg_info.to);
247       }
248     }
249   });
250 }
251 
252 template <typename AsmCallInfo>
AssignRegisterNumbers(int * register_numbers)253 void AssignRegisterNumbers(int* register_numbers) {
254   // Assign number for output (and temporary) arguments.
255   std::size_t id = 0;
256   int arg_counter = 0;
257   AsmCallInfo::ProcessBindings([&id, &arg_counter, &register_numbers](auto arg) {
258     using RegisterClass = typename decltype(arg)::RegisterClass;
259     if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS>) {
260       if constexpr (!std::is_same_v<typename decltype(arg)::Usage, intrinsics::bindings::Use>) {
261         register_numbers[arg_counter] = id++;
262       }
263       ++arg_counter;
264     }
265   });
266   // Assign numbers for input arguments.
267   arg_counter = 0;
268   AsmCallInfo::ProcessBindings([&id, &arg_counter, &register_numbers](auto arg) {
269     using RegisterClass = typename decltype(arg)::RegisterClass;
270     if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS>) {
271       if constexpr (std::is_same_v<typename decltype(arg)::Usage, intrinsics::bindings::Use>) {
272         register_numbers[arg_counter] = id++;
273       }
274       ++arg_counter;
275     }
276   });
277 }
278 
279 template <typename AsmCallInfo>
CallTextAssembler(FILE * out,int indent,int * register_numbers)280 auto CallTextAssembler(FILE* out, int indent, int* register_numbers) {
281   MacroAssembler<TextAssembler> as(indent, out);
282   int arg_counter = 0;
283   AsmCallInfo::ProcessBindings([&arg_counter, &as, register_numbers](auto arg) {
284     using RegisterClass = typename decltype(arg)::RegisterClass;
285     if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS>) {
286       if constexpr (RegisterClass::kAsRegister != 'm') {
287         if constexpr (RegisterClass::kIsImplicitReg) {
288           if constexpr (RegisterClass::kAsRegister == 'a') {
289             as.gpr_a = TextAssembler::Register(register_numbers[arg_counter]);
290           } else if constexpr (RegisterClass::kAsRegister == 'c') {
291             as.gpr_c = TextAssembler::Register(register_numbers[arg_counter]);
292           } else {
293             static_assert(RegisterClass::kAsRegister == 'd');
294             as.gpr_d = TextAssembler::Register(register_numbers[arg_counter]);
295           }
296         }
297       }
298       ++arg_counter;
299     }
300   });
301   as.gpr_macroassembler_constants = TextAssembler::Register(arg_counter);
302   arg_counter = 0;
303   int scratch_counter = 0;
304   std::apply(AsmCallInfo::kMacroInstruction,
305              std::tuple_cat(
306                  std::tuple<MacroAssembler<TextAssembler>&>{as},
307                  AsmCallInfo::MakeTuplefromBindings(
308                      [&as, &arg_counter, &scratch_counter, register_numbers](auto arg) {
309                        using RegisterClass = typename decltype(arg)::RegisterClass;
310                        if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS>) {
311                          if constexpr (RegisterClass::kAsRegister == 'm') {
312                            if (scratch_counter == 0) {
313                              as.gpr_macroassembler_scratch = TextAssembler::Register(arg_counter++);
314                            } else if (scratch_counter == 1) {
315                              as.gpr_macroassembler_scratch2 =
316                                  TextAssembler::Register(arg_counter++);
317                            } else {
318                              FATAL("Only two scratch registers are supported for now");
319                            }
320                            // Note: as.gpr_scratch in combination with offset is treated by text
321                            // assembler specially.  We rely on offset set here to be the same as
322                            // scratch2 address in scratch buffer.
323                            return std::tuple{TextAssembler::Operand{
324                                .base = as.gpr_scratch,
325                                .disp = static_cast<int32_t>(config::kScratchAreaSlotSize *
326                                                             scratch_counter++)}};
327                          } else if constexpr (RegisterClass::kIsImplicitReg) {
328                            ++arg_counter;
329                            return std::tuple{};
330                          } else {
331                            return std::tuple{register_numbers[arg_counter++]};
332                          }
333                        } else {
334                          return std::tuple{};
335                        }
336                      })));
337   // Verify CPU vendor and SSE restrictions.
338   as.CheckCPUIDRestriction<typename AsmCallInfo::CPUIDRestriction>();
339   return std::tuple{as.need_gpr_macroassembler_scratch(), as.need_gpr_macroassembler_constants()};
340 }
341 
342 template <typename AsmCallInfo>
GenerateAssemblerOuts(FILE * out,int indent)343 void GenerateAssemblerOuts(FILE* out, int indent) {
344   std::vector<std::string> outs;
345   int tmp_id = 0;
346   AsmCallInfo::ProcessBindings([&outs, &tmp_id](auto arg) {
347     using RegisterClass = typename decltype(arg)::RegisterClass;
348     if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS> &&
349                   !std::is_same_v<typename decltype(arg)::Usage, intrinsics::bindings::Use>) {
350       std::string out = "\"=";
351       if constexpr (std::is_same_v<typename decltype(arg)::Usage,
352                                    intrinsics::bindings::DefEarlyClobber>) {
353         out += "&";
354       }
355       out += RegisterClass::kAsRegister;
356       if constexpr (HaveOutput(arg.arg_info)) {
357         bool need_shadow = NeedOutputShadow<AsmCallInfo>(arg);
358         out += "\"(out" + std::to_string(arg.arg_info.to) + (need_shadow ? "_shadow)" : ")");
359       } else if constexpr (HaveInput(arg.arg_info)) {
360         bool need_shadow = NeedInputShadow<AsmCallInfo>(arg);
361         out += "\"(in" + std::to_string(arg.arg_info.from) + (need_shadow ? "_shadow)" : ")");
362       } else {
363         out += "\"(tmp" + std::to_string(tmp_id++) + ")";
364       }
365       outs.push_back(out);
366     }
367   });
368   GenerateElementsList<AsmCallInfo>(out, indent, "  : ", "", outs);
369 }
370 
371 template <typename AsmCallInfo>
GenerateAssemblerIns(FILE * out,int indent,int * register_numbers,bool need_gpr_macroassembler_scratch,bool need_gpr_macroassembler_constants)372 void GenerateAssemblerIns(FILE* out,
373                           int indent,
374                           int* register_numbers,
375                           bool need_gpr_macroassembler_scratch,
376                           bool need_gpr_macroassembler_constants) {
377   std::vector<std::string> ins;
378   AsmCallInfo::ProcessBindings([&ins](auto arg) {
379     using RegisterClass = typename decltype(arg)::RegisterClass;
380     if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS> &&
381                   std::is_same_v<typename decltype(arg)::Usage, intrinsics::bindings::Use>) {
382       ins.push_back("\"" + std::string(1, RegisterClass::kAsRegister) + "\"(in" +
383                     std::to_string(arg.arg_info.from) +
384                     (NeedInputShadow<AsmCallInfo>(arg) ? "_shadow)" : ")"));
385     }
386   });
387   if (need_gpr_macroassembler_scratch) {
388     ins.push_back("\"m\"(scratch), \"m\"(scratch2)");
389   }
390   if (need_gpr_macroassembler_constants) {
391     ins.push_back(
392         "\"m\"(*reinterpret_cast<const char*>(&constants_pool::kBerberisMacroAssemblerConstants))");
393   }
394   int arg_counter = 0;
395   AsmCallInfo::ProcessBindings([&ins, &arg_counter, register_numbers](auto arg) {
396     using RegisterClass = typename decltype(arg)::RegisterClass;
397     if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS>) {
398       if constexpr (HaveInput(arg.arg_info) &&
399                     !std::is_same_v<typename decltype(arg)::Usage, intrinsics::bindings::Use>) {
400         ins.push_back("\"" + std::to_string(register_numbers[arg_counter]) + "\"(in" +
401                       std::to_string(arg.arg_info.from) +
402                       (NeedInputShadow<AsmCallInfo>(arg) ? "_shadow)" : ")"));
403       }
404       ++arg_counter;
405     }
406   });
407   GenerateElementsList<AsmCallInfo>(out, indent, "  : ", "", ins);
408 }
409 
410 template <typename AsmCallInfo>
GenerateOutShadows(FILE * out,int indent)411 void GenerateOutShadows(FILE* out, int indent) {
412   AsmCallInfo::ProcessBindings([out, indent](auto arg) {
413     using RegisterClass = typename decltype(arg)::RegisterClass;
414     if constexpr (RegisterClass::kAsRegister == 'r') {
415       // TODO(b/138439904): remove when clang handling of 'r' constraint would be fixed.
416       if constexpr (HaveOutput(arg.arg_info)) {
417         using Type = std::tuple_element_t<arg.arg_info.to, typename AsmCallInfo::OutputArguments>;
418         if constexpr (sizeof(Type) == sizeof(uint8_t)) {
419           fprintf(out, "%2$*1$sout%3$d = out%3$d_shadow;\n", indent, "", arg.arg_info.to);
420         }
421       }
422     } else if constexpr (RegisterClass::kAsRegister == 'x') {
423       if constexpr (HaveOutput(arg.arg_info)) {
424         using Type = std::tuple_element_t<arg.arg_info.to, typename AsmCallInfo::OutputArguments>;
425         const char* type_name = TypeTraits<Type>::kName;
426         const char* xmm_type_name;
427         // {,u}int32_t and {,u}int64_t have to be converted to float/double.
428         if constexpr (std::is_integral_v<Type>) {
429           xmm_type_name =
430               TypeTraits<typename TypeTraits<typename TypeTraits<Type>::Float>::Raw>::kName;
431         } else {
432           // Float32/Float64 can not be used, we need to use raw float/double.
433           xmm_type_name = TypeTraits<typename TypeTraits<Type>::Raw>::kName;
434         }
435         fprintf(out,
436                 "%*sstatic_assert(sizeof(%s) == sizeof(%s));\n",
437                 indent,
438                 "",
439                 type_name,
440                 xmm_type_name);
441         // Note: it's not safe to use bit_cast here till we have std::bit_cast from C++20.
442         // If optimizer wouldn't be enabled (e.g. if code is compiled with -O0) then bit_cast
443         // would use %st on 32-bit platform which destroys NaNs.
444         fprintf(out,
445                 "%2$*1$smemcpy(&out%3$d, &out%3$d_shadow, sizeof(%4$s));\n",
446                 indent,
447                 "",
448                 arg.arg_info.to,
449                 xmm_type_name);
450       }
451     }
452   });
453 }
454 
// Prints prefix, comma-separated elements and suffix, followed by newline.
//   out      - stream which receives generated code.
//   indent   - number of spaces printed before prefix.
//   prefix   - text printed before the first element.
//   suffix   - text printed after the last element.
//   elements - list items; may be empty, then only prefix and suffix are printed.
// Short lists are printed on a single line.  Long lists are printed with one element per
// line, elements on continuation lines are aligned with the first element.
// NOTE(review): indent is not counted when the length of the list is measured, thus
// indented single-line lists may be longer than non-indented ones - confirm this is intended.
template <typename AsmCallInfo>
void GenerateElementsList(FILE* out,
                          int indent,
                          const std::string& prefix,
                          const std::string& suffix,
                          const std::vector<std::string>& elements) {
  // Largest measured length which is still printed as a single line.
  constexpr std::size_t kSingleLineLimit = 102;

  if (elements.empty()) {
    fprintf(out, "%*s%s%s\n", indent, "", prefix.c_str(), suffix.c_str());
    return;
  }

  // Every element is counted together with its ", " separator (including the last one).
  std::size_t measured_length = prefix.length() + suffix.length();
  for (const std::string& element : elements) {
    measured_length += element.length() + 2;
  }
  const bool fits_on_single_line = measured_length <= kSingleLineLimit;
  const int continuation_indent = static_cast<int>(prefix.length()) + indent;

  fprintf(out, "%*s%s%s", indent, "", prefix.c_str(), elements.front().c_str());
  for (std::size_t index = 1; index < elements.size(); ++index) {
    if (fits_on_single_line) {
      fprintf(out, ", %s", elements[index].c_str());
    } else {
      fprintf(out, ",\n%*s%s", continuation_indent, "", elements[index].c_str());
    }
  }
  fprintf(out, "%s\n", suffix.c_str());
}
482 
// Tells whether input of the binding has to be passed to asm statement via shadow variable.
// That is the case for all 'x' bindings and for 'r' bindings with byte-sized input.
template <typename AsmCallInfo, typename Arg>
constexpr bool NeedInputShadow(Arg arg) {
  using RegisterClass = typename Arg::RegisterClass;
  constexpr auto kConstraint = RegisterClass::kAsRegister;
  // Without shadow clang silently converts 'r' restriction into 'q' restriction which
  // is wrong: if %ah or %bh is picked we would produce incorrect result here.
  // TODO(b/138439904): remove when clang handling of 'r' constraint would be fixed.
  if constexpr (kConstraint == 'r' && HaveInput(arg.arg_info)) {
    // Only 8-bit registers are special because some 16-bit registers include two of them
    // (%al/%ah, %cl/%ch, %dl/%dh, %bl/%bh).
    // Mix of 16-bit and 64-bit registers doesn't trigger bug in Clang.
    using InputType =
        std::tuple_element_t<arg.arg_info.from, typename AsmCallInfo::InputArguments>;
    return sizeof(InputType) == sizeof(uint8_t);
  } else {
    return kConstraint == 'x';
  }
}
503 
// Tells whether output of the binding has to be received from asm statement via shadow
// variable.  That is the case for all 'x' bindings and for 'r' bindings with byte-sized
// output.
template <typename AsmCallInfo, typename Arg>
constexpr bool NeedOutputShadow(Arg arg) {
  using RegisterClass = typename Arg::RegisterClass;
  constexpr auto kConstraint = RegisterClass::kAsRegister;
  // Without shadow clang silently converts 'r' restriction into 'q' restriction which
  // is wrong: if %ah or %bh is picked we would produce incorrect result here.
  // TODO(b/138439904): remove when clang handling of 'r' constraint would be fixed.
  if constexpr (kConstraint == 'r' && HaveOutput(arg.arg_info)) {
    // Only 8-bit registers are special because some 16-bit registers include two of them
    // (%al/%ah, %cl/%ch, %dl/%dh, %bl/%bh).
    // Mix of 16-bit and 64-bit registers doesn't trigger bug in Clang.
    using OutputType =
        std::tuple_element_t<arg.arg_info.to, typename AsmCallInfo::OutputArguments>;
    return sizeof(OutputType) == sizeof(uint8_t);
  } else {
    return kConstraint == 'x';
  }
}
524 
525 #include "text_asm_intrinsics_process_bindings-inl.h"
526 
GenerateTextAsmIntrinsics(FILE * out)527 void GenerateTextAsmIntrinsics(FILE* out) {
528   // Note: nullptr means "NoCPUIDRestriction", other values are only assigned in one place below
529   // since the code in this function mostly cares only about three cases:
530   //   • There are no CPU restrictions.
531   //   • There are CPU restrictions but they are the same as in previous case (which is error).
532   //   • There are new CPU restrictions.
533   const char* cpuid_restriction = nullptr /* NoCPUIDRestriction */;
534   bool if_opened = false;
535   std::string running_name;
536   ProcessAllBindings<MacroAssembler<TextAssembler>::MacroAssemblers>(
537       [&running_name, &if_opened, &cpuid_restriction, out](auto&& asm_call_generator) {
538         using AsmCallInfo = std::decay_t<decltype(asm_call_generator)>;
539         std::string full_name = std::string(asm_call_generator.kIntrinsic,
540                                             std::strlen(asm_call_generator.kIntrinsic) - 1) +
541                                 ", kUseCppImplementation>";
542         if (size_t arguments_count = std::tuple_size_v<typename AsmCallInfo::InputArguments>) {
543           full_name += "(in0";
544           for (size_t i = 1; i < arguments_count; ++i) {
545             full_name += ", in" + std::to_string(i);
546           }
547           full_name += ")";
548         } else {
549           full_name += "()";
550         }
551         if (full_name != running_name) {
552           if (if_opened) {
553             if (cpuid_restriction) {
554               fprintf(out, "  } else {\n    return %s;\n", running_name.c_str());
555               cpuid_restriction = nullptr /* NoCPUIDRestriction */;
556             }
557             if_opened = false;
558             fprintf(out, "  }\n");
559           }
560           // Final line of function.
561           if (!running_name.empty()) {
562             fprintf(out, "};\n\n");
563           }
564           GenerateFunctionHeader<AsmCallInfo>(out, 0);
565           running_name = full_name;
566         }
567         using CPUIDRestriction = AsmCallInfo::CPUIDRestriction;
568         // Note: this series of "if constexpr" expressions is the only place where cpuid_restriction
569         // may get a concrete non-zero value;
570         if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::NoCPUIDRestriction>) {
571           if (cpuid_restriction) {
572             fprintf(out, "  } else {\n");
573             cpuid_restriction = nullptr;
574           }
575         } else {
576           if (if_opened) {
577             fprintf(out, "  } else if (");
578           } else {
579             fprintf(out, "  if (");
580             if_opened = true;
581           }
582           cpuid_restriction = TextAssembler::kCPUIDRestrictionString<CPUIDRestriction>;
583           fprintf(out, "%s) {\n", cpuid_restriction);
584         }
585         GenerateFunctionBody<AsmCallInfo>(out, 2 + 2 * if_opened);
586       });
587   if (if_opened) {
588     fprintf(out, "  }\n");
589   }
590   // Final line of function.
591   if (!running_name.empty()) {
592     fprintf(out, "};\n\n");
593   }
594 }
595 
596 }  // namespace berberis
597 
main(int argc,char * argv[])598 int main(int argc, char* argv[]) {
599   FILE* out = argc > 1 ? fopen(argv[1], "w") : stdout;
600   fprintf(out,
601           R"STRING(
602 /*
603  * Copyright (C) 2024 The Android Open Source Project
604  *
605  * Licensed under the Apache License, Version 2.0 (the "License");
606  * you may not use this file except in compliance with the License.
607  * You may obtain a copy of the License at
608  *
609  *      http://www.apache.org/licenses/LICENSE-2.0
610  *
611  * Unless required by applicable law or agreed to in writing, software
612  * distributed under the License is distributed on an "AS IS" BASIS,
613  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
614  * See the License for the specific language governing permissions and
615  * limitations under the License.
616  */
617 
618 // This file automatically generated by gen_text_asm_intrinsics.cc
619 // DO NOT EDIT!
620 
621 #ifndef %2$s_%3$s_INTRINSICS_INTRINSICS_H_
622 #define %2$s_%3$s_INTRINSICS_INTRINSICS_H_
623 
624 #if defined(__i386__) || defined(__x86_64__)
625 #include <xmmintrin.h>
626 #endif
627 
628 #include "berberis/base/config.h"
629 #include "berberis/runtime_primitives/platform.h"
630 #include "%3$s/intrinsics/%1$s_to_all/intrinsics.h"
631 #include "%3$s/intrinsics/vector_intrinsics.h"
632 
633 namespace berberis::constants_pool {
634 
635 struct MacroAssemblerConstants;
636 
637 extern const MacroAssemblerConstants kBerberisMacroAssemblerConstants
638     __attribute__((visibility("hidden")));
639 
640 }  // namespace berberis::constants_pool
641 
642 namespace %3$s {
643 
644 namespace constants_pool {
645 
646 %4$s
647 
648 }  // namespace constants_pool
649 
650 namespace intrinsics {
651 )STRING",
652           berberis::TextAssembler::kArchName,
653           berberis::TextAssembler::kArchGuard,
654           berberis::TextAssembler::kNamespaceName,
655           strcmp(berberis::TextAssembler::kNamespaceName, "berberis")
656               ? "using berberis::constants_pool::kBerberisMacroAssemblerConstants;"
657               : "");
658 
659   berberis::GenerateTextAsmIntrinsics(out);
660   berberis::MakeExtraGuestFunctions(out);
661 
662   fprintf(out,
663           R"STRING(
664 }  // namespace intrinsics
665 
666 }  // namespace %2$s
667 
668 #endif /* %1$s_%2$s_INTRINSICS_INTRINSICS_H_ */
669 )STRING",
670           berberis::TextAssembler::kArchGuard,
671           berberis::TextAssembler::kNamespaceName);
672 
673   fclose(out);
674   return 0;
675 }
676