1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <stdio.h>
18
19 #include <algorithm>
20 #include <iterator>
21 #include <memory>
22 #include <optional>
23 #include <string>
24 #include <tuple>
25 #include <type_traits>
26 #include <vector>
27
28 #include "berberis/base/checks.h"
29 #include "berberis/base/config.h"
30 #include "berberis/intrinsics/common/intrinsics_bindings.h"
31 #include "berberis/intrinsics/common/intrinsics_float.h"
32 #include "berberis/intrinsics/intrinsics_args.h"
33 #include "berberis/intrinsics/macro_assembler.h"
34 #include "berberis/intrinsics/simd_register.h"
35 #include "berberis/intrinsics/type_traits.h"
36
37 #include "text_assembler.h"
38
39 namespace berberis {
40
// Forward declarations of the generator helpers defined later in this file. They allow
// GenerateFunctionHeader and GenerateFunctionBody to call the helpers before their definitions.

// Declares one "outN" variable per output argument.
template <typename AsmCallInfo>
void GenerateOutputVariables(FILE* out, int indent);
// Declares one "tmpN" variable per binding that is neither an input nor an output.
template <typename AsmCallInfo>
void GenerateTemporaries(FILE* out, int indent);
// Declares the "_shadow" (and "_expanded") variables used in place of ins/outs in the asm.
template <typename AsmCallInfo>
void GenerateInShadows(FILE* out, int indent);
// Fills |register_numbers| with asm operand numbers: non-Use bindings first, then Use bindings.
template <typename AsmCallInfo>
void AssignRegisterNumbers(int* register_numbers);
// Runs the macro instruction through the text assembler; returns a tuple of two "need" flags.
template <typename AsmCallInfo>
auto CallTextAssembler(FILE* out, int indent, int* register_numbers);
// Prints the output operand list of the asm statement.
template <typename AsmCallInfo>
void GenerateAssemblerOuts(FILE* out, int indent);
// Prints the input operand list of the asm statement.
template <typename AsmCallInfo>
void GenerateAssemblerIns(FILE* out,
                          int indent,
                          int* register_numbers,
                          bool need_gpr_macroassembler_scratch,
                          bool need_gpr_macroassembler_constants);
// Prints the statements that copy shadow variables back into the "outN" variables.
template <typename AsmCallInfo>
void GenerateOutShadows(FILE* out, int indent);
// Prints |prefix|, the comma-separated |elements| and |suffix|, wrapping long lists.
template <typename AsmCallInfo>
void GenerateElementsList(FILE* out,
                          int indent,
                          const std::string& prefix,
                          const std::string& suffix,
                          const std::vector<std::string>& elements);
// Tells whether the input bound to |arg| is passed to the asm through a "_shadow" variable.
template <typename AsmCallInfo, typename Arg>
constexpr bool NeedInputShadow(Arg arg);
// Tells whether the output bound to |arg| is received from the asm through a "_shadow" variable.
template <typename AsmCallInfo, typename Arg>
constexpr bool NeedOutputShadow(Arg arg);
71
72 template <typename AsmCallInfo>
GenerateFunctionHeader(FILE * out,int indent)73 void GenerateFunctionHeader(FILE* out, int indent) {
74 if (strchr(AsmCallInfo::kIntrinsic, '<')) {
75 fprintf(out, "template <>\n");
76 }
77 std::string prefix;
78 if constexpr (std::tuple_size_v<typename AsmCallInfo::OutputArguments> == 0) {
79 prefix = "inline void " + std::string(AsmCallInfo::kIntrinsic) + "(";
80 } else {
81 const char* prefix_of_prefix = "inline std::tuple<";
82 for (const char* type_name : AsmCallInfo::OutputArgumentsTypeNames) {
83 prefix += prefix_of_prefix + std::string(type_name);
84 prefix_of_prefix = ", ";
85 }
86 prefix += "> " + std::string(AsmCallInfo::kIntrinsic) + "(";
87 }
88 std::vector<std::string> ins;
89 for (const char* type_name : AsmCallInfo::InputArgumentsTypeNames) {
90 ins.push_back("[[maybe_unused]] " + std::string(type_name) + " in" +
91 std::to_string(ins.size()));
92 }
93 GenerateElementsList<AsmCallInfo>(out, indent, prefix, ") {", ins);
94 fprintf(out,
95 " [[maybe_unused]] alignas(berberis::config::kScratchAreaAlign)"
96 " uint8_t scratch[berberis::config::kScratchAreaSize];\n");
97 fprintf(out,
98 " [[maybe_unused]] auto& scratch2 ="
99 " scratch[berberis::config::kScratchAreaSlotSize];\n");
100 }
101
102 template <typename AsmCallInfo>
GenerateFunctionBody(FILE * out,int indent)103 void GenerateFunctionBody(FILE* out, int indent) {
104 // Declare out variables.
105 GenerateOutputVariables<AsmCallInfo>(out, indent);
106 // Declare temporary variables.
107 GenerateTemporaries<AsmCallInfo>(out, indent);
108 // We need "shadow variables" for ins of types: Float32, Float64 and SIMD128Register.
109 // This is because assembler does not accept these arguments for XMMRegisters and
110 // we couldn't use "float"/"double" function arguments because if ABI issues.
111 GenerateInShadows<AsmCallInfo>(out, indent);
112 // Even if we don't pass any registers we need to allocate at least one element.
113 int register_numbers[std::tuple_size_v<typename AsmCallInfo::Bindings> == 0
114 ? 1
115 : std::tuple_size_v<typename AsmCallInfo::Bindings>];
116 // Assign numbers to registers - we need to pass them to assembler and then, later,
117 // to Generator of Input Variable line.
118 AssignRegisterNumbers<AsmCallInfo>(register_numbers);
119 // Print opening line for asm call.
120 if constexpr (AsmCallInfo::kSideEffects) {
121 fprintf(out, "%*s__asm__ __volatile__(\n", indent, "");
122 } else {
123 fprintf(out, "%*s__asm__(\n", indent, "");
124 }
125 // Call text assembler to produce the body of an asm call.
126 auto [need_gpr_macroassembler_scratch, need_gpr_macroassembler_constants] =
127 CallTextAssembler<AsmCallInfo>(out, indent, register_numbers);
128 // Assembler instruction outs.
129 GenerateAssemblerOuts<AsmCallInfo>(out, indent);
130 // Assembler instruction ins.
131 GenerateAssemblerIns<AsmCallInfo>(out,
132 indent,
133 register_numbers,
134 need_gpr_macroassembler_scratch,
135 need_gpr_macroassembler_constants);
136 // Close asm call.
137 fprintf(out, "%*s);\n", indent, "");
138 // Generate copies from shadows to outputs.
139 GenerateOutShadows<AsmCallInfo>(out, indent);
140 // Return value from function.
141 if constexpr (std::tuple_size_v<typename AsmCallInfo::OutputArguments> > 0) {
142 std::vector<std::string> outs;
143 for (std::size_t id = 0; id < std::tuple_size_v<typename AsmCallInfo::OutputArguments>; ++id) {
144 outs.push_back("out" + std::to_string(id));
145 }
146 GenerateElementsList<AsmCallInfo>(out, indent, "return {", "};", outs);
147 }
148 }
149
// Prints one declaration "<type> outN;" per output argument of the intrinsic, numbering the
// variables from zero in the order of AsmCallInfo::OutputArgumentsTypeNames.
//
//   out     stream that receives the generated code.
//   indent  number of spaces printed before each declaration.
template <typename AsmCallInfo>
void GenerateOutputVariables(FILE* out, int indent) {
  std::size_t id = 0;
  for (const char* type_name : AsmCallInfo::OutputArgumentsTypeNames) {
    // std::size_t is unsigned, so it is printed with %zu (%zd expects the signed counterpart).
    fprintf(out, "%*s%s out%zu;\n", indent, "", type_name, id++);
  }
}
157
158 template <typename AsmCallInfo>
GenerateTemporaries(FILE * out,int indent)159 void GenerateTemporaries(FILE* out, int indent) {
160 std::size_t id = 0;
161 AsmCallInfo::ProcessBindings([out, &id, indent](auto arg) {
162 using RegisterClass = typename decltype(arg)::RegisterClass;
163 if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS>) {
164 if constexpr (!HaveInput(arg.arg_info) && !HaveOutput(arg.arg_info)) {
165 static_assert(
166 std::is_same_v<typename decltype(arg)::Usage, intrinsics::bindings::Def> ||
167 std::is_same_v<typename decltype(arg)::Usage, intrinsics::bindings::DefEarlyClobber>);
168 fprintf(out,
169 "%*s%s tmp%zd;\n",
170 indent,
171 "",
172 TypeTraits<typename RegisterClass::Type>::kName,
173 id++);
174 }
175 }
176 });
177 }
178
// Prints declarations of the "shadow" variables that stand in for inputs and outputs inside the
// asm statement. What is printed depends on the register class of each binding:
//   'm': nothing; it is only checked that the binding has neither input nor output.
//   'r': uint32_t shadows for the ins/outs selected by NeedInputShadow/NeedOutputShadow.
//   'x': a shadow of the raw floating point/vector type for every input and output; inputs are
//        additionally copied into their shadow with memcpy.
// Bindings of other register classes produce no output here.
//
//   out     stream that receives the generated code.
//   indent  number of spaces printed before each generated line.
template <typename AsmCallInfo>
void GenerateInShadows(FILE* out, int indent) {
  AsmCallInfo::ProcessBindings([out, indent](auto arg) {
    using RegisterClass = typename decltype(arg)::RegisterClass;
    if constexpr (RegisterClass::kAsRegister == 'm') {
      // Only temporary memory scratch area is supported.
      static_assert(!HaveInput(arg.arg_info) && !HaveOutput(arg.arg_info));
    } else if constexpr (RegisterClass::kAsRegister == 'r') {
      // TODO(b/138439904): remove when clang handling of 'r' constraint would be fixed.
      if constexpr (NeedInputShadow<AsmCallInfo>(arg)) {
        // Positional arguments: %1$ is the indent width, %2$ the empty string that gets padded to
        // that width, %3$ the input index (used twice).
        fprintf(out, "%2$*1$suint32_t in%3$d_shadow = in%3$d;\n", indent, "", arg.arg_info.from);
      }
      if constexpr (NeedOutputShadow<AsmCallInfo>(arg)) {
        fprintf(out, "%*suint32_t out%d_shadow;\n", indent, "", arg.arg_info.to);
      }
    } else if constexpr (RegisterClass::kAsRegister == 'x') {
      if constexpr (HaveInput(arg.arg_info)) {
        using Type = std::tuple_element_t<arg.arg_info.from, typename AsmCallInfo::InputArguments>;
        // Name of the type the shadow is copied from; switched to uint32_t when the input has to
        // be expanded first.
        const char* type_name = TypeTraits<Type>::kName;
        // Name of the type of the shadow variable itself.
        const char* xmm_type_name;
        // Suffix appended to "inN" to name the memcpy source ("" or "_expanded").
        const char* expanded = "";
        // Types allowed for 'x' restriction are float, double and __m128/__m128i/__m128d
        // First two work for {,u}int32_t and {,u}int64_t, but small integer types must be expanded.
        if constexpr (std::is_integral_v<Type> && sizeof(Type) < sizeof(int32_t)) {
          fprintf(
              out, "%2$*1$suint32_t in%3$d_expanded = in%3$d;\n", indent, "", arg.arg_info.from);
          type_name = TypeTraits<uint32_t>::kName;
          xmm_type_name =
              TypeTraits<typename TypeTraits<typename TypeTraits<uint32_t>::Float>::Raw>::kName;
          expanded = "_expanded";
        } else if constexpr (std::is_integral_v<Type>) {
          // {,u}int32_t and {,u}int64_t have to be converted to float/double.
          xmm_type_name =
              TypeTraits<typename TypeTraits<typename TypeTraits<Type>::Float>::Raw>::kName;
        } else {
          // Float32/Float64 can not be used, we need to use raw float/double.
          xmm_type_name = TypeTraits<typename TypeTraits<Type>::Raw>::kName;
        }
        fprintf(out, "%*s%s in%d_shadow;\n", indent, "", xmm_type_name, arg.arg_info.from);
        // The generated code checks that the memcpy below copies between equally sized objects.
        fprintf(out,
                "%*sstatic_assert(sizeof(%s) == sizeof(%s));\n",
                indent,
                "",
                type_name,
                xmm_type_name);
        // Note: it's not safe to use bit_cast here till we have std::bit_cast from C++20.
        // If optimizer wouldn't be enabled (e.g. if code is compiled with -O0) then bit_cast
        // would use %st on 32-bit platform which destroys NaNs.
        fprintf(out,
                "%2$*1$smemcpy(&in%3$d_shadow, &in%3$d%4$s, sizeof(%5$s));\n",
                indent,
                "",
                arg.arg_info.from,
                expanded,
                xmm_type_name);
      }
      if constexpr (HaveOutput(arg.arg_info)) {
        using Type = std::tuple_element_t<arg.arg_info.to, typename AsmCallInfo::OutputArguments>;
        const char* xmm_type_name;
        // {,u}int32_t and {,u}int64_t have to be converted to float/double.
        if constexpr (std::is_integral_v<Type>) {
          xmm_type_name =
              TypeTraits<typename TypeTraits<typename TypeTraits<Type>::Float>::Raw>::kName;
        } else {
          // Float32/Float64 can not be used, we need to use raw float/double.
          xmm_type_name = TypeTraits<typename TypeTraits<Type>::Raw>::kName;
        }
        // Output shadows are only declared here; GenerateOutShadows prints the copy back.
        fprintf(out, "%*s%s out%d_shadow;\n", indent, "", xmm_type_name, arg.arg_info.to);
      }
    }
  });
}
251
252 template <typename AsmCallInfo>
AssignRegisterNumbers(int * register_numbers)253 void AssignRegisterNumbers(int* register_numbers) {
254 // Assign number for output (and temporary) arguments.
255 std::size_t id = 0;
256 int arg_counter = 0;
257 AsmCallInfo::ProcessBindings([&id, &arg_counter, ®ister_numbers](auto arg) {
258 using RegisterClass = typename decltype(arg)::RegisterClass;
259 if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS>) {
260 if constexpr (!std::is_same_v<typename decltype(arg)::Usage, intrinsics::bindings::Use>) {
261 register_numbers[arg_counter] = id++;
262 }
263 ++arg_counter;
264 }
265 });
266 // Assign numbers for input arguments.
267 arg_counter = 0;
268 AsmCallInfo::ProcessBindings([&id, &arg_counter, ®ister_numbers](auto arg) {
269 using RegisterClass = typename decltype(arg)::RegisterClass;
270 if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS>) {
271 if constexpr (std::is_same_v<typename decltype(arg)::Usage, intrinsics::bindings::Use>) {
272 register_numbers[arg_counter] = id++;
273 }
274 ++arg_counter;
275 }
276 });
277 }
278
// Runs the macro instruction of the intrinsic through a text assembler that prints to |out|.
//
// First the implicit registers ('a', 'c', 'd' classes) of the assembler are bound to the operand
// numbers from |register_numbers|. Then AsmCallInfo::kMacroInstruction is invoked with the
// assembler followed by one argument per explicit binding: a memory operand for 'm' bindings and
// the operand number for other non-implicit ones. FLAGS and implicit-register bindings contribute
// no argument.
//
// Returns a tuple of as.need_gpr_macroassembler_scratch() and
// as.need_gpr_macroassembler_constants(), queried after the instruction was emitted.
template <typename AsmCallInfo>
auto CallTextAssembler(FILE* out, int indent, int* register_numbers) {
  MacroAssembler<TextAssembler> as(indent, out);
  // Index of the current binding among the non-FLAGS bindings.
  int arg_counter = 0;
  AsmCallInfo::ProcessBindings([&arg_counter, &as, register_numbers](auto arg) {
    using RegisterClass = typename decltype(arg)::RegisterClass;
    if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS>) {
      if constexpr (RegisterClass::kAsRegister != 'm') {
        if constexpr (RegisterClass::kIsImplicitReg) {
          if constexpr (RegisterClass::kAsRegister == 'a') {
            as.gpr_a = TextAssembler::Register(register_numbers[arg_counter]);
          } else if constexpr (RegisterClass::kAsRegister == 'c') {
            as.gpr_c = TextAssembler::Register(register_numbers[arg_counter]);
          } else {
            // Only the 'a', 'c' and 'd' implicit registers are handled.
            static_assert(RegisterClass::kAsRegister == 'd');
            as.gpr_d = TextAssembler::Register(register_numbers[arg_counter]);
          }
        }
      }
      ++arg_counter;
    }
  });
  // At this point arg_counter equals the number of non-FLAGS bindings; that value is used as the
  // number of the constants register.
  as.gpr_macroassembler_constants = TextAssembler::Register(arg_counter);
  arg_counter = 0;
  // Number of 'm' (scratch memory) bindings seen so far.
  int scratch_counter = 0;
  std::apply(AsmCallInfo::kMacroInstruction,
             std::tuple_cat(
                 std::tuple<MacroAssembler<TextAssembler>&>{as},
                 AsmCallInfo::MakeTuplefromBindings(
                     [&as, &arg_counter, &scratch_counter, register_numbers](auto arg) {
                       using RegisterClass = typename decltype(arg)::RegisterClass;
                       if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS>) {
                         if constexpr (RegisterClass::kAsRegister == 'm') {
                           // Note: here the scratch registers are numbered with the running
                           // arg_counter itself rather than through register_numbers.
                           if (scratch_counter == 0) {
                             as.gpr_macroassembler_scratch = TextAssembler::Register(arg_counter++);
                           } else if (scratch_counter == 1) {
                             as.gpr_macroassembler_scratch2 =
                                 TextAssembler::Register(arg_counter++);
                           } else {
                             FATAL("Only two scratch registers are supported for now");
                           }
                           // Note: as.gpr_scratch in combination with offset is treated by text
                           // assembler specially. We rely on offset set here to be the same as
                           // scratch2 address in scratch buffer.
                           return std::tuple{TextAssembler::Operand{
                               .base = as.gpr_scratch,
                               .disp = static_cast<int32_t>(config::kScratchAreaSlotSize *
                                                            scratch_counter++)}};
                         } else if constexpr (RegisterClass::kIsImplicitReg) {
                           // Implicit registers occupy a slot but are not passed as an argument.
                           ++arg_counter;
                           return std::tuple{};
                         } else {
                           return std::tuple{register_numbers[arg_counter++]};
                         }
                       } else {
                         // FLAGS bindings neither occupy a slot nor produce an argument.
                         return std::tuple{};
                       }
                     })));
  // Verify CPU vendor and SSE restrictions.
  as.CheckCPUIDRestriction<typename AsmCallInfo::CPUIDRestriction>();
  return std::tuple{as.need_gpr_macroassembler_scratch(), as.need_gpr_macroassembler_constants()};
}
341
342 template <typename AsmCallInfo>
GenerateAssemblerOuts(FILE * out,int indent)343 void GenerateAssemblerOuts(FILE* out, int indent) {
344 std::vector<std::string> outs;
345 int tmp_id = 0;
346 AsmCallInfo::ProcessBindings([&outs, &tmp_id](auto arg) {
347 using RegisterClass = typename decltype(arg)::RegisterClass;
348 if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS> &&
349 !std::is_same_v<typename decltype(arg)::Usage, intrinsics::bindings::Use>) {
350 std::string out = "\"=";
351 if constexpr (std::is_same_v<typename decltype(arg)::Usage,
352 intrinsics::bindings::DefEarlyClobber>) {
353 out += "&";
354 }
355 out += RegisterClass::kAsRegister;
356 if constexpr (HaveOutput(arg.arg_info)) {
357 bool need_shadow = NeedOutputShadow<AsmCallInfo>(arg);
358 out += "\"(out" + std::to_string(arg.arg_info.to) + (need_shadow ? "_shadow)" : ")");
359 } else if constexpr (HaveInput(arg.arg_info)) {
360 bool need_shadow = NeedInputShadow<AsmCallInfo>(arg);
361 out += "\"(in" + std::to_string(arg.arg_info.from) + (need_shadow ? "_shadow)" : ")");
362 } else {
363 out += "\"(tmp" + std::to_string(tmp_id++) + ")";
364 }
365 outs.push_back(out);
366 }
367 });
368 GenerateElementsList<AsmCallInfo>(out, indent, " : ", "", outs);
369 }
370
371 template <typename AsmCallInfo>
GenerateAssemblerIns(FILE * out,int indent,int * register_numbers,bool need_gpr_macroassembler_scratch,bool need_gpr_macroassembler_constants)372 void GenerateAssemblerIns(FILE* out,
373 int indent,
374 int* register_numbers,
375 bool need_gpr_macroassembler_scratch,
376 bool need_gpr_macroassembler_constants) {
377 std::vector<std::string> ins;
378 AsmCallInfo::ProcessBindings([&ins](auto arg) {
379 using RegisterClass = typename decltype(arg)::RegisterClass;
380 if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS> &&
381 std::is_same_v<typename decltype(arg)::Usage, intrinsics::bindings::Use>) {
382 ins.push_back("\"" + std::string(1, RegisterClass::kAsRegister) + "\"(in" +
383 std::to_string(arg.arg_info.from) +
384 (NeedInputShadow<AsmCallInfo>(arg) ? "_shadow)" : ")"));
385 }
386 });
387 if (need_gpr_macroassembler_scratch) {
388 ins.push_back("\"m\"(scratch), \"m\"(scratch2)");
389 }
390 if (need_gpr_macroassembler_constants) {
391 ins.push_back(
392 "\"m\"(*reinterpret_cast<const char*>(&constants_pool::kBerberisMacroAssemblerConstants))");
393 }
394 int arg_counter = 0;
395 AsmCallInfo::ProcessBindings([&ins, &arg_counter, register_numbers](auto arg) {
396 using RegisterClass = typename decltype(arg)::RegisterClass;
397 if constexpr (!std::is_same_v<RegisterClass, intrinsics::bindings::FLAGS>) {
398 if constexpr (HaveInput(arg.arg_info) &&
399 !std::is_same_v<typename decltype(arg)::Usage, intrinsics::bindings::Use>) {
400 ins.push_back("\"" + std::to_string(register_numbers[arg_counter]) + "\"(in" +
401 std::to_string(arg.arg_info.from) +
402 (NeedInputShadow<AsmCallInfo>(arg) ? "_shadow)" : ")"));
403 }
404 ++arg_counter;
405 }
406 });
407 GenerateElementsList<AsmCallInfo>(out, indent, " : ", "", ins);
408 }
409
410 template <typename AsmCallInfo>
GenerateOutShadows(FILE * out,int indent)411 void GenerateOutShadows(FILE* out, int indent) {
412 AsmCallInfo::ProcessBindings([out, indent](auto arg) {
413 using RegisterClass = typename decltype(arg)::RegisterClass;
414 if constexpr (RegisterClass::kAsRegister == 'r') {
415 // TODO(b/138439904): remove when clang handling of 'r' constraint would be fixed.
416 if constexpr (HaveOutput(arg.arg_info)) {
417 using Type = std::tuple_element_t<arg.arg_info.to, typename AsmCallInfo::OutputArguments>;
418 if constexpr (sizeof(Type) == sizeof(uint8_t)) {
419 fprintf(out, "%2$*1$sout%3$d = out%3$d_shadow;\n", indent, "", arg.arg_info.to);
420 }
421 }
422 } else if constexpr (RegisterClass::kAsRegister == 'x') {
423 if constexpr (HaveOutput(arg.arg_info)) {
424 using Type = std::tuple_element_t<arg.arg_info.to, typename AsmCallInfo::OutputArguments>;
425 const char* type_name = TypeTraits<Type>::kName;
426 const char* xmm_type_name;
427 // {,u}int32_t and {,u}int64_t have to be converted to float/double.
428 if constexpr (std::is_integral_v<Type>) {
429 xmm_type_name =
430 TypeTraits<typename TypeTraits<typename TypeTraits<Type>::Float>::Raw>::kName;
431 } else {
432 // Float32/Float64 can not be used, we need to use raw float/double.
433 xmm_type_name = TypeTraits<typename TypeTraits<Type>::Raw>::kName;
434 }
435 fprintf(out,
436 "%*sstatic_assert(sizeof(%s) == sizeof(%s));\n",
437 indent,
438 "",
439 type_name,
440 xmm_type_name);
441 // Note: it's not safe to use bit_cast here till we have std::bit_cast from C++20.
442 // If optimizer wouldn't be enabled (e.g. if code is compiled with -O0) then bit_cast
443 // would use %st on 32-bit platform which destroys NaNs.
444 fprintf(out,
445 "%2$*1$smemcpy(&out%3$d, &out%3$d_shadow, sizeof(%4$s));\n",
446 indent,
447 "",
448 arg.arg_info.to,
449 xmm_type_name);
450 }
451 }
452 });
453 }
454
// Prints |prefix|, then |elements| separated by commas, then |suffix| and a newline.
//
// The first line is indented by |indent| spaces. When the combined length of prefix, suffix and
// all elements (counting two separator characters per element) is at most 102, everything goes
// on a single line. Otherwise every element after the first starts on its own line, aligned
// with the first element. An empty list prints just the prefix and the suffix.
template <typename AsmCallInfo>
void GenerateElementsList(FILE* out,
                          int indent,
                          const std::string& prefix,
                          const std::string& suffix,
                          const std::vector<std::string>& elements) {
  if (elements.empty()) {
    fprintf(out, "%*s%s%s\n", indent, "", prefix.c_str(), suffix.c_str());
    return;
  }

  // Decide once whether the whole list fits on one line.
  constexpr std::size_t kSingleLineLimit = 102;
  std::size_t total_length = prefix.length() + suffix.length() + 2 * elements.size();
  for (const std::string& item : elements) {
    total_length += item.length();
  }
  const bool fits_on_one_line = total_length <= kSingleLineLimit;
  // Continuation lines start in the column right after the prefix.
  const int continuation_indent = static_cast<int>(prefix.length()) + indent;

  fprintf(out, "%*s%s%s", indent, "", prefix.c_str(), elements.front().c_str());
  for (std::size_t index = 1; index < elements.size(); ++index) {
    const char* item = elements[index].c_str();
    if (fits_on_one_line) {
      fprintf(out, ", %s", item);
    } else {
      fprintf(out, ",\n%*s%s", continuation_indent, "", item);
    }
  }
  fprintf(out, "%s\n", suffix.c_str());
}
482
// Tells whether the input bound to |arg| has to be passed to the asm statement through a
// separate "_shadow" variable.
//   'x' class: always.
//   'r' class: only for inputs that are one byte wide.
//   anything else: never.
template <typename AsmCallInfo, typename Arg>
constexpr bool NeedInputShadow(Arg arg) {
  using RegisterClass = typename Arg::RegisterClass;
  if constexpr (RegisterClass::kAsRegister == 'x') {
    return true;
  } else if constexpr (RegisterClass::kAsRegister == 'r' && HaveInput(arg.arg_info)) {
    // Without a shadow clang silently converts the 'r' restriction into a 'q' restriction,
    // which is wrong: if %ah or %bh is picked the result would be incorrect.
    // Only 8-bit registers are special, because some 16-bit registers include two of them
    // (%al/%ah, %cl/%ch, %dl/%dh, %bl/%bh). A mix of 16-bit and 64-bit registers doesn't
    // trigger the bug in Clang.
    // TODO(b/138439904): remove when clang handling of 'r' constraint would be fixed.
    using InputType =
        std::tuple_element_t<arg.arg_info.from, typename AsmCallInfo::InputArguments>;
    return sizeof(InputType) == sizeof(uint8_t);
  } else {
    return false;
  }
}
503
// Tells whether the output bound to |arg| has to be received from the asm statement through a
// separate "_shadow" variable.
//   'x' class: always.
//   'r' class: only for outputs that are one byte wide.
//   anything else: never.
template <typename AsmCallInfo, typename Arg>
constexpr bool NeedOutputShadow(Arg arg) {
  using RegisterClass = typename Arg::RegisterClass;
  if constexpr (RegisterClass::kAsRegister == 'x') {
    return true;
  } else if constexpr (RegisterClass::kAsRegister == 'r' && HaveOutput(arg.arg_info)) {
    // Without a shadow clang silently converts the 'r' restriction into a 'q' restriction,
    // which is wrong: if %ah or %bh is picked the result would be incorrect.
    // Only 8-bit registers are special, because some 16-bit registers include two of them
    // (%al/%ah, %cl/%ch, %dl/%dh, %bl/%bh). A mix of 16-bit and 64-bit registers doesn't
    // trigger the bug in Clang.
    // TODO(b/138439904): remove when clang handling of 'r' constraint would be fixed.
    using OutputType =
        std::tuple_element_t<arg.arg_info.to, typename AsmCallInfo::OutputArguments>;
    return sizeof(OutputType) == sizeof(uint8_t);
  } else {
    return false;
  }
}
524
525 #include "text_asm_intrinsics_process_bindings-inl.h"
526
GenerateTextAsmIntrinsics(FILE * out)527 void GenerateTextAsmIntrinsics(FILE* out) {
528 // Note: nullptr means "NoCPUIDRestriction", other values are only assigned in one place below
529 // since the code in this function mostly cares only about three cases:
530 // • There are no CPU restrictions.
531 // • There are CPU restrictions but they are the same as in previous case (which is error).
532 // • There are new CPU restrictions.
533 const char* cpuid_restriction = nullptr /* NoCPUIDRestriction */;
534 bool if_opened = false;
535 std::string running_name;
536 ProcessAllBindings<MacroAssembler<TextAssembler>::MacroAssemblers>(
537 [&running_name, &if_opened, &cpuid_restriction, out](auto&& asm_call_generator) {
538 using AsmCallInfo = std::decay_t<decltype(asm_call_generator)>;
539 std::string full_name = std::string(asm_call_generator.kIntrinsic,
540 std::strlen(asm_call_generator.kIntrinsic) - 1) +
541 ", kUseCppImplementation>";
542 if (size_t arguments_count = std::tuple_size_v<typename AsmCallInfo::InputArguments>) {
543 full_name += "(in0";
544 for (size_t i = 1; i < arguments_count; ++i) {
545 full_name += ", in" + std::to_string(i);
546 }
547 full_name += ")";
548 } else {
549 full_name += "()";
550 }
551 if (full_name != running_name) {
552 if (if_opened) {
553 if (cpuid_restriction) {
554 fprintf(out, " } else {\n return %s;\n", running_name.c_str());
555 cpuid_restriction = nullptr /* NoCPUIDRestriction */;
556 }
557 if_opened = false;
558 fprintf(out, " }\n");
559 }
560 // Final line of function.
561 if (!running_name.empty()) {
562 fprintf(out, "};\n\n");
563 }
564 GenerateFunctionHeader<AsmCallInfo>(out, 0);
565 running_name = full_name;
566 }
567 using CPUIDRestriction = AsmCallInfo::CPUIDRestriction;
568 // Note: this series of "if constexpr" expressions is the only place where cpuid_restriction
569 // may get a concrete non-zero value;
570 if constexpr (std::is_same_v<CPUIDRestriction, intrinsics::bindings::NoCPUIDRestriction>) {
571 if (cpuid_restriction) {
572 fprintf(out, " } else {\n");
573 cpuid_restriction = nullptr;
574 }
575 } else {
576 if (if_opened) {
577 fprintf(out, " } else if (");
578 } else {
579 fprintf(out, " if (");
580 if_opened = true;
581 }
582 cpuid_restriction = TextAssembler::kCPUIDRestrictionString<CPUIDRestriction>;
583 fprintf(out, "%s) {\n", cpuid_restriction);
584 }
585 GenerateFunctionBody<AsmCallInfo>(out, 2 + 2 * if_opened);
586 });
587 if (if_opened) {
588 fprintf(out, " }\n");
589 }
590 // Final line of function.
591 if (!running_name.empty()) {
592 fprintf(out, "};\n\n");
593 }
594 }
595
596 } // namespace berberis
597
main(int argc,char * argv[])598 int main(int argc, char* argv[]) {
599 FILE* out = argc > 1 ? fopen(argv[1], "w") : stdout;
600 fprintf(out,
601 R"STRING(
602 /*
603 * Copyright (C) 2024 The Android Open Source Project
604 *
605 * Licensed under the Apache License, Version 2.0 (the "License");
606 * you may not use this file except in compliance with the License.
607 * You may obtain a copy of the License at
608 *
609 * http://www.apache.org/licenses/LICENSE-2.0
610 *
611 * Unless required by applicable law or agreed to in writing, software
612 * distributed under the License is distributed on an "AS IS" BASIS,
613 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
614 * See the License for the specific language governing permissions and
615 * limitations under the License.
616 */
617
618 // This file automatically generated by gen_text_asm_intrinsics.cc
619 // DO NOT EDIT!
620
621 #ifndef %2$s_%3$s_INTRINSICS_INTRINSICS_H_
622 #define %2$s_%3$s_INTRINSICS_INTRINSICS_H_
623
624 #if defined(__i386__) || defined(__x86_64__)
625 #include <xmmintrin.h>
626 #endif
627
628 #include "berberis/base/config.h"
629 #include "berberis/runtime_primitives/platform.h"
630 #include "%3$s/intrinsics/%1$s_to_all/intrinsics.h"
631 #include "%3$s/intrinsics/vector_intrinsics.h"
632
633 namespace berberis::constants_pool {
634
635 struct MacroAssemblerConstants;
636
637 extern const MacroAssemblerConstants kBerberisMacroAssemblerConstants
638 __attribute__((visibility("hidden")));
639
640 } // namespace berberis::constants_pool
641
642 namespace %3$s {
643
644 namespace constants_pool {
645
646 %4$s
647
648 } // namespace constants_pool
649
650 namespace intrinsics {
651 )STRING",
652 berberis::TextAssembler::kArchName,
653 berberis::TextAssembler::kArchGuard,
654 berberis::TextAssembler::kNamespaceName,
655 strcmp(berberis::TextAssembler::kNamespaceName, "berberis")
656 ? "using berberis::constants_pool::kBerberisMacroAssemblerConstants;"
657 : "");
658
659 berberis::GenerateTextAsmIntrinsics(out);
660 berberis::MakeExtraGuestFunctions(out);
661
662 fprintf(out,
663 R"STRING(
664 } // namespace intrinsics
665
666 } // namespace %2$s
667
668 #endif /* %1$s_%2$s_INTRINSICS_INTRINSICS_H_ */
669 )STRING",
670 berberis::TextAssembler::kArchGuard,
671 berberis::TextAssembler::kNamespaceName);
672
673 fclose(out);
674 return 0;
675 }
676