1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 #pragma once
15
16 #ifdef __cplusplus
17
18 #include <cstddef>
19 #include <cstdint>
20
21 #else
22
23 #include <stddef.h>
24 #include <stdint.h>
25
26 #endif // __cplusplus
27
28 #include "pw_polyfill/static_assert.h"
29 #include "pw_preprocessor/arguments.h"
30 #include "pw_preprocessor/compiler.h"
31 #include "pw_preprocessor/concat.h"
32 #include "pw_preprocessor/util.h"
33 #include "pw_tokenizer/internal/argument_types.h"
34 #include "pw_tokenizer/internal/tokenize_string.h"
35
/// 32-bit token that stands in for a tokenized string. C++ code can also refer
/// to this type as `pw::tokenizer::Token`.
typedef uint32_t pw_tokenizer_Token;
39
// A string may optionally be tokenized into a domain. The token database tools
// can process the strings of each domain separately. Every domain that is used
// needs a matching section declared in the linker script; see
// `pw_tokenizer_linker_sections.ld` for more details.
//
// The empty string is the default domain.
#define PW_TOKENIZER_DEFAULT_DOMAIN ""
47
/// Tokenizes a string literal in a standalone statement, producing a
/// `pw_tokenizer_Token` (`uint32_t`). Usable from both C and C++.
///
/// In C++, the argument may be a string literal or a constexpr char array,
/// which includes function variables such as `__func__`. In C, the argument
/// must be a string literal. Either way, the string must be null terminated,
/// though it may contain any characters (including '\0').
///
/// The string is tokenized to the default domain.
///
/// @code
///
///   constexpr uint32_t token = PW_TOKENIZE_STRING("Any string literal!");
///
/// @endcode
#define PW_TOKENIZE_STRING(string_literal) \
  PW_TOKENIZE_STRING_DOMAIN(PW_TOKENIZER_DEFAULT_DOMAIN, string_literal)
62
/// Tokenizes a string literal to a `uint32_t` token inside an expression,
/// using the default domain. Requires C++.
///
/// @code
///
///   DoSomething(PW_TOKENIZE_STRING_EXPR("Succeed"));
///
/// @endcode
#define PW_TOKENIZE_STRING_EXPR(string_literal) \
  PW_TOKENIZE_STRING_DOMAIN_EXPR(PW_TOKENIZER_DEFAULT_DOMAIN, string_literal)
76
/// Standalone-statement tokenization of a string literal into the given
/// @rstref{domain<module-pw_tokenizer-domains>}. Works in C and C++.
///
/// No bits of the token are masked off: the mask passed along is `UINT32_MAX`.
#define PW_TOKENIZE_STRING_DOMAIN(domain, string_literal) \
  PW_TOKENIZE_STRING_MASK(domain, UINT32_MAX, string_literal)
81
/// Tokenizes a string literal into the given @rstref{domain
/// <module-pw_tokenizer-domains>} inside an expression. Requires C++.
///
/// The full token is kept: this is the masked variant with a mask of
/// `UINT32_MAX`.
#define PW_TOKENIZE_STRING_DOMAIN_EXPR(domain, string_literal) \
  PW_TOKENIZE_STRING_MASK_EXPR(domain, UINT32_MAX, string_literal)
90
/// Standalone-statement tokenization of a string literal with both a
/// @rstref{domain <module-pw_tokenizer-domains>} and a @rstref{bit mask
/// <module-pw_tokenizer-masks>}. Works in C and C++.
///
/// The expansion starts with the masked token expression followed by a `;`, so
/// the macro is meant to appear on the right-hand side of an assignment or
/// initialization. It then checks the mask at compile time and records the
/// token, domain and string with `PW_TOKENIZER_DEFINE_TOKEN`.
#define PW_TOKENIZE_STRING_MASK(domain, mask, string_literal)                \
  /* value for the caller */ _PW_TOKENIZER_MASK_TOKEN(mask, string_literal); \
                                                                             \
  static_assert(0 < (mask) && (mask) <= UINT32_MAX,                          \
                "Tokenizer masks must be non-zero uint32_t values.");        \
                                                                             \
  PW_TOKENIZER_DEFINE_TOKEN(                                                 \
      _PW_TOKENIZER_MASK_TOKEN(mask, string_literal), domain, string_literal)
102
/// Expression form of string tokenization with a @rstref{domain
/// <module-pw_tokenizer-domains>} and a @rstref{bit mask
/// <module-pw_tokenizer-masks>}. Requires C++.
///
/// The statement-style `PW_TOKENIZE_STRING_MASK` is wrapped in an immediately
/// invoked lambda that returns the resulting constexpr token.
#define PW_TOKENIZE_STRING_MASK_EXPR(domain, mask, string_literal) \
  [&] {                                                            \
    constexpr uint32_t lambda_ret_token =                          \
        PW_TOKENIZE_STRING_MASK(domain, mask, string_literal);     \
    return lambda_ret_token;                                       \
  }()
112
// Computes the token for `string_literal` and ANDs it with `mask`, which is
// first cast to pw_tokenizer_Token.
#define _PW_TOKENIZER_MASK_TOKEN(mask, string_literal) \
  ((pw_tokenizer_Token)(mask) & PW_TOKENIZER_STRING_TOKEN(string_literal))
115
/// Encodes a tokenized string and its arguments into a caller-provided buffer.
/// The buffer's size is supplied through a pointer to a `size_t`; once
/// encoding finishes, that `size_t` holds the number of bytes written to the
/// buffer.
///
/// The arguments of the macro correspond to this function signature:
///
/// @code
///
///   TokenizeToBuffer(void* buffer,
///                    size_t* buffer_size_pointer,
///                    const char* format,
///                    ...);  // printf-style arguments
/// @endcode
///
/// As an example, the code below encodes a tokenized string containing a
/// temperature into a buffer, then hands the buffer to a function that sends
/// the message over a UART.
///
/// @code
///
///   uint8_t buffer[32];
///   size_t size_bytes = sizeof(buffer);
///   PW_TOKENIZE_TO_BUFFER(
///       buffer, &size_bytes, "Temperature (C): %0.2f", temperature_c);
///   MyProject_EnqueueMessageForUart(buffer, size_bytes);
///
/// @endcode
///
/// `PW_TOKENIZE_TO_BUFFER` is very flexible, but because a buffer must be
/// passed to it, its code size footprint at the call site is larger.
#define PW_TOKENIZE_TO_BUFFER(buffer, buffer_size_pointer, format, ...) \
  PW_TOKENIZE_TO_BUFFER_DOMAIN(PW_TOKENIZER_DEFAULT_DOMAIN,             \
                               buffer,                                  \
                               buffer_size_pointer,                     \
                               format,                                  \
                               __VA_ARGS__)
152
/// Variant of @c_macro{PW_TOKENIZE_TO_BUFFER} that tokenizes to the given
/// @rstref{domain <module-pw_tokenizer-domains>}. The token is left unmasked
/// (the mask passed along is `UINT32_MAX`).
#define PW_TOKENIZE_TO_BUFFER_DOMAIN(                 \
    domain, buffer, buffer_size_pointer, format, ...) \
  PW_TOKENIZE_TO_BUFFER_MASK(                         \
      domain, UINT32_MAX, buffer, buffer_size_pointer, format, __VA_ARGS__)
159
/// Variant of @c_macro{PW_TOKENIZE_TO_BUFFER_DOMAIN} that additionally applies
/// a @rstref{bit mask <module-pw_tokenizer-masks>} to the token.
///
/// The body is wrapped in `do { ... } while (0)` so that the
/// `_pw_tokenizer_token` variable introduced by `PW_TOKENIZE_FORMAT_STRING`
/// gets its own scope.
#define PW_TOKENIZE_TO_BUFFER_MASK(                                          \
    domain, mask, buffer, buffer_size_pointer, format, ...)                  \
  do {                                                                       \
    PW_TOKENIZE_FORMAT_STRING(domain, mask, format, __VA_ARGS__);            \
    _pw_tokenizer_ToBuffer(buffer,                                           \
                           buffer_size_pointer,                              \
                           PW_TOKENIZER_REPLACE_FORMAT_STRING(__VA_ARGS__)); \
  } while (0)
170
/// @brief Low-level macro for calling functions that handle tokenized strings.
///
/// A function that works with tokenized format strings must accept these
/// arguments:
///
///   - The 32-bit token (@cpp_type{pw_tokenizer_Token})
///   - The 32- or 64-bit argument types (@cpp_type{pw_tokenizer_ArgTypes})
///   - Variadic arguments, if any
///
/// This macro expands to exactly those arguments. Custom tokenization macros
/// should use it to forward the arguments to a function or another macro.
///
/** @code{cpp}
 *    EncodeMyTokenizedString(uint32_t token,
 *                            pw_tokenizer_ArgTypes arg_types,
 *                            ...);
 *
 *    #define CUSTOM_TOKENIZATION_MACRO(format, ...)                  \
 *      PW_TOKENIZE_FORMAT_STRING(domain, mask, format, __VA_ARGS__); \
 *      EncodeMyTokenizedString(PW_TOKENIZER_REPLACE_FORMAT_STRING(__VA_ARGS__))
 *  @endcode
 */
#define PW_TOKENIZER_REPLACE_FORMAT_STRING(...) \
  _PW_TOKENIZER_REPLACE_FORMAT_STRING(PW_EMPTY_ARGS(__VA_ARGS__), __VA_ARGS__)

// Selects the _1 helper when no arguments were passed and the _0 helper
// otherwise, by pasting the result of PW_EMPTY_ARGS onto the helper name.
#define _PW_TOKENIZER_REPLACE_FORMAT_STRING(empty_args, ...) \
  _PW_CONCAT_2(_PW_TOKENIZER_REPLACE_FORMAT_STRING_, empty_args)(__VA_ARGS__)

// No arguments: the token followed by an argument-types value of zero.
#define _PW_TOKENIZER_REPLACE_FORMAT_STRING_1() _pw_tokenizer_token, 0u

// With arguments: the token, the encoded argument types, then the arguments.
#define _PW_TOKENIZER_REPLACE_FORMAT_STRING_0(...) \
  _pw_tokenizer_token, PW_TOKENIZER_ARG_TYPES(__VA_ARGS__), __VA_ARGS__
202
/// Packs a series of arguments into the compact representation that takes the
/// place of the format string literal. The result is a
/// `pw_tokenizer_ArgTypes` value.
///
/// The argument count is stored in the bottom 4 or 6 bits, depending on the
/// size of `pw_tokenizer_ArgTypes`; the remaining bits hold the types, using
/// two bits per type. Only the types of the arguments are used; the arguments
/// themselves are not evaluated.
///
/// @c_macro{PW_TOKENIZER_ARG_TYPES} should generally not be used directly.
/// Prefer @c_macro{PW_TOKENIZER_REPLACE_FORMAT_STRING}.
#define PW_TOKENIZER_ARG_TYPES(...) \
  PW_DELEGATE_BY_ARG_COUNT(_PW_TOKENIZER_TYPES_, __VA_ARGS__)
214
215 PW_EXTERN_C_START
216
217 // These functions encode the tokenized strings. These should not be called
218 // directly. Instead, use the corresponding PW_TOKENIZE_TO_* macros above.
219 void _pw_tokenizer_ToBuffer(void* buffer,
220 size_t* buffer_size_bytes, // input and output arg
221 pw_tokenizer_Token token,
222 pw_tokenizer_ArgTypes types,
223 ...);
224
225 // This empty function allows the compiler to check the format string.
226 static inline void pw_tokenizer_CheckFormatString(const char* format, ...)
227 PW_PRINTF_FORMAT(1, 2);
228
pw_tokenizer_CheckFormatString(const char * format,...)229 static inline void pw_tokenizer_CheckFormatString(const char* format, ...) {
230 (void)format;
231 }
232
233 PW_EXTERN_C_END
234
/// Tokenizes a format string with optional arguments and stores the token in
/// the `_pw_tokenizer_token` variable. Because every invocation uses that same
/// variable name, each use must be in its own scope.
///
/// The string is tokenized to the given @rstref{tokenization domain
/// <module-pw_tokenizer-domains>}; pass `PW_TOKENIZER_DEFAULT_DOMAIN` for the
/// default. The token may also be masked; pass `UINT32_MAX` to keep all bits.
///
/// The macro checks that the printf-style format string matches the arguments
/// and that no more than @c_macro{PW_TOKENIZER_MAX_SUPPORTED_ARGS} arguments
/// are provided. It then stores the format string in a special section and
/// calculates the string's token at compile time.
// clang-format off
#define PW_TOKENIZE_FORMAT_STRING(domain, mask, format, ...)                  \
  static_assert(                                                              \
      PW_FUNCTION_ARG_COUNT(__VA_ARGS__) <= PW_TOKENIZER_MAX_SUPPORTED_ARGS,  \
      "Tokenized strings cannot have more than "                              \
      PW_STRINGIFY(PW_TOKENIZER_MAX_SUPPORTED_ARGS) " arguments; "            \
      PW_STRINGIFY(PW_FUNCTION_ARG_COUNT(__VA_ARGS__))                        \
      " arguments were used for " #format " (" #__VA_ARGS__ ")");             \
  PW_TOKENIZE_FORMAT_STRING_ANY_ARG_COUNT(domain, mask, format, __VA_ARGS__)
// clang-format on
257
/// Same as `PW_TOKENIZE_FORMAT_STRING`, except that any number of arguments is
/// accepted.
///
/// This low-level macro should rarely be used directly. It exists for
/// situations in which @cpp_type{pw_tokenizer_ArgTypes} is not used. There are
/// two situations where @cpp_type{pw_tokenizer_ArgTypes} is unnecessary:
///
///   - The exact format string argument types and count are fixed.
///   - The format string supports a variable number of arguments of only one
///     type. In this case, @c_macro{PW_FUNCTION_ARG_COUNT} may be used to pass
///     the argument count to the function.
#define PW_TOKENIZE_FORMAT_STRING_ANY_ARG_COUNT(domain, mask, format, ...) \
  if (0) { /* Never runs, so the arguments are not evaluated twice. */     \
    pw_tokenizer_CheckFormatString(format PW_COMMA_ARGS(__VA_ARGS__));     \
  }                                                                        \
                                                                           \
  /* Compute the string's pw_tokenizer_Token at compile time. */           \
  static _PW_TOKENIZER_CONST pw_tokenizer_Token _pw_tokenizer_token =      \
      _PW_TOKENIZER_MASK_TOKEN(mask, format);                              \
                                                                           \
  PW_TOKENIZER_DEFINE_TOKEN(_pw_tokenizer_token, domain, format)
280
// Builds a unique name for a tokenized string entry or linker section by
// appending the current line number, an underscore and __COUNTER__ to `prefix`.
#define _PW_TOKENIZER_UNIQUE(prefix) PW_CONCAT(prefix, __LINE__, _, __COUNTER__)
283
284 #ifdef __cplusplus
285
286 #define _PW_TOKENIZER_CONST constexpr
287
288 /// Records the original token, domain and string directly.
289 ///
290 /// This macro is intended to be used for tokenized enum and domain support. The
291 /// values are stored as an entry in the ELF section. As a note for tokenized
292 /// enum support, the enum name should be used as the string, and the enum value
293 /// as the token.
294 #define PW_TOKENIZER_DEFINE_TOKEN(token, domain, string) \
295 alignas(1) static constexpr auto _PW_TOKENIZER_SECTION _PW_TOKENIZER_UNIQUE( \
296 _pw_tokenizer_string_entry_) = \
297 ::pw::tokenizer::internal::MakeEntry(token, domain, string)
298
299 namespace pw::tokenizer {
300
301 using Token = ::pw_tokenizer_Token;
302
303 } // namespace pw::tokenizer
304
305 #else
306
307 #define _PW_TOKENIZER_CONST const
308 #define _PW_ALIGNAS(alignment) __attribute__((aligned(alignment)))
309
310 #define PW_TOKENIZER_DEFINE_TOKEN(token, domain, string) \
311 _PW_ALIGNAS(1) static const _PW_TOKENIZER_STRING_ENTRY(token, domain, string)
312
313 #endif // __cplusplus
314
// _PW_TOKENIZER_SECTION puts tokenized strings into a special .pw_tokenizer
// linker section. The host-side decoding tools read strings and tokens from
// that section in order to build a database of tokenized strings.
//
// Declare the section with type INFO so that it is left out of the final
// binary. To declare it, together with the .pw_tokenizer.info metadata
// section, add the following to the SECTIONS command of the linker script:
//
//   .pw_tokenizer.info 0x0 (INFO) :
//   {
//     KEEP(*(.pw_tokenizer.info))
//   }
//
//   .pw_tokenizer.entries 0x0 (INFO) :
//   {
//     KEEP(*(.pw_tokenizer.entries.*))
//   }
//
// The file pw_tokenizer_linker_sections.ld contains a linker script snippet
// that provides these sections. It may be included directly into existing
// linker scripts.
//
// Managing the tokenized string sections without modifying the linker script
// is also possible, though not recommended. objcopy can extract the section
// from the ELF and remove it:
//
//   objcopy --only-section .pw_tokenizer.* <ORIGINAL_ELF> <OUTPUT_ELF>
//   objcopy --remove-section .pw_tokenizer.* <ORIGINAL_ELF>
//
// OUTPUT_ELF is then an ELF that contains only the tokenized strings, and the
// sections are removed from the original ELF file.
//
// If the linker script modifications above are not made, the section garbage
// collection option (--gc-sections) removes the tokenized string sections. To
// avoid editing the target linker script, a separate metadata ELF can be
// linked without --gc-sections so that the tokenized data is preserved.
//
// pw_tokenizer is intended for use with ELF files only. Mach-O files (macOS
// executables) do not support section names longer than 16 characters, so on
// macOS a short, unused section name is used instead.
#ifdef __APPLE__
#define _PW_TOKENIZER_SECTION \
  PW_KEEP_IN_SECTION(PW_STRINGIFY(_PW_TOKENIZER_UNIQUE(.pw.)))
#else
#define _PW_TOKENIZER_SECTION \
  PW_KEEP_IN_SECTION(PW_STRINGIFY(_PW_TOKENIZER_UNIQUE(.pw_tokenizer.entries.)))
#endif  // __APPLE__
362