xref: /aosp_15_r20/external/pigweed/pw_tokenizer/tokenize.cc (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1*61c4878aSAndroid Build Coastguard Worker // Copyright 2020 The Pigweed Authors
2*61c4878aSAndroid Build Coastguard Worker //
3*61c4878aSAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4*61c4878aSAndroid Build Coastguard Worker // use this file except in compliance with the License. You may obtain a copy of
5*61c4878aSAndroid Build Coastguard Worker // the License at
6*61c4878aSAndroid Build Coastguard Worker //
7*61c4878aSAndroid Build Coastguard Worker //     https://www.apache.org/licenses/LICENSE-2.0
8*61c4878aSAndroid Build Coastguard Worker //
9*61c4878aSAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
10*61c4878aSAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11*61c4878aSAndroid Build Coastguard Worker // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12*61c4878aSAndroid Build Coastguard Worker // License for the specific language governing permissions and limitations under
13*61c4878aSAndroid Build Coastguard Worker // the License.
14*61c4878aSAndroid Build Coastguard Worker 
15*61c4878aSAndroid Build Coastguard Worker // This file defines the functions that encode tokenized logs at runtime. These
16*61c4878aSAndroid Build Coastguard Worker // are the only pw_tokenizer functions present in a binary that tokenizes
17*61c4878aSAndroid Build Coastguard Worker // strings. All other tokenizing code is resolved at compile time.
18*61c4878aSAndroid Build Coastguard Worker 
19*61c4878aSAndroid Build Coastguard Worker #include "pw_tokenizer/tokenize.h"
20*61c4878aSAndroid Build Coastguard Worker 
21*61c4878aSAndroid Build Coastguard Worker #include <cstring>
22*61c4878aSAndroid Build Coastguard Worker 
23*61c4878aSAndroid Build Coastguard Worker #include "pw_span/span.h"
24*61c4878aSAndroid Build Coastguard Worker #include "pw_tokenizer/encode_args.h"
25*61c4878aSAndroid Build Coastguard Worker 
26*61c4878aSAndroid Build Coastguard Worker namespace pw::tokenizer {
27*61c4878aSAndroid Build Coastguard Worker namespace {
28*61c4878aSAndroid Build Coastguard Worker 
29*61c4878aSAndroid Build Coastguard Worker static_assert(sizeof(PW_TOKENIZER_NESTED_PREFIX_STR) == 2,
30*61c4878aSAndroid Build Coastguard Worker               "The nested prefix must be a single character string");
31*61c4878aSAndroid Build Coastguard Worker 
// Store metadata about this compilation's string tokenization in the ELF.
//
// The tokenizer metadata will not go into the on-device executable binary code.
// This metadata will be present in the ELF file's .pw_tokenizer.info section,
// from which the host-side tooling (Python, Java, etc.) can understand how to
// decode tokenized strings for the given binary. Only attributes that affect
// the decoding process are recorded.
//
// Tokenizer metadata is stored in an array of key-value pairs. Each Metadata
// object is 32 bytes: a 24-byte string and an 8-byte value. Metadata structs
// may be parsed in Python with the struct format '<24sQ'.
PW_PACKED(struct)43*61c4878aSAndroid Build Coastguard Worker PW_PACKED(struct) Metadata {
44*61c4878aSAndroid Build Coastguard Worker   char name[24];   // name of the metadata field
45*61c4878aSAndroid Build Coastguard Worker   uint64_t value;  // value of the field
46*61c4878aSAndroid Build Coastguard Worker };
47*61c4878aSAndroid Build Coastguard Worker 
48*61c4878aSAndroid Build Coastguard Worker static_assert(sizeof(Metadata) == 32, "Metadata should be exactly 32 bytes");
49*61c4878aSAndroid Build Coastguard Worker 
// Tokenization metadata is placed in a dedicated section. pw_tokenizer does
// not support Mach-O files; on macOS a shorter, Mach-O compatible section name
// is selected only so that this file still compiles there.
#if defined(__APPLE__)
#define PW_TOKENIZER_INFO_SECTION PW_KEEP_IN_SECTION(".pw_tokenizer")
#else
#define PW_TOKENIZER_INFO_SECTION PW_KEEP_IN_SECTION(".pw_tokenizer.info")
#endif  // defined(__APPLE__)
58*61c4878aSAndroid Build Coastguard Worker 
59*61c4878aSAndroid Build Coastguard Worker constexpr Metadata metadata[] PW_TOKENIZER_INFO_SECTION = {
60*61c4878aSAndroid Build Coastguard Worker     {"c_hash_length_bytes", PW_TOKENIZER_CFG_C_HASH_LENGTH},
61*61c4878aSAndroid Build Coastguard Worker     {"sizeof_long", sizeof(long)},            // %l conversion specifier
62*61c4878aSAndroid Build Coastguard Worker     {"sizeof_intmax_t", sizeof(intmax_t)},    // %j conversion specifier
63*61c4878aSAndroid Build Coastguard Worker     {"sizeof_size_t", sizeof(size_t)},        // %z conversion specifier
64*61c4878aSAndroid Build Coastguard Worker     {"sizeof_ptrdiff_t", sizeof(ptrdiff_t)},  // %t conversion specifier
65*61c4878aSAndroid Build Coastguard Worker };
66*61c4878aSAndroid Build Coastguard Worker 
67*61c4878aSAndroid Build Coastguard Worker }  // namespace
68*61c4878aSAndroid Build Coastguard Worker 
_pw_tokenizer_ToBuffer(void * buffer,size_t * buffer_size_bytes,Token token,pw_tokenizer_ArgTypes types,...)69*61c4878aSAndroid Build Coastguard Worker extern "C" void _pw_tokenizer_ToBuffer(void* buffer,
70*61c4878aSAndroid Build Coastguard Worker                                        size_t* buffer_size_bytes,
71*61c4878aSAndroid Build Coastguard Worker                                        Token token,
72*61c4878aSAndroid Build Coastguard Worker                                        pw_tokenizer_ArgTypes types,
73*61c4878aSAndroid Build Coastguard Worker                                        ...) {
74*61c4878aSAndroid Build Coastguard Worker   if (*buffer_size_bytes < sizeof(token)) {
75*61c4878aSAndroid Build Coastguard Worker     *buffer_size_bytes = 0;
76*61c4878aSAndroid Build Coastguard Worker     return;
77*61c4878aSAndroid Build Coastguard Worker   }
78*61c4878aSAndroid Build Coastguard Worker 
79*61c4878aSAndroid Build Coastguard Worker   std::memcpy(buffer, &token, sizeof(token));
80*61c4878aSAndroid Build Coastguard Worker 
81*61c4878aSAndroid Build Coastguard Worker   va_list args;
82*61c4878aSAndroid Build Coastguard Worker   va_start(args, types);
83*61c4878aSAndroid Build Coastguard Worker   const size_t encoded_bytes = EncodeArgs(
84*61c4878aSAndroid Build Coastguard Worker       types,
85*61c4878aSAndroid Build Coastguard Worker       args,
86*61c4878aSAndroid Build Coastguard Worker       span<std::byte>(static_cast<std::byte*>(buffer) + sizeof(token),
87*61c4878aSAndroid Build Coastguard Worker                       *buffer_size_bytes - sizeof(token)));
88*61c4878aSAndroid Build Coastguard Worker   va_end(args);
89*61c4878aSAndroid Build Coastguard Worker 
90*61c4878aSAndroid Build Coastguard Worker   *buffer_size_bytes = sizeof(token) + encoded_bytes;
91*61c4878aSAndroid Build Coastguard Worker }
92*61c4878aSAndroid Build Coastguard Worker 
93*61c4878aSAndroid Build Coastguard Worker }  // namespace pw::tokenizer
94