xref: /aosp_15_r20/external/pigweed/pw_tokenizer/detokenize.cc (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1*61c4878aSAndroid Build Coastguard Worker // Copyright 2020 The Pigweed Authors
2*61c4878aSAndroid Build Coastguard Worker //
3*61c4878aSAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4*61c4878aSAndroid Build Coastguard Worker // use this file except in compliance with the License. You may obtain a copy of
5*61c4878aSAndroid Build Coastguard Worker // the License at
6*61c4878aSAndroid Build Coastguard Worker //
7*61c4878aSAndroid Build Coastguard Worker //     https://www.apache.org/licenses/LICENSE-2.0
8*61c4878aSAndroid Build Coastguard Worker //
9*61c4878aSAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
10*61c4878aSAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11*61c4878aSAndroid Build Coastguard Worker // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12*61c4878aSAndroid Build Coastguard Worker // License for the specific language governing permissions and limitations under
13*61c4878aSAndroid Build Coastguard Worker // the License.
14*61c4878aSAndroid Build Coastguard Worker 
15*61c4878aSAndroid Build Coastguard Worker #include "pw_tokenizer/detokenize.h"
16*61c4878aSAndroid Build Coastguard Worker 
17*61c4878aSAndroid Build Coastguard Worker #include <algorithm>
18*61c4878aSAndroid Build Coastguard Worker #include <cctype>
19*61c4878aSAndroid Build Coastguard Worker #include <cstring>
20*61c4878aSAndroid Build Coastguard Worker #include <string_view>
21*61c4878aSAndroid Build Coastguard Worker #include <vector>
22*61c4878aSAndroid Build Coastguard Worker 
23*61c4878aSAndroid Build Coastguard Worker #include "pw_bytes/bit.h"
24*61c4878aSAndroid Build Coastguard Worker #include "pw_bytes/endian.h"
25*61c4878aSAndroid Build Coastguard Worker #include "pw_elf/reader.h"
26*61c4878aSAndroid Build Coastguard Worker #include "pw_result/result.h"
27*61c4878aSAndroid Build Coastguard Worker #include "pw_status/try.h"
28*61c4878aSAndroid Build Coastguard Worker #include "pw_tokenizer/base64.h"
29*61c4878aSAndroid Build Coastguard Worker #include "pw_tokenizer/internal/decode.h"
30*61c4878aSAndroid Build Coastguard Worker #include "pw_tokenizer/nested_tokenization.h"
31*61c4878aSAndroid Build Coastguard Worker 
32*61c4878aSAndroid Build Coastguard Worker namespace pw::tokenizer {
33*61c4878aSAndroid Build Coastguard Worker namespace {
34*61c4878aSAndroid Build Coastguard Worker 
35*61c4878aSAndroid Build Coastguard Worker class NestedMessageDetokenizer {
36*61c4878aSAndroid Build Coastguard Worker  public:
NestedMessageDetokenizer(const Detokenizer & detokenizer)37*61c4878aSAndroid Build Coastguard Worker   NestedMessageDetokenizer(const Detokenizer& detokenizer)
38*61c4878aSAndroid Build Coastguard Worker       : detokenizer_(detokenizer) {}
39*61c4878aSAndroid Build Coastguard Worker 
Detokenize(std::string_view chunk)40*61c4878aSAndroid Build Coastguard Worker   void Detokenize(std::string_view chunk) {
41*61c4878aSAndroid Build Coastguard Worker     for (char next_char : chunk) {
42*61c4878aSAndroid Build Coastguard Worker       Detokenize(next_char);
43*61c4878aSAndroid Build Coastguard Worker     }
44*61c4878aSAndroid Build Coastguard Worker   }
45*61c4878aSAndroid Build Coastguard Worker 
OutputChangedSinceLastCheck()46*61c4878aSAndroid Build Coastguard Worker   bool OutputChangedSinceLastCheck() {
47*61c4878aSAndroid Build Coastguard Worker     const bool changed = output_changed_;
48*61c4878aSAndroid Build Coastguard Worker     output_changed_ = false;
49*61c4878aSAndroid Build Coastguard Worker     return changed;
50*61c4878aSAndroid Build Coastguard Worker   }
51*61c4878aSAndroid Build Coastguard Worker 
Detokenize(char next_char)52*61c4878aSAndroid Build Coastguard Worker   void Detokenize(char next_char) {
53*61c4878aSAndroid Build Coastguard Worker     switch (state_) {
54*61c4878aSAndroid Build Coastguard Worker       case kNonMessage:
55*61c4878aSAndroid Build Coastguard Worker         if (next_char == PW_TOKENIZER_NESTED_PREFIX) {
56*61c4878aSAndroid Build Coastguard Worker           message_buffer_.push_back(next_char);
57*61c4878aSAndroid Build Coastguard Worker           state_ = kMessage;
58*61c4878aSAndroid Build Coastguard Worker         } else {
59*61c4878aSAndroid Build Coastguard Worker           output_.push_back(next_char);
60*61c4878aSAndroid Build Coastguard Worker         }
61*61c4878aSAndroid Build Coastguard Worker         break;
62*61c4878aSAndroid Build Coastguard Worker       case kMessage:
63*61c4878aSAndroid Build Coastguard Worker         if (base64::IsValidChar(next_char)) {
64*61c4878aSAndroid Build Coastguard Worker           message_buffer_.push_back(next_char);
65*61c4878aSAndroid Build Coastguard Worker         } else {
66*61c4878aSAndroid Build Coastguard Worker           HandleEndOfMessage();
67*61c4878aSAndroid Build Coastguard Worker           if (next_char == PW_TOKENIZER_NESTED_PREFIX) {
68*61c4878aSAndroid Build Coastguard Worker             message_buffer_.push_back(next_char);
69*61c4878aSAndroid Build Coastguard Worker           } else {
70*61c4878aSAndroid Build Coastguard Worker             output_.push_back(next_char);
71*61c4878aSAndroid Build Coastguard Worker             state_ = kNonMessage;
72*61c4878aSAndroid Build Coastguard Worker           }
73*61c4878aSAndroid Build Coastguard Worker         }
74*61c4878aSAndroid Build Coastguard Worker         break;
75*61c4878aSAndroid Build Coastguard Worker     }
76*61c4878aSAndroid Build Coastguard Worker   }
77*61c4878aSAndroid Build Coastguard Worker 
Flush()78*61c4878aSAndroid Build Coastguard Worker   std::string Flush() {
79*61c4878aSAndroid Build Coastguard Worker     if (state_ == kMessage) {
80*61c4878aSAndroid Build Coastguard Worker       HandleEndOfMessage();
81*61c4878aSAndroid Build Coastguard Worker       state_ = kNonMessage;
82*61c4878aSAndroid Build Coastguard Worker     }
83*61c4878aSAndroid Build Coastguard Worker     std::string output(std::move(output_));
84*61c4878aSAndroid Build Coastguard Worker     output_.clear();
85*61c4878aSAndroid Build Coastguard Worker     return output;
86*61c4878aSAndroid Build Coastguard Worker   }
87*61c4878aSAndroid Build Coastguard Worker 
88*61c4878aSAndroid Build Coastguard Worker  private:
HandleEndOfMessage()89*61c4878aSAndroid Build Coastguard Worker   void HandleEndOfMessage() {
90*61c4878aSAndroid Build Coastguard Worker     if (auto result = detokenizer_.DetokenizeBase64Message(message_buffer_);
91*61c4878aSAndroid Build Coastguard Worker         result.ok()) {
92*61c4878aSAndroid Build Coastguard Worker       output_ += result.BestString();
93*61c4878aSAndroid Build Coastguard Worker       output_changed_ = true;
94*61c4878aSAndroid Build Coastguard Worker     } else {
95*61c4878aSAndroid Build Coastguard Worker       output_ += message_buffer_;  // Keep the original if it doesn't decode.
96*61c4878aSAndroid Build Coastguard Worker     }
97*61c4878aSAndroid Build Coastguard Worker     message_buffer_.clear();
98*61c4878aSAndroid Build Coastguard Worker   }
99*61c4878aSAndroid Build Coastguard Worker 
100*61c4878aSAndroid Build Coastguard Worker   const Detokenizer& detokenizer_;
101*61c4878aSAndroid Build Coastguard Worker   std::string output_;
102*61c4878aSAndroid Build Coastguard Worker   std::string message_buffer_;
103*61c4878aSAndroid Build Coastguard Worker 
104*61c4878aSAndroid Build Coastguard Worker   enum : uint8_t { kNonMessage, kMessage } state_ = kNonMessage;
105*61c4878aSAndroid Build Coastguard Worker   bool output_changed_ = false;
106*61c4878aSAndroid Build Coastguard Worker };
107*61c4878aSAndroid Build Coastguard Worker 
UnknownTokenMessage(uint32_t value)108*61c4878aSAndroid Build Coastguard Worker std::string UnknownTokenMessage(uint32_t value) {
109*61c4878aSAndroid Build Coastguard Worker   std::string output(PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX "unknown token ");
110*61c4878aSAndroid Build Coastguard Worker 
111*61c4878aSAndroid Build Coastguard Worker   // Output a hexadecimal version of the token.
112*61c4878aSAndroid Build Coastguard Worker   for (int shift = 28; shift >= 0; shift -= 4) {
113*61c4878aSAndroid Build Coastguard Worker     output.push_back("0123456789abcdef"[(value >> shift) & 0xF]);
114*61c4878aSAndroid Build Coastguard Worker   }
115*61c4878aSAndroid Build Coastguard Worker 
116*61c4878aSAndroid Build Coastguard Worker   output.append(PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX);
117*61c4878aSAndroid Build Coastguard Worker   return output;
118*61c4878aSAndroid Build Coastguard Worker }
119*61c4878aSAndroid Build Coastguard Worker 
120*61c4878aSAndroid Build Coastguard Worker // Decoding result with the date removed, for sorting.
121*61c4878aSAndroid Build Coastguard Worker using DecodingResult = std::pair<DecodedFormatString, uint32_t>;
122*61c4878aSAndroid Build Coastguard Worker 
123*61c4878aSAndroid Build Coastguard Worker // Determines if one result is better than the other if collisions occurred.
124*61c4878aSAndroid Build Coastguard Worker // Returns true if lhs is preferred over rhs. This logic should match the
125*61c4878aSAndroid Build Coastguard Worker // collision resolution logic in detokenize.py.
IsBetterResult(const DecodingResult & lhs,const DecodingResult & rhs)126*61c4878aSAndroid Build Coastguard Worker bool IsBetterResult(const DecodingResult& lhs, const DecodingResult& rhs) {
127*61c4878aSAndroid Build Coastguard Worker   // Favor the result for which decoding succeeded.
128*61c4878aSAndroid Build Coastguard Worker   if (lhs.first.ok() != rhs.first.ok()) {
129*61c4878aSAndroid Build Coastguard Worker     return lhs.first.ok();
130*61c4878aSAndroid Build Coastguard Worker   }
131*61c4878aSAndroid Build Coastguard Worker 
132*61c4878aSAndroid Build Coastguard Worker   // Favor the result for which all bytes were decoded.
133*61c4878aSAndroid Build Coastguard Worker   if ((lhs.first.remaining_bytes() == 0u) !=
134*61c4878aSAndroid Build Coastguard Worker       (rhs.first.remaining_bytes() == 0u)) {
135*61c4878aSAndroid Build Coastguard Worker     return lhs.first.remaining_bytes() == 0u;
136*61c4878aSAndroid Build Coastguard Worker   }
137*61c4878aSAndroid Build Coastguard Worker 
138*61c4878aSAndroid Build Coastguard Worker   // Favor the result with fewer decoding errors.
139*61c4878aSAndroid Build Coastguard Worker   if (lhs.first.decoding_errors() != rhs.first.decoding_errors()) {
140*61c4878aSAndroid Build Coastguard Worker     return lhs.first.decoding_errors() < rhs.first.decoding_errors();
141*61c4878aSAndroid Build Coastguard Worker   }
142*61c4878aSAndroid Build Coastguard Worker 
143*61c4878aSAndroid Build Coastguard Worker   // Favor the result that successfully decoded the most arguments.
144*61c4878aSAndroid Build Coastguard Worker   if (lhs.first.argument_count() != rhs.first.argument_count()) {
145*61c4878aSAndroid Build Coastguard Worker     return lhs.first.argument_count() > rhs.first.argument_count();
146*61c4878aSAndroid Build Coastguard Worker   }
147*61c4878aSAndroid Build Coastguard Worker 
148*61c4878aSAndroid Build Coastguard Worker   // Favor the result that was removed from the database most recently.
149*61c4878aSAndroid Build Coastguard Worker   return lhs.second > rhs.second;
150*61c4878aSAndroid Build Coastguard Worker }
151*61c4878aSAndroid Build Coastguard Worker 
// Returns true if every character in data is printable or whitespace (as
// classified by std::isprint / std::isspace), or if data is empty.
constexpr bool IsPrintableAscii(std::string_view data) {
  // This follows the logic in pw_tokenizer.decode_optionally_tokenized below:
  //
  //   if ''.join(text.split()).isprintable():
  //     return text
  //
  for (const char letter : data) {
    // The <cctype> functions require a value representable as unsigned char
    // (or EOF). Converting through unsigned char keeps bytes >= 0x80 from
    // turning into negative ints on platforms where char is signed.
    const int code = static_cast<unsigned char>(letter);
    if (std::isprint(code) == 0 && std::isspace(code) == 0) {
      return false;
    }
  }
  return true;
}
167*61c4878aSAndroid Build Coastguard Worker 
168*61c4878aSAndroid Build Coastguard Worker }  // namespace
169*61c4878aSAndroid Build Coastguard Worker 
DetokenizedString(uint32_t token,const span<const TokenizedStringEntry> & entries,const span<const std::byte> & arguments)170*61c4878aSAndroid Build Coastguard Worker DetokenizedString::DetokenizedString(
171*61c4878aSAndroid Build Coastguard Worker     uint32_t token,
172*61c4878aSAndroid Build Coastguard Worker     const span<const TokenizedStringEntry>& entries,
173*61c4878aSAndroid Build Coastguard Worker     const span<const std::byte>& arguments)
174*61c4878aSAndroid Build Coastguard Worker     : token_(token), has_token_(true) {
175*61c4878aSAndroid Build Coastguard Worker   std::vector<DecodingResult> results;
176*61c4878aSAndroid Build Coastguard Worker 
177*61c4878aSAndroid Build Coastguard Worker   for (const auto& [format, date_removed] : entries) {
178*61c4878aSAndroid Build Coastguard Worker     results.push_back(DecodingResult{
179*61c4878aSAndroid Build Coastguard Worker         format.Format(span(reinterpret_cast<const uint8_t*>(arguments.data()),
180*61c4878aSAndroid Build Coastguard Worker                            arguments.size())),
181*61c4878aSAndroid Build Coastguard Worker         date_removed});
182*61c4878aSAndroid Build Coastguard Worker   }
183*61c4878aSAndroid Build Coastguard Worker 
184*61c4878aSAndroid Build Coastguard Worker   std::sort(results.begin(), results.end(), IsBetterResult);
185*61c4878aSAndroid Build Coastguard Worker 
186*61c4878aSAndroid Build Coastguard Worker   for (auto& result : results) {
187*61c4878aSAndroid Build Coastguard Worker     matches_.push_back(std::move(result.first));
188*61c4878aSAndroid Build Coastguard Worker   }
189*61c4878aSAndroid Build Coastguard Worker }
190*61c4878aSAndroid Build Coastguard Worker 
BestString() const191*61c4878aSAndroid Build Coastguard Worker std::string DetokenizedString::BestString() const {
192*61c4878aSAndroid Build Coastguard Worker   return matches_.empty() ? std::string() : matches_[0].value();
193*61c4878aSAndroid Build Coastguard Worker }
194*61c4878aSAndroid Build Coastguard Worker 
BestStringWithErrors() const195*61c4878aSAndroid Build Coastguard Worker std::string DetokenizedString::BestStringWithErrors() const {
196*61c4878aSAndroid Build Coastguard Worker   if (matches_.empty()) {
197*61c4878aSAndroid Build Coastguard Worker     return has_token_ ? UnknownTokenMessage(token_)
198*61c4878aSAndroid Build Coastguard Worker                       : PW_TOKENIZER_ARG_DECODING_ERROR("missing token");
199*61c4878aSAndroid Build Coastguard Worker   }
200*61c4878aSAndroid Build Coastguard Worker   return matches_[0].value_with_errors();
201*61c4878aSAndroid Build Coastguard Worker }
202*61c4878aSAndroid Build Coastguard Worker 
Detokenizer(const TokenDatabase & database)203*61c4878aSAndroid Build Coastguard Worker Detokenizer::Detokenizer(const TokenDatabase& database) {
204*61c4878aSAndroid Build Coastguard Worker   for (const auto& entry : database) {
205*61c4878aSAndroid Build Coastguard Worker     database_[entry.token].emplace_back(entry.string, entry.date_removed);
206*61c4878aSAndroid Build Coastguard Worker   }
207*61c4878aSAndroid Build Coastguard Worker }
208*61c4878aSAndroid Build Coastguard Worker 
FromElfSection(span<const std::byte> elf_section)209*61c4878aSAndroid Build Coastguard Worker Result<Detokenizer> Detokenizer::FromElfSection(
210*61c4878aSAndroid Build Coastguard Worker     span<const std::byte> elf_section) {
211*61c4878aSAndroid Build Coastguard Worker   size_t index = 0;
212*61c4878aSAndroid Build Coastguard Worker   std::unordered_map<uint32_t, std::vector<TokenizedStringEntry>> database;
213*61c4878aSAndroid Build Coastguard Worker 
214*61c4878aSAndroid Build Coastguard Worker   while (index + sizeof(_pw_tokenizer_EntryHeader) < elf_section.size()) {
215*61c4878aSAndroid Build Coastguard Worker     _pw_tokenizer_EntryHeader header;
216*61c4878aSAndroid Build Coastguard Worker     std::memcpy(
217*61c4878aSAndroid Build Coastguard Worker         &header, elf_section.data() + index, sizeof(_pw_tokenizer_EntryHeader));
218*61c4878aSAndroid Build Coastguard Worker     index += sizeof(_pw_tokenizer_EntryHeader);
219*61c4878aSAndroid Build Coastguard Worker 
220*61c4878aSAndroid Build Coastguard Worker     if (header.magic != _PW_TOKENIZER_ENTRY_MAGIC) {
221*61c4878aSAndroid Build Coastguard Worker       return Status::DataLoss();
222*61c4878aSAndroid Build Coastguard Worker     }
223*61c4878aSAndroid Build Coastguard Worker 
224*61c4878aSAndroid Build Coastguard Worker     index += header.domain_length;
225*61c4878aSAndroid Build Coastguard Worker     if (index + header.string_length <= elf_section.size()) {
226*61c4878aSAndroid Build Coastguard Worker       // TODO(b/326365218): Construct FormatString with string_view to avoid
227*61c4878aSAndroid Build Coastguard Worker       // creating a copy here.
228*61c4878aSAndroid Build Coastguard Worker       std::string entry(
229*61c4878aSAndroid Build Coastguard Worker           reinterpret_cast<const char*>(elf_section.data() + index),
230*61c4878aSAndroid Build Coastguard Worker           header.string_length);
231*61c4878aSAndroid Build Coastguard Worker       index += header.string_length;
232*61c4878aSAndroid Build Coastguard Worker       database[header.token].emplace_back(entry.c_str(),
233*61c4878aSAndroid Build Coastguard Worker                                           TokenDatabase::kDateRemovedNever);
234*61c4878aSAndroid Build Coastguard Worker     }
235*61c4878aSAndroid Build Coastguard Worker   }
236*61c4878aSAndroid Build Coastguard Worker   return Detokenizer(std::move(database));
237*61c4878aSAndroid Build Coastguard Worker }
238*61c4878aSAndroid Build Coastguard Worker 
FromElfFile(stream::SeekableReader & stream)239*61c4878aSAndroid Build Coastguard Worker Result<Detokenizer> Detokenizer::FromElfFile(stream::SeekableReader& stream) {
240*61c4878aSAndroid Build Coastguard Worker   PW_TRY_ASSIGN(auto reader, pw::elf::ElfReader::FromStream(stream));
241*61c4878aSAndroid Build Coastguard Worker 
242*61c4878aSAndroid Build Coastguard Worker   constexpr auto kTokenSectionName = ".pw_tokenizer.entries";
243*61c4878aSAndroid Build Coastguard Worker   PW_TRY_ASSIGN(std::vector<std::byte> section_data,
244*61c4878aSAndroid Build Coastguard Worker                 reader.ReadSection(kTokenSectionName));
245*61c4878aSAndroid Build Coastguard Worker 
246*61c4878aSAndroid Build Coastguard Worker   return Detokenizer::FromElfSection(section_data);
247*61c4878aSAndroid Build Coastguard Worker }
248*61c4878aSAndroid Build Coastguard Worker 
Detokenize(const span<const std::byte> & encoded) const249*61c4878aSAndroid Build Coastguard Worker DetokenizedString Detokenizer::Detokenize(
250*61c4878aSAndroid Build Coastguard Worker     const span<const std::byte>& encoded) const {
251*61c4878aSAndroid Build Coastguard Worker   // The token is missing from the encoded data; there is nothing to do.
252*61c4878aSAndroid Build Coastguard Worker   if (encoded.empty()) {
253*61c4878aSAndroid Build Coastguard Worker     return DetokenizedString();
254*61c4878aSAndroid Build Coastguard Worker   }
255*61c4878aSAndroid Build Coastguard Worker 
256*61c4878aSAndroid Build Coastguard Worker   uint32_t token = bytes::ReadInOrder<uint32_t>(
257*61c4878aSAndroid Build Coastguard Worker       endian::little, encoded.data(), encoded.size());
258*61c4878aSAndroid Build Coastguard Worker 
259*61c4878aSAndroid Build Coastguard Worker   const auto result = database_.find(token);
260*61c4878aSAndroid Build Coastguard Worker 
261*61c4878aSAndroid Build Coastguard Worker   return DetokenizedString(
262*61c4878aSAndroid Build Coastguard Worker       token,
263*61c4878aSAndroid Build Coastguard Worker       result == database_.end() ? span<TokenizedStringEntry>()
264*61c4878aSAndroid Build Coastguard Worker                                 : span(result->second),
265*61c4878aSAndroid Build Coastguard Worker       encoded.size() < sizeof(token) ? span<const std::byte>()
266*61c4878aSAndroid Build Coastguard Worker                                      : encoded.subspan(sizeof(token)));
267*61c4878aSAndroid Build Coastguard Worker }
268*61c4878aSAndroid Build Coastguard Worker 
DetokenizeBase64Message(std::string_view text) const269*61c4878aSAndroid Build Coastguard Worker DetokenizedString Detokenizer::DetokenizeBase64Message(
270*61c4878aSAndroid Build Coastguard Worker     std::string_view text) const {
271*61c4878aSAndroid Build Coastguard Worker   std::string buffer(text);
272*61c4878aSAndroid Build Coastguard Worker   buffer.resize(PrefixedBase64DecodeInPlace(buffer));
273*61c4878aSAndroid Build Coastguard Worker   return Detokenize(buffer);
274*61c4878aSAndroid Build Coastguard Worker }
275*61c4878aSAndroid Build Coastguard Worker 
DetokenizeText(std::string_view text,const unsigned max_passes) const276*61c4878aSAndroid Build Coastguard Worker std::string Detokenizer::DetokenizeText(std::string_view text,
277*61c4878aSAndroid Build Coastguard Worker                                         const unsigned max_passes) const {
278*61c4878aSAndroid Build Coastguard Worker   NestedMessageDetokenizer detokenizer(*this);
279*61c4878aSAndroid Build Coastguard Worker   detokenizer.Detokenize(text);
280*61c4878aSAndroid Build Coastguard Worker 
281*61c4878aSAndroid Build Coastguard Worker   std::string result;
282*61c4878aSAndroid Build Coastguard Worker   unsigned pass = 1;
283*61c4878aSAndroid Build Coastguard Worker 
284*61c4878aSAndroid Build Coastguard Worker   while (true) {
285*61c4878aSAndroid Build Coastguard Worker     result = detokenizer.Flush();
286*61c4878aSAndroid Build Coastguard Worker     if (pass >= max_passes || !detokenizer.OutputChangedSinceLastCheck()) {
287*61c4878aSAndroid Build Coastguard Worker       break;
288*61c4878aSAndroid Build Coastguard Worker     }
289*61c4878aSAndroid Build Coastguard Worker     detokenizer.Detokenize(result);
290*61c4878aSAndroid Build Coastguard Worker     pass += 1;
291*61c4878aSAndroid Build Coastguard Worker   }
292*61c4878aSAndroid Build Coastguard Worker   return result;
293*61c4878aSAndroid Build Coastguard Worker }
294*61c4878aSAndroid Build Coastguard Worker 
DecodeOptionallyTokenizedData(const ConstByteSpan & optionally_tokenized_data)295*61c4878aSAndroid Build Coastguard Worker std::string Detokenizer::DecodeOptionallyTokenizedData(
296*61c4878aSAndroid Build Coastguard Worker     const ConstByteSpan& optionally_tokenized_data) {
297*61c4878aSAndroid Build Coastguard Worker   // Try detokenizing as binary using the best result if available, else use
298*61c4878aSAndroid Build Coastguard Worker   // the input data as a string.
299*61c4878aSAndroid Build Coastguard Worker   const auto result = Detokenize(optionally_tokenized_data);
300*61c4878aSAndroid Build Coastguard Worker   const bool found_matches = !result.matches().empty();
301*61c4878aSAndroid Build Coastguard Worker   // Note: unlike pw_tokenizer.proto.decode_optionally_tokenized, this decoding
302*61c4878aSAndroid Build Coastguard Worker   // process does not encode and decode UTF8 format, it is sufficient to check
303*61c4878aSAndroid Build Coastguard Worker   // if the data is printable ASCII.
304*61c4878aSAndroid Build Coastguard Worker   const std::string data =
305*61c4878aSAndroid Build Coastguard Worker       found_matches
306*61c4878aSAndroid Build Coastguard Worker           ? result.BestString()
307*61c4878aSAndroid Build Coastguard Worker           : std::string(
308*61c4878aSAndroid Build Coastguard Worker                 reinterpret_cast<const char*>(optionally_tokenized_data.data()),
309*61c4878aSAndroid Build Coastguard Worker                 optionally_tokenized_data.size());
310*61c4878aSAndroid Build Coastguard Worker 
311*61c4878aSAndroid Build Coastguard Worker   const bool is_data_printable = IsPrintableAscii(data);
312*61c4878aSAndroid Build Coastguard Worker   if (!found_matches && !is_data_printable) {
313*61c4878aSAndroid Build Coastguard Worker     // Assume the token is unknown or the data is corrupt.
314*61c4878aSAndroid Build Coastguard Worker     std::vector<char> base64_encoding_buffer(
315*61c4878aSAndroid Build Coastguard Worker         Base64EncodedBufferSize(optionally_tokenized_data.size()));
316*61c4878aSAndroid Build Coastguard Worker     const size_t encoded_length = PrefixedBase64Encode(
317*61c4878aSAndroid Build Coastguard Worker         optionally_tokenized_data, span(base64_encoding_buffer));
318*61c4878aSAndroid Build Coastguard Worker     return std::string{base64_encoding_buffer.data(), encoded_length};
319*61c4878aSAndroid Build Coastguard Worker   }
320*61c4878aSAndroid Build Coastguard Worker 
321*61c4878aSAndroid Build Coastguard Worker   // Successfully detokenized, check if the field has more prefixed
322*61c4878aSAndroid Build Coastguard Worker   // base64-encoded tokens.
323*61c4878aSAndroid Build Coastguard Worker   const std::string field = DetokenizeText(data);
324*61c4878aSAndroid Build Coastguard Worker   // If anything detokenized successfully, use that.
325*61c4878aSAndroid Build Coastguard Worker   if (field != data) {
326*61c4878aSAndroid Build Coastguard Worker     return field;
327*61c4878aSAndroid Build Coastguard Worker   }
328*61c4878aSAndroid Build Coastguard Worker 
329*61c4878aSAndroid Build Coastguard Worker   // Attempt to determine whether this is an unknown token or plain text.
330*61c4878aSAndroid Build Coastguard Worker   // Any string with only printable or whitespace characters is plain text.
331*61c4878aSAndroid Build Coastguard Worker   if (found_matches || is_data_printable) {
332*61c4878aSAndroid Build Coastguard Worker     return data;
333*61c4878aSAndroid Build Coastguard Worker   }
334*61c4878aSAndroid Build Coastguard Worker 
335*61c4878aSAndroid Build Coastguard Worker   // Assume this field is tokenized data that could not be decoded.
336*61c4878aSAndroid Build Coastguard Worker   std::vector<char> base64_encoding_buffer(
337*61c4878aSAndroid Build Coastguard Worker       Base64EncodedBufferSize(optionally_tokenized_data.size()));
338*61c4878aSAndroid Build Coastguard Worker   const size_t encoded_length = PrefixedBase64Encode(
339*61c4878aSAndroid Build Coastguard Worker       optionally_tokenized_data, span(base64_encoding_buffer));
340*61c4878aSAndroid Build Coastguard Worker   return std::string{base64_encoding_buffer.data(), encoded_length};
341*61c4878aSAndroid Build Coastguard Worker }
342*61c4878aSAndroid Build Coastguard Worker 
343*61c4878aSAndroid Build Coastguard Worker }  // namespace pw::tokenizer
344