1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 #include <google/protobuf/compiler/cpp/parse_function_generator.h>
32 
33 #include <algorithm>
34 #include <limits>
35 #include <string>
36 #include <utility>
37 
38 #include <google/protobuf/wire_format.h>
39 #include <google/protobuf/compiler/cpp/helpers.h>
40 
41 namespace google {
42 namespace protobuf {
43 namespace compiler {
44 namespace cpp {
45 
46 namespace {
47 using google::protobuf::internal::WireFormat;
48 using google::protobuf::internal::WireFormatLite;
49 
GetOrderedFields(const Descriptor * descriptor,const Options & options)50 std::vector<const FieldDescriptor*> GetOrderedFields(
51     const Descriptor* descriptor, const Options& options) {
52   std::vector<const FieldDescriptor*> ordered_fields;
53   for (auto field : FieldRange(descriptor)) {
54     if (!IsFieldStripped(field, options)) {
55       ordered_fields.push_back(field);
56     }
57   }
58   std::sort(ordered_fields.begin(), ordered_fields.end(),
59             [](const FieldDescriptor* a, const FieldDescriptor* b) {
60               return a->number() < b->number();
61             });
62   return ordered_fields;
63 }
64 
HasInternalAccessors(const FieldOptions::CType ctype)65 bool HasInternalAccessors(const FieldOptions::CType ctype) {
66   return ctype == FieldOptions::STRING || ctype == FieldOptions::CORD;
67 }
68 
TagSize(uint32_t field_number)69 int TagSize(uint32_t field_number) {
70   if (field_number < 16) return 1;
71   GOOGLE_CHECK_LT(field_number, (1 << 14))
72       << "coded tag for " << field_number << " too big for uint16_t";
73   return 2;
74 }
75 
76 std::string FieldParseFunctionName(
77     const TailCallTableInfo::FieldEntryInfo& entry, const Options& options);
78 
IsFieldEligibleForFastParsing(const TailCallTableInfo::FieldEntryInfo & entry,const Options & options,MessageSCCAnalyzer * scc_analyzer)79 bool IsFieldEligibleForFastParsing(
80     const TailCallTableInfo::FieldEntryInfo& entry, const Options& options,
81     MessageSCCAnalyzer* scc_analyzer) {
82   const auto* field = entry.field;
83   // Map, oneof, weak, and lazy fields are not handled on the fast path.
84   if (field->is_map() || field->real_containing_oneof() ||
85       field->options().weak() ||
86       IsImplicitWeakField(field, options, scc_analyzer) ||
87       IsLazy(field, options, scc_analyzer)) {
88     return false;
89   }
90 
91   // We will check for a valid auxiliary index range later. However, we might
92   // want to change the value we check for inlined string fields.
93   int aux_idx = entry.aux_idx;
94 
95   switch (field->type()) {
96     case FieldDescriptor::TYPE_ENUM:
97       // If enum values are not validated at parse time, then this field can be
98       // handled on the fast path like an int32.
99       if (HasPreservingUnknownEnumSemantics(field)) {
100         break;
101       }
102       if (field->is_repeated() && field->is_packed()) {
103         return false;
104       }
105       break;
106 
107       // Some bytes fields can be handled on fast path.
108     case FieldDescriptor::TYPE_STRING:
109     case FieldDescriptor::TYPE_BYTES:
110       if (field->options().ctype() != FieldOptions::STRING) {
111         return false;
112       }
113       if (IsStringInlined(field, options)) {
114         GOOGLE_CHECK(!field->is_repeated());
115         // For inlined strings, the donation state index is stored in the
116         // `aux_idx` field of the fast parsing info. We need to check the range
117         // of that value instead of the auxiliary index.
118         aux_idx = entry.inlined_string_idx;
119       }
120       break;
121 
122     default:
123       break;
124   }
125 
126   if (HasHasbit(field)) {
127     // The tailcall parser can only update the first 32 hasbits. Fields with
128     // has-bits beyond the first 32 are handled by mini parsing/fallback.
129     GOOGLE_CHECK_GE(entry.hasbit_idx, 0) << field->DebugString();
130     if (entry.hasbit_idx >= 32) return false;
131   }
132 
133   // If the field needs auxiliary data, then the aux index is needed. This
134   // must fit in a uint8_t.
135   if (aux_idx > std::numeric_limits<uint8_t>::max()) {
136     return false;
137   }
138 
139   // The largest tag that can be read by the tailcall parser is two bytes
140   // when varint-coded. This allows 14 bits for the numeric tag value:
141   //   byte 0   byte 1
142   //   1nnnnttt 0nnnnnnn
143   //    ^^^^^^^  ^^^^^^^
144   if (field->number() >= 1 << 11) return false;
145 
146   return true;
147 }
148 
SplitFastFieldsForSize(const std::vector<TailCallTableInfo::FieldEntryInfo> & field_entries,int table_size_log2,const Options & options,MessageSCCAnalyzer * scc_analyzer)149 std::vector<TailCallTableInfo::FastFieldInfo> SplitFastFieldsForSize(
150     const std::vector<TailCallTableInfo::FieldEntryInfo>& field_entries,
151     int table_size_log2, const Options& options,
152     MessageSCCAnalyzer* scc_analyzer) {
153   std::vector<TailCallTableInfo::FastFieldInfo> result(1 << table_size_log2);
154   const uint32_t idx_mask = result.size() - 1;
155 
156   for (const auto& entry : field_entries) {
157     if (!IsFieldEligibleForFastParsing(entry, options, scc_analyzer)) {
158       continue;
159     }
160 
161     const auto* field = entry.field;
162     uint32_t tag = WireFormat::MakeTag(field);
163 
164     // Construct the varint-coded tag. If it is more than 7 bits, we need to
165     // shift the high bits and add a continue bit.
166     if (uint32_t hibits = tag & 0xFFFFFF80) {
167       tag = tag + hibits + 128;  // tag = lobits + 2*hibits + 128
168     }
169 
170     // The field index is determined by the low bits of the field number, where
171     // the table size determines the width of the mask. The largest table
172     // supported is 32 entries. The parse loop uses these bits directly, so that
173     // the dispatch does not require arithmetic:
174     //        byte 0   byte 1
175     //   tag: 1nnnnttt 0nnnnnnn
176     //        ^^^^^
177     //         idx (table_size_log2=5)
178     // This means that any field number that does not fit in the lower 4 bits
179     // will always have the top bit of its table index asserted.
180     const uint32_t fast_idx = (tag >> 3) & idx_mask;
181 
182     TailCallTableInfo::FastFieldInfo& info = result[fast_idx];
183     if (info.field != nullptr) {
184       // This field entry is already filled.
185       continue;
186     }
187 
188     // Fill in this field's entry:
189     GOOGLE_CHECK(info.func_name.empty()) << info.func_name;
190     info.func_name = FieldParseFunctionName(entry, options);
191     info.field = field;
192     info.coded_tag = tag;
193     // If this field does not have presence, then it can set an out-of-bounds
194     // bit (tailcall parsing uses a uint64_t for hasbits, but only stores 32).
195     info.hasbit_idx = HasHasbit(field) ? entry.hasbit_idx : 63;
196     if (IsStringInlined(field, options)) {
197       GOOGLE_CHECK(!field->is_repeated());
198       info.aux_idx = static_cast<uint8_t>(entry.inlined_string_idx);
199     } else {
200       info.aux_idx = static_cast<uint8_t>(entry.aux_idx);
201     }
202   }
203   return result;
204 }
205 
206 // Filter out fields that will be handled by mini parsing.
FilterMiniParsedFields(const std::vector<const FieldDescriptor * > & fields,const Options & options,MessageSCCAnalyzer * scc_analyzer)207 std::vector<const FieldDescriptor*> FilterMiniParsedFields(
208     const std::vector<const FieldDescriptor*>& fields, const Options& options,
209     MessageSCCAnalyzer* scc_analyzer) {
210   std::vector<const FieldDescriptor*> generated_fallback_fields;
211 
212   for (const auto* field : fields) {
213     bool handled = false;
214     switch (field->type()) {
215       case FieldDescriptor::TYPE_DOUBLE:
216       case FieldDescriptor::TYPE_FLOAT:
217       case FieldDescriptor::TYPE_FIXED32:
218       case FieldDescriptor::TYPE_SFIXED32:
219       case FieldDescriptor::TYPE_FIXED64:
220       case FieldDescriptor::TYPE_SFIXED64:
221       case FieldDescriptor::TYPE_BOOL:
222       case FieldDescriptor::TYPE_UINT32:
223       case FieldDescriptor::TYPE_SINT32:
224       case FieldDescriptor::TYPE_INT32:
225       case FieldDescriptor::TYPE_UINT64:
226       case FieldDescriptor::TYPE_SINT64:
227       case FieldDescriptor::TYPE_INT64:
228         // These are handled by MiniParse, so we don't need any generated
229         // fallback code.
230         handled = true;
231         break;
232 
233       case FieldDescriptor::TYPE_ENUM:
234         if (field->is_repeated() && !HasPreservingUnknownEnumSemantics(field)) {
235           // TODO(b/206890171): handle packed repeated closed enums
236           // Non-packed repeated can be handled using tables, but we still
237           // need to generate fallback code for all repeated enums in order to
238           // handle packed encoding. This is because of the lite/full split
239           // when handling invalid enum values in a packed field.
240           handled = false;
241         } else {
242           handled = true;
243         }
244         break;
245 
246       case FieldDescriptor::TYPE_BYTES:
247       case FieldDescriptor::TYPE_STRING:
248         if (IsStringInlined(field, options)) {
249           // TODO(b/198211897): support InilnedStringField.
250           handled = false;
251         } else {
252           handled = true;
253         }
254         break;
255 
256       case FieldDescriptor::TYPE_MESSAGE:
257       case FieldDescriptor::TYPE_GROUP:
258         // TODO(b/210762816): support remaining field types.
259         if (field->is_map() || IsWeak(field, options) ||
260             IsImplicitWeakField(field, options, scc_analyzer) ||
261             IsLazy(field, options, scc_analyzer)) {
262           handled = false;
263         } else {
264           handled = true;
265         }
266         break;
267 
268       default:
269         handled = false;
270         break;
271     }
272     if (!handled) generated_fallback_fields.push_back(field);
273   }
274 
275   return generated_fallback_fields;
276 }
277 
278 }  // namespace
279 
TailCallTableInfo(const Descriptor * descriptor,const Options & options,const std::vector<const FieldDescriptor * > & ordered_fields,const std::vector<int> & has_bit_indices,const std::vector<int> & inlined_string_indices,MessageSCCAnalyzer * scc_analyzer)280 TailCallTableInfo::TailCallTableInfo(
281     const Descriptor* descriptor, const Options& options,
282     const std::vector<const FieldDescriptor*>& ordered_fields,
283     const std::vector<int>& has_bit_indices,
284     const std::vector<int>& inlined_string_indices,
285     MessageSCCAnalyzer* scc_analyzer) {
286   int oneof_count = descriptor->real_oneof_decl_count();
287   // If this message has any oneof fields, store the case offset in the first
288   // auxiliary entry.
289   if (oneof_count > 0) {
290     GOOGLE_LOG_IF(DFATAL, ordered_fields.empty())
291         << "Invalid message: " << descriptor->full_name() << " has "
292         << oneof_count << " oneof declarations, but no fields";
293     aux_entries.push_back(StrCat("_fl::Offset{offsetof(",
294                                        ClassName(descriptor),
295                                        ", _impl_._oneof_case_)}"));
296   }
297 
298   // If this message has any inlined string fields, store the donation state
299   // offset in the second auxiliary entry.
300   if (!inlined_string_indices.empty()) {
301     aux_entries.resize(2);  // pad if necessary
302     aux_entries[1] =
303         StrCat("_fl::Offset{offsetof(", ClassName(descriptor),
304                      ", _impl_._inlined_string_donated_)}");
305   }
306 
307   // Fill in mini table entries.
308   for (const FieldDescriptor* field : ordered_fields) {
309     field_entries.push_back(
310         {field, (HasHasbit(field) ? has_bit_indices[field->index()] : -1)});
311     auto& entry = field_entries.back();
312 
313     if (field->type() == FieldDescriptor::TYPE_MESSAGE ||
314         field->type() == FieldDescriptor::TYPE_GROUP) {
315       // Message-typed fields have a FieldAux with the default instance pointer.
316       if (field->is_map()) {
317         // TODO(b/205904770): generate aux entries for maps
318       } else if (IsWeak(field, options)) {
319         // Don't generate anything for weak fields. They are handled by the
320         // generated fallback.
321       } else if (IsImplicitWeakField(field, options, scc_analyzer)) {
322         // Implicit weak fields don't need to store a default instance pointer.
323       } else if (IsLazy(field, options, scc_analyzer)) {
324         // Lazy fields are handled by the generated fallback function.
325       } else {
326         field_entries.back().aux_idx = aux_entries.size();
327         const Descriptor* field_type = field->message_type();
328         aux_entries.push_back(StrCat(
329             "reinterpret_cast<const ", QualifiedClassName(field_type, options),
330             "*>(&", QualifiedDefaultInstanceName(field_type, options), ")"));
331       }
332     } else if (field->type() == FieldDescriptor::TYPE_ENUM &&
333                !HasPreservingUnknownEnumSemantics(field)) {
334       // Enum fields which preserve unknown values (proto3 behavior) are
335       // effectively int32 fields with respect to parsing -- i.e., the value
336       // does not need to be validated at parse time.
337       //
338       // Enum fields which do not preserve unknown values (proto2 behavior) use
339       // a FieldAux to store validation information. If the enum values are
340       // sequential (and within a range we can represent), then the FieldAux
341       // entry represents the range using the minimum value (which must fit in
342       // an int16_t) and count (a uint16_t). Otherwise, the entry holds a
343       // pointer to the generated Name_IsValid function.
344 
345       entry.aux_idx = aux_entries.size();
346       const EnumDescriptor* enum_type = field->enum_type();
347       GOOGLE_CHECK_GT(enum_type->value_count(), 0) << enum_type->DebugString();
348 
349       // Check if the enum values are a single, contiguous range.
350       std::vector<int> enum_values;
351       for (int i = 0, N = enum_type->value_count(); i < N; ++i) {
352         enum_values.push_back(enum_type->value(i)->number());
353       }
354       auto values_begin = enum_values.begin();
355       auto values_end = enum_values.end();
356       std::sort(values_begin, values_end);
357       enum_values.erase(std::unique(values_begin, values_end), values_end);
358 
359       if (enum_values.back() - enum_values[0] == enum_values.size() - 1 &&
360           enum_values[0] >= std::numeric_limits<int16_t>::min() &&
361           enum_values[0] <= std::numeric_limits<int16_t>::max() &&
362           enum_values.size() <= std::numeric_limits<uint16_t>::max()) {
363         entry.is_enum_range = true;
364         aux_entries.push_back(
365             StrCat(enum_values[0], ", ", enum_values.size()));
366       } else {
367         entry.is_enum_range = false;
368         aux_entries.push_back(
369             StrCat(QualifiedClassName(enum_type, options), "_IsValid"));
370       }
371     } else if ((field->type() == FieldDescriptor::TYPE_STRING ||
372                 field->type() == FieldDescriptor::TYPE_BYTES) &&
373                IsStringInlined(field, options)) {
374       GOOGLE_CHECK(!field->is_repeated());
375       // Inlined strings have an extra marker to represent their donation state.
376       int idx = inlined_string_indices[field->index()];
377       // For mini parsing, the donation state index is stored as an `offset`
378       // auxiliary entry.
379       entry.aux_idx = aux_entries.size();
380       aux_entries.push_back(StrCat("_fl::Offset{", idx, "}"));
381       // For fast table parsing, the donation state index is stored instead of
382       // the aux_idx (this will limit the range to 8 bits).
383       entry.inlined_string_idx = idx;
384     }
385   }
386 
387   // Choose the smallest fast table that covers the maximum number of fields.
388   table_size_log2 = 0;  // fallback value
389   int num_fast_fields = -1;
390   for (int try_size_log2 : {0, 1, 2, 3, 4, 5}) {
391     size_t try_size = 1 << try_size_log2;
392     auto split_fields = SplitFastFieldsForSize(field_entries, try_size_log2,
393                                                options, scc_analyzer);
394     GOOGLE_CHECK_EQ(split_fields.size(), try_size);
395     int try_num_fast_fields = 0;
396     for (const auto& info : split_fields) {
397       if (info.field != nullptr) ++try_num_fast_fields;
398     }
399     // Use this size if (and only if) it covers more fields.
400     if (try_num_fast_fields > num_fast_fields) {
401       fast_path_fields = std::move(split_fields);
402       table_size_log2 = try_size_log2;
403       num_fast_fields = try_num_fast_fields;
404     }
405     // The largest table we allow has the same number of entries as the message
406     // has fields, rounded up to the next power of 2 (e.g., a message with 5
407     // fields can have a fast table of size 8). A larger table *might* cover
408     // more fields in certain cases, but a larger table in that case would have
409     // mostly empty entries; so, we cap the size to avoid pathologically sparse
410     // tables.
411     if (try_size > ordered_fields.size()) {
412       break;
413     }
414   }
415 
416   // Filter out fields that are handled by MiniParse. We don't need to generate
417   // a fallback for these, which saves code size.
418   fallback_fields = FilterMiniParsedFields(ordered_fields, options,
419                                            scc_analyzer);
420 
421   // If there are no fallback fields, and at most one extension range, the
422   // parser can use a generic fallback function. Otherwise, a message-specific
423   // fallback routine is needed.
424   use_generated_fallback =
425       !fallback_fields.empty() || descriptor->extension_range_count() > 1;
426 }
427 
ParseFunctionGenerator(const Descriptor * descriptor,int max_has_bit_index,const std::vector<int> & has_bit_indices,const std::vector<int> & inlined_string_indices,const Options & options,MessageSCCAnalyzer * scc_analyzer,const std::map<std::string,std::string> & vars)428 ParseFunctionGenerator::ParseFunctionGenerator(
429     const Descriptor* descriptor, int max_has_bit_index,
430     const std::vector<int>& has_bit_indices,
431     const std::vector<int>& inlined_string_indices, const Options& options,
432     MessageSCCAnalyzer* scc_analyzer,
433     const std::map<std::string, std::string>& vars)
434     : descriptor_(descriptor),
435       scc_analyzer_(scc_analyzer),
436       options_(options),
437       variables_(vars),
438       inlined_string_indices_(inlined_string_indices),
439       ordered_fields_(GetOrderedFields(descriptor_, options_)),
440       num_hasbits_(max_has_bit_index) {
441   if (should_generate_tctable()) {
442     tc_table_info_.reset(new TailCallTableInfo(
443         descriptor_, options_, ordered_fields_, has_bit_indices,
444         inlined_string_indices, scc_analyzer));
445   }
446   SetCommonVars(options_, &variables_);
447   SetCommonMessageDataVariables(descriptor_, &variables_);
448   SetUnknownFieldsVariable(descriptor_, options_, &variables_);
449   variables_["classname"] = ClassName(descriptor, false);
450 }
451 
GenerateMethodDecls(io::Printer * printer)452 void ParseFunctionGenerator::GenerateMethodDecls(io::Printer* printer) {
453   Formatter format(printer, variables_);
454   if (should_generate_tctable()) {
455     format.Outdent();
456     if (should_generate_guarded_tctable()) {
457       format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
458     }
459     format(
460         " private:\n"
461         "  static const char* Tct_ParseFallback(PROTOBUF_TC_PARAM_DECL);\n"
462         " public:\n");
463     if (should_generate_guarded_tctable()) {
464       format("#endif\n");
465     }
466     format.Indent();
467   }
468   format(
469       "const char* _InternalParse(const char* ptr, "
470       "::$proto_ns$::internal::ParseContext* ctx) final;\n");
471 }
472 
GenerateMethodImpls(io::Printer * printer)473 void ParseFunctionGenerator::GenerateMethodImpls(io::Printer* printer) {
474   Formatter format(printer, variables_);
475   bool need_parse_function = true;
476   if (descriptor_->options().message_set_wire_format()) {
477     // Special-case MessageSet.
478     need_parse_function = false;
479     format(
480         "const char* $classname$::_InternalParse(const char* ptr,\n"
481         "                  ::_pbi::ParseContext* ctx) {\n"
482         "$annotate_deserialize$");
483     if (!options_.unverified_lazy_message_sets &&
484         ShouldVerify(descriptor_, options_, scc_analyzer_)) {
485       format(
486           "  ctx->set_lazy_eager_verify_func(&$classname$::InternalVerify);\n");
487     }
488     format(
489         "  return $extensions$.ParseMessageSet(ptr, \n"
490         "      internal_default_instance(), &_internal_metadata_, ctx);\n"
491         "}\n");
492   }
493   if (!should_generate_tctable()) {
494     if (need_parse_function) {
495       GenerateLoopingParseFunction(format);
496     }
497     return;
498   }
499   if (should_generate_guarded_tctable()) {
500     format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n\n");
501   }
502   if (need_parse_function) {
503     GenerateTailcallParseFunction(format);
504   }
505   if (tc_table_info_->use_generated_fallback) {
506     GenerateTailcallFallbackFunction(format);
507   }
508   if (should_generate_guarded_tctable()) {
509     if (need_parse_function) {
510       format("\n#else  // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n\n");
511       GenerateLoopingParseFunction(format);
512     }
513     format("\n#endif  // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
514   }
515 }
516 
should_generate_tctable() const517 bool ParseFunctionGenerator::should_generate_tctable() const {
518   if (options_.tctable_mode == Options::kTCTableNever) {
519     return false;
520   }
521   return true;
522 }
523 
GenerateTailcallParseFunction(Formatter & format)524 void ParseFunctionGenerator::GenerateTailcallParseFunction(Formatter& format) {
525   GOOGLE_CHECK(should_generate_tctable());
526 
527   // Generate an `_InternalParse` that starts the tail-calling loop.
528   format(
529       "const char* $classname$::_InternalParse(\n"
530       "    const char* ptr, ::_pbi::ParseContext* ctx) {\n"
531       "$annotate_deserialize$"
532       "  ptr = ::_pbi::TcParser::ParseLoop(this, ptr, ctx, "
533       "&_table_.header);\n");
534   format(
535       "  return ptr;\n"
536       "}\n\n");
537 }
538 
GenerateTailcallFallbackFunction(Formatter & format)539 void ParseFunctionGenerator::GenerateTailcallFallbackFunction(
540     Formatter& format) {
541   GOOGLE_CHECK(should_generate_tctable());
542   format(
543       "const char* $classname$::Tct_ParseFallback(PROTOBUF_TC_PARAM_DECL) {\n"
544       "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) return nullptr\n");
545   format.Indent();
546   format("auto* typed_msg = static_cast<$classname$*>(msg);\n");
547 
548   if (num_hasbits_ > 0) {
549     // Sync hasbits
550     format("typed_msg->_impl_._has_bits_[0] = hasbits;\n");
551   }
552   format("uint32_t tag = data.tag();\n");
553 
554   format.Set("msg", "typed_msg->");
555   format.Set("this", "typed_msg");
556   format.Set("has_bits", "typed_msg->_impl_._has_bits_");
557   format.Set("next_tag", "goto next_tag");
558   GenerateParseIterationBody(format, descriptor_,
559                              tc_table_info_->fallback_fields);
560 
561   format.Outdent();
562   format(
563       "next_tag:\n"
564       "message_done:\n"
565       "  return ptr;\n"
566       "#undef CHK_\n"
567       "}\n");
568 }
569 
// Covers a window of 16 consecutive field numbers inside a SkipEntryBlock.
struct SkipEntry16 {
  // Starts with every bit set; MakeNumToEntryTable clears bit `i` when the
  // field number at offset `i` within the window is present.
  uint16_t skipmap;
  // Index into the ordered field list that was current when this entry was
  // created.
  uint16_t field_entry_offset;
};

// A run of SkipEntry16 windows starting at field number `first_fnum`.
struct SkipEntryBlock {
  uint32_t first_fnum;
  std::vector<SkipEntry16> entries;
};

// Lookup structure from field number to field entry.
struct NumToEntryTable {
  uint32_t skipmap32;  // for fields #1 - #32
  std::vector<SkipEntryBlock> blocks;

  // Compute the number of uint16_t required to represent this table.
  int size16() const {
    // The terminating field number takes two uint16_t.
    size_t total = 2;
    for (const SkipEntryBlock& block : blocks) {
      // Per block: 2 for the field#, 1 for a count of skip entries.
      total += 3;
      // Each skip entry takes two uint16_t.
      total += 2 * block.entries.size();
    }
    return static_cast<int>(total);
  }
};
591 
592 static NumToEntryTable MakeNumToEntryTable(
593     const std::vector<const FieldDescriptor*>& field_descriptors);
594 
GenerateDataDecls(io::Printer * printer)595 void ParseFunctionGenerator::GenerateDataDecls(io::Printer* printer) {
596   if (!should_generate_tctable()) {
597     return;
598   }
599   Formatter format(printer, variables_);
600   if (should_generate_guarded_tctable()) {
601     format.Outdent();
602     format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
603     format.Indent();
604   }
605   auto field_num_to_entry_table = MakeNumToEntryTable(ordered_fields_);
606   format(
607       "static const ::$proto_ns$::internal::"
608       "TcParseTable<$1$, $2$, $3$, $4$, $5$> _table_;\n",
609       tc_table_info_->table_size_log2, ordered_fields_.size(),
610       tc_table_info_->aux_entries.size(), CalculateFieldNamesSize(),
611       field_num_to_entry_table.size16());
612   if (should_generate_guarded_tctable()) {
613     format.Outdent();
614     format("#endif  // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
615     format.Indent();
616   }
617 }
618 
GenerateDataDefinitions(io::Printer * printer)619 void ParseFunctionGenerator::GenerateDataDefinitions(io::Printer* printer) {
620   if (!should_generate_tctable()) {
621     return;
622   }
623   Formatter format(printer, variables_);
624   if (should_generate_guarded_tctable()) {
625     format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
626   }
627   GenerateTailCallTable(format);
628   if (should_generate_guarded_tctable()) {
629     format("#endif  // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
630   }
631 }
632 
GenerateLoopingParseFunction(Formatter & format)633 void ParseFunctionGenerator::GenerateLoopingParseFunction(Formatter& format) {
634   format(
635       "const char* $classname$::_InternalParse(const char* ptr, "
636       "::_pbi::ParseContext* ctx) {\n"
637       "$annotate_deserialize$"
638       "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure\n");
639   format.Indent();
640   format.Set("msg", "");
641   format.Set("this", "this");
642   int hasbits_size = 0;
643   if (num_hasbits_ > 0) {
644     hasbits_size = (num_hasbits_ + 31) / 32;
645   }
646   // For now only optimize small hasbits.
647   if (hasbits_size != 1) hasbits_size = 0;
648   if (hasbits_size) {
649     format("_Internal::HasBits has_bits{};\n");
650     format.Set("has_bits", "has_bits");
651   } else {
652     format.Set("has_bits", "_impl_._has_bits_");
653   }
654   format.Set("next_tag", "continue");
655   format("while (!ctx->Done(&ptr)) {\n");
656   format.Indent();
657 
658   format(
659       "uint32_t tag;\n"
660       "ptr = ::_pbi::ReadTag(ptr, &tag);\n");
661   GenerateParseIterationBody(format, descriptor_, ordered_fields_);
662 
663   format.Outdent();
664   format("}  // while\n");
665 
666   format.Outdent();
667   format("message_done:\n");
668   if (hasbits_size) format("  _impl_._has_bits_.Or(has_bits);\n");
669 
670   format(
671       "  return ptr;\n"
672       "failure:\n"
673       "  ptr = nullptr;\n"
674       "  goto message_done;\n"
675       "#undef CHK_\n"
676       "}\n");
677 }
678 
MakeNumToEntryTable(const std::vector<const FieldDescriptor * > & field_descriptors)679 static NumToEntryTable MakeNumToEntryTable(
680     const std::vector<const FieldDescriptor*>& field_descriptors) {
681   NumToEntryTable num_to_entry_table;
682   num_to_entry_table.skipmap32 = static_cast<uint32_t>(-1);
683 
684   // skip_entry_block is the current block of SkipEntries that we're
685   // appending to.  cur_block_first_fnum is the number of the first
686   // field represented by the block.
687   uint16_t field_entry_index = 0;
688   uint16_t N = field_descriptors.size();
689   // First, handle field numbers 1-32, which affect only the initial
690   // skipmap32 and don't generate additional skip-entry blocks.
691   for (; field_entry_index != N; ++field_entry_index) {
692     auto* field_descriptor = field_descriptors[field_entry_index];
693     if (field_descriptor->number() > 32) break;
694     auto skipmap32_index = field_descriptor->number() - 1;
695     num_to_entry_table.skipmap32 -= 1 << skipmap32_index;
696   }
697   // If all the field numbers were less than or equal to 32, we will have
698   // no further entries to process, and we are already done.
699   if (field_entry_index == N) return num_to_entry_table;
700 
701   SkipEntryBlock* block = nullptr;
702   bool start_new_block = true;
703   // To determine sparseness, track the field number corresponding to
704   // the start of the most recent skip entry.
705   uint32_t last_skip_entry_start = 0;
706   for (; field_entry_index != N; ++field_entry_index) {
707     auto* field_descriptor = field_descriptors[field_entry_index];
708     uint32_t fnum = field_descriptor->number();
709     GOOGLE_CHECK_GT(fnum, last_skip_entry_start);
710     if (start_new_block == false) {
711       // If the next field number is within 15 of the last_skip_entry_start, we
712       // continue writing just to that entry.  If it's between 16 and 31 more,
713       // then we just extend the current block by one. If it's more than 31
714       // more, we have to add empty skip entries in order to continue using the
715       // existing block.  Obviously it's just 32 more, it doesn't make sense to
716       // start a whole new block, since new blocks mean having to write out
717       // their starting field number, which is 32 bits, as well as the size of
718       // the additional block, which is 16... while an empty SkipEntry16 only
719       // costs 32 bits.  So if it was 48 more, it's a slight space win; we save
720       // 16 bits, but probably at the cost of slower run time.  We're choosing
721       // 96 for now.
722       if (fnum - last_skip_entry_start > 96) start_new_block = true;
723     }
724     if (start_new_block) {
725       num_to_entry_table.blocks.push_back(SkipEntryBlock{fnum});
726       block = &num_to_entry_table.blocks.back();
727       start_new_block = false;
728     }
729 
730     auto skip_entry_num = (fnum - block->first_fnum) / 16;
731     auto skip_entry_index = (fnum - block->first_fnum) % 16;
732     while (skip_entry_num >= block->entries.size())
733       block->entries.push_back({0xFFFF, field_entry_index});
734     block->entries[skip_entry_num].skipmap -= 1 << (skip_entry_index);
735 
736     last_skip_entry_start = fnum - skip_entry_index;
737   }
738   return num_to_entry_table;
739 }
740 
GenerateTailCallTable(Formatter & format)741 void ParseFunctionGenerator::GenerateTailCallTable(Formatter& format) {
742   GOOGLE_CHECK(should_generate_tctable());
743   // All entries without a fast-path parsing function need a fallback.
744   std::string fallback;
745   if (tc_table_info_->use_generated_fallback) {
746     fallback = ClassName(descriptor_) + "::Tct_ParseFallback";
747   } else {
748     fallback = "::_pbi::TcParser::GenericFallback";
749     if (GetOptimizeFor(descriptor_->file(), options_) ==
750         FileOptions::LITE_RUNTIME) {
751       fallback += "Lite";
752     }
753   }
754 
755   // For simplicity and speed, the table is not covering all proto
756   // configurations. This model uses a fallback to cover all situations that
757   // the table can't accommodate, together with unknown fields or extensions.
758   // These are number of fields over 32, fields with 3 or more tag bytes,
759   // maps, weak fields, lazy, more than 1 extension range. In the cases
760   // the table is sufficient we can use a generic routine, that just handles
761   // unknown fields and potentially an extension range.
762   auto field_num_to_entry_table = MakeNumToEntryTable(ordered_fields_);
763   format(
764       "PROTOBUF_ATTRIBUTE_INIT_PRIORITY1\n"
765       "const ::_pbi::TcParseTable<$1$, $2$, $3$, $4$, $5$> "
766       "$classname$::_table_ = "
767       "{\n",
768       tc_table_info_->table_size_log2, ordered_fields_.size(),
769       tc_table_info_->aux_entries.size(), CalculateFieldNamesSize(),
770       field_num_to_entry_table.size16());
771   {
772     auto table_scope = format.ScopedIndent();
773     format("{\n");
774     {
775       auto header_scope = format.ScopedIndent();
776       if (num_hasbits_ > 0 || IsMapEntryMessage(descriptor_)) {
777         format("PROTOBUF_FIELD_OFFSET($classname$, _impl_._has_bits_),\n");
778       } else {
779         format("0,  // no _has_bits_\n");
780       }
781       if (descriptor_->extension_range_count() == 1) {
782         format(
783             "PROTOBUF_FIELD_OFFSET($classname$, $extensions$),\n"
784             "$1$, $2$,  // extension_range_{low,high}\n",
785             descriptor_->extension_range(0)->start,
786             descriptor_->extension_range(0)->end);
787       } else {
788         format("0, 0, 0,  // no _extensions_\n");
789       }
790       format("$1$, $2$,  // max_field_number, fast_idx_mask\n",
791              (ordered_fields_.empty() ? 0 : ordered_fields_.back()->number()),
792              (((1 << tc_table_info_->table_size_log2) - 1) << 3));
793       format(
794           "offsetof(decltype(_table_), field_lookup_table),\n"
795           "$1$,  // skipmap\n",
796           field_num_to_entry_table.skipmap32);
797       if (ordered_fields_.empty()) {
798         format(
799             "offsetof(decltype(_table_), field_names),  // no field_entries\n");
800       } else {
801         format("offsetof(decltype(_table_), field_entries),\n");
802       }
803 
804       format(
805           "$1$,  // num_field_entries\n"
806           "$2$,  // num_aux_entries\n",
807           ordered_fields_.size(), tc_table_info_->aux_entries.size());
808       if (tc_table_info_->aux_entries.empty()) {
809         format(
810             "offsetof(decltype(_table_), field_names),  // no aux_entries\n");
811       } else {
812         format("offsetof(decltype(_table_), aux_entries),\n");
813       }
814       format(
815           "&$1$._instance,\n"
816           "$2$,  // fallback\n"
817           "",
818           DefaultInstanceName(descriptor_, options_), fallback);
819     }
820     format("}, {{\n");
821     {
822       // fast_entries[]
823       auto fast_scope = format.ScopedIndent();
824       GenerateFastFieldEntries(format);
825     }
826     format("}}, {{\n");
827     {
828       // field_lookup_table[]
829       auto field_lookup_scope = format.ScopedIndent();
830       int line_entries = 0;
831       for (int i = 0, N = field_num_to_entry_table.blocks.size(); i < N; ++i) {
832         SkipEntryBlock& entry_block = field_num_to_entry_table.blocks[i];
833         format("$1$, $2$, $3$,\n", entry_block.first_fnum & 65535,
834                entry_block.first_fnum / 65536, entry_block.entries.size());
835         for (auto se16 : entry_block.entries) {
836           if (line_entries == 0) {
837             format("$1$, $2$,", se16.skipmap, se16.field_entry_offset);
838             ++line_entries;
839           } else if (line_entries < 5) {
840             format(" $1$, $2$,", se16.skipmap, se16.field_entry_offset);
841             ++line_entries;
842           } else {
843             format(" $1$, $2$,\n", se16.skipmap, se16.field_entry_offset);
844             line_entries = 0;
845           }
846         }
847       }
848       if (line_entries) format("\n");
849       format("65535, 65535\n");
850     }
851     if (ordered_fields_.empty()) {
852       GOOGLE_LOG_IF(DFATAL, !tc_table_info_->aux_entries.empty())
853           << "Invalid message: " << descriptor_->full_name() << " has "
854           << tc_table_info_->aux_entries.size()
855           << " auxiliary field entries, but no fields";
856       format(
857           "}},\n"
858           "// no field_entries, or aux_entries\n"
859           "{{\n");
860     } else {
861       format("}}, {{\n");
862       {
863         // field_entries[]
864         auto field_scope = format.ScopedIndent();
865         GenerateFieldEntries(format);
866       }
867       if (tc_table_info_->aux_entries.empty()) {
868         format(
869             "}},\n"
870             "// no aux_entries\n"
871             "{{\n");
872       } else {
873         format("}}, {{\n");
874         {
875           // aux_entries[]
876           auto aux_scope = format.ScopedIndent();
877           for (const std::string& aux_entry : tc_table_info_->aux_entries) {
878             format("{$1$},\n", aux_entry);
879           }
880         }
881         format("}}, {{\n");
882       }
883     }  // ordered_fields_.empty()
884     {
885       // field_names[]
886       auto field_name_scope = format.ScopedIndent();
887       GenerateFieldNames(format);
888     }
889     format("}},\n");
890   }
891   format("};\n\n");  // _table_
892 }
893 
GenerateFastFieldEntries(Formatter & format)894 void ParseFunctionGenerator::GenerateFastFieldEntries(Formatter& format) {
895   for (const auto& info : tc_table_info_->fast_path_fields) {
896     if (info.field != nullptr) {
897       PrintFieldComment(format, info.field);
898     }
899     if (info.func_name.empty()) {
900       format("{::_pbi::TcParser::MiniParse, {}},\n");
901     } else {
902       bool cold = ShouldSplit(info.field, options_);
903       format(
904           "{$1$,\n"
905           " {$2$, $3$, $4$, PROTOBUF_FIELD_OFFSET($classname$$5$, $6$)}},\n",
906           info.func_name, info.coded_tag, info.hasbit_idx, info.aux_idx,
907           cold ? "::Impl_::Split" : "",
908           cold ? FieldName(info.field) + "_"
909                : FieldMemberName(info.field, /*cold=*/false));
910     }
911   }
912 }
913 
FormatFieldKind(Formatter & format,const TailCallTableInfo::FieldEntryInfo & entry,const Options & options,MessageSCCAnalyzer * scc_analyzer)914 static void FormatFieldKind(Formatter& format,
915                             const TailCallTableInfo::FieldEntryInfo& entry,
916                             const Options& options,
917                             MessageSCCAnalyzer* scc_analyzer) {
918   const FieldDescriptor* field = entry.field;
919   // Spell the field kind in proto language declaration order, starting with
920   // cardinality:
921   format("(::_fl::kFc");
922   if (HasHasbit(field)) {
923     format("Optional");
924   } else if (field->is_repeated()) {
925     format("Repeated");
926   } else if (field->real_containing_oneof()) {
927     format("Oneof");
928   } else {
929     format("Singular");
930   }
931 
932   // The rest of the type uses convenience aliases:
933   format(" | ::_fl::k");
934   if (field->is_repeated() && field->is_packed()) {
935     format("Packed");
936   }
937   switch (field->type()) {
938     case FieldDescriptor::TYPE_DOUBLE:
939       format("Double");
940       break;
941     case FieldDescriptor::TYPE_FLOAT:
942       format("Float");
943       break;
944     case FieldDescriptor::TYPE_FIXED32:
945       format("Fixed32");
946       break;
947     case FieldDescriptor::TYPE_SFIXED32:
948       format("SFixed32");
949       break;
950     case FieldDescriptor::TYPE_FIXED64:
951       format("Fixed64");
952       break;
953     case FieldDescriptor::TYPE_SFIXED64:
954       format("SFixed64");
955       break;
956     case FieldDescriptor::TYPE_BOOL:
957       format("Bool");
958       break;
959     case FieldDescriptor::TYPE_ENUM:
960       if (HasPreservingUnknownEnumSemantics(field)) {
961         // No validation is required.
962         format("OpenEnum");
963       } else if (entry.is_enum_range) {
964         // Validation is done by range check (start/length in FieldAux).
965         format("EnumRange");
966       } else {
967         // Validation uses the generated _IsValid function.
968         format("Enum");
969       }
970       break;
971     case FieldDescriptor::TYPE_UINT32:
972       format("UInt32");
973       break;
974     case FieldDescriptor::TYPE_SINT32:
975       format("SInt32");
976       break;
977     case FieldDescriptor::TYPE_INT32:
978       format("Int32");
979       break;
980     case FieldDescriptor::TYPE_UINT64:
981       format("UInt64");
982       break;
983     case FieldDescriptor::TYPE_SINT64:
984       format("SInt64");
985       break;
986     case FieldDescriptor::TYPE_INT64:
987       format("Int64");
988       break;
989 
990     case FieldDescriptor::TYPE_BYTES:
991       format("Bytes");
992       break;
993     case FieldDescriptor::TYPE_STRING: {
994       auto mode = GetUtf8CheckMode(field, options);
995       switch (mode) {
996         case Utf8CheckMode::kStrict:
997           format("Utf8String");
998           break;
999         case Utf8CheckMode::kVerify:
1000           format("RawString");
1001           break;
1002         case Utf8CheckMode::kNone:
1003           // Treat LITE_RUNTIME strings as bytes.
1004           format("Bytes");
1005           break;
1006         default:
1007           GOOGLE_LOG(FATAL) << "Invalid Utf8CheckMode (" << static_cast<int>(mode)
1008                      << ") for " << field->DebugString();
1009       }
1010       break;
1011     }
1012 
1013     case FieldDescriptor::TYPE_GROUP:
1014       format("Message | ::_fl::kRepGroup");
1015       break;
1016     case FieldDescriptor::TYPE_MESSAGE:
1017       if (field->is_map()) {
1018         format("Map");
1019       } else {
1020         format("Message");
1021         if (IsLazy(field, options, scc_analyzer)) {
1022           format(" | ::_fl::kRepLazy");
1023         } else if (IsImplicitWeakField(field, options, scc_analyzer)) {
1024           format(" | ::_fl::kRepIWeak");
1025         }
1026       }
1027       break;
1028   }
1029 
1030   // Fill in extra information about string and bytes field representations.
1031   if (field->type() == FieldDescriptor::TYPE_BYTES ||
1032       field->type() == FieldDescriptor::TYPE_STRING) {
1033     if (field->is_repeated()) {
1034       format(" | ::_fl::kRepSString");
1035     } else {
1036       format(" | ::_fl::kRepAString");
1037     }
1038   }
1039 
1040   format(")");
1041 }
1042 
GenerateFieldEntries(Formatter & format)1043 void ParseFunctionGenerator::GenerateFieldEntries(Formatter& format) {
1044   for (const auto& entry : tc_table_info_->field_entries) {
1045     const FieldDescriptor* field = entry.field;
1046     PrintFieldComment(format, field);
1047     format("{");
1048     if (IsWeak(field, options_)) {
1049       // Weak fields are handled by the generated fallback function.
1050       // (These are handled by legacy Google-internal logic.)
1051       format("/* weak */ 0, 0, 0, 0");
1052     } else {
1053       const OneofDescriptor* oneof = field->real_containing_oneof();
1054       bool cold = ShouldSplit(field, options_);
1055       format("PROTOBUF_FIELD_OFFSET($classname$$1$, $2$), $3$, $4$,\n ",
1056              cold ? "::Impl_::Split" : "",
1057              cold ? FieldName(field) + "_"
1058                   : FieldMemberName(field, /*cold=*/false),
1059              (oneof ? oneof->index() : entry.hasbit_idx), entry.aux_idx);
1060       FormatFieldKind(format, entry, options_, scc_analyzer_);
1061     }
1062     format("},\n");
1063   }
1064 }
1065 
1066 static constexpr int kMaxNameLength = 255;
1067 
CalculateFieldNamesSize() const1068 int ParseFunctionGenerator::CalculateFieldNamesSize() const {
1069   // The full name of the message appears first.
1070   int size = std::min(static_cast<int>(descriptor_->full_name().size()),
1071                       kMaxNameLength);
1072   int lengths_size = 1;
1073   for (const auto& entry : tc_table_info_->field_entries) {
1074     const FieldDescriptor* field = entry.field;
1075     GOOGLE_CHECK_LE(field->name().size(), kMaxNameLength);
1076     size += field->name().size();
1077     lengths_size += 1;
1078   }
1079   // align to an 8-byte boundary
1080   lengths_size = (lengths_size + 7) & -8;
1081   return size + lengths_size + 1;
1082 }
1083 
FormatOctal(Formatter & format,int size)1084 static void FormatOctal(Formatter& format, int size) {
1085   int octal_size = ((size >> 6) & 3) * 100 +  //
1086                    ((size >> 3) & 7) * 10 +   //
1087                    ((size >> 0) & 7);
1088   format("\\$1$", octal_size);
1089 }
1090 
GenerateFieldNames(Formatter & format)1091 void ParseFunctionGenerator::GenerateFieldNames(Formatter& format) {
1092   // First, we output the size of each string, as an unsigned byte. The first
1093   // string is the message name.
1094   int count = 1;
1095   format("\"");
1096   FormatOctal(format,
1097               std::min(static_cast<int>(descriptor_->full_name().size()), 255));
1098   for (const auto& entry : tc_table_info_->field_entries) {
1099     FormatOctal(format, entry.field->name().size());
1100     ++count;
1101   }
1102   while (count & 7) {  // align to an 8-byte boundary
1103     format("\\0");
1104     ++count;
1105   }
1106   format("\"\n");
1107   // The message name is stored at the beginning of the string
1108   std::string message_name = descriptor_->full_name();
1109   if (message_name.size() > kMaxNameLength) {
1110     static constexpr int kNameHalfLength = (kMaxNameLength - 3) / 2;
1111     message_name = StrCat(
1112         message_name.substr(0, kNameHalfLength), "...",
1113         message_name.substr(message_name.size() - kNameHalfLength));
1114   }
1115   format("\"$1$\"\n", message_name);
1116   // Then we output the actual field names
1117   for (const auto& entry : tc_table_info_->field_entries) {
1118     const FieldDescriptor* field = entry.field;
1119     format("\"$1$\"\n", field->name());
1120   }
1121 }
1122 
GenerateArenaString(Formatter & format,const FieldDescriptor * field)1123 void ParseFunctionGenerator::GenerateArenaString(Formatter& format,
1124                                                  const FieldDescriptor* field) {
1125   if (HasHasbit(field)) {
1126     format("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field));
1127   }
1128   format(
1129       "if (arena != nullptr) {\n"
1130       "  ptr = ctx->ReadArenaString(ptr, &$msg$$field$, arena");
1131   if (IsStringInlined(field, options_)) {
1132     GOOGLE_DCHECK(!inlined_string_indices_.empty());
1133     int inlined_string_index = inlined_string_indices_[field->index()];
1134     GOOGLE_DCHECK_GT(inlined_string_index, 0);
1135     format(", &$msg$$inlined_string_donated_array$[0], $1$, $this$",
1136            inlined_string_index);
1137   } else {
1138     GOOGLE_DCHECK(field->default_value_string().empty());
1139   }
1140   format(
1141       ");\n"
1142       "} else {\n"
1143       "  ptr = ::_pbi::InlineGreedyStringParser("
1144       "$msg$$field$.MutableNoCopy(nullptr), ptr, ctx);\n"
1145       "}\n"
1146       "const std::string* str = &$msg$$field$.Get(); (void)str;\n");
1147 }
1148 
GenerateStrings(Formatter & format,const FieldDescriptor * field,bool check_utf8)1149 void ParseFunctionGenerator::GenerateStrings(Formatter& format,
1150                                              const FieldDescriptor* field,
1151                                              bool check_utf8) {
1152   FieldOptions::CType ctype = FieldOptions::STRING;
1153   if (!options_.opensource_runtime) {
1154     // Open source doesn't support other ctypes;
1155     ctype = field->options().ctype();
1156   }
1157   if (!field->is_repeated() && !options_.opensource_runtime &&
1158       GetOptimizeFor(field->file(), options_) != FileOptions::LITE_RUNTIME &&
1159       // For now only use arena string for strings with empty defaults.
1160       field->default_value_string().empty() &&
1161       !field->real_containing_oneof() && ctype == FieldOptions::STRING) {
1162     GenerateArenaString(format, field);
1163   } else {
1164     std::string parser_name;
1165     switch (ctype) {
1166       case FieldOptions::STRING:
1167         parser_name = "GreedyStringParser";
1168         break;
1169       case FieldOptions::CORD:
1170         parser_name = "CordParser";
1171         break;
1172       case FieldOptions::STRING_PIECE:
1173         parser_name = "StringPieceParser";
1174         break;
1175     }
1176     format(
1177         "auto str = $msg$$1$$2$_$name$();\n"
1178         "ptr = ::_pbi::Inline$3$(str, ptr, ctx);\n",
1179         HasInternalAccessors(ctype) ? "_internal_" : "",
1180         field->is_repeated() && !field->is_packable() ? "add" : "mutable",
1181         parser_name);
1182   }
1183   // It is intentionally placed before VerifyUTF8 because it doesn't make sense
1184   // to verify UTF8 when we already know parsing failed.
1185   format("CHK_(ptr);\n");
1186   if (!check_utf8) return;  // return if this is a bytes field
1187   auto level = GetUtf8CheckMode(field, options_);
1188   switch (level) {
1189     case Utf8CheckMode::kNone:
1190       return;
1191     case Utf8CheckMode::kVerify:
1192       format("#ifndef NDEBUG\n");
1193       break;
1194     case Utf8CheckMode::kStrict:
1195       format("CHK_(");
1196       break;
1197   }
1198   std::string field_name;
1199   field_name = "nullptr";
1200   if (HasDescriptorMethods(field->file(), options_)) {
1201     field_name = StrCat("\"", field->full_name(), "\"");
1202   }
1203   format("::_pbi::VerifyUTF8(str, $1$)", field_name);
1204   switch (level) {
1205     case Utf8CheckMode::kNone:
1206       return;
1207     case Utf8CheckMode::kVerify:
1208       format(
1209           ";\n"
1210           "#endif  // !NDEBUG\n");
1211       break;
1212     case Utf8CheckMode::kStrict:
1213       format(");\n");
1214       break;
1215   }
1216 }
1217 
GenerateLengthDelim(Formatter & format,const FieldDescriptor * field)1218 void ParseFunctionGenerator::GenerateLengthDelim(Formatter& format,
1219                                                  const FieldDescriptor* field) {
1220   if (field->is_packable()) {
1221     if (field->type() == FieldDescriptor::TYPE_ENUM &&
1222         !HasPreservingUnknownEnumSemantics(field)) {
1223       std::string enum_type = QualifiedClassName(field->enum_type(), options_);
1224       format(
1225           "ptr = "
1226           "::$proto_ns$::internal::Packed$1$Parser<$unknown_fields_type$>("
1227           "$msg$_internal_mutable_$name$(), ptr, ctx, $2$_IsValid, "
1228           "&$msg$_internal_metadata_, $3$);\n",
1229           DeclaredTypeMethodName(field->type()), enum_type, field->number());
1230     } else {
1231       format(
1232           "ptr = ::$proto_ns$::internal::Packed$1$Parser("
1233           "$msg$_internal_mutable_$name$(), ptr, ctx);\n",
1234           DeclaredTypeMethodName(field->type()));
1235     }
1236     format("CHK_(ptr);\n");
1237   } else {
1238     auto field_type = field->type();
1239     switch (field_type) {
1240       case FieldDescriptor::TYPE_STRING:
1241         GenerateStrings(format, field, true /* utf8 */);
1242         break;
1243       case FieldDescriptor::TYPE_BYTES:
1244         GenerateStrings(format, field, false /* utf8 */);
1245         break;
1246       case FieldDescriptor::TYPE_MESSAGE: {
1247         if (field->is_map()) {
1248           const FieldDescriptor* val = field->message_type()->map_value();
1249           GOOGLE_CHECK(val);
1250           if (val->type() == FieldDescriptor::TYPE_ENUM &&
1251               !HasPreservingUnknownEnumSemantics(field)) {
1252             format(
1253                 "auto object = "
1254                 "::$proto_ns$::internal::InitEnumParseWrapper<"
1255                 "$unknown_fields_type$>(&$msg$$field$, $1$_IsValid, "
1256                 "$2$, &$msg$_internal_metadata_);\n"
1257                 "ptr = ctx->ParseMessage(&object, ptr);\n",
1258                 QualifiedClassName(val->enum_type(), options_),
1259                 field->number());
1260           } else {
1261             format("ptr = ctx->ParseMessage(&$msg$$field$, ptr);\n");
1262           }
1263         } else if (IsLazy(field, options_, scc_analyzer_)) {
1264           bool eager_verify =
1265               IsEagerlyVerifiedLazy(field, options_, scc_analyzer_);
1266           if (ShouldVerify(descriptor_, options_, scc_analyzer_)) {
1267             format(
1268                 "ctx->set_lazy_eager_verify_func($1$);\n",
1269                 eager_verify
1270                     ? StrCat("&", ClassName(field->message_type(), true),
1271                                    "::InternalVerify")
1272                     : "nullptr");
1273           }
1274           if (field->real_containing_oneof()) {
1275             format(
1276                 "if (!$msg$_internal_has_$name$()) {\n"
1277                 "  $msg$clear_$1$();\n"
1278                 "  $msg$$field$ = ::$proto_ns$::Arena::CreateMessage<\n"
1279                 "      ::$proto_ns$::internal::LazyField>("
1280                 "$msg$GetArenaForAllocation());\n"
1281                 "  $msg$set_has_$name$();\n"
1282                 "}\n"
1283                 "auto* lazy_field = $msg$$field$;\n",
1284                 field->containing_oneof()->name());
1285           } else if (HasHasbit(field)) {
1286             format(
1287                 "_Internal::set_has_$name$(&$has_bits$);\n"
1288                 "auto* lazy_field = &$msg$$field$;\n");
1289           } else {
1290             format("auto* lazy_field = &$msg$$field$;\n");
1291           }
1292           format(
1293               "::$proto_ns$::internal::LazyFieldParseHelper<\n"
1294               "  ::$proto_ns$::internal::LazyField> parse_helper(\n"
1295               "    $1$::default_instance(),\n"
1296               "    $msg$GetArenaForAllocation(),\n"
1297               "    ::google::protobuf::internal::LazyVerifyOption::$2$,\n"
1298               "    lazy_field);\n"
1299               "ptr = ctx->ParseMessage(&parse_helper, ptr);\n",
1300               FieldMessageTypeName(field, options_),
1301               eager_verify ? "kEager" : "kLazy");
1302           if (ShouldVerify(descriptor_, options_, scc_analyzer_) &&
1303               eager_verify) {
1304             format("ctx->set_lazy_eager_verify_func(nullptr);\n");
1305           }
1306         } else if (IsImplicitWeakField(field, options_, scc_analyzer_)) {
1307           if (!field->is_repeated()) {
1308             format(
1309                 "ptr = ctx->ParseMessage(_Internal::mutable_$name$($this$), "
1310                 "ptr);\n");
1311           } else {
1312             format(
1313                 "ptr = ctx->ParseMessage($msg$$field$.AddWeak("
1314                 "reinterpret_cast<const ::$proto_ns$::MessageLite*>($1$ptr_)"
1315                 "), ptr);\n",
1316                 QualifiedDefaultInstanceName(field->message_type(), options_));
1317           }
1318         } else if (IsWeak(field, options_)) {
1319           format(
1320               "{\n"
1321               "  auto* default_ = &reinterpret_cast<const Message&>($1$);\n"
1322               "  ptr = ctx->ParseMessage($msg$$weak_field_map$.MutableMessage("
1323               "$2$, default_), ptr);\n"
1324               "}\n",
1325               QualifiedDefaultInstanceName(field->message_type(), options_),
1326               field->number());
1327         } else {
1328           format(
1329               "ptr = ctx->ParseMessage($msg$_internal_$mutable_field$(), "
1330               "ptr);\n");
1331         }
1332         format("CHK_(ptr);\n");
1333         break;
1334       }
1335       default:
1336         GOOGLE_LOG(FATAL) << "Illegal combination for length delimited wiretype "
1337                    << " filed type is " << field->type();
1338     }
1339   }
1340 }
1341 
ShouldRepeat(const FieldDescriptor * descriptor,WireFormatLite::WireType wiretype)1342 static bool ShouldRepeat(const FieldDescriptor* descriptor,
1343                          WireFormatLite::WireType wiretype) {
1344   constexpr int kMaxTwoByteFieldNumber = 16 * 128;
1345   return descriptor->number() < kMaxTwoByteFieldNumber &&
1346          descriptor->is_repeated() &&
1347          (!descriptor->is_packable() ||
1348           wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1349 }
1350 
GenerateFieldBody(Formatter & format,WireFormatLite::WireType wiretype,const FieldDescriptor * field)1351 void ParseFunctionGenerator::GenerateFieldBody(
1352     Formatter& format, WireFormatLite::WireType wiretype,
1353     const FieldDescriptor* field) {
1354   Formatter::SaveState formatter_state(&format);
1355   format.AddMap(
1356       {{"name", FieldName(field)},
1357        {"primitive_type", PrimitiveTypeName(options_, field->cpp_type())}});
1358   if (field->is_repeated()) {
1359     format.AddMap({{"put_field", StrCat("add_", FieldName(field))},
1360                    {"mutable_field", StrCat("add_", FieldName(field))}});
1361   } else {
1362     format.AddMap(
1363         {{"put_field", StrCat("set_", FieldName(field))},
1364          {"mutable_field", StrCat("mutable_", FieldName(field))}});
1365   }
1366   uint32_t tag = WireFormatLite::MakeTag(field->number(), wiretype);
1367   switch (wiretype) {
1368     case WireFormatLite::WIRETYPE_VARINT: {
1369       std::string type = PrimitiveTypeName(options_, field->cpp_type());
1370       if (field->type() == FieldDescriptor::TYPE_ENUM) {
1371         format.Set("enum_type",
1372                    QualifiedClassName(field->enum_type(), options_));
1373         format(
1374             "$uint64$ val = ::$proto_ns$::internal::ReadVarint64(&ptr);\n"
1375             "CHK_(ptr);\n");
1376         if (!HasPreservingUnknownEnumSemantics(field)) {
1377           format("if (PROTOBUF_PREDICT_TRUE($enum_type$_IsValid(val))) {\n");
1378           format.Indent();
1379         }
1380         format("$msg$_internal_$put_field$(static_cast<$enum_type$>(val));\n");
1381         if (!HasPreservingUnknownEnumSemantics(field)) {
1382           format.Outdent();
1383           format(
1384               "} else {\n"
1385               "  ::$proto_ns$::internal::WriteVarint("
1386               "$1$, val, $msg$mutable_unknown_fields());\n"
1387               "}\n",
1388               field->number());
1389         }
1390       } else {
1391         std::string size = (field->type() == FieldDescriptor::TYPE_INT32 ||
1392                             field->type() == FieldDescriptor::TYPE_SINT32 ||
1393                             field->type() == FieldDescriptor::TYPE_UINT32)
1394                                ? "32"
1395                                : "64";
1396         std::string zigzag;
1397         if ((field->type() == FieldDescriptor::TYPE_SINT32 ||
1398              field->type() == FieldDescriptor::TYPE_SINT64)) {
1399           zigzag = "ZigZag";
1400         }
1401         if (field->is_repeated() || field->real_containing_oneof()) {
1402           format(
1403               "$msg$_internal_$put_field$("
1404               "::$proto_ns$::internal::ReadVarint$1$$2$(&ptr));\n"
1405               "CHK_(ptr);\n",
1406               zigzag, size);
1407         } else {
1408           if (HasHasbit(field)) {
1409             format("_Internal::set_has_$name$(&$has_bits$);\n");
1410           }
1411           format(
1412               "$msg$$field$ = ::$proto_ns$::internal::ReadVarint$1$$2$(&ptr);\n"
1413               "CHK_(ptr);\n",
1414               zigzag, size);
1415         }
1416       }
1417       break;
1418     }
1419     case WireFormatLite::WIRETYPE_FIXED32:
1420     case WireFormatLite::WIRETYPE_FIXED64: {
1421       if (field->is_repeated() || field->real_containing_oneof()) {
1422         format(
1423             "$msg$_internal_$put_field$("
1424             "::$proto_ns$::internal::UnalignedLoad<$primitive_type$>(ptr));\n"
1425             "ptr += sizeof($primitive_type$);\n");
1426       } else {
1427         if (HasHasbit(field)) {
1428           format("_Internal::set_has_$name$(&$has_bits$);\n");
1429         }
1430         format(
1431             "$msg$$field$ = "
1432             "::$proto_ns$::internal::UnalignedLoad<$primitive_type$>(ptr);\n"
1433             "ptr += sizeof($primitive_type$);\n");
1434       }
1435       break;
1436     }
1437     case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
1438       GenerateLengthDelim(format, field);
1439       break;
1440     }
1441     case WireFormatLite::WIRETYPE_START_GROUP: {
1442       format(
1443           "ptr = ctx->ParseGroup($msg$_internal_$mutable_field$(), ptr, $1$);\n"
1444           "CHK_(ptr);\n",
1445           tag);
1446       break;
1447     }
1448     case WireFormatLite::WIRETYPE_END_GROUP: {
1449       GOOGLE_LOG(FATAL) << "Can't have end group field\n";
1450       break;
1451     }
1452   }  // switch (wire_type)
1453 }
1454 
1455 // Returns the tag for this field and in case of repeated packable fields,
1456 // sets a fallback tag in fallback_tag_ptr.
ExpectedTag(const FieldDescriptor * field,uint32_t * fallback_tag_ptr)1457 static uint32_t ExpectedTag(const FieldDescriptor* field,
1458                             uint32_t* fallback_tag_ptr) {
1459   uint32_t expected_tag;
1460   if (field->is_packable()) {
1461     auto expected_wiretype = WireFormat::WireTypeForFieldType(field->type());
1462     expected_tag = WireFormatLite::MakeTag(field->number(), expected_wiretype);
1463     GOOGLE_CHECK(expected_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1464     auto fallback_wiretype = WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
1465     uint32_t fallback_tag =
1466         WireFormatLite::MakeTag(field->number(), fallback_wiretype);
1467 
1468     if (field->is_packed()) std::swap(expected_tag, fallback_tag);
1469     *fallback_tag_ptr = fallback_tag;
1470   } else {
1471     auto expected_wiretype = WireFormat::WireTypeForField(field);
1472     expected_tag = WireFormatLite::MakeTag(field->number(), expected_wiretype);
1473   }
1474   return expected_tag;
1475 }
1476 
1477 // These variables are used by the generated parse iteration, and must already
1478 // be defined in the generated code:
1479 // - `const char* ptr`: the input buffer.
1480 // - `ParseContext* ctx`: the associated context for `ptr`.
1481 // - implicit `this`: i.e., we must be in a non-static member function.
1482 //
1483 // The macro `CHK_(x)` must be defined. It should return an error condition if
1484 // the macro parameter is false.
1485 //
1486 // Whenever an END_GROUP tag was read, or tag 0 was read, the generated code
1487 // branches to the label `message_done`.
1488 //
1489 // These formatter variables are used:
1490 // - `next_tag`: a single statement to begin parsing the next tag.
1491 //
1492 // At the end of the generated code, the enclosing function should proceed to
1493 // parse the next tag in the stream.
GenerateParseIterationBody(Formatter & format,const Descriptor * descriptor,const std::vector<const FieldDescriptor * > & fields)1494 void ParseFunctionGenerator::GenerateParseIterationBody(
1495     Formatter& format, const Descriptor* descriptor,
1496     const std::vector<const FieldDescriptor*>& fields) {
1497   if (!fields.empty()) {
1498     GenerateFieldSwitch(format, fields);
1499     // Each field `case` only considers field number. Field numbers that are
1500     // not defined in the message, or tags with an incompatible wire type, are
1501     // considered "unusual" cases. They will be handled by the logic below.
1502     format.Outdent();
1503     format("handle_unusual:\n");
1504     format.Indent();
1505   }
1506 
1507   // Unusual/extension/unknown case:
1508   format(
1509       "if ((tag == 0) || ((tag & 7) == 4)) {\n"
1510       "  CHK_(ptr);\n"
1511       "  ctx->SetLastTag(tag);\n"
1512       "  goto message_done;\n"
1513       "}\n");
1514   if (IsMapEntryMessage(descriptor)) {
1515     format("$next_tag$;\n");
1516   } else {
1517     if (descriptor->extension_range_count() > 0) {
1518       format("if (");
1519       for (int i = 0; i < descriptor->extension_range_count(); i++) {
1520         const Descriptor::ExtensionRange* range =
1521             descriptor->extension_range(i);
1522         if (i > 0) format(" ||\n    ");
1523 
1524         uint32_t start_tag = WireFormatLite::MakeTag(
1525             range->start, static_cast<WireFormatLite::WireType>(0));
1526         uint32_t end_tag = WireFormatLite::MakeTag(
1527             range->end, static_cast<WireFormatLite::WireType>(0));
1528 
1529         if (range->end > FieldDescriptor::kMaxNumber) {
1530           format("($1$u <= tag)", start_tag);
1531         } else {
1532           format("($1$u <= tag && tag < $2$u)", start_tag, end_tag);
1533         }
1534       }
1535       format(
1536           ") {\n"
1537           "  ptr = $msg$$extensions$.ParseField(tag, ptr, "
1538           "internal_default_instance(), &$msg$_internal_metadata_, ctx);\n"
1539           "  CHK_(ptr != nullptr);\n"
1540           "  $next_tag$;\n"
1541           "}\n");
1542     }
1543     format(
1544         "ptr = UnknownFieldParse(\n"
1545         "    tag,\n"
1546         "    $msg$_internal_metadata_.mutable_unknown_fields<"
1547         "$unknown_fields_type$>(),\n"
1548         "    ptr, ctx);\n"
1549         "CHK_(ptr != nullptr);\n");
1550   }
1551 }
1552 
GenerateFieldSwitch(Formatter & format,const std::vector<const FieldDescriptor * > & fields)1553 void ParseFunctionGenerator::GenerateFieldSwitch(
1554     Formatter& format, const std::vector<const FieldDescriptor*>& fields) {
1555   format("switch (tag >> 3) {\n");
1556   format.Indent();
1557 
1558   for (const auto* field : fields) {
1559     bool cold = ShouldSplit(field, options_);
1560     format.Set("field", FieldMemberName(field, cold));
1561     PrintFieldComment(format, field);
1562     format("case $1$:\n", field->number());
1563     format.Indent();
1564     uint32_t fallback_tag = 0;
1565     uint32_t expected_tag = ExpectedTag(field, &fallback_tag);
1566     format("if (PROTOBUF_PREDICT_TRUE(static_cast<$uint8$>(tag) == $1$)) {\n",
1567            expected_tag & 0xFF);
1568     format.Indent();
1569     if (cold) {
1570       format("$msg$PrepareSplitMessageForWrite();\n");
1571     }
1572     auto wiretype = WireFormatLite::GetTagWireType(expected_tag);
1573     uint32_t tag = WireFormatLite::MakeTag(field->number(), wiretype);
1574     int tag_size = io::CodedOutputStream::VarintSize32(tag);
1575     bool is_repeat = ShouldRepeat(field, wiretype);
1576     if (is_repeat) {
1577       format(
1578           "ptr -= $1$;\n"
1579           "do {\n"
1580           "  ptr += $1$;\n",
1581           tag_size);
1582       format.Indent();
1583     }
1584     GenerateFieldBody(format, wiretype, field);
1585     if (is_repeat) {
1586       format.Outdent();
1587       format(
1588           "  if (!ctx->DataAvailable(ptr)) break;\n"
1589           "} while (::$proto_ns$::internal::ExpectTag<$1$>(ptr));\n",
1590           tag);
1591     }
1592     format.Outdent();
1593     if (fallback_tag) {
1594       format("} else if (static_cast<$uint8$>(tag) == $1$) {\n",
1595              fallback_tag & 0xFF);
1596       format.Indent();
1597       GenerateFieldBody(format, WireFormatLite::GetTagWireType(fallback_tag),
1598                         field);
1599       format.Outdent();
1600     }
1601     format(
1602         "} else\n"
1603         "  goto handle_unusual;\n"
1604         "$next_tag$;\n");
1605     format.Outdent();
1606   }  // for loop over ordered fields
1607 
1608   format(
1609       "default:\n"
1610       "  goto handle_unusual;\n");
1611   format.Outdent();
1612   format("}  // switch\n");
1613 }
1614 
1615 namespace {
1616 
FieldParseFunctionName(const TailCallTableInfo::FieldEntryInfo & entry,const Options & options)1617 std::string FieldParseFunctionName(
1618     const TailCallTableInfo::FieldEntryInfo& entry, const Options& options) {
1619   const FieldDescriptor* field = entry.field;
1620   std::string name = "::_pbi::TcParser::Fast";
1621 
1622   switch (field->type()) {
1623     case FieldDescriptor::TYPE_FIXED32:
1624     case FieldDescriptor::TYPE_SFIXED32:
1625     case FieldDescriptor::TYPE_FLOAT:
1626       name.append("F32");
1627       break;
1628 
1629     case FieldDescriptor::TYPE_FIXED64:
1630     case FieldDescriptor::TYPE_SFIXED64:
1631     case FieldDescriptor::TYPE_DOUBLE:
1632       name.append("F64");
1633       break;
1634 
1635     case FieldDescriptor::TYPE_BOOL:
1636       name.append("V8");
1637       break;
1638     case FieldDescriptor::TYPE_INT32:
1639     case FieldDescriptor::TYPE_UINT32:
1640       name.append("V32");
1641       break;
1642     case FieldDescriptor::TYPE_INT64:
1643     case FieldDescriptor::TYPE_UINT64:
1644       name.append("V64");
1645       break;
1646 
1647     case FieldDescriptor::TYPE_ENUM:
1648       if (HasPreservingUnknownEnumSemantics(field)) {
1649         name.append("V32");
1650         break;
1651       }
1652       if (field->is_repeated() && field->is_packed()) {
1653         GOOGLE_LOG(DFATAL) << "Enum validation not handled: " << field->DebugString();
1654         return "";
1655       }
1656       name.append(entry.is_enum_range ? "Er" : "Ev");
1657       break;
1658 
1659     case FieldDescriptor::TYPE_SINT32:
1660       name.append("Z32");
1661       break;
1662     case FieldDescriptor::TYPE_SINT64:
1663       name.append("Z64");
1664       break;
1665 
1666     case FieldDescriptor::TYPE_BYTES:
1667       name.append("B");
1668       if (IsStringInlined(field, options)) {
1669         name.append("i");
1670       }
1671       break;
1672     case FieldDescriptor::TYPE_STRING:
1673       switch (GetUtf8CheckMode(field, options)) {
1674         case Utf8CheckMode::kNone:
1675           name.append("B");
1676           break;
1677         case Utf8CheckMode::kVerify:
1678           name.append("S");
1679           break;
1680         case Utf8CheckMode::kStrict:
1681           name.append("U");
1682           break;
1683         default:
1684           GOOGLE_LOG(DFATAL) << "Mode not handled: "
1685                       << static_cast<int>(GetUtf8CheckMode(field, options));
1686           return "";
1687       }
1688       if (IsStringInlined(field, options)) {
1689         name.append("i");
1690       }
1691       break;
1692 
1693     case FieldDescriptor::TYPE_MESSAGE:
1694       name.append("M");
1695       break;
1696     case FieldDescriptor::TYPE_GROUP:
1697       name.append("G");
1698       break;
1699 
1700     default:
1701       GOOGLE_LOG(DFATAL) << "Type not handled: " << field->DebugString();
1702       return "";
1703   }
1704 
1705   // The field implementation functions are prefixed by cardinality:
1706   //   `S` for optional or implicit fields.
1707   //   `R` for non-packed repeated.
1708   //   `P` for packed repeated.
1709   name.append(field->is_packed()               ? "P"
1710               : field->is_repeated()           ? "R"
1711               : field->real_containing_oneof() ? "O"
1712                                                : "S");
1713 
1714   // Append the tag length. Fast parsing only handles 1- or 2-byte tags.
1715   name.append(TagSize(field->number()) == 1 ? "1" : "2");
1716 
1717   return name;
1718 }
1719 
1720 }  // namespace
1721 
1722 }  // namespace cpp
1723 }  // namespace compiler
1724 }  // namespace protobuf
1725 }  // namespace google
1726