xref: /aosp_15_r20/external/libtextclassifier/native/utils/grammar/parsing/parser.cc (revision 993b0882672172b81d12fad7a7ac0c3e5c824a12)
1*993b0882SAndroid Build Coastguard Worker /*
2*993b0882SAndroid Build Coastguard Worker  * Copyright (C) 2018 The Android Open Source Project
3*993b0882SAndroid Build Coastguard Worker  *
4*993b0882SAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License");
5*993b0882SAndroid Build Coastguard Worker  * you may not use this file except in compliance with the License.
6*993b0882SAndroid Build Coastguard Worker  * You may obtain a copy of the License at
7*993b0882SAndroid Build Coastguard Worker  *
8*993b0882SAndroid Build Coastguard Worker  *      http://www.apache.org/licenses/LICENSE-2.0
9*993b0882SAndroid Build Coastguard Worker  *
10*993b0882SAndroid Build Coastguard Worker  * Unless required by applicable law or agreed to in writing, software
11*993b0882SAndroid Build Coastguard Worker  * distributed under the License is distributed on an "AS IS" BASIS,
12*993b0882SAndroid Build Coastguard Worker  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*993b0882SAndroid Build Coastguard Worker  * See the License for the specific language governing permissions and
14*993b0882SAndroid Build Coastguard Worker  * limitations under the License.
15*993b0882SAndroid Build Coastguard Worker  */
16*993b0882SAndroid Build Coastguard Worker 
17*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/parsing/parser.h"
18*993b0882SAndroid Build Coastguard Worker 
19*993b0882SAndroid Build Coastguard Worker #include <algorithm>
20*993b0882SAndroid Build Coastguard Worker #include <unordered_map>
21*993b0882SAndroid Build Coastguard Worker 
22*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/parsing/parse-tree.h"
23*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/rules-utils.h"
24*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/types.h"
25*993b0882SAndroid Build Coastguard Worker #include "utils/zlib/zlib.h"
26*993b0882SAndroid Build Coastguard Worker #include "utils/zlib/zlib_regex.h"
27*993b0882SAndroid Build Coastguard Worker 
28*993b0882SAndroid Build Coastguard Worker namespace libtextclassifier3::grammar {
29*993b0882SAndroid Build Coastguard Worker namespace {
30*993b0882SAndroid Build Coastguard Worker 
CheckMemoryUsage(const UnsafeArena * arena)31*993b0882SAndroid Build Coastguard Worker inline bool CheckMemoryUsage(const UnsafeArena* arena) {
32*993b0882SAndroid Build Coastguard Worker   // The maximum memory usage for matching.
33*993b0882SAndroid Build Coastguard Worker   constexpr int kMaxMemoryUsage = 1 << 20;
34*993b0882SAndroid Build Coastguard Worker   return arena->status().bytes_allocated() <= kMaxMemoryUsage;
35*993b0882SAndroid Build Coastguard Worker }
36*993b0882SAndroid Build Coastguard Worker 
37*993b0882SAndroid Build Coastguard Worker // Maps a codepoint to include the token padding if it aligns with a token
38*993b0882SAndroid Build Coastguard Worker // start. Whitespace is ignored when symbols are fed to the matcher. Preceding
39*993b0882SAndroid Build Coastguard Worker // whitespace is merged to the match start so that tokens and non-terminals
40*993b0882SAndroid Build Coastguard Worker // appear next to each other without whitespace. For text or regex annotations,
41*993b0882SAndroid Build Coastguard Worker // we therefore merge the whitespace padding to the start if the annotation
42*993b0882SAndroid Build Coastguard Worker // starts at a token.
MapCodepointToTokenPaddingIfPresent(const std::unordered_map<CodepointIndex,CodepointIndex> & token_alignment,const int start)43*993b0882SAndroid Build Coastguard Worker int MapCodepointToTokenPaddingIfPresent(
44*993b0882SAndroid Build Coastguard Worker     const std::unordered_map<CodepointIndex, CodepointIndex>& token_alignment,
45*993b0882SAndroid Build Coastguard Worker     const int start) {
46*993b0882SAndroid Build Coastguard Worker   const auto it = token_alignment.find(start);
47*993b0882SAndroid Build Coastguard Worker   if (it != token_alignment.end()) {
48*993b0882SAndroid Build Coastguard Worker     return it->second;
49*993b0882SAndroid Build Coastguard Worker   }
50*993b0882SAndroid Build Coastguard Worker   return start;
51*993b0882SAndroid Build Coastguard Worker }
52*993b0882SAndroid Build Coastguard Worker 
53*993b0882SAndroid Build Coastguard Worker }  // namespace
54*993b0882SAndroid Build Coastguard Worker 
Parser(const UniLib * unilib,const RulesSet * rules)55*993b0882SAndroid Build Coastguard Worker Parser::Parser(const UniLib* unilib, const RulesSet* rules)
56*993b0882SAndroid Build Coastguard Worker     : unilib_(*unilib),
57*993b0882SAndroid Build Coastguard Worker       rules_(rules),
58*993b0882SAndroid Build Coastguard Worker       lexer_(unilib),
59*993b0882SAndroid Build Coastguard Worker       nonterminals_(rules_->nonterminals()),
60*993b0882SAndroid Build Coastguard Worker       rules_locales_(ParseRulesLocales(rules_)),
61*993b0882SAndroid Build Coastguard Worker       regex_annotators_(BuildRegexAnnotators()) {}
62*993b0882SAndroid Build Coastguard Worker 
63*993b0882SAndroid Build Coastguard Worker // Uncompresses and build the defined regex annotators.
BuildRegexAnnotators() const64*993b0882SAndroid Build Coastguard Worker std::vector<Parser::RegexAnnotator> Parser::BuildRegexAnnotators() const {
65*993b0882SAndroid Build Coastguard Worker   std::vector<RegexAnnotator> result;
66*993b0882SAndroid Build Coastguard Worker   if (rules_->regex_annotator() != nullptr) {
67*993b0882SAndroid Build Coastguard Worker     std::unique_ptr<ZlibDecompressor> decompressor =
68*993b0882SAndroid Build Coastguard Worker         ZlibDecompressor::Instance();
69*993b0882SAndroid Build Coastguard Worker     result.reserve(rules_->regex_annotator()->size());
70*993b0882SAndroid Build Coastguard Worker     for (const RulesSet_::RegexAnnotator* regex_annotator :
71*993b0882SAndroid Build Coastguard Worker          *rules_->regex_annotator()) {
72*993b0882SAndroid Build Coastguard Worker       result.push_back(
73*993b0882SAndroid Build Coastguard Worker           {UncompressMakeRegexPattern(unilib_, regex_annotator->pattern(),
74*993b0882SAndroid Build Coastguard Worker                                       regex_annotator->compressed_pattern(),
75*993b0882SAndroid Build Coastguard Worker                                       rules_->lazy_regex_compilation(),
76*993b0882SAndroid Build Coastguard Worker                                       decompressor.get()),
77*993b0882SAndroid Build Coastguard Worker            regex_annotator->nonterminal()});
78*993b0882SAndroid Build Coastguard Worker     }
79*993b0882SAndroid Build Coastguard Worker   }
80*993b0882SAndroid Build Coastguard Worker   return result;
81*993b0882SAndroid Build Coastguard Worker }
82*993b0882SAndroid Build Coastguard Worker 
SortedSymbolsForInput(const TextContext & input,UnsafeArena * arena) const83*993b0882SAndroid Build Coastguard Worker std::vector<Symbol> Parser::SortedSymbolsForInput(const TextContext& input,
84*993b0882SAndroid Build Coastguard Worker                                                   UnsafeArena* arena) const {
85*993b0882SAndroid Build Coastguard Worker   // Whitespace is ignored when symbols are fed to the matcher.
86*993b0882SAndroid Build Coastguard Worker   // For regex matches and existing text annotations we therefore have to merge
87*993b0882SAndroid Build Coastguard Worker   // preceding whitespace to the match start so that tokens and non-terminals
88*993b0882SAndroid Build Coastguard Worker   // appear as next to each other without whitespace. We keep track of real
89*993b0882SAndroid Build Coastguard Worker   // token starts and precending whitespace in `token_match_start`, so that we
90*993b0882SAndroid Build Coastguard Worker   // can extend a match's start to include the preceding whitespace.
91*993b0882SAndroid Build Coastguard Worker   std::unordered_map<CodepointIndex, CodepointIndex> token_match_start;
92*993b0882SAndroid Build Coastguard Worker   for (int i = input.context_span.first + 1; i < input.context_span.second;
93*993b0882SAndroid Build Coastguard Worker        i++) {
94*993b0882SAndroid Build Coastguard Worker     const CodepointIndex token_start = input.tokens[i].start;
95*993b0882SAndroid Build Coastguard Worker     const CodepointIndex prev_token_end = input.tokens[i - 1].end;
96*993b0882SAndroid Build Coastguard Worker     if (token_start != prev_token_end) {
97*993b0882SAndroid Build Coastguard Worker       token_match_start[token_start] = prev_token_end;
98*993b0882SAndroid Build Coastguard Worker     }
99*993b0882SAndroid Build Coastguard Worker   }
100*993b0882SAndroid Build Coastguard Worker 
101*993b0882SAndroid Build Coastguard Worker   std::vector<Symbol> symbols;
102*993b0882SAndroid Build Coastguard Worker   CodepointIndex match_offset = input.tokens[input.context_span.first].start;
103*993b0882SAndroid Build Coastguard Worker 
104*993b0882SAndroid Build Coastguard Worker   // Add start symbol.
105*993b0882SAndroid Build Coastguard Worker   if (input.context_span.first == 0 &&
106*993b0882SAndroid Build Coastguard Worker       nonterminals_->start_nt() != kUnassignedNonterm) {
107*993b0882SAndroid Build Coastguard Worker     match_offset = 0;
108*993b0882SAndroid Build Coastguard Worker     symbols.emplace_back(arena->AllocAndInit<ParseTree>(
109*993b0882SAndroid Build Coastguard Worker         nonterminals_->start_nt(), CodepointSpan{0, 0},
110*993b0882SAndroid Build Coastguard Worker         /*match_offset=*/0, ParseTree::Type::kDefault));
111*993b0882SAndroid Build Coastguard Worker   }
112*993b0882SAndroid Build Coastguard Worker 
113*993b0882SAndroid Build Coastguard Worker   if (nonterminals_->wordbreak_nt() != kUnassignedNonterm) {
114*993b0882SAndroid Build Coastguard Worker     symbols.emplace_back(arena->AllocAndInit<ParseTree>(
115*993b0882SAndroid Build Coastguard Worker         nonterminals_->wordbreak_nt(),
116*993b0882SAndroid Build Coastguard Worker         CodepointSpan{match_offset, match_offset},
117*993b0882SAndroid Build Coastguard Worker         /*match_offset=*/match_offset, ParseTree::Type::kDefault));
118*993b0882SAndroid Build Coastguard Worker   }
119*993b0882SAndroid Build Coastguard Worker 
120*993b0882SAndroid Build Coastguard Worker   // Add symbols from tokens.
121*993b0882SAndroid Build Coastguard Worker   for (int i = input.context_span.first; i < input.context_span.second; i++) {
122*993b0882SAndroid Build Coastguard Worker     const Token& token = input.tokens[i];
123*993b0882SAndroid Build Coastguard Worker     lexer_.AppendTokenSymbols(token.value, /*match_offset=*/match_offset,
124*993b0882SAndroid Build Coastguard Worker                               CodepointSpan{token.start, token.end}, &symbols);
125*993b0882SAndroid Build Coastguard Worker     match_offset = token.end;
126*993b0882SAndroid Build Coastguard Worker 
127*993b0882SAndroid Build Coastguard Worker     // Add word break symbol.
128*993b0882SAndroid Build Coastguard Worker     if (nonterminals_->wordbreak_nt() != kUnassignedNonterm) {
129*993b0882SAndroid Build Coastguard Worker       symbols.emplace_back(arena->AllocAndInit<ParseTree>(
130*993b0882SAndroid Build Coastguard Worker           nonterminals_->wordbreak_nt(),
131*993b0882SAndroid Build Coastguard Worker           CodepointSpan{match_offset, match_offset},
132*993b0882SAndroid Build Coastguard Worker           /*match_offset=*/match_offset, ParseTree::Type::kDefault));
133*993b0882SAndroid Build Coastguard Worker     }
134*993b0882SAndroid Build Coastguard Worker   }
135*993b0882SAndroid Build Coastguard Worker 
136*993b0882SAndroid Build Coastguard Worker   // Add end symbol if used by the grammar.
137*993b0882SAndroid Build Coastguard Worker   if (input.context_span.second == input.tokens.size() &&
138*993b0882SAndroid Build Coastguard Worker       nonterminals_->end_nt() != kUnassignedNonterm) {
139*993b0882SAndroid Build Coastguard Worker     symbols.emplace_back(arena->AllocAndInit<ParseTree>(
140*993b0882SAndroid Build Coastguard Worker         nonterminals_->end_nt(), CodepointSpan{match_offset, match_offset},
141*993b0882SAndroid Build Coastguard Worker         /*match_offset=*/match_offset, ParseTree::Type::kDefault));
142*993b0882SAndroid Build Coastguard Worker   }
143*993b0882SAndroid Build Coastguard Worker 
144*993b0882SAndroid Build Coastguard Worker   // Add symbols from the regex annotators.
145*993b0882SAndroid Build Coastguard Worker   const CodepointIndex context_start =
146*993b0882SAndroid Build Coastguard Worker       input.tokens[input.context_span.first].start;
147*993b0882SAndroid Build Coastguard Worker   const CodepointIndex context_end =
148*993b0882SAndroid Build Coastguard Worker       input.tokens[input.context_span.second - 1].end;
149*993b0882SAndroid Build Coastguard Worker   for (const RegexAnnotator& regex_annotator : regex_annotators_) {
150*993b0882SAndroid Build Coastguard Worker     std::unique_ptr<UniLib::RegexMatcher> regex_matcher =
151*993b0882SAndroid Build Coastguard Worker         regex_annotator.pattern->Matcher(UnicodeText::Substring(
152*993b0882SAndroid Build Coastguard Worker             input.text, context_start, context_end, /*do_copy=*/false));
153*993b0882SAndroid Build Coastguard Worker     int status = UniLib::RegexMatcher::kNoError;
154*993b0882SAndroid Build Coastguard Worker     while (regex_matcher->Find(&status) &&
155*993b0882SAndroid Build Coastguard Worker            status == UniLib::RegexMatcher::kNoError) {
156*993b0882SAndroid Build Coastguard Worker       const CodepointSpan span{regex_matcher->Start(0, &status) + context_start,
157*993b0882SAndroid Build Coastguard Worker                                regex_matcher->End(0, &status) + context_start};
158*993b0882SAndroid Build Coastguard Worker       symbols.emplace_back(arena->AllocAndInit<ParseTree>(
159*993b0882SAndroid Build Coastguard Worker           regex_annotator.nonterm, span, /*match_offset=*/
160*993b0882SAndroid Build Coastguard Worker           MapCodepointToTokenPaddingIfPresent(token_match_start, span.first),
161*993b0882SAndroid Build Coastguard Worker           ParseTree::Type::kDefault));
162*993b0882SAndroid Build Coastguard Worker     }
163*993b0882SAndroid Build Coastguard Worker   }
164*993b0882SAndroid Build Coastguard Worker 
165*993b0882SAndroid Build Coastguard Worker   // Add symbols based on annotations.
166*993b0882SAndroid Build Coastguard Worker   if (auto annotation_nonterminals = nonterminals_->annotation_nt()) {
167*993b0882SAndroid Build Coastguard Worker     for (const AnnotatedSpan& annotated_span : input.annotations) {
168*993b0882SAndroid Build Coastguard Worker       const ClassificationResult& classification =
169*993b0882SAndroid Build Coastguard Worker           annotated_span.classification.front();
170*993b0882SAndroid Build Coastguard Worker       if (auto entry = annotation_nonterminals->LookupByKey(
171*993b0882SAndroid Build Coastguard Worker               classification.collection.c_str())) {
172*993b0882SAndroid Build Coastguard Worker         symbols.emplace_back(arena->AllocAndInit<AnnotationNode>(
173*993b0882SAndroid Build Coastguard Worker             entry->value(), annotated_span.span, /*match_offset=*/
174*993b0882SAndroid Build Coastguard Worker             MapCodepointToTokenPaddingIfPresent(token_match_start,
175*993b0882SAndroid Build Coastguard Worker                                                 annotated_span.span.first),
176*993b0882SAndroid Build Coastguard Worker             &classification));
177*993b0882SAndroid Build Coastguard Worker       }
178*993b0882SAndroid Build Coastguard Worker     }
179*993b0882SAndroid Build Coastguard Worker   }
180*993b0882SAndroid Build Coastguard Worker 
181*993b0882SAndroid Build Coastguard Worker   std::stable_sort(
182*993b0882SAndroid Build Coastguard Worker       symbols.begin(), symbols.end(), [](const Symbol& a, const Symbol& b) {
183*993b0882SAndroid Build Coastguard Worker         // Sort by increasing (end, start) position to guarantee the
184*993b0882SAndroid Build Coastguard Worker         // matcher requirement that the tokens are fed in non-decreasing
185*993b0882SAndroid Build Coastguard Worker         // end position order.
186*993b0882SAndroid Build Coastguard Worker         return std::tie(a.codepoint_span.second, a.codepoint_span.first) <
187*993b0882SAndroid Build Coastguard Worker                std::tie(b.codepoint_span.second, b.codepoint_span.first);
188*993b0882SAndroid Build Coastguard Worker       });
189*993b0882SAndroid Build Coastguard Worker 
190*993b0882SAndroid Build Coastguard Worker   return symbols;
191*993b0882SAndroid Build Coastguard Worker }
192*993b0882SAndroid Build Coastguard Worker 
EmitSymbol(const Symbol & symbol,UnsafeArena * arena,Matcher * matcher) const193*993b0882SAndroid Build Coastguard Worker void Parser::EmitSymbol(const Symbol& symbol, UnsafeArena* arena,
194*993b0882SAndroid Build Coastguard Worker                         Matcher* matcher) const {
195*993b0882SAndroid Build Coastguard Worker   if (!CheckMemoryUsage(arena)) {
196*993b0882SAndroid Build Coastguard Worker     return;
197*993b0882SAndroid Build Coastguard Worker   }
198*993b0882SAndroid Build Coastguard Worker   switch (symbol.type) {
199*993b0882SAndroid Build Coastguard Worker     case Symbol::Type::TYPE_PARSE_TREE: {
200*993b0882SAndroid Build Coastguard Worker       // Just emit the parse tree.
201*993b0882SAndroid Build Coastguard Worker       matcher->AddParseTree(symbol.parse_tree);
202*993b0882SAndroid Build Coastguard Worker       return;
203*993b0882SAndroid Build Coastguard Worker     }
204*993b0882SAndroid Build Coastguard Worker     case Symbol::Type::TYPE_DIGITS: {
205*993b0882SAndroid Build Coastguard Worker       // Emit <digits> if used by the rules.
206*993b0882SAndroid Build Coastguard Worker       if (nonterminals_->digits_nt() != kUnassignedNonterm) {
207*993b0882SAndroid Build Coastguard Worker         matcher->AddParseTree(arena->AllocAndInit<ParseTree>(
208*993b0882SAndroid Build Coastguard Worker             nonterminals_->digits_nt(), symbol.codepoint_span,
209*993b0882SAndroid Build Coastguard Worker             symbol.match_offset, ParseTree::Type::kDefault));
210*993b0882SAndroid Build Coastguard Worker       }
211*993b0882SAndroid Build Coastguard Worker 
212*993b0882SAndroid Build Coastguard Worker       // Emit <n_digits> if used by the rules.
213*993b0882SAndroid Build Coastguard Worker       if (nonterminals_->n_digits_nt() != nullptr) {
214*993b0882SAndroid Build Coastguard Worker         const int num_digits =
215*993b0882SAndroid Build Coastguard Worker             symbol.codepoint_span.second - symbol.codepoint_span.first;
216*993b0882SAndroid Build Coastguard Worker         if (num_digits <= nonterminals_->n_digits_nt()->size()) {
217*993b0882SAndroid Build Coastguard Worker           const Nonterm n_digits_nt =
218*993b0882SAndroid Build Coastguard Worker               nonterminals_->n_digits_nt()->Get(num_digits - 1);
219*993b0882SAndroid Build Coastguard Worker           if (n_digits_nt != kUnassignedNonterm) {
220*993b0882SAndroid Build Coastguard Worker             matcher->AddParseTree(arena->AllocAndInit<ParseTree>(
221*993b0882SAndroid Build Coastguard Worker                 nonterminals_->n_digits_nt()->Get(num_digits - 1),
222*993b0882SAndroid Build Coastguard Worker                 symbol.codepoint_span, symbol.match_offset,
223*993b0882SAndroid Build Coastguard Worker                 ParseTree::Type::kDefault));
224*993b0882SAndroid Build Coastguard Worker           }
225*993b0882SAndroid Build Coastguard Worker         }
226*993b0882SAndroid Build Coastguard Worker       }
227*993b0882SAndroid Build Coastguard Worker       break;
228*993b0882SAndroid Build Coastguard Worker     }
229*993b0882SAndroid Build Coastguard Worker     case Symbol::Type::TYPE_TERM: {
230*993b0882SAndroid Build Coastguard Worker       // Emit <uppercase_token> if used by the rules.
231*993b0882SAndroid Build Coastguard Worker       if (nonterminals_->uppercase_token_nt() != 0 &&
232*993b0882SAndroid Build Coastguard Worker           unilib_.IsUpperText(
233*993b0882SAndroid Build Coastguard Worker               UTF8ToUnicodeText(symbol.lexeme, /*do_copy=*/false))) {
234*993b0882SAndroid Build Coastguard Worker         matcher->AddParseTree(arena->AllocAndInit<ParseTree>(
235*993b0882SAndroid Build Coastguard Worker             nonterminals_->uppercase_token_nt(), symbol.codepoint_span,
236*993b0882SAndroid Build Coastguard Worker             symbol.match_offset, ParseTree::Type::kDefault));
237*993b0882SAndroid Build Coastguard Worker       }
238*993b0882SAndroid Build Coastguard Worker       break;
239*993b0882SAndroid Build Coastguard Worker     }
240*993b0882SAndroid Build Coastguard Worker     default:
241*993b0882SAndroid Build Coastguard Worker       break;
242*993b0882SAndroid Build Coastguard Worker   }
243*993b0882SAndroid Build Coastguard Worker 
244*993b0882SAndroid Build Coastguard Worker   // Emit the token as terminal.
245*993b0882SAndroid Build Coastguard Worker   matcher->AddTerminal(symbol.codepoint_span, symbol.match_offset,
246*993b0882SAndroid Build Coastguard Worker                        symbol.lexeme);
247*993b0882SAndroid Build Coastguard Worker 
248*993b0882SAndroid Build Coastguard Worker   // Emit <token> if used by rules.
249*993b0882SAndroid Build Coastguard Worker   matcher->AddParseTree(arena->AllocAndInit<ParseTree>(
250*993b0882SAndroid Build Coastguard Worker       nonterminals_->token_nt(), symbol.codepoint_span, symbol.match_offset,
251*993b0882SAndroid Build Coastguard Worker       ParseTree::Type::kDefault));
252*993b0882SAndroid Build Coastguard Worker }
253*993b0882SAndroid Build Coastguard Worker 
254*993b0882SAndroid Build Coastguard Worker // Parses an input text and returns the root rule derivations.
Parse(const TextContext & input,UnsafeArena * arena) const255*993b0882SAndroid Build Coastguard Worker std::vector<Derivation> Parser::Parse(const TextContext& input,
256*993b0882SAndroid Build Coastguard Worker                                       UnsafeArena* arena) const {
257*993b0882SAndroid Build Coastguard Worker   // Check the tokens, input can be non-empty (whitespace) but have no tokens.
258*993b0882SAndroid Build Coastguard Worker   if (input.tokens.empty()) {
259*993b0882SAndroid Build Coastguard Worker     return {};
260*993b0882SAndroid Build Coastguard Worker   }
261*993b0882SAndroid Build Coastguard Worker 
262*993b0882SAndroid Build Coastguard Worker   // Select locale matching rules.
263*993b0882SAndroid Build Coastguard Worker   std::vector<const RulesSet_::Rules*> locale_rules =
264*993b0882SAndroid Build Coastguard Worker       SelectLocaleMatchingShards(rules_, rules_locales_, input.locales);
265*993b0882SAndroid Build Coastguard Worker 
266*993b0882SAndroid Build Coastguard Worker   if (locale_rules.empty()) {
267*993b0882SAndroid Build Coastguard Worker     // Nothing to do.
268*993b0882SAndroid Build Coastguard Worker     return {};
269*993b0882SAndroid Build Coastguard Worker   }
270*993b0882SAndroid Build Coastguard Worker 
271*993b0882SAndroid Build Coastguard Worker   Matcher matcher(&unilib_, rules_, locale_rules, arena);
272*993b0882SAndroid Build Coastguard Worker   for (const Symbol& symbol : SortedSymbolsForInput(input, arena)) {
273*993b0882SAndroid Build Coastguard Worker     EmitSymbol(symbol, arena, &matcher);
274*993b0882SAndroid Build Coastguard Worker   }
275*993b0882SAndroid Build Coastguard Worker   matcher.Finish();
276*993b0882SAndroid Build Coastguard Worker   return matcher.chart().derivations();
277*993b0882SAndroid Build Coastguard Worker }
278*993b0882SAndroid Build Coastguard Worker 
279*993b0882SAndroid Build Coastguard Worker }  // namespace libtextclassifier3::grammar
280