xref: /aosp_15_r20/external/libtextclassifier/native/utils/grammar/rules.fbs (revision 993b0882672172b81d12fad7a7ac0c3e5c824a12)
1*993b0882SAndroid Build Coastguard Worker//
2*993b0882SAndroid Build Coastguard Worker// Copyright (C) 2018 The Android Open Source Project
3*993b0882SAndroid Build Coastguard Worker//
4*993b0882SAndroid Build Coastguard Worker// Licensed under the Apache License, Version 2.0 (the "License");
5*993b0882SAndroid Build Coastguard Worker// you may not use this file except in compliance with the License.
6*993b0882SAndroid Build Coastguard Worker// You may obtain a copy of the License at
7*993b0882SAndroid Build Coastguard Worker//
8*993b0882SAndroid Build Coastguard Worker//      http://www.apache.org/licenses/LICENSE-2.0
9*993b0882SAndroid Build Coastguard Worker//
10*993b0882SAndroid Build Coastguard Worker// Unless required by applicable law or agreed to in writing, software
11*993b0882SAndroid Build Coastguard Worker// distributed under the License is distributed on an "AS IS" BASIS,
12*993b0882SAndroid Build Coastguard Worker// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*993b0882SAndroid Build Coastguard Worker// See the License for the specific language governing permissions and
14*993b0882SAndroid Build Coastguard Worker// limitations under the License.
15*993b0882SAndroid Build Coastguard Worker//
16*993b0882SAndroid Build Coastguard Worker
17*993b0882SAndroid Build Coastguard Workerinclude "utils/grammar/semantics/expression.fbs";
18*993b0882SAndroid Build Coastguard Workerinclude "utils/i18n/language-tag.fbs";
19*993b0882SAndroid Build Coastguard Workerinclude "utils/zlib/buffer.fbs";
20*993b0882SAndroid Build Coastguard Worker
21*993b0882SAndroid Build Coastguard Worker// The terminal rules map, stored as a sorted table of strings.
22*993b0882SAndroid Build Coastguard Worker// The sorted terminal strings are represented as offsets into the
23*993b0882SAndroid Build Coastguard Worker// global strings pool, which saves memory across localized rule
24*993b0882SAndroid Build Coastguard Worker// sets.
25*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.grammar.RulesSet_.Rules_;
26*993b0882SAndroid Build Coastguard Workertable TerminalRulesMap {
27*993b0882SAndroid Build Coastguard Worker  // The offsets into the terminals pool (the `terminals` string of `RulesSet`).
28*993b0882SAndroid Build Coastguard Worker  terminal_offsets:[uint];
29*993b0882SAndroid Build Coastguard Worker
30*993b0882SAndroid Build Coastguard Worker  // The lhs set associated with a terminal rule.
31*993b0882SAndroid Build Coastguard Worker  // Each value is an index into the (deduplicated) global `lhs_set` vector.
32*993b0882SAndroid Build Coastguard Worker  lhs_set_index:[uint];  // NOTE(review): presumably parallel to `terminal_offsets` - confirm.
33*993b0882SAndroid Build Coastguard Worker
34*993b0882SAndroid Build Coastguard Worker  // Bounds on the lengths of the terminal strings, allowing a lookup to be
35*993b0882SAndroid Build Coastguard Worker  // aborted early.
36*993b0882SAndroid Build Coastguard Worker  min_terminal_length:int;  // Lower bound on the terminal length.
37*993b0882SAndroid Build Coastguard Worker
38*993b0882SAndroid Build Coastguard Worker  max_terminal_length:int;  // Upper bound on the terminal length.
39*993b0882SAndroid Build Coastguard Worker}
40*993b0882SAndroid Build Coastguard Worker
41*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.grammar.RulesSet_.Rules_;
42*993b0882SAndroid Build Coastguard Workerstruct UnaryRulesEntry {  // One key/value entry of the `unary_rules` map in `Rules`.
43*993b0882SAndroid Build Coastguard Worker  key:uint (key);  // The nonterminal being mapped.
44*993b0882SAndroid Build Coastguard Worker  value:uint;  // Index into the (deduplicated) global `lhs_set` vector.
45*993b0882SAndroid Build Coastguard Worker}
46*993b0882SAndroid Build Coastguard Worker
47*993b0882SAndroid Build Coastguard Worker// A single key/value entry of the binary rules hash map.
48*993b0882SAndroid Build Coastguard Worker// The key is a pair of nonterminals; the value is the index of the lhs set.
49*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.grammar.RulesSet_.Rules_;
50*993b0882SAndroid Build Coastguard Workerstruct BinaryRule {
51*993b0882SAndroid Build Coastguard Worker  // The two rhs nonterminals (together they form the key).
52*993b0882SAndroid Build Coastguard Worker  rhs_first:uint;
53*993b0882SAndroid Build Coastguard Worker
54*993b0882SAndroid Build Coastguard Worker  rhs_second:uint;
55*993b0882SAndroid Build Coastguard Worker
56*993b0882SAndroid Build Coastguard Worker  // The lhs set associated with this binary rule.
57*993b0882SAndroid Build Coastguard Worker  // This is an index into the (deduplicated) global `lhs_set` vector.
58*993b0882SAndroid Build Coastguard Worker  lhs_set_index:uint;
59*993b0882SAndroid Build Coastguard Worker}
60*993b0882SAndroid Build Coastguard Worker
61*993b0882SAndroid Build Coastguard Worker// A single bucket of the binary rule hash map, holding every entry that
62*993b0882SAndroid Build Coastguard Worker// shares one hash value.
63*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.grammar.RulesSet_.Rules_;
64*993b0882SAndroid Build Coastguard Workertable BinaryRuleTableBucket {
65*993b0882SAndroid Build Coastguard Worker  rules:[BinaryRule];  // The entries that fall into this bucket.
66*993b0882SAndroid Build Coastguard Worker}
67*993b0882SAndroid Build Coastguard Worker
68*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.grammar.RulesSet_;
69*993b0882SAndroid Build Coastguard Workertable Rules {  // The terminal, unary and binary rule maps for one set of locales.
70*993b0882SAndroid Build Coastguard Worker  // The locales this rule set applies to.
71*993b0882SAndroid Build Coastguard Worker  locale:[LanguageTag];
72*993b0882SAndroid Build Coastguard Worker
73*993b0882SAndroid Build Coastguard Worker  terminal_rules:Rules_.TerminalRulesMap;  // The terminal rules map.
74*993b0882SAndroid Build Coastguard Worker  lowercase_terminal_rules:Rules_.TerminalRulesMap;  // NOTE(review): presumably terminals matched case-insensitively - confirm.
75*993b0882SAndroid Build Coastguard Worker
76*993b0882SAndroid Build Coastguard Worker  // The unary rules map.
77*993b0882SAndroid Build Coastguard Worker  // Maps a nonterminal to an lhs set index, i.e. an index into the
78*993b0882SAndroid Build Coastguard Worker  // (deduplicated) global `lhs_set` vector.
79*993b0882SAndroid Build Coastguard Worker  unary_rules:[Rules_.UnaryRulesEntry];
80*993b0882SAndroid Build Coastguard Worker
81*993b0882SAndroid Build Coastguard Worker  // The binary rules (hash) map, stored as a vector of buckets.
82*993b0882SAndroid Build Coastguard Worker  // Maps a pair of nonterminals to an lhs set index, i.e. an index into the
83*993b0882SAndroid Build Coastguard Worker  // (deduplicated) global `lhs_set` vector.
84*993b0882SAndroid Build Coastguard Worker  binary_rules:[Rules_.BinaryRuleTableBucket];
85*993b0882SAndroid Build Coastguard Worker}
86*993b0882SAndroid Build Coastguard Worker
87*993b0882SAndroid Build Coastguard Worker// A set of lhs nonterminals associated with a rule match.
88*993b0882SAndroid Build Coastguard Worker// Most commonly, an entry is just the id of the lhs nonterminal of the rule
89*993b0882SAndroid Build Coastguard Worker// that is triggered; in this case `lhs` holds the id of the nonterminal.
90*993b0882SAndroid Build Coastguard Worker// If a callback needs to be triggered, the entry is the (negated) index into
91*993b0882SAndroid Build Coastguard Worker// the `lhs` vector of `RulesSet`, whose element specifies, in addition to the
92*993b0882SAndroid Build Coastguard Worker// nonterminal, the callback and the parameter to call it with.
93*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.grammar.RulesSet_;
94*993b0882SAndroid Build Coastguard Workertable LhsSet {
95*993b0882SAndroid Build Coastguard Worker  lhs:[int];  // Signed, because callback entries are stored negated.
96*993b0882SAndroid Build Coastguard Worker}
97*993b0882SAndroid Build Coastguard Worker
98*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.grammar.RulesSet_;
99*993b0882SAndroid Build Coastguard Workerstruct Lhs {  // An lhs nonterminal together with the callback to trigger for it.
100*993b0882SAndroid Build Coastguard Worker  // The lhs nonterminal.
101*993b0882SAndroid Build Coastguard Worker  nonterminal:uint;
102*993b0882SAndroid Build Coastguard Worker
103*993b0882SAndroid Build Coastguard Worker  // The id of the callback to trigger.
104*993b0882SAndroid Build Coastguard Worker  callback_id:uint;
105*993b0882SAndroid Build Coastguard Worker
106*993b0882SAndroid Build Coastguard Worker  // A parameter to pass when invoking the callback.
107*993b0882SAndroid Build Coastguard Worker  callback_param:ulong;
108*993b0882SAndroid Build Coastguard Worker
109*993b0882SAndroid Build Coastguard Worker  // The maximum amount of whitespace allowed between the two parts.
110*993b0882SAndroid Build Coastguard Worker  // A value of -1 allows unbounded whitespace.
111*993b0882SAndroid Build Coastguard Worker  max_whitespace_gap:byte;  // NOTE(review): "the two parts" are presumably the two rhs nonterminals of a binary rule - confirm.
112*993b0882SAndroid Build Coastguard Worker}
113*993b0882SAndroid Build Coastguard Worker
114*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.grammar.RulesSet_.Nonterminals_;
115*993b0882SAndroid Build Coastguard Workertable AnnotationNtEntry {  // One entry of the `annotation_nt` map in `Nonterminals`.
116*993b0882SAndroid Build Coastguard Worker  key:string (key, shared);  // The annotation/collection name.
117*993b0882SAndroid Build Coastguard Worker  value:int;  // The id of the nonterminal for that annotation.
118*993b0882SAndroid Build Coastguard Worker}
119*993b0882SAndroid Build Coastguard Worker
120*993b0882SAndroid Build Coastguard Worker// The pre-defined nonterminals that the lexer can generate, if they are used
121*993b0882SAndroid Build Coastguard Worker// by the grammar.
122*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.grammar.RulesSet_;
123*993b0882SAndroid Build Coastguard Workertable Nonterminals {
124*993b0882SAndroid Build Coastguard Worker  // Id of the nonterminal indicating the start of the input.
125*993b0882SAndroid Build Coastguard Worker  start_nt:int;
126*993b0882SAndroid Build Coastguard Worker
127*993b0882SAndroid Build Coastguard Worker  // Id of the nonterminal indicating the end of the input.
128*993b0882SAndroid Build Coastguard Worker  end_nt:int;
129*993b0882SAndroid Build Coastguard Worker
130*993b0882SAndroid Build Coastguard Worker  // Id of the nonterminal indicating a token.
131*993b0882SAndroid Build Coastguard Worker  token_nt:int;
132*993b0882SAndroid Build Coastguard Worker
133*993b0882SAndroid Build Coastguard Worker  // Id of the nonterminal indicating a string of digits.
134*993b0882SAndroid Build Coastguard Worker  digits_nt:int;
135*993b0882SAndroid Build Coastguard Worker
136*993b0882SAndroid Build Coastguard Worker  // `n_digits_nt[k]` is the id of the nonterminal indicating a string of
137*993b0882SAndroid Build Coastguard Worker  // exactly `k` digits.
138*993b0882SAndroid Build Coastguard Worker  n_digits_nt:[int];
139*993b0882SAndroid Build Coastguard Worker
140*993b0882SAndroid Build Coastguard Worker  // Id of the nonterminal indicating a word or token boundary.
141*993b0882SAndroid Build Coastguard Worker  wordbreak_nt:int;
142*993b0882SAndroid Build Coastguard Worker
143*993b0882SAndroid Build Coastguard Worker  // Id of the nonterminal indicating an uppercase token.
144*993b0882SAndroid Build Coastguard Worker  uppercase_token_nt:int;
145*993b0882SAndroid Build Coastguard Worker
146*993b0882SAndroid Build Coastguard Worker  // Pre-defined nonterminals for annotations.
147*993b0882SAndroid Build Coastguard Worker  // Maps annotation/collection names to nonterminal ids.
148*993b0882SAndroid Build Coastguard Worker  annotation_nt:[Nonterminals_.AnnotationNtEntry];
149*993b0882SAndroid Build Coastguard Worker}
150*993b0882SAndroid Build Coastguard Worker
151*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.grammar.RulesSet_.DebugInformation_;
152*993b0882SAndroid Build Coastguard Workertable NonterminalNamesEntry {  // One entry of the `nonterminal_names` map in `DebugInformation`.
153*993b0882SAndroid Build Coastguard Worker  key:int (key);  // NOTE(review): presumably the nonterminal id - confirm.
154*993b0882SAndroid Build Coastguard Worker  value:string (shared);  // NOTE(review): presumably the nonterminal's name - confirm.
155*993b0882SAndroid Build Coastguard Worker}
156*993b0882SAndroid Build Coastguard Worker
157*993b0882SAndroid Build Coastguard Worker// Debug information, e.g. for printing parse trees and showing match
158*993b0882SAndroid Build Coastguard Worker// information.
159*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.grammar.RulesSet_;
160*993b0882SAndroid Build Coastguard Workertable DebugInformation {
161*993b0882SAndroid Build Coastguard Worker  nonterminal_names:[DebugInformation_.NonterminalNamesEntry];  // Map keyed by `NonterminalNamesEntry.key`.
162*993b0882SAndroid Build Coastguard Worker}
163*993b0882SAndroid Build Coastguard Worker
164*993b0882SAndroid Build Coastguard Worker// A regex annotator: a pattern and the nonterminal it triggers.
165*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.grammar.RulesSet_;
166*993b0882SAndroid Build Coastguard Workertable RegexAnnotator {
167*993b0882SAndroid Build Coastguard Worker  // The pattern to run.
168*993b0882SAndroid Build Coastguard Worker  pattern:string (shared);
169*993b0882SAndroid Build Coastguard Worker
170*993b0882SAndroid Build Coastguard Worker  compressed_pattern:CompressedBuffer;  // NOTE(review): presumably a compressed alternative to `pattern` - confirm.
171*993b0882SAndroid Build Coastguard Worker
172*993b0882SAndroid Build Coastguard Worker  // The nonterminal to trigger.
173*993b0882SAndroid Build Coastguard Worker  nonterminal:uint;
174*993b0882SAndroid Build Coastguard Worker}
175*993b0882SAndroid Build Coastguard Worker
176*993b0882SAndroid Build Coastguard Worker// Context-free grammar rules representation.
177*993b0882SAndroid Build Coastguard Worker// Rules are represented in (mostly) Chomsky Normal Form, where every rule
178*993b0882SAndroid Build Coastguard Worker// has one of the following forms:
179*993b0882SAndroid Build Coastguard Worker// * <nonterm> ::= term
180*993b0882SAndroid Build Coastguard Worker// * <nonterm> ::= <nonterm>
181*993b0882SAndroid Build Coastguard Worker// * <nonterm> ::= <nonterm> <nonterm>
182*993b0882SAndroid Build Coastguard Worker// The `terminal_rules`, `unary_rules` and `binary_rules` maps of
183*993b0882SAndroid Build Coastguard Worker// `RulesSet_.Rules` represent these sets of rules.
184*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.grammar;
185*993b0882SAndroid Build Coastguard Workertable RulesSet {
186*993b0882SAndroid Build Coastguard Worker  rules:[RulesSet_.Rules];  // The rule maps; each entry carries the locales it applies to.
187*993b0882SAndroid Build Coastguard Worker  lhs_set:[RulesSet_.LhsSet];  // The (deduplicated) global lhs sets that the rule maps index into.
188*993b0882SAndroid Build Coastguard Worker  lhs:[RulesSet_.Lhs];  // Callback-carrying lhs entries, referenced by negated index from `LhsSet.lhs`.
189*993b0882SAndroid Build Coastguard Worker
190*993b0882SAndroid Build Coastguard Worker  // Terminals string pool.
191*993b0882SAndroid Build Coastguard Worker  // The strings are zero-byte delimited and are addressed by the
192*993b0882SAndroid Build Coastguard Worker  // `terminal_offsets` of the terminal rules maps.
193*993b0882SAndroid Build Coastguard Worker  terminals:string (shared);
194*993b0882SAndroid Build Coastguard Worker
195*993b0882SAndroid Build Coastguard Worker  nonterminals:RulesSet_.Nonterminals;  // Ids of the pre-defined nonterminals.
196*993b0882SAndroid Build Coastguard Worker  reserved_6:int16 (deprecated);  // Deprecated placeholder; do not remove, it keeps the positions of the following fields stable.
197*993b0882SAndroid Build Coastguard Worker  debug_information:RulesSet_.DebugInformation;
198*993b0882SAndroid Build Coastguard Worker  regex_annotator:[RulesSet_.RegexAnnotator];
199*993b0882SAndroid Build Coastguard Worker
200*993b0882SAndroid Build Coastguard Worker  // If true, the regexes are compiled only on first use.
201*993b0882SAndroid Build Coastguard Worker  lazy_regex_compilation:bool;
202*993b0882SAndroid Build Coastguard Worker
203*993b0882SAndroid Build Coastguard Worker  // The semantic expressions associated with rule matches.
204*993b0882SAndroid Build Coastguard Worker  semantic_expression:[SemanticExpression];
205*993b0882SAndroid Build Coastguard Worker
206*993b0882SAndroid Build Coastguard Worker  // The schema defining the semantic results, stored as raw bytes.
207*993b0882SAndroid Build Coastguard Worker  semantic_values_schema:[ubyte];
208*993b0882SAndroid Build Coastguard Worker}
209*993b0882SAndroid Build Coastguard Worker
210