//
// Copyright (C) 2018 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

include "utils/grammar/semantics/expression.fbs";
include "utils/i18n/language-tag.fbs";
include "utils/zlib/buffer.fbs";

// The terminal rules map, stored as a sorted strings table.
// The sorted terminal strings table is represented as offsets into the
// global strings pool, which saves memory across localized rule sets.
namespace libtextclassifier3.grammar.RulesSet_.Rules_;
table TerminalRulesMap {
  // The offsets into the terminals pool.
  terminal_offsets:[uint];

  // The lhs set associated with a terminal rule.
  // This is an offset into the (deduplicated) global `lhs_set` vector.
  lhs_set_index:[uint];

  // Bounds the lengths of the terminal strings for quick early lookup
  // abort.
  min_terminal_length:int;

  max_terminal_length:int;
}

// One key, value pair entry in the unary rules map.
// The key is a nonterminal and the value is the index of the lhs set in the
// (deduplicated) global `lhs_set` vector.
namespace libtextclassifier3.grammar.RulesSet_.Rules_;
struct UnaryRulesEntry {
  // Marked `(key)` so that vectors of entries can be sorted and searched by
  // this field.
  key:uint (key);

  value:uint;
}

// One key, value pair entry in the binary rules hash map.
// The key is a pair of nonterminals and the value is the index of the lhs
// set.
namespace libtextclassifier3.grammar.RulesSet_.Rules_;
struct BinaryRule {
  // The two rhs nonterminals.
  rhs_first:uint;

  rhs_second:uint;

  // The lhs set associated with this binary rule.
  // This is an offset into the (deduplicated) global `lhs_set` vector.
  lhs_set_index:uint;
}

// One bucket in the binary rule hash map that contains all entries for a
// given hash value.
namespace libtextclassifier3.grammar.RulesSet_.Rules_;
table BinaryRuleTableBucket {
  rules:[BinaryRule];
}

// The rules that apply to one set of locales.
namespace libtextclassifier3.grammar.RulesSet_;
table Rules {
  // The locale this rule set applies to.
  locale:[LanguageTag];

  // The terminal rules maps.
  // NOTE(review): `lowercase_terminal_rules` presumably holds the terminals
  // that are matched case-insensitively -- confirm against the matcher.
  terminal_rules:Rules_.TerminalRulesMap;
  lowercase_terminal_rules:Rules_.TerminalRulesMap;

  // The unary rules map.
  // This is a map from a nonterminal to an lhs set index into the
  // (deduplicated) global `lhs_set` vector.
  unary_rules:[Rules_.UnaryRulesEntry];

  // The binary rules (hash) map.
  // This is a map from nonterminal pair to an lhs set index into the
  // (deduplicated) global `lhs_set` vector.
  binary_rules:[Rules_.BinaryRuleTableBucket];
}

// A set of lhs nonterminals associated with a rule match.
// Most commonly, an entry is just the id of the lhs nonterminal of the rule
// that is triggered; in this case `lhs` is set to the id of the nonterminal.
// If a callback needs to be triggered, the entry is the (negated) index into
// the `lhs` vector of `RulesSet`, which specifies, in addition to the
// nonterminal, the callback and the parameter to call it with.
namespace libtextclassifier3.grammar.RulesSet_;
table LhsSet {
  lhs:[int];
}

// A lhs nonterminal together with the callback to trigger on a match.
namespace libtextclassifier3.grammar.RulesSet_;
struct Lhs {
  // The lhs nonterminal.
  nonterminal:uint;

  // The id of the callback to trigger.
  callback_id:uint;

  // A parameter to pass when invoking the callback.
  callback_param:ulong;

  // The maximum amount of whitespace allowed between the two parts.
  // A value of -1 allows for unbounded whitespace.
  max_whitespace_gap:byte;
}

// One entry of the `annotation_nt` map in `Nonterminals`:
// maps an annotation/collection name to a nonterminal id.
namespace libtextclassifier3.grammar.RulesSet_.Nonterminals_;
table AnnotationNtEntry {
  key:string (key, shared);
  value:int;
}

// Usage of pre-defined non-terminals that the lexer can generate if used by
// the grammar.
namespace libtextclassifier3.grammar.RulesSet_;
table Nonterminals {
  // Id of the nonterminal indicating the start of input.
  start_nt:int;

  // Id of the nonterminal indicating the end of input.
  end_nt:int;

  // Id of the nonterminal indicating a token.
  token_nt:int;

  // Id of the nonterminal indicating a string of digits.
  digits_nt:int;

  // `n_digits_nt[k]` is the id of the nonterminal indicating a string of
  // `k` digits.
  n_digits_nt:[int];

  // Id of the nonterminal indicating a word or token boundary.
  wordbreak_nt:int;

  // Id of the nonterminal indicating an uppercase token.
  uppercase_token_nt:int;

  // Predefined nonterminals for annotations.
  // Maps annotation/collection names to non-terminal ids.
  annotation_nt:[Nonterminals_.AnnotationNtEntry];
}

// One entry of the `nonterminal_names` map in `DebugInformation`.
// NOTE(review): presumably maps a nonterminal id to its name -- confirm
// against the code that writes this table.
namespace libtextclassifier3.grammar.RulesSet_.DebugInformation_;
table NonterminalNamesEntry {
  key:int (key);
  value:string (shared);
}

// Debug information, e.g. for printing parse trees and showing match
// information.
namespace libtextclassifier3.grammar.RulesSet_;
table DebugInformation {
  // Entries are keyed by `NonterminalNamesEntry.key`.
  nonterminal_names:[DebugInformation_.NonterminalNamesEntry];
}

// Regex annotators.
namespace libtextclassifier3.grammar.RulesSet_;
table RegexAnnotator {
  // The pattern to run.
  pattern:string (shared);

  // NOTE(review): presumably the same pattern in compressed form, used as an
  // alternative to `pattern` -- confirm against the loader.
  compressed_pattern:CompressedBuffer;

  // The nonterminal to trigger.
  nonterminal:uint;
}

// Context free grammar rules representation.
// Rules are represented in (mostly) Chomsky Normal Form, where all rules are
// of one of the following forms:
// * <nonterm> ::= term
// * <nonterm> ::= <nonterm>
// * <nonterm> ::= <nonterm> <nonterm>
// The `terminal_rules`, `unary_rules` and `binary_rules` maps of each
// `RulesSet_.Rules` entry represent these sets of rules.
namespace libtextclassifier3.grammar;
table RulesSet {
  // The rules, one entry per set of locales.
  rules:[RulesSet_.Rules];

  // The (deduplicated) global lhs sets, referenced by the `lhs_set_index`
  // fields and unary rule values of the rules maps.
  lhs_set:[RulesSet_.LhsSet];

  // The lhs entries that carry callback information, referenced by
  // (negated) index from `LhsSet.lhs`.
  lhs:[RulesSet_.Lhs];

  // Terminals string pool.
  // The strings are zero-byte delimited and offset indexed by
  // `terminal_offsets` in the terminals rules map.
  terminals:string (shared);

  // The pre-defined nonterminals used by the grammar.
  nonterminals:RulesSet_.Nonterminals;

  // Deprecated placeholder. It must stay in place so that the field slots of
  // the fields that follow do not change.
  reserved_6:int16 (deprecated);

  // Optional debug information.
  debug_information:RulesSet_.DebugInformation;

  // The regex annotators.
  regex_annotator:[RulesSet_.RegexAnnotator];

  // If true, will compile the regexes only on first use.
  lazy_regex_compilation:bool;

  // The semantic expressions associated with rule matches.
  semantic_expression:[SemanticExpression];

  // The schema defining the semantic results.
  semantic_values_schema:[ubyte];
}