// xref: /aosp_15_r20/external/libtextclassifier/native/annotator/model.fbs (revision 993b0882672172b81d12fad7a7ac0c3e5c824a12)
//
// Copyright (C) 2018 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

include "annotator/entity-data.fbs";
include "annotator/experimental/experimental.fbs";
include "utils/codepoint-range.fbs";
include "utils/container/bit-vector.fbs";
include "utils/flatbuffers/flatbuffers.fbs";
include "utils/grammar/rules.fbs";
include "utils/intents/intent-config.fbs";
include "utils/normalization.fbs";
include "utils/resources.fbs";
include "utils/tokenizer.fbs";
include "utils/zlib/buffer.fbs";

// Identifier for buffers of this schema. FlatBuffers requires exactly four
// characters, so the trailing space is significant.
file_identifier "TC2 ";

// The possible model modes, represented as a bit field: ANNOTATION (1),
// CLASSIFICATION (2) and SELECTION (4) are the single-bit flags, and the
// remaining values are their bitwise combinations.
namespace libtextclassifier3;
enum ModeFlag : int {
  NONE = 0,
  ANNOTATION = 1,
  CLASSIFICATION = 2,
  ANNOTATION_AND_CLASSIFICATION = 3,
  SELECTION = 4,
  ANNOTATION_AND_SELECTION = 5,
  CLASSIFICATION_AND_SELECTION = 6,
  ALL = 7,
}

// Enum for specifying the annotation usecase.
namespace libtextclassifier3;
enum AnnotationUsecase : int {
  // Results are optimized for Smart{Select,Share,Linkify}.
  ANNOTATION_USECASE_SMART = 0,

  // Results are optimized for using TextClassifier as an infrastructure that
  // annotates as much as possible.
  ANNOTATION_USECASE_RAW = 1,
}

// Identifies the kind of datetime extractor. Used as the type of
// DatetimeModelExtractor.extractor.
// The numeric values are part of the serialized format; do not renumber.
namespace libtextclassifier3;
enum DatetimeExtractorType : int {
  UNKNOWN_DATETIME_EXTRACTOR_TYPE = 0,
  AM = 1,
  PM = 2,
  JANUARY = 3,
  FEBRUARY = 4,
  MARCH = 5,
  APRIL = 6,
  MAY = 7,
  JUNE = 8,
  JULY = 9,
  AUGUST = 10,
  SEPTEMBER = 11,
  OCTOBER = 12,
  NOVEMBER = 13,
  DECEMBER = 14,
  NEXT = 15,
  NEXT_OR_SAME = 16,
  LAST = 17,
  NOW = 18,
  TOMORROW = 19,
  YESTERDAY = 20,
  PAST = 21,
  FUTURE = 22,
  DAY = 23,
  WEEK = 24,
  MONTH = 25,
  YEAR = 26,
  MONDAY = 27,
  TUESDAY = 28,
  WEDNESDAY = 29,
  THURSDAY = 30,
  FRIDAY = 31,
  SATURDAY = 32,
  SUNDAY = 33,
  DAYS = 34,
  WEEKS = 35,
  MONTHS = 36,

  // TODO(zilka): Make the following 3 values singular for consistency.
  HOURS = 37,
  MINUTES = 38,
  SECONDS = 39,

  YEARS = 40,
  DIGITS = 41,
  SIGNEDDIGITS = 42,
  ZERO = 43,
  ONE = 44,
  TWO = 45,
  THREE = 46,
  FOUR = 47,
  FIVE = 48,
  SIX = 49,
  SEVEN = 50,
  EIGHT = 51,
  NINE = 52,
  TEN = 53,
  ELEVEN = 54,
  TWELVE = 55,
  THIRTEEN = 56,
  FOURTEEN = 57,
  FIFTEEN = 58,
  SIXTEEN = 59,
  SEVENTEEN = 60,
  EIGHTEEN = 61,
  NINETEEN = 62,
  TWENTY = 63,
  THIRTY = 64,
  FORTY = 65,
  FIFTY = 66,
  SIXTY = 67,
  SEVENTY = 68,
  EIGHTY = 69,
  NINETY = 70,
  HUNDRED = 71,
  THOUSAND = 72,
  NOON = 73,
  MIDNIGHT = 74,
}

// Type of a capturing group in a datetime regex. Used as the element type of
// DatetimeModelPattern_.Regex.groups, where it decides how the matched
// content is parsed.
namespace libtextclassifier3;
enum DatetimeGroupType : int {
  GROUP_UNKNOWN = 0,
  GROUP_UNUSED = 1,
  GROUP_YEAR = 2,
  GROUP_MONTH = 3,
  GROUP_DAY = 4,
  GROUP_HOUR = 5,
  GROUP_MINUTE = 6,
  GROUP_SECOND = 7,
  GROUP_AMPM = 8,
  GROUP_RELATIONDISTANCE = 9,
  GROUP_RELATION = 10,
  GROUP_RELATIONTYPE = 11,

  // Dummy groups serve just as an inflator of the selection. E.g. we might want
  // to select more text than was contained in an envelope of all extractor
  // spans.
  GROUP_DUMMY1 = 12,
  GROUP_DUMMY2 = 13,

  GROUP_ABSOLUTETIME = 14,
}

// Options for the model that predicts text selection.
namespace libtextclassifier3;
table SelectionModelOptions {
  // If true, unpaired brackets contained in the predicted selection are
  // stripped from both ends of the selection before it is returned.
  // The bracket codepoints are defined in the Unicode standard:
  // http://www.unicode.org/Public/UNIDATA/BidiBrackets.txt
  strip_unpaired_brackets:bool = true;

  // Number of hypothetical click positions on either side of the actual click
  // to consider in order to enforce symmetry.
  // No explicit default is declared, so an unset field reads as 0.
  symmetry_context_size:int;

  // Number of examples to bundle in one batch for inference.
  batch_size:int = 1024;

  // Whether to always classify a suggested selection or only on demand.
  always_classify_suggested_selection:bool = false;
}

// Options for the model that classifies a text selection.
namespace libtextclassifier3;
table ClassificationModelOptions {
  // Limits for phone numbers: minimum and maximum number of digits.
  phone_min_num_digits:int = 7;

  phone_max_num_digits:int = 15;

  // Limits for addresses.
  // No explicit default is declared, so an unset field reads as 0.
  address_min_num_tokens:int;

  // Maximum number of tokens to attempt a classification (-1 is unlimited).
  max_num_tokens:int = -1;
}

// Options for post-checks, checksums and verification to apply on a match.
namespace libtextclassifier3;
table VerificationOptions {
  // Enables the Luhn checksum post-check (disabled by default).
  // NOTE(review): exact semantics are inferred from the field name; confirm
  // against the verifier implementation.
  verify_luhn_checksum:bool = false;

  // Lua verifier to use.
  // Index of the lua verifier in the model. The default is -1.
  lua_verifier:int = -1;
}

// Behaviour of rule capturing groups.
// This specifies how the text and span of a capturing group, in a regular
// expression or from a capturing match in a grammar rule, should be handled.
namespace libtextclassifier3;
table CapturingGroup {
  // If true, the span of the capturing group will be used to
  // extend the selection.
  extend_selection:bool = true;

  // If set, the text of the capturing group will be used to set a field in
  // the classification result entity data.
  entity_field_path:FlatbufferFieldPath;

  // If set, the flatbuffer entity data will be merged with the
  // classification result entity data.
  serialized_entity_data:string (shared);

  // If set, normalization to apply before text is used in entity data.
  normalization_options:NormalizationOptions;

  // NOTE(review): presumably a typed alternative to `serialized_entity_data`;
  // confirm against the code that consumes this table.
  entity_data:EntityData;
}

// A regular expression matcher to check. Used as the element type of
// RegexModel.patterns.
namespace libtextclassifier3.RegexModel_;
table Pattern {
  // The name of the collection of a match.
  collection_name:string (shared);

  // The pattern to check.
  pattern:string (shared);

  // The modes for which to apply the patterns.
  enabled_modes:ModeFlag = ALL;

  // The final score to assign to the results of this pattern.
  target_classification_score:float = 1;

  // Priority score used for conflict resolution with the other models.
  priority_score:float = 0;

  // If true, will use an approximate matching implementation implemented
  // using Find() instead of the true Match(). This approximate matching will
  // use the first Find() result and then check that it spans the whole input.
  use_approximate_matching:bool = false;

  // NOTE(review): presumably a compressed form of `pattern`; confirm against
  // the model loading code.
  compressed_pattern:CompressedBuffer;

  // Verification to apply on a match.
  verification_options:VerificationOptions;

  // Behaviour of the capturing groups of the pattern.
  capturing_group:[CapturingGroup];

  // Entity data to set for a match.
  serialized_entity_data:string (shared);

  // NOTE(review): presumably a typed alternative to `serialized_entity_data`;
  // confirm against the code that consumes this table.
  entity_data:EntityData;
}

// Regular expression based model: a list of patterns plus the Lua scripts
// available for verifying their matches.
namespace libtextclassifier3;
table RegexModel {
  // List of regular expression matchers to check.
  patterns:[RegexModel_.Pattern];

  // If true, will compile the regexes only on first use.
  lazy_regex_compilation:bool = true;

  // Lua scripts for match verification.
  // The verifier can access:
  // * `context`: The context as a string.
  // * `match`: The groups of the regex match as an array, each group gives
  //   * `begin`: span start
  //   * `end`: span end
  //   * `text`: the text
  // The verifier is expected to return a boolean, indicating whether the
  // verification succeeded or not.
  lua_verifier:[string];
}

// A regex pattern with typed capturing groups. Used as the element type of
// DatetimeModelPattern.regexes.
namespace libtextclassifier3.DatetimeModelPattern_;
table Regex {
  // The regex pattern.
  pattern:string (shared);

  // The ith entry specifies the type of the ith capturing group.
  // This is used to decide how the matched content has to be parsed.
  groups:[DatetimeGroupType];

  // NOTE(review): presumably a compressed form of `pattern`; confirm against
  // the model loading code.
  compressed_pattern:CompressedBuffer;
}

// A group of datetime regexes sharing locales, scores and enabled modes.
namespace libtextclassifier3;
table DatetimeModelPattern {
  // List of regex patterns.
  regexes:[DatetimeModelPattern_.Regex];

  // List of locale indices in DatetimeModel that represent the locales that
  // these patterns should be used for. If empty, can be used for all locales.
  locales:[int];

  // The final score to assign to the results of this pattern.
  target_classification_score:float = 1;

  // Priority score used for conflict resolution with the other models.
  priority_score:float = 0;

  // The modes for which to apply the patterns.
  enabled_modes:ModeFlag = ALL;

  // The annotation usecases for which to apply the patterns.
  // This is a flag field for values of AnnotationUsecase.
  // The default 4294967295 (0xFFFFFFFF) has every bit set.
  enabled_annotation_usecases:uint = 4294967295;
}

// Associates a DatetimeExtractorType with a pattern and a list of locales.
namespace libtextclassifier3;
table DatetimeModelExtractor {
  // The extractor type this entry defines.
  extractor:DatetimeExtractorType;

  // The pattern of the extractor.
  pattern:string (shared);

  // NOTE(review): presumably indices into DatetimeModel.locales, as for
  // DatetimeModelPattern.locales; confirm.
  locales:[int];

  // NOTE(review): presumably a compressed form of `pattern`; confirm against
  // the model loading code.
  compressed_pattern:CompressedBuffer;
}

// Datetime model: supported locales, the patterns and extractors defined for
// them, and the options controlling how they are applied.
namespace libtextclassifier3;
table DatetimeModel {
  // List of BCP 47 locale strings representing all locales supported by the
  // model. The individual patterns refer back to them using an index.
  locales:[string];

  patterns:[DatetimeModelPattern];
  extractors:[DatetimeModelExtractor];

  // If true, will use the extractors for determining the match location as
  // opposed to using the location where the global pattern matched.
  use_extractors_for_locating:bool = true;

  // List of locale ids whose rules are always run, after the requested
  // ones.
  default_locales:[int];

  // If true, will generate the alternative interpretations for ambiguous
  // datetime expressions.
  generate_alternative_interpretations_when_ambiguous:bool = false;

  // If true, will compile the regexes only on first use.
  lazy_regex_compilation:bool = true;

  // If true, will give only future dates (when the day is not specified).
  prefer_future_for_unspecified_date:bool = false;
}

// Configuration for the tokenizer.
namespace libtextclassifier3;
table GrammarTokenizerOptions {
  // The tokenization type to use. The default is ICU.
  tokenization_type:TokenizationType = ICU;

  // If true, white space tokens will be kept when using the icu tokenizer.
  icu_preserve_whitespace_tokens:bool = false;

  // Codepoint ranges that determine what role the different codepoints play
  // during tokenization. The ranges must not overlap.
  tokenization_codepoint_config:[TokenizationCodepointRange];

  // A set of codepoint ranges to use in the mixed tokenization mode to identify
  // stretches of tokens to re-tokenize using the internal tokenizer.
  internal_tokenizer_codepoint_ranges:[CodepointRange];

  // If true, tokens will be also split when the codepoint's script_id changes
  // as defined in TokenizationCodepointRange.
  tokenize_on_script_change:bool = false;
}

// A named datetime model. Used as the element type of
// DatetimeModelLibrary.models.
namespace libtextclassifier3.DatetimeModelLibrary_;
table Item {
  // Name of the model.
  key:string (shared);

  // The datetime model stored under `key`.
  value:DatetimeModel;
}

// A set of named DateTime models.
namespace libtextclassifier3;
table DatetimeModelLibrary {
  // The (name, model) entries of the library.
  models:[DatetimeModelLibrary_.Item];
}

// Classification result to instantiate for a rule match.
namespace libtextclassifier3.GrammarModel_;
table RuleClassificationResult {
  // The name of the collection.
  collection_name:string (shared);

  // The score.
  target_classification_score:float = 1;

  // The priority score used for conflict resolution with the other models.
  priority_score:float = 0;

  // Behaviour of capturing matches.
  capturing_group:[CapturingGroup];

  // Entity data to set for a match.
  serialized_entity_data:string (shared);

  // Enabled modes.
  enabled_modes:ModeFlag = ALL;

  // NOTE(review): presumably a typed alternative to `serialized_entity_data`;
  // confirm against the code that consumes this table.
  entity_data:EntityData;
}

// Configuration for grammar based annotators.
namespace libtextclassifier3;
table GrammarModel {
  // The grammar rules.
  rules:grammar.RulesSet;

  // Deprecated. Used only for the old implementation of the grammar model.
  rule_classification_result:[GrammarModel_.RuleClassificationResult];

  // Number of tokens in the context to use for classification and text
  // selection suggestion.
  // A value -1 uses the full context.
  // NOTE(review): no explicit default is declared, so an unset field reads
  // as 0, not -1.
  context_left_num_tokens:int;

  context_right_num_tokens:int;

  // Grammar specific tokenizer options.
  tokenizer_options:GrammarTokenizerOptions;

  // The score.
  target_classification_score:float = 1;

  // The priority score used for conflict resolution with the other models.
  // NOTE(review): defaults to 1 here, while RegexModel_.Pattern,
  // DatetimeModelPattern and GrammarModel_.RuleClassificationResult default
  // their priority_score to 0; confirm this is intentional.
  priority_score:float = 1;

  // Global enabled modes. Use this instead of
  // `rule_classification_result.enabled_modes`.
  enabled_modes:ModeFlag = ALL;
}

// Map entry from a quantity name to an exponent. Used as the element type of
// MoneyParsingOptions.quantities_name_to_exponent.
namespace libtextclassifier3.MoneyParsingOptions_;
table QuantitiesNameToExponentEntry {
  // The quantity name. Declared as the table's `key` attribute.
  key:string (key, shared);

  // The exponent associated with `key`.
  value:int;
}

namespace libtextclassifier3;

// Settings for parsing money amounts.
table MoneyParsingOptions {
  // Codepoints that act as a decimal or a thousands separator inside a money
  // amount.
  separators:[int32];

  // Maps a quantity string to the power of 10 the amount gets multiplied
  // with, e.g. "million" -> 6.
  // NOTE: Entries must be sorted by key, because lookups go through
  // LookupByKey.
  quantities_name_to_exponent:[MoneyParsingOptions_.QuantitiesNameToExponentEntry];
}
458*993b0882SAndroid Build Coastguard Worker
namespace libtextclassifier3.ModelTriggeringOptions_;

// A single (collection name -> priority factor) pair; element type of
// ModelTriggeringOptions.collection_to_priority.
table CollectionToPriorityEntry {
  // Collection type, e.g. "address" or "phone". Declared as the table key, so
  // vectors of this table can be searched by it.
  key:string (key, shared);

  // Factor applied to the priority score of the collection named by the key.
  value:float32 = 0;
}
464*993b0882SAndroid Build Coastguard Worker
namespace libtextclassifier3;

// Options that control the output of the Tensorflow Lite models.
table ModelTriggeringOptions {
  // Annotation model outputs scoring below this confidence are filtered out.
  min_annotate_confidence:float32 = 0;

  // Modes in which the models are enabled.
  enabled_modes:ModeFlag = ALL;

  // Locales supported by dictionary classification, given as a
  // comma-separated list of BCP 47 tags.
  dictionary_locales:string (shared);

  // Locales supported by the model, given as a comma-separated list of BCP 47
  // tags. Used to avoid triggering on input in unsupported languages. When
  // empty, the model triggers on all inputs.
  locales:string (shared);

  // Priority score given to the "other" class coming from the ML model.
  other_collection_priority_score:float32 = -1000;

  // Priority score given to knowledge engine annotations.
  knowledge_priority_score:float32 = 0;

  // Retired field. It is kept in the schema (marked deprecated) so that the
  // fields declared after it keep their positions.
  reserved_7:int16 (deprecated);

  // Per-collection factor applied to the priority score of entities.
  // Key: collection type (e.g. "address", "phone", ...), value: the factor.
  // NOTE: Entries must be sorted, because lookups go through LookupByKey.
  collection_to_priority:[ModelTriggeringOptions_.CollectionToPriorityEntry];

  // Modes in which the knowledge engine model is enabled.
  knowledge_enabled_modes:ModeFlag = ALL;

  // Modes in which the experimental model is enabled.
  experimental_enabled_modes:ModeFlag = ALL;

  // Modes in which the installed app model is enabled.
  installed_app_enabled_modes:ModeFlag = ALL;
}
504*993b0882SAndroid Build Coastguard Worker
namespace libtextclassifier3;

// Options that control the output of the classifier.
//
// Each field below is a list of collection names that get filtered out at the
// output; what "filtered out" means depends on the operation.
table OutputOptions {
  // Annotation: spans of the listed collections are simply dropped.
  filtered_collections_annotation:[string];

  // Classification: results for the listed collections are mapped to the
  // class "other".
  filtered_collections_classification:[string];

  // Selection: spans of the listed classes are returned as single-selection.
  filtered_collections_selection:[string];
}
518*993b0882SAndroid Build Coastguard Worker
namespace libtextclassifier3.Model_;

// Description of an optional pruning mask over the embedding buckets.
table EmbeddingPruningMask {
  // Turns the pruning mask on. When set, pruning_mask determines how
  // hashed-charactergrams are mapped.
  enabled:bool = false;

  // The binary pruning mask, packed into uint64 values.
  pruning_mask:[uint64] (force_align: 16);

  // Bucket count before pruning was applied.
  full_num_buckets:int32 = 0;

  // Row index in the compressed embedding matrix that every pruned bucket is
  // mapped to.
  pruned_row_bucket_id:int32 = 0;
}
535*993b0882SAndroid Build Coastguard Worker
namespace libtextclassifier3.Model_;

// Settings for resolving conflicts between annotations.
table ConflictResolutionOptions {
  // When set, conflict resolution gives priority to the longest annotation.
  prioritize_longest_annotation:bool = false;

  // Controls conflict resolution between the different sub-annotators in RAW
  // mode: performed when true, skipped entirely when false.
  do_conflict_resolution_in_raw_mode:bool = true;
}
547*993b0882SAndroid Build Coastguard Worker
namespace libtextclassifier3;

// Top-level annotator model: embedded Tensorflow Lite models plus the options
// of the individual sub-annotators.
// NOTE: Fields carry no explicit ids, so their declaration order must be
// preserved; new fields belong at the end.
table Model {
  // Locales supported by the model: comma-separated list of BCP 47 tags.
  locales:string (shared);

  version:int32 = 0;

  // Model name, usable e.g. for logging.
  name:string (shared);

  selection_feature_options:FeatureProcessorOptions;
  classification_feature_options:FeatureProcessorOptions;

  // The Tensorflow Lite models.
  selection_model:[uint8] (force_align: 16);

  classification_model:[uint8] (force_align: 16);
  embedding_model:[uint8] (force_align: 16);

  // Per-model options.
  selection_options:SelectionModelOptions;

  classification_options:ClassificationModelOptions;
  regex_model:RegexModel;
  datetime_model:DatetimeModel;

  // Options that control the output of the models.
  triggering_options:ModelTriggeringOptions;

  // Global switch deciding whether SuggestSelection(), ClassifyText() and
  // Annotate() run. A disabled mode yields empty/no-op results.
  enabled_modes:ModeFlag = ALL;

  // When true, a selection made up only of whitespace is snapped to the
  // containing suggested span. When false, nothing is suggested for it,
  // because such a selection is not part of any token.
  snap_whitespace_selections:bool = true;

  // Global output configuration for SuggestSelection(), ClassifyText() and
  // Annotate().
  output_options:OutputOptions;

  // Configuration of Intent generation on Android.
  android_intent_options:AndroidIntentFactoryOptions;

  intent_options:IntentFactoryModel;

  // Resources of the model.
  resources:ResourcePool;

  // Schema data used for handling entity data.
  entity_data_schema:[uint8];

  number_annotator_options:NumberAnnotatorOptions;
  duration_annotator_options:DurationAnnotatorOptions;

  // Locales supported by the model, given as a comma-separated list of BCP 47
  // tags. Used to avoid triggering on input in unsupported languages. When
  // empty, the model triggers on all inputs.
  triggering_locales:string (shared);

  embedding_pruning_mask:Model_.EmbeddingPruningMask;

  // Retired field. It is kept in the schema (marked deprecated) so that the
  // fields declared after it keep their positions.
  reserved_25:int16 (deprecated);

  contact_annotator_options:ContactAnnotatorOptions;
  money_parsing_options:MoneyParsingOptions;
  translate_annotator_options:TranslateAnnotatorOptions;
  grammar_model:GrammarModel;
  conflict_resolution_options:Model_.ConflictResolutionOptions;
  experimental_model:ExperimentalModel;
  pod_ner_model:PodNerModel;
  vocab_model:VocabModel;
  datetime_grammar_model:GrammarModel;
}
621*993b0882SAndroid Build Coastguard Worker
namespace libtextclassifier3.FeatureProcessorOptions_;

// Method for selecting the center token.
enum CenterTokenSelectionMethod : int {
  // Invalid option. This is value 0, i.e. what a field of this enum type
  // holds when it declares no other default and is not set.
  DEFAULT_CENTER_TOKEN_METHOD = 0,

  // Use click indices to determine the center token.
  CENTER_TOKEN_FROM_CLICK = 1,

  // Use selection indices to get a token range, and select the middle of it
  // as the center token.
  CENTER_TOKEN_MIDDLE_OF_SELECTION = 2,
}
635*993b0882SAndroid Build Coastguard Worker
namespace libtextclassifier3.FeatureProcessorOptions_;

// Configuration of bounds-sensitive feature extraction.
table BoundsSensitiveFeatures {
  // Switches extraction from click context features to bounds-sensitive
  // features.
  enabled:bool = false;

  // The four counts below give the number of tokens to extract at specific
  // locations relative to the span bounds.

  // Tokens immediately before the span.
  num_tokens_before:int32 = 0;

  // Tokens inside the span, aligned with its beginning.
  num_tokens_inside_left:int32 = 0;

  // Tokens inside the span, aligned with its end.
  num_tokens_inside_right:int32 = 0;

  // Tokens immediately after the span.
  num_tokens_after:int32 = 0;

  // When true, the tokens of the whole span are extracted as well and their
  // features are summed into one "token" that is added to the extracted
  // features.
  include_inside_bag:bool = false;

  // When true, the selection length (counted in tokens) is included as a
  // feature.
  include_inside_length:bool = false;

  // When true, for selection, single token spans skip the model and are
  // assumed to score zero.
  score_single_token_spans_as_zero:bool = false;
}
669*993b0882SAndroid Build Coastguard Worker
namespace libtextclassifier3;

// Options of the feature processor (tokenization and per-token feature
// extraction).
// NOTE: Fields carry no explicit ids, so their declaration order must be
// preserved; new fields belong at the end.
table FeatureProcessorOptions {
  // Number of buckets used for hashing charactergrams.
  num_buckets:int32 = -1;

  // Size of the embedding.
  embedding_size:int32 = -1;

  // Number of bits for quantization for embeddings.
  embedding_quantization_bits:int32 = 8;

  // Number of words taken to the left and to the right of the selected word
  // as its context. E.g. with a context size of N, N words to the left and N
  // words to the right of the selected word are used.
  context_size:int32 = -1;

  // Maximum number of words of the context to select in total.
  max_selection_span:int32 = -1;

  // Orders of charactergrams to extract. E.g., 2 means character bigrams, 3
  // character trigrams etc.
  chargram_orders:[int32];

  // Maximum length of a word, in codepoints.
  max_word_length:int32 = 20;

  // If true, will use the unicode-aware functionality for extracting features.
  unicode_aware_features:bool = false;

  // Whether to extract the token case feature.
  extract_case_feature:bool = false;

  // Whether to extract the selection mask feature.
  extract_selection_mask_feature:bool = false;

  // List of regexps to run over each token. For each regexp, if there is a
  // match, a dense feature of 1.0 is emitted. Otherwise -1.0 is used.
  regexp_feature:[string];

  // Whether to remap all digits to a single number.
  remap_digits:bool = false;

  // Whether to lower-case each token before generating hashgrams.
  lowercase_tokens:bool = false;

  // If true, the selection classifier output will contain only the selections
  // that are feasible (e.g., those that are shorter than max_selection_span),
  // if false, the output will be a complete cross-product of possible
  // selections to the left and possible selections to the right, including the
  // infeasible ones.
  // NOTE: Exists mainly for compatibility with older models that were trained
  // with the non-reduced output space.
  selection_reduced_output_space:bool = true;

  // Collection names.
  collections:[string];

  // An index of collection in collections to be used if a collection name can't
  // be mapped to an id.
  default_collection:int32 = -1;

  // If true, will split the input by lines, and only use the line that contains
  // the clicked token.
  only_use_line_with_click:bool = false;

  // If true, will split tokens that contain the selection boundary, at the
  // position of the boundary.
  // E.g. "foo{bar}@google.com" -> "foo", "bar", "@google.com"
  split_tokens_on_selection_boundaries:bool = false;

  // Codepoint ranges that determine how different codepoints are tokenized.
  // The ranges must not overlap.
  tokenization_codepoint_config:[TokenizationCodepointRange];

  // No explicit default is declared, so an unset field reads as the enum's
  // zero value (DEFAULT_CENTER_TOKEN_METHOD).
  center_token_selection_method:FeatureProcessorOptions_.CenterTokenSelectionMethod;

  // If true, span boundaries will be snapped to containing tokens and not
  // required to exactly match token boundaries.
  snap_label_span_boundaries_to_containing_tokens:bool = false;

  // A set of codepoint ranges supported by the model.
  supported_codepoint_ranges:[CodepointRange];

  // A set of codepoint ranges to use in the mixed tokenization mode to identify
  // stretches of tokens to re-tokenize using the internal tokenizer.
  internal_tokenizer_codepoint_ranges:[CodepointRange];

  // Minimum ratio of supported codepoints in the input context. If the ratio
  // is lower than this, the feature computation will fail.
  min_supported_codepoint_ratio:float32 = 0;

  // Used for versioning the format of features the model expects.
  // - feature_version == 0:
  //   For each token the features consist of:
  //   - chargram embeddings
  //   - dense features
  //   Chargram embeddings for tokens are concatenated first together,
  //   and at the end, the dense features for the tokens are concatenated
  //   to it. So the resulting feature vector has two regions.
  feature_version:int32 = 0;

  tokenization_type:TokenizationType = INTERNAL_TOKENIZER;
  icu_preserve_whitespace_tokens:bool = false;

  // List of codepoints that will be stripped from beginning and end of
  // predicted spans.
  ignored_span_boundary_codepoints:[int32];

  bounds_sensitive_features:FeatureProcessorOptions_.BoundsSensitiveFeatures;

  // List of allowed charactergrams. The extracted charactergrams are filtered
  // using this list, and charactergrams that are not present are interpreted as
  // out-of-vocabulary.
  // If no allowed_chargrams are specified, all charactergrams are allowed.
  // NOTE(review): An earlier version of this comment said the field is "typed
  // as bytes" to allow non-UTF8 chargrams, but in this schema it is declared
  // as [string]. The type cannot be changed without breaking existing models.
  // Entries may therefore hold non-UTF8 data and should presumably be treated
  // as raw bytes by consumers -- TODO confirm.
  allowed_chargrams:[string];

  // If true, tokens will be also split when the codepoint's script_id changes
  // as defined in TokenizationCodepointRange.
  tokenize_on_script_change:bool = false;

  // If true, the pipe character '|' will be used as a newline character when
  // splitting lines.
  use_pipe_character_for_newline:bool = true;
}
796*993b0882SAndroid Build Coastguard Worker
namespace libtextclassifier3;

// Options of the number/percentage annotator.
// NOTE: Several fields are described as "[Deprecated]" in their comments but
// do not carry the `deprecated` attribute, so accessors are still generated
// for them. Adding the attribute would remove those accessors and is
// therefore not done here.
table NumberAnnotatorOptions {
  // If true, number and percentage annotations will be produced.
  enabled:bool = false;

  // Score to assign to the annotated numbers and percentages in the annotator.
  score:float32 = 1;

  // Number priority score used for conflict resolution with the other models.
  priority_score:float32 = 0;

  // The modes in which to enable number and percentage annotations.
  enabled_modes:ModeFlag = ALL;

  // The annotation usecases for which to produce number annotations.
  // This is a flag field for values of AnnotationUsecase.
  // The default, 4294967295, is 0xFFFFFFFF, i.e. all 32 bits set.
  enabled_annotation_usecases:uint32 = 4294967295;

  // [Deprecated] A list of codepoints that can form a prefix of a valid number.
  allowed_prefix_codepoints:[int32];

  // [Deprecated] A list of codepoints that can form a suffix of a valid number.
  allowed_suffix_codepoints:[int32];

  // [Deprecated] List of codepoints that will be stripped from beginning of
  // predicted spans.
  ignored_prefix_span_boundary_codepoints:[int32];

  // [Deprecated] List of codepoints that will be stripped from end of predicted
  // spans.
  ignored_suffix_span_boundary_codepoints:[int32];

  // [Deprecated] If true, percent annotations will be produced.
  enable_percentage:bool = false;

  // Zero separated and ordered list of suffixes that mark a percent.
  percentage_pieces_string:string (shared);

  // [Deprecated] List of suffixes offsets in the percentage_pieces_string
  // string.
  percentage_pieces_offsets:[int32];

  // Priority score for the percentage annotation.
  percentage_priority_score:float32 = 1;

  // Float number priority score used for conflict resolution with the other
  // models.
  float_number_priority_score:float32 = 0;

  // The maximum number of digits an annotated number can have. Requirement:
  // the value should be less or equal to 20.
  max_number_of_digits:int32 = 20;

  // The annotation usecases for which to produce percentage annotations.
  // This is a flag field for values of AnnotationUsecase.
  percentage_annotation_usecases:uint32 = 2;
}
853*993b0882SAndroid Build Coastguard Worker
854*993b0882SAndroid Build Coastguard Worker// DurationAnnotator is so far tailored for English and Japanese only.
855*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3;
856*993b0882SAndroid Build Coastguard Workertable DurationAnnotatorOptions {
857*993b0882SAndroid Build Coastguard Worker  // If true, duration annotations will be produced.
858*993b0882SAndroid Build Coastguard Worker  enabled:bool = false;
859*993b0882SAndroid Build Coastguard Worker
860*993b0882SAndroid Build Coastguard Worker  // Score to assign to the annotated durations from the annotator.
861*993b0882SAndroid Build Coastguard Worker  score:float = 1;
862*993b0882SAndroid Build Coastguard Worker
863*993b0882SAndroid Build Coastguard Worker  // Priority score used for conflict resolution with the other models.
864*993b0882SAndroid Build Coastguard Worker  priority_score:float = 0;
865*993b0882SAndroid Build Coastguard Worker
866*993b0882SAndroid Build Coastguard Worker  // The modes in which to enable duration annotations.
867*993b0882SAndroid Build Coastguard Worker  enabled_modes:ModeFlag = ALL;
868*993b0882SAndroid Build Coastguard Worker
869*993b0882SAndroid Build Coastguard Worker  // The annotation usecases for which to produce duration annotations.
  // The default (4294967295 == 0xFFFFFFFF) has all 32 bits set.
870*993b0882SAndroid Build Coastguard Worker  enabled_annotation_usecases:uint = 4294967295;
871*993b0882SAndroid Build Coastguard Worker
872*993b0882SAndroid Build Coastguard Worker  // Durations typically look like "XX hours and XX minutes". The lists of
873*993b0882SAndroid Build Coastguard Worker  // strings below enumerate variants of "hours", "minutes", etc. in these
874*993b0882SAndroid Build Coastguard Worker  // expressions. These are verbatim strings that are matched against tokens in
875*993b0882SAndroid Build Coastguard Worker  // the input.
876*993b0882SAndroid Build Coastguard Worker  week_expressions:[string];
877*993b0882SAndroid Build Coastguard Worker
878*993b0882SAndroid Build Coastguard Worker  day_expressions:[string];
879*993b0882SAndroid Build Coastguard Worker  hour_expressions:[string];
880*993b0882SAndroid Build Coastguard Worker  minute_expressions:[string];
881*993b0882SAndroid Build Coastguard Worker  second_expressions:[string];
882*993b0882SAndroid Build Coastguard Worker
883*993b0882SAndroid Build Coastguard Worker  // List of expressions that don't break a duration expression (they can
884*993b0882SAndroid Build Coastguard Worker  // become a part of it) but have no semantic meaning.
885*993b0882SAndroid Build Coastguard Worker  filler_expressions:[string];
886*993b0882SAndroid Build Coastguard Worker
887*993b0882SAndroid Build Coastguard Worker  // List of expressions that mean half of a unit of duration (e.g. "half an
888*993b0882SAndroid Build Coastguard Worker  // hour").
889*993b0882SAndroid Build Coastguard Worker  half_expressions:[string];
890*993b0882SAndroid Build Coastguard Worker
891*993b0882SAndroid Build Coastguard Worker  // Set of codepoints that can split the Annotator tokens into sub-tokens for
892*993b0882SAndroid Build Coastguard Worker  // sub-token matching.
893*993b0882SAndroid Build Coastguard Worker  sub_token_separator_codepoints:[int];
894*993b0882SAndroid Build Coastguard Worker
895*993b0882SAndroid Build Coastguard Worker  // If this is true, a unit must be associated with a quantity. For example,
896*993b0882SAndroid Build Coastguard Worker  // the phrase "minute" is not parsed as a one-minute duration if this is true.
897*993b0882SAndroid Build Coastguard Worker  require_quantity:bool;
898*993b0882SAndroid Build Coastguard Worker
899*993b0882SAndroid Build Coastguard Worker  // If this is true, a dangling quantity is included in the annotation. For
900*993b0882SAndroid Build Coastguard Worker  // example, "10 minutes 20" is interpreted as 10 minutes and 20 seconds.
901*993b0882SAndroid Build Coastguard Worker  enable_dangling_quantity_interpretation:bool = true;
902*993b0882SAndroid Build Coastguard Worker}
903*993b0882SAndroid Build Coastguard Worker
// Options for the contact annotator.
904*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3;
905*993b0882SAndroid Build Coastguard Workertable ContactAnnotatorOptions {
906*993b0882SAndroid Build Coastguard Worker  // Declension support; supported for English genitives only so far.
907*993b0882SAndroid Build Coastguard Worker  enable_declension:bool;
908*993b0882SAndroid Build Coastguard Worker
909*993b0882SAndroid Build Coastguard Worker  // For each language there is a customized list of supported declensions.
910*993b0882SAndroid Build Coastguard Worker  language:string (shared);
911*993b0882SAndroid Build Coastguard Worker
912*993b0882SAndroid Build Coastguard Worker  // The modes in which the contact annotator is enabled.
913*993b0882SAndroid Build Coastguard Worker  enabled_modes:ModeFlag = ALL;
914*993b0882SAndroid Build Coastguard Worker}
915*993b0882SAndroid Build Coastguard Worker
// Algorithm choices; this is the type of TranslateAnnotatorOptions.algorithm.
916*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.TranslateAnnotatorOptions_;
917*993b0882SAndroid Build Coastguard Workerenum Algorithm : int {
918*993b0882SAndroid Build Coastguard Worker  DEFAULT_ALGORITHM = 0,
919*993b0882SAndroid Build Coastguard Worker  BACKOFF = 1,
920*993b0882SAndroid Build Coastguard Worker}
921*993b0882SAndroid Build Coastguard Worker
922*993b0882SAndroid Build Coastguard Worker// Backoff is the algorithm shipped with Android Q.
923*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.TranslateAnnotatorOptions_;
924*993b0882SAndroid Build Coastguard Workertable BackoffOptions {
925*993b0882SAndroid Build Coastguard Worker  // The minimum size of text to prefer for detection (in codepoints).
926*993b0882SAndroid Build Coastguard Worker  min_text_size:int = 20;
927*993b0882SAndroid Build Coastguard Worker
928*993b0882SAndroid Build Coastguard Worker  // For reducing the score when the text is shorter than the preferred size.
929*993b0882SAndroid Build Coastguard Worker  penalize_ratio:float = 1;
930*993b0882SAndroid Build Coastguard Worker
931*993b0882SAndroid Build Coastguard Worker  // Ratio of the original detection score to the surrounding text detection
  // score.
932*993b0882SAndroid Build Coastguard Worker  subject_text_score_ratio:float = 0.4;
933*993b0882SAndroid Build Coastguard Worker}
934*993b0882SAndroid Build Coastguard Worker
// Options for the translate annotator.
935*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3;
936*993b0882SAndroid Build Coastguard Workertable TranslateAnnotatorOptions {
937*993b0882SAndroid Build Coastguard Worker  enabled:bool = false;
938*993b0882SAndroid Build Coastguard Worker
939*993b0882SAndroid Build Coastguard Worker  // Score to assign to the classification results.
940*993b0882SAndroid Build Coastguard Worker  score:float = 1;
941*993b0882SAndroid Build Coastguard Worker
942*993b0882SAndroid Build Coastguard Worker  // Priority score used for conflict resolution with the other models.
943*993b0882SAndroid Build Coastguard Worker  priority_score:float;
944*993b0882SAndroid Build Coastguard Worker
  // Algorithm selection, and the options table for the backoff algorithm.
  // NOTE(review): presumably backoff_options is only consulted when algorithm
  // is BACKOFF — confirm against the annotator implementation.
945*993b0882SAndroid Build Coastguard Worker  algorithm:TranslateAnnotatorOptions_.Algorithm;
946*993b0882SAndroid Build Coastguard Worker  backoff_options:TranslateAnnotatorOptions_.BackoffOptions;
947*993b0882SAndroid Build Coastguard Worker
948*993b0882SAndroid Build Coastguard Worker  // The modes in which the translate annotator is enabled.
949*993b0882SAndroid Build Coastguard Worker  enabled_modes:ModeFlag = CLASSIFICATION;
950*993b0882SAndroid Build Coastguard Worker}
951*993b0882SAndroid Build Coastguard Worker
// A collection of labeled entities; element type of PodNerModel.collections.
952*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.PodNerModel_;
953*993b0882SAndroid Build Coastguard Workertable Collection {
954*993b0882SAndroid Build Coastguard Worker  // Collection's name (e.g., "location", "person").
955*993b0882SAndroid Build Coastguard Worker  name:string (shared);
956*993b0882SAndroid Build Coastguard Worker
957*993b0882SAndroid Build Coastguard Worker  // Priority scores used for conflict resolution with the other annotators
958*993b0882SAndroid Build Coastguard Worker  // when the annotation is made over a single-token / multi-token text.
959*993b0882SAndroid Build Coastguard Worker  single_token_priority_score:float;
960*993b0882SAndroid Build Coastguard Worker
961*993b0882SAndroid Build Coastguard Worker  multi_token_priority_score:float;
962*993b0882SAndroid Build Coastguard Worker}
963*993b0882SAndroid Build Coastguard Worker
// Type of Label.boise_type. NOTE(review): presumably a BIOES-style tagging
// scheme (begin / intermediate / end / single / outside) — confirm.
964*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.PodNerModel_.Label_;
965*993b0882SAndroid Build Coastguard Workerenum BoiseType : int {
966*993b0882SAndroid Build Coastguard Worker  NONE = 0,
967*993b0882SAndroid Build Coastguard Worker  BEGIN = 1,
968*993b0882SAndroid Build Coastguard Worker  O = 2,
969*993b0882SAndroid Build Coastguard Worker  // No label. (This comment appears to describe O above — confirm.)
970*993b0882SAndroid Build Coastguard Worker
971*993b0882SAndroid Build Coastguard Worker  INTERMEDIATE = 3,
972*993b0882SAndroid Build Coastguard Worker  SINGLE = 4,
973*993b0882SAndroid Build Coastguard Worker  END = 5,
974*993b0882SAndroid Build Coastguard Worker}
975*993b0882SAndroid Build Coastguard Worker
// Type of Label.mention_type. NOTE(review): presumably NAM = named mention and
// NOM = nominal mention — confirm.
976*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.PodNerModel_.Label_;
977*993b0882SAndroid Build Coastguard Workerenum MentionType : int {
978*993b0882SAndroid Build Coastguard Worker  UNDEFINED = 0,
979*993b0882SAndroid Build Coastguard Worker  NAM = 1,
980*993b0882SAndroid Build Coastguard Worker  NOM = 2,
981*993b0882SAndroid Build Coastguard Worker}
982*993b0882SAndroid Build Coastguard Worker
// A label the NER model can output; element type of PodNerModel.labels.
983*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.PodNerModel_;
984*993b0882SAndroid Build Coastguard Workertable Label {
985*993b0882SAndroid Build Coastguard Worker  boise_type:Label_.BoiseType;
986*993b0882SAndroid Build Coastguard Worker  mention_type:Label_.MentionType;
987*993b0882SAndroid Build Coastguard Worker  collection_id:int;
988*993b0882SAndroid Build Coastguard Worker  // collection_id points to the collections array (presumably
  // PodNerModel.collections — confirm).
989*993b0882SAndroid Build Coastguard Worker}
990*993b0882SAndroid Build Coastguard Worker
// Configuration and data of the PodNER model (a tflite-based NER annotator).
991*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3;
992*993b0882SAndroid Build Coastguard Workertable PodNerModel {
993*993b0882SAndroid Build Coastguard Worker  tflite_model:[ubyte];
994*993b0882SAndroid Build Coastguard Worker  word_piece_vocab:[ubyte];
995*993b0882SAndroid Build Coastguard Worker  lowercase_input:bool = true;
996*993b0882SAndroid Build Coastguard Worker
997*993b0882SAndroid Build Coastguard Worker  // Index of mention_logits tensor in the output of the tflite model. Can
998*993b0882SAndroid Build Coastguard Worker  // be found in the textproto output after model is converted to tflite.
999*993b0882SAndroid Build Coastguard Worker  logits_index_in_output_tensor:int = 0;
1000*993b0882SAndroid Build Coastguard Worker
1001*993b0882SAndroid Build Coastguard Worker  // Whether to append a period at the end of an input that doesn't already
1002*993b0882SAndroid Build Coastguard Worker  // end in punctuation.
1003*993b0882SAndroid Build Coastguard Worker  append_final_period:bool = false;
1004*993b0882SAndroid Build Coastguard Worker
1005*993b0882SAndroid Build Coastguard Worker  // Priority score used for conflict resolution with the other models. Used
1006*993b0882SAndroid Build Coastguard Worker  // only if the collections array (the collections field below) is empty.
1007*993b0882SAndroid Build Coastguard Worker  priority_score:float = 0;
1008*993b0882SAndroid Build Coastguard Worker
1009*993b0882SAndroid Build Coastguard Worker  // Maximum number of wordpieces supported by the model.
1010*993b0882SAndroid Build Coastguard Worker  max_num_wordpieces:int = 128;
1011*993b0882SAndroid Build Coastguard Worker
1012*993b0882SAndroid Build Coastguard Worker  // In case of long text (number of wordpieces greater than the max) we use a
1013*993b0882SAndroid Build Coastguard Worker  // sliding window approach; this determines the number of overlapping
1014*993b0882SAndroid Build Coastguard Worker  // wordpieces between two consecutive windows. This overlap enables context
1015*993b0882SAndroid Build Coastguard Worker  // for each word NER annotates.
1016*993b0882SAndroid Build Coastguard Worker  sliding_window_num_wordpieces_overlap:int = 20;
1017*993b0882SAndroid Build Coastguard Worker  reserved_9:int16 (deprecated);
1018*993b0882SAndroid Build Coastguard Worker
1019*993b0882SAndroid Build Coastguard Worker  // The possible labels the ner model can output. If empty the default labels
1020*993b0882SAndroid Build Coastguard Worker  // will be used.
1021*993b0882SAndroid Build Coastguard Worker  labels:[PodNerModel_.Label];
1022*993b0882SAndroid Build Coastguard Worker
1023*993b0882SAndroid Build Coastguard Worker  // If the ratio of unknown wordpieces in the input text is greater than this
1024*993b0882SAndroid Build Coastguard Worker  // maximum, the text won't be annotated.
1025*993b0882SAndroid Build Coastguard Worker  max_ratio_unknown_wordpieces:float = 0.1;
1026*993b0882SAndroid Build Coastguard Worker
1027*993b0882SAndroid Build Coastguard Worker  // Possible collections for labeled entities.
1028*993b0882SAndroid Build Coastguard Worker  collections:[PodNerModel_.Collection];
1029*993b0882SAndroid Build Coastguard Worker
1030*993b0882SAndroid Build Coastguard Worker  // Minimum word-length and wordpieces-length required for the text to be
1031*993b0882SAndroid Build Coastguard Worker  // annotated.
1032*993b0882SAndroid Build Coastguard Worker  min_number_of_tokens:int = 1;
1033*993b0882SAndroid Build Coastguard Worker
1034*993b0882SAndroid Build Coastguard Worker  min_number_of_wordpieces:int = 1;
1035*993b0882SAndroid Build Coastguard Worker
1036*993b0882SAndroid Build Coastguard Worker  // The modes in which the PodNER model is enabled.
1037*993b0882SAndroid Build Coastguard Worker  enabled_modes:ModeFlag = ALL;
1038*993b0882SAndroid Build Coastguard Worker}
1039*993b0882SAndroid Build Coastguard Worker
// Model data and options for the vocab annotator that triggers "Define".
1040*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3;
1041*993b0882SAndroid Build Coastguard Workertable VocabModel {
1042*993b0882SAndroid Build Coastguard Worker  // A trie that stores a list of vocabs that trigger "Define". An id is
1043*993b0882SAndroid Build Coastguard Worker  // returned when looking up a vocab from the trie and the id can be used
1044*993b0882SAndroid Build Coastguard Worker  // to access more information about that vocab. The marisa trie library
1045*993b0882SAndroid Build Coastguard Worker  // requires 8-byte alignment because the first thing in a marisa trie is a
1046*993b0882SAndroid Build Coastguard Worker  // 64-bit integer.
1047*993b0882SAndroid Build Coastguard Worker  vocab_trie:[ubyte] (force_align: 8);
1048*993b0882SAndroid Build Coastguard Worker
1049*993b0882SAndroid Build Coastguard Worker  // A bit vector that tells if the vocab should trigger "Define" for users of
1050*993b0882SAndroid Build Coastguard Worker  // beginner proficiency only. To look up the bit vector, use the id returned
1051*993b0882SAndroid Build Coastguard Worker  // by the trie.
1052*993b0882SAndroid Build Coastguard Worker  beginner_level:BitVectorData;
1053*993b0882SAndroid Build Coastguard Worker
1054*993b0882SAndroid Build Coastguard Worker  // A sorted list of indices of vocabs that should not trigger "Define" if
1055*993b0882SAndroid Build Coastguard Worker  // its leading character is in upper case. The indices are those returned by
1056*993b0882SAndroid Build Coastguard Worker  // the trie. You may perform binary search to look up an index.
  // NOTE(review): the field type is BitVectorData, not an index list — confirm
  // whether the "sorted list / binary search" description is still accurate.
1057*993b0882SAndroid Build Coastguard Worker  do_not_trigger_in_upper_case:BitVectorData;
1058*993b0882SAndroid Build Coastguard Worker
1059*993b0882SAndroid Build Coastguard Worker  // Comma-separated list of locales (BCP 47 tags) that the model supports, that
1060*993b0882SAndroid Build Coastguard Worker  // are used to prevent triggering on input in unsupported languages. If
1061*993b0882SAndroid Build Coastguard Worker  // empty, the model will trigger on all inputs.
1062*993b0882SAndroid Build Coastguard Worker  triggering_locales:string (shared);
1063*993b0882SAndroid Build Coastguard Worker
1064*993b0882SAndroid Build Coastguard Worker  // The final score to assign to the results of the vocab model.
1065*993b0882SAndroid Build Coastguard Worker  target_classification_score:float = 1;
1066*993b0882SAndroid Build Coastguard Worker
1067*993b0882SAndroid Build Coastguard Worker  // Priority score used for conflict resolution with the other models.
1068*993b0882SAndroid Build Coastguard Worker  priority_score:float = 0;
1069*993b0882SAndroid Build Coastguard Worker
1070*993b0882SAndroid Build Coastguard Worker  // The modes in which the vocab model is enabled.
1071*993b0882SAndroid Build Coastguard Worker  enabled_modes:ModeFlag = ANNOTATION_AND_CLASSIFICATION;
1072*993b0882SAndroid Build Coastguard Worker}
1073*993b0882SAndroid Build Coastguard Worker
// Declares libtextclassifier3.Model as the root table of this schema.
1074*993b0882SAndroid Build Coastguard Workerroot_type libtextclassifier3.Model;
1075