//
// Copyright (C) 2018 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

include "annotator/entity-data.fbs";
include "annotator/experimental/experimental.fbs";
include "utils/codepoint-range.fbs";
include "utils/container/bit-vector.fbs";
include "utils/flatbuffers/flatbuffers.fbs";
include "utils/grammar/rules.fbs";
include "utils/intents/intent-config.fbs";
include "utils/normalization.fbs";
include "utils/resources.fbs";
include "utils/tokenizer.fbs";
include "utils/zlib/buffer.fbs";

file_identifier "TC2 ";

// The possible model modes, represents a bit field.
namespace libtextclassifier3;
enum ModeFlag : int {
  NONE = 0,

  // ANNOTATION (1), CLASSIFICATION (2) and SELECTION (4) are the individual
  // bits; the remaining values are their bitwise combinations.
  ANNOTATION = 1,
  CLASSIFICATION = 2,
  ANNOTATION_AND_CLASSIFICATION = 3,
  SELECTION = 4,
  ANNOTATION_AND_SELECTION = 5,
  CLASSIFICATION_AND_SELECTION = 6,
  ALL = 7,
}

// Enum for specifying the annotation usecase.
namespace libtextclassifier3;
enum AnnotationUsecase : int {
  // Results are optimized for Smart{Select,Share,Linkify}.
  ANNOTATION_USECASE_SMART = 0,
  // Smart{Select,Share,Linkify}

  // Results are optimized for using TextClassifier as an infrastructure that
  // annotates as much as possible.
  ANNOTATION_USECASE_RAW = 1,
}

// Identifiers for the individual datetime extractors (see the `extractor`
// field of DatetimeModelExtractor).
namespace libtextclassifier3;
enum DatetimeExtractorType : int {
  UNKNOWN_DATETIME_EXTRACTOR_TYPE = 0,
  AM = 1,
  PM = 2,
  JANUARY = 3,
  FEBRUARY = 4,
  MARCH = 5,
  APRIL = 6,
  MAY = 7,
  JUNE = 8,
  JULY = 9,
  AUGUST = 10,
  SEPTEMBER = 11,
  OCTOBER = 12,
  NOVEMBER = 13,
  DECEMBER = 14,
  NEXT = 15,
  NEXT_OR_SAME = 16,
  LAST = 17,
  NOW = 18,
  TOMORROW = 19,
  YESTERDAY = 20,
  PAST = 21,
  FUTURE = 22,
  DAY = 23,
  WEEK = 24,
  MONTH = 25,
  YEAR = 26,
  MONDAY = 27,
  TUESDAY = 28,
  WEDNESDAY = 29,
  THURSDAY = 30,
  FRIDAY = 31,
  SATURDAY = 32,
  SUNDAY = 33,
  DAYS = 34,
  WEEKS = 35,
  MONTHS = 36,

  // TODO(zilka): Make the following 3 values singular for consistency.
  HOURS = 37,
  MINUTES = 38,
  SECONDS = 39,

  YEARS = 40,
  DIGITS = 41,
  SIGNEDDIGITS = 42,
  ZERO = 43,
  ONE = 44,
  TWO = 45,
  THREE = 46,
  FOUR = 47,
  FIVE = 48,
  SIX = 49,
  SEVEN = 50,
  EIGHT = 51,
  NINE = 52,
  TEN = 53,
  ELEVEN = 54,
  TWELVE = 55,
  THIRTEEN = 56,
  FOURTEEN = 57,
  FIFTEEN = 58,
  SIXTEEN = 59,
  SEVENTEEN = 60,
  EIGHTEEN = 61,
  NINETEEN = 62,
  TWENTY = 63,
  THIRTY = 64,
  FORTY = 65,
  FIFTY = 66,
  SIXTY = 67,
  SEVENTY = 68,
  EIGHTY = 69,
  NINETY = 70,
  HUNDRED = 71,
  THOUSAND = 72,
  NOON = 73,
  MIDNIGHT = 74,
}

// Types of capturing groups in datetime regexes (see the `groups` field of
// DatetimeModelPattern_.Regex).
namespace libtextclassifier3;
enum DatetimeGroupType : int {
  GROUP_UNKNOWN = 0,
  GROUP_UNUSED = 1,
  GROUP_YEAR = 2,
  GROUP_MONTH = 3,
  GROUP_DAY = 4,
  GROUP_HOUR = 5,
  GROUP_MINUTE = 6,
  GROUP_SECOND = 7,
  GROUP_AMPM = 8,
  GROUP_RELATIONDISTANCE = 9,
  GROUP_RELATION = 10,
  GROUP_RELATIONTYPE = 11,

  // Dummy groups serve just as an inflator of the selection. E.g. we might
  // want to select more text than was contained in an envelope of all
  // extractor spans.
  GROUP_DUMMY1 = 12,
  GROUP_DUMMY2 = 13,

  GROUP_ABSOLUTETIME = 14,
}

// Options for the model that predicts text selection.
namespace libtextclassifier3;
table SelectionModelOptions {
  // If true, before the selection is returned, the unpaired brackets
  // contained in the predicted selection are stripped from both ends of the
  // selection.
  // The bracket codepoints are defined in the Unicode standard:
  // http://www.unicode.org/Public/UNIDATA/BidiBrackets.txt
  strip_unpaired_brackets:bool = true;

  // Number of hypothetical click positions on either side of the actual click
  // to consider in order to enforce symmetry.
  symmetry_context_size:int;

  // Number of examples to bundle in one batch for inference.
  batch_size:int = 1024;

  // Whether to always classify a suggested selection or only on demand.
  always_classify_suggested_selection:bool = false;
}

// Options for the model that classifies a text selection.
namespace libtextclassifier3;
table ClassificationModelOptions {
  // Limits for phone numbers.
  phone_min_num_digits:int = 7;

  phone_max_num_digits:int = 15;

  // Limits for addresses.
  address_min_num_tokens:int;

  // Maximum number of tokens to attempt a classification (-1 is unlimited).
  max_num_tokens:int = -1;
}

// Options for post-checks, checksums and verification to apply on a match.
namespace libtextclassifier3;
table VerificationOptions {
  verify_luhn_checksum:bool = false;

  // Lua verifier to use.
  // Index of the lua verifier in the model.
  lua_verifier:int = -1;
}

// Behaviour of rule capturing groups.
// This specifies how the text and span of a capturing group, in a regular
// expression or from a capturing match in a grammar rule, should be handled.
namespace libtextclassifier3;
table CapturingGroup {
  // If true, the span of the capturing group will be used to
  // extend the selection.
  extend_selection:bool = true;

  // If set, the text of the capturing group will be used to set a field in
  // the classification result entity data.
  entity_field_path:FlatbufferFieldPath;

  // If set, the flatbuffer entity data will be merged with the
  // classification result entity data.
  serialized_entity_data:string (shared);

  // If set, normalization to apply before text is used in entity data.
  normalization_options:NormalizationOptions;

  entity_data:EntityData;
}

// List of regular expression matchers to check.
namespace libtextclassifier3.RegexModel_;
table Pattern {
  // The name of the collection of a match.
  collection_name:string (shared);

  // The pattern to check.
  pattern:string (shared);

  // The modes for which to apply the patterns.
  enabled_modes:ModeFlag = ALL;

  // The final score to assign to the results of this pattern.
  target_classification_score:float = 1;

  // Priority score used for conflict resolution with the other models.
  priority_score:float = 0;

  // If true, will use an approximate matching implementation implemented
  // using Find() instead of the true Match(). This approximate matching will
  // use the first Find() result and then check that it spans the whole input.
  use_approximate_matching:bool = false;

  compressed_pattern:CompressedBuffer;

  // Verification to apply on a match.
  verification_options:VerificationOptions;

  capturing_group:[CapturingGroup];

  // Entity data to set for a match.
  serialized_entity_data:string (shared);

  entity_data:EntityData;
}

namespace libtextclassifier3;
table RegexModel {
  patterns:[RegexModel_.Pattern];

  // If true, will compile the regexes only on first use.
  lazy_regex_compilation:bool = true;

  // Lua scripts for match verification.
  // The verifier can access:
  // * `context`: The context as a string.
  // * `match`: The groups of the regex match as an array, each group gives
  //   * `begin`: span start
  //   * `end`: span end
  //   * `text`: the text
  // The verifier is expected to return a boolean, indicating whether the
  // verification succeeded or not.
  lua_verifier:[string];
}

// List of regex patterns.
namespace libtextclassifier3.DatetimeModelPattern_;
table Regex {
  pattern:string (shared);

  // The ith entry specifies the type of the ith capturing group.
  // This is used to decide how the matched content has to be parsed.
  groups:[DatetimeGroupType];

  compressed_pattern:CompressedBuffer;
}

namespace libtextclassifier3;
table DatetimeModelPattern {
  regexes:[DatetimeModelPattern_.Regex];

  // List of locale indices in DatetimeModel that represent the locales that
  // these patterns should be used for. If empty, can be used for all locales.
  locales:[int];

  // The final score to assign to the results of this pattern.
  target_classification_score:float = 1;

  // Priority score used for conflict resolution with the other models.
  priority_score:float = 0;

  // The modes for which to apply the patterns.
  enabled_modes:ModeFlag = ALL;

  // The annotation usecases for which to apply the patterns.
  // This is a flag field for values of AnnotationUsecase.
  // The default (4294967295 == 0xFFFFFFFF) has all 32 bits set.
  enabled_annotation_usecases:uint = 4294967295;
}

namespace libtextclassifier3;
table DatetimeModelExtractor {
  extractor:DatetimeExtractorType;
  pattern:string (shared);
  locales:[int];
  compressed_pattern:CompressedBuffer;
}

namespace libtextclassifier3;
table DatetimeModel {
  // List of BCP 47 locale strings representing all locales supported by the
  // model. The individual patterns refer back to them using an index.
  locales:[string];

  patterns:[DatetimeModelPattern];
  extractors:[DatetimeModelExtractor];

  // If true, will use the extractors for determining the match location as
  // opposed to using the location where the global pattern matched.
  use_extractors_for_locating:bool = true;

  // List of locale ids whose rules are always run, after the requested
  // ones.
  default_locales:[int];

  // If true, will generate the alternative interpretations for ambiguous
  // datetime expressions.
  generate_alternative_interpretations_when_ambiguous:bool = false;

  // If true, will compile the regexes only on first use.
  lazy_regex_compilation:bool = true;

  // If true, will give only future dates (when the day is not specified).
  prefer_future_for_unspecified_date:bool = false;
}

// Configuration for the tokenizer.
namespace libtextclassifier3;
table GrammarTokenizerOptions {
  tokenization_type:TokenizationType = ICU;

  // If true, white space tokens will be kept when using the icu tokenizer.
  icu_preserve_whitespace_tokens:bool = false;

  // Codepoint ranges that determine what role the different codepoints play
  // during tokenization. The ranges must not overlap.
  tokenization_codepoint_config:[TokenizationCodepointRange];

  // A set of codepoint ranges to use in the mixed tokenization mode to identify
  // stretches of tokens to re-tokenize using the internal tokenizer.
  internal_tokenizer_codepoint_ranges:[CodepointRange];

  // If true, tokens will be also split when the codepoint's script_id changes
  // as defined in TokenizationCodepointRange.
  tokenize_on_script_change:bool = false;
}

// A single named entry of DatetimeModelLibrary: `key` is the name, `value`
// the model.
namespace libtextclassifier3.DatetimeModelLibrary_;
table Item {
  key:string (shared);
  value:DatetimeModel;
}

// A set of named DateTime models.
namespace libtextclassifier3;
table DatetimeModelLibrary {
  models:[DatetimeModelLibrary_.Item];
}

// Classification result to instantiate for a rule match.
389*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.GrammarModel_; 390*993b0882SAndroid Build Coastguard Workertable RuleClassificationResult { 391*993b0882SAndroid Build Coastguard Worker // The name of the collection. 392*993b0882SAndroid Build Coastguard Worker collection_name:string (shared); 393*993b0882SAndroid Build Coastguard Worker 394*993b0882SAndroid Build Coastguard Worker // The score. 395*993b0882SAndroid Build Coastguard Worker target_classification_score:float = 1; 396*993b0882SAndroid Build Coastguard Worker 397*993b0882SAndroid Build Coastguard Worker // The priority score used for conflict resolution with the other models. 398*993b0882SAndroid Build Coastguard Worker priority_score:float = 0; 399*993b0882SAndroid Build Coastguard Worker 400*993b0882SAndroid Build Coastguard Worker // Behaviour of capturing matches. 401*993b0882SAndroid Build Coastguard Worker capturing_group:[CapturingGroup]; 402*993b0882SAndroid Build Coastguard Worker 403*993b0882SAndroid Build Coastguard Worker // Entity data to set for a match. 404*993b0882SAndroid Build Coastguard Worker serialized_entity_data:string (shared); 405*993b0882SAndroid Build Coastguard Worker 406*993b0882SAndroid Build Coastguard Worker // Enabled modes. 407*993b0882SAndroid Build Coastguard Worker enabled_modes:ModeFlag = ALL; 408*993b0882SAndroid Build Coastguard Worker 409*993b0882SAndroid Build Coastguard Worker entity_data:EntityData; 410*993b0882SAndroid Build Coastguard Worker} 411*993b0882SAndroid Build Coastguard Worker 412*993b0882SAndroid Build Coastguard Worker// Configuration for grammar based annotators. 413*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3; 414*993b0882SAndroid Build Coastguard Workertable GrammarModel { 415*993b0882SAndroid Build Coastguard Worker // The grammar rules. 
416*993b0882SAndroid Build Coastguard Worker rules:grammar.RulesSet; 417*993b0882SAndroid Build Coastguard Worker 418*993b0882SAndroid Build Coastguard Worker // Deprecated. Used only for the old implementation of the grammar model. 419*993b0882SAndroid Build Coastguard Worker rule_classification_result:[GrammarModel_.RuleClassificationResult]; 420*993b0882SAndroid Build Coastguard Worker 421*993b0882SAndroid Build Coastguard Worker // Number of tokens in the context to use for classification and text 422*993b0882SAndroid Build Coastguard Worker // selection suggestion. 423*993b0882SAndroid Build Coastguard Worker // A value -1 uses the full context. 424*993b0882SAndroid Build Coastguard Worker context_left_num_tokens:int; 425*993b0882SAndroid Build Coastguard Worker 426*993b0882SAndroid Build Coastguard Worker context_right_num_tokens:int; 427*993b0882SAndroid Build Coastguard Worker 428*993b0882SAndroid Build Coastguard Worker // Grammar specific tokenizer options. 429*993b0882SAndroid Build Coastguard Worker tokenizer_options:GrammarTokenizerOptions; 430*993b0882SAndroid Build Coastguard Worker 431*993b0882SAndroid Build Coastguard Worker // The score. 432*993b0882SAndroid Build Coastguard Worker target_classification_score:float = 1; 433*993b0882SAndroid Build Coastguard Worker 434*993b0882SAndroid Build Coastguard Worker // The priority score used for conflict resolution with the other models. 435*993b0882SAndroid Build Coastguard Worker priority_score:float = 1; 436*993b0882SAndroid Build Coastguard Worker 437*993b0882SAndroid Build Coastguard Worker // Global enabled modes. Use this instead of 438*993b0882SAndroid Build Coastguard Worker // `rule_classification_result.enabled_modes`. 
enabled_modes:ModeFlag = ALL;
}

// Key/value entry type of `MoneyParsingOptions.quantities_name_to_exponent`.
// `key` carries the FlatBuffers `key` attribute, so vectors of this table can
// be searched with LookupByKey once they are sorted by key.
namespace libtextclassifier3.MoneyParsingOptions_;
table QuantitiesNameToExponentEntry {
  key:string (key, shared);
  value:int;
}

namespace libtextclassifier3;
table MoneyParsingOptions {
  // Separators (codepoints) marking decimal or thousand in the money amount.
  separators:[int];

  // Mapping between a quantity string (e.g. "million") and the power of 10
  // it multiplies the amount with (e.g. 6 in case of "million").
  // NOTE: The entries need to be sorted by key since we use LookupByKey.
  quantities_name_to_exponent:[MoneyParsingOptions_.QuantitiesNameToExponentEntry];
}

// Key/value entry type of `ModelTriggeringOptions.collection_to_priority`.
// `key` carries the FlatBuffers `key` attribute, so vectors of this table can
// be searched with LookupByKey once they are sorted by key.
namespace libtextclassifier3.ModelTriggeringOptions_;
table CollectionToPriorityEntry {
  key:string (key, shared);
  value:float;
}

// Options controlling the output of the Tensorflow Lite models.
namespace libtextclassifier3;
table ModelTriggeringOptions {
  // Lower bound threshold for filtering annotation model outputs.
  min_annotate_confidence:float = 0;

  // The modes for which to enable the models.
  enabled_modes:ModeFlag = ALL;

  // Comma-separated list of locales (BCP 47 tags) that dictionary
  // classification supports.
  dictionary_locales:string (shared);

  // Comma-separated list of locales (BCP 47 tags) that the model supports, that
  // are used to prevent triggering on input in unsupported languages. If
  // empty, the model will trigger on all inputs.
  locales:string (shared);

  // Priority score assigned to the "other" class from ML model.
  other_collection_priority_score:float = -1000;

  // Priority score assigned to knowledge engine annotations.
  knowledge_priority_score:float = 0;

  // Deprecated placeholder. It has to stay at this position: no explicit
  // `id` attributes are used, so removing it would shift the implicit ids of
  // all following fields.
  reserved_7:int16 (deprecated);

  // Apply a factor to the priority score for entities that are added to this
  // map. Key: collection type e.g. "address", "phone"..., Value: float number.
  // NOTE: The entries here need to be sorted since we use LookupByKey.
  collection_to_priority:[ModelTriggeringOptions_.CollectionToPriorityEntry];

  // Enabled modes for the knowledge engine model.
  knowledge_enabled_modes:ModeFlag = ALL;

  // Enabled modes for the experimental model.
  experimental_enabled_modes:ModeFlag = ALL;

  // Enabled modes for the installed app model.
  installed_app_enabled_modes:ModeFlag = ALL;
}

// Options controlling the output of the classifier.
namespace libtextclassifier3;
table OutputOptions {
  // Lists of collection names that will be filtered out at the output:
  // - For annotation, the spans of given collection are simply dropped.
  // - For classification, the result is mapped to the class "other".
  // - For selection, the spans of given class are returned as
  //   single-selection.
  filtered_collections_annotation:[string];

  filtered_collections_classification:[string];
  filtered_collections_selection:[string];
}

// Type of `Model.embedding_pruning_mask`.
namespace libtextclassifier3.Model_;
table EmbeddingPruningMask {
  // If true, use pruning mask. In this case, we use mask
  // pruning_mask to determine the mapping of hashed-charactergrams.
  enabled:bool;

  // Packing of the binary pruning mask into uint64 values.
  pruning_mask:[ulong] (force_align: 16);

  // Number of buckets before pruning.
  full_num_buckets:int;

  // Index of row of compressed embedding matrix to which all pruned buckets
  // are mapped.
  pruned_row_bucket_id:int;
}

// Type of `Model.conflict_resolution_options`.
namespace libtextclassifier3.Model_;
table ConflictResolutionOptions {
  // If true, will prioritize the longest annotation during conflict
  // resolution.
  prioritize_longest_annotation:bool = false;

  // If true, the annotator will perform conflict resolution between the
  // different sub-annotators also in the RAW mode. If false, no conflict
  // resolution will be performed in RAW mode.
  do_conflict_resolution_in_raw_mode:bool = true;
}

namespace libtextclassifier3;
table Model {
  // Comma-separated list of locales supported by the model as BCP 47 tags.
  locales:string (shared);

  version:int;

  // A name for the model that can be used for e.g. logging.
  name:string (shared);

  selection_feature_options:FeatureProcessorOptions;
  classification_feature_options:FeatureProcessorOptions;

  // Tensorflow Lite models.
  selection_model:[ubyte] (force_align: 16);

  classification_model:[ubyte] (force_align: 16);
  embedding_model:[ubyte] (force_align: 16);

  // Options for the different models.
  selection_options:SelectionModelOptions;

  classification_options:ClassificationModelOptions;
  regex_model:RegexModel;
  datetime_model:DatetimeModel;

  // Options controlling the output of the models.
  triggering_options:ModelTriggeringOptions;

  // Global switch that controls if SuggestSelection(), ClassifyText() and
  // Annotate() will run. If a mode is disabled it returns empty/no-op results.
  enabled_modes:ModeFlag = ALL;

  // If true, will snap the selections that consist only of whitespaces to the
  // containing suggested span. Otherwise, no suggestion is proposed, since the
  // selections are not part of any token.
  snap_whitespace_selections:bool = true;

  // Global configuration for the output of SuggestSelection(), ClassifyText()
  // and Annotate().
  output_options:OutputOptions;

  // Configures how Intents should be generated on Android.
  android_intent_options:AndroidIntentFactoryOptions;

  intent_options:IntentFactoryModel;

  // Model resources.
  resources:ResourcePool;

  // Schema data for handling entity data.
  entity_data_schema:[ubyte];

  number_annotator_options:NumberAnnotatorOptions;
  duration_annotator_options:DurationAnnotatorOptions;

  // Comma-separated list of locales (BCP 47 tags) that the model supports, that
  // are used to prevent triggering on input in unsupported languages. If
  // empty, the model will trigger on all inputs.
  triggering_locales:string (shared);

  embedding_pruning_mask:Model_.EmbeddingPruningMask;

  // Deprecated placeholder. It has to stay at this position: no explicit
  // `id` attributes are used, so removing it would shift the implicit ids of
  // all following fields.
  reserved_25:int16 (deprecated);
  contact_annotator_options:ContactAnnotatorOptions;
  money_parsing_options:MoneyParsingOptions;
  translate_annotator_options:TranslateAnnotatorOptions;
  grammar_model:GrammarModel;
  conflict_resolution_options:Model_.ConflictResolutionOptions;
  experimental_model:ExperimentalModel;
  pod_ner_model:PodNerModel;
  vocab_model:VocabModel;
  datetime_grammar_model:GrammarModel;
}

// Method for selecting the center token.
namespace libtextclassifier3.FeatureProcessorOptions_;
enum CenterTokenSelectionMethod : int {
  DEFAULT_CENTER_TOKEN_METHOD = 0,
  // Invalid option.

  // Use click indices to determine the center token.
  CENTER_TOKEN_FROM_CLICK = 1,

  // Use selection indices to get a token range, and select the middle of it
  // as the center token.
  CENTER_TOKEN_MIDDLE_OF_SELECTION = 2,
}

// Bounds-sensitive feature extraction configuration.
namespace libtextclassifier3.FeatureProcessorOptions_;
table BoundsSensitiveFeatures {
  // Enables the extraction of bounds-sensitive features, instead of the click
  // context features.
  enabled:bool;

  // The numbers of tokens to extract in specific locations relative to the
  // bounds.
  // Immediately before the span.
  num_tokens_before:int;

  // Inside the span, aligned with the beginning.
  num_tokens_inside_left:int;

  // Inside the span, aligned with the end.
  num_tokens_inside_right:int;

  // Immediately after the span.
  num_tokens_after:int;

  // If true, also extracts the tokens of the entire span and adds up their
  // features forming one "token" to include in the extracted features.
  include_inside_bag:bool;

  // If true, includes the selection length (in the number of tokens) as a
  // feature.
  include_inside_length:bool;

  // If true, for selection, single token spans are not run through the model
  // and their score is assumed to be zero.
  score_single_token_spans_as_zero:bool;
}

namespace libtextclassifier3;
table FeatureProcessorOptions {
  // Number of buckets used for hashing charactergrams.
  num_buckets:int = -1;

  // Size of the embedding.
  embedding_size:int = -1;

  // Number of bits for quantization for embeddings.
  embedding_quantization_bits:int = 8;

  // Context size defines the number of words to the left and to the right of
  // the selected word to be used as context. For example, if context size is
  // N, then we take N words to the left and N words to the right of the
  // selected word as its context.
  context_size:int = -1;

  // Maximum number of words of the context to select in total.
  max_selection_span:int = -1;

  // Orders of charactergrams to extract. E.g., 2 means character bigrams, 3
  // character trigrams etc.
  chargram_orders:[int];

  // Maximum length of a word, in codepoints.
  max_word_length:int = 20;

  // If true, will use the unicode-aware functionality for extracting features.
  unicode_aware_features:bool = false;

  // Whether to extract the token case feature.
  extract_case_feature:bool = false;

  // Whether to extract the selection mask feature.
  extract_selection_mask_feature:bool = false;

  // List of regexps to run over each token. For each regexp, if there is a
  // match, a dense feature of 1.0 is emitted. Otherwise -1.0 is used.
  regexp_feature:[string];

  // Whether to remap all digits to a single number.
  remap_digits:bool = false;

  // Whether to lower-case each token before generating hashgrams.
  lowercase_tokens:bool;

  // If true, the selection classifier output will contain only the selections
  // that are feasible (e.g., those that are shorter than max_selection_span),
  // if false, the output will be a complete cross-product of possible
  // selections to the left and possible selections to the right, including the
  // infeasible ones.
  // NOTE: Exists mainly for compatibility with older models that were trained
  // with the non-reduced output space.
  selection_reduced_output_space:bool = true;

  // Collection names.
  collections:[string];

  // An index of collection in collections to be used if a collection name can't
  // be mapped to an id.
  default_collection:int = -1;

  // If true, will split the input by lines, and only use the line that contains
  // the clicked token.
  only_use_line_with_click:bool = false;

  // If true, will split tokens that contain the selection boundary, at the
  // position of the boundary.
  // E.g. "foo{bar}@google.com" -> "foo", "bar", "@google.com"
  split_tokens_on_selection_boundaries:bool = false;

  // Codepoint ranges that determine how different codepoints are tokenized.
  // The ranges must not overlap.
  tokenization_codepoint_config:[TokenizationCodepointRange];

  center_token_selection_method:FeatureProcessorOptions_.CenterTokenSelectionMethod;

  // If true, span boundaries will be snapped to containing tokens and not
  // required to exactly match token boundaries.
  snap_label_span_boundaries_to_containing_tokens:bool;

  // A set of codepoint ranges supported by the model.
  supported_codepoint_ranges:[CodepointRange];

  // A set of codepoint ranges to use in the mixed tokenization mode to identify
  // stretches of tokens to re-tokenize using the internal tokenizer.
  internal_tokenizer_codepoint_ranges:[CodepointRange];

  // Minimum ratio of supported codepoints in the input context. If the ratio
  // is lower than this, the feature computation will fail.
  min_supported_codepoint_ratio:float = 0;

  // Used for versioning the format of features the model expects.
  // - feature_version == 0:
  //   For each token the features consist of:
  //   - chargram embeddings
  //   - dense features
  //   Chargram embeddings for tokens are concatenated first together,
  //   and at the end, the dense features for the tokens are concatenated
  //   to it. So the resulting feature vector has two regions.
  feature_version:int = 0;

  tokenization_type:TokenizationType = INTERNAL_TOKENIZER;
  icu_preserve_whitespace_tokens:bool = false;

  // List of codepoints that will be stripped from beginning and end of
  // predicted spans.
  ignored_span_boundary_codepoints:[int];

  bounds_sensitive_features:FeatureProcessorOptions_.BoundsSensitiveFeatures;

  // List of allowed charactergrams. The extracted charactergrams are filtered
  // using this list, and charactergrams that are not present are interpreted as
  // out-of-vocabulary.
  // If no allowed_chargrams are specified, all charactergrams are allowed.
  // The field is typed as bytes type to allow non-UTF8 chargrams.
  // NOTE(review): the declaration below is `[string]`, not a byte vector; the
  // "bytes" wording presumably predates this schema - confirm.
  allowed_chargrams:[string];

  // If true, tokens will be also split when the codepoint's script_id changes
  // as defined in TokenizationCodepointRange.
  tokenize_on_script_change:bool = false;

  // If true, the pipe character '|' will be used as a newline character when
  // splitting lines.
  use_pipe_character_for_newline:bool = true;
}

namespace libtextclassifier3;
table NumberAnnotatorOptions {
  // If true, number and percentage annotations will be produced.
  enabled:bool = false;

  // Score to assign to the annotated numbers and percentages in the annotator.
  score:float = 1;

  // Number priority score used for conflict resolution with the other models.
  priority_score:float = 0;

  // The modes in which to enable number and percentage annotations.
  enabled_modes:ModeFlag = ALL;

  // The annotation usecases for which to produce number annotations.
  // This is a flag field for values of AnnotationUsecase.
  // The default 4294967295 is 0xFFFFFFFF, i.e. every bit of the field is set.
  enabled_annotation_usecases:uint = 4294967295;

  // [Deprecated] A list of codepoints that can form a prefix of a valid number.
  allowed_prefix_codepoints:[int];

  // [Deprecated] A list of codepoints that can form a suffix of a valid number.
  allowed_suffix_codepoints:[int];

  // [Deprecated] List of codepoints that will be stripped from beginning of
  // predicted spans.
  ignored_prefix_span_boundary_codepoints:[int];

  // [Deprecated] List of codepoints that will be stripped from end of predicted
  // spans.
  ignored_suffix_span_boundary_codepoints:[int];

  // [Deprecated] If true, percent annotations will be produced.
  enable_percentage:bool = false;

  // Zero separated and ordered list of suffixes that mark a percent.
  percentage_pieces_string:string (shared);

  // [Deprecated] List of suffix offsets in the percentage_pieces_string
  // string.
  percentage_pieces_offsets:[int];

  // Priority score for the percentage annotation.
  percentage_priority_score:float = 1;

  // Float number priority score used for conflict resolution with the other
  // models.
  float_number_priority_score:float = 0;

  // The maximum number of digits an annotated number can have. Requirement:
  // the value should be less than or equal to 20.
  max_number_of_digits:int = 20;

  // The annotation usecases for which to produce percentage annotations.
  // This is a flag field for values of AnnotationUsecase.
  percentage_annotation_usecases:uint = 2;
}

// DurationAnnotator is so far tailored for English and Japanese only.
namespace libtextclassifier3;
table DurationAnnotatorOptions {
  // If true, duration annotations will be produced.
  enabled:bool = false;

  // Score to assign to the annotated durations from the annotator.
  score:float = 1;

  // Priority score used for conflict resolution with the other models.
  priority_score:float = 0;

  // The modes in which to enable duration annotations.
  enabled_modes:ModeFlag = ALL;

  // The annotation usecases for which to produce duration annotations.
  // The default 4294967295 is 0xFFFFFFFF, i.e. every bit of the field is set.
  enabled_annotation_usecases:uint = 4294967295;

  // Durations typically look like XX hours and XX minutes etc... The lists of
  // strings below enumerate variants of "hours", "minutes", etc. in these
  // expressions. These are verbatim strings that are matched against tokens in
  // the input.
  week_expressions:[string];

  day_expressions:[string];
  hour_expressions:[string];
  minute_expressions:[string];
  second_expressions:[string];

  // List of expressions that do not break a duration expression (can become
  // a part of it) but have no semantic meaning.
  filler_expressions:[string];

  // List of expressions that mean half of a unit of duration (e.g. "half an
  // hour").
  half_expressions:[string];

  // Set of codepoints that can split the Annotator tokens to sub-tokens for
  // sub-token matching.
  sub_token_separator_codepoints:[int];

  // If this is true, unit must be associated with quantity. For example, a
  // phrase "minute" is not parsed as one minute duration if this is true.
  require_quantity:bool;

  // If this is true, dangling quantity is included in the annotation. For
  // example, "10 minutes 20" is interpreted as 10 minutes and 20 seconds.
  enable_dangling_quantity_interpretation:bool = true;
}

namespace libtextclassifier3;
table ContactAnnotatorOptions {
  // Supported for English genitives only so far.
  enable_declension:bool;

  // For each language there is a customized list of supported declensions.
  language:string (shared);

  // Enabled modes.
  enabled_modes:ModeFlag = ALL;
}

namespace libtextclassifier3.TranslateAnnotatorOptions_;
enum Algorithm : int {
  DEFAULT_ALGORITHM = 0,
  BACKOFF = 1,
}

// Backoff is the algorithm shipped with Android Q.
namespace libtextclassifier3.TranslateAnnotatorOptions_;
table BackoffOptions {
  // The minimum size of text to prefer for detection (in codepoints).
926*993b0882SAndroid Build Coastguard Worker min_text_size:int = 20; 927*993b0882SAndroid Build Coastguard Worker 928*993b0882SAndroid Build Coastguard Worker // For reducing the score when text is less than the preferred size. 929*993b0882SAndroid Build Coastguard Worker penalize_ratio:float = 1; 930*993b0882SAndroid Build Coastguard Worker 931*993b0882SAndroid Build Coastguard Worker // Original detection score to surrounding text detection score ratios. 932*993b0882SAndroid Build Coastguard Worker subject_text_score_ratio:float = 0.4; 933*993b0882SAndroid Build Coastguard Worker} 934*993b0882SAndroid Build Coastguard Worker 935*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3; 936*993b0882SAndroid Build Coastguard Workertable TranslateAnnotatorOptions { 937*993b0882SAndroid Build Coastguard Worker enabled:bool = false; 938*993b0882SAndroid Build Coastguard Worker 939*993b0882SAndroid Build Coastguard Worker // Score to assign to the classification results. 940*993b0882SAndroid Build Coastguard Worker score:float = 1; 941*993b0882SAndroid Build Coastguard Worker 942*993b0882SAndroid Build Coastguard Worker // Priority score used for conflict resolution with the other models. 943*993b0882SAndroid Build Coastguard Worker priority_score:float; 944*993b0882SAndroid Build Coastguard Worker 945*993b0882SAndroid Build Coastguard Worker algorithm:TranslateAnnotatorOptions_.Algorithm; 946*993b0882SAndroid Build Coastguard Worker backoff_options:TranslateAnnotatorOptions_.BackoffOptions; 947*993b0882SAndroid Build Coastguard Worker 948*993b0882SAndroid Build Coastguard Worker // Enabled modes. 
949*993b0882SAndroid Build Coastguard Worker enabled_modes:ModeFlag = CLASSIFICATION; 950*993b0882SAndroid Build Coastguard Worker} 951*993b0882SAndroid Build Coastguard Worker 952*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.PodNerModel_; 953*993b0882SAndroid Build Coastguard Workertable Collection { 954*993b0882SAndroid Build Coastguard Worker // Collection's name (e.g., "location", "person"). 955*993b0882SAndroid Build Coastguard Worker name:string (shared); 956*993b0882SAndroid Build Coastguard Worker 957*993b0882SAndroid Build Coastguard Worker // Priority scores used for conflict resolution with the other annotators 958*993b0882SAndroid Build Coastguard Worker // when the annotation is made over a single/multi token text. 959*993b0882SAndroid Build Coastguard Worker single_token_priority_score:float; 960*993b0882SAndroid Build Coastguard Worker 961*993b0882SAndroid Build Coastguard Worker multi_token_priority_score:float; 962*993b0882SAndroid Build Coastguard Worker} 963*993b0882SAndroid Build Coastguard Worker 964*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.PodNerModel_.Label_; 965*993b0882SAndroid Build Coastguard Workerenum BoiseType : int { 966*993b0882SAndroid Build Coastguard Worker NONE = 0, 967*993b0882SAndroid Build Coastguard Worker BEGIN = 1, 968*993b0882SAndroid Build Coastguard Worker O = 2, 969*993b0882SAndroid Build Coastguard Worker // No label. 
970*993b0882SAndroid Build Coastguard Worker 971*993b0882SAndroid Build Coastguard Worker INTERMEDIATE = 3, 972*993b0882SAndroid Build Coastguard Worker SINGLE = 4, 973*993b0882SAndroid Build Coastguard Worker END = 5, 974*993b0882SAndroid Build Coastguard Worker} 975*993b0882SAndroid Build Coastguard Worker 976*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.PodNerModel_.Label_; 977*993b0882SAndroid Build Coastguard Workerenum MentionType : int { 978*993b0882SAndroid Build Coastguard Worker UNDEFINED = 0, 979*993b0882SAndroid Build Coastguard Worker NAM = 1, 980*993b0882SAndroid Build Coastguard Worker NOM = 2, 981*993b0882SAndroid Build Coastguard Worker} 982*993b0882SAndroid Build Coastguard Worker 983*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3.PodNerModel_; 984*993b0882SAndroid Build Coastguard Workertable Label { 985*993b0882SAndroid Build Coastguard Worker boise_type:Label_.BoiseType; 986*993b0882SAndroid Build Coastguard Worker mention_type:Label_.MentionType; 987*993b0882SAndroid Build Coastguard Worker collection_id:int; 988*993b0882SAndroid Build Coastguard Worker // points to the collections array above. 989*993b0882SAndroid Build Coastguard Worker} 990*993b0882SAndroid Build Coastguard Worker 991*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3; 992*993b0882SAndroid Build Coastguard Workertable PodNerModel { 993*993b0882SAndroid Build Coastguard Worker tflite_model:[ubyte]; 994*993b0882SAndroid Build Coastguard Worker word_piece_vocab:[ubyte]; 995*993b0882SAndroid Build Coastguard Worker lowercase_input:bool = true; 996*993b0882SAndroid Build Coastguard Worker 997*993b0882SAndroid Build Coastguard Worker // Index of mention_logits tensor in the output of the tflite model. Can 998*993b0882SAndroid Build Coastguard Worker // be found in the textproto output after model is converted to tflite. 
999*993b0882SAndroid Build Coastguard Worker logits_index_in_output_tensor:int = 0; 1000*993b0882SAndroid Build Coastguard Worker 1001*993b0882SAndroid Build Coastguard Worker // Whether to append a period at the end of an input that doesn't already 1002*993b0882SAndroid Build Coastguard Worker // end in punctuation. 1003*993b0882SAndroid Build Coastguard Worker append_final_period:bool = false; 1004*993b0882SAndroid Build Coastguard Worker 1005*993b0882SAndroid Build Coastguard Worker // Priority score used for conflict resolution with the other models. Used 1006*993b0882SAndroid Build Coastguard Worker // only if collections_array is empty. 1007*993b0882SAndroid Build Coastguard Worker priority_score:float = 0; 1008*993b0882SAndroid Build Coastguard Worker 1009*993b0882SAndroid Build Coastguard Worker // Maximum number of wordpieces supported by the model. 1010*993b0882SAndroid Build Coastguard Worker max_num_wordpieces:int = 128; 1011*993b0882SAndroid Build Coastguard Worker 1012*993b0882SAndroid Build Coastguard Worker // In case of long text (number of wordpieces greater than the max) we use 1013*993b0882SAndroid Build Coastguard Worker // sliding window approach, this determines the number of overlapping 1014*993b0882SAndroid Build Coastguard Worker // wordpieces between two consecutive windows. This overlap enables context 1015*993b0882SAndroid Build Coastguard Worker // for each word NER annotates. 1016*993b0882SAndroid Build Coastguard Worker sliding_window_num_wordpieces_overlap:int = 20; 1017*993b0882SAndroid Build Coastguard Worker reserved_9:int16 (deprecated); 1018*993b0882SAndroid Build Coastguard Worker 1019*993b0882SAndroid Build Coastguard Worker // The possible labels the ner model can output. If empty the default labels 1020*993b0882SAndroid Build Coastguard Worker // will be used. 
1021*993b0882SAndroid Build Coastguard Worker labels:[PodNerModel_.Label]; 1022*993b0882SAndroid Build Coastguard Worker 1023*993b0882SAndroid Build Coastguard Worker // If the ratio of unknown wordpieces in the input text is greater than this 1024*993b0882SAndroid Build Coastguard Worker // maximum, the text won't be annotated. 1025*993b0882SAndroid Build Coastguard Worker max_ratio_unknown_wordpieces:float = 0.1; 1026*993b0882SAndroid Build Coastguard Worker 1027*993b0882SAndroid Build Coastguard Worker // Possible collections for labeled entities. 1028*993b0882SAndroid Build Coastguard Worker collections:[PodNerModel_.Collection]; 1029*993b0882SAndroid Build Coastguard Worker 1030*993b0882SAndroid Build Coastguard Worker // Minimum word-length and wordpieces-length required for the text to be 1031*993b0882SAndroid Build Coastguard Worker // annotated. 1032*993b0882SAndroid Build Coastguard Worker min_number_of_tokens:int = 1; 1033*993b0882SAndroid Build Coastguard Worker 1034*993b0882SAndroid Build Coastguard Worker min_number_of_wordpieces:int = 1; 1035*993b0882SAndroid Build Coastguard Worker 1036*993b0882SAndroid Build Coastguard Worker // Enabled modes. 1037*993b0882SAndroid Build Coastguard Worker enabled_modes:ModeFlag = ALL; 1038*993b0882SAndroid Build Coastguard Worker} 1039*993b0882SAndroid Build Coastguard Worker 1040*993b0882SAndroid Build Coastguard Workernamespace libtextclassifier3; 1041*993b0882SAndroid Build Coastguard Workertable VocabModel { 1042*993b0882SAndroid Build Coastguard Worker // A trie that stores a list of vocabs that triggers "Define". A id is 1043*993b0882SAndroid Build Coastguard Worker // returned when looking up a vocab from the trie and the id can be used 1044*993b0882SAndroid Build Coastguard Worker // to access more information about that vocab. 
The marisa trie library 1045*993b0882SAndroid Build Coastguard Worker // requires 8-byte alignment because the first thing in a marisa trie is a 1046*993b0882SAndroid Build Coastguard Worker // 64-bit integer. 1047*993b0882SAndroid Build Coastguard Worker vocab_trie:[ubyte] (force_align: 8); 1048*993b0882SAndroid Build Coastguard Worker 1049*993b0882SAndroid Build Coastguard Worker // A bit vector that tells if the vocab should trigger "Define" for users of 1050*993b0882SAndroid Build Coastguard Worker // beginner proficiency only. To look up the bit vector, use the id returned 1051*993b0882SAndroid Build Coastguard Worker // by the trie. 1052*993b0882SAndroid Build Coastguard Worker beginner_level:BitVectorData; 1053*993b0882SAndroid Build Coastguard Worker 1054*993b0882SAndroid Build Coastguard Worker // A sorted list of indices of vocabs that should not trigger "Define" if 1055*993b0882SAndroid Build Coastguard Worker // its leading character is in upper case. The indices are those returned by 1056*993b0882SAndroid Build Coastguard Worker // trie. You may perform binary search to look up an index. 1057*993b0882SAndroid Build Coastguard Worker do_not_trigger_in_upper_case:BitVectorData; 1058*993b0882SAndroid Build Coastguard Worker 1059*993b0882SAndroid Build Coastguard Worker // Comma-separated list of locales (BCP 47 tags) that the model supports, that 1060*993b0882SAndroid Build Coastguard Worker // are used to prevent triggering on input in unsupported languages. If 1061*993b0882SAndroid Build Coastguard Worker // empty, the model will trigger on all inputs. 
1062*993b0882SAndroid Build Coastguard Worker triggering_locales:string (shared); 1063*993b0882SAndroid Build Coastguard Worker 1064*993b0882SAndroid Build Coastguard Worker // The final score to assign to the results of the vocab model 1065*993b0882SAndroid Build Coastguard Worker target_classification_score:float = 1; 1066*993b0882SAndroid Build Coastguard Worker 1067*993b0882SAndroid Build Coastguard Worker // Priority score used for conflict resolution with the other models. 1068*993b0882SAndroid Build Coastguard Worker priority_score:float = 0; 1069*993b0882SAndroid Build Coastguard Worker 1070*993b0882SAndroid Build Coastguard Worker // Enabled modes. 1071*993b0882SAndroid Build Coastguard Worker enabled_modes:ModeFlag = ANNOTATION_AND_CLASSIFICATION; 1072*993b0882SAndroid Build Coastguard Worker} 1073*993b0882SAndroid Build Coastguard Worker 1074*993b0882SAndroid Build Coastguard Workerroot_type libtextclassifier3.Model; 1075