// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
include "tensorflow_lite_support/custom_ops/kernel/sentencepiece/config.fbs";

namespace tflite.ops.custom.sentencepiece;

// Serialized configuration of the sentencepiece encoder. This table is the
// root type of the schema.
//
// NOTE: fields carry no explicit `id` attributes, so their declaration order
// defines the binary layout. Do not reorder or remove fields; append new
// fields at the end of the table only.
//
// `EncoderVersion` and `Trie` are not declared in this file; presumably they
// come from the included config.fbs -- TODO confirm.
table EncoderConfig {
  // Version of the encoder.
  version: EncoderVersion = SENTENCE_PIECE;

  // NOTE(review): presumably the codes emitted at the start and at the end of
  // an encoded sequence -- confirm against the encoder implementation.
  start_code: int32 = 0;
  end_code: int32 = 0;

  // NOTE(review): presumably the code produced for input that matches no
  // piece -- confirm against the encoder implementation.
  unknown_code: int32 = -1;
  // Weight of the "unknown code" when encoding. Called a "penalty" because it
  // usually has a big negative weight, less than that of any other
  // sentencepiece.
  unknown_penalty: float = 0;

  // The offset for encoding, usually used when low code values are reserved
  // for some special needs. No explicit default is declared, so the
  // FlatBuffers scalar default (0) applies.
  encoding_offset: int32;

  // String pieces for encoding.
  pieces: Trie;
  // NOTE(review): presumably one score per entry of `pieces` -- confirm the
  // indexing against the encoder implementation.
  pieces_scores: [float];

  // Normalization-related parameters. The bool fields below declare no
  // explicit default, so the FlatBuffers default (false) applies.
  remove_extra_whitespaces: bool;

  // Add a whitespace prefix before encoding.
  add_dummy_prefix: bool;

  // Escape whitespaces during encoding so that the decoder can restore them
  // exactly as they were in the input.
  escape_whitespaces: bool;

  // Normalization parameters.
  // NOTE(review): presumably `normalized_prefixes` maps input prefixes to
  // positions inside the `normalized_replacements` byte buffer -- confirm
  // against the normalizer implementation.
  normalized_prefixes: Trie;
  normalized_replacements: [byte];
}

root_type EncoderConfig;