// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// NOTE(review): EncoderVersion and Trie, used by the declarations in this
// namespace, are presumably declared in this included schema - confirm.
include "tensorflow_lite_support/custom_ops/kernel/sentencepiece/config.fbs";

// All declarations that follow live in this namespace.
namespace tflite.ops.custom.sentencepiece;

// Serialized configuration of the encoder.
//
// NOTE: no explicit `id` attributes are used, so FlatBuffers assigns field
// slots by declaration order. Append new fields at the end of the table and do
// not reorder or remove existing ones, otherwise previously serialized configs
// become unreadable.
table EncoderConfig {
  // Version of the encoder.
  version: EncoderVersion = SENTENCE_PIECE;

  // NOTE(review): presumably the codes that mark the start and the end of an
  // encoded sequence - confirm against the encoder implementation.
  start_code: int32 = 0;
  end_code: int32 = 0;

  // NOTE(review): presumably the code emitted for input that matches no piece
  // - confirm.
  unknown_code: int32 = -1;
  // Weight of "unknown code" when encoding. "Penalty" because it usually has a
  // big negative weight, less than any other sentencepiece.
  unknown_penalty: float = 0;

  // The offset for encoding, usually used when the lowest code values are
  // reserved for some special needs.
  encoding_offset: int32;

  // String pieces for encoding.
  pieces: Trie;
  // NOTE(review): presumably one score per entry of `pieces` - confirm.
  pieces_scores: [float];

  // Normalization related parameters.
  remove_extra_whitespaces: bool;

  // Add a whitespace prefix before encoding.
  add_dummy_prefix: bool;

  // Escape whitespaces during encoding so the decoder can restore them exactly
  // as in the input.
  escape_whitespaces: bool;

  // Normalization parameters.
  // NOTE(review): presumably `normalized_prefixes` maps input prefixes to
  // positions inside `normalized_replacements` - confirm against the encoder.
  normalized_prefixes: Trie;
  normalized_replacements: [byte];
}

// EncoderConfig is the root table of a serialized buffer of this schema.
root_type EncoderConfig;