1*14675a02SAndroid Build Coastguard Worker /* 2*14675a02SAndroid Build Coastguard Worker * Copyright 2022 Google LLC 3*14675a02SAndroid Build Coastguard Worker * 4*14675a02SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License"); 5*14675a02SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License. 6*14675a02SAndroid Build Coastguard Worker * You may obtain a copy of the License at 7*14675a02SAndroid Build Coastguard Worker * 8*14675a02SAndroid Build Coastguard Worker * http://www.apache.org/licenses/LICENSE-2.0 9*14675a02SAndroid Build Coastguard Worker * 10*14675a02SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software 11*14675a02SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS, 12*14675a02SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13*14675a02SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and 14*14675a02SAndroid Build Coastguard Worker * limitations under the License. 15*14675a02SAndroid Build Coastguard Worker */ 16*14675a02SAndroid Build Coastguard Worker #ifndef FCP_DICTIONARY_DICTIONARY_H_ 17*14675a02SAndroid Build Coastguard Worker #define FCP_DICTIONARY_DICTIONARY_H_ 18*14675a02SAndroid Build Coastguard Worker 19*14675a02SAndroid Build Coastguard Worker #include <cstdint> 20*14675a02SAndroid Build Coastguard Worker #include <memory> 21*14675a02SAndroid Build Coastguard Worker #include <string> 22*14675a02SAndroid Build Coastguard Worker #include <vector> 23*14675a02SAndroid Build Coastguard Worker 24*14675a02SAndroid Build Coastguard Worker #include "absl/status/statusor.h" 25*14675a02SAndroid Build Coastguard Worker #include "absl/strings/string_view.h" 26*14675a02SAndroid Build Coastguard Worker #include "fcp/dictionary/dictionary.pb.h" 27*14675a02SAndroid Build Coastguard Worker 28*14675a02SAndroid Build Coastguard Worker namespace fcp { 29*14675a02SAndroid Build Coastguard Worker namespace dictionary { 30*14675a02SAndroid Build Coastguard Worker 31*14675a02SAndroid Build Coastguard Worker // Interface for mapping tokens (usually words) to indices. 32*14675a02SAndroid Build Coastguard Worker class Dictionary { 33*14675a02SAndroid Build Coastguard Worker public: ~Dictionary()34*14675a02SAndroid Build Coastguard Worker virtual ~Dictionary() {} 35*14675a02SAndroid Build Coastguard Worker 36*14675a02SAndroid Build Coastguard Worker // Returns the number of elements in the dictionary. 37*14675a02SAndroid Build Coastguard Worker virtual int32_t Size() const = 0; 38*14675a02SAndroid Build Coastguard Worker 39*14675a02SAndroid Build Coastguard Worker // Returns the index of token in the dictionary or kNotFound if not found. 40*14675a02SAndroid Build Coastguard Worker virtual int32_t TokenToId(const std::string& token) const = 0; 41*14675a02SAndroid Build Coastguard Worker 42*14675a02SAndroid Build Coastguard Worker // Maps an ID to a string if the ID represents a valid token. 43*14675a02SAndroid Build Coastguard Worker // Returns "" on error. 44*14675a02SAndroid Build Coastguard Worker virtual std::string IdToToken(int32_t id) const = 0; 45*14675a02SAndroid Build Coastguard Worker 46*14675a02SAndroid Build Coastguard Worker // Returns true if the given id is set via DictionaryDescription.SpecialIds. 47*14675a02SAndroid Build Coastguard Worker virtual bool IsSpecialId(int32_t id) const = 0; 48*14675a02SAndroid Build Coastguard Worker 49*14675a02SAndroid Build Coastguard Worker // Returns a sorted (ascending) list of ids to filter from the predictions. 50*14675a02SAndroid Build Coastguard Worker // Can be used for e.g. punctuation. Includes special ids. 51*14675a02SAndroid Build Coastguard Worker virtual const std::vector<int32_t>& GetSortedOutputBlocklistIds() const = 0; 52*14675a02SAndroid Build Coastguard Worker 53*14675a02SAndroid Build Coastguard Worker // Returns the special ids used in this dictionary. 54*14675a02SAndroid Build Coastguard Worker virtual const DictionaryDescription::SpecialIds& GetSpecialIds() const = 0; 55*14675a02SAndroid Build Coastguard Worker 56*14675a02SAndroid Build Coastguard Worker // Id returned when an element is not found. This is distinct from the id 57*14675a02SAndroid Build Coastguard Worker // of the unknown_token (if one is configured). 58*14675a02SAndroid Build Coastguard Worker static constexpr int32_t kNotFound = -1; 59*14675a02SAndroid Build Coastguard Worker 60*14675a02SAndroid Build Coastguard Worker // 61*14675a02SAndroid Build Coastguard Worker // Static constructors 62*14675a02SAndroid Build Coastguard Worker // 63*14675a02SAndroid Build Coastguard Worker 64*14675a02SAndroid Build Coastguard Worker // Creates a dictionary from a self-describing DictionaryDescription proto. 65*14675a02SAndroid Build Coastguard Worker static absl::StatusOr<std::unique_ptr<Dictionary>> Create( 66*14675a02SAndroid Build Coastguard Worker const DictionaryDescription& description); 67*14675a02SAndroid Build Coastguard Worker }; 68*14675a02SAndroid Build Coastguard Worker 69*14675a02SAndroid Build Coastguard Worker } // namespace dictionary 70*14675a02SAndroid Build Coastguard Worker } // namespace fcp 71*14675a02SAndroid Build Coastguard Worker 72*14675a02SAndroid Build Coastguard Worker #endif // FCP_DICTIONARY_DICTIONARY_H_ 73