1 /* 2 * Copyright 2022 Google LLC 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef FCP_DICTIONARY_DICTIONARY_H_ 17 #define FCP_DICTIONARY_DICTIONARY_H_ 18 19 #include <cstdint> 20 #include <memory> 21 #include <string> 22 #include <vector> 23 24 #include "absl/status/statusor.h" 25 #include "absl/strings/string_view.h" 26 #include "fcp/dictionary/dictionary.pb.h" 27 28 namespace fcp { 29 namespace dictionary { 30 31 // Interface for mapping tokens (usually words) to indices. 32 class Dictionary { 33 public: ~Dictionary()34 virtual ~Dictionary() {} 35 36 // Returns the number of elements in the dictionary. 37 virtual int32_t Size() const = 0; 38 39 // Returns the index of token in the dictionary or kNotFound if not found. 40 virtual int32_t TokenToId(const std::string& token) const = 0; 41 42 // Maps an ID to a string if the ID represents a valid token. 43 // Returns "" on error. 44 virtual std::string IdToToken(int32_t id) const = 0; 45 46 // Returns true if the given id is set via DictionaryDescription.SpecialIds. 47 virtual bool IsSpecialId(int32_t id) const = 0; 48 49 // Returns a sorted (ascending) list of ids to filter from the predictions. 50 // Can be used for e.g. punctuation. Includes special ids. 51 virtual const std::vector<int32_t>& GetSortedOutputBlocklistIds() const = 0; 52 53 // Returns the special ids used in this dictionary. 54 virtual const DictionaryDescription::SpecialIds& GetSpecialIds() const = 0; 55 56 // Id returned when an element is not found. This is distinct from the id 57 // of the unknown_token (if one is configured). 58 static constexpr int32_t kNotFound = -1; 59 60 // 61 // Static constructors 62 // 63 64 // Creates a dictionary from a self-describing DictionaryDescription proto. 65 static absl::StatusOr<std::unique_ptr<Dictionary>> Create( 66 const DictionaryDescription& description); 67 }; 68 69 } // namespace dictionary 70 } // namespace fcp 71 72 #endif // FCP_DICTIONARY_DICTIONARY_H_ 73