xref: /aosp_15_r20/external/federated-compute/fcp/dictionary/dictionary.h (revision 14675a029014e728ec732f129a32e299b2da0601)
1 /*
2  * Copyright 2022 Google LLC
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef FCP_DICTIONARY_DICTIONARY_H_
17 #define FCP_DICTIONARY_DICTIONARY_H_
18 
19 #include <cstdint>
20 #include <memory>
21 #include <string>
22 #include <vector>
23 
24 #include "absl/status/statusor.h"
25 #include "absl/strings/string_view.h"
26 #include "fcp/dictionary/dictionary.pb.h"
27 
28 namespace fcp {
29 namespace dictionary {
30 
31 // Interface for mapping tokens (usually words) to indices.
32 class Dictionary {
33  public:
~Dictionary()34   virtual ~Dictionary() {}
35 
36   // Returns the number of elements in the dictionary.
37   virtual int32_t Size() const = 0;
38 
39   // Returns the index of token in the dictionary or kNotFound if not found.
40   virtual int32_t TokenToId(const std::string& token) const = 0;
41 
42   // Maps an ID to a string if the ID represents a valid token.
43   // Returns "" on error.
44   virtual std::string IdToToken(int32_t id) const = 0;
45 
46   // Returns true if the given id is set via DictionaryDescription.SpecialIds.
47   virtual bool IsSpecialId(int32_t id) const = 0;
48 
49   // Returns a sorted (ascending) list of ids to filter from the predictions.
50   // Can be used for e.g. punctuation. Includes special ids.
51   virtual const std::vector<int32_t>& GetSortedOutputBlocklistIds() const = 0;
52 
53   // Returns the special ids used in this dictionary.
54   virtual const DictionaryDescription::SpecialIds& GetSpecialIds() const = 0;
55 
56   // Id returned when an element is not found. This is distinct from the id
57   // of the unknown_token (if one is configured).
58   static constexpr int32_t kNotFound = -1;
59 
60   //
61   // Static constructors
62   //
63 
64   // Creates a dictionary from a self-describing DictionaryDescription proto.
65   static absl::StatusOr<std::unique_ptr<Dictionary>> Create(
66       const DictionaryDescription& description);
67 };
68 
69 }  // namespace dictionary
70 }  // namespace fcp
71 
72 #endif  // FCP_DICTIONARY_DICTIONARY_H_
73