// xref: /aosp_15_r20/external/federated-compute/fcp/dictionary/dictionary.h (revision 14675a029014e728ec732f129a32e299b2da0601)
/*
 * Copyright 2022 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16*14675a02SAndroid Build Coastguard Worker #ifndef FCP_DICTIONARY_DICTIONARY_H_
17*14675a02SAndroid Build Coastguard Worker #define FCP_DICTIONARY_DICTIONARY_H_
18*14675a02SAndroid Build Coastguard Worker 
19*14675a02SAndroid Build Coastguard Worker #include <cstdint>
20*14675a02SAndroid Build Coastguard Worker #include <memory>
21*14675a02SAndroid Build Coastguard Worker #include <string>
22*14675a02SAndroid Build Coastguard Worker #include <vector>
23*14675a02SAndroid Build Coastguard Worker 
24*14675a02SAndroid Build Coastguard Worker #include "absl/status/statusor.h"
25*14675a02SAndroid Build Coastguard Worker #include "absl/strings/string_view.h"
26*14675a02SAndroid Build Coastguard Worker #include "fcp/dictionary/dictionary.pb.h"
27*14675a02SAndroid Build Coastguard Worker 
28*14675a02SAndroid Build Coastguard Worker namespace fcp {
29*14675a02SAndroid Build Coastguard Worker namespace dictionary {
30*14675a02SAndroid Build Coastguard Worker 
31*14675a02SAndroid Build Coastguard Worker // Interface for mapping tokens (usually words) to indices.
32*14675a02SAndroid Build Coastguard Worker class Dictionary {
33*14675a02SAndroid Build Coastguard Worker  public:
~Dictionary()34*14675a02SAndroid Build Coastguard Worker   virtual ~Dictionary() {}
35*14675a02SAndroid Build Coastguard Worker 
36*14675a02SAndroid Build Coastguard Worker   // Returns the number of elements in the dictionary.
37*14675a02SAndroid Build Coastguard Worker   virtual int32_t Size() const = 0;
38*14675a02SAndroid Build Coastguard Worker 
39*14675a02SAndroid Build Coastguard Worker   // Returns the index of token in the dictionary or kNotFound if not found.
40*14675a02SAndroid Build Coastguard Worker   virtual int32_t TokenToId(const std::string& token) const = 0;
41*14675a02SAndroid Build Coastguard Worker 
42*14675a02SAndroid Build Coastguard Worker   // Maps an ID to a string if the ID represents a valid token.
43*14675a02SAndroid Build Coastguard Worker   // Returns "" on error.
44*14675a02SAndroid Build Coastguard Worker   virtual std::string IdToToken(int32_t id) const = 0;
45*14675a02SAndroid Build Coastguard Worker 
46*14675a02SAndroid Build Coastguard Worker   // Returns true if the given id is set via DictionaryDescription.SpecialIds.
47*14675a02SAndroid Build Coastguard Worker   virtual bool IsSpecialId(int32_t id) const = 0;
48*14675a02SAndroid Build Coastguard Worker 
49*14675a02SAndroid Build Coastguard Worker   // Returns a sorted (ascending) list of ids to filter from the predictions.
50*14675a02SAndroid Build Coastguard Worker   // Can be used for e.g. punctuation. Includes special ids.
51*14675a02SAndroid Build Coastguard Worker   virtual const std::vector<int32_t>& GetSortedOutputBlocklistIds() const = 0;
52*14675a02SAndroid Build Coastguard Worker 
53*14675a02SAndroid Build Coastguard Worker   // Returns the special ids used in this dictionary.
54*14675a02SAndroid Build Coastguard Worker   virtual const DictionaryDescription::SpecialIds& GetSpecialIds() const = 0;
55*14675a02SAndroid Build Coastguard Worker 
56*14675a02SAndroid Build Coastguard Worker   // Id returned when an element is not found. This is distinct from the id
57*14675a02SAndroid Build Coastguard Worker   // of the unknown_token (if one is configured).
58*14675a02SAndroid Build Coastguard Worker   static constexpr int32_t kNotFound = -1;
59*14675a02SAndroid Build Coastguard Worker 
60*14675a02SAndroid Build Coastguard Worker   //
61*14675a02SAndroid Build Coastguard Worker   // Static constructors
62*14675a02SAndroid Build Coastguard Worker   //
63*14675a02SAndroid Build Coastguard Worker 
64*14675a02SAndroid Build Coastguard Worker   // Creates a dictionary from a self-describing DictionaryDescription proto.
65*14675a02SAndroid Build Coastguard Worker   static absl::StatusOr<std::unique_ptr<Dictionary>> Create(
66*14675a02SAndroid Build Coastguard Worker       const DictionaryDescription& description);
67*14675a02SAndroid Build Coastguard Worker };
68*14675a02SAndroid Build Coastguard Worker 
69*14675a02SAndroid Build Coastguard Worker }  // namespace dictionary
70*14675a02SAndroid Build Coastguard Worker }  // namespace fcp
71*14675a02SAndroid Build Coastguard Worker 
72*14675a02SAndroid Build Coastguard Worker #endif  // FCP_DICTIONARY_DICTIONARY_H_
73