xref: /aosp_15_r20/external/icing/icing/schema/property-util.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_SCHEMA_PROPERTY_UTIL_H_
16 #define ICING_SCHEMA_PROPERTY_UTIL_H_
17 
18 #include <cstddef>
19 #include <cstdint>
20 #include <string>
21 #include <string_view>
22 #include <utility>
23 #include <vector>
24 
25 #include "icing/text_classifier/lib3/utils/base/statusor.h"
26 #include "icing/absl_ports/canonical_errors.h"
27 #include "icing/proto/document.pb.h"
28 
29 namespace icing {
30 namespace lib {
31 
32 namespace property_util {
33 
34 // Definition:
35 // - Expr (short for expression): with or without index.
36 // - property_name: one level of property name without index. E.g. "abc", "def".
37 // - property_name_expr: one level of property name with or without index. E.g.
38 //                       "abc", "abc[0]", "def[1]".
39 // - property_path: multiple levels (including one) of property names without
40 //                  indices. E.g. "abc", "abc.def".
41 // - property_path_expr: multiple levels (including one) of property name
42 //                       expressions. E.g. "abc", "abc[0]", "abc.def",
43 //                       "abc[0].def", "abc[0].def[1]".
44 //
45 // Set relationship graph (A -> B: A is a subset of B):
46 //
47 // property_path -> property_path_expr
48 //      ^                   ^
49 //      |                   |
50 // property_name -> property_name_expr
//
// Separator between consecutive property name expressions inside a property
// path expression, e.g. the "." in "abc.def".
inline constexpr std::string_view kPropertyPathSeparator = ".";
// Delimiters wrapping the index of a property name expression, e.g. the "["
// and "]" in "abc[0]".
inline constexpr std::string_view kLBracket = "[";
inline constexpr std::string_view kRBracket = "]";

// Index reported for a property name expression that has no index expression
// (see ParsePropertyNameExpr). ConvertToPropertyExprIndexStr renders it as "".
inline constexpr int kWildcardPropertyIndex = -1;
56 
// One parsed level of a property path expression: the property name together
// with the index that was attached to it.
struct PropertyInfo {
  // Property name with the index expression removed, e.g. "foo" for "foo[5]".
  std::string name;
  // Index taken from the expression, or kWildcardPropertyIndex when the
  // expression carried no index.
  int index;

  // Takes `name_in` by value and moves it into `name`, so a temporary passed
  // by the caller is moved rather than copied.
  explicit PropertyInfo(std::string name_in, int index_in)
      : name(std::move(name_in)), index(index_in) {}
};
64 
65 // Converts a property (value) index to string, wrapped by kLBracket and
66 // kRBracket.
67 //
68 // REQUIRES: index should be valid or kWildcardPropertyIndex.
69 //
70 // Returns:
71 //   - "" if index is kWildcardPropertyIndex.
72 //   - kLBracket + std::to_string(index) + kRBracket for all non
73 //     kWildcardPropertyIndex indices.
74 std::string ConvertToPropertyExprIndexStr(int index);
75 
76 // Concatenates 2 property path expressions.
77 //
78 // Returns:
79 //   - property_path_expr1 + "." + property_path_expr2 if both are not empty.
80 //   - property_path_expr1 if property_path_expr2 is empty.
81 //   - property_path_expr2 if property_path_expr1 is empty.
82 //   - "" if both are empty.
83 std::string ConcatenatePropertyPathExpr(std::string_view property_path_expr1,
84                                         std::string_view property_path_expr2);
85 
86 // Splits a property path expression into multiple property name expressions.
87 //
88 // Returns: a vector of property name expressions.
89 std::vector<std::string_view> SplitPropertyPathExpr(
90     std::string_view property_path_expr);
91 
92 // Parses a property name expression into (property name, property index). If
93 // the index expression is missing, then the returned property index will be
94 // kWildcardPropertyIndex.
95 //
96 // Examples:
97 //   - ParsePropertyNameExpr("foo") will return ("foo",
98 //     kWildcardPropertyIndex).
99 //   - ParsePropertyNameExpr("foo[5]") will return ("foo", 5).
100 //
101 // Returns: a PropertyInfo instance.
102 PropertyInfo ParsePropertyNameExpr(std::string_view property_name_expr);
103 
104 // Parses a property path expression into multiple (property name, property
105 // index). It is similar to ParsePropertyNameExpr, except property path
106 // expression can contain multiple name expressions.
107 //
108 // Examples:
109 //   - ParsePropertyPathExpr("foo") will return [("foo",
110 //     kWildcardPropertyIndex)].
111 //   - ParsePropertyPathExpr("foo[5]") will return [("foo", 5)].
112 //   - ParsePropertyPathExpr("foo.bar[2]") will return [("foo",
113 //     kWildcardPropertyIndex), ("bar", 2)]
114 //
115 // Returns: a vector of PropertyInfo instances.
116 std::vector<PropertyInfo> ParsePropertyPathExpr(
117     std::string_view property_path_expr);
118 
119 // A property path property_path_expr1 is considered a parent of another
120 // property path property_path_expr2 if:
121 // 1. property_path_expr2 == property_path_expr1, OR
122 // 2. property_path_expr2 consists of the entire path of property_path_expr1
123 //    + "." + [some other property path].
124 //
125 // Note that this can only be used for property name strings that do not
126 // contain the property index.
127 //
128 // Examples:
129 //   - IsParentPropertyPath("foo", "foo") will return true.
130 //   - IsParentPropertyPath("foo", "foo.bar") will return true.
131 //   - IsParentPropertyPath("foo", "bar.foo") will return false.
132 //   - IsParentPropertyPath("foo.bar", "foo.foo.bar") will return false.
133 //
134 // Returns: true if property_path_expr1 is a parent property path of
135 // property_path_expr2.
136 bool IsParentPropertyPath(std::string_view property_path_expr1,
137                           std::string_view property_path_expr2);
138 
139 // Gets the desired PropertyProto from the document by given property name.
140 // Since the input parameter is property name, this function only deals with
141 // the first level of properties in the document and cannot deal with nested
142 // documents.
143 //
144 // Returns:
145 //   - const PropertyProto* if property name exists in the document.
146 //   - nullptr if property name not found.
147 const PropertyProto* GetPropertyProto(const DocumentProto& document,
148                                       std::string_view property_name);
149 
150 template <typename T>
ExtractPropertyValues(const PropertyProto & property)151 libtextclassifier3::StatusOr<std::vector<T>> ExtractPropertyValues(
152     const PropertyProto& property) {
153   return absl_ports::UnimplementedError(
154       "Unimplemented template type for ExtractPropertyValues");
155 }
156 
157 template <>
158 libtextclassifier3::StatusOr<std::vector<std::string>>
159 ExtractPropertyValues<std::string>(const PropertyProto& property);
160 
161 template <>
162 libtextclassifier3::StatusOr<std::vector<std::string_view>>
163 ExtractPropertyValues<std::string_view>(const PropertyProto& property);
164 
165 template <>
166 libtextclassifier3::StatusOr<std::vector<int64_t>>
167 ExtractPropertyValues<int64_t>(const PropertyProto& property);
168 
169 template <>
170 libtextclassifier3::StatusOr<std::vector<double>> ExtractPropertyValues<double>(
171     const PropertyProto& property);
172 
173 template <>
174 libtextclassifier3::StatusOr<std::vector<bool>> ExtractPropertyValues<bool>(
175     const PropertyProto& property);
176 
177 template <>
178 libtextclassifier3::StatusOr<std::vector<PropertyProto::VectorProto>>
179 ExtractPropertyValues<PropertyProto::VectorProto>(
180     const PropertyProto& property);
181 
182 template <>
183 libtextclassifier3::StatusOr<std::vector<PropertyProto::BlobHandleProto>>
184 ExtractPropertyValues<PropertyProto::BlobHandleProto>(
185     const PropertyProto& property);
186 
187 template <typename T>
ExtractPropertyValuesFromDocument(const DocumentProto & document,std::string_view property_path)188 libtextclassifier3::StatusOr<std::vector<T>> ExtractPropertyValuesFromDocument(
189     const DocumentProto& document, std::string_view property_path) {
190   // Finds the first property name in property_path
191   size_t separator_position = property_path.find(kPropertyPathSeparator);
192   std::string_view current_property_name =
193       (separator_position == std::string::npos)
194           ? property_path
195           : property_path.substr(0, separator_position);
196 
197   const PropertyProto* property_proto =
198       GetPropertyProto(document, current_property_name);
199   if (property_proto == nullptr) {
200     // Property name not found, it could be one of the following 2 cases:
201     // 1. The property is optional and it's not in the document
202     // 2. The property name is invalid
203     return std::vector<T>();
204   }
205 
206   if (separator_position == std::string::npos) {
207     // Current property name is the last one in property path.
208     return ExtractPropertyValues<T>(*property_proto);
209   }
210 
211   // Extracts property values recursively
212   std::string_view sub_property_path =
213       property_path.substr(separator_position + 1);
214   std::vector<T> nested_document_content;
215   for (const DocumentProto& nested_document :
216        property_proto->document_values()) {
217     auto content_or = ExtractPropertyValuesFromDocument<T>(nested_document,
218                                                            sub_property_path);
219     if (content_or.ok()) {
220       std::vector<T> content = std::move(content_or).ValueOrDie();
221       std::move(content.begin(), content.end(),
222                 std::back_inserter(nested_document_content));
223     }
224   }
225   return nested_document_content;
226 }
227 
228 }  // namespace property_util
229 
230 }  // namespace lib
231 }  // namespace icing
232 
233 #endif  // ICING_SCHEMA_PROPERTY_UTIL_H_
234