1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifndef ICING_SCHEMA_PROPERTY_UTIL_H_
16 #define ICING_SCHEMA_PROPERTY_UTIL_H_
17
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
24
25 #include "icing/text_classifier/lib3/utils/base/statusor.h"
26 #include "icing/absl_ports/canonical_errors.h"
27 #include "icing/proto/document.pb.h"
28
29 namespace icing {
30 namespace lib {
31
32 namespace property_util {
33
// Definition:
// - Expr (short for expression): with or without index.
// - property_name: one level of property name without index. E.g. "abc", "def".
// - property_name_expr: one level of property name with or without index. E.g.
//                       "abc", "abc[0]", "def[1]".
// - property_path: multiple levels (including one) of property names without
//                  indices. E.g. "abc", "abc.def".
// - property_path_expr: multiple levels (including one) of property name
//                       expressions. E.g. "abc", "abc[0]", "abc.def",
//                       "abc[0].def", "abc[0].def[1]".
//
// Set relationship graph (A -> B: A is a subset of B):
//
// property_path -> property_path_expr
//       ^                  ^
//       |                  |
// property_name -> property_name_expr

// Separator placed between consecutive property name expressions in a property
// path expression, e.g. the "." in "abc.def".
inline constexpr std::string_view kPropertyPathSeparator = ".";
// Opening and closing delimiters of an index expression, e.g. the "[" and "]"
// in "abc[0]".
inline constexpr std::string_view kLBracket = "[";
inline constexpr std::string_view kRBracket = "]";

// Index value standing for "no index expression present": it is what
// ParsePropertyNameExpr reports for an expression such as "foo", and
// ConvertToPropertyExprIndexStr maps it to "".
inline constexpr int kWildcardPropertyIndex = -1;
56
// One parsed property name expression: a property name plus the index that
// accompanied it. This is the element type returned by ParsePropertyNameExpr
// and ParsePropertyPathExpr.
struct PropertyInfo {
  // Property name, e.g. "foo" for the expression "foo[5]".
  std::string name;
  // Property index, e.g. 5 for the expression "foo[5]". The parse functions
  // report kWildcardPropertyIndex when the expression carries no index.
  int index;

  // Takes name_in by value and moves it into `name`, so callers can hand over
  // a temporary or an std::move'd string without an extra copy.
  explicit PropertyInfo(std::string name_in, int index_in)
      : name(std::move(name_in)), index(index_in) {}
};
64
// Converts a property (value) index to its string form, wrapped by kLBracket
// and kRBracket.
//
// REQUIRES: index should be valid or kWildcardPropertyIndex.
//
// Examples:
//   - ConvertToPropertyExprIndexStr(5) will return "[5]".
//   - ConvertToPropertyExprIndexStr(kWildcardPropertyIndex) will return "".
//
// Returns:
//   - "" if index is kWildcardPropertyIndex.
//   - kLBracket + std::to_string(index) + kRBracket for all non
//     kWildcardPropertyIndex indices.
std::string ConvertToPropertyExprIndexStr(int index);
75
// Concatenates 2 property path expressions, inserting the path separator only
// when both sides are non-empty.
//
// Examples:
//   - ConcatenatePropertyPathExpr("abc[0]", "def") will return "abc[0].def".
//   - ConcatenatePropertyPathExpr("abc", "") will return "abc".
//
// Returns:
//   - property_path_expr1 + "." + property_path_expr2 if both are not empty.
//   - property_path_expr1 if property_path_expr2 is empty.
//   - property_path_expr2 if property_path_expr1 is empty.
//   - "" if both are empty.
std::string ConcatenatePropertyPathExpr(std::string_view property_path_expr1,
                                        std::string_view property_path_expr2);
85
// Splits a property path expression into multiple property name expressions.
//
// NOTE(review): the implementation is not visible in this header. Presumably
// the split happens at kPropertyPathSeparator (e.g. "abc[0].def" ->
// ["abc[0]", "def"]) and the returned string_views refer to the storage behind
// property_path_expr, which would then have to outlive the result - confirm
// against the definition.
//
// Returns: a vector of property name expressions.
std::vector<std::string_view> SplitPropertyPathExpr(
    std::string_view property_path_expr);
91
// Parses a single property name expression into (property name, property
// index). If the index expression is missing, then the returned property index
// will be kWildcardPropertyIndex.
//
// Examples:
//   - ParsePropertyNameExpr("foo") will return ("foo",
//     kWildcardPropertyIndex).
//   - ParsePropertyNameExpr("foo[5]") will return ("foo", 5).
//
// Returns: a PropertyInfo instance holding the parsed name and index.
PropertyInfo ParsePropertyNameExpr(std::string_view property_name_expr);
103
// Parses a property path expression into multiple (property name, property
// index). It is similar to ParsePropertyNameExpr, except property path
// expression can contain multiple name expressions.
//
// Examples:
//   - ParsePropertyPathExpr("foo") will return [("foo",
//     kWildcardPropertyIndex)].
//   - ParsePropertyPathExpr("foo[5]") will return [("foo", 5)].
//   - ParsePropertyPathExpr("foo.bar[2]") will return [("foo",
//     kWildcardPropertyIndex), ("bar", 2)]
//
// Returns: a vector of PropertyInfo instances, one per name expression in the
//          order shown in the examples above.
std::vector<PropertyInfo> ParsePropertyPathExpr(
    std::string_view property_path_expr);
118
// Checks whether one property path is a parent of another.
//
// A property path property_path_expr1 is considered a parent of another
// property path property_path_expr2 if:
// 1. property_path_expr2 == property_path_expr1, OR
// 2. property_path_expr2 consists of the entire path of property_path_expr1
//    + "." + [some other property path].
//
// Note that this can only be used for property name strings that do not
// contain the property index.
//
// Examples:
//   - IsParentPropertyPath("foo", "foo") will return true.
//   - IsParentPropertyPath("foo", "foo.bar") will return true.
//   - IsParentPropertyPath("foo", "bar.foo") will return false.
//   - IsParentPropertyPath("foo.bar", "foo.foo.bar") will return false.
//
// Returns: true if property_path_expr1 is a parent property path of
//          property_path_expr2.
bool IsParentPropertyPath(std::string_view property_path_expr1,
                          std::string_view property_path_expr2);
138
// Gets the desired PropertyProto from the document by given property name.
// Since the input parameter is property name, this function only deals with
// the first level of properties in the document and cannot deal with nested
// documents.
//
// NOTE(review): the returned pointer presumably points into `document`, so it
// would only be valid while `document` is alive and unmodified - confirm
// against the definition.
//
// Returns:
//   - const PropertyProto* if property name exists in the document.
//   - nullptr if property name not found.
const PropertyProto* GetPropertyProto(const DocumentProto& document,
                                      std::string_view property_name);
149
150 template <typename T>
ExtractPropertyValues(const PropertyProto & property)151 libtextclassifier3::StatusOr<std::vector<T>> ExtractPropertyValues(
152 const PropertyProto& property) {
153 return absl_ports::UnimplementedError(
154 "Unimplemented template type for ExtractPropertyValues");
155 }
156
// Explicit specializations of ExtractPropertyValues, one per supported value
// type: std::string, std::string_view, int64_t, double, bool,
// PropertyProto::VectorProto and PropertyProto::BlobHandleProto.
//
// They are only declared here; their definitions are not part of this header.
// Any other T falls back to the primary template, which returns an
// unimplemented error.
//
// NOTE(review): the std::string_view specialization presumably returns views
// into `property`, which would then have to outlive the result - confirm
// against the definition.

// Values as owned strings.
template <>
libtextclassifier3::StatusOr<std::vector<std::string>>
ExtractPropertyValues<std::string>(const PropertyProto& property);

// Values as string views.
template <>
libtextclassifier3::StatusOr<std::vector<std::string_view>>
ExtractPropertyValues<std::string_view>(const PropertyProto& property);

// Values as 64-bit signed integers.
template <>
libtextclassifier3::StatusOr<std::vector<int64_t>>
ExtractPropertyValues<int64_t>(const PropertyProto& property);

// Values as doubles.
template <>
libtextclassifier3::StatusOr<std::vector<double>> ExtractPropertyValues<double>(
    const PropertyProto& property);

// Values as booleans.
template <>
libtextclassifier3::StatusOr<std::vector<bool>> ExtractPropertyValues<bool>(
    const PropertyProto& property);

// Values as PropertyProto::VectorProto messages.
template <>
libtextclassifier3::StatusOr<std::vector<PropertyProto::VectorProto>>
ExtractPropertyValues<PropertyProto::VectorProto>(
    const PropertyProto& property);

// Values as PropertyProto::BlobHandleProto messages.
template <>
libtextclassifier3::StatusOr<std::vector<PropertyProto::BlobHandleProto>>
ExtractPropertyValues<PropertyProto::BlobHandleProto>(
    const PropertyProto& property);
186
187 template <typename T>
ExtractPropertyValuesFromDocument(const DocumentProto & document,std::string_view property_path)188 libtextclassifier3::StatusOr<std::vector<T>> ExtractPropertyValuesFromDocument(
189 const DocumentProto& document, std::string_view property_path) {
190 // Finds the first property name in property_path
191 size_t separator_position = property_path.find(kPropertyPathSeparator);
192 std::string_view current_property_name =
193 (separator_position == std::string::npos)
194 ? property_path
195 : property_path.substr(0, separator_position);
196
197 const PropertyProto* property_proto =
198 GetPropertyProto(document, current_property_name);
199 if (property_proto == nullptr) {
200 // Property name not found, it could be one of the following 2 cases:
201 // 1. The property is optional and it's not in the document
202 // 2. The property name is invalid
203 return std::vector<T>();
204 }
205
206 if (separator_position == std::string::npos) {
207 // Current property name is the last one in property path.
208 return ExtractPropertyValues<T>(*property_proto);
209 }
210
211 // Extracts property values recursively
212 std::string_view sub_property_path =
213 property_path.substr(separator_position + 1);
214 std::vector<T> nested_document_content;
215 for (const DocumentProto& nested_document :
216 property_proto->document_values()) {
217 auto content_or = ExtractPropertyValuesFromDocument<T>(nested_document,
218 sub_property_path);
219 if (content_or.ok()) {
220 std::vector<T> content = std::move(content_or).ValueOrDie();
221 std::move(content.begin(), content.end(),
222 std::back_inserter(nested_document_content));
223 }
224 }
225 return nested_document_content;
226 }
227
228 } // namespace property_util
229
230 } // namespace lib
231 } // namespace icing
232
233 #endif // ICING_SCHEMA_PROPERTY_UTIL_H_
234