1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/iterator/doc-hit-info-iterator-by-uri.h"
16
17 #include <memory>
18 #include <string>
19 #include <utility>
20
21 #include "icing/text_classifier/lib3/utils/base/status.h"
22 #include "gmock/gmock.h"
23 #include "gtest/gtest.h"
24 #include "icing/document-builder.h"
25 #include "icing/feature-flags.h"
26 #include "icing/file/filesystem.h"
27 #include "icing/file/portable-file-backed-proto-log.h"
28 #include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
29 #include "icing/proto/document.pb.h"
30 #include "icing/proto/schema.pb.h"
31 #include "icing/schema-builder.h"
32 #include "icing/schema/schema-store.h"
33 #include "icing/store/document-id.h"
34 #include "icing/store/document-store.h"
35 #include "icing/testing/common-matchers.h"
36 #include "icing/testing/fake-clock.h"
37 #include "icing/testing/test-feature-flags.h"
38 #include "icing/testing/tmp-directory.h"
39
40 namespace icing {
41 namespace lib {
42
43 namespace {
44
45 using ::testing::ElementsAre;
46 using ::testing::IsEmpty;
47
48 class DocHitInfoIteratorByUriTest : public ::testing::Test {
49 protected:
DocHitInfoIteratorByUriTest()50 DocHitInfoIteratorByUriTest() : test_dir_(GetTestTempDir() + "/icing") {}
51
SetUp()52 void SetUp() override {
53 feature_flags_ = std::make_unique<FeatureFlags>(GetTestFeatureFlags());
54
55 filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
56
57 SchemaProto schema =
58 SchemaBuilder()
59 .AddType(SchemaTypeConfigBuilder().SetType("email"))
60 .Build();
61 ICING_ASSERT_OK_AND_ASSIGN(
62 schema_store_, SchemaStore::Create(&filesystem_, test_dir_,
63 &fake_clock_, feature_flags_.get()));
64 ICING_ASSERT_OK(schema_store_->SetSchema(
65 schema, /*ignore_errors_and_delete_documents=*/false,
66 /*allow_circular_schema_definitions=*/false));
67
68 ICING_ASSERT_OK_AND_ASSIGN(
69 DocumentStore::CreateResult create_result,
70 DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
71 schema_store_.get(), feature_flags_.get(),
72 /*force_recovery_and_revalidate_documents=*/false,
73 /*pre_mapping_fbv=*/false,
74 /*use_persistent_hash_map=*/true,
75 PortableFileBackedProtoLog<
76 DocumentWrapper>::kDefaultCompressionLevel,
77 /*initialize_stats=*/nullptr));
78 document_store_ = std::move(create_result.document_store);
79 }
80
TearDown()81 void TearDown() override {
82 document_store_.reset();
83 schema_store_.reset();
84 filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
85 }
86
87 std::unique_ptr<FeatureFlags> feature_flags_;
88 std::unique_ptr<SchemaStore> schema_store_;
89 std::unique_ptr<DocumentStore> document_store_;
90 FakeClock fake_clock_;
91 const Filesystem filesystem_;
92 const std::string test_dir_;
93 };
94
// Create() must return INVALID_ARGUMENT when the search spec has no uri
// filter, and also when its only filter names a namespace but lists no uris.
TEST_F(DocHitInfoIteratorByUriTest, EmptyFilterIsInvalid) {
  const libtextclassifier3::StatusCode expected_code =
      libtextclassifier3::StatusCode::INVALID_ARGUMENT;

  // Case 1: the spec carries no document uri filter at all.
  SearchSpecProto search_spec;
  EXPECT_THAT(
      DocHitInfoIteratorByUri::Create(document_store_.get(), search_spec),
      StatusIs(expected_code));

  // Case 2: a namespace group exists, but no uri has been added to it.
  search_spec.add_document_uri_filters()->set_namespace_("namespace");
  EXPECT_THAT(
      DocHitInfoIteratorByUri::Create(document_store_.get(), search_spec),
      StatusIs(expected_code));
}
110
// Filtering on two of three stored uris must yield exactly those two
// documents.
TEST_F(DocHitInfoIteratorByUriTest, MatchesSomeDocuments) {
  // Builds an "email" document keyed by ("namespace", uri).
  auto make_email = [](const std::string& uri) {
    return DocumentBuilder()
        .SetKey("namespace", uri)
        .SetSchema("email")
        .Build();
  };
  DocumentProto email1 = make_email("email/1");
  DocumentProto email2 = make_email("email/2");
  DocumentProto email3 = make_email("email/3");

  // Store all three documents; only the ids of the first and third are
  // needed for the expectation below.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put,
                             document_store_->Put(email1));
  DocumentId id1 = put.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(put, document_store_->Put(email2));
  ICING_ASSERT_OK_AND_ASSIGN(put, document_store_->Put(email3));
  DocumentId id3 = put.new_document_id;

  // The uri filter names only email/1 and email/3.
  SearchSpecProto search_spec;
  NamespaceDocumentUriGroup* filter = search_spec.add_document_uri_filters();
  filter->set_namespace_("namespace");
  for (const char* uri : {"email/1", "email/3"}) {
    filter->add_document_uris(uri);
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIteratorByUri> iterator,
      DocHitInfoIteratorByUri::Create(document_store_.get(), search_spec));
  // The later-stored document is expected first.
  EXPECT_THAT(GetDocumentIds(iterator.get()), ElementsAre(id3, id1));
  // After the ids have been collected, another Advance() must not succeed.
  EXPECT_FALSE(iterator->Advance().ok());
}
147
// Filtering on every stored uri must yield all three documents.
TEST_F(DocHitInfoIteratorByUriTest, MatchesAllDocuments) {
  // Builds an "email" document keyed by ("namespace", uri).
  auto make_email = [](const std::string& uri) {
    return DocumentBuilder()
        .SetKey("namespace", uri)
        .SetSchema("email")
        .Build();
  };
  DocumentProto email1 = make_email("email/1");
  DocumentProto email2 = make_email("email/2");
  DocumentProto email3 = make_email("email/3");

  // Store the documents and remember the id assigned to each one.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put,
                             document_store_->Put(email1));
  DocumentId id1 = put.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(put, document_store_->Put(email2));
  DocumentId id2 = put.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(put, document_store_->Put(email3));
  DocumentId id3 = put.new_document_id;

  // The uri filter names all three stored uris.
  SearchSpecProto search_spec;
  NamespaceDocumentUriGroup* filter = search_spec.add_document_uri_filters();
  filter->set_namespace_("namespace");
  for (const char* uri : {"email/1", "email/2", "email/3"}) {
    filter->add_document_uris(uri);
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIteratorByUri> iterator,
      DocHitInfoIteratorByUri::Create(document_store_.get(), search_spec));
  // Results are expected in reverse order of insertion.
  EXPECT_THAT(GetDocumentIds(iterator.get()), ElementsAre(id3, id2, id1));
  // After the ids have been collected, another Advance() must not succeed.
  EXPECT_FALSE(iterator->Advance().ok());
}
185
// A filter that mixes existing uris with one unknown uri must still succeed
// and return only the documents that exist.
TEST_F(DocHitInfoIteratorByUriTest, NonexistentUriIsOk) {
  // Builds an "email" document keyed by ("namespace", uri).
  auto make_email = [](const std::string& uri) {
    return DocumentBuilder()
        .SetKey("namespace", uri)
        .SetSchema("email")
        .Build();
  };
  DocumentProto email1 = make_email("email/1");
  DocumentProto email2 = make_email("email/2");
  DocumentProto email3 = make_email("email/3");

  // Store the documents and remember the id assigned to each one.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put,
                             document_store_->Put(email1));
  DocumentId id1 = put.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(put, document_store_->Put(email2));
  DocumentId id2 = put.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(put, document_store_->Put(email3));
  DocumentId id3 = put.new_document_id;

  // Three stored uris followed by one uri that was never stored.
  SearchSpecProto search_spec;
  NamespaceDocumentUriGroup* filter = search_spec.add_document_uri_filters();
  filter->set_namespace_("namespace");
  for (const char* uri :
       {"email/1", "email/2", "email/3", "nonexistent_uri"}) {
    filter->add_document_uris(uri);
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIteratorByUri> iterator,
      DocHitInfoIteratorByUri::Create(document_store_.get(), search_spec));
  // The unknown uri contributes nothing; the rest come back in reverse
  // order of insertion.
  EXPECT_THAT(GetDocumentIds(iterator.get()), ElementsAre(id3, id2, id1));
  // After the ids have been collected, another Advance() must not succeed.
  EXPECT_FALSE(iterator->Advance().ok());
}
224
// A filter made up solely of unknown uris must create a valid iterator that
// produces no document ids.
TEST_F(DocHitInfoIteratorByUriTest, AllNonexistentUriShouldReturnEmptyResults) {
  // Builds an "email" document keyed by ("namespace", uri).
  auto make_email = [](const std::string& uri) {
    return DocumentBuilder()
        .SetKey("namespace", uri)
        .SetSchema("email")
        .Build();
  };
  DocumentProto email1 = make_email("email/1");
  DocumentProto email2 = make_email("email/2");
  DocumentProto email3 = make_email("email/3");

  // Store the documents; none of their ids are needed by this test.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put,
                             document_store_->Put(email1));
  ICING_ASSERT_OK_AND_ASSIGN(put, document_store_->Put(email2));
  ICING_ASSERT_OK_AND_ASSIGN(put, document_store_->Put(email3));

  // None of the filtered uris was stored above.
  SearchSpecProto search_spec;
  NamespaceDocumentUriGroup* filter = search_spec.add_document_uri_filters();
  filter->set_namespace_("namespace");
  for (const char* uri :
       {"nonexistent_uri1", "nonexistent_uri2", "nonexistent_uri3"}) {
    filter->add_document_uris(uri);
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIteratorByUri> iterator,
      DocHitInfoIteratorByUri::Create(document_store_.get(), search_spec));
  EXPECT_THAT(GetDocumentIds(iterator.get()), IsEmpty());
  // With nothing to return, Advance() must not succeed either.
  EXPECT_FALSE(iterator->Advance().ok());
}
258
// Uri filters spread over two namespace groups must each contribute their
// matching document.
TEST_F(DocHitInfoIteratorByUriTest, MultipleNamespaces) {
  // Builds an "email" document keyed by (name_space, uri).
  auto make_email = [](const std::string& name_space, const std::string& uri) {
    return DocumentBuilder()
        .SetKey(name_space, uri)
        .SetSchema("email")
        .Build();
  };
  DocumentProto email1 = make_email("namespace1", "email/1");
  DocumentProto email2 = make_email("namespace1", "email/2");
  DocumentProto email3 = make_email("namespace2", "email/3");

  // Store all three documents; only the ids of the first and third are
  // needed for the expectation below.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put,
                             document_store_->Put(email1));
  DocumentId id1 = put.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(put, document_store_->Put(email2));
  ICING_ASSERT_OK_AND_ASSIGN(put, document_store_->Put(email3));
  DocumentId id3 = put.new_document_id;

  // One filter group per namespace: email/1 in namespace1 and email/3 in
  // namespace2.
  SearchSpecProto search_spec;
  NamespaceDocumentUriGroup* first_group =
      search_spec.add_document_uri_filters();
  first_group->set_namespace_("namespace1");
  first_group->add_document_uris("email/1");
  NamespaceDocumentUriGroup* second_group =
      search_spec.add_document_uri_filters();
  second_group->set_namespace_("namespace2");
  second_group->add_document_uris("email/3");

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIteratorByUri> iterator,
      DocHitInfoIteratorByUri::Create(document_store_.get(), search_spec));
  // The later-stored document is expected first.
  EXPECT_THAT(GetDocumentIds(iterator.get()), ElementsAre(id3, id1));
  // After the ids have been collected, another Advance() must not succeed.
  EXPECT_FALSE(iterator->Advance().ok());
}
299
// Repeating a uri, either inside one filter group or across two groups for
// the same namespace, must not produce repeated document ids.
TEST_F(DocHitInfoIteratorByUriTest, DuplicatedUriIsOk) {
  // Builds an "email" document keyed by ("namespace", uri).
  auto make_email = [](const std::string& uri) {
    return DocumentBuilder()
        .SetKey("namespace", uri)
        .SetSchema("email")
        .Build();
  };
  DocumentProto email1 = make_email("email/1");
  DocumentProto email2 = make_email("email/2");
  DocumentProto email3 = make_email("email/3");
  DocumentProto email4 = make_email("email/4");

  // Store the documents and remember the id assigned to each one.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put,
                             document_store_->Put(email1));
  DocumentId id1 = put.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(put, document_store_->Put(email2));
  DocumentId id2 = put.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(put, document_store_->Put(email3));
  DocumentId id3 = put.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(put, document_store_->Put(email4));
  DocumentId id4 = put.new_document_id;

  SearchSpecProto search_spec;

  // First group: email/3 is listed twice.
  NamespaceDocumentUriGroup* first_group =
      search_spec.add_document_uri_filters();
  first_group->set_namespace_("namespace");
  for (const char* uri : {"email/1", "email/2", "email/3", "email/3"}) {
    first_group->add_document_uris(uri);
  }

  // Second group for the same namespace: email/2 repeats an entry from the
  // first group, email/4 is new.
  NamespaceDocumentUriGroup* second_group =
      search_spec.add_document_uri_filters();
  second_group->set_namespace_("namespace");
  for (const char* uri : {"email/2", "email/4"}) {
    second_group->add_document_uris(uri);
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIteratorByUri> iterator,
      DocHitInfoIteratorByUri::Create(document_store_.get(), search_spec));
  // Each document id must appear exactly once, in reverse order of insertion.
  EXPECT_THAT(GetDocumentIds(iterator.get()),
              ElementsAre(id4, id3, id2, id1));
  // After the ids have been collected, another Advance() must not succeed.
  EXPECT_FALSE(iterator->Advance().ok());
}
351
// TrimRightMostNode() on a by-uri iterator must return INVALID_ARGUMENT.
TEST_F(DocHitInfoIteratorByUriTest, TrimRightMostNodeResultsInError) {
  // A minimal valid filter: one namespace with one uri. No document is
  // stored, since only the trim call is exercised.
  SearchSpecProto search_spec;
  NamespaceDocumentUriGroup* filter = search_spec.add_document_uri_filters();
  filter->add_document_uris("uri");
  filter->set_namespace_("namespace");

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIteratorByUri> iterator,
      DocHitInfoIteratorByUri::Create(document_store_.get(), search_spec));

  // TrimRightMostNode() is invoked on an rvalue of the iterator.
  const libtextclassifier3::StatusCode expected_code =
      libtextclassifier3::StatusCode::INVALID_ARGUMENT;
  EXPECT_THAT(std::move(*iterator).TrimRightMostNode(),
              StatusIs(expected_code));
}
364
365 } // namespace
366
367 } // namespace lib
368 } // namespace icing
369