xref: /aosp_15_r20/external/icing/icing/index/iterator/doc-hit-info-iterator-by-uri_test.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/iterator/doc-hit-info-iterator-by-uri.h"
16 
17 #include <memory>
18 #include <string>
19 #include <utility>
20 
21 #include "icing/text_classifier/lib3/utils/base/status.h"
22 #include "gmock/gmock.h"
23 #include "gtest/gtest.h"
24 #include "icing/document-builder.h"
25 #include "icing/feature-flags.h"
26 #include "icing/file/filesystem.h"
27 #include "icing/file/portable-file-backed-proto-log.h"
28 #include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
29 #include "icing/proto/document.pb.h"
30 #include "icing/proto/schema.pb.h"
31 #include "icing/schema-builder.h"
32 #include "icing/schema/schema-store.h"
33 #include "icing/store/document-id.h"
34 #include "icing/store/document-store.h"
35 #include "icing/testing/common-matchers.h"
36 #include "icing/testing/fake-clock.h"
37 #include "icing/testing/test-feature-flags.h"
38 #include "icing/testing/tmp-directory.h"
39 
40 namespace icing {
41 namespace lib {
42 
43 namespace {
44 
45 using ::testing::ElementsAre;
46 using ::testing::IsEmpty;
47 
48 class DocHitInfoIteratorByUriTest : public ::testing::Test {
49  protected:
DocHitInfoIteratorByUriTest()50   DocHitInfoIteratorByUriTest() : test_dir_(GetTestTempDir() + "/icing") {}
51 
SetUp()52   void SetUp() override {
53     feature_flags_ = std::make_unique<FeatureFlags>(GetTestFeatureFlags());
54 
55     filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
56 
57     SchemaProto schema =
58         SchemaBuilder()
59             .AddType(SchemaTypeConfigBuilder().SetType("email"))
60             .Build();
61     ICING_ASSERT_OK_AND_ASSIGN(
62         schema_store_, SchemaStore::Create(&filesystem_, test_dir_,
63                                            &fake_clock_, feature_flags_.get()));
64     ICING_ASSERT_OK(schema_store_->SetSchema(
65         schema, /*ignore_errors_and_delete_documents=*/false,
66         /*allow_circular_schema_definitions=*/false));
67 
68     ICING_ASSERT_OK_AND_ASSIGN(
69         DocumentStore::CreateResult create_result,
70         DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
71                               schema_store_.get(), feature_flags_.get(),
72                               /*force_recovery_and_revalidate_documents=*/false,
73                               /*pre_mapping_fbv=*/false,
74                               /*use_persistent_hash_map=*/true,
75                               PortableFileBackedProtoLog<
76                                   DocumentWrapper>::kDefaultCompressionLevel,
77                               /*initialize_stats=*/nullptr));
78     document_store_ = std::move(create_result.document_store);
79   }
80 
TearDown()81   void TearDown() override {
82     document_store_.reset();
83     schema_store_.reset();
84     filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
85   }
86 
87   std::unique_ptr<FeatureFlags> feature_flags_;
88   std::unique_ptr<SchemaStore> schema_store_;
89   std::unique_ptr<DocumentStore> document_store_;
90   FakeClock fake_clock_;
91   const Filesystem filesystem_;
92   const std::string test_dir_;
93 };
94 
// Creating the iterator must fail with INVALID_ARGUMENT both when the search
// spec has no uri filter at all and when its only filter group lists no uris.
TEST_F(DocHitInfoIteratorByUriTest, EmptyFilterIsInvalid) {
  const auto expected_code = libtextclassifier3::StatusCode::INVALID_ARGUMENT;

  // Case 1: no uri filter is specified.
  SearchSpecProto spec;
  EXPECT_THAT(DocHitInfoIteratorByUri::Create(document_store_.get(), spec),
              StatusIs(expected_code));

  // Case 2: a namespace group exists, but it contains no uris.
  NamespaceDocumentUriGroup* empty_group = spec.add_document_uri_filters();
  empty_group->set_namespace_("namespace");
  EXPECT_THAT(DocHitInfoIteratorByUri::Create(document_store_.get(), spec),
              StatusIs(expected_code));
}
110 
// A filter naming two of the three stored documents yields exactly those two.
TEST_F(DocHitInfoIteratorByUriTest, MatchesSomeDocuments) {
  // Builds an "email" document in "namespace" with the given uri.
  auto make_email = [](const char* uri) {
    return DocumentBuilder().SetKey("namespace", uri).SetSchema("email").Build();
  };
  const DocumentProto email1 = make_email("email/1");
  const DocumentProto email2 = make_email("email/2");
  const DocumentProto email3 = make_email("email/3");

  // Store all three documents, remembering the ids of the first and third.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult stored,
                             document_store_->Put(email1));
  const DocumentId id_email1 = stored.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(stored, document_store_->Put(email2));
  ICING_ASSERT_OK_AND_ASSIGN(stored, document_store_->Put(email3));
  const DocumentId id_email3 = stored.new_document_id;

  // The filter only names email/1 and email/3.
  SearchSpecProto spec;
  NamespaceDocumentUriGroup* filter = spec.add_document_uri_filters();
  filter->set_namespace_("namespace");
  for (const char* uri : {"email/1", "email/3"}) {
    filter->add_document_uris(uri);
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIteratorByUri> iterator,
      DocHitInfoIteratorByUri::Create(document_store_.get(), spec));
  // The expectation lists the ids from most recently put to least recently
  // put.
  const auto matched_ids = GetDocumentIds(iterator.get());
  EXPECT_THAT(matched_ids, ElementsAre(id_email3, id_email1));
  // Once every hit has been consumed, advancing again must not succeed.
  EXPECT_FALSE(iterator->Advance().ok());
}
147 
// A filter naming every stored document yields all of them.
TEST_F(DocHitInfoIteratorByUriTest, MatchesAllDocuments) {
  // Builds an "email" document in "namespace" with the given uri.
  auto make_email = [](const char* uri) {
    return DocumentBuilder().SetKey("namespace", uri).SetSchema("email").Build();
  };
  const DocumentProto email1 = make_email("email/1");
  const DocumentProto email2 = make_email("email/2");
  const DocumentProto email3 = make_email("email/3");

  // Store the documents and record the id assigned to each.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult stored,
                             document_store_->Put(email1));
  const DocumentId id_email1 = stored.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(stored, document_store_->Put(email2));
  const DocumentId id_email2 = stored.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(stored, document_store_->Put(email3));
  const DocumentId id_email3 = stored.new_document_id;

  // The filter names all three uris.
  SearchSpecProto spec;
  NamespaceDocumentUriGroup* filter = spec.add_document_uri_filters();
  filter->set_namespace_("namespace");
  for (const char* uri : {"email/1", "email/2", "email/3"}) {
    filter->add_document_uris(uri);
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIteratorByUri> iterator,
      DocHitInfoIteratorByUri::Create(document_store_.get(), spec));
  // The expectation lists the ids from most recently put to least recently
  // put.
  const auto matched_ids = GetDocumentIds(iterator.get());
  EXPECT_THAT(matched_ids, ElementsAre(id_email3, id_email2, id_email1));
  // Once every hit has been consumed, advancing again must not succeed.
  EXPECT_FALSE(iterator->Advance().ok());
}
185 
// A uri with no matching document is tolerated: creation succeeds and the
// iterator still yields every document whose uri does exist.
TEST_F(DocHitInfoIteratorByUriTest, NonexistentUriIsOk) {
  // Builds an "email" document in "namespace" with the given uri.
  auto make_email = [](const char* uri) {
    return DocumentBuilder().SetKey("namespace", uri).SetSchema("email").Build();
  };
  const DocumentProto email1 = make_email("email/1");
  const DocumentProto email2 = make_email("email/2");
  const DocumentProto email3 = make_email("email/3");

  // Store the documents and record the id assigned to each.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult stored,
                             document_store_->Put(email1));
  const DocumentId id_email1 = stored.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(stored, document_store_->Put(email2));
  const DocumentId id_email2 = stored.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(stored, document_store_->Put(email3));
  const DocumentId id_email3 = stored.new_document_id;

  // The filter names the three stored uris plus one that was never stored.
  SearchSpecProto spec;
  NamespaceDocumentUriGroup* filter = spec.add_document_uri_filters();
  filter->set_namespace_("namespace");
  for (const char* uri :
       {"email/1", "email/2", "email/3", "nonexistent_uri"}) {
    filter->add_document_uris(uri);
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIteratorByUri> iterator,
      DocHitInfoIteratorByUri::Create(document_store_.get(), spec));
  // Only the stored documents are expected, listed from most recently put to
  // least recently put.
  const auto matched_ids = GetDocumentIds(iterator.get());
  EXPECT_THAT(matched_ids, ElementsAre(id_email3, id_email2, id_email1));
  // Once every hit has been consumed, advancing again must not succeed.
  EXPECT_FALSE(iterator->Advance().ok());
}
224 
// When none of the filter's uris match a stored document, creation still
// succeeds and the iterator yields nothing.
TEST_F(DocHitInfoIteratorByUriTest, AllNonexistentUriShouldReturnEmptyResults) {
  // Builds an "email" document in "namespace" with the given uri.
  auto make_email = [](const char* uri) {
    return DocumentBuilder().SetKey("namespace", uri).SetSchema("email").Build();
  };
  const DocumentProto email1 = make_email("email/1");
  const DocumentProto email2 = make_email("email/2");
  const DocumentProto email3 = make_email("email/3");

  // Store the documents; their ids are not needed by this test.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult stored,
                             document_store_->Put(email1));
  ICING_ASSERT_OK_AND_ASSIGN(stored, document_store_->Put(email2));
  ICING_ASSERT_OK_AND_ASSIGN(stored, document_store_->Put(email3));

  // Every uri in the filter refers to a document that was never stored.
  SearchSpecProto spec;
  NamespaceDocumentUriGroup* filter = spec.add_document_uri_filters();
  filter->set_namespace_("namespace");
  for (const char* uri :
       {"nonexistent_uri1", "nonexistent_uri2", "nonexistent_uri3"}) {
    filter->add_document_uris(uri);
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIteratorByUri> iterator,
      DocHitInfoIteratorByUri::Create(document_store_.get(), spec));
  const auto matched_ids = GetDocumentIds(iterator.get());
  EXPECT_THAT(matched_ids, IsEmpty());
  // With no hits to return, advancing must not succeed.
  EXPECT_FALSE(iterator->Advance().ok());
}
258 
// Filter groups for different namespaces are combined: one uri from
// "namespace1" and one from "namespace2" both produce hits.
TEST_F(DocHitInfoIteratorByUriTest, MultipleNamespaces) {
  // Builds an "email" document with the given namespace and uri.
  auto make_email = [](const char* name_space, const char* uri) {
    return DocumentBuilder().SetKey(name_space, uri).SetSchema("email").Build();
  };
  const DocumentProto email1 = make_email("namespace1", "email/1");
  const DocumentProto email2 = make_email("namespace1", "email/2");
  const DocumentProto email3 = make_email("namespace2", "email/3");

  // Store all three documents, remembering the ids of the first and third.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult stored,
                             document_store_->Put(email1));
  const DocumentId id_email1 = stored.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(stored, document_store_->Put(email2));
  ICING_ASSERT_OK_AND_ASSIGN(stored, document_store_->Put(email3));
  const DocumentId id_email3 = stored.new_document_id;

  // One filter group per namespace: email/1 in namespace1 and email/3 in
  // namespace2.
  SearchSpecProto spec;
  NamespaceDocumentUriGroup* first_group = spec.add_document_uri_filters();
  first_group->set_namespace_("namespace1");
  first_group->add_document_uris("email/1");
  NamespaceDocumentUriGroup* second_group = spec.add_document_uri_filters();
  second_group->set_namespace_("namespace2");
  second_group->add_document_uris("email/3");

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIteratorByUri> iterator,
      DocHitInfoIteratorByUri::Create(document_store_.get(), spec));
  // The expectation lists the ids from most recently put to least recently
  // put.
  const auto matched_ids = GetDocumentIds(iterator.get());
  EXPECT_THAT(matched_ids, ElementsAre(id_email3, id_email1));
  // Once every hit has been consumed, advancing again must not succeed.
  EXPECT_FALSE(iterator->Advance().ok());
}
299 
// Uris repeated within one filter group, or across two groups for the same
// namespace, produce each matching document only once.
TEST_F(DocHitInfoIteratorByUriTest, DuplicatedUriIsOk) {
  // Builds an "email" document in "namespace" with the given uri.
  auto make_email = [](const char* uri) {
    return DocumentBuilder().SetKey("namespace", uri).SetSchema("email").Build();
  };
  const DocumentProto email1 = make_email("email/1");
  const DocumentProto email2 = make_email("email/2");
  const DocumentProto email3 = make_email("email/3");
  const DocumentProto email4 = make_email("email/4");

  // Store the documents and record the id assigned to each.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult stored,
                             document_store_->Put(email1));
  const DocumentId id_email1 = stored.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(stored, document_store_->Put(email2));
  const DocumentId id_email2 = stored.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(stored, document_store_->Put(email3));
  const DocumentId id_email3 = stored.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(stored, document_store_->Put(email4));
  const DocumentId id_email4 = stored.new_document_id;

  SearchSpecProto spec;

  // First group: email/3 appears twice within the same group.
  NamespaceDocumentUriGroup* first_group = spec.add_document_uri_filters();
  first_group->set_namespace_("namespace");
  for (const char* uri : {"email/1", "email/2", "email/3", "email/3"}) {
    first_group->add_document_uris(uri);
  }

  // Second group for the same namespace: email/2 repeats a uri from the first
  // group.
  NamespaceDocumentUriGroup* second_group = spec.add_document_uri_filters();
  second_group->set_namespace_("namespace");
  for (const char* uri : {"email/2", "email/4"}) {
    second_group->add_document_uris(uri);
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIteratorByUri> iterator,
      DocHitInfoIteratorByUri::Create(document_store_.get(), spec));
  // Each document id is expected exactly once despite the repeated uris.
  const auto matched_ids = GetDocumentIds(iterator.get());
  EXPECT_THAT(matched_ids,
              ElementsAre(id_email4, id_email3, id_email2, id_email1));
  // Once every hit has been consumed, advancing again must not succeed.
  EXPECT_FALSE(iterator->Advance().ok());
}
351 
// TrimRightMostNode() on a by-uri iterator is expected to report
// INVALID_ARGUMENT.
TEST_F(DocHitInfoIteratorByUriTest, TrimRightMostNodeResultsInError) {
  // A minimal valid filter: one namespace with a single uri.
  SearchSpecProto spec;
  NamespaceDocumentUriGroup* filter = spec.add_document_uri_filters();
  filter->set_namespace_("namespace");
  filter->add_document_uris("uri");

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIteratorByUri> iterator,
      DocHitInfoIteratorByUri::Create(document_store_.get(), spec));

  const auto expected_code = libtextclassifier3::StatusCode::INVALID_ARGUMENT;
  EXPECT_THAT(std::move(*iterator).TrimRightMostNode(),
              StatusIs(expected_code));
}
364 
365 }  // namespace
366 
367 }  // namespace lib
368 }  // namespace icing
369