1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/result/result-retriever-v2.h"
16
17 #include <atomic>
18 #include <cstddef>
19 #include <cstdint>
20 #include <memory>
21 #include <string>
22 #include <unordered_map>
23 #include <utility>
24 #include <vector>
25
26 #include "icing/text_classifier/lib3/utils/base/status.h"
27 #include "icing/text_classifier/lib3/utils/base/statusor.h"
28 #include "gmock/gmock.h"
29 #include "gtest/gtest.h"
30 #include "icing/absl_ports/mutex.h"
31 #include "icing/document-builder.h"
32 #include "icing/feature-flags.h"
33 #include "icing/file/filesystem.h"
34 #include "icing/file/mock-filesystem.h"
35 #include "icing/file/portable-file-backed-proto-log.h"
36 #include "icing/portable/equals-proto.h"
37 #include "icing/portable/platform.h"
38 #include "icing/proto/document.pb.h"
39 #include "icing/proto/document_wrapper.pb.h"
40 #include "icing/proto/schema.pb.h"
41 #include "icing/proto/search.pb.h"
42 #include "icing/result/page-result.h"
43 #include "icing/result/result-state-v2.h"
44 #include "icing/schema-builder.h"
45 #include "icing/schema/schema-store.h"
46 #include "icing/schema/section.h"
47 #include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
48 #include "icing/scoring/scored-document-hit.h"
49 #include "icing/store/document-filter-data.h"
50 #include "icing/store/document-id.h"
51 #include "icing/store/document-store.h"
52 #include "icing/testing/common-matchers.h"
53 #include "icing/testing/fake-clock.h"
54 #include "icing/testing/test-data.h"
55 #include "icing/testing/test-feature-flags.h"
56 #include "icing/testing/tmp-directory.h"
57 #include "icing/tokenization/language-segmenter-factory.h"
58 #include "icing/tokenization/language-segmenter.h"
59 #include "icing/transform/normalizer-factory.h"
60 #include "icing/transform/normalizer.h"
61 #include "icing/util/clock.h"
62 #include "icing/util/icu-data-file-helper.h"
63 #include "unicode/uloc.h"
64
65 namespace icing {
66 namespace lib {
67
68 namespace {
69
70 using ::icing::lib::portable_equals_proto::EqualsProto;
71 using ::testing::DoDefault;
72 using ::testing::ElementsAre;
73 using ::testing::Eq;
74 using ::testing::Gt;
75 using ::testing::IsEmpty;
76 using ::testing::Pointee;
77 using ::testing::Return;
78 using ::testing::SizeIs;
79 using EntryIdMap = std::unordered_map<int32_t, int>;
80
81 // Mock the behavior of GroupResultLimiter::ShouldBeRemoved.
82 class MockGroupResultLimiter : public GroupResultLimiterV2 {
83 public:
MockGroupResultLimiter()84 MockGroupResultLimiter() : GroupResultLimiterV2() {
85 ON_CALL(*this, ShouldBeRemoved).WillByDefault(Return(false));
86 }
87
88 MOCK_METHOD(bool, ShouldBeRemoved,
89 (const ScoredDocumentHit&, const EntryIdMap&,
90 const DocumentStore&, std::vector<int>&,
91 ResultSpecProto::ResultGroupingType, int64_t),
92 (const, override));
93 };
94
95 class ResultRetrieverV2Test : public ::testing::Test {
96 protected:
ResultRetrieverV2Test()97 ResultRetrieverV2Test() : test_dir_(GetTestTempDir() + "/icing") {
98 filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
99 }
100
SetUp()101 void SetUp() override {
102 feature_flags_ = std::make_unique<FeatureFlags>(GetTestFeatureFlags());
103 if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
104 ICING_ASSERT_OK(
105 // File generated via icu_data_file rule in //icing/BUILD.
106 icu_data_file_helper::SetUpIcuDataFile(
107 GetTestFilePath("icing/icu.dat")));
108 }
109 language_segmenter_factory::SegmenterOptions options(ULOC_US);
110 ICING_ASSERT_OK_AND_ASSIGN(
111 language_segmenter_,
112 language_segmenter_factory::Create(std::move(options)));
113
114 ICING_ASSERT_OK_AND_ASSIGN(
115 schema_store_, SchemaStore::Create(&filesystem_, test_dir_,
116 &fake_clock_, feature_flags_.get()));
117 ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
118 /*max_term_byte_size=*/10000));
119
120 SchemaProto schema =
121 SchemaBuilder()
122 .AddType(SchemaTypeConfigBuilder()
123 .SetType("Email")
124 .AddProperty(PropertyConfigBuilder()
125 .SetName("name")
126 .SetDataTypeString(TERM_MATCH_PREFIX,
127 TOKENIZER_PLAIN)
128 .SetCardinality(CARDINALITY_OPTIONAL))
129 .AddProperty(PropertyConfigBuilder()
130 .SetName("body")
131 .SetDataTypeString(TERM_MATCH_EXACT,
132 TOKENIZER_PLAIN)
133 .SetCardinality(CARDINALITY_OPTIONAL))
134 .AddProperty(
135 PropertyConfigBuilder()
136 .SetName("sender")
137 .SetDataTypeDocument(
138 "Person", /*index_nested_properties=*/true)
139 .SetCardinality(CARDINALITY_OPTIONAL)))
140 .AddType(
141 SchemaTypeConfigBuilder()
142 .SetType("Person")
143 .AddProperty(PropertyConfigBuilder()
144 .SetName("name")
145 .SetDataTypeString(TERM_MATCH_PREFIX,
146 TOKENIZER_PLAIN)
147 .SetCardinality(CARDINALITY_OPTIONAL))
148 .AddProperty(PropertyConfigBuilder()
149 .SetName("emailAddress")
150 .SetDataTypeString(TERM_MATCH_PREFIX,
151 TOKENIZER_PLAIN)
152 .SetCardinality(CARDINALITY_OPTIONAL)))
153 .Build();
154 ASSERT_THAT(schema_store_->SetSchema(
155 schema, /*ignore_errors_and_delete_documents=*/false,
156 /*allow_circular_schema_definitions=*/false),
157 IsOk());
158
159 num_total_hits_ = 0;
160 }
161
TearDown()162 void TearDown() override {
163 filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
164 }
165
GetSectionId(const std::string & type,const std::string & property)166 SectionId GetSectionId(const std::string& type, const std::string& property) {
167 auto type_id_or = schema_store_->GetSchemaTypeId(type);
168 if (!type_id_or.ok()) {
169 return kInvalidSectionId;
170 }
171 SchemaTypeId type_id = type_id_or.ValueOrDie();
172 for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) {
173 auto metadata_or = schema_store_->GetSectionMetadata(type_id, section_id);
174 if (!metadata_or.ok()) {
175 break;
176 }
177 const SectionMetadata* metadata = metadata_or.ValueOrDie();
178 if (metadata->path == property) {
179 return metadata->id;
180 }
181 }
182 return kInvalidSectionId;
183 }
184
185 std::unique_ptr<FeatureFlags> feature_flags_;
186 const Filesystem filesystem_;
187 const std::string test_dir_;
188 std::unique_ptr<LanguageSegmenter> language_segmenter_;
189 std::unique_ptr<SchemaStore> schema_store_;
190 std::unique_ptr<Normalizer> normalizer_;
191 std::atomic<int> num_total_hits_;
192 FakeClock fake_clock_;
193 };
194
CreateDocument(int id)195 DocumentProto CreateDocument(int id) {
196 return DocumentBuilder()
197 .SetKey("icing", "Email/" + std::to_string(id))
198 .SetSchema("Email")
199 .AddStringProperty("name", "subject foo " + std::to_string(id))
200 .AddStringProperty("body", "body bar " + std::to_string(id))
201 .SetCreationTimestampMs(1574365086666 + id)
202 .Build();
203 }
204
CreateSectionIdMask(const std::vector<SectionId> & section_ids)205 SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) {
206 SectionIdMask mask = 0;
207 for (SectionId section_id : section_ids) {
208 mask |= (UINT64_C(1) << section_id);
209 }
210 return mask;
211 }
212
CreateResultSpec(int num_per_page,ResultSpecProto::ResultGroupingType result_group_type)213 ResultSpecProto CreateResultSpec(
214 int num_per_page, ResultSpecProto::ResultGroupingType result_group_type) {
215 ResultSpecProto result_spec;
216 result_spec.set_result_group_type(result_group_type);
217 result_spec.set_num_per_page(num_per_page);
218 return result_spec;
219 }
220
CreateDocumentStore(const Filesystem * filesystem,const std::string & base_dir,const Clock * clock,const SchemaStore * schema_store,const FeatureFlags & feature_flags)221 libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
222 const Filesystem* filesystem, const std::string& base_dir,
223 const Clock* clock, const SchemaStore* schema_store,
224 const FeatureFlags& feature_flags) {
225 return DocumentStore::Create(
226 filesystem, base_dir, clock, schema_store, &feature_flags,
227 /*force_recovery_and_revalidate_documents=*/false,
228 /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/true,
229 PortableFileBackedProtoLog<DocumentWrapper>::kDefaultCompressionLevel,
230 /*initialize_stats=*/nullptr);
231 }
232
// ResultRetrieverV2::Create() must reject a null pointer in any of its four
// required dependencies with FAILED_PRECONDITION.
TEST_F(ResultRetrieverV2Test, CreationWithNullPointerShouldFail) {
  const auto kFailedPrecondition =
      libtextclassifier3::StatusCode::FAILED_PRECONDITION;

  // Null document store. This is checked before a real store is created.
  EXPECT_THAT(
      ResultRetrieverV2::Create(/*doc_store=*/nullptr, schema_store_.get(),
                                language_segmenter_.get(), normalizer_.get()),
      StatusIs(kFailedPrecondition));

  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentStore::CreateResult store_create_result,
      CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
                          schema_store_.get(), *feature_flags_));
  std::unique_ptr<DocumentStore> document_store =
      std::move(store_create_result.document_store);

  // Null schema store.
  EXPECT_THAT(
      ResultRetrieverV2::Create(document_store.get(), /*schema_store=*/nullptr,
                                language_segmenter_.get(), normalizer_.get()),
      StatusIs(kFailedPrecondition));

  // Null language segmenter.
  EXPECT_THAT(
      ResultRetrieverV2::Create(document_store.get(), schema_store_.get(),
                                /*language_segmenter=*/nullptr,
                                normalizer_.get()),
      StatusIs(kFailedPrecondition));

  // Null normalizer.
  EXPECT_THAT(
      ResultRetrieverV2::Create(document_store.get(), schema_store_.get(),
                                language_segmenter_.get(),
                                /*normalizer=*/nullptr),
      StatusIs(kFailedPrecondition));
}
259
// Five scored documents with a page size of 2 must come back as three pages
// (2, 2 and 1 results) in descending score order, with the "has more results"
// flag set on every page except the last.
TEST_F(ResultRetrieverV2Test, ShouldRetrieveSimpleResults) {
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentStore::CreateResult store_create_result,
      CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
                          schema_store_.get(), *feature_flags_));
  std::unique_ptr<DocumentStore> document_store =
      std::move(store_create_result.document_store);

  // kScores[i] is the score attached to document i + 1, so the descending
  // ranking order is the same as the insertion order.
  constexpr int kNumDocuments = 5;
  constexpr double kScores[kNumDocuments] = {19, 12, 8, 3, 1};

  std::vector<DocumentId> document_ids;
  for (int id = 1; id <= kNumDocuments; ++id) {
    ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result,
                               document_store->Put(CreateDocument(id)));
    document_ids.push_back(put_result.new_document_id);
  }

  const SectionIdMask section_mask = CreateSectionIdMask(
      {GetSectionId("Email", "name"), GetSectionId("Email", "body")});

  std::vector<ScoredDocumentHit> hits;
  for (int i = 0; i < kNumDocuments; ++i) {
    hits.emplace_back(document_ids[i], section_mask, kScores[i]);
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ResultRetrieverV2> retriever,
      ResultRetrieverV2::Create(document_store.get(), schema_store_.get(),
                                language_segmenter_.get(), normalizer_.get()));

  // expected[i] is the result proto for document i + 1.
  std::vector<SearchResultProto::ResultProto> expected;
  for (int i = 0; i < kNumDocuments; ++i) {
    SearchResultProto::ResultProto& result = expected.emplace_back();
    *result.mutable_document() = CreateDocument(/*id=*/i + 1);
    result.set_score(kScores[i]);
  }

  auto ranker = std::make_unique<
      PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
      std::move(hits), /*is_descending=*/true);
  ResultStateV2 result_state(
      std::move(ranker),
      /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
      CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
      *document_store);

  // Each call yields a (page, has_more_results) pair.

  // Page 1: documents 1 and 2.
  auto first_page = retriever->RetrieveNextPage(
      result_state, fake_clock_.GetSystemTimeMilliseconds());
  EXPECT_THAT(first_page.first.results,
              ElementsAre(EqualsProto(expected[0]), EqualsProto(expected[1])));
  // No snippets were requested, so no result carries one.
  EXPECT_THAT(first_page.first.num_results_with_snippets, Eq(0));
  // The requested page size mirrors num_per_page.
  EXPECT_THAT(first_page.first.requested_page_size, Eq(2));
  EXPECT_TRUE(first_page.second);

  // Page 2: documents 3 and 4.
  auto second_page = retriever->RetrieveNextPage(
      result_state, fake_clock_.GetSystemTimeMilliseconds());
  EXPECT_THAT(second_page.first.results,
              ElementsAre(EqualsProto(expected[2]), EqualsProto(expected[3])));
  EXPECT_THAT(second_page.first.num_results_with_snippets, Eq(0));
  EXPECT_THAT(second_page.first.requested_page_size, Eq(2));
  EXPECT_TRUE(second_page.second);

  // Page 3: only document 5 is left, and nothing follows it.
  auto third_page = retriever->RetrieveNextPage(
      result_state, fake_clock_.GetSystemTimeMilliseconds());
  EXPECT_THAT(third_page.first.results, ElementsAre(EqualsProto(expected[4])));
  EXPECT_THAT(third_page.first.num_results_with_snippets, Eq(0));
  // Still the requested size (2), even though only one result was returned.
  EXPECT_THAT(third_page.first.requested_page_size, Eq(2));
  EXPECT_FALSE(third_page.second);
}
357
// Hits that reference an invalid document id (-1) or an id that was never
// stored (4) must be dropped from the page, while the hits for the two real
// documents are still returned in score order.
TEST_F(ResultRetrieverV2Test, ShouldIgnoreNonInternalErrors) {
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentStore::CreateResult store_create_result,
      CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
                          schema_store_.get(), *feature_flags_));
  std::unique_ptr<DocumentStore> document_store =
      std::move(store_create_result.document_store);

  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult first_put,
                             document_store->Put(CreateDocument(/*id=*/1)));
  const DocumentId first_id = first_put.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult second_put,
                             document_store->Put(CreateDocument(/*id=*/2)));
  const DocumentId second_id = second_put.new_document_id;

  const DocumentId invalid_id = -1;
  const SectionIdMask section_mask = CreateSectionIdMask(
      {GetSectionId("Email", "name"), GetSectionId("Email", "body")});
  std::vector<ScoredDocumentHit> hits_with_invalid_id = {
      {first_id, section_mask, /*score=*/12},
      {second_id, section_mask, /*score=*/4},
      {invalid_id, section_mask, /*score=*/0}};

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ResultRetrieverV2> retriever,
      ResultRetrieverV2::Create(document_store.get(), schema_store_.get(),
                                language_segmenter_.get(), normalizer_.get(),
                                std::make_unique<MockGroupResultLimiter>()));

  // Expected protos for the two documents that really exist.
  const auto make_expected = [](int id, double score) {
    SearchResultProto::ResultProto result;
    *result.mutable_document() = CreateDocument(id);
    result.set_score(score);
    return result;
  };
  const SearchResultProto::ResultProto expected_first =
      make_expected(/*id=*/1, /*score=*/12);
  const SearchResultProto::ResultProto expected_second =
      make_expected(/*id=*/2, /*score=*/4);

  // Case 1: the invalid id is ranked last.
  auto invalid_id_ranker = std::make_unique<
      PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
      std::move(hits_with_invalid_id), /*is_descending=*/true);
  ResultStateV2 invalid_id_state(
      std::move(invalid_id_ranker),
      /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
      CreateResultSpec(/*num_per_page=*/3, ResultSpecProto::NAMESPACE),
      *document_store);
  PageResult invalid_id_page =
      retriever
          ->RetrieveNextPage(invalid_id_state,
                             fake_clock_.GetSystemTimeMilliseconds())
          .first;
  EXPECT_THAT(
      invalid_id_page.results,
      ElementsAre(EqualsProto(expected_first), EqualsProto(expected_second)));

  // Case 2: an id that was never put into the store is ranked first.
  const DocumentId missing_id = 4;
  std::vector<ScoredDocumentHit> hits_with_missing_id = {
      {missing_id, section_mask, /*score=*/15},
      {first_id, section_mask, /*score=*/12},
      {second_id, section_mask, /*score=*/4}};
  auto missing_id_ranker = std::make_unique<
      PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
      std::move(hits_with_missing_id), /*is_descending=*/true);
  ResultStateV2 missing_id_state(
      std::move(missing_id_ranker),
      /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
      CreateResultSpec(/*num_per_page=*/3, ResultSpecProto::NAMESPACE),
      *document_store);
  PageResult missing_id_page =
      retriever
          ->RetrieveNextPage(missing_id_state,
                             fake_clock_.GetSystemTimeMilliseconds())
          .first;
  EXPECT_THAT(
      missing_id_page.results,
      ElementsAre(EqualsProto(expected_first), EqualsProto(expected_second)));
}
431
// With max_joined_children_per_parent_to_return = 2, a parent joined to three
// children must be returned with only its first two children, while a parent
// with a single child keeps that child.
TEST_F(ResultRetrieverV2Test,
       ShouldLimitNumChildDocumentsByMaxJoinedChildPerParent) {
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentStore::CreateResult store_create_result,
      CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
                          schema_store_.get(), *feature_flags_));
  std::unique_ptr<DocumentStore> document_store =
      std::move(store_create_result.document_store);

  // 1. Store the two parent (Person) documents.
  const DocumentProto person1 =
      DocumentBuilder()
          .SetKey("namespace", "Person/1")
          .SetCreationTimestampMs(1000)
          .SetSchema("Person")
          .AddStringProperty("name", "Joe Fox")
          .AddStringProperty("emailAddress", "[email protected]")
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult person1_put,
                             document_store->Put(person1));
  const DocumentId person1_id = person1_put.new_document_id;

  const DocumentProto person2 =
      DocumentBuilder()
          .SetKey("namespace", "Person/2")
          .SetCreationTimestampMs(1000)
          .SetSchema("Person")
          .AddStringProperty("name", "Meg Ryan")
          .AddStringProperty("emailAddress", "[email protected]")
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult person2_put,
                             document_store->Put(person2));
  const DocumentId person2_id = person2_put.new_document_id;

  // 2. Store the four child (Email) documents. emails[i] / email_ids[i]
  //    belong to "Email/<i + 1>".
  constexpr int kNumEmails = 4;
  std::vector<DocumentProto> emails;
  std::vector<DocumentId> email_ids;
  for (int number = 1; number <= kNumEmails; ++number) {
    const std::string number_text = std::to_string(number);
    emails.push_back(DocumentBuilder()
                         .SetKey("namespace", "Email/" + number_text)
                         .SetCreationTimestampMs(1000)
                         .SetSchema("Email")
                         .AddStringProperty("name", "Test " + number_text)
                         .AddStringProperty("body", "Test " + number_text)
                         .Build());
    ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult email_put,
                               document_store->Put(emails.back()));
    email_ids.push_back(email_put.new_document_id);
  }

  // 3. Build the joined scored hits.
  const SectionIdMask person_mask =
      CreateSectionIdMask({GetSectionId("Person", "name")});
  const SectionIdMask email_mask = CreateSectionIdMask(
      {GetSectionId("Email", "name"), GetSectionId("Email", "body")});

  const ScoredDocumentHit person1_hit(person1_id, person_mask, /*score=*/1);
  const ScoredDocumentHit person2_hit(person2_id, person_mask, /*score=*/2);

  // kEmailScores[i] is the score of "Email/<i + 1>".
  constexpr double kEmailScores[kNumEmails] = {3, 4, 5, 6};
  std::vector<ScoredDocumentHit> email_hits;
  for (int i = 0; i < kNumEmails; ++i) {
    email_hits.emplace_back(email_ids[i], email_mask, kEmailScores[i]);
  }

  // Join mapping:
  //   - Person1 -> Email1
  //   - Person2 -> Email4, Email3, Email2 (in that order)
  std::vector<JoinedScoredDocumentHit> joined_hits;
  joined_hits.push_back(JoinedScoredDocumentHit(
      /*final_score=*/1,
      /*parent_scored_document_hit=*/person1_hit,
      /*child_scored_document_hits=*/{email_hits[0]}));
  joined_hits.push_back(JoinedScoredDocumentHit(
      /*final_score=*/3,
      /*parent_scored_document_hit=*/person2_hit,
      /*child_scored_document_hits=*/
      {email_hits[3], email_hits[2], email_hits[1]}));

  // 4. Retrieve with at most two joined children per parent.
  ResultSpecProto result_spec =
      CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
  result_spec.set_max_joined_children_per_parent_to_return(2);

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ResultRetrieverV2> retriever,
      ResultRetrieverV2::Create(document_store.get(), schema_store_.get(),
                                language_segmenter_.get(), normalizer_.get()));
  auto ranker = std::make_unique<
      PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
      std::move(joined_hits), /*is_descending=*/true);
  ResultStateV2 result_state(
      std::move(ranker),
      /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
      result_spec, *document_store);

  // Expected first result: person2 with children [email4, email3]; email2 is
  // cut off by the per-parent limit.
  SearchResultProto::ResultProto expected_person2;
  *expected_person2.mutable_document() = person2;
  expected_person2.set_score(3);
  for (const int email_index : {3, 2}) {
    SearchResultProto::ResultProto* child =
        expected_person2.add_joined_results();
    *child->mutable_document() = emails[email_index];
    child->set_score(kEmailScores[email_index]);
  }

  // Expected second result: person1 with its only child, email1.
  SearchResultProto::ResultProto expected_person1;
  *expected_person1.mutable_document() = person1;
  expected_person1.set_score(1);
  SearchResultProto::ResultProto* only_child =
      expected_person1.add_joined_results();
  *only_child->mutable_document() = emails[0];
  only_child->set_score(kEmailScores[0]);

  auto page = retriever->RetrieveNextPage(
      result_state, fake_clock_.GetSystemTimeMilliseconds());
  EXPECT_THAT(page.first.results, ElementsAre(EqualsProto(expected_person2),
                                              EqualsProto(expected_person1)));
  // Both parents fit on the single page, so nothing is left.
  EXPECT_FALSE(page.second);
}
588
// When reading one of the documents fails at the filesystem level, that
// document must be left out of the page and the other one still returned.
TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) {
  // The first PRead() on the mock reports failure; every later call uses the
  // mock's default action.
  MockFilesystem mock_filesystem;
  EXPECT_CALL(mock_filesystem,
              PRead(A<int>(), A<void*>(), A<size_t>(), A<off_t>()))
      .WillOnce(Return(false))
      .WillRepeatedly(DoDefault());

  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentStore::CreateResult store_create_result,
      CreateDocumentStore(&mock_filesystem, test_dir_, &fake_clock_,
                          schema_store_.get(), *feature_flags_));
  std::unique_ptr<DocumentStore> document_store =
      std::move(store_create_result.document_store);

  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult first_put,
                             document_store->Put(CreateDocument(/*id=*/1)));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult second_put,
                             document_store->Put(CreateDocument(/*id=*/2)));
  const DocumentId first_id = first_put.new_document_id;
  const DocumentId second_id = second_put.new_document_id;

  const SectionIdMask section_mask = CreateSectionIdMask(
      {GetSectionId("Email", "name"), GetSectionId("Email", "body")});
  std::vector<ScoredDocumentHit> hits = {
      {first_id, section_mask, /*score=*/0},
      {second_id, section_mask, /*score=*/0}};

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ResultRetrieverV2> retriever,
      ResultRetrieverV2::Create(document_store.get(), schema_store_.get(),
                                language_segmenter_.get(), normalizer_.get(),
                                std::make_unique<MockGroupResultLimiter>()));

  SearchResultProto::ResultProto expected_doc1;
  *expected_doc1.mutable_document() = CreateDocument(/*id=*/1);
  expected_doc1.set_score(0);

  auto ranker = std::make_unique<
      PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
      std::move(hits), /*is_descending=*/true);
  ResultStateV2 result_state(
      std::move(ranker),
      /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
      CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
      *document_store);
  PageResult page =
      retriever
          ->RetrieveNextPage(result_state,
                             fake_clock_.GetSystemTimeMilliseconds())
          .first;

  // The forced PRead() failure is expected to surface as an internal error
  // while retrieving doc2, so doc2 is skipped and only doc1 is returned.
  EXPECT_THAT(page.results, ElementsAre(EqualsProto(expected_doc1)));
}
644
// Every RetrieveNextPage() call must update the ResultStateV2 it was given:
// num_returned grows by the size of the page and the returned hits are
// removed from the ranker.
TEST_F(ResultRetrieverV2Test, ShouldUpdateResultState) {
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentStore::CreateResult store_create_result,
      CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
                          schema_store_.get(), *feature_flags_));
  std::unique_ptr<DocumentStore> document_store =
      std::move(store_create_result.document_store);

  // Store documents 1..5.
  constexpr int kNumDocuments = 5;
  std::vector<DocumentId> document_ids;
  for (int id = 1; id <= kNumDocuments; ++id) {
    ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result,
                               document_store->Put(CreateDocument(id)));
    document_ids.push_back(put_result.new_document_id);
  }

  // One hit per document, all with score 0.
  const SectionIdMask section_mask = CreateSectionIdMask(
      {GetSectionId("Email", "name"), GetSectionId("Email", "body")});
  std::vector<ScoredDocumentHit> hits;
  for (const DocumentId document_id : document_ids) {
    hits.emplace_back(document_id, section_mask, /*score=*/0);
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ResultRetrieverV2> retriever,
      ResultRetrieverV2::Create(document_store.get(), schema_store_.get(),
                                language_segmenter_.get(), normalizer_.get()));

  auto ranker = std::make_unique<
      PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
      std::move(hits), /*is_descending=*/true);
  ResultStateV2 result_state(
      std::move(ranker),
      /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
      CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
      *document_store);

  // Fetches the next page of `result_state` at the fake clock's current time.
  const auto next_page = [&]() -> PageResult {
    return retriever
        ->RetrieveNextPage(result_state,
                           fake_clock_.GetSystemTimeMilliseconds())
        .first;
  };

  // Page 1: two results.
  PageResult first_page = next_page();
  ASSERT_THAT(first_page.results, SizeIs(2));
  {
    absl_ports::shared_lock lock(&result_state.mutex);

    // Only the first page has been returned so far.
    EXPECT_THAT(result_state.num_returned, Eq(2));
    // The two returned hits are gone from the ranker; three remain.
    EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(SizeIs(3)));
  }

  // Page 2: two results.
  PageResult second_page = next_page();
  ASSERT_THAT(second_page.results, SizeIs(2));
  {
    absl_ports::shared_lock lock(&result_state.mutex);

    // First and second page together.
    EXPECT_THAT(result_state.num_returned, Eq(4));
    // Two more hits were consumed; one remains.
    EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(SizeIs(1)));
  }

  // Page 3: the single remaining result.
  PageResult third_page = next_page();
  ASSERT_THAT(third_page.results, SizeIs(1));
  {
    absl_ports::shared_lock lock(&result_state.mutex);

    // All three pages together.
    EXPECT_THAT(result_state.num_returned, Eq(5));
    // The last hit was consumed, so the ranker is now empty.
    EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(IsEmpty()));
  }
}
743
// Verifies the bookkeeping of the shared num_total_hits counter that result
// states register with: the counter is expected to grow when a state registers
// its hits, shrink by the page size each time a page is retrieved, and shrink
// by whatever is still left in a state when that state is destroyed.
TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) {
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentStore::CreateResult create_result,
      CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
                          schema_store_.get(), *feature_flags_));
  std::unique_ptr<DocumentStore> doc_store =
      std::move(create_result.document_store);

  std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
                                            GetSectionId("Email", "body")};
  SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);

  // result_state1: 2 hits, 1 result per page.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result1,
                             doc_store->Put(CreateDocument(/*id=*/1)));
  DocumentId document_id1 = put_result1.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result2,
                             doc_store->Put(CreateDocument(/*id=*/2)));
  DocumentId document_id2 = put_result2.new_document_id;
  std::vector<ScoredDocumentHit> scored_document_hits1 = {
      {document_id1, hit_section_id_mask, /*score=*/0},
      {document_id2, hit_section_id_mask, /*score=*/0}};
  std::shared_ptr<ResultStateV2> result_state1 =
      std::make_shared<ResultStateV2>(
          std::make_unique<
              PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
              std::move(scored_document_hits1),
              /*is_descending=*/true),
          /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
          CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
          *doc_store);
  {
    absl_ports::unique_lock l(&result_state1->mutex);

    // Registering the first state should account for its 2 hits.
    result_state1->RegisterNumTotalHits(&num_total_hits_);
    ASSERT_THAT(num_total_hits_, Eq(2));
  }

  // result_state2: 3 hits, 2 results per page.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result3,
                             doc_store->Put(CreateDocument(/*id=*/3)));
  DocumentId document_id3 = put_result3.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result4,
                             doc_store->Put(CreateDocument(/*id=*/4)));
  DocumentId document_id4 = put_result4.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result5,
                             doc_store->Put(CreateDocument(/*id=*/5)));
  DocumentId document_id5 = put_result5.new_document_id;
  std::vector<ScoredDocumentHit> scored_document_hits2 = {
      {document_id3, hit_section_id_mask, /*score=*/0},
      {document_id4, hit_section_id_mask, /*score=*/0},
      {document_id5, hit_section_id_mask, /*score=*/0}};
  std::shared_ptr<ResultStateV2> result_state2 =
      std::make_shared<ResultStateV2>(
          std::make_unique<
              PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
              std::move(scored_document_hits2),
              /*is_descending=*/true),
          /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
          CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
          *doc_store);
  {
    absl_ports::unique_lock l(&result_state2->mutex);

    // Registering the second state should add its 3 hits on top: 2 + 3 = 5.
    result_state2->RegisterNumTotalHits(&num_total_hits_);
    ASSERT_THAT(num_total_hits_, Eq(5));
  }

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ResultRetrieverV2> result_retriever,
      ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
                                language_segmenter_.get(), normalizer_.get()));

  // Should get 1 doc in the first page of result_state1, and num_total_hits
  // should be decremented by 1.
  PageResult page_result1 =
      result_retriever
          ->RetrieveNextPage(*result_state1,
                             fake_clock_.GetSystemTimeMilliseconds())
          .first;
  ASSERT_THAT(page_result1.results, SizeIs(1));
  EXPECT_THAT(num_total_hits_, Eq(4));

  // Should get 2 docs in the first page of result_state2, and num_total_hits
  // should be decremented by 2.
  PageResult page_result2 =
      result_retriever
          ->RetrieveNextPage(*result_state2,
                             fake_clock_.GetSystemTimeMilliseconds())
          .first;
  ASSERT_THAT(page_result2.results, SizeIs(2));
  EXPECT_THAT(num_total_hits_, Eq(2));

  // Should get 1 doc in the second page of result_state2 (although num_per_page
  // is 2, there is only 1 doc left), and num_total_hits should be decremented
  // by 1.
  PageResult page_result3 =
      result_retriever
          ->RetrieveNextPage(*result_state2,
                             fake_clock_.GetSystemTimeMilliseconds())
          .first;
  ASSERT_THAT(page_result3.results, SizeIs(1));
  EXPECT_THAT(num_total_hits_, Eq(1));

  // Destruct result_state1. There is 1 doc left, so num_total_hits should be
  // decremented by 1 when destructing it.
  result_state1.reset();
  EXPECT_THAT(num_total_hits_, Eq(0));

  // Destruct result_state2. There is 0 doc left, so num_total_hits should be
  // unchanged when destructing it. (This must reset result_state2; resetting
  // the already-null result_state1 again would leave this case untested.)
  result_state2.reset();
  EXPECT_THAT(num_total_hits_, Eq(0));
}
856
// Checks that a page is cut short once the bytes already collected reach
// num_total_bytes_per_page_threshold, even though num_per_page would still
// allow another result.
TEST_F(ResultRetrieverV2Test, ShouldLimitNumTotalBytesPerPage) {
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentStore::CreateResult store_creation,
      CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
                          schema_store_.get(), *feature_flags_));
  std::unique_ptr<DocumentStore> document_store =
      std::move(store_creation.document_store);

  // Store two documents and keep the ids that were assigned to them.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult first_put,
                             document_store->Put(CreateDocument(/*id=*/1)));
  const DocumentId first_id = first_put.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult second_put,
                             document_store->Put(CreateDocument(/*id=*/2)));
  const DocumentId second_id = second_put.new_document_id;

  // Both hits cover the "name" and "body" sections of Email; the first one
  // carries the higher score.
  std::vector<SectionId> section_ids = {GetSectionId("Email", "name"),
                                        GetSectionId("Email", "body")};
  SectionIdMask section_mask = CreateSectionIdMask(section_ids);
  std::vector<ScoredDocumentHit> hits = {
      {first_id, section_mask, /*score=*/5},
      {second_id, section_mask, /*score=*/0}};

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ResultRetrieverV2> retriever,
      ResultRetrieverV2::Create(document_store.get(), schema_store_.get(),
                                language_segmenter_.get(), normalizer_.get()));

  // Expected result protos for the two documents.
  SearchResultProto::ResultProto expected_first;
  *expected_first.mutable_document() = CreateDocument(/*id=*/1);
  expected_first.set_score(5);
  SearchResultProto::ResultProto expected_second;
  *expected_second.mutable_document() = CreateDocument(/*id=*/2);
  expected_second.set_score(0);

  // The byte threshold is exactly the size of the first expected result.
  ResultSpecProto spec =
      CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
  spec.set_num_total_bytes_per_page_threshold(expected_first.ByteSizeLong());

  auto ranker = std::make_unique<
      PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
      std::move(hits), /*is_descending=*/true);
  ResultStateV2 state(std::move(ranker),
                      /*parent_adjustment_info=*/nullptr,
                      /*child_adjustment_info=*/nullptr, spec,
                      *document_store);

  // Page 1: the first result alone already meets the byte threshold, so the
  // retriever is expected to stop there although num_per_page is 2.
  auto [first_page, more_after_first] = retriever->RetrieveNextPage(
      state, fake_clock_.GetSystemTimeMilliseconds());
  EXPECT_THAT(first_page.results, ElementsAre(EqualsProto(expected_first)));
  EXPECT_TRUE(more_after_first);

  // Page 2: the remaining result, after which nothing is left.
  auto [second_page, more_after_second] = retriever->RetrieveNextPage(
      state, fake_clock_.GetSystemTimeMilliseconds());
  EXPECT_THAT(second_page.results, ElementsAre(EqualsProto(expected_second)));
  EXPECT_FALSE(more_after_second);
}
917
// Checks that a page still carries one result when that single result is
// larger than num_total_bytes_per_page_threshold.
TEST_F(ResultRetrieverV2Test,
       ShouldReturnSingleLargeResultAboveNumTotalBytesPerPageThreshold) {
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentStore::CreateResult store_creation,
      CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
                          schema_store_.get(), *feature_flags_));
  std::unique_ptr<DocumentStore> document_store =
      std::move(store_creation.document_store);

  // Two stored documents; the ids come from the store.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_a,
                             document_store->Put(CreateDocument(/*id=*/1)));
  const DocumentId id_a = put_a.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_b,
                             document_store->Put(CreateDocument(/*id=*/2)));
  const DocumentId id_b = put_b.new_document_id;

  std::vector<SectionId> section_ids = {GetSectionId("Email", "name"),
                                        GetSectionId("Email", "body")};
  SectionIdMask section_mask = CreateSectionIdMask(section_ids);
  std::vector<ScoredDocumentHit> hits = {{id_a, section_mask, /*score=*/5},
                                         {id_b, section_mask, /*score=*/0}};

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ResultRetrieverV2> retriever,
      ResultRetrieverV2::Create(document_store.get(), schema_store_.get(),
                                language_segmenter_.get(), normalizer_.get()));

  // Expected result protos, in descending score order.
  SearchResultProto::ResultProto expected_a;
  *expected_a.mutable_document() = CreateDocument(/*id=*/1);
  expected_a.set_score(5);
  SearchResultProto::ResultProto expected_b;
  *expected_b.mutable_document() = CreateDocument(/*id=*/2);
  expected_b.set_score(0);

  // Pick a threshold that any single result is guaranteed to exceed.
  int byte_threshold = 1;
  ASSERT_THAT(expected_a.ByteSizeLong(), Gt(byte_threshold));

  ResultSpecProto spec =
      CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
  spec.set_num_total_bytes_per_page_threshold(byte_threshold);

  auto ranker = std::make_unique<
      PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
      std::move(hits), /*is_descending=*/true);
  ResultStateV2 state(std::move(ranker),
                      /*parent_adjustment_info=*/nullptr,
                      /*child_adjustment_info=*/nullptr, spec,
                      *document_store);

  // Page 1: exactly one result comes back even though it is bigger than the
  // byte threshold; one more result remains.
  auto [page_one, more_after_one] = retriever->RetrieveNextPage(
      state, fake_clock_.GetSystemTimeMilliseconds());
  EXPECT_THAT(page_one.results, ElementsAre(EqualsProto(expected_a)));
  EXPECT_TRUE(more_after_one);

  // Page 2: the other result, and the state is then exhausted.
  auto [page_two, more_after_two] = retriever->RetrieveNextPage(
      state, fake_clock_.GetSystemTimeMilliseconds());
  EXPECT_THAT(page_two.results, ElementsAre(EqualsProto(expected_b)));
  EXPECT_FALSE(more_after_two);
}
981
// Checks that retrieval keeps going while the bytes collected so far are below
// num_total_bytes_per_page_threshold, so the result that crosses the threshold
// is still included in the page.
TEST_F(ResultRetrieverV2Test,
       ShouldRetrieveNextResultWhenBelowNumTotalBytesPerPageThreshold) {
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentStore::CreateResult store_creation,
      CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
                          schema_store_.get(), *feature_flags_));
  std::unique_ptr<DocumentStore> document_store =
      std::move(store_creation.document_store);

  // Two stored documents; the ids come from the store.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_a,
                             document_store->Put(CreateDocument(/*id=*/1)));
  const DocumentId id_a = put_a.new_document_id;
  ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_b,
                             document_store->Put(CreateDocument(/*id=*/2)));
  const DocumentId id_b = put_b.new_document_id;

  std::vector<SectionId> section_ids = {GetSectionId("Email", "name"),
                                        GetSectionId("Email", "body")};
  SectionIdMask section_mask = CreateSectionIdMask(section_ids);
  std::vector<ScoredDocumentHit> hits = {{id_a, section_mask, /*score=*/5},
                                         {id_b, section_mask, /*score=*/0}};

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ResultRetrieverV2> retriever,
      ResultRetrieverV2::Create(document_store.get(), schema_store_.get(),
                                language_segmenter_.get(), normalizer_.get()));

  // Expected result protos, in descending score order.
  SearchResultProto::ResultProto expected_a;
  *expected_a.mutable_document() = CreateDocument(/*id=*/1);
  expected_a.set_score(5);
  SearchResultProto::ResultProto expected_b;
  *expected_b.mutable_document() = CreateDocument(/*id=*/2);
  expected_b.set_score(0);

  // One byte more than the first result: not reached after the first result,
  // but exceeded once both are counted.
  int byte_threshold = expected_a.ByteSizeLong() + 1;
  ASSERT_THAT(expected_a.ByteSizeLong() + expected_b.ByteSizeLong(),
              Gt(byte_threshold));

  ResultSpecProto spec =
      CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
  spec.set_num_total_bytes_per_page_threshold(byte_threshold);

  auto ranker = std::make_unique<
      PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
      std::move(hits), /*is_descending=*/true);
  ResultStateV2 state(std::move(ranker),
                      /*parent_adjustment_info=*/nullptr,
                      /*child_adjustment_info=*/nullptr, spec,
                      *document_store);

  // After the first result, the byte total is still under the threshold and
  // the page is not full yet, so the second result is expected in the same
  // page even though the final total ends up above the threshold.
  auto [page, more_results] = retriever->RetrieveNextPage(
      state, fake_clock_.GetSystemTimeMilliseconds());
  EXPECT_THAT(page.results,
              ElementsAre(EqualsProto(expected_a), EqualsProto(expected_b)));
  // Both hits were consumed, so nothing is left.
  EXPECT_FALSE(more_results);
}
1041
1042 } // namespace
1043
1044 } // namespace lib
1045 } // namespace icing
1046