xref: /aosp_15_r20/external/icing/icing/result/result-retriever-v2_test.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/result/result-retriever-v2.h"
16 
17 #include <atomic>
18 #include <cstddef>
19 #include <cstdint>
20 #include <memory>
21 #include <string>
22 #include <unordered_map>
23 #include <utility>
24 #include <vector>
25 
26 #include "icing/text_classifier/lib3/utils/base/status.h"
27 #include "icing/text_classifier/lib3/utils/base/statusor.h"
28 #include "gmock/gmock.h"
29 #include "gtest/gtest.h"
30 #include "icing/absl_ports/mutex.h"
31 #include "icing/document-builder.h"
32 #include "icing/feature-flags.h"
33 #include "icing/file/filesystem.h"
34 #include "icing/file/mock-filesystem.h"
35 #include "icing/file/portable-file-backed-proto-log.h"
36 #include "icing/portable/equals-proto.h"
37 #include "icing/portable/platform.h"
38 #include "icing/proto/document.pb.h"
39 #include "icing/proto/document_wrapper.pb.h"
40 #include "icing/proto/schema.pb.h"
41 #include "icing/proto/search.pb.h"
42 #include "icing/result/page-result.h"
43 #include "icing/result/result-state-v2.h"
44 #include "icing/schema-builder.h"
45 #include "icing/schema/schema-store.h"
46 #include "icing/schema/section.h"
47 #include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
48 #include "icing/scoring/scored-document-hit.h"
49 #include "icing/store/document-filter-data.h"
50 #include "icing/store/document-id.h"
51 #include "icing/store/document-store.h"
52 #include "icing/testing/common-matchers.h"
53 #include "icing/testing/fake-clock.h"
54 #include "icing/testing/test-data.h"
55 #include "icing/testing/test-feature-flags.h"
56 #include "icing/testing/tmp-directory.h"
57 #include "icing/tokenization/language-segmenter-factory.h"
58 #include "icing/tokenization/language-segmenter.h"
59 #include "icing/transform/normalizer-factory.h"
60 #include "icing/transform/normalizer.h"
61 #include "icing/util/clock.h"
62 #include "icing/util/icu-data-file-helper.h"
63 #include "unicode/uloc.h"
64 
65 namespace icing {
66 namespace lib {
67 
68 namespace {
69 
70 using ::icing::lib::portable_equals_proto::EqualsProto;
71 using ::testing::DoDefault;
72 using ::testing::ElementsAre;
73 using ::testing::Eq;
74 using ::testing::Gt;
75 using ::testing::IsEmpty;
76 using ::testing::Pointee;
77 using ::testing::Return;
78 using ::testing::SizeIs;
79 using EntryIdMap = std::unordered_map<int32_t, int>;
80 
81 // Mock the behavior of GroupResultLimiter::ShouldBeRemoved.
82 class MockGroupResultLimiter : public GroupResultLimiterV2 {
83  public:
MockGroupResultLimiter()84   MockGroupResultLimiter() : GroupResultLimiterV2() {
85     ON_CALL(*this, ShouldBeRemoved).WillByDefault(Return(false));
86   }
87 
88   MOCK_METHOD(bool, ShouldBeRemoved,
89               (const ScoredDocumentHit&, const EntryIdMap&,
90                const DocumentStore&, std::vector<int>&,
91                ResultSpecProto::ResultGroupingType, int64_t),
92               (const, override));
93 };
94 
95 class ResultRetrieverV2Test : public ::testing::Test {
96  protected:
ResultRetrieverV2Test()97   ResultRetrieverV2Test() : test_dir_(GetTestTempDir() + "/icing") {
98     filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
99   }
100 
SetUp()101   void SetUp() override {
102     feature_flags_ = std::make_unique<FeatureFlags>(GetTestFeatureFlags());
103     if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
104       ICING_ASSERT_OK(
105           // File generated via icu_data_file rule in //icing/BUILD.
106           icu_data_file_helper::SetUpIcuDataFile(
107               GetTestFilePath("icing/icu.dat")));
108     }
109     language_segmenter_factory::SegmenterOptions options(ULOC_US);
110     ICING_ASSERT_OK_AND_ASSIGN(
111         language_segmenter_,
112         language_segmenter_factory::Create(std::move(options)));
113 
114     ICING_ASSERT_OK_AND_ASSIGN(
115         schema_store_, SchemaStore::Create(&filesystem_, test_dir_,
116                                            &fake_clock_, feature_flags_.get()));
117     ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
118                                                 /*max_term_byte_size=*/10000));
119 
120     SchemaProto schema =
121         SchemaBuilder()
122             .AddType(SchemaTypeConfigBuilder()
123                          .SetType("Email")
124                          .AddProperty(PropertyConfigBuilder()
125                                           .SetName("name")
126                                           .SetDataTypeString(TERM_MATCH_PREFIX,
127                                                              TOKENIZER_PLAIN)
128                                           .SetCardinality(CARDINALITY_OPTIONAL))
129                          .AddProperty(PropertyConfigBuilder()
130                                           .SetName("body")
131                                           .SetDataTypeString(TERM_MATCH_EXACT,
132                                                              TOKENIZER_PLAIN)
133                                           .SetCardinality(CARDINALITY_OPTIONAL))
134                          .AddProperty(
135                              PropertyConfigBuilder()
136                                  .SetName("sender")
137                                  .SetDataTypeDocument(
138                                      "Person", /*index_nested_properties=*/true)
139                                  .SetCardinality(CARDINALITY_OPTIONAL)))
140             .AddType(
141                 SchemaTypeConfigBuilder()
142                     .SetType("Person")
143                     .AddProperty(PropertyConfigBuilder()
144                                      .SetName("name")
145                                      .SetDataTypeString(TERM_MATCH_PREFIX,
146                                                         TOKENIZER_PLAIN)
147                                      .SetCardinality(CARDINALITY_OPTIONAL))
148                     .AddProperty(PropertyConfigBuilder()
149                                      .SetName("emailAddress")
150                                      .SetDataTypeString(TERM_MATCH_PREFIX,
151                                                         TOKENIZER_PLAIN)
152                                      .SetCardinality(CARDINALITY_OPTIONAL)))
153             .Build();
154     ASSERT_THAT(schema_store_->SetSchema(
155                     schema, /*ignore_errors_and_delete_documents=*/false,
156                     /*allow_circular_schema_definitions=*/false),
157                 IsOk());
158 
159     num_total_hits_ = 0;
160   }
161 
TearDown()162   void TearDown() override {
163     filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
164   }
165 
GetSectionId(const std::string & type,const std::string & property)166   SectionId GetSectionId(const std::string& type, const std::string& property) {
167     auto type_id_or = schema_store_->GetSchemaTypeId(type);
168     if (!type_id_or.ok()) {
169       return kInvalidSectionId;
170     }
171     SchemaTypeId type_id = type_id_or.ValueOrDie();
172     for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) {
173       auto metadata_or = schema_store_->GetSectionMetadata(type_id, section_id);
174       if (!metadata_or.ok()) {
175         break;
176       }
177       const SectionMetadata* metadata = metadata_or.ValueOrDie();
178       if (metadata->path == property) {
179         return metadata->id;
180       }
181     }
182     return kInvalidSectionId;
183   }
184 
185   std::unique_ptr<FeatureFlags> feature_flags_;
186   const Filesystem filesystem_;
187   const std::string test_dir_;
188   std::unique_ptr<LanguageSegmenter> language_segmenter_;
189   std::unique_ptr<SchemaStore> schema_store_;
190   std::unique_ptr<Normalizer> normalizer_;
191   std::atomic<int> num_total_hits_;
192   FakeClock fake_clock_;
193 };
194 
CreateDocument(int id)195 DocumentProto CreateDocument(int id) {
196   return DocumentBuilder()
197       .SetKey("icing", "Email/" + std::to_string(id))
198       .SetSchema("Email")
199       .AddStringProperty("name", "subject foo " + std::to_string(id))
200       .AddStringProperty("body", "body bar " + std::to_string(id))
201       .SetCreationTimestampMs(1574365086666 + id)
202       .Build();
203 }
204 
CreateSectionIdMask(const std::vector<SectionId> & section_ids)205 SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) {
206   SectionIdMask mask = 0;
207   for (SectionId section_id : section_ids) {
208     mask |= (UINT64_C(1) << section_id);
209   }
210   return mask;
211 }
212 
CreateResultSpec(int num_per_page,ResultSpecProto::ResultGroupingType result_group_type)213 ResultSpecProto CreateResultSpec(
214     int num_per_page, ResultSpecProto::ResultGroupingType result_group_type) {
215   ResultSpecProto result_spec;
216   result_spec.set_result_group_type(result_group_type);
217   result_spec.set_num_per_page(num_per_page);
218   return result_spec;
219 }
220 
CreateDocumentStore(const Filesystem * filesystem,const std::string & base_dir,const Clock * clock,const SchemaStore * schema_store,const FeatureFlags & feature_flags)221 libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
222     const Filesystem* filesystem, const std::string& base_dir,
223     const Clock* clock, const SchemaStore* schema_store,
224     const FeatureFlags& feature_flags) {
225   return DocumentStore::Create(
226       filesystem, base_dir, clock, schema_store, &feature_flags,
227       /*force_recovery_and_revalidate_documents=*/false,
228       /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/true,
229       PortableFileBackedProtoLog<DocumentWrapper>::kDefaultCompressionLevel,
230       /*initialize_stats=*/nullptr);
231 }
232 
// ResultRetrieverV2::Create() must reject a null pointer for each of its four
// required dependencies with FAILED_PRECONDITION.
TEST_F(ResultRetrieverV2Test, CreationWithNullPointerShouldFail) {
  const auto expected_code =
      libtextclassifier3::StatusCode::FAILED_PRECONDITION;

  // Null document store. No real store is needed for this case.
  EXPECT_THAT(
      ResultRetrieverV2::Create(/*doc_store=*/nullptr, schema_store_.get(),
                                language_segmenter_.get(), normalizer_.get()),
      StatusIs(expected_code));

  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentStore::CreateResult store_create_result,
      CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
                          schema_store_.get(), *feature_flags_));
  std::unique_ptr<DocumentStore> document_store =
      std::move(store_create_result.document_store);

  // Null schema store.
  EXPECT_THAT(
      ResultRetrieverV2::Create(document_store.get(), /*schema_store=*/nullptr,
                                language_segmenter_.get(), normalizer_.get()),
      StatusIs(expected_code));

  // Null language segmenter.
  EXPECT_THAT(
      ResultRetrieverV2::Create(document_store.get(), schema_store_.get(),
                                /*language_segmenter=*/nullptr,
                                normalizer_.get()),
      StatusIs(expected_code));

  // Null normalizer.
  EXPECT_THAT(
      ResultRetrieverV2::Create(document_store.get(), schema_store_.get(),
                                language_segmenter_.get(),
                                /*normalizer=*/nullptr),
      StatusIs(expected_code));
}
259 
TEST_F(ResultRetrieverV2Test,ShouldRetrieveSimpleResults)260 TEST_F(ResultRetrieverV2Test, ShouldRetrieveSimpleResults) {
261   ICING_ASSERT_OK_AND_ASSIGN(
262       DocumentStore::CreateResult create_result,
263       CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
264                           schema_store_.get(), *feature_flags_));
265   std::unique_ptr<DocumentStore> doc_store =
266       std::move(create_result.document_store);
267 
268   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result1,
269                              doc_store->Put(CreateDocument(/*id=*/1)));
270   DocumentId document_id1 = put_result1.new_document_id;
271   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result2,
272                              doc_store->Put(CreateDocument(/*id=*/2)));
273   DocumentId document_id2 = put_result2.new_document_id;
274   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result3,
275                              doc_store->Put(CreateDocument(/*id=*/3)));
276   DocumentId document_id3 = put_result3.new_document_id;
277   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result4,
278                              doc_store->Put(CreateDocument(/*id=*/4)));
279   DocumentId document_id4 = put_result4.new_document_id;
280   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result5,
281                              doc_store->Put(CreateDocument(/*id=*/5)));
282   DocumentId document_id5 = put_result5.new_document_id;
283 
284   std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
285                                             GetSectionId("Email", "body")};
286   SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
287   std::vector<ScoredDocumentHit> scored_document_hits = {
288       {document_id1, hit_section_id_mask, /*score=*/19},
289       {document_id2, hit_section_id_mask, /*score=*/12},
290       {document_id3, hit_section_id_mask, /*score=*/8},
291       {document_id4, hit_section_id_mask, /*score=*/3},
292       {document_id5, hit_section_id_mask, /*score=*/1}};
293   ICING_ASSERT_OK_AND_ASSIGN(
294       std::unique_ptr<ResultRetrieverV2> result_retriever,
295       ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
296                                 language_segmenter_.get(), normalizer_.get()));
297 
298   SearchResultProto::ResultProto result1;
299   *result1.mutable_document() = CreateDocument(/*id=*/1);
300   result1.set_score(19);
301   SearchResultProto::ResultProto result2;
302   *result2.mutable_document() = CreateDocument(/*id=*/2);
303   result2.set_score(12);
304   SearchResultProto::ResultProto result3;
305   *result3.mutable_document() = CreateDocument(/*id=*/3);
306   result3.set_score(8);
307   SearchResultProto::ResultProto result4;
308   *result4.mutable_document() = CreateDocument(/*id=*/4);
309   result4.set_score(3);
310   SearchResultProto::ResultProto result5;
311   *result5.mutable_document() = CreateDocument(/*id=*/5);
312   result5.set_score(1);
313 
314   ResultStateV2 result_state(
315       std::make_unique<
316           PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
317           std::move(scored_document_hits), /*is_descending=*/true),
318       /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
319       CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
320       *doc_store);
321 
322   // First page, 2 results
323   auto [page_result1, has_more_results1] = result_retriever->RetrieveNextPage(
324       result_state, fake_clock_.GetSystemTimeMilliseconds());
325   EXPECT_THAT(page_result1.results,
326               ElementsAre(EqualsProto(result1), EqualsProto(result2)));
327   // num_results_with_snippets is 0 when there is no snippet.
328   EXPECT_THAT(page_result1.num_results_with_snippets, Eq(0));
329   // Requested page size is same as num_per_page.
330   EXPECT_THAT(page_result1.requested_page_size, Eq(2));
331   // Has more results.
332   EXPECT_TRUE(has_more_results1);
333 
334   // Second page, 2 results
335   auto [page_result2, has_more_results2] = result_retriever->RetrieveNextPage(
336       result_state, fake_clock_.GetSystemTimeMilliseconds());
337   EXPECT_THAT(page_result2.results,
338               ElementsAre(EqualsProto(result3), EqualsProto(result4)));
339   // num_results_with_snippets is 0 when there is no snippet.
340   EXPECT_THAT(page_result2.num_results_with_snippets, Eq(0));
341   // Requested page size is same as num_per_page.
342   EXPECT_THAT(page_result2.requested_page_size, Eq(2));
343   // Has more results.
344   EXPECT_TRUE(has_more_results2);
345 
346   // Third page, 1 result
347   auto [page_result3, has_more_results3] = result_retriever->RetrieveNextPage(
348       result_state, fake_clock_.GetSystemTimeMilliseconds());
349   EXPECT_THAT(page_result3.results, ElementsAre(EqualsProto(result5)));
350   // num_results_with_snippets is 0 when there is no snippet.
351   EXPECT_THAT(page_result3.num_results_with_snippets, Eq(0));
352   // Requested page size is same as num_per_page.
353   EXPECT_THAT(page_result3.requested_page_size, Eq(2));
354   // No more results.
355   EXPECT_FALSE(has_more_results3);
356 }
357 
TEST_F(ResultRetrieverV2Test,ShouldIgnoreNonInternalErrors)358 TEST_F(ResultRetrieverV2Test, ShouldIgnoreNonInternalErrors) {
359   ICING_ASSERT_OK_AND_ASSIGN(
360       DocumentStore::CreateResult create_result,
361       CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
362                           schema_store_.get(), *feature_flags_));
363   std::unique_ptr<DocumentStore> doc_store =
364       std::move(create_result.document_store);
365 
366   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result1,
367                              doc_store->Put(CreateDocument(/*id=*/1)));
368   DocumentId document_id1 = put_result1.new_document_id;
369   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result2,
370                              doc_store->Put(CreateDocument(/*id=*/2)));
371   DocumentId document_id2 = put_result2.new_document_id;
372 
373   DocumentId invalid_document_id = -1;
374   std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
375                                             GetSectionId("Email", "body")};
376   SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
377   std::vector<ScoredDocumentHit> scored_document_hits = {
378       {document_id1, hit_section_id_mask, /*score=*/12},
379       {document_id2, hit_section_id_mask, /*score=*/4},
380       {invalid_document_id, hit_section_id_mask, /*score=*/0}};
381   ICING_ASSERT_OK_AND_ASSIGN(
382       std::unique_ptr<ResultRetrieverV2> result_retriever,
383       ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
384                                 language_segmenter_.get(), normalizer_.get(),
385                                 std::make_unique<MockGroupResultLimiter>()));
386 
387   SearchResultProto::ResultProto result1;
388   *result1.mutable_document() = CreateDocument(/*id=*/1);
389   result1.set_score(12);
390   SearchResultProto::ResultProto result2;
391   *result2.mutable_document() = CreateDocument(/*id=*/2);
392   result2.set_score(4);
393 
394   ResultStateV2 result_state1(
395       std::make_unique<
396           PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
397           std::move(scored_document_hits),
398           /*is_descending=*/true),
399       /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
400       CreateResultSpec(/*num_per_page=*/3, ResultSpecProto::NAMESPACE),
401       *doc_store);
402   PageResult page_result1 =
403       result_retriever
404           ->RetrieveNextPage(result_state1,
405                              fake_clock_.GetSystemTimeMilliseconds())
406           .first;
407   EXPECT_THAT(page_result1.results,
408               ElementsAre(EqualsProto(result1), EqualsProto(result2)));
409 
410   DocumentId non_existing_document_id = 4;
411   scored_document_hits = {
412       {non_existing_document_id, hit_section_id_mask, /*score=*/15},
413       {document_id1, hit_section_id_mask, /*score=*/12},
414       {document_id2, hit_section_id_mask, /*score=*/4}};
415   ResultStateV2 result_state2(
416       std::make_unique<
417           PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
418           std::move(scored_document_hits),
419           /*is_descending=*/true),
420       /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
421       CreateResultSpec(/*num_per_page=*/3, ResultSpecProto::NAMESPACE),
422       *doc_store);
423   PageResult page_result2 =
424       result_retriever
425           ->RetrieveNextPage(result_state2,
426                              fake_clock_.GetSystemTimeMilliseconds())
427           .first;
428   EXPECT_THAT(page_result2.results,
429               ElementsAre(EqualsProto(result1), EqualsProto(result2)));
430 }
431 
TEST_F(ResultRetrieverV2Test,ShouldLimitNumChildDocumentsByMaxJoinedChildPerParent)432 TEST_F(ResultRetrieverV2Test,
433        ShouldLimitNumChildDocumentsByMaxJoinedChildPerParent) {
434   ICING_ASSERT_OK_AND_ASSIGN(
435       DocumentStore::CreateResult create_result,
436       CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
437                           schema_store_.get(), *feature_flags_));
438   std::unique_ptr<DocumentStore> doc_store =
439       std::move(create_result.document_store);
440 
441   // 1. Add 2 Person document
442   DocumentProto person_document1 =
443       DocumentBuilder()
444           .SetKey("namespace", "Person/1")
445           .SetCreationTimestampMs(1000)
446           .SetSchema("Person")
447           .AddStringProperty("name", "Joe Fox")
448           .AddStringProperty("emailAddress", "[email protected]")
449           .Build();
450   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result1,
451                              doc_store->Put(person_document1));
452   DocumentId person_document_id1 = put_result1.new_document_id;
453 
454   DocumentProto person_document2 =
455       DocumentBuilder()
456           .SetKey("namespace", "Person/2")
457           .SetCreationTimestampMs(1000)
458           .SetSchema("Person")
459           .AddStringProperty("name", "Meg Ryan")
460           .AddStringProperty("emailAddress", "[email protected]")
461           .Build();
462   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result2,
463                              doc_store->Put(person_document2));
464   DocumentId person_document_id2 = put_result2.new_document_id;
465 
466   // 2. Add 4 Email documents
467   DocumentProto email_document1 = DocumentBuilder()
468                                       .SetKey("namespace", "Email/1")
469                                       .SetCreationTimestampMs(1000)
470                                       .SetSchema("Email")
471                                       .AddStringProperty("name", "Test 1")
472                                       .AddStringProperty("body", "Test 1")
473                                       .Build();
474   ICING_ASSERT_OK_AND_ASSIGN(put_result1, doc_store->Put(email_document1));
475   DocumentId email_document_id1 = put_result1.new_document_id;
476 
477   DocumentProto email_document2 = DocumentBuilder()
478                                       .SetKey("namespace", "Email/2")
479                                       .SetCreationTimestampMs(1000)
480                                       .SetSchema("Email")
481                                       .AddStringProperty("name", "Test 2")
482                                       .AddStringProperty("body", "Test 2")
483                                       .Build();
484   ICING_ASSERT_OK_AND_ASSIGN(put_result2, doc_store->Put(email_document2));
485   DocumentId email_document_id2 = put_result2.new_document_id;
486 
487   DocumentProto email_document3 = DocumentBuilder()
488                                       .SetKey("namespace", "Email/3")
489                                       .SetCreationTimestampMs(1000)
490                                       .SetSchema("Email")
491                                       .AddStringProperty("name", "Test 3")
492                                       .AddStringProperty("body", "Test 3")
493                                       .Build();
494   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result3,
495                              doc_store->Put(email_document3));
496   DocumentId email_document_id3 = put_result3.new_document_id;
497 
498   DocumentProto email_document4 = DocumentBuilder()
499                                       .SetKey("namespace", "Email/4")
500                                       .SetCreationTimestampMs(1000)
501                                       .SetSchema("Email")
502                                       .AddStringProperty("name", "Test 4")
503                                       .AddStringProperty("body", "Test 4")
504                                       .Build();
505   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result4,
506                              doc_store->Put(email_document4));
507   DocumentId email_document_id4 = put_result4.new_document_id;
508 
509   // 3. Setup the joined scored results.
510   std::vector<SectionId> person_hit_section_ids = {
511       GetSectionId("Person", "name")};
512   std::vector<SectionId> email_hit_section_ids = {
513       GetSectionId("Email", "name"), GetSectionId("Email", "body")};
514   SectionIdMask person_hit_section_id_mask =
515       CreateSectionIdMask(person_hit_section_ids);
516   SectionIdMask email_hit_section_id_mask =
517       CreateSectionIdMask(email_hit_section_ids);
518 
519   ScoredDocumentHit person1_scored_doc_hit(
520       person_document_id1, person_hit_section_id_mask, /*score=*/1);
521   ScoredDocumentHit person2_scored_doc_hit(
522       person_document_id2, person_hit_section_id_mask, /*score=*/2);
523   ScoredDocumentHit email1_scored_doc_hit(
524       email_document_id1, email_hit_section_id_mask, /*score=*/3);
525   ScoredDocumentHit email2_scored_doc_hit(
526       email_document_id2, email_hit_section_id_mask, /*score=*/4);
527   ScoredDocumentHit email3_scored_doc_hit(
528       email_document_id3, email_hit_section_id_mask, /*score=*/5);
529   ScoredDocumentHit email4_scored_doc_hit(
530       email_document_id4, email_hit_section_id_mask, /*score=*/6);
531   // Create JoinedScoredDocumentHits mapping:
532   // - Person1 to Email1
533   // - Person2 to Email2, Email3, Email4
534   std::vector<JoinedScoredDocumentHit> joined_scored_document_hits = {
535       JoinedScoredDocumentHit(
536           /*final_score=*/1,
537           /*parent_scored_document_hit=*/person1_scored_doc_hit,
538           /*child_scored_document_hits=*/{email1_scored_doc_hit}),
539       JoinedScoredDocumentHit(
540           /*final_score=*/3,
541           /*parent_scored_document_hit=*/person2_scored_doc_hit,
542           /*child_scored_document_hits=*/
543           {email4_scored_doc_hit, email3_scored_doc_hit,
544            email2_scored_doc_hit})};
545 
546   // 4. Retrieve result with max_joined_children_per_parent_to_return = 2.
547   ResultSpecProto result_spec =
548       CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
549   result_spec.set_max_joined_children_per_parent_to_return(2);
550 
551   ICING_ASSERT_OK_AND_ASSIGN(
552       std::unique_ptr<ResultRetrieverV2> result_retriever,
553       ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
554                                 language_segmenter_.get(), normalizer_.get()));
555   ResultStateV2 result_state(
556       std::make_unique<
557           PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
558           std::move(joined_scored_document_hits), /*is_descending=*/true),
559       /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
560       result_spec, *doc_store);
561 
562   // Result1: person2 with child docs = [email4, email3]
563   SearchResultProto::ResultProto result1;
564   *result1.mutable_document() = person_document2;
565   result1.set_score(3);
566   SearchResultProto::ResultProto* child1 = result1.add_joined_results();
567   *child1->mutable_document() = email_document4;
568   child1->set_score(6);
569   SearchResultProto::ResultProto* child2 = result1.add_joined_results();
570   *child2->mutable_document() = email_document3;
571   child2->set_score(5);
572 
573   // Result2: person1 with child docs = [email1]
574   SearchResultProto::ResultProto result2;
575   *result2.mutable_document() = person_document1;
576   result2.set_score(1);
577   SearchResultProto::ResultProto* child3 = result2.add_joined_results();
578   *child3->mutable_document() = email_document1;
579   child3->set_score(3);
580 
581   auto [page_result, has_more_results] = result_retriever->RetrieveNextPage(
582       result_state, fake_clock_.GetSystemTimeMilliseconds());
583   EXPECT_THAT(page_result.results,
584               ElementsAre(EqualsProto(result1), EqualsProto(result2)));
585   // No more results.
586   EXPECT_FALSE(has_more_results);
587 }
588 
TEST_F(ResultRetrieverV2Test,ShouldIgnoreInternalErrors)589 TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) {
590   MockFilesystem mock_filesystem;
591   EXPECT_CALL(mock_filesystem,
592               PRead(A<int>(), A<void*>(), A<size_t>(), A<off_t>()))
593       .WillOnce(Return(false))
594       .WillRepeatedly(DoDefault());
595 
596   ICING_ASSERT_OK_AND_ASSIGN(
597       DocumentStore::CreateResult create_result,
598       CreateDocumentStore(&mock_filesystem, test_dir_, &fake_clock_,
599                           schema_store_.get(), *feature_flags_));
600   std::unique_ptr<DocumentStore> doc_store =
601       std::move(create_result.document_store);
602 
603   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result1,
604                              doc_store->Put(CreateDocument(/*id=*/1)));
605   DocumentId document_id1 = put_result1.new_document_id;
606   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result2,
607                              doc_store->Put(CreateDocument(/*id=*/2)));
608   DocumentId document_id2 = put_result2.new_document_id;
609 
610   std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
611                                             GetSectionId("Email", "body")};
612   SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
613   std::vector<ScoredDocumentHit> scored_document_hits = {
614       {document_id1, hit_section_id_mask, /*score=*/0},
615       {document_id2, hit_section_id_mask, /*score=*/0}};
616 
617   ICING_ASSERT_OK_AND_ASSIGN(
618       std::unique_ptr<ResultRetrieverV2> result_retriever,
619       ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
620                                 language_segmenter_.get(), normalizer_.get(),
621                                 std::make_unique<MockGroupResultLimiter>()));
622 
623   SearchResultProto::ResultProto result1;
624   *result1.mutable_document() = CreateDocument(/*id=*/1);
625   result1.set_score(0);
626 
627   ResultStateV2 result_state(
628       std::make_unique<
629           PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
630           std::move(scored_document_hits),
631           /*is_descending=*/true),
632       /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
633       CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
634       *doc_store);
635   PageResult page_result =
636       result_retriever
637           ->RetrieveNextPage(result_state,
638                              fake_clock_.GetSystemTimeMilliseconds())
639           .first;
640   // We mocked mock_filesystem to return an internal error when retrieving doc2,
641   // so doc2 should be skipped and doc1 should still be returned.
642   EXPECT_THAT(page_result.results, ElementsAre(EqualsProto(result1)));
643 }
644 
TEST_F(ResultRetrieverV2Test,ShouldUpdateResultState)645 TEST_F(ResultRetrieverV2Test, ShouldUpdateResultState) {
646   ICING_ASSERT_OK_AND_ASSIGN(
647       DocumentStore::CreateResult create_result,
648       CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
649                           schema_store_.get(), *feature_flags_));
650   std::unique_ptr<DocumentStore> doc_store =
651       std::move(create_result.document_store);
652 
653   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result1,
654                              doc_store->Put(CreateDocument(/*id=*/1)));
655   DocumentId document_id1 = put_result1.new_document_id;
656   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result2,
657                              doc_store->Put(CreateDocument(/*id=*/2)));
658   DocumentId document_id2 = put_result2.new_document_id;
659   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result3,
660                              doc_store->Put(CreateDocument(/*id=*/3)));
661   DocumentId document_id3 = put_result3.new_document_id;
662   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result4,
663                              doc_store->Put(CreateDocument(/*id=*/4)));
664   DocumentId document_id4 = put_result4.new_document_id;
665   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result5,
666                              doc_store->Put(CreateDocument(/*id=*/5)));
667   DocumentId document_id5 = put_result5.new_document_id;
668 
669   std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
670                                             GetSectionId("Email", "body")};
671   SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
672   std::vector<ScoredDocumentHit> scored_document_hits = {
673       {document_id1, hit_section_id_mask, /*score=*/0},
674       {document_id2, hit_section_id_mask, /*score=*/0},
675       {document_id3, hit_section_id_mask, /*score=*/0},
676       {document_id4, hit_section_id_mask, /*score=*/0},
677       {document_id5, hit_section_id_mask, /*score=*/0}};
678   ICING_ASSERT_OK_AND_ASSIGN(
679       std::unique_ptr<ResultRetrieverV2> result_retriever,
680       ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
681                                 language_segmenter_.get(), normalizer_.get()));
682 
683   ResultStateV2 result_state(
684       std::make_unique<
685           PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
686           std::move(scored_document_hits),
687           /*is_descending=*/true),
688       /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
689       CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
690       *doc_store);
691 
692   // First page, 2 results
693   PageResult page_result1 =
694       result_retriever
695           ->RetrieveNextPage(result_state,
696                              fake_clock_.GetSystemTimeMilliseconds())
697           .first;
698   ASSERT_THAT(page_result1.results, SizeIs(2));
699   {
700     absl_ports::shared_lock l(&result_state.mutex);
701 
702     // num_returned = size of first page
703     EXPECT_THAT(result_state.num_returned, Eq(2));
704     // Should remove the 2 returned docs from scored_document_hits and only
705     // contain the remaining 3.
706     EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(SizeIs(3)));
707   }
708 
709   // Second page, 2 results
710   PageResult page_result2 =
711       result_retriever
712           ->RetrieveNextPage(result_state,
713                              fake_clock_.GetSystemTimeMilliseconds())
714           .first;
715   ASSERT_THAT(page_result2.results, SizeIs(2));
716   {
717     absl_ports::shared_lock l(&result_state.mutex);
718 
719     // num_returned = size of first and second pages
720     EXPECT_THAT(result_state.num_returned, Eq(4));
721     // Should remove the 2 returned docs from scored_document_hits and only
722     // contain the remaining 1.
723     EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(SizeIs(1)));
724   }
725 
726   // Third page, 1 result
727   PageResult page_result3 =
728       result_retriever
729           ->RetrieveNextPage(result_state,
730                              fake_clock_.GetSystemTimeMilliseconds())
731           .first;
732   ASSERT_THAT(page_result3.results, SizeIs(1));
733   {
734     absl_ports::shared_lock l(&result_state.mutex);
735 
736     // num_returned = size of first, second and third pages
737     EXPECT_THAT(result_state.num_returned, Eq(5));
738     // Should remove the 1 returned doc from scored_document_hits and become
739     // empty.
740     EXPECT_THAT(result_state.scored_document_hits_ranker, Pointee(IsEmpty()));
741   }
742 }
743 
TEST_F(ResultRetrieverV2Test,ShouldUpdateNumTotalHits)744 TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) {
745   ICING_ASSERT_OK_AND_ASSIGN(
746       DocumentStore::CreateResult create_result,
747       CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
748                           schema_store_.get(), *feature_flags_));
749   std::unique_ptr<DocumentStore> doc_store =
750       std::move(create_result.document_store);
751 
752   std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
753                                             GetSectionId("Email", "body")};
754   SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
755 
756   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result1,
757                              doc_store->Put(CreateDocument(/*id=*/1)));
758   DocumentId document_id1 = put_result1.new_document_id;
759   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result2,
760                              doc_store->Put(CreateDocument(/*id=*/2)));
761   DocumentId document_id2 = put_result2.new_document_id;
762   std::vector<ScoredDocumentHit> scored_document_hits1 = {
763       {document_id1, hit_section_id_mask, /*score=*/0},
764       {document_id2, hit_section_id_mask, /*score=*/0}};
765   std::shared_ptr<ResultStateV2> result_state1 =
766       std::make_shared<ResultStateV2>(
767           std::make_unique<
768               PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
769               std::move(scored_document_hits1),
770               /*is_descending=*/true),
771           /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
772           CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE),
773           *doc_store);
774   {
775     absl_ports::unique_lock l(&result_state1->mutex);
776 
777     result_state1->RegisterNumTotalHits(&num_total_hits_);
778     ASSERT_THAT(num_total_hits_, Eq(2));
779   }
780 
781   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result3,
782                              doc_store->Put(CreateDocument(/*id=*/3)));
783   DocumentId document_id3 = put_result3.new_document_id;
784   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result4,
785                              doc_store->Put(CreateDocument(/*id=*/4)));
786   DocumentId document_id4 = put_result4.new_document_id;
787   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result5,
788                              doc_store->Put(CreateDocument(/*id=*/5)));
789   DocumentId document_id5 = put_result5.new_document_id;
790   std::vector<ScoredDocumentHit> scored_document_hits2 = {
791       {document_id3, hit_section_id_mask, /*score=*/0},
792       {document_id4, hit_section_id_mask, /*score=*/0},
793       {document_id5, hit_section_id_mask, /*score=*/0}};
794   std::shared_ptr<ResultStateV2> result_state2 =
795       std::make_shared<ResultStateV2>(
796           std::make_unique<
797               PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
798               std::move(scored_document_hits2),
799               /*is_descending=*/true),
800           /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
801           CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE),
802           *doc_store);
803   {
804     absl_ports::unique_lock l(&result_state2->mutex);
805 
806     result_state2->RegisterNumTotalHits(&num_total_hits_);
807     ASSERT_THAT(num_total_hits_, Eq(5));
808   }
809 
810   ICING_ASSERT_OK_AND_ASSIGN(
811       std::unique_ptr<ResultRetrieverV2> result_retriever,
812       ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
813                                 language_segmenter_.get(), normalizer_.get()));
814 
815   // Should get 1 doc in the first page of result_state1, and num_total_hits
816   // should be decremented by 1.
817   PageResult page_result1 =
818       result_retriever
819           ->RetrieveNextPage(*result_state1,
820                              fake_clock_.GetSystemTimeMilliseconds())
821           .first;
822   ASSERT_THAT(page_result1.results, SizeIs(1));
823   EXPECT_THAT(num_total_hits_, Eq(4));
824 
825   // Should get 2 docs in the first page of result_state2, and num_total_hits
826   // should be decremented by 2.
827   PageResult page_result2 =
828       result_retriever
829           ->RetrieveNextPage(*result_state2,
830                              fake_clock_.GetSystemTimeMilliseconds())
831           .first;
832   ASSERT_THAT(page_result2.results, SizeIs(2));
833   EXPECT_THAT(num_total_hits_, Eq(2));
834 
835   // Should get 1 doc in the second page of result_state2 (although num_per_page
836   // is 2, there is only 1 doc left), and num_total_hits should be decremented
837   // by 1.
838   PageResult page_result3 =
839       result_retriever
840           ->RetrieveNextPage(*result_state2,
841                              fake_clock_.GetSystemTimeMilliseconds())
842           .first;
843   ASSERT_THAT(page_result3.results, SizeIs(1));
844   EXPECT_THAT(num_total_hits_, Eq(1));
845 
846   // Destruct result_state1. There is 1 doc left, so num_total_hits should be
847   // decremented by 1 when destructing it.
848   result_state1.reset();
849   EXPECT_THAT(num_total_hits_, Eq(0));
850 
851   // Destruct result_state2. There is 0 doc left, so num_total_hits should be
852   // unchanged when destructing it.
853   result_state1.reset();
854   EXPECT_THAT(num_total_hits_, Eq(0));
855 }
856 
TEST_F(ResultRetrieverV2Test,ShouldLimitNumTotalBytesPerPage)857 TEST_F(ResultRetrieverV2Test, ShouldLimitNumTotalBytesPerPage) {
858   ICING_ASSERT_OK_AND_ASSIGN(
859       DocumentStore::CreateResult create_result,
860       CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
861                           schema_store_.get(), *feature_flags_));
862   std::unique_ptr<DocumentStore> doc_store =
863       std::move(create_result.document_store);
864 
865   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result1,
866                              doc_store->Put(CreateDocument(/*id=*/1)));
867   DocumentId document_id1 = put_result1.new_document_id;
868   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result2,
869                              doc_store->Put(CreateDocument(/*id=*/2)));
870   DocumentId document_id2 = put_result2.new_document_id;
871 
872   std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
873                                             GetSectionId("Email", "body")};
874   SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
875   std::vector<ScoredDocumentHit> scored_document_hits = {
876       {document_id1, hit_section_id_mask, /*score=*/5},
877       {document_id2, hit_section_id_mask, /*score=*/0}};
878   ICING_ASSERT_OK_AND_ASSIGN(
879       std::unique_ptr<ResultRetrieverV2> result_retriever,
880       ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
881                                 language_segmenter_.get(), normalizer_.get()));
882 
883   SearchResultProto::ResultProto result1;
884   *result1.mutable_document() = CreateDocument(/*id=*/1);
885   result1.set_score(5);
886   SearchResultProto::ResultProto result2;
887   *result2.mutable_document() = CreateDocument(/*id=*/2);
888   result2.set_score(0);
889 
890   ResultSpecProto result_spec =
891       CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
892   result_spec.set_num_total_bytes_per_page_threshold(result1.ByteSizeLong());
893   ResultStateV2 result_state(
894       std::make_unique<
895           PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
896           std::move(scored_document_hits),
897           /*is_descending=*/true),
898       /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
899       result_spec, *doc_store);
900 
901   // First page. Only result1 should be returned, since its byte size meets
902   // num_total_bytes_per_page_threshold and ResultRetriever should terminate
903   // early even though # of results is still below num_per_page.
904   auto [page_result1, has_more_results1] = result_retriever->RetrieveNextPage(
905       result_state, fake_clock_.GetSystemTimeMilliseconds());
906   EXPECT_THAT(page_result1.results, ElementsAre(EqualsProto(result1)));
907   // Has more results.
908   EXPECT_TRUE(has_more_results1);
909 
910   // Second page, result2.
911   auto [page_result2, has_more_results2] = result_retriever->RetrieveNextPage(
912       result_state, fake_clock_.GetSystemTimeMilliseconds());
913   EXPECT_THAT(page_result2.results, ElementsAre(EqualsProto(result2)));
914   // No more results.
915   EXPECT_FALSE(has_more_results2);
916 }
917 
TEST_F(ResultRetrieverV2Test,ShouldReturnSingleLargeResultAboveNumTotalBytesPerPageThreshold)918 TEST_F(ResultRetrieverV2Test,
919        ShouldReturnSingleLargeResultAboveNumTotalBytesPerPageThreshold) {
920   ICING_ASSERT_OK_AND_ASSIGN(
921       DocumentStore::CreateResult create_result,
922       CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
923                           schema_store_.get(), *feature_flags_));
924   std::unique_ptr<DocumentStore> doc_store =
925       std::move(create_result.document_store);
926 
927   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result1,
928                              doc_store->Put(CreateDocument(/*id=*/1)));
929   DocumentId document_id1 = put_result1.new_document_id;
930   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result2,
931                              doc_store->Put(CreateDocument(/*id=*/2)));
932   DocumentId document_id2 = put_result2.new_document_id;
933 
934   std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
935                                             GetSectionId("Email", "body")};
936   SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
937   std::vector<ScoredDocumentHit> scored_document_hits = {
938       {document_id1, hit_section_id_mask, /*score=*/5},
939       {document_id2, hit_section_id_mask, /*score=*/0}};
940   ICING_ASSERT_OK_AND_ASSIGN(
941       std::unique_ptr<ResultRetrieverV2> result_retriever,
942       ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
943                                 language_segmenter_.get(), normalizer_.get()));
944 
945   SearchResultProto::ResultProto result1;
946   *result1.mutable_document() = CreateDocument(/*id=*/1);
947   result1.set_score(5);
948   SearchResultProto::ResultProto result2;
949   *result2.mutable_document() = CreateDocument(/*id=*/2);
950   result2.set_score(0);
951 
952   int threshold = 1;
953   ASSERT_THAT(result1.ByteSizeLong(), Gt(threshold));
954 
955   ResultSpecProto result_spec =
956       CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
957   result_spec.set_num_total_bytes_per_page_threshold(threshold);
958   ResultStateV2 result_state(
959       std::make_unique<
960           PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
961           std::move(scored_document_hits),
962           /*is_descending=*/true),
963       /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
964       result_spec, *doc_store);
965 
966   // First page. Should return single result1 even though its byte size exceeds
967   // num_total_bytes_per_page_threshold.
968   auto [page_result1, has_more_results1] = result_retriever->RetrieveNextPage(
969       result_state, fake_clock_.GetSystemTimeMilliseconds());
970   EXPECT_THAT(page_result1.results, ElementsAre(EqualsProto(result1)));
971   // Has more results.
972   EXPECT_TRUE(has_more_results1);
973 
974   // Second page, result2.
975   auto [page_result2, has_more_results2] = result_retriever->RetrieveNextPage(
976       result_state, fake_clock_.GetSystemTimeMilliseconds());
977   EXPECT_THAT(page_result2.results, ElementsAre(EqualsProto(result2)));
978   // No more results.
979   EXPECT_FALSE(has_more_results2);
980 }
981 
TEST_F(ResultRetrieverV2Test,ShouldRetrieveNextResultWhenBelowNumTotalBytesPerPageThreshold)982 TEST_F(ResultRetrieverV2Test,
983        ShouldRetrieveNextResultWhenBelowNumTotalBytesPerPageThreshold) {
984   ICING_ASSERT_OK_AND_ASSIGN(
985       DocumentStore::CreateResult create_result,
986       CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
987                           schema_store_.get(), *feature_flags_));
988   std::unique_ptr<DocumentStore> doc_store =
989       std::move(create_result.document_store);
990 
991   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result1,
992                              doc_store->Put(CreateDocument(/*id=*/1)));
993   DocumentId document_id1 = put_result1.new_document_id;
994   ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::PutResult put_result2,
995                              doc_store->Put(CreateDocument(/*id=*/2)));
996   DocumentId document_id2 = put_result2.new_document_id;
997 
998   std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
999                                             GetSectionId("Email", "body")};
1000   SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
1001   std::vector<ScoredDocumentHit> scored_document_hits = {
1002       {document_id1, hit_section_id_mask, /*score=*/5},
1003       {document_id2, hit_section_id_mask, /*score=*/0}};
1004   ICING_ASSERT_OK_AND_ASSIGN(
1005       std::unique_ptr<ResultRetrieverV2> result_retriever,
1006       ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
1007                                 language_segmenter_.get(), normalizer_.get()));
1008 
1009   SearchResultProto::ResultProto result1;
1010   *result1.mutable_document() = CreateDocument(/*id=*/1);
1011   result1.set_score(5);
1012   SearchResultProto::ResultProto result2;
1013   *result2.mutable_document() = CreateDocument(/*id=*/2);
1014   result2.set_score(0);
1015 
1016   int threshold = result1.ByteSizeLong() + 1;
1017   ASSERT_THAT(result1.ByteSizeLong() + result2.ByteSizeLong(), Gt(threshold));
1018 
1019   ResultSpecProto result_spec =
1020       CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
1021   result_spec.set_num_total_bytes_per_page_threshold(threshold);
1022   ResultStateV2 result_state(
1023       std::make_unique<
1024           PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
1025           std::move(scored_document_hits),
1026           /*is_descending=*/true),
1027       /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
1028       result_spec, *doc_store);
1029 
1030   // After retrieving result1, total bytes are still below the threshold and #
1031   // of results is still below num_per_page, so ResultRetriever should continue
1032   // the retrieval process and thus include result2 into this page, even though
1033   // finally total bytes of result1 + result2 exceed the threshold.
1034   auto [page_result, has_more_results] = result_retriever->RetrieveNextPage(
1035       result_state, fake_clock_.GetSystemTimeMilliseconds());
1036   EXPECT_THAT(page_result.results,
1037               ElementsAre(EqualsProto(result1), EqualsProto(result2)));
1038   // No more results.
1039   EXPECT_FALSE(has_more_results);
1040 }
1041 
1042 }  // namespace
1043 
1044 }  // namespace lib
1045 }  // namespace icing
1046