xref: /aosp_15_r20/external/icing/icing/index/embed/embedding-index_test.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/embed/embedding-index.h"
16 
17 #include <unistd.h>
18 
19 #include <cstdint>
20 #include <memory>
21 #include <string>
22 #include <string_view>
23 #include <utility>
24 #include <vector>
25 
26 #include "icing/text_classifier/lib3/utils/base/status.h"
27 #include "icing/text_classifier/lib3/utils/base/statusor.h"
28 #include "gmock/gmock.h"
29 #include "gtest/gtest.h"
30 #include "icing/absl_ports/canonical_errors.h"
31 #include "icing/document-builder.h"
32 #include "icing/feature-flags.h"
33 #include "icing/file/filesystem.h"
34 #include "icing/file/portable-file-backed-proto-log.h"
35 #include "icing/index/embed/embedding-hit.h"
36 #include "icing/index/embed/quantizer.h"
37 #include "icing/index/hit/hit.h"
38 #include "icing/legacy/index/icing-filesystem.h"
39 #include "icing/proto/document.pb.h"
40 #include "icing/schema-builder.h"
41 #include "icing/schema/schema-store.h"
42 #include "icing/schema/section.h"
43 #include "icing/store/document-id.h"
44 #include "icing/store/document-store.h"
45 #include "icing/testing/common-matchers.h"
46 #include "icing/testing/embedding-test-utils.h"
47 #include "icing/testing/test-feature-flags.h"
48 #include "icing/testing/tmp-directory.h"
49 #include "icing/util/clock.h"
50 #include "icing/util/crc32.h"
51 
52 namespace icing {
53 namespace lib {
54 
55 namespace {
56 
57 using ::testing::ElementsAre;
58 using ::testing::Eq;
59 using ::testing::FloatNear;
60 using ::testing::HasSubstr;
61 using ::testing::IsEmpty;
62 using ::testing::Pointwise;
63 using ::testing::Test;
64 
65 static constexpr SectionId kSectionIdQuantizedEmbedding = 2;
66 static constexpr float kEpsQuantized = 0.01f;
67 
68 class EmbeddingIndexTest : public Test {
69  protected:
SetUp()70   void SetUp() override {
71     feature_flags_ = std::make_unique<FeatureFlags>(GetTestFeatureFlags());
72     test_dir_ = GetTestTempDir() + "/icing";
73     embedding_index_dir_ = test_dir_ + "/embedding_index";
74     document_store_dir_ = test_dir_ + "/document_store";
75     schema_store_dir_ = test_dir_ + "/schema_store";
76     filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
77     filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
78     filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
79 
80     ICING_ASSERT_OK_AND_ASSIGN(
81         schema_store_, SchemaStore::Create(&filesystem_, schema_store_dir_,
82                                            &clock_, feature_flags_.get()));
83 
84     ICING_ASSERT_OK_AND_ASSIGN(
85         DocumentStore::CreateResult create_result,
86         DocumentStore::Create(&filesystem_, document_store_dir_, &clock_,
87                               schema_store_.get(), feature_flags_.get(),
88                               /*force_recovery_and_revalidate_documents=*/false,
89                               /*pre_mapping_fbv=*/false,
90                               /*use_persistent_hash_map=*/true,
91                               PortableFileBackedProtoLog<
92                                   DocumentWrapper>::kDefaultCompressionLevel,
93                               /*initialize_stats=*/nullptr));
94     document_store_ = std::move(create_result.document_store);
95 
96     ICING_ASSERT_OK_AND_ASSIGN(
97         embedding_index_,
98         EmbeddingIndex::Create(&filesystem_, embedding_index_dir_, &clock_,
99                                feature_flags_.get()));
100 
101     ICING_ASSERT_OK(schema_store_->SetSchema(
102         SchemaBuilder()
103             .AddType(
104                 SchemaTypeConfigBuilder()
105                     .SetType("type")
106                     .AddProperty(
107                         PropertyConfigBuilder()
108                             .SetName("prop1")
109                             .SetDataTypeVector(EMBEDDING_INDEXING_LINEAR_SEARCH)
110                             .SetCardinality(CARDINALITY_OPTIONAL))
111                     .AddProperty(
112                         PropertyConfigBuilder()
113                             .SetName("prop2")
114                             .SetDataTypeVector(EMBEDDING_INDEXING_LINEAR_SEARCH)
115                             .SetCardinality(CARDINALITY_OPTIONAL))
116                     // Quantized embedding
117                     .AddProperty(
118                         PropertyConfigBuilder()
119                             .SetName("prop3")
120                             .SetDataTypeVector(EMBEDDING_INDEXING_LINEAR_SEARCH,
121                                                QUANTIZATION_TYPE_QUANTIZE_8_BIT)
122                             .SetCardinality(CARDINALITY_OPTIONAL)))
123             .Build(),
124         /*ignore_errors_and_delete_documents=*/false,
125         /*allow_circular_schema_definitions=*/false));
126     ICING_ASSERT_OK(document_store_->Put(
127         DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
128     ICING_ASSERT_OK(document_store_->Put(
129         DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
130     ICING_ASSERT_OK(document_store_->Put(
131         DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
132   }
133 
TearDown()134   void TearDown() override {
135     document_store_.reset();
136     schema_store_.reset();
137     embedding_index_.reset();
138     filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
139   }
140 
IndexContainsMetadataOnly()141   libtextclassifier3::StatusOr<bool> IndexContainsMetadataOnly() {
142     std::vector<std::string> sub_dirs;
143     if (!filesystem_.ListDirectory(embedding_index_dir_.c_str(), /*exclude=*/{},
144                                    /*recursive=*/true, &sub_dirs)) {
145       return absl_ports::InternalError("Failed to list directory");
146     }
147     return sub_dirs.size() == 1 && sub_dirs[0] == "metadata";
148   }
149 
150   std::unique_ptr<FeatureFlags> feature_flags_;
151   Filesystem filesystem_;
152   IcingFilesystem icing_filesystem_;
153   std::string test_dir_;
154   std::string embedding_index_dir_;
155   std::string schema_store_dir_;
156   std::string document_store_dir_;
157   Clock clock_;
158   std::unique_ptr<SchemaStore> schema_store_;
159   std::unique_ptr<DocumentStore> document_store_;
160   std::unique_ptr<EmbeddingIndex> embedding_index_;
161 };
162 
// The index created by the fixture, with nothing added to it, must have only
// its "metadata" entry on disk.
TEST_F(EmbeddingIndexTest, EmptyIndexContainsMetadataOnly) {
  libtextclassifier3::StatusOr<bool> metadata_only =
      IndexContainsMetadataOnly();
  EXPECT_THAT(metadata_only, IsOkAndHolds(true));
}
166 
// Create() must reject a null filesystem as well as a null clock with
// FAILED_PRECONDITION.
TEST_F(EmbeddingIndexTest, InitializationShouldFailWithNullPointer) {
  const auto expected_code =
      libtextclassifier3::StatusCode::FAILED_PRECONDITION;
  std::string local_index_dir =
      GetTestTempDir() + "/embedding_index_test_local";

  // Missing filesystem.
  EXPECT_THAT(EmbeddingIndex::Create(/*filesystem=*/nullptr, local_index_dir,
                                     &clock_, feature_flags_.get()),
              StatusIs(expected_code));

  // Missing clock.
  EXPECT_THAT(EmbeddingIndex::Create(&filesystem_, local_index_dir,
                                     /*clock=*/nullptr, feature_flags_.get()),
              StatusIs(expected_code));
}
179 
// An index that received data but never had UpdateChecksums()/PersistToDisk()
// called on it, and is still alive, must not be openable a second time from
// the same directory: the second Create() is expected to fail with
// FAILED_PRECONDITION.
TEST_F(EmbeddingIndexTest,
       InitializationShouldFailWithoutPersistToDiskOrDestruction) {
  std::string embedding_index_dir =
      GetTestTempDir() + "/embedding_index_test_local";
  // This directory lives outside the fixture's test_dir_ and is shared with
  // other tests. The cleanup at the bottom is skipped whenever a fatal
  // assertion returns early, so wipe anything an earlier test or run may have
  // left behind before creating the index.
  filesystem_.DeleteDirectoryRecursively(embedding_index_dir.c_str());

  // 1. Create index and confirm that data was properly added.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<EmbeddingIndex> embedding_index,
      EmbeddingIndex::Create(&filesystem_, embedding_index_dir, &clock_,
                             feature_flags_.get()));

  PropertyProto::VectorProto vector = CreateVector("model", {0.1, 0.2, 0.3});
  ICING_ASSERT_OK(embedding_index->BufferEmbedding(
      BasicHit(/*section_id=*/0, /*document_id=*/0), vector,
      QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index->CommitBufferToIndex());
  embedding_index->set_last_added_document_id(0);

  EXPECT_THAT(
      GetEmbeddingHitsFromIndex(embedding_index.get(), /*dimension=*/3,
                                /*model_signature=*/"model"),
      IsOkAndHolds(ElementsAre(EmbeddingHit(
          BasicHit(/*section_id=*/0, /*document_id=*/0), /*location=*/0))));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index.get()),
              ElementsAre(0.1, 0.2, 0.3));
  EXPECT_EQ(embedding_index->last_added_document_id(), 0);
  // GetChecksum should succeed without updating the checksum.
  ICING_EXPECT_OK(embedding_index->GetChecksum());

  // 2. Try to create another index with the same directory. This should fail
  // due to checksum mismatch.
  EXPECT_THAT(EmbeddingIndex::Create(&filesystem_, embedding_index_dir, &clock_,
                                     feature_flags_.get()),
              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));

  // Release the index before removing the files it is backed by.
  embedding_index.reset();
  filesystem_.DeleteDirectoryRecursively(embedding_index_dir.c_str());
}
217 
// After UpdateChecksums() has been called on an index, a second instance
// created on the same directory must open successfully and expose the same
// hits, raw embedding data and last added document id.
TEST_F(EmbeddingIndexTest, InitializationShouldSucceedWithUpdateChecksums) {
  std::string embedding_index_dir =
      GetTestTempDir() + "/embedding_index_test_local";
  // This directory lives outside the fixture's test_dir_ and is shared with
  // other tests. The cleanup at the bottom is skipped whenever a fatal
  // assertion returns early, so wipe anything an earlier test or run may have
  // left behind before creating the index.
  filesystem_.DeleteDirectoryRecursively(embedding_index_dir.c_str());

  // 1. Create index and confirm that data was properly added.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<EmbeddingIndex> embedding_index,
      EmbeddingIndex::Create(&filesystem_, embedding_index_dir, &clock_,
                             feature_flags_.get()));

  PropertyProto::VectorProto vector = CreateVector("model", {0.1, 0.2, 0.3});
  ICING_ASSERT_OK(embedding_index->BufferEmbedding(
      BasicHit(/*section_id=*/0, /*document_id=*/0), vector,
      QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index->CommitBufferToIndex());
  embedding_index->set_last_added_document_id(0);

  EXPECT_THAT(
      GetEmbeddingHitsFromIndex(embedding_index.get(), /*dimension=*/3,
                                /*model_signature=*/"model"),
      IsOkAndHolds(ElementsAre(EmbeddingHit(
          BasicHit(/*section_id=*/0, /*document_id=*/0), /*location=*/0))));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index.get()),
              ElementsAre(0.1, 0.2, 0.3));
  EXPECT_EQ(embedding_index->last_added_document_id(), 0);

  // 2. Update checksums to reflect the new content. UpdateChecksums() must
  // report the value GetChecksum() computed beforehand, and GetChecksum()
  // must keep reporting it afterwards.
  ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc, embedding_index->GetChecksum());
  EXPECT_THAT(embedding_index->UpdateChecksums(), IsOkAndHolds(Eq(crc)));
  EXPECT_THAT(embedding_index->GetChecksum(), IsOkAndHolds(Eq(crc)));

  // 3. Create another index and confirm that the data is still there.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<EmbeddingIndex> embedding_index_two,
      EmbeddingIndex::Create(&filesystem_, embedding_index_dir, &clock_,
                             feature_flags_.get()));

  EXPECT_THAT(
      GetEmbeddingHitsFromIndex(embedding_index_two.get(), /*dimension=*/3,
                                /*model_signature=*/"model"),
      IsOkAndHolds(ElementsAre(EmbeddingHit(
          BasicHit(/*section_id=*/0, /*document_id=*/0), /*location=*/0))));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_two.get()),
              ElementsAre(0.1, 0.2, 0.3));
  EXPECT_EQ(embedding_index_two->last_added_document_id(), 0);

  // Release both instances before removing the files they are backed by.
  embedding_index.reset();
  embedding_index_two.reset();
  filesystem_.DeleteDirectoryRecursively(embedding_index_dir.c_str());
}
267 
// After PersistToDisk() has been called on an index, a second instance
// created on the same directory must open successfully and expose the same
// hits, raw embedding data and last added document id.
TEST_F(EmbeddingIndexTest, InitializationShouldSucceedWithPersistToDisk) {
  std::string embedding_index_dir =
      GetTestTempDir() + "/embedding_index_test_local";
  // This directory lives outside the fixture's test_dir_ and is shared with
  // other tests. The cleanup at the bottom is skipped whenever a fatal
  // assertion returns early, so wipe anything an earlier test or run may have
  // left behind before creating the index.
  filesystem_.DeleteDirectoryRecursively(embedding_index_dir.c_str());

  // 1. Create index and confirm that data was properly added.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<EmbeddingIndex> embedding_index,
      EmbeddingIndex::Create(&filesystem_, embedding_index_dir, &clock_,
                             feature_flags_.get()));

  PropertyProto::VectorProto vector = CreateVector("model", {0.1, 0.2, 0.3});
  ICING_ASSERT_OK(embedding_index->BufferEmbedding(
      BasicHit(/*section_id=*/0, /*document_id=*/0), vector,
      QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index->CommitBufferToIndex());
  embedding_index->set_last_added_document_id(0);

  EXPECT_THAT(
      GetEmbeddingHitsFromIndex(embedding_index.get(), /*dimension=*/3,
                                /*model_signature=*/"model"),
      IsOkAndHolds(ElementsAre(EmbeddingHit(
          BasicHit(/*section_id=*/0, /*document_id=*/0), /*location=*/0))));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index.get()),
              ElementsAre(0.1, 0.2, 0.3));
  EXPECT_EQ(embedding_index->last_added_document_id(), 0);

  // 2. Persist the index to disk so that a second instance can open it.
  ICING_EXPECT_OK(embedding_index->PersistToDisk());

  // 3. Create another index and confirm that the data is still there.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<EmbeddingIndex> embedding_index_two,
      EmbeddingIndex::Create(&filesystem_, embedding_index_dir, &clock_,
                             feature_flags_.get()));

  EXPECT_THAT(
      GetEmbeddingHitsFromIndex(embedding_index_two.get(), /*dimension=*/3,
                                /*model_signature=*/"model"),
      IsOkAndHolds(ElementsAre(EmbeddingHit(
          BasicHit(/*section_id=*/0, /*document_id=*/0), /*location=*/0))));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_two.get()),
              ElementsAre(0.1, 0.2, 0.3));
  EXPECT_EQ(embedding_index_two->last_added_document_id(), 0);

  // Release both instances before removing the files they are backed by.
  embedding_index.reset();
  embedding_index_two.reset();
  filesystem_.DeleteDirectoryRecursively(embedding_index_dir.c_str());
}
315 
// Reading the stored vector with its real dimension succeeds; requesting one
// more element than was stored must be reported as OUT_OF_RANGE.
TEST_F(EmbeddingIndexTest, GetEmbeddingVectorShouldFailWhenOutOfRange) {
  constexpr uint32_t kStoredDimension = 3;

  BasicHit doc_hit(/*section_id=*/0, /*document_id=*/0);
  PropertyProto::VectorProto embedding =
      CreateVector("model", {0.1, 0.2, 0.3});
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(doc_hit, embedding,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());

  EmbeddingHit stored_hit(doc_hit, /*location=*/0);

  // In-range read.
  ICING_ASSERT_OK(
      embedding_index_->GetEmbeddingVector(stored_hit, kStoredDimension));

  // Read that runs one element past the stored data.
  auto oversized_read =
      embedding_index_->GetEmbeddingVector(stored_hit, kStoredDimension + 1);
  EXPECT_THAT(oversized_read,
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
331 
// Quantized counterpart of the out-of-range check: the stored dimension can
// be read back, while a dimension that is one larger yields OUT_OF_RANGE.
TEST_F(EmbeddingIndexTest,
       GetQuantizedEmbeddingVectorShouldFailWhenOutOfRange) {
  constexpr uint32_t kStoredDimension = 3;

  BasicHit doc_hit(kSectionIdQuantizedEmbedding, /*document_id=*/0);
  PropertyProto::VectorProto embedding =
      CreateVector("model", {0.1, 0.2, 0.3});
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      doc_hit, embedding, QUANTIZATION_TYPE_QUANTIZE_8_BIT));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());

  EmbeddingHit stored_hit(doc_hit, /*location=*/0);

  // In-range read.
  ICING_ASSERT_OK(embedding_index_->GetQuantizedEmbeddingVector(
      stored_hit, kStoredDimension));

  // Read that runs one element past the stored data.
  auto oversized_read = embedding_index_->GetQuantizedEmbeddingVector(
      stored_hit, kStoredDimension + 1);
  EXPECT_THAT(oversized_read,
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
348 
// Indexes one unquantized vector and checks the resulting hit, the raw
// embedding data and the last added document id.
TEST_F(EmbeddingIndexTest, AddSingleEmbedding) {
  BasicHit doc_hit(/*section_id=*/0, /*document_id=*/0);
  PropertyProto::VectorProto embedding =
      CreateVector("model", {0.1, 0.2, 0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(doc_hit, embedding,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(0);

  EmbeddingHit expected_hit(doc_hit, /*location=*/0);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(expected_hit)));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
              ElementsAre(0.1, 0.2, 0.3));
  EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
}
366 
// Indexes one 8-bit quantized vector. The quantized storage is expected to
// hold 3 + sizeof(Quantizer) bytes, the restored vector must match the
// original within kEpsQuantized, and no raw embedding data may be written.
TEST_F(EmbeddingIndexTest, AddSingleQuantizedEmbedding) {
  BasicHit doc_hit(kSectionIdQuantizedEmbedding, /*document_id=*/0);
  PropertyProto::VectorProto embedding =
      CreateVector("model", {0.1, 0.2, 0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      doc_hit, embedding, QUANTIZATION_TYPE_QUANTIZE_8_BIT));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(0);

  EmbeddingHit expected_hit(doc_hit, /*location=*/0);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(expected_hit)));
  EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
              Eq(3 + sizeof(Quantizer)));
  EXPECT_THAT(
      GetAndRestoreQuantizedEmbeddingVectorFromIndex(embedding_index_.get(),
                                                     expected_hit,
                                                     /*dimension=*/3),
      IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), {0.1, 0.2, 0.3})));

  // Quantized vectors must not show up in the raw (unquantized) storage.
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()), IsEmpty());
  EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
}
390 
// Two unquantized vectors buffered for the same section and document must
// yield two hits in insertion order, the second at location 3 (directly after
// the three values of the first).
TEST_F(EmbeddingIndexTest, AddMultipleEmbeddingsInTheSameSection) {
  BasicHit doc_hit(/*section_id=*/0, /*document_id=*/0);
  PropertyProto::VectorProto first_embedding =
      CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto second_embedding =
      CreateVector("model", {-0.1, -0.2, -0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(doc_hit, first_embedding,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(doc_hit, second_embedding,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(0);

  EmbeddingHit first_expected(doc_hit, /*location=*/0);
  EmbeddingHit second_expected(doc_hit, /*location=*/3);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(first_expected, second_expected)));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
              ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3));
  EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
}
415 
// Two quantized vectors buffered for the same section and document must yield
// two hits, the second located 3 + sizeof(Quantizer) bytes after the first,
// and both must restore to their original values within kEpsQuantized.
TEST_F(EmbeddingIndexTest, AddMultipleQuantizedEmbeddingsInTheSameSection) {
  BasicHit doc_hit(kSectionIdQuantizedEmbedding, /*document_id=*/0);
  PropertyProto::VectorProto first_embedding =
      CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto second_embedding =
      CreateVector("model", {-0.1, -0.2, -0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      doc_hit, first_embedding, QUANTIZATION_TYPE_QUANTIZE_8_BIT));
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      doc_hit, second_embedding, QUANTIZATION_TYPE_QUANTIZE_8_BIT));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(0);

  EmbeddingHit first_expected(doc_hit, /*location=*/0);
  EmbeddingHit second_expected(doc_hit,
                               /*location=*/3 + sizeof(Quantizer));
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(first_expected, second_expected)));

  // Two quantized vectors are stored in total.
  EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
              Eq(2 * (3 + sizeof(Quantizer))));

  EXPECT_THAT(
      GetAndRestoreQuantizedEmbeddingVectorFromIndex(
          embedding_index_.get(), first_expected, /*dimension=*/3),
      IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), {0.1, 0.2, 0.3})));
  EXPECT_THAT(
      GetAndRestoreQuantizedEmbeddingVectorFromIndex(
          embedding_index_.get(), second_expected, /*dimension=*/3),
      IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), {-0.1, -0.2, -0.3})));

  // Nothing may land in the raw (unquantized) storage.
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()), IsEmpty());
  EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
}
449 
// Hits of one document are returned ordered by ascending section id, even
// when the higher section was buffered first. The raw data keeps insertion
// order, which is why the section-2 hit points at location 3.
TEST_F(EmbeddingIndexTest, HitsWithLowerSectionIdReturnedFirst) {
  BasicHit section5_hit(/*section_id=*/5, /*document_id=*/0);
  BasicHit section2_hit(/*section_id=*/2, /*document_id=*/0);
  PropertyProto::VectorProto section5_embedding =
      CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto section2_embedding =
      CreateVector("model", {-0.1, -0.2, -0.3});

  // Buffer the higher section first.
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      section5_hit, section5_embedding, QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      section2_hit, section2_embedding, QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(0);

  EmbeddingHit expected_first(section2_hit, /*location=*/3);
  EmbeddingHit expected_second(section5_hit, /*location=*/0);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(expected_first, expected_second)));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
              ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3));
  EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
}
474 
// Hits in the same section are returned ordered by descending document id.
// The raw data keeps insertion order, which is why the hit of document 1
// points at location 3.
TEST_F(EmbeddingIndexTest, HitsWithHigherDocumentIdReturnedFirst) {
  BasicHit doc0_hit(/*section_id=*/0, /*document_id=*/0);
  BasicHit doc1_hit(/*section_id=*/0, /*document_id=*/1);
  PropertyProto::VectorProto doc0_embedding =
      CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto doc1_embedding =
      CreateVector("model", {-0.1, -0.2, -0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(doc0_hit, doc0_embedding,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(doc1_hit, doc1_embedding,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(1);

  EmbeddingHit expected_first(doc1_hit, /*location=*/3);
  EmbeddingHit expected_second(doc0_hit, /*location=*/0);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(expected_first, expected_second)));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
              ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3));
  EXPECT_EQ(embedding_index_->last_added_document_id(), 1);
}
499 
// Vectors carrying different model signatures are looked up independently:
// each (dimension, signature) pair only returns its own hit, and a pair that
// was never indexed returns no hits at all.
TEST_F(EmbeddingIndexTest, AddEmbeddingsFromDifferentModels) {
  BasicHit doc_hit(/*section_id=*/0, /*document_id=*/0);
  PropertyProto::VectorProto model1_embedding =
      CreateVector("model1", {0.1, 0.2});
  PropertyProto::VectorProto model2_embedding =
      CreateVector("model2", {-0.1, -0.2, -0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(doc_hit, model1_embedding,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(doc_hit, model2_embedding,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(0);

  // "model1" has two values, so its hit starts at location 0 ...
  EmbeddingHit model1_expected(doc_hit, /*location=*/0);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/2,
                                        /*model_signature=*/"model1"),
              IsOkAndHolds(ElementsAre(model1_expected)));

  // ... and the "model2" hit follows right behind it at location 2.
  EmbeddingHit model2_expected(doc_hit, /*location=*/2);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model2"),
              IsOkAndHolds(ElementsAre(model2_expected)));

  // Unknown model signature.
  EXPECT_THAT(GetEmbeddingHitsFromIndex(
                  embedding_index_.get(),
                  /*dimension=*/5, /*model_signature=*/"non-existent-model"),
              IsOkAndHolds(IsEmpty()));

  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
              ElementsAre(0.1, 0.2, -0.1, -0.2, -0.3));
  EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
}
531 
// Vectors that share a model signature but differ in dimension are looked up
// separately: querying with dimension 2 and with dimension 3 each returns
// only the matching hit.
TEST_F(EmbeddingIndexTest,
       AddEmbeddingsWithSameSignatureButDifferentDimension) {
  BasicHit doc_hit(/*section_id=*/0, /*document_id=*/0);
  PropertyProto::VectorProto two_dim_embedding =
      CreateVector("model", {0.1, 0.2});
  PropertyProto::VectorProto three_dim_embedding =
      CreateVector("model", {-0.1, -0.2, -0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(doc_hit, two_dim_embedding,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      doc_hit, three_dim_embedding, QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(0);

  EmbeddingHit two_dim_expected(doc_hit, /*location=*/0);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/2,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(two_dim_expected)));

  // The three-value vector is stored right after the two-value one.
  EmbeddingHit three_dim_expected(doc_hit, /*location=*/2);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(three_dim_expected)));

  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
              ElementsAre(0.1, 0.2, -0.1, -0.2, -0.3));
  EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
}
560 
TEST_F(EmbeddingIndexTest,ClearIndex)561 TEST_F(EmbeddingIndexTest, ClearIndex) {
562   // Loop the same logic twice to make sure that clear works as expected, and
563   // the index is still valid after clearing.
564   for (int i = 0; i < 2; i++) {
565     PropertyProto::VectorProto vector1 = CreateVector("model", {0.1, 0.2, 0.3});
566     PropertyProto::VectorProto vector2 =
567         CreateVector("model", {-0.1, -0.2, -0.3});
568     PropertyProto::VectorProto vector3 = CreateVector("model", {0.4, 0.5, 0.6});
569 
570     ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
571         BasicHit(/*section_id=*/1, /*document_id=*/0), vector1,
572         QUANTIZATION_TYPE_NONE));
573     ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
574         BasicHit(/*section_id=*/2, /*document_id=*/1), vector2,
575         QUANTIZATION_TYPE_NONE));
576     ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
577         BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/2), vector3,
578         QUANTIZATION_TYPE_QUANTIZE_8_BIT));
579     ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
580     embedding_index_->set_last_added_document_id(2);
581 
582     EmbeddingHit hit1(BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/2),
583                       /*location=*/0);
584     EmbeddingHit hit2(BasicHit(/*section_id=*/2, /*document_id=*/1),
585                       /*location=*/3);
586     EmbeddingHit hit3(BasicHit(/*section_id=*/1, /*document_id=*/0),
587                       /*location=*/0);
588 
589     EXPECT_THAT(
590         GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
591                                   /*model_signature=*/"model"),
592         IsOkAndHolds(ElementsAre(hit1, hit2, hit3)));
593     EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
594                 ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3));
595     EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
596                 Eq(3 + sizeof(Quantizer)));
597     EXPECT_THAT(
598         GetAndRestoreQuantizedEmbeddingVectorFromIndex(embedding_index_.get(),
599                                                        hit1,
600                                                        /*dimension=*/3),
601         IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), vector3.values())));
602     EXPECT_EQ(embedding_index_->last_added_document_id(), 2);
603     EXPECT_FALSE(embedding_index_->is_empty());
604     EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(false));
605 
606     // Check that clear works as expected.
607     ICING_ASSERT_OK(embedding_index_->Clear());
608     EXPECT_TRUE(embedding_index_->is_empty());
609     EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(true));
610     EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
611                 IsEmpty());
612     EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(), Eq(0));
613     EXPECT_EQ(embedding_index_->last_added_document_id(), kInvalidDocumentId);
614   }
615 }
616 
TEST_F(EmbeddingIndexTest,DiscardIndex)617 TEST_F(EmbeddingIndexTest, DiscardIndex) {
618   // Loop the same logic twice to make sure that Discard works as expected, and
619   // the index is still valid after discarding.
620   for (int i = 0; i < 2; i++) {
621     PropertyProto::VectorProto vector1 = CreateVector("model", {0.1, 0.2, 0.3});
622     PropertyProto::VectorProto vector2 =
623         CreateVector("model", {-0.1, -0.2, -0.3});
624     PropertyProto::VectorProto vector3 = CreateVector("model", {0.4, 0.5, 0.6});
625 
626     ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
627         BasicHit(/*section_id=*/1, /*document_id=*/0), vector1,
628         QUANTIZATION_TYPE_NONE));
629     ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
630         BasicHit(/*section_id=*/2, /*document_id=*/1), vector2,
631         QUANTIZATION_TYPE_NONE));
632     ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
633         BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/2), vector3,
634         QUANTIZATION_TYPE_QUANTIZE_8_BIT));
635     ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
636     embedding_index_->set_last_added_document_id(2);
637 
638     EmbeddingHit hit1(BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/2),
639                       /*location=*/0);
640     EmbeddingHit hit2(BasicHit(/*section_id=*/2, /*document_id=*/1),
641                       /*location=*/3);
642     EmbeddingHit hit3(BasicHit(/*section_id=*/1, /*document_id=*/0),
643                       /*location=*/0);
644     EXPECT_THAT(
645         GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
646                                   /*model_signature=*/"model"),
647         IsOkAndHolds(ElementsAre(hit1, hit2, hit3)));
648     EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
649                 ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3));
650     EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
651                 Eq(3 + sizeof(Quantizer)));
652     EXPECT_THAT(
653         GetAndRestoreQuantizedEmbeddingVectorFromIndex(embedding_index_.get(),
654                                                        hit1,
655                                                        /*dimension=*/3),
656         IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), vector3.values())));
657     EXPECT_EQ(embedding_index_->last_added_document_id(), 2);
658     EXPECT_FALSE(embedding_index_->is_empty());
659     EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(false));
660 
661     // Check that Discard works as expected.
662     embedding_index_.reset();
663     EmbeddingIndex::Discard(filesystem_, embedding_index_dir_);
664     ICING_ASSERT_OK_AND_ASSIGN(
665         embedding_index_,
666         EmbeddingIndex::Create(&filesystem_, embedding_index_dir_, &clock_,
667                                feature_flags_.get()));
668     EXPECT_TRUE(embedding_index_->is_empty());
669     EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(true));
670     EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
671                 IsEmpty());
672     EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(), Eq(0));
673     EXPECT_EQ(embedding_index_->last_added_document_id(), kInvalidDocumentId);
674   }
675 }
676 
// Committing when nothing has been buffered must succeed and leave the index
// empty: metadata only, no raw embedding data and no quantized vectors.
TEST_F(EmbeddingIndexTest, EmptyCommitIsOk) {
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  EXPECT_TRUE(embedding_index_->is_empty());
  EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(true));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()), IsEmpty());
  EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(), Eq(0));
}
684 
// Two separate commits for the same document (section 1 first, then section 0)
// must both be visible afterwards.
TEST_F(EmbeddingIndexTest, MultipleCommits) {
  PropertyProto::VectorProto vector1 = CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto vector2 =
      CreateVector("model", {-0.1, -0.2, -0.3});

  // First commit: section 1 of document 0.
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      BasicHit(/*section_id=*/1, /*document_id=*/0), vector1,
      QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());

  // Second commit: a lower section id (0) of the same document.
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      BasicHit(/*section_id=*/0, /*document_id=*/0), vector2,
      QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());

  // The hit from the second commit is returned first, but its location (3)
  // shows that its vector was appended after the first commit's vector.
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(
                  EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/0),
                               /*location=*/3),
                  EmbeddingHit(BasicHit(/*section_id=*/1, /*document_id=*/0),
                               /*location=*/0))));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
              ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3));
}
710 
// Committing section 0 of a document and then, in a later commit, section 1 of
// the same document must be rejected with INVALID_ARGUMENT.
TEST_F(EmbeddingIndexTest,
       InvalidCommit_SectionIdCanOnlyDecreaseForSingleDocument) {
  PropertyProto::VectorProto vector1 = CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto vector2 =
      CreateVector("model", {-0.1, -0.2, -0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      BasicHit(/*section_id=*/0, /*document_id=*/0), vector1,
      QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());

  // Buffering the out-of-order hit succeeds; the failure is only expected when
  // the buffer is committed.
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      BasicHit(/*section_id=*/1, /*document_id=*/0), vector2,
      QUANTIZATION_TYPE_NONE));
  // Posting list with delta encoding can only allow decreasing values.
  EXPECT_THAT(embedding_index_->CommitBufferToIndex(),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
729 
// Committing document 1 and then, in a later commit, document 0 must be
// rejected with INVALID_ARGUMENT.
TEST_F(EmbeddingIndexTest, InvalidCommit_DocumentIdCanOnlyIncrease) {
  PropertyProto::VectorProto vector1 = CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto vector2 =
      CreateVector("model", {-0.1, -0.2, -0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      BasicHit(/*section_id=*/0, /*document_id=*/1), vector1,
      QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());

  // Buffering the out-of-order hit succeeds; the failure is only expected when
  // the buffer is committed.
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      BasicHit(/*section_id=*/0, /*document_id=*/0), vector2,
      QUANTIZATION_TYPE_NONE));
  // Posting list with delta encoding can only allow decreasing values, which
  // means document ids must be committed increasingly, since document ids are
  // inverted in hit values.
  EXPECT_THAT(embedding_index_->CommitBufferToIndex(),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
749 
// Optimize must return FAILED_PRECONDITION when either the document store or
// the schema store argument is null.
TEST_F(EmbeddingIndexTest, OptimizeShouldFailWithNullPointer) {
  // Null document store, valid schema store.
  EXPECT_THAT(embedding_index_->Optimize(
                  /*document_store=*/nullptr, schema_store_.get(),
                  /*document_id_old_to_new=*/{},
                  /*new_last_added_document_id=*/kInvalidDocumentId),
              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));

  // Valid document store, null schema store.
  EXPECT_THAT(embedding_index_->Optimize(
                  document_store_.get(), /*schema_store=*/nullptr,
                  /*document_id_old_to_new=*/{},
                  /*new_last_added_document_id=*/kInvalidDocumentId),
              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
763 
// Optimize must report an error when the old-to-new document id map has fewer
// entries than the document ids present in the index.
TEST_F(EmbeddingIndexTest, OptimizeShouldFailWhenDocumentIdMapIsTooSmall) {
  PropertyProto::VectorProto vector = CreateVector("model", {0.1, 0.2, 0.3});
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      BasicHit(/*section_id=*/0, /*document_id=*/2), vector,
      QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(2);

  // Optimize should fail because the provided document_id_old_to_new map does
  // not contain an entry for document id 2.
  // Only the error message is inspected here, not the status code.
  EXPECT_THAT(embedding_index_
                  ->Optimize(document_store_.get(), schema_store_.get(),
                             /*document_id_old_to_new=*/{0, 1},
                             /*new_last_added_document_id=*/2)
                  .error_message(),
              HasSubstr("The provided map is too small"));
}
781 
// Optimizing an index that holds no embeddings must succeed and leave it
// empty: metadata only, no raw embedding data and no quantized vectors.
TEST_F(EmbeddingIndexTest, EmptyOptimizeIsOk) {
  ICING_ASSERT_OK(embedding_index_->Optimize(
      document_store_.get(), schema_store_.get(),
      /*document_id_old_to_new=*/{},
      /*new_last_added_document_id=*/kInvalidDocumentId));
  EXPECT_TRUE(embedding_index_->is_empty());
  EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(true));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()), IsEmpty());
  EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(), Eq(0));
}
792 
TEST_F(EmbeddingIndexTest,OptimizeSingleEmbeddingSingleDocument)793 TEST_F(EmbeddingIndexTest, OptimizeSingleEmbeddingSingleDocument) {
794   PropertyProto::VectorProto vector = CreateVector("model", {0.1, 0.2, 0.3});
795   ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
796       BasicHit(/*section_id=*/0, /*document_id=*/2), vector,
797       QUANTIZATION_TYPE_NONE));
798   ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
799   embedding_index_->set_last_added_document_id(2);
800 
801   // Before optimize
802   EXPECT_THAT(
803       GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
804                                 /*model_signature=*/"model"),
805       IsOkAndHolds(ElementsAre(EmbeddingHit(
806           BasicHit(/*section_id=*/0, /*document_id=*/2), /*location=*/0))));
807   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
808               ElementsAre(0.1, 0.2, 0.3));
809   EXPECT_EQ(embedding_index_->last_added_document_id(), 2);
810 
811   // Run optimize without deleting any documents, and check that the index is
812   // not changed.
813   ICING_ASSERT_OK(
814       embedding_index_->Optimize(document_store_.get(), schema_store_.get(),
815                                  /*document_id_old_to_new=*/{0, 1, 2},
816                                  /*new_last_added_document_id=*/2));
817   EXPECT_THAT(
818       GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
819                                 /*model_signature=*/"model"),
820       IsOkAndHolds(ElementsAre(EmbeddingHit(
821           BasicHit(/*section_id=*/0, /*document_id=*/2), /*location=*/0))));
822   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
823               ElementsAre(0.1, 0.2, 0.3));
824   EXPECT_EQ(embedding_index_->last_added_document_id(), 2);
825 
826   // Run optimize to map document id 2 to 1, and check that the index is
827   // updated correctly.
828   ICING_ASSERT_OK(embedding_index_->Optimize(
829       document_store_.get(), schema_store_.get(),
830       /*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
831       /*new_last_added_document_id=*/1));
832   EXPECT_THAT(
833       GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
834                                 /*model_signature=*/"model"),
835       IsOkAndHolds(ElementsAre(EmbeddingHit(
836           BasicHit(/*section_id=*/0, /*document_id=*/1), /*location=*/0))));
837   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
838               ElementsAre(0.1, 0.2, 0.3));
839   EXPECT_EQ(embedding_index_->last_added_document_id(), 1);
840 
841   // Run optimize to delete the document.
842   ICING_ASSERT_OK(embedding_index_->Optimize(
843       document_store_.get(), schema_store_.get(),
844       /*document_id_old_to_new=*/{0, kInvalidDocumentId},
845       /*new_last_added_document_id=*/0));
846   EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
847                                         /*model_signature=*/"model"),
848               IsOkAndHolds(IsEmpty()));
849   EXPECT_TRUE(embedding_index_->is_empty());
850   EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(true));
851   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()), IsEmpty());
852   EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
853 }
854 
TEST_F(EmbeddingIndexTest,OptimizeSingleQuantizedEmbeddingSingleDocument)855 TEST_F(EmbeddingIndexTest, OptimizeSingleQuantizedEmbeddingSingleDocument) {
856   PropertyProto::VectorProto vector = CreateVector("model", {0.1, 0.2, 0.3});
857   ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
858       BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/2), vector,
859       QUANTIZATION_TYPE_QUANTIZE_8_BIT));
860   ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
861   embedding_index_->set_last_added_document_id(2);
862 
863   // Before optimize
864   EmbeddingHit hit(BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/2),
865                    /*location=*/0);
866   EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
867                                         /*model_signature=*/"model"),
868               IsOkAndHolds(ElementsAre(hit)));
869   EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
870               Eq(3 + sizeof(Quantizer)));
871   EXPECT_THAT(
872       GetAndRestoreQuantizedEmbeddingVectorFromIndex(embedding_index_.get(),
873                                                      hit, /*dimension=*/3),
874       IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), {0.1, 0.2, 0.3})));
875   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()), IsEmpty());
876   EXPECT_EQ(embedding_index_->last_added_document_id(), 2);
877 
878   // Run optimize without deleting any documents, and check that the index is
879   // not changed
880   ICING_ASSERT_OK(
881       embedding_index_->Optimize(document_store_.get(), schema_store_.get(),
882                                  /*document_id_old_to_new=*/{0, 1, 2},
883                                  /*new_last_added_document_id=*/2));
884   EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
885                                         /*model_signature=*/"model"),
886               IsOkAndHolds(ElementsAre(hit)));
887   EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
888               Eq(3 + sizeof(Quantizer)));
889   EXPECT_THAT(
890       GetAndRestoreQuantizedEmbeddingVectorFromIndex(embedding_index_.get(),
891                                                      hit, /*dimension=*/3),
892       IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), {0.1, 0.2, 0.3})));
893   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()), IsEmpty());
894   EXPECT_EQ(embedding_index_->last_added_document_id(), 2);
895 
896   // Run optimize to map document id 2 to 1, and check that the index is
897   // updated correctly
898   ICING_ASSERT_OK(embedding_index_->Optimize(
899       document_store_.get(), schema_store_.get(),
900       /*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
901       /*new_last_added_document_id=*/1));
902   hit = EmbeddingHit(BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/1),
903                      /*location=*/0);
904   EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
905                                         /*model_signature=*/"model"),
906               IsOkAndHolds(ElementsAre(hit)));
907   EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
908               Eq(3 + sizeof(Quantizer)));
909   EXPECT_THAT(
910       GetAndRestoreQuantizedEmbeddingVectorFromIndex(embedding_index_.get(),
911                                                      hit, /*dimension=*/3),
912       IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), {0.1, 0.2, 0.3})));
913   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()), IsEmpty());
914   EXPECT_EQ(embedding_index_->last_added_document_id(), 1);
915 
916   // Run optimize to delete the document
917   ICING_ASSERT_OK(embedding_index_->Optimize(
918       document_store_.get(), schema_store_.get(),
919       /*document_id_old_to_new=*/{0, kInvalidDocumentId},
920       /*new_last_added_document_id=*/0));
921   EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
922                                         /*model_signature=*/"model"),
923               IsOkAndHolds(IsEmpty()));
924   EXPECT_TRUE(embedding_index_->is_empty());
925   EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(true));
926   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()), IsEmpty());
927   EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(), Eq(0));
928   EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
929 }
930 
TEST_F(EmbeddingIndexTest,OptimizeMultipleEmbeddingsSingleDocument)931 TEST_F(EmbeddingIndexTest, OptimizeMultipleEmbeddingsSingleDocument) {
932   PropertyProto::VectorProto vector1 = CreateVector("model", {0.1, 0.2, 0.3});
933   PropertyProto::VectorProto vector2 =
934       CreateVector("model", {-0.1, -0.2, -0.3});
935   PropertyProto::VectorProto vector3 = CreateVector("model", {0.4, 0.5, 0.6});
936 
937   ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
938       BasicHit(/*section_id=*/0, /*document_id=*/2), vector1,
939       QUANTIZATION_TYPE_NONE));
940   ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
941       BasicHit(/*section_id=*/0, /*document_id=*/2), vector2,
942       QUANTIZATION_TYPE_NONE));
943   ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
944       BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/2), vector3,
945       QUANTIZATION_TYPE_QUANTIZE_8_BIT));
946   ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
947   embedding_index_->set_last_added_document_id(2);
948 
949   // Before optimize
950   EmbeddingHit quantized_hit(
951       BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/2),
952       /*location=*/0);
953   EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
954                                         /*model_signature=*/"model"),
955               IsOkAndHolds(ElementsAre(
956                   EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/2),
957                                /*location=*/0),
958                   EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/2),
959                                /*location=*/3),
960                   quantized_hit)));
961   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
962               ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3));
963   EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
964               Eq(3 + sizeof(Quantizer)));
965   EXPECT_THAT(
966       GetAndRestoreQuantizedEmbeddingVectorFromIndex(embedding_index_.get(),
967                                                      quantized_hit,
968                                                      /*dimension=*/3),
969       IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), vector3.values())));
970   EXPECT_EQ(embedding_index_->last_added_document_id(), 2);
971 
972   // Run optimize without deleting any documents, and check that the index is
973   // not changed.
974   ICING_ASSERT_OK(
975       embedding_index_->Optimize(document_store_.get(), schema_store_.get(),
976                                  /*document_id_old_to_new=*/{0, 1, 2},
977                                  /*new_last_added_document_id=*/2));
978   EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
979                                         /*model_signature=*/"model"),
980               IsOkAndHolds(ElementsAre(
981                   EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/2),
982                                /*location=*/0),
983                   EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/2),
984                                /*location=*/3),
985                   quantized_hit)));
986   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
987               ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3));
988   EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
989               Eq(3 + sizeof(Quantizer)));
990   EXPECT_THAT(
991       GetAndRestoreQuantizedEmbeddingVectorFromIndex(embedding_index_.get(),
992                                                      quantized_hit,
993                                                      /*dimension=*/3),
994       IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), vector3.values())));
995   EXPECT_EQ(embedding_index_->last_added_document_id(), 2);
996 
997   // Run optimize to map document id 2 to 1, and check that the index is
998   // updated correctly.
999   ICING_ASSERT_OK(embedding_index_->Optimize(
1000       document_store_.get(), schema_store_.get(),
1001       /*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
1002       /*new_last_added_document_id=*/1));
1003   quantized_hit =
1004       EmbeddingHit(BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/1),
1005                    /*location=*/0);
1006   EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
1007                                         /*model_signature=*/"model"),
1008               IsOkAndHolds(ElementsAre(
1009                   EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/1),
1010                                /*location=*/0),
1011                   EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/1),
1012                                /*location=*/3),
1013                   quantized_hit)));
1014   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
1015               ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3));
1016   EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
1017               Eq(3 + sizeof(Quantizer)));
1018   EXPECT_THAT(
1019       GetAndRestoreQuantizedEmbeddingVectorFromIndex(embedding_index_.get(),
1020                                                      quantized_hit,
1021                                                      /*dimension=*/3),
1022       IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), vector3.values())));
1023   EXPECT_EQ(embedding_index_->last_added_document_id(), 1);
1024 
1025   // Run optimize to delete the document.
1026   ICING_ASSERT_OK(embedding_index_->Optimize(
1027       document_store_.get(), schema_store_.get(),
1028       /*document_id_old_to_new=*/{0, kInvalidDocumentId},
1029       /*new_last_added_document_id=*/0));
1030   EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
1031                                         /*model_signature=*/"model"),
1032               IsOkAndHolds(IsEmpty()));
1033   EXPECT_TRUE(embedding_index_->is_empty());
1034   EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(true));
1035   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()), IsEmpty());
1036   EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(), Eq(0));
1037   EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
1038 }
1039 
TEST_F(EmbeddingIndexTest,OptimizeMultipleEmbeddingsMultipleDocument)1040 TEST_F(EmbeddingIndexTest, OptimizeMultipleEmbeddingsMultipleDocument) {
1041   PropertyProto::VectorProto vector1 = CreateVector("model", {0.1, 0.2, 0.3});
1042   PropertyProto::VectorProto vector2 = CreateVector("model", {1, 2, 3});
1043   PropertyProto::VectorProto vector3 =
1044       CreateVector("model", {-0.1, -0.2, -0.3});
1045   PropertyProto::VectorProto vector4 = CreateVector("model", {0.4, 0.5, 0.6});
1046 
1047   ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
1048       BasicHit(/*section_id=*/0, /*document_id=*/0), vector1,
1049       QUANTIZATION_TYPE_NONE));
1050   ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
1051       BasicHit(/*section_id=*/1, /*document_id=*/0), vector2,
1052       QUANTIZATION_TYPE_NONE));
1053   ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
1054       BasicHit(/*section_id=*/0, /*document_id=*/1), vector3,
1055       QUANTIZATION_TYPE_NONE));
1056   ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
1057       BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/1), vector4,
1058       QUANTIZATION_TYPE_QUANTIZE_8_BIT));
1059   ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
1060   embedding_index_->set_last_added_document_id(1);
1061 
1062   // Before optimize
1063   EmbeddingHit quantized_hit(
1064       BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/1),
1065       /*location=*/0);
1066   EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
1067                                         /*model_signature=*/"model"),
1068               IsOkAndHolds(ElementsAre(
1069                   EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/1),
1070                                /*location=*/6),
1071                   quantized_hit,
1072                   EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/0),
1073                                /*location=*/0),
1074                   EmbeddingHit(BasicHit(/*section_id=*/1, /*document_id=*/0),
1075                                /*location=*/3))));
1076   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
1077               ElementsAre(0.1, 0.2, 0.3, 1, 2, 3, -0.1, -0.2, -0.3));
1078   EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
1079               Eq(3 + sizeof(Quantizer)));
1080   EXPECT_THAT(
1081       GetAndRestoreQuantizedEmbeddingVectorFromIndex(embedding_index_.get(),
1082                                                      quantized_hit,
1083                                                      /*dimension=*/3),
1084       IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), vector4.values())));
1085   EXPECT_EQ(embedding_index_->last_added_document_id(), 1);
1086 
1087   // Run optimize without deleting any documents. It is expected to see that the
1088   // raw embedding data is rearranged, since during index transfer, embedding
1089   // vectors from higher document ids are added first.
1090   //
1091   // Also keep in mind that once the raw data is rearranged, calling another
1092   // Optimize subsequently will not change the raw data again.
1093   for (int i = 0; i < 2; i++) {
1094     ICING_ASSERT_OK(
1095         embedding_index_->Optimize(document_store_.get(), schema_store_.get(),
1096                                    /*document_id_old_to_new=*/{0, 1},
1097                                    /*new_last_added_document_id=*/1));
1098     EXPECT_THAT(
1099         GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
1100                                   /*model_signature=*/"model"),
1101         IsOkAndHolds(ElementsAre(
1102             EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/1),
1103                          /*location=*/0),
1104             quantized_hit,
1105             EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/0),
1106                          /*location=*/3),
1107             EmbeddingHit(BasicHit(/*section_id=*/1, /*document_id=*/0),
1108                          /*location=*/6))));
1109     EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
1110                 ElementsAre(-0.1, -0.2, -0.3, 0.1, 0.2, 0.3, 1, 2, 3));
1111     EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
1112                 Eq(3 + sizeof(Quantizer)));
1113     EXPECT_THAT(
1114         GetAndRestoreQuantizedEmbeddingVectorFromIndex(embedding_index_.get(),
1115                                                        quantized_hit,
1116                                                        /*dimension=*/3),
1117         IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), vector4.values())));
1118     EXPECT_EQ(embedding_index_->last_added_document_id(), 1);
1119   }
1120 
1121   // Run optimize to delete document 0, and check that the index is
1122   // updated correctly.
1123   ICING_ASSERT_OK(embedding_index_->Optimize(
1124       document_store_.get(), schema_store_.get(),
1125       /*document_id_old_to_new=*/{kInvalidDocumentId, 0},
1126       /*new_last_added_document_id=*/0));
1127   quantized_hit =
1128       EmbeddingHit(BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/0),
1129                    /*location=*/0);
1130   EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
1131                                         /*model_signature=*/"model"),
1132               IsOkAndHolds(ElementsAre(
1133                   EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/0),
1134                                /*location=*/0),
1135                   quantized_hit)));
1136   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
1137               ElementsAre(-0.1, -0.2, -0.3));
1138   EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
1139               Eq(3 + sizeof(Quantizer)));
1140   EXPECT_THAT(
1141       GetAndRestoreQuantizedEmbeddingVectorFromIndex(embedding_index_.get(),
1142                                                      quantized_hit,
1143                                                      /*dimension=*/3),
1144       IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), vector4.values())));
1145   EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
1146 }
1147 
TEST_F(EmbeddingIndexTest,OptimizeEmbeddingsFromDifferentModels)1148 TEST_F(EmbeddingIndexTest, OptimizeEmbeddingsFromDifferentModels) {
1149   PropertyProto::VectorProto vector1 = CreateVector("model1", {0.1, 0.2});
1150   PropertyProto::VectorProto vector2 = CreateVector("model1", {1, 2});
1151   PropertyProto::VectorProto vector3 =
1152       CreateVector("model2", {-0.1, -0.2, -0.3});
1153   ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
1154       BasicHit(/*section_id=*/0, /*document_id=*/0), vector1,
1155       QUANTIZATION_TYPE_NONE));
1156   ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
1157       BasicHit(/*section_id=*/0, /*document_id=*/1), vector2,
1158       QUANTIZATION_TYPE_NONE));
1159   ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
1160       BasicHit(/*section_id=*/1, /*document_id=*/1), vector3,
1161       QUANTIZATION_TYPE_NONE));
1162   ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
1163   embedding_index_->set_last_added_document_id(1);
1164 
1165   // Before optimize
1166   EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/2,
1167                                         /*model_signature=*/"model1"),
1168               IsOkAndHolds(ElementsAre(
1169                   EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/1),
1170                                /*location=*/2),
1171                   EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/0),
1172                                /*location=*/0))));
1173   EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
1174                                         /*model_signature=*/"model2"),
1175               IsOkAndHolds(ElementsAre(
1176                   EmbeddingHit(BasicHit(/*section_id=*/1, /*document_id=*/1),
1177                                /*location=*/4))));
1178   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
1179               ElementsAre(0.1, 0.2, 1, 2, -0.1, -0.2, -0.3));
1180   EXPECT_EQ(embedding_index_->last_added_document_id(), 1);
1181 
1182   // Run optimize without deleting any documents. It is expected to see that the
1183   // raw embedding data is rearranged, since during index transfer:
1184   // - Embedding vectors with lower keys, which are the string encoded ordered
1185   //   pairs (dimension, model_signature), are iterated first.
1186   // - Embedding vectors from higher document ids are added first.
1187   //
1188   // Also keep in mind that once the raw data is rearranged, calling another
1189   // Optimize subsequently will not change the raw data again.
1190   for (int i = 0; i < 2; i++) {
1191     ICING_ASSERT_OK(
1192         embedding_index_->Optimize(document_store_.get(), schema_store_.get(),
1193                                    /*document_id_old_to_new=*/{0, 1},
1194                                    /*new_last_added_document_id=*/1));
1195     EXPECT_THAT(
1196         GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/2,
1197                                   /*model_signature=*/"model1"),
1198         IsOkAndHolds(ElementsAre(
1199             EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/1),
1200                          /*location=*/0),
1201             EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/0),
1202                          /*location=*/2))));
1203     EXPECT_THAT(
1204         GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
1205                                   /*model_signature=*/"model2"),
1206         IsOkAndHolds(ElementsAre(
1207             EmbeddingHit(BasicHit(/*section_id=*/1, /*document_id=*/1),
1208                          /*location=*/4))));
1209     EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
1210                 ElementsAre(1, 2, 0.1, 0.2, -0.1, -0.2, -0.3));
1211     EXPECT_EQ(embedding_index_->last_added_document_id(), 1);
1212   }
1213 
1214   // Run optimize to delete document 1, and check that the index is
1215   // updated correctly.
1216   ICING_ASSERT_OK(embedding_index_->Optimize(
1217       document_store_.get(), schema_store_.get(),
1218       /*document_id_old_to_new=*/{0, kInvalidDocumentId},
1219       /*new_last_added_document_id=*/0));
1220   EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/2,
1221                                         /*model_signature=*/"model1"),
1222               IsOkAndHolds(ElementsAre(
1223                   EmbeddingHit(BasicHit(/*section_id=*/0, /*document_id=*/0),
1224                                /*location=*/0))));
1225   EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
1226                                         /*model_signature=*/"model2"),
1227               IsOkAndHolds(IsEmpty()));
1228   EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
1229               ElementsAre(0.1, 0.2));
1230   EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
1231 }
1232 
// Exercises Optimize when the deleted document is the one holding the
// "model1" embedding, so that only the "model2" embedding is left and its
// document is remapped from id 1 to id 0.
TEST_F(EmbeddingIndexTest,
       OptimizeEmbeddingsFromDifferentModelsAndDeleteTheFirst) {
  // One 2-dimensional "model1" vector on document 0 and one 3-dimensional
  // "model2" vector on document 1.
  const BasicHit model1_hit(/*section_id=*/0, /*document_id=*/0);
  const BasicHit model2_hit(/*section_id=*/1, /*document_id=*/1);
  PropertyProto::VectorProto model1_vector = CreateVector("model1", {0.1, 0.2});
  PropertyProto::VectorProto model2_vector =
      CreateVector("model2", {-0.1, -0.2, -0.3});

  // Shorthands for the lookups that are repeated before and after Optimize.
  const auto model1_hits = [this]() {
    return GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/2,
                                     /*model_signature=*/"model1");
  };
  const auto model2_hits = [this]() {
    return GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                     /*model_signature=*/"model2");
  };
  const auto raw_data = [this]() {
    return GetRawEmbeddingDataFromIndex(embedding_index_.get());
  };

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(model1_hit, model1_vector,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(model2_hit, model2_vector,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(1);

  // Before optimize: both embeddings are present, in buffering order.
  EXPECT_THAT(model1_hits(),
              IsOkAndHolds(ElementsAre(
                  EmbeddingHit(model1_hit, /*location=*/0))));
  EXPECT_THAT(model2_hits(),
              IsOkAndHolds(ElementsAre(
                  EmbeddingHit(model2_hit, /*location=*/2))));
  EXPECT_THAT(raw_data(), ElementsAre(0.1, 0.2, -0.1, -0.2, -0.3));
  EXPECT_EQ(embedding_index_->last_added_document_id(), 1);

  // Optimize away document 0. The surviving "model2" hit is expected to carry
  // the new document id 0 and to start at location 0 of the raw data.
  ICING_ASSERT_OK(embedding_index_->Optimize(
      document_store_.get(), schema_store_.get(),
      /*document_id_old_to_new=*/{kInvalidDocumentId, 0},
      /*new_last_added_document_id=*/0));
  EXPECT_THAT(model1_hits(), IsOkAndHolds(IsEmpty()));
  EXPECT_THAT(model2_hits(),
              IsOkAndHolds(ElementsAre(EmbeddingHit(
                  BasicHit(/*section_id=*/1, /*document_id=*/0),
                  /*location=*/0))));
  EXPECT_THAT(raw_data(), ElementsAre(-0.1, -0.2, -0.3));
  EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
}
1280 
1281 }  // namespace
1282 }  // namespace lib
1283 }  // namespace icing
1284