1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/embed/embedding-index.h"
16
17 #include <unistd.h>
18
19 #include <cstdint>
20 #include <memory>
21 #include <string>
22 #include <string_view>
23 #include <utility>
24 #include <vector>
25
26 #include "icing/text_classifier/lib3/utils/base/status.h"
27 #include "icing/text_classifier/lib3/utils/base/statusor.h"
28 #include "gmock/gmock.h"
29 #include "gtest/gtest.h"
30 #include "icing/absl_ports/canonical_errors.h"
31 #include "icing/document-builder.h"
32 #include "icing/feature-flags.h"
33 #include "icing/file/filesystem.h"
34 #include "icing/file/portable-file-backed-proto-log.h"
35 #include "icing/index/embed/embedding-hit.h"
36 #include "icing/index/embed/quantizer.h"
37 #include "icing/index/hit/hit.h"
38 #include "icing/legacy/index/icing-filesystem.h"
39 #include "icing/proto/document.pb.h"
40 #include "icing/schema-builder.h"
41 #include "icing/schema/schema-store.h"
42 #include "icing/schema/section.h"
43 #include "icing/store/document-id.h"
44 #include "icing/store/document-store.h"
45 #include "icing/testing/common-matchers.h"
46 #include "icing/testing/embedding-test-utils.h"
47 #include "icing/testing/test-feature-flags.h"
48 #include "icing/testing/tmp-directory.h"
49 #include "icing/util/clock.h"
50 #include "icing/util/crc32.h"
51
52 namespace icing {
53 namespace lib {
54
55 namespace {
56
57 using ::testing::ElementsAre;
58 using ::testing::Eq;
59 using ::testing::FloatNear;
60 using ::testing::HasSubstr;
61 using ::testing::IsEmpty;
62 using ::testing::Pointwise;
63 using ::testing::Test;
64
65 static constexpr SectionId kSectionIdQuantizedEmbedding = 2;
66 static constexpr float kEpsQuantized = 0.01f;
67
68 class EmbeddingIndexTest : public Test {
69 protected:
SetUp()70 void SetUp() override {
71 feature_flags_ = std::make_unique<FeatureFlags>(GetTestFeatureFlags());
72 test_dir_ = GetTestTempDir() + "/icing";
73 embedding_index_dir_ = test_dir_ + "/embedding_index";
74 document_store_dir_ = test_dir_ + "/document_store";
75 schema_store_dir_ = test_dir_ + "/schema_store";
76 filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
77 filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
78 filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
79
80 ICING_ASSERT_OK_AND_ASSIGN(
81 schema_store_, SchemaStore::Create(&filesystem_, schema_store_dir_,
82 &clock_, feature_flags_.get()));
83
84 ICING_ASSERT_OK_AND_ASSIGN(
85 DocumentStore::CreateResult create_result,
86 DocumentStore::Create(&filesystem_, document_store_dir_, &clock_,
87 schema_store_.get(), feature_flags_.get(),
88 /*force_recovery_and_revalidate_documents=*/false,
89 /*pre_mapping_fbv=*/false,
90 /*use_persistent_hash_map=*/true,
91 PortableFileBackedProtoLog<
92 DocumentWrapper>::kDefaultCompressionLevel,
93 /*initialize_stats=*/nullptr));
94 document_store_ = std::move(create_result.document_store);
95
96 ICING_ASSERT_OK_AND_ASSIGN(
97 embedding_index_,
98 EmbeddingIndex::Create(&filesystem_, embedding_index_dir_, &clock_,
99 feature_flags_.get()));
100
101 ICING_ASSERT_OK(schema_store_->SetSchema(
102 SchemaBuilder()
103 .AddType(
104 SchemaTypeConfigBuilder()
105 .SetType("type")
106 .AddProperty(
107 PropertyConfigBuilder()
108 .SetName("prop1")
109 .SetDataTypeVector(EMBEDDING_INDEXING_LINEAR_SEARCH)
110 .SetCardinality(CARDINALITY_OPTIONAL))
111 .AddProperty(
112 PropertyConfigBuilder()
113 .SetName("prop2")
114 .SetDataTypeVector(EMBEDDING_INDEXING_LINEAR_SEARCH)
115 .SetCardinality(CARDINALITY_OPTIONAL))
116 // Quantized embedding
117 .AddProperty(
118 PropertyConfigBuilder()
119 .SetName("prop3")
120 .SetDataTypeVector(EMBEDDING_INDEXING_LINEAR_SEARCH,
121 QUANTIZATION_TYPE_QUANTIZE_8_BIT)
122 .SetCardinality(CARDINALITY_OPTIONAL)))
123 .Build(),
124 /*ignore_errors_and_delete_documents=*/false,
125 /*allow_circular_schema_definitions=*/false));
126 ICING_ASSERT_OK(document_store_->Put(
127 DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
128 ICING_ASSERT_OK(document_store_->Put(
129 DocumentBuilder().SetKey("ns", "uri1").SetSchema("type").Build()));
130 ICING_ASSERT_OK(document_store_->Put(
131 DocumentBuilder().SetKey("ns", "uri2").SetSchema("type").Build()));
132 }
133
TearDown()134 void TearDown() override {
135 document_store_.reset();
136 schema_store_.reset();
137 embedding_index_.reset();
138 filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
139 }
140
IndexContainsMetadataOnly()141 libtextclassifier3::StatusOr<bool> IndexContainsMetadataOnly() {
142 std::vector<std::string> sub_dirs;
143 if (!filesystem_.ListDirectory(embedding_index_dir_.c_str(), /*exclude=*/{},
144 /*recursive=*/true, &sub_dirs)) {
145 return absl_ports::InternalError("Failed to list directory");
146 }
147 return sub_dirs.size() == 1 && sub_dirs[0] == "metadata";
148 }
149
150 std::unique_ptr<FeatureFlags> feature_flags_;
151 Filesystem filesystem_;
152 IcingFilesystem icing_filesystem_;
153 std::string test_dir_;
154 std::string embedding_index_dir_;
155 std::string schema_store_dir_;
156 std::string document_store_dir_;
157 Clock clock_;
158 std::unique_ptr<SchemaStore> schema_store_;
159 std::unique_ptr<DocumentStore> document_store_;
160 std::unique_ptr<EmbeddingIndex> embedding_index_;
161 };
162
// An index that has just been created by the fixture must consist of nothing
// but its metadata entry on disk.
TEST_F(EmbeddingIndexTest, EmptyIndexContainsMetadataOnly) {
  libtextclassifier3::StatusOr<bool> metadata_only =
      IndexContainsMetadataOnly();
  EXPECT_THAT(metadata_only, IsOkAndHolds(true));
}
166
// Create() must reject a null filesystem as well as a null clock with
// FAILED_PRECONDITION.
TEST_F(EmbeddingIndexTest, InitializationShouldFailWithNullPointer) {
  // The scratch path lives under test_dir_ so that anything Create() might
  // leave behind is removed by the fixture's TearDown(); this test has no
  // cleanup of its own.
  std::string embedding_index_dir = test_dir_ + "/embedding_index_test_local";

  // Null filesystem.
  EXPECT_THAT(EmbeddingIndex::Create(nullptr, embedding_index_dir, &clock_,
                                     feature_flags_.get()),
              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));

  // Null clock.
  EXPECT_THAT(EmbeddingIndex::Create(&filesystem_, embedding_index_dir, nullptr,
                                     feature_flags_.get()),
              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
179
// While a modified index is still alive and has neither been persisted nor
// destroyed, opening a second index on the same directory must fail.
TEST_F(EmbeddingIndexTest,
       InitializationShouldFailWithoutPersistToDiskOrDestruction) {
  // 1. Create index and confirm that data was properly added.
  // The directory lives under test_dir_ so that the fixture's TearDown()
  // removes it even when an ASSERT below aborts the test before the explicit
  // cleanup at the end; otherwise stale data would leak into the other tests
  // that use the same directory name.
  std::string embedding_index_dir = test_dir_ + "/embedding_index_test_local";
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<EmbeddingIndex> embedding_index,
      EmbeddingIndex::Create(&filesystem_, embedding_index_dir, &clock_,
                             feature_flags_.get()));

  PropertyProto::VectorProto vector = CreateVector("model", {0.1, 0.2, 0.3});
  ICING_ASSERT_OK(embedding_index->BufferEmbedding(
      BasicHit(/*section_id=*/0, /*document_id=*/0), vector,
      QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index->CommitBufferToIndex());
  embedding_index->set_last_added_document_id(0);

  EXPECT_THAT(
      GetEmbeddingHitsFromIndex(embedding_index.get(), /*dimension=*/3,
                                /*model_signature=*/"model"),
      IsOkAndHolds(ElementsAre(EmbeddingHit(
          BasicHit(/*section_id=*/0, /*document_id=*/0), /*location=*/0))));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index.get()),
              ElementsAre(0.1, 0.2, 0.3));
  EXPECT_EQ(embedding_index->last_added_document_id(), 0);
  // GetChecksum should succeed without updating the checksum.
  ICING_EXPECT_OK(embedding_index->GetChecksum());

  // 2. Try to create another index with the same directory. This should fail
  // due to checksum mismatch.
  EXPECT_THAT(EmbeddingIndex::Create(&filesystem_, embedding_index_dir, &clock_,
                                     feature_flags_.get()),
              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));

  // Destroy the index before removing its files.
  embedding_index.reset();
  filesystem_.DeleteDirectoryRecursively(embedding_index_dir.c_str());
}
217
// After UpdateChecksums() a second index can be opened on the same directory
// and sees the data written through the first one.
TEST_F(EmbeddingIndexTest, InitializationShouldSucceedWithUpdateChecksums) {
  // 1. Create index and confirm that data was properly added.
  // The directory lives under test_dir_ so that the fixture's TearDown()
  // removes it even when an ASSERT below aborts the test before the explicit
  // cleanup at the end; otherwise stale data would leak into the other tests
  // that use the same directory name.
  std::string embedding_index_dir = test_dir_ + "/embedding_index_test_local";
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<EmbeddingIndex> embedding_index,
      EmbeddingIndex::Create(&filesystem_, embedding_index_dir, &clock_,
                             feature_flags_.get()));

  PropertyProto::VectorProto vector = CreateVector("model", {0.1, 0.2, 0.3});
  ICING_ASSERT_OK(embedding_index->BufferEmbedding(
      BasicHit(/*section_id=*/0, /*document_id=*/0), vector,
      QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index->CommitBufferToIndex());
  embedding_index->set_last_added_document_id(0);

  EXPECT_THAT(
      GetEmbeddingHitsFromIndex(embedding_index.get(), /*dimension=*/3,
                                /*model_signature=*/"model"),
      IsOkAndHolds(ElementsAre(EmbeddingHit(
          BasicHit(/*section_id=*/0, /*document_id=*/0), /*location=*/0))));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index.get()),
              ElementsAre(0.1, 0.2, 0.3));
  EXPECT_EQ(embedding_index->last_added_document_id(), 0);

  // 2. Update checksums to reflect the new content. The value returned by
  // UpdateChecksums() must match what GetChecksum() reports before and after.
  ICING_ASSERT_OK_AND_ASSIGN(Crc32 crc, embedding_index->GetChecksum());
  EXPECT_THAT(embedding_index->UpdateChecksums(), IsOkAndHolds(Eq(crc)));
  EXPECT_THAT(embedding_index->GetChecksum(), IsOkAndHolds(Eq(crc)));

  // 3. Create another index and confirm that the data is still there.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<EmbeddingIndex> embedding_index_two,
      EmbeddingIndex::Create(&filesystem_, embedding_index_dir, &clock_,
                             feature_flags_.get()));

  EXPECT_THAT(
      GetEmbeddingHitsFromIndex(embedding_index_two.get(), /*dimension=*/3,
                                /*model_signature=*/"model"),
      IsOkAndHolds(ElementsAre(EmbeddingHit(
          BasicHit(/*section_id=*/0, /*document_id=*/0), /*location=*/0))));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_two.get()),
              ElementsAre(0.1, 0.2, 0.3));
  EXPECT_EQ(embedding_index_two->last_added_document_id(), 0);

  // Destroy both indices before removing their files.
  embedding_index.reset();
  embedding_index_two.reset();
  filesystem_.DeleteDirectoryRecursively(embedding_index_dir.c_str());
}
267
// After PersistToDisk() a second index can be opened on the same directory
// and sees the data written through the first one.
TEST_F(EmbeddingIndexTest, InitializationShouldSucceedWithPersistToDisk) {
  // 1. Create index and confirm that data was properly added.
  // The directory lives under test_dir_ so that the fixture's TearDown()
  // removes it even when an ASSERT below aborts the test before the explicit
  // cleanup at the end; otherwise stale data would leak into the other tests
  // that use the same directory name.
  std::string embedding_index_dir = test_dir_ + "/embedding_index_test_local";
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<EmbeddingIndex> embedding_index,
      EmbeddingIndex::Create(&filesystem_, embedding_index_dir, &clock_,
                             feature_flags_.get()));

  PropertyProto::VectorProto vector = CreateVector("model", {0.1, 0.2, 0.3});
  ICING_ASSERT_OK(embedding_index->BufferEmbedding(
      BasicHit(/*section_id=*/0, /*document_id=*/0), vector,
      QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index->CommitBufferToIndex());
  embedding_index->set_last_added_document_id(0);

  EXPECT_THAT(
      GetEmbeddingHitsFromIndex(embedding_index.get(), /*dimension=*/3,
                                /*model_signature=*/"model"),
      IsOkAndHolds(ElementsAre(EmbeddingHit(
          BasicHit(/*section_id=*/0, /*document_id=*/0), /*location=*/0))));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index.get()),
              ElementsAre(0.1, 0.2, 0.3));
  EXPECT_EQ(embedding_index->last_added_document_id(), 0);

  // 2. Persist the new content to disk.
  ICING_EXPECT_OK(embedding_index->PersistToDisk());

  // 3. Create another index and confirm that the data is still there.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<EmbeddingIndex> embedding_index_two,
      EmbeddingIndex::Create(&filesystem_, embedding_index_dir, &clock_,
                             feature_flags_.get()));

  EXPECT_THAT(
      GetEmbeddingHitsFromIndex(embedding_index_two.get(), /*dimension=*/3,
                                /*model_signature=*/"model"),
      IsOkAndHolds(ElementsAre(EmbeddingHit(
          BasicHit(/*section_id=*/0, /*document_id=*/0), /*location=*/0))));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_two.get()),
              ElementsAre(0.1, 0.2, 0.3));
  EXPECT_EQ(embedding_index_two->last_added_document_id(), 0);

  // Destroy both indices before removing their files.
  embedding_index.reset();
  embedding_index_two.reset();
  filesystem_.DeleteDirectoryRecursively(embedding_index_dir.c_str());
}
315
// Reading the stored vector with its real dimension succeeds, whereas asking
// for one element more than was stored is reported as OUT_OF_RANGE.
TEST_F(EmbeddingIndexTest, GetEmbeddingVectorShouldFailWhenOutOfRange) {
  const uint32_t kStoredDimension = 3;
  const BasicHit stored_basic_hit(/*section_id=*/0, /*document_id=*/0);
  PropertyProto::VectorProto embedding =
      CreateVector("model", {0.1, 0.2, 0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      stored_basic_hit, embedding, QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());

  EmbeddingHit lookup_hit(stored_basic_hit, /*location=*/0);

  // Exactly the stored dimension: fine.
  ICING_ASSERT_OK(
      embedding_index_->GetEmbeddingVector(lookup_hit, kStoredDimension));
  // One past the stored data: rejected.
  EXPECT_THAT(
      embedding_index_->GetEmbeddingVector(lookup_hit, kStoredDimension + 1),
      StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
331
// Quantized counterpart of the out-of-range check: the stored dimension can
// be read back, one element more is reported as OUT_OF_RANGE.
TEST_F(EmbeddingIndexTest,
       GetQuantizedEmbeddingVectorShouldFailWhenOutOfRange) {
  const uint32_t kStoredDimension = 3;
  const BasicHit stored_basic_hit(kSectionIdQuantizedEmbedding,
                                  /*document_id=*/0);
  PropertyProto::VectorProto embedding =
      CreateVector("model", {0.1, 0.2, 0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      stored_basic_hit, embedding, QUANTIZATION_TYPE_QUANTIZE_8_BIT));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());

  EmbeddingHit lookup_hit(stored_basic_hit, /*location=*/0);

  // Exactly the stored dimension: fine.
  ICING_ASSERT_OK(embedding_index_->GetQuantizedEmbeddingVector(
      lookup_hit, kStoredDimension));
  // One past the stored data: rejected.
  EXPECT_THAT(embedding_index_->GetQuantizedEmbeddingVector(
                  lookup_hit, kStoredDimension + 1),
              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
348
// Committing one unquantized vector produces a single hit at location 0, the
// raw data equals the input values, and last_added_document_id round-trips.
TEST_F(EmbeddingIndexTest, AddSingleEmbedding) {
  const BasicHit basic_hit(/*section_id=*/0, /*document_id=*/0);
  PropertyProto::VectorProto embedding =
      CreateVector("model", {0.1, 0.2, 0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(basic_hit, embedding,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(0);

  EmbeddingHit expected_hit(basic_hit, /*location=*/0);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(expected_hit)));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
              ElementsAre(0.1, 0.2, 0.3));
  EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
}
366
// Committing one quantized vector produces a single hit at location 0. The
// data ends up in the quantized storage only (raw storage stays empty) and
// can be restored to the original values within kEpsQuantized.
TEST_F(EmbeddingIndexTest, AddSingleQuantizedEmbedding) {
  const BasicHit quantized_basic_hit(kSectionIdQuantizedEmbedding,
                                     /*document_id=*/0);
  PropertyProto::VectorProto embedding =
      CreateVector("model", {0.1, 0.2, 0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      quantized_basic_hit, embedding, QUANTIZATION_TYPE_QUANTIZE_8_BIT));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(0);

  EmbeddingHit expected_hit(quantized_basic_hit, /*location=*/0);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(expected_hit)));

  // One quantized vector of dimension 3 is expected to occupy
  // 3 + sizeof(Quantizer).
  EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
              Eq(3 + sizeof(Quantizer)));
  EXPECT_THAT(
      GetAndRestoreQuantizedEmbeddingVectorFromIndex(
          embedding_index_.get(), expected_hit, /*dimension=*/3),
      IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), {0.1, 0.2, 0.3})));

  // Nothing may have been written to the unquantized storage.
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()), IsEmpty());
  EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
}
390
// Two vectors buffered for the same (section, document) pair both get
// indexed; their hits differ only in location (0 and 3) and the raw data is
// stored back to back in insertion order.
TEST_F(EmbeddingIndexTest, AddMultipleEmbeddingsInTheSameSection) {
  const BasicHit shared_basic_hit(/*section_id=*/0, /*document_id=*/0);
  PropertyProto::VectorProto positive_vector =
      CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto negative_vector =
      CreateVector("model", {-0.1, -0.2, -0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      shared_basic_hit, positive_vector, QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      shared_basic_hit, negative_vector, QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(0);

  EmbeddingHit first_hit(shared_basic_hit, /*location=*/0);
  EmbeddingHit second_hit(shared_basic_hit, /*location=*/3);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(first_hit, second_hit)));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
              ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3));
  EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
}
415
// Two quantized vectors buffered for the same (section, document) pair both
// get indexed. The second one is located right behind the first one in the
// quantized storage, and both can be restored within kEpsQuantized.
TEST_F(EmbeddingIndexTest, AddMultipleQuantizedEmbeddingsInTheSameSection) {
  const BasicHit shared_basic_hit(kSectionIdQuantizedEmbedding,
                                  /*document_id=*/0);
  PropertyProto::VectorProto positive_vector =
      CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto negative_vector =
      CreateVector("model", {-0.1, -0.2, -0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      shared_basic_hit, positive_vector, QUANTIZATION_TYPE_QUANTIZE_8_BIT));
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      shared_basic_hit, negative_vector, QUANTIZATION_TYPE_QUANTIZE_8_BIT));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(0);

  EmbeddingHit first_hit(shared_basic_hit, /*location=*/0);
  // The second vector starts where the first one (3 + sizeof(Quantizer)) ends.
  EmbeddingHit second_hit(shared_basic_hit,
                          /*location=*/3 + sizeof(Quantizer));
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(first_hit, second_hit)));

  // Two quantized vectors
  EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
              Eq(2 * (3 + sizeof(Quantizer))));
  EXPECT_THAT(
      GetAndRestoreQuantizedEmbeddingVectorFromIndex(
          embedding_index_.get(), first_hit, /*dimension=*/3),
      IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), {0.1, 0.2, 0.3})));
  EXPECT_THAT(
      GetAndRestoreQuantizedEmbeddingVectorFromIndex(
          embedding_index_.get(), second_hit, /*dimension=*/3),
      IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized), {-0.1, -0.2, -0.3})));

  // Nothing may have been written to the unquantized storage.
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()), IsEmpty());
  EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
}
449
// Within one document, hits come back ordered by ascending section id even
// when they were buffered in the opposite order. Locations still reflect the
// insertion order of the raw data.
TEST_F(EmbeddingIndexTest, HitsWithLowerSectionIdReturnedFirst) {
  const BasicHit section5_basic_hit(/*section_id=*/5, /*document_id=*/0);
  const BasicHit section2_basic_hit(/*section_id=*/2, /*document_id=*/0);
  PropertyProto::VectorProto section5_vector =
      CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto section2_vector =
      CreateVector("model", {-0.1, -0.2, -0.3});

  // Buffer the higher section id first.
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      section5_basic_hit, section5_vector, QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      section2_basic_hit, section2_vector, QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(0);

  EmbeddingHit section2_hit(section2_basic_hit, /*location=*/3);
  EmbeddingHit section5_hit(section5_basic_hit, /*location=*/0);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(section2_hit, section5_hit)));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
              ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3));
  EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
}
474
// Across documents, the hit of the higher document id comes back first.
// Locations still reflect the insertion order of the raw data.
TEST_F(EmbeddingIndexTest, HitsWithHigherDocumentIdReturnedFirst) {
  const BasicHit doc0_basic_hit(/*section_id=*/0, /*document_id=*/0);
  const BasicHit doc1_basic_hit(/*section_id=*/0, /*document_id=*/1);
  PropertyProto::VectorProto doc0_vector =
      CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto doc1_vector =
      CreateVector("model", {-0.1, -0.2, -0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      doc0_basic_hit, doc0_vector, QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      doc1_basic_hit, doc1_vector, QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(1);

  EmbeddingHit doc1_hit(doc1_basic_hit, /*location=*/3);
  EmbeddingHit doc0_hit(doc0_basic_hit, /*location=*/0);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(doc1_hit, doc0_hit)));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
              ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3));
  EXPECT_EQ(embedding_index_->last_added_document_id(), 1);
}
499
// Vectors with different model signatures are retrievable independently:
// each (dimension, signature) query returns only its own hit, and a query
// for an unknown model returns no hits. All raw data shares one storage.
TEST_F(EmbeddingIndexTest, AddEmbeddingsFromDifferentModels) {
  const BasicHit basic_hit(/*section_id=*/0, /*document_id=*/0);
  PropertyProto::VectorProto model1_vector = CreateVector("model1", {0.1, 0.2});
  PropertyProto::VectorProto model2_vector =
      CreateVector("model2", {-0.1, -0.2, -0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(basic_hit, model1_vector,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(basic_hit, model2_vector,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(0);

  // "model1" vector: two values starting at location 0.
  EmbeddingHit model1_hit(basic_hit, /*location=*/0);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/2,
                                        /*model_signature=*/"model1"),
              IsOkAndHolds(ElementsAre(model1_hit)));

  // "model2" vector: three values starting right behind the first vector.
  EmbeddingHit model2_hit(basic_hit, /*location=*/2);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model2"),
              IsOkAndHolds(ElementsAre(model2_hit)));

  // Unknown model: no hits, but not an error.
  EXPECT_THAT(GetEmbeddingHitsFromIndex(
                  embedding_index_.get(),
                  /*dimension=*/5, /*model_signature=*/"non-existent-model"),
              IsOkAndHolds(IsEmpty()));

  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
              ElementsAre(0.1, 0.2, -0.1, -0.2, -0.3));
  EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
}
531
// The same model signature combined with different dimensions is kept apart:
// querying with dimension 2 and with dimension 3 each returns only the
// vector of that dimension.
TEST_F(EmbeddingIndexTest,
       AddEmbeddingsWithSameSignatureButDifferentDimension) {
  const BasicHit basic_hit(/*section_id=*/0, /*document_id=*/0);
  PropertyProto::VectorProto two_dim_vector = CreateVector("model", {0.1, 0.2});
  PropertyProto::VectorProto three_dim_vector =
      CreateVector("model", {-0.1, -0.2, -0.3});

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(basic_hit, two_dim_vector,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      basic_hit, three_dim_vector, QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(0);

  EmbeddingHit two_dim_hit(basic_hit, /*location=*/0);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/2,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(two_dim_hit)));

  EmbeddingHit three_dim_hit(basic_hit, /*location=*/2);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(three_dim_hit)));

  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
              ElementsAre(0.1, 0.2, -0.1, -0.2, -0.3));
  EXPECT_EQ(embedding_index_->last_added_document_id(), 0);
}
560
// Clear() must drop all hits, raw and quantized data and the last added
// document id, leave only the metadata on disk, and keep the index usable.
TEST_F(EmbeddingIndexTest, ClearIndex) {
  // Run the identical fill-verify-clear sequence twice: the second round
  // proves that the index still works after it has been cleared.
  for (int round = 0; round < 2; ++round) {
    const BasicHit raw_doc0_basic_hit(/*section_id=*/1, /*document_id=*/0);
    const BasicHit raw_doc1_basic_hit(/*section_id=*/2, /*document_id=*/1);
    const BasicHit quantized_doc2_basic_hit(kSectionIdQuantizedEmbedding,
                                            /*document_id=*/2);
    PropertyProto::VectorProto raw_doc0_vector =
        CreateVector("model", {0.1, 0.2, 0.3});
    PropertyProto::VectorProto raw_doc1_vector =
        CreateVector("model", {-0.1, -0.2, -0.3});
    PropertyProto::VectorProto quantized_doc2_vector =
        CreateVector("model", {0.4, 0.5, 0.6});

    ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
        raw_doc0_basic_hit, raw_doc0_vector, QUANTIZATION_TYPE_NONE));
    ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
        raw_doc1_basic_hit, raw_doc1_vector, QUANTIZATION_TYPE_NONE));
    ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
        quantized_doc2_basic_hit, quantized_doc2_vector,
        QUANTIZATION_TYPE_QUANTIZE_8_BIT));
    ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
    embedding_index_->set_last_added_document_id(2);

    // Expected hits, listed in the order the index returns them (highest
    // document id first).
    EmbeddingHit quantized_doc2_hit(quantized_doc2_basic_hit, /*location=*/0);
    EmbeddingHit raw_doc1_hit(raw_doc1_basic_hit, /*location=*/3);
    EmbeddingHit raw_doc0_hit(raw_doc0_basic_hit, /*location=*/0);

    EXPECT_THAT(
        GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                  /*model_signature=*/"model"),
        IsOkAndHolds(
            ElementsAre(quantized_doc2_hit, raw_doc1_hit, raw_doc0_hit)));
    EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
                ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3));
    EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
                Eq(3 + sizeof(Quantizer)));
    EXPECT_THAT(GetAndRestoreQuantizedEmbeddingVectorFromIndex(
                    embedding_index_.get(), quantized_doc2_hit,
                    /*dimension=*/3),
                IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized),
                                       quantized_doc2_vector.values())));
    EXPECT_EQ(embedding_index_->last_added_document_id(), 2);
    EXPECT_FALSE(embedding_index_->is_empty());
    EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(false));

    // Clear the index and verify that it is back to its pristine state.
    ICING_ASSERT_OK(embedding_index_->Clear());
    EXPECT_TRUE(embedding_index_->is_empty());
    EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(true));
    EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
                IsEmpty());
    EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(), Eq(0));
    EXPECT_EQ(embedding_index_->last_added_document_id(), kInvalidDocumentId);
  }
}
616
// After Discard() a newly created index on the same directory must be
// empty: no hits, no raw or quantized data, no last added document id and
// only the metadata on disk.
TEST_F(EmbeddingIndexTest, DiscardIndex) {
  // Run the identical fill-verify-discard sequence twice: the second round
  // proves that the re-created index is fully functional.
  for (int round = 0; round < 2; ++round) {
    const BasicHit raw_doc0_basic_hit(/*section_id=*/1, /*document_id=*/0);
    const BasicHit raw_doc1_basic_hit(/*section_id=*/2, /*document_id=*/1);
    const BasicHit quantized_doc2_basic_hit(kSectionIdQuantizedEmbedding,
                                            /*document_id=*/2);
    PropertyProto::VectorProto raw_doc0_vector =
        CreateVector("model", {0.1, 0.2, 0.3});
    PropertyProto::VectorProto raw_doc1_vector =
        CreateVector("model", {-0.1, -0.2, -0.3});
    PropertyProto::VectorProto quantized_doc2_vector =
        CreateVector("model", {0.4, 0.5, 0.6});

    ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
        raw_doc0_basic_hit, raw_doc0_vector, QUANTIZATION_TYPE_NONE));
    ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
        raw_doc1_basic_hit, raw_doc1_vector, QUANTIZATION_TYPE_NONE));
    ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
        quantized_doc2_basic_hit, quantized_doc2_vector,
        QUANTIZATION_TYPE_QUANTIZE_8_BIT));
    ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
    embedding_index_->set_last_added_document_id(2);

    // Expected hits, listed in the order the index returns them (highest
    // document id first).
    EmbeddingHit quantized_doc2_hit(quantized_doc2_basic_hit, /*location=*/0);
    EmbeddingHit raw_doc1_hit(raw_doc1_basic_hit, /*location=*/3);
    EmbeddingHit raw_doc0_hit(raw_doc0_basic_hit, /*location=*/0);

    EXPECT_THAT(
        GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                  /*model_signature=*/"model"),
        IsOkAndHolds(
            ElementsAre(quantized_doc2_hit, raw_doc1_hit, raw_doc0_hit)));
    EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
                ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3));
    EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(),
                Eq(3 + sizeof(Quantizer)));
    EXPECT_THAT(GetAndRestoreQuantizedEmbeddingVectorFromIndex(
                    embedding_index_.get(), quantized_doc2_hit,
                    /*dimension=*/3),
                IsOkAndHolds(Pointwise(FloatNear(kEpsQuantized),
                                       quantized_doc2_vector.values())));
    EXPECT_EQ(embedding_index_->last_added_document_id(), 2);
    EXPECT_FALSE(embedding_index_->is_empty());
    EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(false));

    // Destroy the live instance, discard its files, then build a new index
    // on the same directory and verify that it starts out pristine.
    embedding_index_.reset();
    EmbeddingIndex::Discard(filesystem_, embedding_index_dir_);
    ICING_ASSERT_OK_AND_ASSIGN(
        embedding_index_,
        EmbeddingIndex::Create(&filesystem_, embedding_index_dir_, &clock_,
                               feature_flags_.get()));
    EXPECT_TRUE(embedding_index_->is_empty());
    EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(true));
    EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
                IsEmpty());
    EXPECT_THAT(embedding_index_->GetTotalQuantizedVectorSize(), Eq(0));
    EXPECT_EQ(embedding_index_->last_added_document_id(), kInvalidDocumentId);
  }
}
676
// Committing with nothing buffered succeeds and leaves the index untouched:
// still empty, metadata only on disk, no raw or quantized data.
TEST_F(EmbeddingIndexTest, EmptyCommitIsOk) {
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());

  EmbeddingIndex* index = embedding_index_.get();
  EXPECT_TRUE(index->is_empty());
  EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(true));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), IsEmpty());
  EXPECT_THAT(index->GetTotalQuantizedVectorSize(), Eq(0));
}
684
// Hits added by separate commits end up in one combined result: the later
// commit (lower section id) is returned first, while the raw data keeps the
// order in which the vectors were committed.
TEST_F(EmbeddingIndexTest, MultipleCommits) {
  const BasicHit section1_basic_hit(/*section_id=*/1, /*document_id=*/0);
  const BasicHit section0_basic_hit(/*section_id=*/0, /*document_id=*/0);
  PropertyProto::VectorProto section1_vector =
      CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto section0_vector =
      CreateVector("model", {-0.1, -0.2, -0.3});

  // First commit: section 1.
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      section1_basic_hit, section1_vector, QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());

  // Second commit: section 0 of the same document.
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      section0_basic_hit, section0_vector, QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());

  EmbeddingHit section0_hit(section0_basic_hit, /*location=*/3);
  EmbeddingHit section1_hit(section1_basic_hit, /*location=*/0);
  EXPECT_THAT(GetEmbeddingHitsFromIndex(embedding_index_.get(), /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(section0_hit, section1_hit)));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(embedding_index_.get()),
              ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3));
}
710
// Commits section 0 and then, in a later commit, section 1 of the same
// document, and expects the second commit to be rejected with
// INVALID_ARGUMENT.
TEST_F(EmbeddingIndexTest,
       InvalidCommit_SectionIdCanOnlyDecreaseForSingleDocument) {
  PropertyProto::VectorProto first_vector =
      CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto second_vector =
      CreateVector("model", {-0.1, -0.2, -0.3});
  const BasicHit section0_hit(/*section_id=*/0, /*document_id=*/0);
  const BasicHit section1_hit(/*section_id=*/1, /*document_id=*/0);

  // The first hit for document 0 is buffered and committed successfully.
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(section0_hit, first_vector,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());

  // Buffering the higher section id still succeeds; only the commit fails.
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      section1_hit, second_vector, QUANTIZATION_TYPE_NONE));
  // Posting list with delta encoding can only allow decreasing values.
  EXPECT_THAT(embedding_index_->CommitBufferToIndex(),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
729
// Commits a hit for document 1 and then, in a later commit, a hit for
// document 0, and expects the second commit to be rejected with
// INVALID_ARGUMENT.
TEST_F(EmbeddingIndexTest, InvalidCommit_DocumentIdCanOnlyIncrease) {
  PropertyProto::VectorProto first_vector =
      CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto second_vector =
      CreateVector("model", {-0.1, -0.2, -0.3});
  const BasicHit document1_hit(/*section_id=*/0, /*document_id=*/1);
  const BasicHit document0_hit(/*section_id=*/0, /*document_id=*/0);

  // The hit for the higher document id is buffered and committed first.
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      document1_hit, first_vector, QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());

  // Buffering the lower document id still succeeds; only the commit fails.
  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(
      document0_hit, second_vector, QUANTIZATION_TYPE_NONE));
  // Posting list with delta encoding can only allow decreasing values. Since
  // document ids are inverted in hit values, this means document ids must be
  // committed in increasing order.
  EXPECT_THAT(embedding_index_->CommitBufferToIndex(),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
749
// Optimize is expected to return FAILED_PRECONDITION when either the document
// store or the schema store argument is null.
TEST_F(EmbeddingIndexTest, OptimizeShouldFailWithNullPointer) {
  // Case 1: null document store, valid schema store.
  const auto status_without_document_store = embedding_index_->Optimize(
      /*document_store=*/nullptr, schema_store_.get(),
      /*document_id_old_to_new=*/{},
      /*new_last_added_document_id=*/kInvalidDocumentId);
  EXPECT_THAT(status_without_document_store,
              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));

  // Case 2: valid document store, null schema store.
  const auto status_without_schema_store = embedding_index_->Optimize(
      document_store_.get(), /*schema_store=*/nullptr,
      /*document_id_old_to_new=*/{},
      /*new_last_added_document_id=*/kInvalidDocumentId);
  EXPECT_THAT(status_without_schema_store,
              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
763
// Indexes a hit for document 2 and then calls Optimize with a two-entry id
// map; the returned status is expected to complain that the map is too small.
TEST_F(EmbeddingIndexTest, OptimizeShouldFailWhenDocumentIdMapIsTooSmall) {
  PropertyProto::VectorProto embedding = CreateVector("model", {0.1, 0.2, 0.3});
  const BasicHit document2_hit(/*section_id=*/0, /*document_id=*/2);

  ICING_ASSERT_OK(embedding_index_->BufferEmbedding(document2_hit, embedding,
                                                    QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(embedding_index_->CommitBufferToIndex());
  embedding_index_->set_last_added_document_id(2);

  // The map below only has entries for document ids 0 and 1, so there is no
  // entry for document id 2 and Optimize should fail.
  const auto optimize_status =
      embedding_index_->Optimize(document_store_.get(), schema_store_.get(),
                                 /*document_id_old_to_new=*/{0, 1},
                                 /*new_last_added_document_id=*/2);
  EXPECT_THAT(optimize_status.error_message(),
              HasSubstr("The provided map is too small"));
}
781
// Running Optimize without adding anything in the test body must succeed and
// must leave the index reporting itself as empty.
TEST_F(EmbeddingIndexTest, EmptyOptimizeIsOk) {
  auto* const index = embedding_index_.get();

  // Empty id map, and no last-added document id.
  ICING_ASSERT_OK(
      index->Optimize(document_store_.get(), schema_store_.get(),
                      /*document_id_old_to_new=*/{},
                      /*new_last_added_document_id=*/kInvalidDocumentId));

  // The index is still expected to be empty, with no raw embedding data and a
  // total quantized vector size of zero.
  EXPECT_TRUE(index->is_empty());
  EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(true));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), IsEmpty());
  EXPECT_THAT(index->GetTotalQuantizedVectorSize(), Eq(0));
}
792
// Takes one unquantized embedding of document 2 through three Optimize calls:
// an identity remap, a remap of document 2 to document 1, and finally a remap
// that deletes the document.
TEST_F(EmbeddingIndexTest, OptimizeSingleEmbeddingSingleDocument) {
  auto* const index = embedding_index_.get();
  PropertyProto::VectorProto embedding = CreateVector("model", {0.1, 0.2, 0.3});
  const BasicHit basic_hit_doc2(/*section_id=*/0, /*document_id=*/2);
  const EmbeddingHit hit_doc2(basic_hit_doc2, /*location=*/0);
  const EmbeddingHit hit_doc1(BasicHit(/*section_id=*/0, /*document_id=*/1),
                              /*location=*/0);
  // Raw values expected for as long as the document has not been deleted.
  const auto original_values = ElementsAre(0.1, 0.2, 0.3);

  ICING_ASSERT_OK(index->BufferEmbedding(basic_hit_doc2, embedding,
                                         QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(index->CommitBufferToIndex());
  index->set_last_added_document_id(2);

  // State before any Optimize call.
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(hit_doc2)));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), original_values);
  EXPECT_EQ(index->last_added_document_id(), 2);

  // An identity mapping deletes nothing, so the index must look the same.
  ICING_ASSERT_OK(index->Optimize(document_store_.get(), schema_store_.get(),
                                  /*document_id_old_to_new=*/{0, 1, 2},
                                  /*new_last_added_document_id=*/2));
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(hit_doc2)));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), original_values);
  EXPECT_EQ(index->last_added_document_id(), 2);

  // Document id 2 becomes 1; the hit must follow the new id while the raw
  // data stays as it was.
  ICING_ASSERT_OK(
      index->Optimize(document_store_.get(), schema_store_.get(),
                      /*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
                      /*new_last_added_document_id=*/1));
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(hit_doc1)));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), original_values);
  EXPECT_EQ(index->last_added_document_id(), 1);

  // The document (now id 1) is deleted, which must empty the index.
  ICING_ASSERT_OK(
      index->Optimize(document_store_.get(), schema_store_.get(),
                      /*document_id_old_to_new=*/{0, kInvalidDocumentId},
                      /*new_last_added_document_id=*/0));
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(IsEmpty()));
  EXPECT_TRUE(index->is_empty());
  EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(true));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), IsEmpty());
  EXPECT_EQ(index->last_added_document_id(), 0);
}
854
// Takes one 8-bit quantized embedding of document 2 through three Optimize
// calls: an identity remap, a remap of document 2 to document 1, and finally a
// remap that deletes the document.
TEST_F(EmbeddingIndexTest, OptimizeSingleQuantizedEmbeddingSingleDocument) {
  auto* const index = embedding_index_.get();
  PropertyProto::VectorProto embedding = CreateVector("model", {0.1, 0.2, 0.3});
  const BasicHit basic_hit_doc2(kSectionIdQuantizedEmbedding,
                                /*document_id=*/2);
  const EmbeddingHit hit_doc2(basic_hit_doc2, /*location=*/0);
  const EmbeddingHit hit_doc1(
      BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/1),
      /*location=*/0);
  // Total quantized size expected while the single 3-dimensional vector is
  // stored.
  const auto single_vector_quantized_size = 3 + sizeof(Quantizer);
  // The restored vector only has to match the input within kEpsQuantized.
  const auto near_original_values =
      Pointwise(FloatNear(kEpsQuantized), {0.1, 0.2, 0.3});

  ICING_ASSERT_OK(index->BufferEmbedding(basic_hit_doc2, embedding,
                                         QUANTIZATION_TYPE_QUANTIZE_8_BIT));
  ICING_ASSERT_OK(index->CommitBufferToIndex());
  index->set_last_added_document_id(2);

  // State before any Optimize call. The raw (unquantized) storage is expected
  // to stay empty throughout this test.
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(hit_doc2)));
  EXPECT_THAT(index->GetTotalQuantizedVectorSize(),
              Eq(single_vector_quantized_size));
  EXPECT_THAT(GetAndRestoreQuantizedEmbeddingVectorFromIndex(index, hit_doc2,
                                                             /*dimension=*/3),
              IsOkAndHolds(near_original_values));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), IsEmpty());
  EXPECT_EQ(index->last_added_document_id(), 2);

  // An identity mapping deletes nothing, so the index must look the same.
  ICING_ASSERT_OK(index->Optimize(document_store_.get(), schema_store_.get(),
                                  /*document_id_old_to_new=*/{0, 1, 2},
                                  /*new_last_added_document_id=*/2));
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(hit_doc2)));
  EXPECT_THAT(index->GetTotalQuantizedVectorSize(),
              Eq(single_vector_quantized_size));
  EXPECT_THAT(GetAndRestoreQuantizedEmbeddingVectorFromIndex(index, hit_doc2,
                                                             /*dimension=*/3),
              IsOkAndHolds(near_original_values));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), IsEmpty());
  EXPECT_EQ(index->last_added_document_id(), 2);

  // Document id 2 becomes 1; the hit must follow the new id while the
  // quantized data stays as it was.
  ICING_ASSERT_OK(
      index->Optimize(document_store_.get(), schema_store_.get(),
                      /*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
                      /*new_last_added_document_id=*/1));
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(ElementsAre(hit_doc1)));
  EXPECT_THAT(index->GetTotalQuantizedVectorSize(),
              Eq(single_vector_quantized_size));
  EXPECT_THAT(GetAndRestoreQuantizedEmbeddingVectorFromIndex(index, hit_doc1,
                                                             /*dimension=*/3),
              IsOkAndHolds(near_original_values));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), IsEmpty());
  EXPECT_EQ(index->last_added_document_id(), 1);

  // The document (now id 1) is deleted, which must empty the index.
  ICING_ASSERT_OK(
      index->Optimize(document_store_.get(), schema_store_.get(),
                      /*document_id_old_to_new=*/{0, kInvalidDocumentId},
                      /*new_last_added_document_id=*/0));
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(IsEmpty()));
  EXPECT_TRUE(index->is_empty());
  EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(true));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), IsEmpty());
  EXPECT_THAT(index->GetTotalQuantizedVectorSize(), Eq(0));
  EXPECT_EQ(index->last_added_document_id(), 0);
}
930
// Stores two unquantized embeddings and one 8-bit quantized embedding for
// document 2, then takes them through three Optimize calls: an identity remap,
// a remap of document 2 to document 1, and finally a remap that deletes the
// document.
TEST_F(EmbeddingIndexTest, OptimizeMultipleEmbeddingsSingleDocument) {
  auto* const index = embedding_index_.get();
  PropertyProto::VectorProto raw_vector_a =
      CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto raw_vector_b =
      CreateVector("model", {-0.1, -0.2, -0.3});
  PropertyProto::VectorProto quantized_vector =
      CreateVector("model", {0.4, 0.5, 0.6});

  const BasicHit raw_basic_hit_doc2(/*section_id=*/0, /*document_id=*/2);
  const BasicHit quantized_basic_hit_doc2(kSectionIdQuantizedEmbedding,
                                          /*document_id=*/2);
  ICING_ASSERT_OK(index->BufferEmbedding(raw_basic_hit_doc2, raw_vector_a,
                                         QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(index->BufferEmbedding(raw_basic_hit_doc2, raw_vector_b,
                                         QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(index->BufferEmbedding(quantized_basic_hit_doc2,
                                         quantized_vector,
                                         QUANTIZATION_TYPE_QUANTIZE_8_BIT));
  ICING_ASSERT_OK(index->CommitBufferToIndex());
  index->set_last_added_document_id(2);

  // Expected hits while the document still has id 2.
  const EmbeddingHit quantized_hit_doc2(quantized_basic_hit_doc2,
                                        /*location=*/0);
  const auto hits_doc2 =
      ElementsAre(EmbeddingHit(raw_basic_hit_doc2, /*location=*/0),
                  EmbeddingHit(raw_basic_hit_doc2, /*location=*/3),
                  quantized_hit_doc2);
  // Expected hits once the document has been remapped to id 1.
  const BasicHit raw_basic_hit_doc1(/*section_id=*/0, /*document_id=*/1);
  const EmbeddingHit quantized_hit_doc1(
      BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/1),
      /*location=*/0);
  const auto hits_doc1 =
      ElementsAre(EmbeddingHit(raw_basic_hit_doc1, /*location=*/0),
                  EmbeddingHit(raw_basic_hit_doc1, /*location=*/3),
                  quantized_hit_doc1);
  // Expected storage contents for as long as the document is not deleted.
  const auto raw_values = ElementsAre(0.1, 0.2, 0.3, -0.1, -0.2, -0.3);
  const auto single_vector_quantized_size = 3 + sizeof(Quantizer);
  const auto near_quantized_input =
      Pointwise(FloatNear(kEpsQuantized), quantized_vector.values());

  // State before any Optimize call.
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(hits_doc2));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), raw_values);
  EXPECT_THAT(index->GetTotalQuantizedVectorSize(),
              Eq(single_vector_quantized_size));
  EXPECT_THAT(GetAndRestoreQuantizedEmbeddingVectorFromIndex(
                  index, quantized_hit_doc2, /*dimension=*/3),
              IsOkAndHolds(near_quantized_input));
  EXPECT_EQ(index->last_added_document_id(), 2);

  // An identity mapping deletes nothing, so the index must look the same.
  ICING_ASSERT_OK(index->Optimize(document_store_.get(), schema_store_.get(),
                                  /*document_id_old_to_new=*/{0, 1, 2},
                                  /*new_last_added_document_id=*/2));
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(hits_doc2));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), raw_values);
  EXPECT_THAT(index->GetTotalQuantizedVectorSize(),
              Eq(single_vector_quantized_size));
  EXPECT_THAT(GetAndRestoreQuantizedEmbeddingVectorFromIndex(
                  index, quantized_hit_doc2, /*dimension=*/3),
              IsOkAndHolds(near_quantized_input));
  EXPECT_EQ(index->last_added_document_id(), 2);

  // Document id 2 becomes 1; every hit must follow the new id while the
  // stored vector data stays as it was.
  ICING_ASSERT_OK(
      index->Optimize(document_store_.get(), schema_store_.get(),
                      /*document_id_old_to_new=*/{0, kInvalidDocumentId, 1},
                      /*new_last_added_document_id=*/1));
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(hits_doc1));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), raw_values);
  EXPECT_THAT(index->GetTotalQuantizedVectorSize(),
              Eq(single_vector_quantized_size));
  EXPECT_THAT(GetAndRestoreQuantizedEmbeddingVectorFromIndex(
                  index, quantized_hit_doc1, /*dimension=*/3),
              IsOkAndHolds(near_quantized_input));
  EXPECT_EQ(index->last_added_document_id(), 1);

  // The document (now id 1) is deleted, which must empty the index.
  ICING_ASSERT_OK(
      index->Optimize(document_store_.get(), schema_store_.get(),
                      /*document_id_old_to_new=*/{0, kInvalidDocumentId},
                      /*new_last_added_document_id=*/0));
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                        /*model_signature=*/"model"),
              IsOkAndHolds(IsEmpty()));
  EXPECT_TRUE(index->is_empty());
  EXPECT_THAT(IndexContainsMetadataOnly(), IsOkAndHolds(true));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), IsEmpty());
  EXPECT_THAT(index->GetTotalQuantizedVectorSize(), Eq(0));
  EXPECT_EQ(index->last_added_document_id(), 0);
}
1039
// Stores embeddings for two documents (two unquantized for document 0, one
// unquantized and one 8-bit quantized for document 1), runs two identity
// Optimize calls and then deletes document 0.
TEST_F(EmbeddingIndexTest, OptimizeMultipleEmbeddingsMultipleDocument) {
  auto* const index = embedding_index_.get();
  PropertyProto::VectorProto doc0_section0_vector =
      CreateVector("model", {0.1, 0.2, 0.3});
  PropertyProto::VectorProto doc0_section1_vector =
      CreateVector("model", {1, 2, 3});
  PropertyProto::VectorProto doc1_section0_vector =
      CreateVector("model", {-0.1, -0.2, -0.3});
  PropertyProto::VectorProto doc1_quantized_vector =
      CreateVector("model", {0.4, 0.5, 0.6});

  const BasicHit doc0_section0(/*section_id=*/0, /*document_id=*/0);
  const BasicHit doc0_section1(/*section_id=*/1, /*document_id=*/0);
  const BasicHit doc1_section0(/*section_id=*/0, /*document_id=*/1);
  const BasicHit doc1_quantized(kSectionIdQuantizedEmbedding,
                                /*document_id=*/1);
  ICING_ASSERT_OK(index->BufferEmbedding(doc0_section0, doc0_section0_vector,
                                         QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(index->BufferEmbedding(doc0_section1, doc0_section1_vector,
                                         QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(index->BufferEmbedding(doc1_section0, doc1_section0_vector,
                                         QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(index->BufferEmbedding(doc1_quantized,
                                         doc1_quantized_vector,
                                         QUANTIZATION_TYPE_QUANTIZE_8_BIT));
  ICING_ASSERT_OK(index->CommitBufferToIndex());
  index->set_last_added_document_id(1);

  // Expectations shared by several of the checks below.
  const EmbeddingHit quantized_hit_doc1(doc1_quantized, /*location=*/0);
  const auto single_vector_quantized_size = 3 + sizeof(Quantizer);
  const auto near_quantized_input =
      Pointwise(FloatNear(kEpsQuantized), doc1_quantized_vector.values());

  // State before any Optimize call: raw data is laid out in buffering order.
  EXPECT_THAT(
      GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                /*model_signature=*/"model"),
      IsOkAndHolds(ElementsAre(EmbeddingHit(doc1_section0, /*location=*/6),
                               quantized_hit_doc1,
                               EmbeddingHit(doc0_section0, /*location=*/0),
                               EmbeddingHit(doc0_section1, /*location=*/3))));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index),
              ElementsAre(0.1, 0.2, 0.3, 1, 2, 3, -0.1, -0.2, -0.3));
  EXPECT_THAT(index->GetTotalQuantizedVectorSize(),
              Eq(single_vector_quantized_size));
  EXPECT_THAT(GetAndRestoreQuantizedEmbeddingVectorFromIndex(
                  index, quantized_hit_doc1, /*dimension=*/3),
              IsOkAndHolds(near_quantized_input));
  EXPECT_EQ(index->last_added_document_id(), 1);

  // Run optimize without deleting any documents. The raw embedding data is
  // expected to be rearranged, since during index transfer, embedding vectors
  // from higher document ids are added first.
  //
  // Once the raw data has been rearranged, a further Optimize call must not
  // change it again, which is why the identical checks run twice.
  const auto rearranged_hits =
      ElementsAre(EmbeddingHit(doc1_section0, /*location=*/0),
                  quantized_hit_doc1,
                  EmbeddingHit(doc0_section0, /*location=*/3),
                  EmbeddingHit(doc0_section1, /*location=*/6));
  const auto rearranged_raw_values =
      ElementsAre(-0.1, -0.2, -0.3, 0.1, 0.2, 0.3, 1, 2, 3);
  for (int round = 0; round < 2; ++round) {
    ICING_ASSERT_OK(index->Optimize(document_store_.get(), schema_store_.get(),
                                    /*document_id_old_to_new=*/{0, 1},
                                    /*new_last_added_document_id=*/1));
    EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                          /*model_signature=*/"model"),
                IsOkAndHolds(rearranged_hits));
    EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), rearranged_raw_values);
    EXPECT_THAT(index->GetTotalQuantizedVectorSize(),
                Eq(single_vector_quantized_size));
    EXPECT_THAT(GetAndRestoreQuantizedEmbeddingVectorFromIndex(
                    index, quantized_hit_doc1, /*dimension=*/3),
                IsOkAndHolds(near_quantized_input));
    EXPECT_EQ(index->last_added_document_id(), 1);
  }

  // Delete document 0; the old document 1 becomes document 0 and only its two
  // embeddings are expected to remain.
  ICING_ASSERT_OK(
      index->Optimize(document_store_.get(), schema_store_.get(),
                      /*document_id_old_to_new=*/{kInvalidDocumentId, 0},
                      /*new_last_added_document_id=*/0));
  const EmbeddingHit quantized_hit_doc0(
      BasicHit(kSectionIdQuantizedEmbedding, /*document_id=*/0),
      /*location=*/0);
  EXPECT_THAT(
      GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                /*model_signature=*/"model"),
      IsOkAndHolds(ElementsAre(EmbeddingHit(doc0_section0, /*location=*/0),
                               quantized_hit_doc0)));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index),
              ElementsAre(-0.1, -0.2, -0.3));
  EXPECT_THAT(index->GetTotalQuantizedVectorSize(),
              Eq(single_vector_quantized_size));
  EXPECT_THAT(GetAndRestoreQuantizedEmbeddingVectorFromIndex(
                  index, quantized_hit_doc0, /*dimension=*/3),
              IsOkAndHolds(near_quantized_input));
  EXPECT_EQ(index->last_added_document_id(), 0);
}
1147
// Stores embeddings from two models ("model1" with dimension 2, "model2" with
// dimension 3) across two documents, runs two identity Optimize calls and then
// deletes document 1.
TEST_F(EmbeddingIndexTest, OptimizeEmbeddingsFromDifferentModels) {
  auto* const index = embedding_index_.get();
  PropertyProto::VectorProto model1_doc0_vector =
      CreateVector("model1", {0.1, 0.2});
  PropertyProto::VectorProto model1_doc1_vector =
      CreateVector("model1", {1, 2});
  PropertyProto::VectorProto model2_doc1_vector =
      CreateVector("model2", {-0.1, -0.2, -0.3});

  const BasicHit doc0_section0(/*section_id=*/0, /*document_id=*/0);
  const BasicHit doc1_section0(/*section_id=*/0, /*document_id=*/1);
  const BasicHit doc1_section1(/*section_id=*/1, /*document_id=*/1);
  ICING_ASSERT_OK(index->BufferEmbedding(doc0_section0, model1_doc0_vector,
                                         QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(index->BufferEmbedding(doc1_section0, model1_doc1_vector,
                                         QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(index->BufferEmbedding(doc1_section1, model2_doc1_vector,
                                         QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(index->CommitBufferToIndex());
  index->set_last_added_document_id(1);

  // State before any Optimize call: raw data is laid out in buffering order.
  EXPECT_THAT(
      GetEmbeddingHitsFromIndex(index, /*dimension=*/2,
                                /*model_signature=*/"model1"),
      IsOkAndHolds(ElementsAre(EmbeddingHit(doc1_section0, /*location=*/2),
                               EmbeddingHit(doc0_section0, /*location=*/0))));
  EXPECT_THAT(
      GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                /*model_signature=*/"model2"),
      IsOkAndHolds(ElementsAre(EmbeddingHit(doc1_section1, /*location=*/4))));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index),
              ElementsAre(0.1, 0.2, 1, 2, -0.1, -0.2, -0.3));
  EXPECT_EQ(index->last_added_document_id(), 1);

  // Run optimize without deleting any documents. The raw embedding data is
  // expected to be rearranged, since during index transfer:
  // - Embedding vectors with lower keys, which are the string encoded ordered
  //   pairs (dimension, model_signature), are iterated first.
  // - Embedding vectors from higher document ids are added first.
  //
  // Once the raw data has been rearranged, a further Optimize call must not
  // change it again, which is why the identical checks run twice.
  const auto rearranged_model1_hits =
      ElementsAre(EmbeddingHit(doc1_section0, /*location=*/0),
                  EmbeddingHit(doc0_section0, /*location=*/2));
  const auto rearranged_model2_hits =
      ElementsAre(EmbeddingHit(doc1_section1, /*location=*/4));
  const auto rearranged_raw_values =
      ElementsAre(1, 2, 0.1, 0.2, -0.1, -0.2, -0.3);
  for (int round = 0; round < 2; ++round) {
    ICING_ASSERT_OK(index->Optimize(document_store_.get(), schema_store_.get(),
                                    /*document_id_old_to_new=*/{0, 1},
                                    /*new_last_added_document_id=*/1));
    EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/2,
                                          /*model_signature=*/"model1"),
                IsOkAndHolds(rearranged_model1_hits));
    EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                          /*model_signature=*/"model2"),
                IsOkAndHolds(rearranged_model2_hits));
    EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), rearranged_raw_values);
    EXPECT_EQ(index->last_added_document_id(), 1);
  }

  // Delete document 1; only document 0's "model1" embedding is expected to
  // remain, and "model2" is expected to have no hits left.
  ICING_ASSERT_OK(
      index->Optimize(document_store_.get(), schema_store_.get(),
                      /*document_id_old_to_new=*/{0, kInvalidDocumentId},
                      /*new_last_added_document_id=*/0));
  EXPECT_THAT(
      GetEmbeddingHitsFromIndex(index, /*dimension=*/2,
                                /*model_signature=*/"model1"),
      IsOkAndHolds(ElementsAre(EmbeddingHit(doc0_section0, /*location=*/0))));
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                        /*model_signature=*/"model2"),
              IsOkAndHolds(IsEmpty()));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index), ElementsAre(0.1, 0.2));
  EXPECT_EQ(index->last_added_document_id(), 0);
}
1232
// Stores one "model1" embedding for document 0 and one "model2" embedding for
// document 1, then deletes document 0 via Optimize and expects only the
// "model2" embedding to remain, remapped to document 0.
TEST_F(EmbeddingIndexTest,
       OptimizeEmbeddingsFromDifferentModelsAndDeleteTheFirst) {
  auto* const index = embedding_index_.get();
  PropertyProto::VectorProto model1_vector =
      CreateVector("model1", {0.1, 0.2});
  PropertyProto::VectorProto model2_vector =
      CreateVector("model2", {-0.1, -0.2, -0.3});

  const BasicHit model1_basic_hit(/*section_id=*/0, /*document_id=*/0);
  const BasicHit model2_basic_hit(/*section_id=*/1, /*document_id=*/1);
  ICING_ASSERT_OK(index->BufferEmbedding(model1_basic_hit, model1_vector,
                                         QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(index->BufferEmbedding(model2_basic_hit, model2_vector,
                                         QUANTIZATION_TYPE_NONE));
  ICING_ASSERT_OK(index->CommitBufferToIndex());
  index->set_last_added_document_id(1);

  // State before Optimize: one hit per model, raw data in buffering order.
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/2,
                                        /*model_signature=*/"model1"),
              IsOkAndHolds(ElementsAre(
                  EmbeddingHit(model1_basic_hit, /*location=*/0))));
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                        /*model_signature=*/"model2"),
              IsOkAndHolds(ElementsAre(
                  EmbeddingHit(model2_basic_hit, /*location=*/2))));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index),
              ElementsAre(0.1, 0.2, -0.1, -0.2, -0.3));
  EXPECT_EQ(index->last_added_document_id(), 1);

  // Delete document 0; the old document 1 becomes document 0. "model1" is
  // expected to have no hits left, and the "model2" vector is expected to be
  // the only raw data remaining.
  ICING_ASSERT_OK(
      index->Optimize(document_store_.get(), schema_store_.get(),
                      /*document_id_old_to_new=*/{kInvalidDocumentId, 0},
                      /*new_last_added_document_id=*/0));
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/2,
                                        /*model_signature=*/"model1"),
              IsOkAndHolds(IsEmpty()));
  EXPECT_THAT(GetEmbeddingHitsFromIndex(index, /*dimension=*/3,
                                        /*model_signature=*/"model2"),
              IsOkAndHolds(ElementsAre(EmbeddingHit(
                  BasicHit(/*section_id=*/1, /*document_id=*/0),
                  /*location=*/0))));
  EXPECT_THAT(GetRawEmbeddingDataFromIndex(index),
              ElementsAre(-0.1, -0.2, -0.3));
  EXPECT_EQ(index->last_added_document_id(), 0);
}
1280
1281 } // namespace
1282 } // namespace lib
1283 } // namespace icing
1284