xref: /aosp_15_r20/external/icing/icing/store/document-log-creator.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2021 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/store/document-log-creator.h"
16 
17 #include <memory>
18 #include <string>
19 #include <utility>
20 
21 #include "icing/text_classifier/lib3/utils/base/status.h"
22 #include "icing/text_classifier/lib3/utils/base/statusor.h"
23 #include "icing/absl_ports/annotate.h"
24 #include "icing/absl_ports/canonical_errors.h"
25 #include "icing/absl_ports/str_cat.h"
26 #include "icing/file/constants.h"
27 #include "icing/file/file-backed-proto-log.h"
28 #include "icing/file/filesystem.h"
29 #include "icing/file/portable-file-backed-proto-log.h"
30 #include "icing/proto/document.pb.h"
31 #include "icing/proto/document_wrapper.pb.h"
32 #include "icing/util/logging.h"
33 #include "icing/util/status-macros.h"
34 
35 namespace icing {
36 namespace lib {
37 
38 namespace {
39 
// Base file name of the document log. Version-specific file names are built
// from it: v0 uses this name unchanged, later versions append a suffix.
constexpr char kDocumentLogFilename[] = "document_log";
42 
DocumentLogFilenameV0()43 std::string DocumentLogFilenameV0() {
44   // Originally only had this one version, no suffix.
45   return kDocumentLogFilename;
46 }
47 
DocumentLogFilenameV1()48 std::string DocumentLogFilenameV1() {
49   return absl_ports::StrCat(kDocumentLogFilename, "_v1");
50 }
51 
MakeDocumentLogFilenameV0(const std::string & base_dir)52 std::string MakeDocumentLogFilenameV0(const std::string& base_dir) {
53   return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV0());
54 }
55 
MakeDocumentLogFilenameV1(const std::string & base_dir)56 std::string MakeDocumentLogFilenameV1(const std::string& base_dir) {
57   return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV1());
58 }
59 
60 }  // namespace
61 
GetDocumentLogFilename()62 std::string DocumentLogCreator::GetDocumentLogFilename() {
63   // This should always return the latest version of the document log in use.
64   // The current latest version is V1.
65   return DocumentLogFilenameV1();
66 }
67 
68 libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult>
Create(const Filesystem * filesystem,const std::string & base_dir,int32_t compression_level)69 DocumentLogCreator::Create(const Filesystem* filesystem,
70                            const std::string& base_dir,
71                            int32_t compression_level) {
72   bool v0_exists =
73       filesystem->FileExists(MakeDocumentLogFilenameV0(base_dir).c_str());
74   bool v1_exists =
75       filesystem->FileExists(MakeDocumentLogFilenameV1(base_dir).c_str());
76 
77   bool new_file = false;
78   int preexisting_file_version = kCurrentVersion;
79   if (v0_exists && !v1_exists) {
80     ICING_RETURN_IF_ERROR(
81         MigrateFromV0ToV1(filesystem, base_dir, compression_level));
82 
83     // Need to regenerate derived files since documents may be written to a
84     // different file offset in the log.
85     preexisting_file_version = 0;
86   } else if (!v1_exists) {
87     // First time initializing a v1 log. There are no existing derived files at
88     // this point, so we should generate some. "regenerate" here also means
89     // "generate for the first time", i.e. we shouldn't expect there to be any
90     // existing derived files.
91     new_file = true;
92   }
93 
94   ICING_ASSIGN_OR_RETURN(
95       PortableFileBackedProtoLog<DocumentWrapper>::CreateResult
96           log_create_result,
97       PortableFileBackedProtoLog<DocumentWrapper>::Create(
98           filesystem, MakeDocumentLogFilenameV1(base_dir),
99           PortableFileBackedProtoLog<DocumentWrapper>::Options(
100               /*compress_in=*/true, constants::kMaxProtoSize,
101               compression_level)));
102 
103   CreateResult create_result = {std::move(log_create_result),
104                                 preexisting_file_version, new_file};
105   return create_result;
106 }
107 
MigrateFromV0ToV1(const Filesystem * filesystem,const std::string & base_dir,int32_t compression_level)108 libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1(
109     const Filesystem* filesystem, const std::string& base_dir,
110     int32_t compression_level) {
111   ICING_VLOG(1) << "Migrating from v0 to v1 document log.";
112 
113   // Our v0 proto log was non-portable, create it so we can read protos out from
114   // it.
115   auto v0_create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
116       filesystem, MakeDocumentLogFilenameV0(base_dir),
117       FileBackedProtoLog<DocumentWrapper>::Options(/*compress_in=*/true));
118   if (!v0_create_result_or.ok()) {
119     return absl_ports::Annotate(
120         v0_create_result_or.status(),
121         "Failed to initialize v0 document log while migrating.");
122     return v0_create_result_or.status();
123   }
124   FileBackedProtoLog<DocumentWrapper>::CreateResult v0_create_result =
125       std::move(v0_create_result_or).ValueOrDie();
126   std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> v0_proto_log =
127       std::move(v0_create_result.proto_log);
128 
129   // Create a v1 portable proto log that we will write our protos to.
130   auto v1_create_result_or =
131       PortableFileBackedProtoLog<DocumentWrapper>::Create(
132           filesystem, MakeDocumentLogFilenameV1(base_dir),
133           PortableFileBackedProtoLog<DocumentWrapper>::Options(
134               /*compress_in=*/true,
135               /*max_proto_size_in=*/
136               constants::kMaxProtoSize,
137               /*compression_level_in=*/compression_level));
138   if (!v1_create_result_or.ok()) {
139     return absl_ports::Annotate(
140         v1_create_result_or.status(),
141         "Failed to initialize v1 document log while migrating.");
142   }
143   PortableFileBackedProtoLog<DocumentWrapper>::CreateResult v1_create_result =
144       std::move(v1_create_result_or).ValueOrDie();
145   std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> v1_proto_log =
146       std::move(v1_create_result.proto_log);
147 
148   // Dummy empty document to be used when copying over deleted documents.
149   DocumentProto empty_document;
150 
151   // Start reading out from the old log and putting them in the new log.
152   ICING_ASSIGN_OR_RETURN(FileBackedProtoLog<DocumentWrapper>::Iterator iterator,
153                          v0_proto_log->GetIterator());
154   auto iterator_status = iterator.Advance();
155   while (iterator_status.ok()) {
156     libtextclassifier3::StatusOr<DocumentWrapper> document_wrapper_or =
157         v0_proto_log->ReadProto(iterator.GetOffset());
158 
159     bool deleted_document = false;
160     DocumentWrapper document_wrapper;
161     if (absl_ports::IsNotFound(document_wrapper_or.status())) {
162       // Proto was erased, we can skip copying this into our new log.
163       *document_wrapper.mutable_document() = empty_document;
164       deleted_document = true;
165     } else if (!document_wrapper_or.ok()) {
166       // Some real error, pass up
167       return document_wrapper_or.status();
168     } else {
169       document_wrapper = std::move(document_wrapper_or).ValueOrDie();
170     }
171 
172     auto offset_or = v1_proto_log->WriteProto(document_wrapper);
173     if (!offset_or.ok()) {
174       return absl_ports::Annotate(
175           offset_or.status(),
176           "Failed to write proto to v1 document log while migrating.");
177     }
178 
179     // If the original document was deleted, erase the proto we just wrote.
180     // We do this to maintain the document_ids, i.e. we still want document_id 2
181     // to point to a deleted document even though we may not have the document
182     // contents anymore. DocumentStore guarantees that the document_ids don't
183     // change unless an Optimize is triggered.
184     if (deleted_document) {
185       int64_t offset = offset_or.ValueOrDie();
186       auto erased_status = v1_proto_log->EraseProto(offset);
187       if (!erased_status.ok()) {
188         return absl_ports::Annotate(
189             erased_status,
190             "Failed to erase proto in v1 document log while migrating.");
191       }
192     }
193 
194     iterator_status = iterator.Advance();
195   }
196 
197   // Close out our file log pointers.
198   v0_proto_log.reset();
199   v1_proto_log.reset();
200 
201   return libtextclassifier3::Status::OK;
202 }
203 
204 }  // namespace lib
205 }  // namespace icing
206