1 // Copyright (C) 2021 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/store/document-log-creator.h"
16
17 #include <memory>
18 #include <string>
19 #include <utility>
20
21 #include "icing/text_classifier/lib3/utils/base/status.h"
22 #include "icing/text_classifier/lib3/utils/base/statusor.h"
23 #include "icing/absl_ports/annotate.h"
24 #include "icing/absl_ports/canonical_errors.h"
25 #include "icing/absl_ports/str_cat.h"
26 #include "icing/file/constants.h"
27 #include "icing/file/file-backed-proto-log.h"
28 #include "icing/file/filesystem.h"
29 #include "icing/file/portable-file-backed-proto-log.h"
30 #include "icing/proto/document.pb.h"
31 #include "icing/proto/document_wrapper.pb.h"
32 #include "icing/util/logging.h"
33 #include "icing/util/status-macros.h"
34
35 namespace icing {
36 namespace lib {
37
38 namespace {
39
// Base filename of the document log. The v0 log uses this name as-is; the v1
// log appends a "_v1" suffix to it.
constexpr char kDocumentLogFilename[] = "document_log";
42
DocumentLogFilenameV0()43 std::string DocumentLogFilenameV0() {
44 // Originally only had this one version, no suffix.
45 return kDocumentLogFilename;
46 }
47
DocumentLogFilenameV1()48 std::string DocumentLogFilenameV1() {
49 return absl_ports::StrCat(kDocumentLogFilename, "_v1");
50 }
51
MakeDocumentLogFilenameV0(const std::string & base_dir)52 std::string MakeDocumentLogFilenameV0(const std::string& base_dir) {
53 return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV0());
54 }
55
MakeDocumentLogFilenameV1(const std::string & base_dir)56 std::string MakeDocumentLogFilenameV1(const std::string& base_dir) {
57 return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV1());
58 }
59
60 } // namespace
61
GetDocumentLogFilename()62 std::string DocumentLogCreator::GetDocumentLogFilename() {
63 // This should always return the latest version of the document log in use.
64 // The current latest version is V1.
65 return DocumentLogFilenameV1();
66 }
67
68 libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult>
Create(const Filesystem * filesystem,const std::string & base_dir,int32_t compression_level)69 DocumentLogCreator::Create(const Filesystem* filesystem,
70 const std::string& base_dir,
71 int32_t compression_level) {
72 bool v0_exists =
73 filesystem->FileExists(MakeDocumentLogFilenameV0(base_dir).c_str());
74 bool v1_exists =
75 filesystem->FileExists(MakeDocumentLogFilenameV1(base_dir).c_str());
76
77 bool new_file = false;
78 int preexisting_file_version = kCurrentVersion;
79 if (v0_exists && !v1_exists) {
80 ICING_RETURN_IF_ERROR(
81 MigrateFromV0ToV1(filesystem, base_dir, compression_level));
82
83 // Need to regenerate derived files since documents may be written to a
84 // different file offset in the log.
85 preexisting_file_version = 0;
86 } else if (!v1_exists) {
87 // First time initializing a v1 log. There are no existing derived files at
88 // this point, so we should generate some. "regenerate" here also means
89 // "generate for the first time", i.e. we shouldn't expect there to be any
90 // existing derived files.
91 new_file = true;
92 }
93
94 ICING_ASSIGN_OR_RETURN(
95 PortableFileBackedProtoLog<DocumentWrapper>::CreateResult
96 log_create_result,
97 PortableFileBackedProtoLog<DocumentWrapper>::Create(
98 filesystem, MakeDocumentLogFilenameV1(base_dir),
99 PortableFileBackedProtoLog<DocumentWrapper>::Options(
100 /*compress_in=*/true, constants::kMaxProtoSize,
101 compression_level)));
102
103 CreateResult create_result = {std::move(log_create_result),
104 preexisting_file_version, new_file};
105 return create_result;
106 }
107
MigrateFromV0ToV1(const Filesystem * filesystem,const std::string & base_dir,int32_t compression_level)108 libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1(
109 const Filesystem* filesystem, const std::string& base_dir,
110 int32_t compression_level) {
111 ICING_VLOG(1) << "Migrating from v0 to v1 document log.";
112
113 // Our v0 proto log was non-portable, create it so we can read protos out from
114 // it.
115 auto v0_create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
116 filesystem, MakeDocumentLogFilenameV0(base_dir),
117 FileBackedProtoLog<DocumentWrapper>::Options(/*compress_in=*/true));
118 if (!v0_create_result_or.ok()) {
119 return absl_ports::Annotate(
120 v0_create_result_or.status(),
121 "Failed to initialize v0 document log while migrating.");
122 return v0_create_result_or.status();
123 }
124 FileBackedProtoLog<DocumentWrapper>::CreateResult v0_create_result =
125 std::move(v0_create_result_or).ValueOrDie();
126 std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> v0_proto_log =
127 std::move(v0_create_result.proto_log);
128
129 // Create a v1 portable proto log that we will write our protos to.
130 auto v1_create_result_or =
131 PortableFileBackedProtoLog<DocumentWrapper>::Create(
132 filesystem, MakeDocumentLogFilenameV1(base_dir),
133 PortableFileBackedProtoLog<DocumentWrapper>::Options(
134 /*compress_in=*/true,
135 /*max_proto_size_in=*/
136 constants::kMaxProtoSize,
137 /*compression_level_in=*/compression_level));
138 if (!v1_create_result_or.ok()) {
139 return absl_ports::Annotate(
140 v1_create_result_or.status(),
141 "Failed to initialize v1 document log while migrating.");
142 }
143 PortableFileBackedProtoLog<DocumentWrapper>::CreateResult v1_create_result =
144 std::move(v1_create_result_or).ValueOrDie();
145 std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> v1_proto_log =
146 std::move(v1_create_result.proto_log);
147
148 // Dummy empty document to be used when copying over deleted documents.
149 DocumentProto empty_document;
150
151 // Start reading out from the old log and putting them in the new log.
152 ICING_ASSIGN_OR_RETURN(FileBackedProtoLog<DocumentWrapper>::Iterator iterator,
153 v0_proto_log->GetIterator());
154 auto iterator_status = iterator.Advance();
155 while (iterator_status.ok()) {
156 libtextclassifier3::StatusOr<DocumentWrapper> document_wrapper_or =
157 v0_proto_log->ReadProto(iterator.GetOffset());
158
159 bool deleted_document = false;
160 DocumentWrapper document_wrapper;
161 if (absl_ports::IsNotFound(document_wrapper_or.status())) {
162 // Proto was erased, we can skip copying this into our new log.
163 *document_wrapper.mutable_document() = empty_document;
164 deleted_document = true;
165 } else if (!document_wrapper_or.ok()) {
166 // Some real error, pass up
167 return document_wrapper_or.status();
168 } else {
169 document_wrapper = std::move(document_wrapper_or).ValueOrDie();
170 }
171
172 auto offset_or = v1_proto_log->WriteProto(document_wrapper);
173 if (!offset_or.ok()) {
174 return absl_ports::Annotate(
175 offset_or.status(),
176 "Failed to write proto to v1 document log while migrating.");
177 }
178
179 // If the original document was deleted, erase the proto we just wrote.
180 // We do this to maintain the document_ids, i.e. we still want document_id 2
181 // to point to a deleted document even though we may not have the document
182 // contents anymore. DocumentStore guarantees that the document_ids don't
183 // change unless an Optimize is triggered.
184 if (deleted_document) {
185 int64_t offset = offset_or.ValueOrDie();
186 auto erased_status = v1_proto_log->EraseProto(offset);
187 if (!erased_status.ok()) {
188 return absl_ports::Annotate(
189 erased_status,
190 "Failed to erase proto in v1 document log while migrating.");
191 }
192 }
193
194 iterator_status = iterator.Advance();
195 }
196
197 // Close out our file log pointers.
198 v0_proto_log.reset();
199 v1_proto_log.reset();
200
201 return libtextclassifier3::Status::OK;
202 }
203
204 } // namespace lib
205 } // namespace icing
206