1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/file/memory-mapped-file.h"
16
17 #include <sys/mman.h>
18
19 #include <cerrno>
20 #include <cinttypes>
21 #include <memory>
22
23 #include "icing/text_classifier/lib3/utils/base/status.h"
24 #include "icing/text_classifier/lib3/utils/base/statusor.h"
25 #include "icing/absl_ports/canonical_errors.h"
26 #include "icing/absl_ports/str_cat.h"
27 #include "icing/file/filesystem.h"
28 #include "icing/legacy/core/icing-string-util.h"
29 #include "icing/util/math-util.h"
30 #include "icing/util/status-macros.h"
31
32 namespace icing {
33 namespace lib {
34
35 /* static */ libtextclassifier3::StatusOr<MemoryMappedFile>
Create(const Filesystem & filesystem,std::string_view file_path,Strategy mmap_strategy,int64_t max_file_size)36 MemoryMappedFile::Create(const Filesystem& filesystem,
37 std::string_view file_path, Strategy mmap_strategy,
38 int64_t max_file_size) {
39 if (max_file_size <= 0 || max_file_size > kMaxFileSize) {
40 return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
41 "Invalid max file size %" PRId64 " for MemoryMappedFile",
42 max_file_size));
43 }
44
45 const std::string file_path_str(file_path);
46 int64_t file_size = filesystem.FileExists(file_path_str.c_str())
47 ? filesystem.GetFileSize(file_path_str.c_str())
48 : 0;
49 if (file_size == Filesystem::kBadFileSize) {
50 return absl_ports::InternalError(
51 absl_ports::StrCat("Bad file size for file ", file_path));
52 }
53
54 return MemoryMappedFile(filesystem, file_path, mmap_strategy, max_file_size,
55 file_size);
56 }
57
58 /* static */ libtextclassifier3::StatusOr<MemoryMappedFile>
Create(const Filesystem & filesystem,std::string_view file_path,Strategy mmap_strategy,int64_t max_file_size,int64_t pre_mapping_file_offset,int64_t pre_mapping_mmap_size)59 MemoryMappedFile::Create(const Filesystem& filesystem,
60 std::string_view file_path, Strategy mmap_strategy,
61 int64_t max_file_size, int64_t pre_mapping_file_offset,
62 int64_t pre_mapping_mmap_size) {
63 if (max_file_size <= 0 || max_file_size > kMaxFileSize) {
64 return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
65 "Invalid max file size %" PRId64 " for MemoryMappedFile",
66 max_file_size));
67 }
68
69 // We need at least pre_mapping_file_offset + pre_mapping_mmap_size bytes for
70 // the underlying file size, so max_file_size should be at least
71 // pre_mapping_file_offset + pre_mapping_mmap_size. Safe integer check.
72 if (pre_mapping_file_offset < 0 || pre_mapping_mmap_size < 0 ||
73 pre_mapping_file_offset > max_file_size - pre_mapping_mmap_size) {
74 return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
75 "Invalid pre-mapping file offset %" PRId64 " and mmap size %" PRId64
76 " with max file size %" PRId64 "for MemoryMappedFile",
77 pre_mapping_file_offset, pre_mapping_mmap_size, max_file_size));
78 }
79
80 ICING_ASSIGN_OR_RETURN(
81 MemoryMappedFile mmapped_file,
82 Create(filesystem, file_path, mmap_strategy, max_file_size));
83
84 if (pre_mapping_mmap_size > 0) {
85 ICING_RETURN_IF_ERROR(
86 mmapped_file.RemapImpl(pre_mapping_file_offset, pre_mapping_mmap_size));
87 }
88
89 return std::move(mmapped_file);
90 }
91
MemoryMappedFile(const Filesystem & filesystem,std::string_view file_path,Strategy mmap_strategy,int64_t max_file_size,int64_t file_size)92 MemoryMappedFile::MemoryMappedFile(const Filesystem& filesystem,
93 std::string_view file_path,
94 Strategy mmap_strategy,
95 int64_t max_file_size, int64_t file_size)
96 : filesystem_(&filesystem),
97 file_path_(file_path),
98 strategy_(mmap_strategy),
99 max_file_size_(max_file_size),
100 file_size_(file_size),
101 mmap_result_(nullptr),
102 file_offset_(0),
103 mmap_size_(0),
104 alignment_adjustment_(0) {}
105
MemoryMappedFile(MemoryMappedFile && other)106 MemoryMappedFile::MemoryMappedFile(MemoryMappedFile&& other)
107 // Make sure that mmap_result_ is a nullptr before we call Swap. We don't
108 // care what values the remaining members hold before we swap into other,
109 // but if mmap_result_ holds a non-NULL value before we initialized anything
110 // then other will try to free memory at that address when it's destroyed!
111 : mmap_result_(nullptr) {
112 Swap(&other);
113 }
114
operator =(MemoryMappedFile && other)115 MemoryMappedFile& MemoryMappedFile::operator=(MemoryMappedFile&& other) {
116 // Swap all of our elements with other. This will ensure that both this now
117 // holds other's previous resources and that this's previous resources will be
118 // properly freed when other is destructed at the end of this function.
119 Swap(&other);
120 return *this;
121 }
122
~MemoryMappedFile()123 MemoryMappedFile::~MemoryMappedFile() { Unmap(); }
124
Unmap()125 void MemoryMappedFile::MemoryMappedFile::Unmap() {
126 if (mmap_result_ != nullptr) {
127 munmap(mmap_result_, adjusted_mmap_size());
128 mmap_result_ = nullptr;
129 }
130
131 file_offset_ = 0;
132 mmap_size_ = 0;
133 alignment_adjustment_ = 0;
134 }
135
Remap(int64_t file_offset,int64_t mmap_size)136 libtextclassifier3::Status MemoryMappedFile::Remap(int64_t file_offset,
137 int64_t mmap_size) {
138 return RemapImpl(file_offset, mmap_size);
139 }
140
GrowAndRemapIfNecessary(int64_t new_file_offset,int64_t new_mmap_size)141 libtextclassifier3::Status MemoryMappedFile::GrowAndRemapIfNecessary(
142 int64_t new_file_offset, int64_t new_mmap_size) {
143 // We need at least new_file_offset + new_mmap_size bytes for the underlying
144 // file size, and it should not exceed max_file_size_. Safe integer check.
145 if (new_file_offset < 0 || new_mmap_size < 0 ||
146 new_file_offset > max_file_size_ - new_mmap_size) {
147 return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
148 "Invalid new file offset %" PRId64 " and new mmap size %" PRId64
149 " with max file size %" PRId64 "for MemoryMappedFile",
150 new_file_offset, new_mmap_size, max_file_size_));
151 }
152
153 if (new_mmap_size == 0) {
154 // Unmap any previously mmapped region.
155 Unmap();
156 return libtextclassifier3::Status::OK;
157 }
158
159 ICING_RETURN_IF_ERROR(GrowFileSize(new_file_offset + new_mmap_size));
160
161 if (new_file_offset != file_offset_ || new_mmap_size > mmap_size_) {
162 ICING_RETURN_IF_ERROR(RemapImpl(new_file_offset, new_mmap_size));
163 }
164
165 return libtextclassifier3::Status::OK;
166 }
167
PersistToDisk()168 libtextclassifier3::Status MemoryMappedFile::PersistToDisk() {
169 if (strategy_ == Strategy::READ_ONLY) {
170 return absl_ports::FailedPreconditionError(absl_ports::StrCat(
171 "Attempting to PersistToDisk on a read-only file: ", file_path_));
172 }
173
174 if (mmap_result_ == nullptr) {
175 // Nothing mapped to sync.
176 return libtextclassifier3::Status::OK;
177 }
178
179 // Sync actual file size via system call.
180 int64_t actual_file_size = filesystem_->GetFileSize(file_path_.c_str());
181 if (actual_file_size == Filesystem::kBadFileSize) {
182 return absl_ports::InternalError("Unable to retrieve file size");
183 }
184 file_size_ = actual_file_size;
185
186 if (strategy_ == Strategy::READ_WRITE_AUTO_SYNC &&
187 // adjusted_mmap_size(), which is the mmap size after alignment
188 // adjustment, may be larger than the actual underlying file size since we
189 // can pre-mmap a large memory region before growing the file. Therefore,
190 // we should std::min with file_size_ - adjusted_offset() as the msync
191 // size.
192 msync(mmap_result_,
193 std::min(file_size_ - adjusted_offset(), adjusted_mmap_size()),
194 MS_SYNC) != 0) {
195 return absl_ports::InternalError(
196 absl_ports::StrCat("Unable to sync file using msync(): ", file_path_));
197 }
198
199 // In order to prevent automatic syncing of changes, files that use the
200 // READ_WRITE_MANUAL_SYNC strategy are mmapped using MAP_PRIVATE. Such files
201 // can't be synced using msync(). So, we have to directly write to the
202 // underlying file to update it.
203 if (strategy_ == Strategy::READ_WRITE_MANUAL_SYNC &&
204 // Contents before file_offset_ won't be modified by the caller, so we
205 // only need to PWrite contents starting at file_offset_. mmap_size_ may
206 // be larger than the actual underlying file size since we can pre-mmap a
207 // large memory before growing the file. Therefore, we should std::min
208 // with file_size_ - file_offset_ as the PWrite size.
209 !filesystem_->PWrite(file_path_.c_str(), file_offset_, region(),
210 std::min(mmap_size_, file_size_ - file_offset_))) {
211 return absl_ports::InternalError(
212 absl_ports::StrCat("Unable to sync file using PWrite(): ", file_path_));
213 }
214
215 return libtextclassifier3::Status::OK;
216 }
217
OptimizeFor(AccessPattern access_pattern)218 libtextclassifier3::Status MemoryMappedFile::OptimizeFor(
219 AccessPattern access_pattern) {
220 int madvise_flag = 0;
221 if (access_pattern == AccessPattern::ACCESS_ALL) {
222 madvise_flag = MADV_WILLNEED;
223 } else if (access_pattern == AccessPattern::ACCESS_NONE) {
224 madvise_flag = MADV_DONTNEED;
225 } else if (access_pattern == AccessPattern::ACCESS_RANDOM) {
226 madvise_flag = MADV_RANDOM;
227 } else if (access_pattern == AccessPattern::ACCESS_SEQUENTIAL) {
228 madvise_flag = MADV_SEQUENTIAL;
229 }
230
231 if (madvise(mmap_result_, adjusted_mmap_size(), madvise_flag) != 0) {
232 return absl_ports::InternalError(absl_ports::StrCat(
233 "Unable to madvise file ", file_path_, "; Error: ", strerror(errno)));
234 }
235 return libtextclassifier3::Status::OK;
236 }
237
GrowFileSize(int64_t new_file_size)238 libtextclassifier3::Status MemoryMappedFile::GrowFileSize(
239 int64_t new_file_size) {
240 // Early return if new_file_size doesn't exceed the cached file size
241 // (file_size_). It saves a system call for getting the actual file size and
242 // reduces latency significantly.
243 if (new_file_size <= file_size_) {
244 return libtextclassifier3::Status::OK;
245 }
246
247 if (new_file_size > max_file_size_) {
248 return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
249 "new file size %" PRId64 " exceeds maximum file size allowed, %" PRId64
250 " bytes",
251 new_file_size, max_file_size_));
252 }
253
254 // Sync actual file size via system call.
255 int64_t actual_file_size = filesystem_->GetFileSize(file_path_.c_str());
256 if (actual_file_size == Filesystem::kBadFileSize) {
257 return absl_ports::InternalError("Unable to retrieve file size");
258 }
259 file_size_ = actual_file_size;
260
261 // Early return again if new_file_size doesn't exceed actual_file_size. It
262 // saves system calls for opening and closing file descriptor.
263 if (new_file_size <= actual_file_size) {
264 return libtextclassifier3::Status::OK;
265 }
266
267 if (strategy_ == Strategy::READ_ONLY) {
268 return absl_ports::FailedPreconditionError(absl_ports::StrCat(
269 "Attempting to grow a read-only file: ", file_path_));
270 }
271
272 // We use Write here rather than Grow because Grow doesn't actually allocate
273 // an underlying disk block. This can lead to problems with mmap because mmap
274 // has no effective way to signal that it was impossible to allocate the disk
275 // block and ends up crashing instead. Write will force the allocation of
276 // these blocks, which will ensure that any failure to grow will surface here.
277 int64_t page_size = system_page_size();
278 auto buf = std::make_unique<uint8_t[]>(page_size);
279 int64_t size_to_write = std::min(page_size - (file_size_ % page_size),
280 new_file_size - file_size_);
281 ScopedFd sfd(filesystem_->OpenForAppend(file_path_.c_str()));
282 if (!sfd.is_valid()) {
283 return absl_ports::InternalError(
284 absl_ports::StrCat("Couldn't open file ", file_path_));
285 }
286 while (size_to_write > 0 && file_size_ < new_file_size) {
287 if (!filesystem_->Write(sfd.get(), buf.get(), size_to_write)) {
288 return absl_ports::InternalError(
289 absl_ports::StrCat("Couldn't grow file ", file_path_));
290 }
291 file_size_ += size_to_write;
292 size_to_write = std::min(page_size - (file_size_ % page_size),
293 new_file_size - file_size_);
294 }
295
296 return libtextclassifier3::Status::OK;
297 }
298
RemapImpl(int64_t new_file_offset,int64_t new_mmap_size)299 libtextclassifier3::Status MemoryMappedFile::RemapImpl(int64_t new_file_offset,
300 int64_t new_mmap_size) {
301 if (new_file_offset < 0) {
302 return absl_ports::OutOfRangeError("Invalid file offset");
303 }
304
305 if (new_mmap_size < 0) {
306 return absl_ports::OutOfRangeError("Invalid mmap size");
307 }
308
309 if (new_mmap_size == 0) {
310 // First unmap any previously mmapped region.
311 Unmap();
312 return libtextclassifier3::Status::OK;
313 }
314
315 int64_t new_aligned_offset =
316 math_util::RoundDownTo(new_file_offset, system_page_size());
317 int64_t new_alignment_adjustment = new_file_offset - new_aligned_offset;
318 int64_t new_adjusted_mmap_size = new_alignment_adjustment + new_mmap_size;
319
320 int mmap_flags = 0;
321 // Determines if the mapped region should just be readable or also writable.
322 int protection_flags = 0;
323 ScopedFd fd;
324 switch (strategy_) {
325 case Strategy::READ_ONLY: {
326 mmap_flags = MAP_PRIVATE;
327 protection_flags = PROT_READ;
328 fd.reset(filesystem_->OpenForRead(file_path_.c_str()));
329 break;
330 }
331 case Strategy::READ_WRITE_AUTO_SYNC: {
332 mmap_flags = MAP_SHARED;
333 protection_flags = PROT_READ | PROT_WRITE;
334 fd.reset(filesystem_->OpenForWrite(file_path_.c_str()));
335 break;
336 }
337 case Strategy::READ_WRITE_MANUAL_SYNC: {
338 mmap_flags = MAP_PRIVATE;
339 protection_flags = PROT_READ | PROT_WRITE;
340 // TODO(cassiewang) MAP_PRIVATE effectively makes it a read-only file.
341 // figure out if we can open this file in read-only mode.
342 fd.reset(filesystem_->OpenForWrite(file_path_.c_str()));
343 break;
344 }
345 default:
346 return absl_ports::UnknownError(IcingStringUtil::StringPrintf(
347 "Invalid value in switch statement: %d", strategy_));
348 }
349
350 if (!fd.is_valid()) {
351 return absl_ports::InternalError(absl_ports::StrCat(
352 "Unable to open file meant to be mmapped: ", file_path_));
353 }
354
355 void* new_mmap_result =
356 mmap(nullptr, new_adjusted_mmap_size, protection_flags, mmap_flags,
357 fd.get(), new_aligned_offset);
358
359 if (new_mmap_result == MAP_FAILED) {
360 new_mmap_result = nullptr;
361 return absl_ports::InternalError(absl_ports::StrCat(
362 "Failed to mmap region due to error: ", strerror(errno)));
363 }
364
365 // Now we know that we have successfully created a new mapping. We can free
366 // the old one and switch to the new one.
367 Unmap();
368
369 mmap_result_ = new_mmap_result;
370 file_offset_ = new_file_offset;
371 mmap_size_ = new_mmap_size;
372 alignment_adjustment_ = new_alignment_adjustment;
373 return libtextclassifier3::Status::OK;
374 }
375
Swap(MemoryMappedFile * other)376 void MemoryMappedFile::Swap(MemoryMappedFile* other) {
377 std::swap(filesystem_, other->filesystem_);
378 std::swap(file_path_, other->file_path_);
379 std::swap(strategy_, other->strategy_);
380 std::swap(max_file_size_, other->max_file_size_);
381 std::swap(file_size_, other->file_size_);
382 std::swap(mmap_result_, other->mmap_result_);
383 std::swap(file_offset_, other->file_offset_);
384 std::swap(mmap_size_, other->mmap_size_);
385 std::swap(alignment_adjustment_, other->alignment_adjustment_);
386 }
387
388 } // namespace lib
389 } // namespace icing
390