xref: /aosp_15_r20/external/icing/icing/file/memory-mapped-file.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/file/memory-mapped-file.h"
16 
17 #include <sys/mman.h>
18 
19 #include <cerrno>
20 #include <cinttypes>
21 #include <memory>
22 
23 #include "icing/text_classifier/lib3/utils/base/status.h"
24 #include "icing/text_classifier/lib3/utils/base/statusor.h"
25 #include "icing/absl_ports/canonical_errors.h"
26 #include "icing/absl_ports/str_cat.h"
27 #include "icing/file/filesystem.h"
28 #include "icing/legacy/core/icing-string-util.h"
29 #include "icing/util/math-util.h"
30 #include "icing/util/status-macros.h"
31 
32 namespace icing {
33 namespace lib {
34 
35 /* static */ libtextclassifier3::StatusOr<MemoryMappedFile>
Create(const Filesystem & filesystem,std::string_view file_path,Strategy mmap_strategy,int64_t max_file_size)36 MemoryMappedFile::Create(const Filesystem& filesystem,
37                          std::string_view file_path, Strategy mmap_strategy,
38                          int64_t max_file_size) {
39   if (max_file_size <= 0 || max_file_size > kMaxFileSize) {
40     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
41         "Invalid max file size %" PRId64 " for MemoryMappedFile",
42         max_file_size));
43   }
44 
45   const std::string file_path_str(file_path);
46   int64_t file_size = filesystem.FileExists(file_path_str.c_str())
47                           ? filesystem.GetFileSize(file_path_str.c_str())
48                           : 0;
49   if (file_size == Filesystem::kBadFileSize) {
50     return absl_ports::InternalError(
51         absl_ports::StrCat("Bad file size for file ", file_path));
52   }
53 
54   return MemoryMappedFile(filesystem, file_path, mmap_strategy, max_file_size,
55                           file_size);
56 }
57 
58 /* static */ libtextclassifier3::StatusOr<MemoryMappedFile>
Create(const Filesystem & filesystem,std::string_view file_path,Strategy mmap_strategy,int64_t max_file_size,int64_t pre_mapping_file_offset,int64_t pre_mapping_mmap_size)59 MemoryMappedFile::Create(const Filesystem& filesystem,
60                          std::string_view file_path, Strategy mmap_strategy,
61                          int64_t max_file_size, int64_t pre_mapping_file_offset,
62                          int64_t pre_mapping_mmap_size) {
63   if (max_file_size <= 0 || max_file_size > kMaxFileSize) {
64     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
65         "Invalid max file size %" PRId64 " for MemoryMappedFile",
66         max_file_size));
67   }
68 
69   // We need at least pre_mapping_file_offset + pre_mapping_mmap_size bytes for
70   // the underlying file size, so max_file_size should be at least
71   // pre_mapping_file_offset + pre_mapping_mmap_size. Safe integer check.
72   if (pre_mapping_file_offset < 0 || pre_mapping_mmap_size < 0 ||
73       pre_mapping_file_offset > max_file_size - pre_mapping_mmap_size) {
74     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
75         "Invalid pre-mapping file offset %" PRId64 " and mmap size %" PRId64
76         " with max file size %" PRId64 "for MemoryMappedFile",
77         pre_mapping_file_offset, pre_mapping_mmap_size, max_file_size));
78   }
79 
80   ICING_ASSIGN_OR_RETURN(
81       MemoryMappedFile mmapped_file,
82       Create(filesystem, file_path, mmap_strategy, max_file_size));
83 
84   if (pre_mapping_mmap_size > 0) {
85     ICING_RETURN_IF_ERROR(
86         mmapped_file.RemapImpl(pre_mapping_file_offset, pre_mapping_mmap_size));
87   }
88 
89   return std::move(mmapped_file);
90 }
91 
MemoryMappedFile(const Filesystem & filesystem,std::string_view file_path,Strategy mmap_strategy,int64_t max_file_size,int64_t file_size)92 MemoryMappedFile::MemoryMappedFile(const Filesystem& filesystem,
93                                    std::string_view file_path,
94                                    Strategy mmap_strategy,
95                                    int64_t max_file_size, int64_t file_size)
96     : filesystem_(&filesystem),
97       file_path_(file_path),
98       strategy_(mmap_strategy),
99       max_file_size_(max_file_size),
100       file_size_(file_size),
101       mmap_result_(nullptr),
102       file_offset_(0),
103       mmap_size_(0),
104       alignment_adjustment_(0) {}
105 
MemoryMappedFile(MemoryMappedFile && other)106 MemoryMappedFile::MemoryMappedFile(MemoryMappedFile&& other)
107     // Make sure that mmap_result_ is a nullptr before we call Swap. We don't
108     // care what values the remaining members hold before we swap into other,
109     // but if mmap_result_ holds a non-NULL value before we initialized anything
110     // then other will try to free memory at that address when it's destroyed!
111     : mmap_result_(nullptr) {
112   Swap(&other);
113 }
114 
operator =(MemoryMappedFile && other)115 MemoryMappedFile& MemoryMappedFile::operator=(MemoryMappedFile&& other) {
116   // Swap all of our elements with other. This will ensure that both this now
117   // holds other's previous resources and that this's previous resources will be
118   // properly freed when other is destructed at the end of this function.
119   Swap(&other);
120   return *this;
121 }
122 
~MemoryMappedFile()123 MemoryMappedFile::~MemoryMappedFile() { Unmap(); }
124 
Unmap()125 void MemoryMappedFile::MemoryMappedFile::Unmap() {
126   if (mmap_result_ != nullptr) {
127     munmap(mmap_result_, adjusted_mmap_size());
128     mmap_result_ = nullptr;
129   }
130 
131   file_offset_ = 0;
132   mmap_size_ = 0;
133   alignment_adjustment_ = 0;
134 }
135 
Remap(int64_t file_offset,int64_t mmap_size)136 libtextclassifier3::Status MemoryMappedFile::Remap(int64_t file_offset,
137                                                    int64_t mmap_size) {
138   return RemapImpl(file_offset, mmap_size);
139 }
140 
GrowAndRemapIfNecessary(int64_t new_file_offset,int64_t new_mmap_size)141 libtextclassifier3::Status MemoryMappedFile::GrowAndRemapIfNecessary(
142     int64_t new_file_offset, int64_t new_mmap_size) {
143   // We need at least new_file_offset + new_mmap_size bytes for the underlying
144   // file size, and it should not exceed max_file_size_. Safe integer check.
145   if (new_file_offset < 0 || new_mmap_size < 0 ||
146       new_file_offset > max_file_size_ - new_mmap_size) {
147     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
148         "Invalid new file offset %" PRId64 " and new mmap size %" PRId64
149         " with max file size %" PRId64 "for MemoryMappedFile",
150         new_file_offset, new_mmap_size, max_file_size_));
151   }
152 
153   if (new_mmap_size == 0) {
154     // Unmap any previously mmapped region.
155     Unmap();
156     return libtextclassifier3::Status::OK;
157   }
158 
159   ICING_RETURN_IF_ERROR(GrowFileSize(new_file_offset + new_mmap_size));
160 
161   if (new_file_offset != file_offset_ || new_mmap_size > mmap_size_) {
162     ICING_RETURN_IF_ERROR(RemapImpl(new_file_offset, new_mmap_size));
163   }
164 
165   return libtextclassifier3::Status::OK;
166 }
167 
PersistToDisk()168 libtextclassifier3::Status MemoryMappedFile::PersistToDisk() {
169   if (strategy_ == Strategy::READ_ONLY) {
170     return absl_ports::FailedPreconditionError(absl_ports::StrCat(
171         "Attempting to PersistToDisk on a read-only file: ", file_path_));
172   }
173 
174   if (mmap_result_ == nullptr) {
175     // Nothing mapped to sync.
176     return libtextclassifier3::Status::OK;
177   }
178 
179   // Sync actual file size via system call.
180   int64_t actual_file_size = filesystem_->GetFileSize(file_path_.c_str());
181   if (actual_file_size == Filesystem::kBadFileSize) {
182     return absl_ports::InternalError("Unable to retrieve file size");
183   }
184   file_size_ = actual_file_size;
185 
186   if (strategy_ == Strategy::READ_WRITE_AUTO_SYNC &&
187       // adjusted_mmap_size(), which is the mmap size after alignment
188       // adjustment, may be larger than the actual underlying file size since we
189       // can pre-mmap a large memory region before growing the file. Therefore,
190       // we should std::min with file_size_ - adjusted_offset() as the msync
191       // size.
192       msync(mmap_result_,
193             std::min(file_size_ - adjusted_offset(), adjusted_mmap_size()),
194             MS_SYNC) != 0) {
195     return absl_ports::InternalError(
196         absl_ports::StrCat("Unable to sync file using msync(): ", file_path_));
197   }
198 
199   // In order to prevent automatic syncing of changes, files that use the
200   // READ_WRITE_MANUAL_SYNC strategy are mmapped using MAP_PRIVATE. Such files
201   // can't be synced using msync(). So, we have to directly write to the
202   // underlying file to update it.
203   if (strategy_ == Strategy::READ_WRITE_MANUAL_SYNC &&
204       // Contents before file_offset_ won't be modified by the caller, so we
205       // only need to PWrite contents starting at file_offset_. mmap_size_ may
206       // be larger than the actual underlying file size since we can pre-mmap a
207       // large memory before growing the file. Therefore, we should std::min
208       // with file_size_ - file_offset_ as the PWrite size.
209       !filesystem_->PWrite(file_path_.c_str(), file_offset_, region(),
210                            std::min(mmap_size_, file_size_ - file_offset_))) {
211     return absl_ports::InternalError(
212         absl_ports::StrCat("Unable to sync file using PWrite(): ", file_path_));
213   }
214 
215   return libtextclassifier3::Status::OK;
216 }
217 
OptimizeFor(AccessPattern access_pattern)218 libtextclassifier3::Status MemoryMappedFile::OptimizeFor(
219     AccessPattern access_pattern) {
220   int madvise_flag = 0;
221   if (access_pattern == AccessPattern::ACCESS_ALL) {
222     madvise_flag = MADV_WILLNEED;
223   } else if (access_pattern == AccessPattern::ACCESS_NONE) {
224     madvise_flag = MADV_DONTNEED;
225   } else if (access_pattern == AccessPattern::ACCESS_RANDOM) {
226     madvise_flag = MADV_RANDOM;
227   } else if (access_pattern == AccessPattern::ACCESS_SEQUENTIAL) {
228     madvise_flag = MADV_SEQUENTIAL;
229   }
230 
231   if (madvise(mmap_result_, adjusted_mmap_size(), madvise_flag) != 0) {
232     return absl_ports::InternalError(absl_ports::StrCat(
233         "Unable to madvise file ", file_path_, "; Error: ", strerror(errno)));
234   }
235   return libtextclassifier3::Status::OK;
236 }
237 
GrowFileSize(int64_t new_file_size)238 libtextclassifier3::Status MemoryMappedFile::GrowFileSize(
239     int64_t new_file_size) {
240   // Early return if new_file_size doesn't exceed the cached file size
241   // (file_size_). It saves a system call for getting the actual file size and
242   // reduces latency significantly.
243   if (new_file_size <= file_size_) {
244     return libtextclassifier3::Status::OK;
245   }
246 
247   if (new_file_size > max_file_size_) {
248     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
249         "new file size %" PRId64 " exceeds maximum file size allowed, %" PRId64
250         " bytes",
251         new_file_size, max_file_size_));
252   }
253 
254   // Sync actual file size via system call.
255   int64_t actual_file_size = filesystem_->GetFileSize(file_path_.c_str());
256   if (actual_file_size == Filesystem::kBadFileSize) {
257     return absl_ports::InternalError("Unable to retrieve file size");
258   }
259   file_size_ = actual_file_size;
260 
261   // Early return again if new_file_size doesn't exceed actual_file_size. It
262   // saves system calls for opening and closing file descriptor.
263   if (new_file_size <= actual_file_size) {
264     return libtextclassifier3::Status::OK;
265   }
266 
267   if (strategy_ == Strategy::READ_ONLY) {
268     return absl_ports::FailedPreconditionError(absl_ports::StrCat(
269         "Attempting to grow a read-only file: ", file_path_));
270   }
271 
272   // We use Write here rather than Grow because Grow doesn't actually allocate
273   // an underlying disk block. This can lead to problems with mmap because mmap
274   // has no effective way to signal that it was impossible to allocate the disk
275   // block and ends up crashing instead. Write will force the allocation of
276   // these blocks, which will ensure that any failure to grow will surface here.
277   int64_t page_size = system_page_size();
278   auto buf = std::make_unique<uint8_t[]>(page_size);
279   int64_t size_to_write = std::min(page_size - (file_size_ % page_size),
280                                    new_file_size - file_size_);
281   ScopedFd sfd(filesystem_->OpenForAppend(file_path_.c_str()));
282   if (!sfd.is_valid()) {
283     return absl_ports::InternalError(
284         absl_ports::StrCat("Couldn't open file ", file_path_));
285   }
286   while (size_to_write > 0 && file_size_ < new_file_size) {
287     if (!filesystem_->Write(sfd.get(), buf.get(), size_to_write)) {
288       return absl_ports::InternalError(
289           absl_ports::StrCat("Couldn't grow file ", file_path_));
290     }
291     file_size_ += size_to_write;
292     size_to_write = std::min(page_size - (file_size_ % page_size),
293                              new_file_size - file_size_);
294   }
295 
296   return libtextclassifier3::Status::OK;
297 }
298 
RemapImpl(int64_t new_file_offset,int64_t new_mmap_size)299 libtextclassifier3::Status MemoryMappedFile::RemapImpl(int64_t new_file_offset,
300                                                        int64_t new_mmap_size) {
301   if (new_file_offset < 0) {
302     return absl_ports::OutOfRangeError("Invalid file offset");
303   }
304 
305   if (new_mmap_size < 0) {
306     return absl_ports::OutOfRangeError("Invalid mmap size");
307   }
308 
309   if (new_mmap_size == 0) {
310     // First unmap any previously mmapped region.
311     Unmap();
312     return libtextclassifier3::Status::OK;
313   }
314 
315   int64_t new_aligned_offset =
316       math_util::RoundDownTo(new_file_offset, system_page_size());
317   int64_t new_alignment_adjustment = new_file_offset - new_aligned_offset;
318   int64_t new_adjusted_mmap_size = new_alignment_adjustment + new_mmap_size;
319 
320   int mmap_flags = 0;
321   // Determines if the mapped region should just be readable or also writable.
322   int protection_flags = 0;
323   ScopedFd fd;
324   switch (strategy_) {
325     case Strategy::READ_ONLY: {
326       mmap_flags = MAP_PRIVATE;
327       protection_flags = PROT_READ;
328       fd.reset(filesystem_->OpenForRead(file_path_.c_str()));
329       break;
330     }
331     case Strategy::READ_WRITE_AUTO_SYNC: {
332       mmap_flags = MAP_SHARED;
333       protection_flags = PROT_READ | PROT_WRITE;
334       fd.reset(filesystem_->OpenForWrite(file_path_.c_str()));
335       break;
336     }
337     case Strategy::READ_WRITE_MANUAL_SYNC: {
338       mmap_flags = MAP_PRIVATE;
339       protection_flags = PROT_READ | PROT_WRITE;
340       // TODO(cassiewang) MAP_PRIVATE effectively makes it a read-only file.
341       // figure out if we can open this file in read-only mode.
342       fd.reset(filesystem_->OpenForWrite(file_path_.c_str()));
343       break;
344     }
345     default:
346       return absl_ports::UnknownError(IcingStringUtil::StringPrintf(
347           "Invalid value in switch statement: %d", strategy_));
348   }
349 
350   if (!fd.is_valid()) {
351     return absl_ports::InternalError(absl_ports::StrCat(
352         "Unable to open file meant to be mmapped: ", file_path_));
353   }
354 
355   void* new_mmap_result =
356       mmap(nullptr, new_adjusted_mmap_size, protection_flags, mmap_flags,
357            fd.get(), new_aligned_offset);
358 
359   if (new_mmap_result == MAP_FAILED) {
360     new_mmap_result = nullptr;
361     return absl_ports::InternalError(absl_ports::StrCat(
362         "Failed to mmap region due to error: ", strerror(errno)));
363   }
364 
365   // Now we know that we have successfully created a new mapping. We can free
366   // the old one and switch to the new one.
367   Unmap();
368 
369   mmap_result_ = new_mmap_result;
370   file_offset_ = new_file_offset;
371   mmap_size_ = new_mmap_size;
372   alignment_adjustment_ = new_alignment_adjustment;
373   return libtextclassifier3::Status::OK;
374 }
375 
Swap(MemoryMappedFile * other)376 void MemoryMappedFile::Swap(MemoryMappedFile* other) {
377   std::swap(filesystem_, other->filesystem_);
378   std::swap(file_path_, other->file_path_);
379   std::swap(strategy_, other->strategy_);
380   std::swap(max_file_size_, other->max_file_size_);
381   std::swap(file_size_, other->file_size_);
382   std::swap(mmap_result_, other->mmap_result_);
383   std::swap(file_offset_, other->file_offset_);
384   std::swap(mmap_size_, other->mmap_size_);
385   std::swap(alignment_adjustment_, other->alignment_adjustment_);
386 }
387 
388 }  // namespace lib
389 }  // namespace icing
390