xref: /aosp_15_r20/external/cronet/base/i18n/icu_mergeable_data_file.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2022 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/i18n/icu_mergeable_data_file.h"
6 
7 #include <sys/mman.h>
8 
9 #include "base/check.h"
10 #include "base/check_op.h"
11 #include "base/debug/alias.h"
12 #include "base/debug/dump_without_crashing.h"
13 #include "base/hash/hash.h"
14 #include "base/numerics/safe_conversions.h"
15 #include "base/strings/string_number_conversions.h"
16 #include "base/threading/scoped_blocking_call.h"
17 #include "build/chromeos_buildflags.h"
18 
19 namespace base::i18n {
20 
21 // Enable merging of `icudtl.dat` in Lacros.
22 BASE_FEATURE(kLacrosMergeIcuDataFile,
23              "LacrosMergeIcuDataFile",
24              base::FEATURE_ENABLED_BY_DEFAULT);
25 
26 namespace {
27 
28 #if BUILDFLAG(IS_CHROMEOS_DEVICE)
29 // Path of Ash's ICU data file.
30 constexpr char kIcuDataFileAshPath[] = "/opt/google/chrome/icudtl.dat";
31 #endif  // BUILDFLAG(IS_CHROMEOS_DEVICE)
32 
33 // Expected size of a system page.
34 constexpr int64_t kPageSize = 0x1000;
35 
36 // Size of a page hash. Changing this will break compatibility
37 // with existing `icudtl.dat.hash` files, so be careful.
38 constexpr size_t kHashBytes = 8;
39 static_assert(sizeof(IcuMergeableDataFile::HashType) == kHashBytes);
40 
HashPage(const uint8_t * page)41 inline IcuMergeableDataFile::HashType HashPage(const uint8_t* page) {
42   return FastHash(base::make_span(page, static_cast<size_t>(kPageSize)));
43 }
44 
ReadHash(const uint8_t * data,size_t offset)45 IcuMergeableDataFile::HashType ReadHash(const uint8_t* data, size_t offset) {
46   // TODO(crbug/1503551): upgrade to CHECK.
47   DUMP_WILL_BE_CHECK_EQ(0ul, offset % kHashBytes);
48   IcuMergeableDataFile::HashType hash = 0;
49   for (size_t i = 0; i < kHashBytes; i++) {
50     IcuMergeableDataFile::HashType byte = data[offset + i];
51     hash |= byte << (i * 8);
52   }
53   return hash;
54 }
55 
NPages(size_t length)56 constexpr size_t NPages(size_t length) {
57   return (length + kPageSize - 1) / kPageSize;
58 }
59 
60 }  // namespace
61 
62 class AshMemoryMappedFile {
63  public:
Initialize(File ash_file)64   bool Initialize(File ash_file) {
65     fd_ = ash_file.GetPlatformFile();
66     return memory_mapped_file_.Initialize(std::move(ash_file));
67   }
68 
fd() const69   PlatformFile fd() const { return fd_; }
data() const70   const uint8_t* data() const { return memory_mapped_file_.data(); }
length() const71   size_t length() const { return memory_mapped_file_.length(); }
72 
73  private:
74   PlatformFile fd_;
75   MemoryMappedFile memory_mapped_file_;
76 };
77 
MmapAshFile(const FilePath & ash_file_path)78 std::unique_ptr<AshMemoryMappedFile> MmapAshFile(
79     const FilePath& ash_file_path) {
80   ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
81 
82   // Open Ash's data file.
83   File ash_file(FilePath(ash_file_path), File::FLAG_OPEN | File::FLAG_READ);
84 
85   // Mmap Ash's data file.
86   auto ash_mapped_file = std::make_unique<AshMemoryMappedFile>();
87   bool map_successful = ash_mapped_file->Initialize(std::move(ash_file));
88   if (!map_successful) {
89     PLOG(DFATAL) << "Failed to mmap Ash's icudtl.dat";
90     return nullptr;
91   }
92 
93   return ash_mapped_file;
94 }
95 
96 // Class wrapping the memory-merging logic for `icudtl.dat`.
97 IcuMergeableDataFile::IcuMergeableDataFile() = default;
98 
~IcuMergeableDataFile()99 IcuMergeableDataFile::~IcuMergeableDataFile() {
100   if (lacros_data_) {
101     ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
102     munmap(lacros_data_, lacros_length_);
103   }
104 }
105 
106 IcuMergeableDataFile::Hashes::Hashes() = default;
Hashes(HashToOffsetMap ash,std::vector<HashType> lacros)107 IcuMergeableDataFile::Hashes::Hashes(HashToOffsetMap ash,
108                                      std::vector<HashType> lacros)
109     : ash(std::move(ash)), lacros(std::move(lacros)) {}
110 IcuMergeableDataFile::Hashes::Hashes(Hashes&& other) = default;
111 IcuMergeableDataFile::Hashes& IcuMergeableDataFile::Hashes::operator=(
112     Hashes&& other) = default;
113 IcuMergeableDataFile::Hashes::~Hashes() = default;
114 
Initialize(File lacros_file,MemoryMappedFile::Region region)115 bool IcuMergeableDataFile::Initialize(File lacros_file,
116                                       MemoryMappedFile::Region region) {
117   // TODO(crbug/1503551): upgrade to CHECK.
118   DUMP_WILL_BE_CHECK(region == MemoryMappedFile::Region::kWholeFile);
119   DUMP_WILL_BE_CHECK(!lacros_file_.IsValid())
120       << "ICUDataFile::Initialize called twice";
121 
122   lacros_file_ = std::move(lacros_file);
123   int64_t lacros_length = lacros_file_.GetLength();
124   if (lacros_length < 0) {
125     return false;
126   }
127   // Narrow to size_t, since it's used for pointer arithmetic, mmap and other
128   // APIs that accept size_t.
129   lacros_length_ = base::checked_cast<size_t>(lacros_length);
130 
131   // Map Lacros's version of `icudtl.dat`, then attempt merging with Ash.
132   bool map_successful = MmapLacrosFile(/*remap=*/false);
133 
134 #if BUILDFLAG(IS_CHROMEOS_DEVICE)
135   // If we're inside an actual ChromeOS system (i.e. not just in
136   // linux-lacros-rel) then we can expect Ash Chrome (and its version of
137   // `icudtl.dat`) to be present in the default directory.
138   // In that case, we can attempt merging.
139   if (map_successful && base::FeatureList::IsEnabled(kLacrosMergeIcuDataFile)) {
140     bool merge_successful = MergeWithAshVersion(FilePath(kIcuDataFileAshPath));
141     // If we hit a critical failure while merging, remap Lacros's version.
142     if (!merge_successful) {
143       PLOG(DFATAL) << "Attempt to merge Lacros's icudtl.dat with Ash's failed";
144       map_successful = MmapLacrosFile(/*remap=*/true);
145     }
146   }
147 #endif  // BUILDFLAG(IS_CHROMEOS_DEVICE)
148 
149   return map_successful;
150 }
151 
data() const152 const uint8_t* IcuMergeableDataFile::data() const {
153   return static_cast<const uint8_t*>(lacros_data_);
154 }
155 
MergeWithAshVersion(const FilePath & ash_file_path)156 bool IcuMergeableDataFile::MergeWithAshVersion(const FilePath& ash_file_path) {
157   // Verify the assumption that page size is 4K.
158   // TODO(crbug/1503551): upgrade to CHECK.
159   DUMP_WILL_BE_CHECK_EQ(sysconf(_SC_PAGESIZE), kPageSize);
160 
161   // Mmap Ash's data file.
162   auto ash_file = MmapAshFile(ash_file_path);
163   if (!ash_file)
164     return true;  // Non-critical failure.
165 
166   // Calculate hashes for each page in Ash and Lacros's data files.
167   Hashes hashes = CalculateHashes(*ash_file, ash_file_path);
168 
169   // Find Lacros's ICU pages that are duplicated in Ash.
170   size_t lacros_offset = 0;
171   while (lacros_offset < lacros_length_) {
172     Slice ash_overlap = FindOverlap(*ash_file, hashes, lacros_offset);
173     // If there's no overlap, move to the next page and keep scanning.
174     if (ash_overlap.length == 0) {
175       lacros_offset += kPageSize;
176       continue;
177     }
178 
179     // Found a sequence of equal pages, merge them with Ash.
180     bool merge_successful = MergeArea(*ash_file, ash_overlap, lacros_offset);
181     if (!merge_successful)
182       return false;  // Critical failure.
183 
184     lacros_offset += ash_overlap.length;
185   }
186 
187   return true;  // Success.
188 }
189 
MmapLacrosFile(bool remap)190 bool IcuMergeableDataFile::MmapLacrosFile(bool remap) {
191   ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
192 
193   if (remap) {
194     // If `remap` == true, we add the MAP_FIXED option to unmap the
195     // existing map and replace it with the new one in a single operation.
196     // TODO(crbug/1503551): upgrade to CHECK.
197     DUMP_WILL_BE_CHECK_NE(lacros_data_, nullptr);
198     lacros_data_ = static_cast<uint8_t*>(
199         mmap(lacros_data_, lacros_length_, PROT_READ, MAP_FIXED | MAP_PRIVATE,
200              lacros_file_.GetPlatformFile(), 0));
201   } else {
202     // Otherwise, simply map the file.
203     lacros_data_ = static_cast<uint8_t*>(
204         mmap(nullptr, lacros_length_, PROT_READ, MAP_PRIVATE,
205              lacros_file_.GetPlatformFile(), 0));
206   }
207 
208   if (lacros_data_ == MAP_FAILED) {
209     lacros_data_ = nullptr;
210     PLOG(DFATAL) << "Failed to mmap Lacros's icudtl.dat";
211     return false;
212   }
213 
214   return true;
215 }
216 
FindOverlap(const AshMemoryMappedFile & ash_file,const Hashes & hashes,size_t lacros_offset) const217 IcuMergeableDataFile::Slice IcuMergeableDataFile::FindOverlap(
218     const AshMemoryMappedFile& ash_file,
219     const Hashes& hashes,
220     size_t lacros_offset) const {
221   // Search for equal pages by hash.
222   HashType hash = hashes.lacros[lacros_offset / kPageSize];
223   auto search = hashes.ash.find(hash);
224   if (search == hashes.ash.end())
225     return {0, 0};
226 
227   // Count how many pages (if any) have the same content.
228   size_t ash_offset = search->second;
229   size_t overlap_length =
230       kPageSize * CountEqualPages(ash_file, ash_file.data() + ash_offset,
231                                   lacros_data_ + lacros_offset);
232 
233   return {ash_offset, overlap_length};
234 }
235 
MergeArea(const AshMemoryMappedFile & ash_file,const Slice & ash_overlap,size_t lacros_offset)236 bool IcuMergeableDataFile::MergeArea(const AshMemoryMappedFile& ash_file,
237                                      const Slice& ash_overlap,
238                                      size_t lacros_offset) {
239   ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
240 
241   // Unmap from Lacros's file and map from Ash's file instead.
242   // NOTE: "[...] If the memory region specified by addr and length overlaps
243   //        pages of any existing mapping(s), then the overlapped part of the
244   //        existing mapping(s) will be discarded.  If the specified address
245   //        cannot be used, mmap() will fail."
246   // Reference: https://man7.org/linux/man-pages/man2/mmap.2.html
247   uint8_t* map_result = static_cast<uint8_t*>(
248       mmap(lacros_data_ + lacros_offset, ash_overlap.length, PROT_READ,
249            MAP_FIXED | MAP_PRIVATE, ash_file.fd(), ash_overlap.offset));
250 
251   if (map_result == MAP_FAILED) {
252     PLOG(DFATAL) << "Couldn't mmap Ash's icudtl.dat while merging";
253     return false;
254   }
255 
256   return true;
257 }
258 
CountEqualPages(const AshMemoryMappedFile & ash_file,const uint8_t * ash_page,const uint8_t * lacros_page) const259 size_t IcuMergeableDataFile::CountEqualPages(
260     const AshMemoryMappedFile& ash_file,
261     const uint8_t* ash_page,
262     const uint8_t* lacros_page) const {
263   // TODO(crbug/1478718): Remove once the cause of this crash is identified.
264   if (!ash_page || !lacros_page) {
265     const uint8_t* debug_ash_page = ash_page;
266     const uint8_t* debug_lacros_page = lacros_page;
267     base::debug::Alias(&debug_ash_page);
268     base::debug::Alias(&debug_lacros_page);
269     base::debug::DumpWithoutCrashing();
270     return 0;
271   }
272 
273   size_t pages = 0;
274   const uint8_t* ash_end = ash_file.data() + ash_file.length();
275   const uint8_t* lacros_end = lacros_data_ + lacros_length_;
276 
277   while (ash_page < ash_end && lacros_page < lacros_end &&
278          memcmp(ash_page, lacros_page, kPageSize) == 0) {
279     ash_page += kPageSize;
280     lacros_page += kPageSize;
281     pages++;
282   }
283 
284   return pages;
285 }
286 
CalculateHashes(const AshMemoryMappedFile & ash_file,const FilePath & ash_file_path)287 IcuMergeableDataFile::Hashes IcuMergeableDataFile::CalculateHashes(
288     const AshMemoryMappedFile& ash_file,
289     const FilePath& ash_file_path) {
290   // Try loading hashes from the pre-computed files first.
291   Hashes hashes;
292   used_cached_hashes_ = MaybeLoadCachedHashes(ash_file, ash_file_path, hashes);
293 
294   if (!used_cached_hashes_) {
295     // Calculate hashes for each page in Ash's data file.
296     std::vector<HashOffset> ash_hashes;
297     ash_hashes.reserve(NPages(ash_file.length()));
298     for (size_t offset = 0; offset < ash_file.length(); offset += kPageSize) {
299       // NOTE: "POSIX specifies that the system shall always zero fill any
300       //        partial page at the end of the object [...]".
301       // Reference: https://man7.org/linux/man-pages/man2/mmap.2.html
302       //
303       // Therefore this code works even if the size of Ash's `icudtl.dat` is not
304       // a multiple of the page size.
305       HashType hash = HashPage(ash_file.data() + offset);
306       ash_hashes.emplace_back(hash, offset);
307     }
308 
309     // Calculate hashes for each page in Lacros's data file.
310     hashes.lacros.reserve(NPages(lacros_length_));
311     for (size_t offset = 0; offset < lacros_length_; offset += kPageSize) {
312       HashType hash = HashPage(lacros_data_ + offset);
313       hashes.lacros.emplace_back(hash);
314     }
315 
316     hashes.ash = HashToOffsetMap(std::move(ash_hashes));
317   }
318 
319   return hashes;
320 }
321 
MaybeLoadCachedHashes(const AshMemoryMappedFile & ash_file,const FilePath & ash_file_path,Hashes & hashes)322 bool IcuMergeableDataFile::MaybeLoadCachedHashes(
323     const AshMemoryMappedFile& ash_file,
324     const FilePath& ash_file_path,
325     Hashes& hashes) {
326   FilePath ash_hash_path =
327       ash_file_path.AddExtensionASCII(kIcuDataFileHashExtension);
328   FilePath lacros_hash_path =
329       GetLacrosFilePath().AddExtensionASCII(kIcuDataFileHashExtension);
330 
331   // Memory map Ash's `icudtl.dat.hash`. Ensure its size is valid and consistent
332   // with the current version of `icudtl.dat`.
333   MemoryMappedFile ash_hash_file;
334   size_t ash_pages = NPages(ash_file.length());
335   bool result = ash_hash_file.Initialize(ash_hash_path);
336   if (!result || (ash_hash_file.length() % kHashBytes) ||
337       ((ash_hash_file.length() / kHashBytes) != ash_pages)) {
338     return false;
339   }
340 
341   // Same for Lacros's `icudtl.dat.hash`.
342   MemoryMappedFile lacros_hash_file;
343   size_t lacros_pages = NPages(lacros_length_);
344   result = lacros_hash_file.Initialize(lacros_hash_path);
345   if (!result || (lacros_hash_file.length() % kHashBytes) ||
346       ((lacros_hash_file.length() / kHashBytes) != lacros_pages)) {
347     return false;
348   }
349 
350   // Load Ash's hashes.
351   std::vector<HashOffset> ash_hashes;
352   ash_hashes.reserve(ash_pages);
353   for (size_t i = 0; i < ash_hash_file.length(); i += kHashBytes) {
354     HashType hash = ReadHash(ash_hash_file.data(), i);
355     size_t offset = (i / kHashBytes) * kPageSize;
356     ash_hashes.emplace_back(hash, offset);
357   }
358 
359   // Load Lacros's hashes.
360   hashes.lacros.reserve(lacros_pages);
361   for (size_t i = 0; i < lacros_hash_file.length(); i += kHashBytes) {
362     HashType hash = ReadHash(lacros_hash_file.data(), i);
363     hashes.lacros.emplace_back(hash);
364   }
365 
366   hashes.ash = HashToOffsetMap(std::move(ash_hashes));
367   return true;
368 }
369 
GetLacrosFilePath()370 FilePath IcuMergeableDataFile::GetLacrosFilePath() {
371   // /proc/self/fd/<fd>
372   //   This is a subdirectory containing one entry for each file
373   //   which the process has open, named by its file descriptor,
374   //   and which is a symbolic link to the actual file.
375   // Reference: proc(5) - Linux manual page.
376   char path[PATH_MAX];
377   FilePath proc_path =
378       FilePath("/proc/self/fd/")
379           .AppendASCII(base::NumberToString(lacros_file_.GetPlatformFile()));
380 
381   // We read the content of the symbolic link to find the path of the
382   // file associated with the file descriptor.
383   int64_t path_len = readlink(proc_path.value().c_str(), path, sizeof(path));
384   // TODO(crbug/1503551): upgrade to CHECK.
385   DUMP_WILL_BE_CHECK_NE(path_len, -1);
386   DUMP_WILL_BE_CHECK_LT(path_len, PATH_MAX);
387 
388   return FilePath(std::string(path, 0, path_len));
389 }
390 
391 }  // namespace base::i18n
392