1 // Copyright 2022 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/i18n/icu_mergeable_data_file.h"
6
7 #include <sys/mman.h>
8
9 #include "base/check.h"
10 #include "base/check_op.h"
11 #include "base/debug/alias.h"
12 #include "base/debug/dump_without_crashing.h"
13 #include "base/hash/hash.h"
14 #include "base/numerics/safe_conversions.h"
15 #include "base/strings/string_number_conversions.h"
16 #include "base/threading/scoped_blocking_call.h"
17 #include "build/chromeos_buildflags.h"
18
19 namespace base::i18n {
20
21 // Enable merging of `icudtl.dat` in Lacros.
22 BASE_FEATURE(kLacrosMergeIcuDataFile,
23 "LacrosMergeIcuDataFile",
24 base::FEATURE_ENABLED_BY_DEFAULT);
25
26 namespace {
27
28 #if BUILDFLAG(IS_CHROMEOS_DEVICE)
29 // Path of Ash's ICU data file.
30 constexpr char kIcuDataFileAshPath[] = "/opt/google/chrome/icudtl.dat";
31 #endif // BUILDFLAG(IS_CHROMEOS_DEVICE)
32
// Size, in bytes, that a system page is expected to have (4 KiB).
constexpr int64_t kPageSize = 4096;
35
36 // Size of a page hash. Changing this will break compatibility
37 // with existing `icudtl.dat.hash` files, so be careful.
38 constexpr size_t kHashBytes = 8;
39 static_assert(sizeof(IcuMergeableDataFile::HashType) == kHashBytes);
40
HashPage(const uint8_t * page)41 inline IcuMergeableDataFile::HashType HashPage(const uint8_t* page) {
42 return FastHash(base::make_span(page, static_cast<size_t>(kPageSize)));
43 }
44
ReadHash(const uint8_t * data,size_t offset)45 IcuMergeableDataFile::HashType ReadHash(const uint8_t* data, size_t offset) {
46 // TODO(crbug/1503551): upgrade to CHECK.
47 DUMP_WILL_BE_CHECK_EQ(0ul, offset % kHashBytes);
48 IcuMergeableDataFile::HashType hash = 0;
49 for (size_t i = 0; i < kHashBytes; i++) {
50 IcuMergeableDataFile::HashType byte = data[offset + i];
51 hash |= byte << (i * 8);
52 }
53 return hash;
54 }
55
NPages(size_t length)56 constexpr size_t NPages(size_t length) {
57 return (length + kPageSize - 1) / kPageSize;
58 }
59
60 } // namespace
61
62 class AshMemoryMappedFile {
63 public:
Initialize(File ash_file)64 bool Initialize(File ash_file) {
65 fd_ = ash_file.GetPlatformFile();
66 return memory_mapped_file_.Initialize(std::move(ash_file));
67 }
68
fd() const69 PlatformFile fd() const { return fd_; }
data() const70 const uint8_t* data() const { return memory_mapped_file_.data(); }
length() const71 size_t length() const { return memory_mapped_file_.length(); }
72
73 private:
74 PlatformFile fd_;
75 MemoryMappedFile memory_mapped_file_;
76 };
77
MmapAshFile(const FilePath & ash_file_path)78 std::unique_ptr<AshMemoryMappedFile> MmapAshFile(
79 const FilePath& ash_file_path) {
80 ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
81
82 // Open Ash's data file.
83 File ash_file(FilePath(ash_file_path), File::FLAG_OPEN | File::FLAG_READ);
84
85 // Mmap Ash's data file.
86 auto ash_mapped_file = std::make_unique<AshMemoryMappedFile>();
87 bool map_successful = ash_mapped_file->Initialize(std::move(ash_file));
88 if (!map_successful) {
89 PLOG(DFATAL) << "Failed to mmap Ash's icudtl.dat";
90 return nullptr;
91 }
92
93 return ash_mapped_file;
94 }
95
96 // Class wrapping the memory-merging logic for `icudtl.dat`.
97 IcuMergeableDataFile::IcuMergeableDataFile() = default;
98
~IcuMergeableDataFile()99 IcuMergeableDataFile::~IcuMergeableDataFile() {
100 if (lacros_data_) {
101 ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
102 munmap(lacros_data_, lacros_length_);
103 }
104 }
105
106 IcuMergeableDataFile::Hashes::Hashes() = default;
Hashes(HashToOffsetMap ash,std::vector<HashType> lacros)107 IcuMergeableDataFile::Hashes::Hashes(HashToOffsetMap ash,
108 std::vector<HashType> lacros)
109 : ash(std::move(ash)), lacros(std::move(lacros)) {}
110 IcuMergeableDataFile::Hashes::Hashes(Hashes&& other) = default;
111 IcuMergeableDataFile::Hashes& IcuMergeableDataFile::Hashes::operator=(
112 Hashes&& other) = default;
113 IcuMergeableDataFile::Hashes::~Hashes() = default;
114
Initialize(File lacros_file,MemoryMappedFile::Region region)115 bool IcuMergeableDataFile::Initialize(File lacros_file,
116 MemoryMappedFile::Region region) {
117 // TODO(crbug/1503551): upgrade to CHECK.
118 DUMP_WILL_BE_CHECK(region == MemoryMappedFile::Region::kWholeFile);
119 DUMP_WILL_BE_CHECK(!lacros_file_.IsValid())
120 << "ICUDataFile::Initialize called twice";
121
122 lacros_file_ = std::move(lacros_file);
123 int64_t lacros_length = lacros_file_.GetLength();
124 if (lacros_length < 0) {
125 return false;
126 }
127 // Narrow to size_t, since it's used for pointer arithmetic, mmap and other
128 // APIs that accept size_t.
129 lacros_length_ = base::checked_cast<size_t>(lacros_length);
130
131 // Map Lacros's version of `icudtl.dat`, then attempt merging with Ash.
132 bool map_successful = MmapLacrosFile(/*remap=*/false);
133
134 #if BUILDFLAG(IS_CHROMEOS_DEVICE)
135 // If we're inside an actual ChromeOS system (i.e. not just in
136 // linux-lacros-rel) then we can expect Ash Chrome (and its version of
137 // `icudtl.dat`) to be present in the default directory.
138 // In that case, we can attempt merging.
139 if (map_successful && base::FeatureList::IsEnabled(kLacrosMergeIcuDataFile)) {
140 bool merge_successful = MergeWithAshVersion(FilePath(kIcuDataFileAshPath));
141 // If we hit a critical failure while merging, remap Lacros's version.
142 if (!merge_successful) {
143 PLOG(DFATAL) << "Attempt to merge Lacros's icudtl.dat with Ash's failed";
144 map_successful = MmapLacrosFile(/*remap=*/true);
145 }
146 }
147 #endif // BUILDFLAG(IS_CHROMEOS_DEVICE)
148
149 return map_successful;
150 }
151
data() const152 const uint8_t* IcuMergeableDataFile::data() const {
153 return static_cast<const uint8_t*>(lacros_data_);
154 }
155
MergeWithAshVersion(const FilePath & ash_file_path)156 bool IcuMergeableDataFile::MergeWithAshVersion(const FilePath& ash_file_path) {
157 // Verify the assumption that page size is 4K.
158 // TODO(crbug/1503551): upgrade to CHECK.
159 DUMP_WILL_BE_CHECK_EQ(sysconf(_SC_PAGESIZE), kPageSize);
160
161 // Mmap Ash's data file.
162 auto ash_file = MmapAshFile(ash_file_path);
163 if (!ash_file)
164 return true; // Non-critical failure.
165
166 // Calculate hashes for each page in Ash and Lacros's data files.
167 Hashes hashes = CalculateHashes(*ash_file, ash_file_path);
168
169 // Find Lacros's ICU pages that are duplicated in Ash.
170 size_t lacros_offset = 0;
171 while (lacros_offset < lacros_length_) {
172 Slice ash_overlap = FindOverlap(*ash_file, hashes, lacros_offset);
173 // If there's no overlap, move to the next page and keep scanning.
174 if (ash_overlap.length == 0) {
175 lacros_offset += kPageSize;
176 continue;
177 }
178
179 // Found a sequence of equal pages, merge them with Ash.
180 bool merge_successful = MergeArea(*ash_file, ash_overlap, lacros_offset);
181 if (!merge_successful)
182 return false; // Critical failure.
183
184 lacros_offset += ash_overlap.length;
185 }
186
187 return true; // Success.
188 }
189
MmapLacrosFile(bool remap)190 bool IcuMergeableDataFile::MmapLacrosFile(bool remap) {
191 ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
192
193 if (remap) {
194 // If `remap` == true, we add the MAP_FIXED option to unmap the
195 // existing map and replace it with the new one in a single operation.
196 // TODO(crbug/1503551): upgrade to CHECK.
197 DUMP_WILL_BE_CHECK_NE(lacros_data_, nullptr);
198 lacros_data_ = static_cast<uint8_t*>(
199 mmap(lacros_data_, lacros_length_, PROT_READ, MAP_FIXED | MAP_PRIVATE,
200 lacros_file_.GetPlatformFile(), 0));
201 } else {
202 // Otherwise, simply map the file.
203 lacros_data_ = static_cast<uint8_t*>(
204 mmap(nullptr, lacros_length_, PROT_READ, MAP_PRIVATE,
205 lacros_file_.GetPlatformFile(), 0));
206 }
207
208 if (lacros_data_ == MAP_FAILED) {
209 lacros_data_ = nullptr;
210 PLOG(DFATAL) << "Failed to mmap Lacros's icudtl.dat";
211 return false;
212 }
213
214 return true;
215 }
216
FindOverlap(const AshMemoryMappedFile & ash_file,const Hashes & hashes,size_t lacros_offset) const217 IcuMergeableDataFile::Slice IcuMergeableDataFile::FindOverlap(
218 const AshMemoryMappedFile& ash_file,
219 const Hashes& hashes,
220 size_t lacros_offset) const {
221 // Search for equal pages by hash.
222 HashType hash = hashes.lacros[lacros_offset / kPageSize];
223 auto search = hashes.ash.find(hash);
224 if (search == hashes.ash.end())
225 return {0, 0};
226
227 // Count how many pages (if any) have the same content.
228 size_t ash_offset = search->second;
229 size_t overlap_length =
230 kPageSize * CountEqualPages(ash_file, ash_file.data() + ash_offset,
231 lacros_data_ + lacros_offset);
232
233 return {ash_offset, overlap_length};
234 }
235
MergeArea(const AshMemoryMappedFile & ash_file,const Slice & ash_overlap,size_t lacros_offset)236 bool IcuMergeableDataFile::MergeArea(const AshMemoryMappedFile& ash_file,
237 const Slice& ash_overlap,
238 size_t lacros_offset) {
239 ScopedBlockingCall scoped_blocking_call(FROM_HERE, BlockingType::MAY_BLOCK);
240
241 // Unmap from Lacros's file and map from Ash's file instead.
242 // NOTE: "[...] If the memory region specified by addr and length overlaps
243 // pages of any existing mapping(s), then the overlapped part of the
244 // existing mapping(s) will be discarded. If the specified address
245 // cannot be used, mmap() will fail."
246 // Reference: https://man7.org/linux/man-pages/man2/mmap.2.html
247 uint8_t* map_result = static_cast<uint8_t*>(
248 mmap(lacros_data_ + lacros_offset, ash_overlap.length, PROT_READ,
249 MAP_FIXED | MAP_PRIVATE, ash_file.fd(), ash_overlap.offset));
250
251 if (map_result == MAP_FAILED) {
252 PLOG(DFATAL) << "Couldn't mmap Ash's icudtl.dat while merging";
253 return false;
254 }
255
256 return true;
257 }
258
CountEqualPages(const AshMemoryMappedFile & ash_file,const uint8_t * ash_page,const uint8_t * lacros_page) const259 size_t IcuMergeableDataFile::CountEqualPages(
260 const AshMemoryMappedFile& ash_file,
261 const uint8_t* ash_page,
262 const uint8_t* lacros_page) const {
263 // TODO(crbug/1478718): Remove once the cause of this crash is identified.
264 if (!ash_page || !lacros_page) {
265 const uint8_t* debug_ash_page = ash_page;
266 const uint8_t* debug_lacros_page = lacros_page;
267 base::debug::Alias(&debug_ash_page);
268 base::debug::Alias(&debug_lacros_page);
269 base::debug::DumpWithoutCrashing();
270 return 0;
271 }
272
273 size_t pages = 0;
274 const uint8_t* ash_end = ash_file.data() + ash_file.length();
275 const uint8_t* lacros_end = lacros_data_ + lacros_length_;
276
277 while (ash_page < ash_end && lacros_page < lacros_end &&
278 memcmp(ash_page, lacros_page, kPageSize) == 0) {
279 ash_page += kPageSize;
280 lacros_page += kPageSize;
281 pages++;
282 }
283
284 return pages;
285 }
286
CalculateHashes(const AshMemoryMappedFile & ash_file,const FilePath & ash_file_path)287 IcuMergeableDataFile::Hashes IcuMergeableDataFile::CalculateHashes(
288 const AshMemoryMappedFile& ash_file,
289 const FilePath& ash_file_path) {
290 // Try loading hashes from the pre-computed files first.
291 Hashes hashes;
292 used_cached_hashes_ = MaybeLoadCachedHashes(ash_file, ash_file_path, hashes);
293
294 if (!used_cached_hashes_) {
295 // Calculate hashes for each page in Ash's data file.
296 std::vector<HashOffset> ash_hashes;
297 ash_hashes.reserve(NPages(ash_file.length()));
298 for (size_t offset = 0; offset < ash_file.length(); offset += kPageSize) {
299 // NOTE: "POSIX specifies that the system shall always zero fill any
300 // partial page at the end of the object [...]".
301 // Reference: https://man7.org/linux/man-pages/man2/mmap.2.html
302 //
303 // Therefore this code works even if the size of Ash's `icudtl.dat` is not
304 // a multiple of the page size.
305 HashType hash = HashPage(ash_file.data() + offset);
306 ash_hashes.emplace_back(hash, offset);
307 }
308
309 // Calculate hashes for each page in Lacros's data file.
310 hashes.lacros.reserve(NPages(lacros_length_));
311 for (size_t offset = 0; offset < lacros_length_; offset += kPageSize) {
312 HashType hash = HashPage(lacros_data_ + offset);
313 hashes.lacros.emplace_back(hash);
314 }
315
316 hashes.ash = HashToOffsetMap(std::move(ash_hashes));
317 }
318
319 return hashes;
320 }
321
MaybeLoadCachedHashes(const AshMemoryMappedFile & ash_file,const FilePath & ash_file_path,Hashes & hashes)322 bool IcuMergeableDataFile::MaybeLoadCachedHashes(
323 const AshMemoryMappedFile& ash_file,
324 const FilePath& ash_file_path,
325 Hashes& hashes) {
326 FilePath ash_hash_path =
327 ash_file_path.AddExtensionASCII(kIcuDataFileHashExtension);
328 FilePath lacros_hash_path =
329 GetLacrosFilePath().AddExtensionASCII(kIcuDataFileHashExtension);
330
331 // Memory map Ash's `icudtl.dat.hash`. Ensure its size is valid and consistent
332 // with the current version of `icudtl.dat`.
333 MemoryMappedFile ash_hash_file;
334 size_t ash_pages = NPages(ash_file.length());
335 bool result = ash_hash_file.Initialize(ash_hash_path);
336 if (!result || (ash_hash_file.length() % kHashBytes) ||
337 ((ash_hash_file.length() / kHashBytes) != ash_pages)) {
338 return false;
339 }
340
341 // Same for Lacros's `icudtl.dat.hash`.
342 MemoryMappedFile lacros_hash_file;
343 size_t lacros_pages = NPages(lacros_length_);
344 result = lacros_hash_file.Initialize(lacros_hash_path);
345 if (!result || (lacros_hash_file.length() % kHashBytes) ||
346 ((lacros_hash_file.length() / kHashBytes) != lacros_pages)) {
347 return false;
348 }
349
350 // Load Ash's hashes.
351 std::vector<HashOffset> ash_hashes;
352 ash_hashes.reserve(ash_pages);
353 for (size_t i = 0; i < ash_hash_file.length(); i += kHashBytes) {
354 HashType hash = ReadHash(ash_hash_file.data(), i);
355 size_t offset = (i / kHashBytes) * kPageSize;
356 ash_hashes.emplace_back(hash, offset);
357 }
358
359 // Load Lacros's hashes.
360 hashes.lacros.reserve(lacros_pages);
361 for (size_t i = 0; i < lacros_hash_file.length(); i += kHashBytes) {
362 HashType hash = ReadHash(lacros_hash_file.data(), i);
363 hashes.lacros.emplace_back(hash);
364 }
365
366 hashes.ash = HashToOffsetMap(std::move(ash_hashes));
367 return true;
368 }
369
GetLacrosFilePath()370 FilePath IcuMergeableDataFile::GetLacrosFilePath() {
371 // /proc/self/fd/<fd>
372 // This is a subdirectory containing one entry for each file
373 // which the process has open, named by its file descriptor,
374 // and which is a symbolic link to the actual file.
375 // Reference: proc(5) - Linux manual page.
376 char path[PATH_MAX];
377 FilePath proc_path =
378 FilePath("/proc/self/fd/")
379 .AppendASCII(base::NumberToString(lacros_file_.GetPlatformFile()));
380
381 // We read the content of the symbolic link to find the path of the
382 // file associated with the file descriptor.
383 int64_t path_len = readlink(proc_path.value().c_str(), path, sizeof(path));
384 // TODO(crbug/1503551): upgrade to CHECK.
385 DUMP_WILL_BE_CHECK_NE(path_len, -1);
386 DUMP_WILL_BE_CHECK_LT(path_len, PATH_MAX);
387
388 return FilePath(std::string(path, 0, path_len));
389 }
390
391 } // namespace base::i18n
392