1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/i18n/icu_util.h"
6
7 #include "build/build_config.h"
8
9 #if BUILDFLAG(IS_WIN)
10 #include <windows.h>
11 #endif
12
13 #include <string.h>
14
15 #include <memory>
16 #include <string>
17
18 #include "base/debug/alias.h"
19 #include "base/environment.h"
20 #include "base/files/file_path.h"
21 #include "base/files/file_util.h"
22 #include "base/files/memory_mapped_file.h"
23 #include "base/logging.h"
24 #include "base/metrics/histogram_functions.h"
25 #include "base/metrics/metrics_hashes.h"
26 #include "base/path_service.h"
27 #include "base/strings/string_util.h"
28 #include "build/chromecast_buildflags.h"
29 #include "third_party/icu/source/common/unicode/putil.h"
30 #include "third_party/icu/source/common/unicode/uclean.h"
31 #include "third_party/icu/source/common/unicode/udata.h"
32 #include "third_party/icu/source/common/unicode/utrace.h"
33
34 #if BUILDFLAG(IS_ANDROID)
35 #include "base/android/apk_assets.h"
36 #include "base/android/timezone_utils.h"
37 #endif
38
39 #if BUILDFLAG(IS_IOS)
40 #include "base/ios/ios_util.h"
41 #endif
42
43 #if BUILDFLAG(IS_APPLE)
44 #include "base/apple/foundation_util.h"
45 #endif
46
47 #if BUILDFLAG(IS_FUCHSIA)
48 #include "base/fuchsia/intl_profile_watcher.h"
49 #endif
50
51 #if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_FUCHSIA)
52 #include "third_party/icu/source/common/unicode/unistr.h"
53 #endif
54
55 #if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_FUCHSIA) || \
56 BUILDFLAG(IS_CHROMEOS) || (BUILDFLAG(IS_LINUX) && !BUILDFLAG(IS_CASTOS))
57 #include "third_party/icu/source/i18n/unicode/timezone.h"
58 #endif
59
60 namespace base::i18n {
61
62 #if !BUILDFLAG(IS_NACL)
63 namespace {
64
65 #if DCHECK_IS_ON()
66 // Assert that we are not called more than once. Even though calling this
67 // function isn't harmful (ICU can handle it), being called twice probably
68 // indicates a programming error.
69 bool g_check_called_once = true;
70 bool g_called_once = false;
71 #endif // DCHECK_IS_ON()
72
73 #if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
74
75 // To debug http://crbug.com/445616.
76 int g_debug_icu_last_error;
77 int g_debug_icu_load;
78 int g_debug_icu_pf_error_details;
79 int g_debug_icu_pf_last_error;
80 #if BUILDFLAG(IS_WIN)
81 wchar_t g_debug_icu_pf_filename[_MAX_PATH];
82 #endif // BUILDFLAG(IS_WIN)
83 // Use an unversioned file name to simplify a icu version update down the road.
84 // No need to change the filename in multiple places (gyp files, windows
85 // build pkg configurations, etc). 'l' stands for Little Endian.
86 // This variable is exported through the header file.
87 const char kIcuDataFileName[] = "icudtl.dat";
88
89 // Time zone data loading.
90 // For now, only Fuchsia has a meaningful use case for this feature, so it is
91 // only implemented for OS_FUCHSIA.
92 #if BUILDFLAG(IS_FUCHSIA)
93 // The environment variable used to point the ICU data loader to the directory
94 // containing time zone data. This is available from ICU version 54. The env
95 // variable approach is antiquated by today's standards (2019), but is the
96 // recommended way to configure ICU.
97 //
98 // See for details: http://userguide.icu-project.org/datetime/timezone
99 const char kIcuTimeZoneEnvVariable[] = "ICU_TIMEZONE_FILES_DIR";
100
101 // Up-to-date time zone data MUST be provided by the system as a
102 // directory offered to Chromium components at /config/tzdata. Chromium
103 // components "use" the `tzdata` directory capability, specifying the
104 // "/config/tzdata" path. Chromium components will crash if this capability
105 // is not available.
106 //
107 // TimeZoneDataTest.* tests verify that external timezone data is correctly
108 // loaded from the system, to alert developers if the platform and Chromium
109 // versions are no longer compatible versions.
110 // LINT.IfChange(icu_time_zone_data_path)
111 const char kIcuTimeZoneDataDir[] = "/config/tzdata/icu/44/le";
112 // LINT.ThenChange(//sandbox/policy.fuchsia/sandbox_policy_fuchsia.cc:icu_time_zone_data_path)
113 #endif // BUILDFLAG(IS_FUCHSIA)
114
115 #if BUILDFLAG(IS_ANDROID)
116 const char kAndroidAssetsIcuDataFileName[] = "assets/icudtl.dat";
117 #endif // BUILDFLAG(IS_ANDROID)
118
119 // File handle intentionally never closed. Not using File here because its
120 // Windows implementation guards against two instances owning the same
121 // PlatformFile (which we allow since we know it is never freed).
122 PlatformFile g_icudtl_pf = kInvalidPlatformFile;
123 IcuDataFile* g_icudtl_mapped_file = nullptr;
124 MemoryMappedFile::Region g_icudtl_region;
125
126 #if BUILDFLAG(IS_FUCHSIA)
127 // The directory from which the ICU data loader will be configured to load time
128 // zone data. It is only changed by SetIcuTimeZoneDataDirForTesting().
129 const char* g_icu_time_zone_data_dir = kIcuTimeZoneDataDir;
130 #endif // BUILDFLAG(IS_FUCHSIA)
131
LazyInitIcuDataFile()132 void LazyInitIcuDataFile() {
133 if (g_icudtl_pf != kInvalidPlatformFile) {
134 return;
135 }
136 #if BUILDFLAG(IS_ANDROID)
137 int fd =
138 android::OpenApkAsset(kAndroidAssetsIcuDataFileName, &g_icudtl_region);
139 g_icudtl_pf = fd;
140 if (fd != -1) {
141 return;
142 }
143 #endif // BUILDFLAG(IS_ANDROID)
144 // For unit tests, data file is located on disk, so try there as a fallback.
145 #if !BUILDFLAG(IS_APPLE)
146 FilePath data_path;
147 if (!PathService::Get(DIR_ASSETS, &data_path)) {
148 LOG(ERROR) << "Can't find " << kIcuDataFileName;
149 return;
150 }
151 #if BUILDFLAG(IS_WIN)
152 // TODO(brucedawson): http://crbug.com/445616
153 wchar_t tmp_buffer[_MAX_PATH] = {0};
154 wcscpy_s(tmp_buffer, data_path.value().c_str());
155 debug::Alias(tmp_buffer);
156 #endif
157 data_path = data_path.AppendASCII(kIcuDataFileName);
158
159 #if BUILDFLAG(IS_WIN)
160 // TODO(brucedawson): http://crbug.com/445616
161 wchar_t tmp_buffer2[_MAX_PATH] = {0};
162 wcscpy_s(tmp_buffer2, data_path.value().c_str());
163 debug::Alias(tmp_buffer2);
164 #endif
165
166 #else // !BUILDFLAG(IS_APPLE)
167 // Assume it is in the framework bundle's Resources directory.
168 FilePath data_path = apple::PathForFrameworkBundleResource(kIcuDataFileName);
169 #if BUILDFLAG(IS_IOS)
170 FilePath override_data_path = ios::FilePathOfEmbeddedICU();
171 if (!override_data_path.empty()) {
172 data_path = override_data_path;
173 }
174 #endif // !BUILDFLAG(IS_IOS)
175 if (data_path.empty()) {
176 LOG(ERROR) << kIcuDataFileName << " not found in bundle";
177 return;
178 }
179 #endif // !BUILDFLAG(IS_APPLE)
180 File file(data_path, File::FLAG_OPEN | File::FLAG_READ);
181 if (file.IsValid()) {
182 // TODO(brucedawson): http://crbug.com/445616.
183 g_debug_icu_pf_last_error = 0;
184 g_debug_icu_pf_error_details = 0;
185 #if BUILDFLAG(IS_WIN)
186 g_debug_icu_pf_filename[0] = 0;
187 #endif // BUILDFLAG(IS_WIN)
188
189 g_icudtl_pf = file.TakePlatformFile();
190 g_icudtl_region = MemoryMappedFile::Region::kWholeFile;
191 }
192 #if BUILDFLAG(IS_WIN)
193 else {
194 // TODO(brucedawson): http://crbug.com/445616.
195 g_debug_icu_pf_last_error = ::GetLastError();
196 g_debug_icu_pf_error_details = file.error_details();
197 wcscpy_s(g_debug_icu_pf_filename, data_path.value().c_str());
198 }
199 #endif // BUILDFLAG(IS_WIN)
200 }
201
202 // Configures ICU to load external time zone data, if appropriate.
InitializeExternalTimeZoneData()203 void InitializeExternalTimeZoneData() {
204 #if BUILDFLAG(IS_FUCHSIA)
205 // Set the environment variable to override the location used by ICU.
206 // Loading can still fail if the directory is empty or its data is invalid.
207 std::unique_ptr<base::Environment> env = base::Environment::Create();
208 if (!base::DirectoryExists(base::FilePath(g_icu_time_zone_data_dir))) {
209 PLOG(FATAL) << "Could not open directory: '" << g_icu_time_zone_data_dir
210 << "'";
211 }
212 env->SetVar(kIcuTimeZoneEnvVariable, g_icu_time_zone_data_dir);
213 #endif // BUILDFLAG(IS_FUCHSIA)
214 }
215
LoadIcuData(PlatformFile data_fd,const MemoryMappedFile::Region & data_region,std::unique_ptr<IcuDataFile> * out_mapped_data_file,UErrorCode * out_error_code)216 int LoadIcuData(PlatformFile data_fd,
217 const MemoryMappedFile::Region& data_region,
218 std::unique_ptr<IcuDataFile>* out_mapped_data_file,
219 UErrorCode* out_error_code) {
220 InitializeExternalTimeZoneData();
221
222 if (data_fd == kInvalidPlatformFile) {
223 LOG(ERROR) << "Invalid file descriptor to ICU data received.";
224 return 1; // To debug http://crbug.com/445616.
225 }
226
227 *out_mapped_data_file = std::make_unique<IcuDataFile>();
228 if (!(*out_mapped_data_file)->Initialize(File(data_fd), data_region)) {
229 LOG(ERROR) << "Couldn't mmap icu data file";
230 return 2; // To debug http://crbug.com/445616.
231 }
232
233 (*out_error_code) = U_ZERO_ERROR;
234 udata_setCommonData(const_cast<uint8_t*>((*out_mapped_data_file)->data()),
235 out_error_code);
236 if (U_FAILURE(*out_error_code)) {
237 LOG(ERROR) << "Failed to initialize ICU with data file: "
238 << u_errorName(*out_error_code);
239 return 3; // To debug http://crbug.com/445616.
240 }
241
242 return 0;
243 }
244
InitializeICUWithFileDescriptorInternal(PlatformFile data_fd,const MemoryMappedFile::Region & data_region)245 bool InitializeICUWithFileDescriptorInternal(
246 PlatformFile data_fd,
247 const MemoryMappedFile::Region& data_region) {
248 // This can be called multiple times in tests.
249 if (g_icudtl_mapped_file) {
250 g_debug_icu_load = 0; // To debug http://crbug.com/445616.
251 return true;
252 }
253
254 std::unique_ptr<IcuDataFile> mapped_file;
255 UErrorCode err;
256 g_debug_icu_load = LoadIcuData(data_fd, data_region, &mapped_file, &err);
257 if (g_debug_icu_load == 1 || g_debug_icu_load == 2) {
258 return false;
259 }
260 g_icudtl_mapped_file = mapped_file.release();
261
262 if (g_debug_icu_load == 3) {
263 g_debug_icu_last_error = err;
264 }
265
266 // Never try to load ICU data from files.
267 udata_setFileAccess(UDATA_ONLY_PACKAGES, &err);
268 return U_SUCCESS(err);
269 }
270
InitializeICUFromDataFile()271 bool InitializeICUFromDataFile() {
272 // If the ICU data directory is set, ICU won't actually load the data until
273 // it is needed. This can fail if the process is sandboxed at that time.
274 // Instead, we map the file in and hand off the data so the sandbox won't
275 // cause any problems.
276 LazyInitIcuDataFile();
277 bool result =
278 InitializeICUWithFileDescriptorInternal(g_icudtl_pf, g_icudtl_region);
279
280 int debug_icu_load = g_debug_icu_load;
281 debug::Alias(&debug_icu_load);
282 int debug_icu_last_error = g_debug_icu_last_error;
283 debug::Alias(&debug_icu_last_error);
284 #if BUILDFLAG(IS_WIN)
285 int debug_icu_pf_last_error = g_debug_icu_pf_last_error;
286 debug::Alias(&debug_icu_pf_last_error);
287 int debug_icu_pf_error_details = g_debug_icu_pf_error_details;
288 debug::Alias(&debug_icu_pf_error_details);
289 wchar_t debug_icu_pf_filename[_MAX_PATH] = {0};
290 wcscpy_s(debug_icu_pf_filename, g_debug_icu_pf_filename);
291 debug::Alias(&debug_icu_pf_filename);
292 #endif // BUILDFLAG(IS_WIN)
293 // Excluding Chrome OS from this CHECK due to b/289684640.
294 #if !BUILDFLAG(IS_CHROMEOS)
295 // https://crbug.com/445616
296 // https://crbug.com/1449816
297 CHECK(result);
298 #endif
299
300 return result;
301 }
302 #endif // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
303
304 // Explicitly initialize ICU's time zone if necessary.
305 // On some platforms, the time zone must be explicitly initialized zone rather
306 // than relying on ICU's internal initialization.
InitializeIcuTimeZone()307 void InitializeIcuTimeZone() {
308 #if BUILDFLAG(IS_ANDROID)
309 // On Android, we can't leave it up to ICU to set the default time zone
310 // because ICU's time zone detection does not work in many time zones (e.g.
311 // Australia/Sydney, Asia/Seoul, Europe/Paris ). Use JNI to detect the host
312 // time zone and set the ICU default time zone accordingly in advance of
313 // actual use. See crbug.com/722821 and
314 // https://ssl.icu-project.org/trac/ticket/13208 .
315 std::u16string zone_id = android::GetDefaultTimeZoneId();
316 icu::TimeZone::adoptDefault(icu::TimeZone::createTimeZone(
317 icu::UnicodeString(false, zone_id.data(), zone_id.length())));
318 #elif BUILDFLAG(IS_FUCHSIA)
319 // The platform-specific mechanisms used by ICU's detectHostTimeZone() to
320 // determine the default time zone will not work on Fuchsia. Therefore,
321 // proactively set the default system.
322 // This is also required by TimeZoneMonitorFuchsia::ProfileMayHaveChanged(),
323 // which uses the current default to detect whether the time zone changed in
324 // the new profile.
325 // If the system time zone cannot be obtained or is not understood by ICU,
326 // the "unknown" time zone will be returned by createTimeZone() and used.
327 std::string zone_id =
328 FuchsiaIntlProfileWatcher::GetPrimaryTimeZoneIdForIcuInitialization();
329 icu::TimeZone::adoptDefault(
330 icu::TimeZone::createTimeZone(icu::UnicodeString::fromUTF8(zone_id)));
331 #elif BUILDFLAG(IS_CHROMEOS) || (BUILDFLAG(IS_LINUX) && !BUILDFLAG(IS_CASTOS))
332 // To respond to the time zone change properly, the default time zone
333 // cache in ICU has to be populated on starting up.
334 // See TimeZoneMonitorLinux::NotifyClientsFromImpl().
335 std::unique_ptr<icu::TimeZone> zone(icu::TimeZone::createDefault());
336 #endif // BUILDFLAG(IS_ANDROID)
337 }
338
// Identifies the kind of ICU object being created: break iterators first,
// followed by dictionary-based break engines.
// NOTE(review): nothing visible in this file references this enum, and the
// explicit values plus `kMaxValue` look like persisted metric buckets -
// confirm before renumbering or removing entries.
enum class ICUCreateInstance {
  // Break iterators.
  kCharacterBreakIterator = 0,
  kWordBreakIterator = 1,
  kLineBreakIterator = 2,
  kLineBreakIteratorTypeLoose = 3,
  kLineBreakIteratorTypeNormal = 4,
  kLineBreakIteratorTypeStrict = 5,
  kSentenceBreakIterator = 6,
  kTitleBreakIterator = 7,

  // Break engines.
  kThaiBreakEngine = 8,
  kLaoBreakEngine = 9,
  kBurmeseBreakEngine = 10,
  kKhmerBreakEngine = 11,
  kChineseJapaneseBreakEngine = 12,

  // Must always alias the last enumerator above.
  kMaxValue = kChineseJapaneseBreakEngine,
};
356
357 // Common initialization to run regardless of how ICU is initialized.
358 // There are multiple exposed InitializeIcu* functions. This should be called
359 // as at the end of (the last functions in the sequence of) these functions.
DoCommonInitialization()360 bool DoCommonInitialization() {
361 // TODO(jungshik): Some callers do not care about tz at all. If necessary,
362 // add a boolean argument to this function to init the default tz only
363 // when requested.
364 InitializeIcuTimeZone();
365
366 utrace_setLevel(UTRACE_VERBOSE);
367 return true;
368 }
369
370 } // namespace
371
372 #if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
InitializeICUWithFileDescriptor(PlatformFile data_fd,const MemoryMappedFile::Region & data_region)373 bool InitializeICUWithFileDescriptor(
374 PlatformFile data_fd,
375 const MemoryMappedFile::Region& data_region) {
376 #if DCHECK_IS_ON()
377 DCHECK(!g_check_called_once || !g_called_once);
378 g_called_once = true;
379 #endif
380 if (!InitializeICUWithFileDescriptorInternal(data_fd, data_region))
381 return false;
382
383 return DoCommonInitialization();
384 }
385
GetIcuDataFileHandle(MemoryMappedFile::Region * out_region)386 PlatformFile GetIcuDataFileHandle(MemoryMappedFile::Region* out_region) {
387 CHECK_NE(g_icudtl_pf, kInvalidPlatformFile);
388 *out_region = g_icudtl_region;
389 return g_icudtl_pf;
390 }
391
ResetGlobalsForTesting()392 void ResetGlobalsForTesting() {
393 // Reset ICU library internal state before tearing-down the mapped data
394 // file, or handle.
395 u_cleanup();
396
397 // `g_icudtl_pf` does not actually own the FD once ICU is initialized, so
398 // don't try to close it here.
399 g_icudtl_pf = kInvalidPlatformFile;
400 delete std::exchange(g_icudtl_mapped_file, nullptr);
401
402 #if BUILDFLAG(IS_FUCHSIA)
403 g_icu_time_zone_data_dir = kIcuTimeZoneDataDir;
404 #endif // BUILDFLAG(IS_FUCHSIA)
405 }
406
407 #if BUILDFLAG(IS_FUCHSIA)
408 // |dir| must remain valid until ResetGlobalsForTesting() is called.
SetIcuTimeZoneDataDirForTesting(const char * dir)409 void SetIcuTimeZoneDataDirForTesting(const char* dir) {
410 g_icu_time_zone_data_dir = dir;
411 }
412 #endif // BUILDFLAG(IS_FUCHSIA)
413 #endif // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
414
InitializeICU()415 bool InitializeICU() {
416 #if DCHECK_IS_ON()
417 DCHECK(!g_check_called_once || !g_called_once);
418 g_called_once = true;
419 #endif
420
421 #if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC)
422 // The ICU data is statically linked.
423 #elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
424 if (!InitializeICUFromDataFile())
425 return false;
426 #else
427 #error Unsupported ICU_UTIL_DATA_IMPL value
428 #endif // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC)
429
430 return DoCommonInitialization();
431 }
432
AllowMultipleInitializeCallsForTesting()433 void AllowMultipleInitializeCallsForTesting() {
434 #if DCHECK_IS_ON()
435 g_check_called_once = false;
436 #endif
437 }
438
439 #endif // !BUILDFLAG(IS_NACL)
440
441 } // namespace base::i18n
442