xref: /aosp_15_r20/external/cronet/base/i18n/icu_util.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/i18n/icu_util.h"
6 
7 #include "build/build_config.h"
8 
9 #if BUILDFLAG(IS_WIN)
10 #include <windows.h>
11 #endif
12 
13 #include <string.h>
14 
15 #include <memory>
16 #include <string>
17 
18 #include "base/debug/alias.h"
19 #include "base/environment.h"
20 #include "base/files/file_path.h"
21 #include "base/files/file_util.h"
22 #include "base/files/memory_mapped_file.h"
23 #include "base/logging.h"
24 #include "base/metrics/histogram_functions.h"
25 #include "base/metrics/metrics_hashes.h"
26 #include "base/path_service.h"
27 #include "base/strings/string_util.h"
28 #include "build/chromecast_buildflags.h"
29 #include "third_party/icu/source/common/unicode/putil.h"
30 #include "third_party/icu/source/common/unicode/uclean.h"
31 #include "third_party/icu/source/common/unicode/udata.h"
32 #include "third_party/icu/source/common/unicode/utrace.h"
33 
34 #if BUILDFLAG(IS_ANDROID)
35 #include "base/android/apk_assets.h"
36 #include "base/android/timezone_utils.h"
37 #endif
38 
39 #if BUILDFLAG(IS_IOS)
40 #include "base/ios/ios_util.h"
41 #endif
42 
43 #if BUILDFLAG(IS_APPLE)
44 #include "base/apple/foundation_util.h"
45 #endif
46 
47 #if BUILDFLAG(IS_FUCHSIA)
48 #include "base/fuchsia/intl_profile_watcher.h"
49 #endif
50 
51 #if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_FUCHSIA)
52 #include "third_party/icu/source/common/unicode/unistr.h"
53 #endif
54 
55 #if BUILDFLAG(IS_ANDROID) || BUILDFLAG(IS_FUCHSIA) || \
56     BUILDFLAG(IS_CHROMEOS) || (BUILDFLAG(IS_LINUX) && !BUILDFLAG(IS_CASTOS))
57 #include "third_party/icu/source/i18n/unicode/timezone.h"
58 #endif
59 
60 namespace base::i18n {
61 
62 #if !BUILDFLAG(IS_NACL)
63 namespace {
64 
65 #if DCHECK_IS_ON()
66 // Assert that we are not called more than once.  Even though calling this
67 // function isn't harmful (ICU can handle it), being called twice probably
68 // indicates a programming error.
69 bool g_check_called_once = true;
70 bool g_called_once = false;
71 #endif  // DCHECK_IS_ON()
72 
73 #if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
74 
75 // To debug http://crbug.com/445616.
76 int g_debug_icu_last_error;
77 int g_debug_icu_load;
78 int g_debug_icu_pf_error_details;
79 int g_debug_icu_pf_last_error;
80 #if BUILDFLAG(IS_WIN)
81 wchar_t g_debug_icu_pf_filename[_MAX_PATH];
82 #endif  // BUILDFLAG(IS_WIN)
83 // Use an unversioned file name to simplify an ICU version update down the road.
84 // No need to change the filename in multiple places (gyp files, windows
85 // build pkg configurations, etc). 'l' stands for Little Endian.
86 // This variable is exported through the header file.
87 const char kIcuDataFileName[] = "icudtl.dat";
88 
89 // Time zone data loading.
90 // For now, only Fuchsia has a meaningful use case for this feature, so it is
91 // only implemented for OS_FUCHSIA.
92 #if BUILDFLAG(IS_FUCHSIA)
93 // The environment variable used to point the ICU data loader to the directory
94 // containing time zone data. This is available from ICU version 54. The env
95 // variable approach is antiquated by today's standards (2019), but is the
96 // recommended way to configure ICU.
97 //
98 // See for details: http://userguide.icu-project.org/datetime/timezone
99 const char kIcuTimeZoneEnvVariable[] = "ICU_TIMEZONE_FILES_DIR";
100 
101 // Up-to-date time zone data MUST be provided by the system as a
102 // directory offered to Chromium components at /config/tzdata.  Chromium
103 // components "use" the `tzdata` directory capability, specifying the
104 // "/config/tzdata" path. Chromium components will crash if this capability
105 // is not available.
106 //
107 // TimeZoneDataTest.* tests verify that external timezone data is correctly
108 // loaded from the system, to alert developers if the platform and Chromium
109 // versions are no longer compatible versions.
110 // LINT.IfChange(icu_time_zone_data_path)
111 const char kIcuTimeZoneDataDir[] = "/config/tzdata/icu/44/le";
112 // LINT.ThenChange(//sandbox/policy.fuchsia/sandbox_policy_fuchsia.cc:icu_time_zone_data_path)
113 #endif  // BUILDFLAG(IS_FUCHSIA)
114 
115 #if BUILDFLAG(IS_ANDROID)
116 const char kAndroidAssetsIcuDataFileName[] = "assets/icudtl.dat";
117 #endif  // BUILDFLAG(IS_ANDROID)
118 
119 // File handle intentionally never closed. Not using File here because its
120 // Windows implementation guards against two instances owning the same
121 // PlatformFile (which we allow since we know it is never freed).
122 PlatformFile g_icudtl_pf = kInvalidPlatformFile;
123 IcuDataFile* g_icudtl_mapped_file = nullptr;
124 MemoryMappedFile::Region g_icudtl_region;
125 
126 #if BUILDFLAG(IS_FUCHSIA)
127 // The directory from which the ICU data loader will be configured to load time
128 // zone data. It is only changed by SetIcuTimeZoneDataDirForTesting().
129 const char* g_icu_time_zone_data_dir = kIcuTimeZoneDataDir;
130 #endif  // BUILDFLAG(IS_FUCHSIA)
131 
LazyInitIcuDataFile()132 void LazyInitIcuDataFile() {
133   if (g_icudtl_pf != kInvalidPlatformFile) {
134     return;
135   }
136 #if BUILDFLAG(IS_ANDROID)
137   int fd =
138       android::OpenApkAsset(kAndroidAssetsIcuDataFileName, &g_icudtl_region);
139   g_icudtl_pf = fd;
140   if (fd != -1) {
141     return;
142   }
143 #endif  // BUILDFLAG(IS_ANDROID)
144   // For unit tests, data file is located on disk, so try there as a fallback.
145 #if !BUILDFLAG(IS_APPLE)
146   FilePath data_path;
147   if (!PathService::Get(DIR_ASSETS, &data_path)) {
148     LOG(ERROR) << "Can't find " << kIcuDataFileName;
149     return;
150   }
151 #if BUILDFLAG(IS_WIN)
152   // TODO(brucedawson): http://crbug.com/445616
153   wchar_t tmp_buffer[_MAX_PATH] = {0};
154   wcscpy_s(tmp_buffer, data_path.value().c_str());
155   debug::Alias(tmp_buffer);
156 #endif
157   data_path = data_path.AppendASCII(kIcuDataFileName);
158 
159 #if BUILDFLAG(IS_WIN)
160   // TODO(brucedawson): http://crbug.com/445616
161   wchar_t tmp_buffer2[_MAX_PATH] = {0};
162   wcscpy_s(tmp_buffer2, data_path.value().c_str());
163   debug::Alias(tmp_buffer2);
164 #endif
165 
166 #else  // !BUILDFLAG(IS_APPLE)
167   // Assume it is in the framework bundle's Resources directory.
168   FilePath data_path = apple::PathForFrameworkBundleResource(kIcuDataFileName);
169 #if BUILDFLAG(IS_IOS)
170   FilePath override_data_path = ios::FilePathOfEmbeddedICU();
171   if (!override_data_path.empty()) {
172     data_path = override_data_path;
173   }
174 #endif  // !BUILDFLAG(IS_IOS)
175   if (data_path.empty()) {
176     LOG(ERROR) << kIcuDataFileName << " not found in bundle";
177     return;
178   }
179 #endif  // !BUILDFLAG(IS_APPLE)
180   File file(data_path, File::FLAG_OPEN | File::FLAG_READ);
181   if (file.IsValid()) {
182     // TODO(brucedawson): http://crbug.com/445616.
183     g_debug_icu_pf_last_error = 0;
184     g_debug_icu_pf_error_details = 0;
185 #if BUILDFLAG(IS_WIN)
186     g_debug_icu_pf_filename[0] = 0;
187 #endif  // BUILDFLAG(IS_WIN)
188 
189     g_icudtl_pf = file.TakePlatformFile();
190     g_icudtl_region = MemoryMappedFile::Region::kWholeFile;
191   }
192 #if BUILDFLAG(IS_WIN)
193   else {
194     // TODO(brucedawson): http://crbug.com/445616.
195     g_debug_icu_pf_last_error = ::GetLastError();
196     g_debug_icu_pf_error_details = file.error_details();
197     wcscpy_s(g_debug_icu_pf_filename, data_path.value().c_str());
198   }
199 #endif  // BUILDFLAG(IS_WIN)
200 }
201 
202 // Configures ICU to load external time zone data, if appropriate.
InitializeExternalTimeZoneData()203 void InitializeExternalTimeZoneData() {
204 #if BUILDFLAG(IS_FUCHSIA)
205   // Set the environment variable to override the location used by ICU.
206   // Loading can still fail if the directory is empty or its data is invalid.
207   std::unique_ptr<base::Environment> env = base::Environment::Create();
208   if (!base::DirectoryExists(base::FilePath(g_icu_time_zone_data_dir))) {
209     PLOG(FATAL) << "Could not open directory: '" << g_icu_time_zone_data_dir
210                 << "'";
211   }
212   env->SetVar(kIcuTimeZoneEnvVariable, g_icu_time_zone_data_dir);
213 #endif  // BUILDFLAG(IS_FUCHSIA)
214 }
215 
LoadIcuData(PlatformFile data_fd,const MemoryMappedFile::Region & data_region,std::unique_ptr<IcuDataFile> * out_mapped_data_file,UErrorCode * out_error_code)216 int LoadIcuData(PlatformFile data_fd,
217                 const MemoryMappedFile::Region& data_region,
218                 std::unique_ptr<IcuDataFile>* out_mapped_data_file,
219                 UErrorCode* out_error_code) {
220   InitializeExternalTimeZoneData();
221 
222   if (data_fd == kInvalidPlatformFile) {
223     LOG(ERROR) << "Invalid file descriptor to ICU data received.";
224     return 1;  // To debug http://crbug.com/445616.
225   }
226 
227   *out_mapped_data_file = std::make_unique<IcuDataFile>();
228   if (!(*out_mapped_data_file)->Initialize(File(data_fd), data_region)) {
229     LOG(ERROR) << "Couldn't mmap icu data file";
230     return 2;  // To debug http://crbug.com/445616.
231   }
232 
233   (*out_error_code) = U_ZERO_ERROR;
234   udata_setCommonData(const_cast<uint8_t*>((*out_mapped_data_file)->data()),
235                       out_error_code);
236   if (U_FAILURE(*out_error_code)) {
237     LOG(ERROR) << "Failed to initialize ICU with data file: "
238                << u_errorName(*out_error_code);
239     return 3;  // To debug http://crbug.com/445616.
240   }
241 
242   return 0;
243 }
244 
InitializeICUWithFileDescriptorInternal(PlatformFile data_fd,const MemoryMappedFile::Region & data_region)245 bool InitializeICUWithFileDescriptorInternal(
246     PlatformFile data_fd,
247     const MemoryMappedFile::Region& data_region) {
248   // This can be called multiple times in tests.
249   if (g_icudtl_mapped_file) {
250     g_debug_icu_load = 0;  // To debug http://crbug.com/445616.
251     return true;
252   }
253 
254   std::unique_ptr<IcuDataFile> mapped_file;
255   UErrorCode err;
256   g_debug_icu_load = LoadIcuData(data_fd, data_region, &mapped_file, &err);
257   if (g_debug_icu_load == 1 || g_debug_icu_load == 2) {
258     return false;
259   }
260   g_icudtl_mapped_file = mapped_file.release();
261 
262   if (g_debug_icu_load == 3) {
263     g_debug_icu_last_error = err;
264   }
265 
266   // Never try to load ICU data from files.
267   udata_setFileAccess(UDATA_ONLY_PACKAGES, &err);
268   return U_SUCCESS(err);
269 }
270 
InitializeICUFromDataFile()271 bool InitializeICUFromDataFile() {
272   // If the ICU data directory is set, ICU won't actually load the data until
273   // it is needed.  This can fail if the process is sandboxed at that time.
274   // Instead, we map the file in and hand off the data so the sandbox won't
275   // cause any problems.
276   LazyInitIcuDataFile();
277   bool result =
278       InitializeICUWithFileDescriptorInternal(g_icudtl_pf, g_icudtl_region);
279 
280   int debug_icu_load = g_debug_icu_load;
281   debug::Alias(&debug_icu_load);
282   int debug_icu_last_error = g_debug_icu_last_error;
283   debug::Alias(&debug_icu_last_error);
284 #if BUILDFLAG(IS_WIN)
285   int debug_icu_pf_last_error = g_debug_icu_pf_last_error;
286   debug::Alias(&debug_icu_pf_last_error);
287   int debug_icu_pf_error_details = g_debug_icu_pf_error_details;
288   debug::Alias(&debug_icu_pf_error_details);
289   wchar_t debug_icu_pf_filename[_MAX_PATH] = {0};
290   wcscpy_s(debug_icu_pf_filename, g_debug_icu_pf_filename);
291   debug::Alias(&debug_icu_pf_filename);
292 #endif            // BUILDFLAG(IS_WIN)
293   // Excluding Chrome OS from this CHECK due to b/289684640.
294 #if !BUILDFLAG(IS_CHROMEOS)
295   // https://crbug.com/445616
296   // https://crbug.com/1449816
297   CHECK(result);
298 #endif
299 
300   return result;
301 }
302 #endif  // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
303 
304 // Explicitly initialize ICU's time zone if necessary.
305 // On some platforms, the time zone must be explicitly initialized zone rather
306 // than relying on ICU's internal initialization.
InitializeIcuTimeZone()307 void InitializeIcuTimeZone() {
308 #if BUILDFLAG(IS_ANDROID)
309   // On Android, we can't leave it up to ICU to set the default time zone
310   // because ICU's time zone detection does not work in many time zones (e.g.
311   // Australia/Sydney, Asia/Seoul, Europe/Paris ). Use JNI to detect the host
312   // time zone and set the ICU default time zone accordingly in advance of
313   // actual use. See crbug.com/722821 and
314   // https://ssl.icu-project.org/trac/ticket/13208 .
315   std::u16string zone_id = android::GetDefaultTimeZoneId();
316   icu::TimeZone::adoptDefault(icu::TimeZone::createTimeZone(
317       icu::UnicodeString(false, zone_id.data(), zone_id.length())));
318 #elif BUILDFLAG(IS_FUCHSIA)
319   // The platform-specific mechanisms used by ICU's detectHostTimeZone() to
320   // determine the default time zone will not work on Fuchsia. Therefore,
321   // proactively set the default system.
322   // This is also required by TimeZoneMonitorFuchsia::ProfileMayHaveChanged(),
323   // which uses the current default to detect whether the time zone changed in
324   // the new profile.
325   // If the system time zone cannot be obtained or is not understood by ICU,
326   // the "unknown" time zone will be returned by createTimeZone() and used.
327   std::string zone_id =
328       FuchsiaIntlProfileWatcher::GetPrimaryTimeZoneIdForIcuInitialization();
329   icu::TimeZone::adoptDefault(
330       icu::TimeZone::createTimeZone(icu::UnicodeString::fromUTF8(zone_id)));
331 #elif BUILDFLAG(IS_CHROMEOS) || (BUILDFLAG(IS_LINUX) && !BUILDFLAG(IS_CASTOS))
332   // To respond to the time zone change properly, the default time zone
333   // cache in ICU has to be populated on starting up.
334   // See TimeZoneMonitorLinux::NotifyClientsFromImpl().
335   std::unique_ptr<icu::TimeZone> zone(icu::TimeZone::createDefault());
336 #endif  // BUILDFLAG(IS_ANDROID)
337 }
338 
// Kinds of ICU break iterators and dictionary break engines.
// NOTE(review): the explicitly pinned values together with the kMaxValue
// alias look like a histogram enumeration, in which case existing values must
// not be renumbered or reused — confirm against the metrics definitions. No
// use of this enum is visible in this part of the file.
enum class ICUCreateInstance {
  // Break iterators.
  kCharacterBreakIterator = 0,
  kWordBreakIterator = 1,
  kLineBreakIterator = 2,
  kLineBreakIteratorTypeLoose = 3,
  kLineBreakIteratorTypeNormal = 4,
  kLineBreakIteratorTypeStrict = 5,
  kSentenceBreakIterator = 6,
  kTitleBreakIterator = 7,

  // Break engines.
  kThaiBreakEngine = 8,
  kLaoBreakEngine = 9,
  kBurmeseBreakEngine = 10,
  kKhmerBreakEngine = 11,
  kChineseJapaneseBreakEngine = 12,

  // Alias of the largest valid value; update when appending an entry.
  kMaxValue = kChineseJapaneseBreakEngine,
};
356 
357 // Common initialization to run regardless of how ICU is initialized.
358 // There are multiple exposed InitializeIcu* functions. This should be called
359 // as at the end of (the last functions in the sequence of) these functions.
DoCommonInitialization()360 bool DoCommonInitialization() {
361   // TODO(jungshik): Some callers do not care about tz at all. If necessary,
362   // add a boolean argument to this function to init the default tz only
363   // when requested.
364   InitializeIcuTimeZone();
365 
366   utrace_setLevel(UTRACE_VERBOSE);
367   return true;
368 }
369 
370 }  // namespace
371 
372 #if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
InitializeICUWithFileDescriptor(PlatformFile data_fd,const MemoryMappedFile::Region & data_region)373 bool InitializeICUWithFileDescriptor(
374     PlatformFile data_fd,
375     const MemoryMappedFile::Region& data_region) {
376 #if DCHECK_IS_ON()
377   DCHECK(!g_check_called_once || !g_called_once);
378   g_called_once = true;
379 #endif
380   if (!InitializeICUWithFileDescriptorInternal(data_fd, data_region))
381     return false;
382 
383   return DoCommonInitialization();
384 }
385 
GetIcuDataFileHandle(MemoryMappedFile::Region * out_region)386 PlatformFile GetIcuDataFileHandle(MemoryMappedFile::Region* out_region) {
387   CHECK_NE(g_icudtl_pf, kInvalidPlatformFile);
388   *out_region = g_icudtl_region;
389   return g_icudtl_pf;
390 }
391 
ResetGlobalsForTesting()392 void ResetGlobalsForTesting() {
393   // Reset ICU library internal state before tearing-down the mapped data
394   // file, or handle.
395   u_cleanup();
396 
397   // `g_icudtl_pf` does not actually own the FD once ICU is initialized, so
398   // don't try to close it here.
399   g_icudtl_pf = kInvalidPlatformFile;
400   delete std::exchange(g_icudtl_mapped_file, nullptr);
401 
402 #if BUILDFLAG(IS_FUCHSIA)
403   g_icu_time_zone_data_dir = kIcuTimeZoneDataDir;
404 #endif  // BUILDFLAG(IS_FUCHSIA)
405 }
406 
407 #if BUILDFLAG(IS_FUCHSIA)
408 // |dir| must remain valid until ResetGlobalsForTesting() is called.
SetIcuTimeZoneDataDirForTesting(const char * dir)409 void SetIcuTimeZoneDataDirForTesting(const char* dir) {
410   g_icu_time_zone_data_dir = dir;
411 }
412 #endif  // BUILDFLAG(IS_FUCHSIA)
413 #endif  // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
414 
InitializeICU()415 bool InitializeICU() {
416 #if DCHECK_IS_ON()
417   DCHECK(!g_check_called_once || !g_called_once);
418   g_called_once = true;
419 #endif
420 
421 #if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC)
422   // The ICU data is statically linked.
423 #elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
424   if (!InitializeICUFromDataFile())
425     return false;
426 #else
427 #error Unsupported ICU_UTIL_DATA_IMPL value
428 #endif  // (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC)
429 
430   return DoCommonInitialization();
431 }
432 
AllowMultipleInitializeCallsForTesting()433 void AllowMultipleInitializeCallsForTesting() {
434 #if DCHECK_IS_ON()
435   g_check_called_once = false;
436 #endif
437 }
438 
439 #endif  // !BUILDFLAG(IS_NACL)
440 
441 }  // namespace base::i18n
442