xref: /aosp_15_r20/external/cronet/base/i18n/rtl.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2011 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/i18n/rtl.h"
6 
7 #include <stddef.h>
8 #include <stdint.h>
9 
10 #include <algorithm>
11 #include <string_view>
12 
13 #include "base/check_op.h"
14 #include "base/command_line.h"
15 #include "base/files/file_path.h"
16 #include "base/i18n/base_i18n_switches.h"
17 #include "base/logging.h"
18 #include "base/strings/string_split.h"
19 #include "base/strings/string_util.h"
20 #include "base/strings/sys_string_conversions.h"
21 #include "base/strings/utf_string_conversions.h"
22 #include "build/build_config.h"
23 #include "third_party/icu/source/common/unicode/locid.h"
24 #include "third_party/icu/source/common/unicode/uchar.h"
25 #include "third_party/icu/source/common/unicode/uscript.h"
26 #include "third_party/icu/source/i18n/unicode/coll.h"
27 
28 #if BUILDFLAG(IS_IOS)
29 #include "base/debug/crash_logging.h"
30 #include "base/ios/ios_util.h"
31 #endif
32 
33 namespace {
34 
35 // Extract language, country and variant, but ignore keywords.  For example,
36 // en-US, ca@valencia, ca-ES@valencia.
GetLocaleString(const icu::Locale & locale)37 std::string GetLocaleString(const icu::Locale& locale) {
38   const char* language = locale.getLanguage();
39   const char* country = locale.getCountry();
40   const char* variant = locale.getVariant();
41   const char* script = locale.getScript();
42 
43   std::string result =
44       (language != nullptr && *language != '\0') ? language : "und";
45 
46   if (script != nullptr && *script != '\0') {
47     result += '-';
48     result += script;
49   }
50 
51   if (country != nullptr && *country != '\0') {
52     result += '-';
53     result += country;
54   }
55 
56   if (variant != nullptr && *variant != '\0')
57     result += '@' + base::ToLowerASCII(variant);
58 
59   return result;
60 }
61 
62 // Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong
63 // directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to
64 // http://unicode.org/reports/tr9/ for more information.
GetCharacterDirection(UChar32 character)65 base::i18n::TextDirection GetCharacterDirection(UChar32 character) {
66   static bool has_switch = base::CommandLine::ForCurrentProcess()->HasSwitch(
67       switches::kForceTextDirection);
68   if (has_switch) {
69     base::CommandLine* command_line = base::CommandLine::ForCurrentProcess();
70     std::string force_flag =
71         command_line->GetSwitchValueASCII(switches::kForceTextDirection);
72 
73     if (force_flag == switches::kForceDirectionRTL)
74       return base::i18n::RIGHT_TO_LEFT;
75     if (force_flag == switches::kForceDirectionLTR)
76       return base::i18n::LEFT_TO_RIGHT;
77   }
78   // Now that we have the character, we use ICU in order to query for the
79   // appropriate Unicode BiDi character type.
80   int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
81   switch (property) {
82     case U_RIGHT_TO_LEFT:
83     case U_RIGHT_TO_LEFT_ARABIC:
84     case U_RIGHT_TO_LEFT_EMBEDDING:
85     case U_RIGHT_TO_LEFT_OVERRIDE:
86       return base::i18n::RIGHT_TO_LEFT;
87     case U_LEFT_TO_RIGHT:
88     case U_LEFT_TO_RIGHT_EMBEDDING:
89     case U_LEFT_TO_RIGHT_OVERRIDE:
90       return base::i18n::LEFT_TO_RIGHT;
91   }
92   return base::i18n::UNKNOWN_DIRECTION;
93 }
94 
95 }  // namespace
96 
97 namespace base {
98 namespace i18n {
99 
100 // Represents the locale-specific ICU text direction.
101 static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
102 
103 // Convert the ICU default locale to a string.
GetConfiguredLocale()104 std::string GetConfiguredLocale() {
105   return GetLocaleString(icu::Locale::getDefault());
106 }
107 
108 // Convert the ICU canonicalized locale to a string.
GetCanonicalLocale(const std::string & locale)109 std::string GetCanonicalLocale(const std::string& locale) {
110   return GetLocaleString(icu::Locale::createCanonical(locale.c_str()));
111 }
112 
113 // Convert Chrome locale name to ICU locale name
ICULocaleName(const std::string & locale_string)114 std::string ICULocaleName(const std::string& locale_string) {
115   // If not Spanish, just return it.
116   if (locale_string.substr(0, 2) != "es")
117     return locale_string;
118   // Expand es to es-ES.
119   if (EqualsCaseInsensitiveASCII(locale_string, "es"))
120     return "es-ES";
121   // Map es-419 (Latin American Spanish) to es-FOO depending on the system
122   // locale.  If it's es-RR other than es-ES, map to es-RR. Otherwise, map
123   // to es-MX (the most populous in Spanish-speaking Latin America).
124   if (EqualsCaseInsensitiveASCII(locale_string, "es-419")) {
125     const icu::Locale& locale = icu::Locale::getDefault();
126     std::string language = locale.getLanguage();
127     const char* country = locale.getCountry();
128     if (EqualsCaseInsensitiveASCII(language, "es") &&
129         !EqualsCaseInsensitiveASCII(country, "es")) {
130       language += '-';
131       language += country;
132       return language;
133     }
134     return "es-MX";
135   }
136   // Currently, Chrome has only "es" and "es-419", but later we may have
137   // more specific "es-RR".
138   return locale_string;
139 }
140 
SetICUDefaultLocale(const std::string & locale_string)141 void SetICUDefaultLocale(const std::string& locale_string) {
142 #if BUILDFLAG(IS_IOS)
143   static base::debug::CrashKeyString* crash_key_locale =
144       base::debug::AllocateCrashKeyString("icu_locale_input",
145                                           base::debug::CrashKeySize::Size256);
146   base::debug::SetCrashKeyString(crash_key_locale, locale_string);
147 #endif
148   icu::Locale locale(ICULocaleName(locale_string).c_str());
149   UErrorCode error_code = U_ZERO_ERROR;
150   const char* lang = locale.getLanguage();
151   if (lang != nullptr && *lang != '\0') {
152     icu::Locale::setDefault(locale, error_code);
153   } else {
154     LOG(ERROR) << "Failed to set the ICU default locale to " << locale_string
155                << ". Falling back to en-US.";
156     icu::Locale::setDefault(icu::Locale::getUS(), error_code);
157   }
158   g_icu_text_direction = UNKNOWN_DIRECTION;
159 }
160 
IsRTL()161 bool IsRTL() {
162   return ICUIsRTL();
163 }
164 
SetRTLForTesting(bool rtl)165 void SetRTLForTesting(bool rtl) {
166   SetICUDefaultLocale(rtl ? "he" : "en");
167   DCHECK_EQ(rtl, IsRTL());
168 }
169 
ICUIsRTL()170 bool ICUIsRTL() {
171   if (g_icu_text_direction == UNKNOWN_DIRECTION) {
172     const icu::Locale& locale = icu::Locale::getDefault();
173     g_icu_text_direction = GetTextDirectionForLocaleInStartUp(locale.getName());
174   }
175   return g_icu_text_direction == RIGHT_TO_LEFT;
176 }
177 
GetForcedTextDirection()178 TextDirection GetForcedTextDirection() {
179 // On iOS, check for RTL forcing.
180 #if BUILDFLAG(IS_IOS)
181   if (base::ios::IsInForcedRTL())
182     return base::i18n::RIGHT_TO_LEFT;
183 #endif
184 
185   base::CommandLine* command_line = base::CommandLine::ForCurrentProcess();
186   if (command_line->HasSwitch(switches::kForceUIDirection)) {
187     std::string force_flag =
188         command_line->GetSwitchValueASCII(switches::kForceUIDirection);
189 
190     if (force_flag == switches::kForceDirectionLTR)
191       return base::i18n::LEFT_TO_RIGHT;
192 
193     if (force_flag == switches::kForceDirectionRTL)
194       return base::i18n::RIGHT_TO_LEFT;
195   }
196 
197   return base::i18n::UNKNOWN_DIRECTION;
198 }
199 
GetTextDirectionForLocaleInStartUp(const char * locale_name)200 TextDirection GetTextDirectionForLocaleInStartUp(const char* locale_name) {
201   // Check for direction forcing.
202   TextDirection forced_direction = GetForcedTextDirection();
203   if (forced_direction != UNKNOWN_DIRECTION)
204     return forced_direction;
205 
206   // This list needs to be updated in alphabetical order if we add more RTL
207   // locales.
208   static const char kRTLLanguageCodes[][3] = {"ar", "fa", "he", "iw", "ur"};
209   std::vector<std::string_view> locale_split =
210       SplitStringPiece(locale_name, "-_", KEEP_WHITESPACE, SPLIT_WANT_ALL);
211   std::string_view language_code = locale_split[0];
212   if (std::binary_search(kRTLLanguageCodes,
213                          kRTLLanguageCodes + std::size(kRTLLanguageCodes),
214                          language_code))
215     return RIGHT_TO_LEFT;
216   return LEFT_TO_RIGHT;
217 }
218 
GetTextDirectionForLocale(const char * locale_name)219 TextDirection GetTextDirectionForLocale(const char* locale_name) {
220   // Check for direction forcing.
221   TextDirection forced_direction = GetForcedTextDirection();
222   if (forced_direction != UNKNOWN_DIRECTION)
223     return forced_direction;
224 
225   UErrorCode status = U_ZERO_ERROR;
226   ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
227   DCHECK(U_SUCCESS(status));
228   // Treat anything other than RTL as LTR.
229   return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
230 }
231 
GetFirstStrongCharacterDirection(const std::u16string & text)232 TextDirection GetFirstStrongCharacterDirection(const std::u16string& text) {
233   const char16_t* string = text.c_str();
234   size_t length = text.length();
235   size_t position = 0;
236   while (position < length) {
237     UChar32 character;
238     size_t next_position = position;
239     U16_NEXT(string, next_position, length, character);
240     TextDirection direction = GetCharacterDirection(character);
241     if (direction != UNKNOWN_DIRECTION)
242       return direction;
243     position = next_position;
244   }
245   return LEFT_TO_RIGHT;
246 }
247 
GetLastStrongCharacterDirection(const std::u16string & text)248 TextDirection GetLastStrongCharacterDirection(const std::u16string& text) {
249   const char16_t* string = text.c_str();
250   size_t position = text.length();
251   while (position > 0) {
252     UChar32 character;
253     size_t prev_position = position;
254     U16_PREV(string, 0, prev_position, character);
255     TextDirection direction = GetCharacterDirection(character);
256     if (direction != UNKNOWN_DIRECTION)
257       return direction;
258     position = prev_position;
259   }
260   return LEFT_TO_RIGHT;
261 }
262 
GetStringDirection(const std::u16string & text)263 TextDirection GetStringDirection(const std::u16string& text) {
264   const char16_t* string = text.c_str();
265   size_t length = text.length();
266   size_t position = 0;
267 
268   TextDirection result(UNKNOWN_DIRECTION);
269   while (position < length) {
270     UChar32 character;
271     size_t next_position = position;
272     U16_NEXT(string, next_position, length, character);
273     TextDirection direction = GetCharacterDirection(character);
274     if (direction != UNKNOWN_DIRECTION) {
275       if (result != UNKNOWN_DIRECTION && result != direction)
276         return UNKNOWN_DIRECTION;
277       result = direction;
278     }
279     position = next_position;
280   }
281 
282   // Handle the case of a string not containing any strong directionality
283   // characters defaulting to LEFT_TO_RIGHT.
284   if (result == UNKNOWN_DIRECTION)
285     return LEFT_TO_RIGHT;
286 
287   return result;
288 }
289 
290 #if BUILDFLAG(IS_WIN)
AdjustStringForLocaleDirection(std::u16string * text)291 bool AdjustStringForLocaleDirection(std::u16string* text) {
292   if (!IsRTL() || text->empty())
293     return false;
294 
295   // Marking the string as LTR if the locale is RTL and the string does not
296   // contain strong RTL characters. Otherwise, mark the string as RTL.
297   bool has_rtl_chars = StringContainsStrongRTLChars(*text);
298   if (!has_rtl_chars)
299     WrapStringWithLTRFormatting(text);
300   else
301     WrapStringWithRTLFormatting(text);
302 
303   return true;
304 }
305 
UnadjustStringForLocaleDirection(std::u16string * text)306 bool UnadjustStringForLocaleDirection(std::u16string* text) {
307   if (!IsRTL() || text->empty())
308     return false;
309 
310   *text = StripWrappingBidiControlCharacters(*text);
311   return true;
312 }
313 #else
AdjustStringForLocaleDirection(std::u16string * text)314 bool AdjustStringForLocaleDirection(std::u16string* text) {
315   // On OS X & GTK the directionality of a label is determined by the first
316   // strongly directional character.
317   // However, we want to make sure that in an LTR-language-UI all strings are
318   // left aligned and vice versa.
319   // A problem can arise if we display a string which starts with user input.
320   // User input may be of the opposite directionality to the UI. So the whole
321   // string will be displayed in the opposite directionality, e.g. if we want to
322   // display in an LTR UI [such as US English]:
323   //
324   // EMAN_NOISNETXE is now installed.
325   //
326   // Since EXTENSION_NAME begins with a strong RTL char, the label's
327   // directionality will be set to RTL and the string will be displayed visually
328   // as:
329   //
330   // .is now installed EMAN_NOISNETXE
331   //
332   // In order to solve this issue, we prepend an LRM to the string. An LRM is a
333   // strongly directional LTR char.
334   // We also append an LRM at the end, which ensures that we're in an LTR
335   // context.
336 
337   // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
338   // box so there is no issue with displaying zero-width bidi control characters
339   // on any system.  Thus no need for the !IsRTL() check here.
340   if (text->empty())
341     return false;
342 
343   bool ui_direction_is_rtl = IsRTL();
344 
345   bool has_rtl_chars = StringContainsStrongRTLChars(*text);
346   if (!ui_direction_is_rtl && has_rtl_chars) {
347     WrapStringWithRTLFormatting(text);
348     text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
349                  kLeftToRightMark);
350     text->push_back(kLeftToRightMark);
351   } else if (ui_direction_is_rtl && has_rtl_chars) {
352     WrapStringWithRTLFormatting(text);
353     text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
354                  kRightToLeftMark);
355     text->push_back(kRightToLeftMark);
356   } else if (ui_direction_is_rtl) {
357     WrapStringWithLTRFormatting(text);
358     text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
359                  kRightToLeftMark);
360     text->push_back(kRightToLeftMark);
361   } else {
362     return false;
363   }
364 
365   return true;
366 }
367 
UnadjustStringForLocaleDirection(std::u16string * text)368 bool UnadjustStringForLocaleDirection(std::u16string* text) {
369   if (text->empty())
370     return false;
371 
372   size_t begin_index = 0;
373   char16_t begin = text->at(begin_index);
374   if (begin == kLeftToRightMark ||
375       begin == kRightToLeftMark) {
376     ++begin_index;
377   }
378 
379   size_t end_index = text->length() - 1;
380   char16_t end = text->at(end_index);
381   if (end == kLeftToRightMark ||
382       end == kRightToLeftMark) {
383     --end_index;
384   }
385 
386   std::u16string unmarked_text =
387       text->substr(begin_index, end_index - begin_index + 1);
388   *text = StripWrappingBidiControlCharacters(unmarked_text);
389   return true;
390 }
391 
392 #endif  // !BUILDFLAG(IS_WIN)
393 
EnsureTerminatedDirectionalFormatting(std::u16string * text)394 void EnsureTerminatedDirectionalFormatting(std::u16string* text) {
395   int count = 0;
396   for (auto c : *text) {
397     if (c == kLeftToRightEmbeddingMark || c == kRightToLeftEmbeddingMark ||
398         c == kLeftToRightOverride || c == kRightToLeftOverride) {
399       ++count;
400     } else if (c == kPopDirectionalFormatting && count > 0) {
401       --count;
402     }
403   }
404   for (int j = 0; j < count; j++)
405     text->push_back(kPopDirectionalFormatting);
406 }
407 
SanitizeUserSuppliedString(std::u16string * text)408 void SanitizeUserSuppliedString(std::u16string* text) {
409   EnsureTerminatedDirectionalFormatting(text);
410   AdjustStringForLocaleDirection(text);
411 }
412 
StringContainsStrongRTLChars(const std::u16string & text)413 bool StringContainsStrongRTLChars(const std::u16string& text) {
414   const char16_t* string = text.c_str();
415   size_t length = text.length();
416   size_t position = 0;
417   while (position < length) {
418     UChar32 character;
419     size_t next_position = position;
420     U16_NEXT(string, next_position, length, character);
421 
422     // Now that we have the character, we use ICU in order to query for the
423     // appropriate Unicode BiDi character type.
424     int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
425     if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
426       return true;
427 
428     position = next_position;
429   }
430 
431   return false;
432 }
433 
WrapStringWithLTRFormatting(std::u16string * text)434 void WrapStringWithLTRFormatting(std::u16string* text) {
435   if (text->empty())
436     return;
437 
438   // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
439   text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
440                kLeftToRightEmbeddingMark);
441 
442   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
443   text->push_back(kPopDirectionalFormatting);
444 }
445 
WrapStringWithRTLFormatting(std::u16string * text)446 void WrapStringWithRTLFormatting(std::u16string* text) {
447   if (text->empty())
448     return;
449 
450   // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
451   text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
452                kRightToLeftEmbeddingMark);
453 
454   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
455   text->push_back(kPopDirectionalFormatting);
456 }
457 
WrapPathWithLTRFormatting(const FilePath & path,std::u16string * rtl_safe_path)458 void WrapPathWithLTRFormatting(const FilePath& path,
459                                std::u16string* rtl_safe_path) {
460   // Wrap the overall path with LRE-PDF pair which essentialy marks the
461   // string as a Left-To-Right string.
462   // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
463   rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
464 #if BUILDFLAG(IS_APPLE)
465   rtl_safe_path->append(UTF8ToUTF16(path.value()));
466 #elif BUILDFLAG(IS_WIN)
467   rtl_safe_path->append(AsString16(path.value()));
468 #else  // BUILDFLAG(IS_POSIX) && !BUILDFLAG(IS_APPLE)
469   std::wstring wide_path = base::SysNativeMBToWide(path.value());
470   rtl_safe_path->append(WideToUTF16(wide_path));
471 #endif
472   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
473   rtl_safe_path->push_back(kPopDirectionalFormatting);
474 }
475 
GetDisplayStringInLTRDirectionality(const std::u16string & text)476 std::u16string GetDisplayStringInLTRDirectionality(const std::u16string& text) {
477   // Always wrap the string in RTL UI (it may be appended to RTL string).
478   // Also wrap strings with an RTL first strong character direction in LTR UI.
479   if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) {
480     std::u16string text_mutable(text);
481     WrapStringWithLTRFormatting(&text_mutable);
482     return text_mutable;
483   }
484   return text;
485 }
486 
StripWrappingBidiControlCharacters(const std::u16string & text)487 std::u16string StripWrappingBidiControlCharacters(const std::u16string& text) {
488   if (text.empty())
489     return text;
490   size_t begin_index = 0;
491   char16_t begin = text[begin_index];
492   if (begin == kLeftToRightEmbeddingMark ||
493       begin == kRightToLeftEmbeddingMark ||
494       begin == kLeftToRightOverride ||
495       begin == kRightToLeftOverride)
496     ++begin_index;
497   size_t end_index = text.length() - 1;
498   if (text[end_index] == kPopDirectionalFormatting)
499     --end_index;
500   return text.substr(begin_index, end_index - begin_index + 1);
501 }
502 
503 }  // namespace i18n
504 }  // namespace base
505