xref: /aosp_15_r20/external/cronet/base/i18n/file_util_icu.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1*6777b538SAndroid Build Coastguard Worker // Copyright 2012 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker 
5*6777b538SAndroid Build Coastguard Worker // File utilities that use the ICU library go in this file.
6*6777b538SAndroid Build Coastguard Worker 
7*6777b538SAndroid Build Coastguard Worker #include "base/i18n/file_util_icu.h"
8*6777b538SAndroid Build Coastguard Worker 
9*6777b538SAndroid Build Coastguard Worker #include <stdint.h>
10*6777b538SAndroid Build Coastguard Worker 
11*6777b538SAndroid Build Coastguard Worker #include "base/check.h"
12*6777b538SAndroid Build Coastguard Worker #include "base/files/file_path.h"
13*6777b538SAndroid Build Coastguard Worker #include "base/i18n/icu_string_conversions.h"
14*6777b538SAndroid Build Coastguard Worker #include "base/i18n/string_compare.h"
15*6777b538SAndroid Build Coastguard Worker #include "base/memory/singleton.h"
16*6777b538SAndroid Build Coastguard Worker #include "base/numerics/safe_conversions.h"
17*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_util.h"
18*6777b538SAndroid Build Coastguard Worker #include "base/strings/sys_string_conversions.h"
19*6777b538SAndroid Build Coastguard Worker #include "base/strings/utf_string_conversions.h"
20*6777b538SAndroid Build Coastguard Worker #include "build/build_config.h"
21*6777b538SAndroid Build Coastguard Worker #include "build/chromeos_buildflags.h"
22*6777b538SAndroid Build Coastguard Worker #include "third_party/icu/source/common/unicode/uniset.h"
23*6777b538SAndroid Build Coastguard Worker #include "third_party/icu/source/i18n/unicode/coll.h"
24*6777b538SAndroid Build Coastguard Worker 
25*6777b538SAndroid Build Coastguard Worker namespace base {
26*6777b538SAndroid Build Coastguard Worker namespace i18n {
27*6777b538SAndroid Build Coastguard Worker 
28*6777b538SAndroid Build Coastguard Worker namespace {
29*6777b538SAndroid Build Coastguard Worker 
30*6777b538SAndroid Build Coastguard Worker class IllegalCharacters {
31*6777b538SAndroid Build Coastguard Worker  public:
32*6777b538SAndroid Build Coastguard Worker   IllegalCharacters(const IllegalCharacters&) = delete;
33*6777b538SAndroid Build Coastguard Worker   IllegalCharacters& operator=(const IllegalCharacters&) = delete;
34*6777b538SAndroid Build Coastguard Worker 
GetInstance()35*6777b538SAndroid Build Coastguard Worker   static IllegalCharacters* GetInstance() {
36*6777b538SAndroid Build Coastguard Worker     return Singleton<IllegalCharacters>::get();
37*6777b538SAndroid Build Coastguard Worker   }
38*6777b538SAndroid Build Coastguard Worker 
IsDisallowedEverywhere(UChar32 ucs4) const39*6777b538SAndroid Build Coastguard Worker   bool IsDisallowedEverywhere(UChar32 ucs4) const {
40*6777b538SAndroid Build Coastguard Worker     return !!illegal_anywhere_.contains(ucs4);
41*6777b538SAndroid Build Coastguard Worker   }
42*6777b538SAndroid Build Coastguard Worker 
IsDisallowedLeadingOrTrailing(UChar32 ucs4) const43*6777b538SAndroid Build Coastguard Worker   bool IsDisallowedLeadingOrTrailing(UChar32 ucs4) const {
44*6777b538SAndroid Build Coastguard Worker     return !!illegal_at_ends_.contains(ucs4);
45*6777b538SAndroid Build Coastguard Worker   }
46*6777b538SAndroid Build Coastguard Worker 
47*6777b538SAndroid Build Coastguard Worker #if BUILDFLAG(IS_WIN)
IsDisallowedShortNameCharacter(UChar32 ucs4) const48*6777b538SAndroid Build Coastguard Worker   bool IsDisallowedShortNameCharacter(UChar32 ucs4) const {
49*6777b538SAndroid Build Coastguard Worker     return !!illegal_in_short_filenames_.contains(ucs4);
50*6777b538SAndroid Build Coastguard Worker   }
51*6777b538SAndroid Build Coastguard Worker 
IsDisallowedIfMayBeShortName(UChar32 ucs4) const52*6777b538SAndroid Build Coastguard Worker   bool IsDisallowedIfMayBeShortName(UChar32 ucs4) const {
53*6777b538SAndroid Build Coastguard Worker     return !!required_to_be_a_short_filename_.contains(ucs4);
54*6777b538SAndroid Build Coastguard Worker   }
55*6777b538SAndroid Build Coastguard Worker 
56*6777b538SAndroid Build Coastguard Worker   template <typename StringT>
HasValidDotPositionForShortName(const StringT & s) const57*6777b538SAndroid Build Coastguard Worker   bool HasValidDotPositionForShortName(const StringT& s) const {
58*6777b538SAndroid Build Coastguard Worker     auto first_dot = s.find_first_of('.');
59*6777b538SAndroid Build Coastguard Worker     // Short names are not required to have a "." period character...
60*6777b538SAndroid Build Coastguard Worker     if (first_dot == std::string::npos) {
61*6777b538SAndroid Build Coastguard Worker       return s.size() <= 8;
62*6777b538SAndroid Build Coastguard Worker     }
63*6777b538SAndroid Build Coastguard Worker     // ...but they must not contain more than one "." period character...
64*6777b538SAndroid Build Coastguard Worker     if (first_dot != s.find_last_of('.')) {
65*6777b538SAndroid Build Coastguard Worker       return false;
66*6777b538SAndroid Build Coastguard Worker     }
67*6777b538SAndroid Build Coastguard Worker     // ... and must contain a basename of 1-8 characters, optionally with one
68*6777b538SAndroid Build Coastguard Worker     // "." period character followed by an extension no more than 3 characters
69*6777b538SAndroid Build Coastguard Worker     // in length.
70*6777b538SAndroid Build Coastguard Worker     return first_dot > 0 && first_dot <= 8 && first_dot + 4 >= s.size();
71*6777b538SAndroid Build Coastguard Worker   }
72*6777b538SAndroid Build Coastguard Worker 
73*6777b538SAndroid Build Coastguard Worker   // Returns whether `s` could possibly be in the 8.3 name format AND contains a
74*6777b538SAndroid Build Coastguard Worker   // '~' character, which may interact poorly with short filenames on VFAT. See
75*6777b538SAndroid Build Coastguard Worker   // https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-cifs/09c2ccc8-4aaf-439f-9b4e-13b3fe85a4cf.
CouldBeInvalidShortName(const std::u16string & s) const76*6777b538SAndroid Build Coastguard Worker   bool CouldBeInvalidShortName(const std::u16string& s) const {
77*6777b538SAndroid Build Coastguard Worker     if (s.size() > 12 ||
78*6777b538SAndroid Build Coastguard Worker         !required_to_be_a_short_filename_.containsSome(icu::UnicodeString(
79*6777b538SAndroid Build Coastguard Worker             /*isTerminated=*/false, s.c_str(), s.size())) ||
80*6777b538SAndroid Build Coastguard Worker         !illegal_in_short_filenames_.containsNone(
81*6777b538SAndroid Build Coastguard Worker             icu::UnicodeString(/*isTerminated=*/false, s.c_str(), s.size()))) {
82*6777b538SAndroid Build Coastguard Worker       return false;
83*6777b538SAndroid Build Coastguard Worker     }
84*6777b538SAndroid Build Coastguard Worker     return HasValidDotPositionForShortName<std::u16string>(s);
85*6777b538SAndroid Build Coastguard Worker   }
86*6777b538SAndroid Build Coastguard Worker #endif
87*6777b538SAndroid Build Coastguard Worker 
IsAllowedName(const std::u16string & s) const88*6777b538SAndroid Build Coastguard Worker   bool IsAllowedName(const std::u16string& s) const {
89*6777b538SAndroid Build Coastguard Worker     return s.empty() || (!!illegal_anywhere_.containsNone(icu::UnicodeString(
90*6777b538SAndroid Build Coastguard Worker                              /*isTerminated=*/false, s.c_str(), s.size())) &&
91*6777b538SAndroid Build Coastguard Worker                          !illegal_at_ends_.contains(*s.begin()) &&
92*6777b538SAndroid Build Coastguard Worker                          !illegal_at_ends_.contains(*s.rbegin())
93*6777b538SAndroid Build Coastguard Worker #if BUILDFLAG(IS_WIN)
94*6777b538SAndroid Build Coastguard Worker                          && !CouldBeInvalidShortName(s)
95*6777b538SAndroid Build Coastguard Worker #endif
96*6777b538SAndroid Build Coastguard Worker                         );
97*6777b538SAndroid Build Coastguard Worker   }
98*6777b538SAndroid Build Coastguard Worker 
99*6777b538SAndroid Build Coastguard Worker  private:
100*6777b538SAndroid Build Coastguard Worker   friend struct DefaultSingletonTraits<IllegalCharacters>;
101*6777b538SAndroid Build Coastguard Worker 
102*6777b538SAndroid Build Coastguard Worker   IllegalCharacters();
103*6777b538SAndroid Build Coastguard Worker   ~IllegalCharacters() = default;
104*6777b538SAndroid Build Coastguard Worker 
105*6777b538SAndroid Build Coastguard Worker   // Set of characters considered invalid anywhere inside a filename.
106*6777b538SAndroid Build Coastguard Worker   icu::UnicodeSet illegal_anywhere_;
107*6777b538SAndroid Build Coastguard Worker 
108*6777b538SAndroid Build Coastguard Worker   // Set of characters considered invalid at either end of a filename.
109*6777b538SAndroid Build Coastguard Worker   icu::UnicodeSet illegal_at_ends_;
110*6777b538SAndroid Build Coastguard Worker 
111*6777b538SAndroid Build Coastguard Worker   // #if BUILDFLAG(IS_WIN)
112*6777b538SAndroid Build Coastguard Worker   // Set of characters which are guaranteed to exist if the filename is to be of
113*6777b538SAndroid Build Coastguard Worker   // the problematic VFAT 8.3 short filename format.
114*6777b538SAndroid Build Coastguard Worker   icu::UnicodeSet required_to_be_a_short_filename_;
115*6777b538SAndroid Build Coastguard Worker   // Set of characters which are not allowed in VFAT 8.3 short filenames. If
116*6777b538SAndroid Build Coastguard Worker   // any of these characters are present, the file cannot be a short filename.
117*6777b538SAndroid Build Coastguard Worker   icu::UnicodeSet illegal_in_short_filenames_;
118*6777b538SAndroid Build Coastguard Worker   // #endif
119*6777b538SAndroid Build Coastguard Worker };
120*6777b538SAndroid Build Coastguard Worker 
IllegalCharacters()121*6777b538SAndroid Build Coastguard Worker IllegalCharacters::IllegalCharacters() {
122*6777b538SAndroid Build Coastguard Worker   UErrorCode status = U_ZERO_ERROR;
123*6777b538SAndroid Build Coastguard Worker   // Control characters, formatting characters, non-characters, path separators,
124*6777b538SAndroid Build Coastguard Worker   // and some printable ASCII characters regarded as dangerous ('"*/:<>?\\').
125*6777b538SAndroid Build Coastguard Worker   // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx
126*6777b538SAndroid Build Coastguard Worker   // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx
127*6777b538SAndroid Build Coastguard Worker   // Note that code points in the "Other, Format" (Cf) category are ignored on
128*6777b538SAndroid Build Coastguard Worker   // HFS+ despite the ZERO_WIDTH_JOINER and ZERO_WIDTH_NON-JOINER being
129*6777b538SAndroid Build Coastguard Worker   // legitimate in Arabic and some S/SE Asian scripts. In addition tilde (~) is
130*6777b538SAndroid Build Coastguard Worker   // also excluded in some circumstances due to the possibility of interacting
131*6777b538SAndroid Build Coastguard Worker   // poorly with short filenames on VFAT. (Related to CVE-2014-9390)
132*6777b538SAndroid Build Coastguard Worker   illegal_anywhere_ = icu::UnicodeSet(
133*6777b538SAndroid Build Coastguard Worker       UNICODE_STRING_SIMPLE("[[\"*/:<>?\\\\|][:Cc:][:Cf:]]"), status);
134*6777b538SAndroid Build Coastguard Worker   DCHECK(U_SUCCESS(status));
135*6777b538SAndroid Build Coastguard Worker   // Add non-characters. If this becomes a performance bottleneck by
136*6777b538SAndroid Build Coastguard Worker   // any chance, do not add these to |set| and change IsFilenameLegal()
137*6777b538SAndroid Build Coastguard Worker   // to check |ucs4 & 0xFFFEu == 0xFFFEu|, in addition to calling
138*6777b538SAndroid Build Coastguard Worker   // IsAllowedName().
139*6777b538SAndroid Build Coastguard Worker   illegal_anywhere_.add(0xFDD0, 0xFDEF);
140*6777b538SAndroid Build Coastguard Worker   for (int i = 0; i <= 0x10; ++i) {
141*6777b538SAndroid Build Coastguard Worker     int plane_base = 0x10000 * i;
142*6777b538SAndroid Build Coastguard Worker     illegal_anywhere_.add(plane_base + 0xFFFE, plane_base + 0xFFFF);
143*6777b538SAndroid Build Coastguard Worker   }
144*6777b538SAndroid Build Coastguard Worker   illegal_anywhere_.freeze();
145*6777b538SAndroid Build Coastguard Worker 
146*6777b538SAndroid Build Coastguard Worker   illegal_at_ends_ =
147*6777b538SAndroid Build Coastguard Worker       icu::UnicodeSet(UNICODE_STRING_SIMPLE("[[:WSpace:][.~]]"), status);
148*6777b538SAndroid Build Coastguard Worker   DCHECK(U_SUCCESS(status));
149*6777b538SAndroid Build Coastguard Worker   illegal_at_ends_.freeze();
150*6777b538SAndroid Build Coastguard Worker 
151*6777b538SAndroid Build Coastguard Worker #if BUILDFLAG(IS_WIN)
152*6777b538SAndroid Build Coastguard Worker   required_to_be_a_short_filename_ =
153*6777b538SAndroid Build Coastguard Worker       icu::UnicodeSet(UNICODE_STRING_SIMPLE("[[~]]"), status);
154*6777b538SAndroid Build Coastguard Worker   DCHECK(U_SUCCESS(status));
155*6777b538SAndroid Build Coastguard Worker   required_to_be_a_short_filename_.freeze();
156*6777b538SAndroid Build Coastguard Worker 
157*6777b538SAndroid Build Coastguard Worker   illegal_in_short_filenames_ = icu::UnicodeSet(
158*6777b538SAndroid Build Coastguard Worker       UNICODE_STRING_SIMPLE("[[:WSpace:][\"\\/[]:+|<>=;?,*]]"), status);
159*6777b538SAndroid Build Coastguard Worker   DCHECK(U_SUCCESS(status));
160*6777b538SAndroid Build Coastguard Worker   illegal_in_short_filenames_.freeze();
161*6777b538SAndroid Build Coastguard Worker #endif
162*6777b538SAndroid Build Coastguard Worker }
163*6777b538SAndroid Build Coastguard Worker 
164*6777b538SAndroid Build Coastguard Worker // Returns the code point at position |cursor| in |file_name|, and increments
165*6777b538SAndroid Build Coastguard Worker // |cursor| to the next position.
GetNextCodePoint(const FilePath::StringType * const file_name,int & cursor)166*6777b538SAndroid Build Coastguard Worker UChar32 GetNextCodePoint(const FilePath::StringType* const file_name,
167*6777b538SAndroid Build Coastguard Worker                          int& cursor) {
168*6777b538SAndroid Build Coastguard Worker   UChar32 code_point;
169*6777b538SAndroid Build Coastguard Worker #if BUILDFLAG(IS_WIN)
170*6777b538SAndroid Build Coastguard Worker   // Windows uses UTF-16 encoding for filenames.
171*6777b538SAndroid Build Coastguard Worker   U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),
172*6777b538SAndroid Build Coastguard Worker            code_point);
173*6777b538SAndroid Build Coastguard Worker #elif BUILDFLAG(IS_POSIX) || BUILDFLAG(IS_FUCHSIA)
174*6777b538SAndroid Build Coastguard Worker   // Mac and Chrome OS use UTF-8 encoding for filenames.
175*6777b538SAndroid Build Coastguard Worker   // Linux doesn't actually define file system encoding. Try to parse as
176*6777b538SAndroid Build Coastguard Worker   // UTF-8.
177*6777b538SAndroid Build Coastguard Worker   U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),
178*6777b538SAndroid Build Coastguard Worker           code_point);
179*6777b538SAndroid Build Coastguard Worker #else
180*6777b538SAndroid Build Coastguard Worker #error Unsupported platform
181*6777b538SAndroid Build Coastguard Worker #endif
182*6777b538SAndroid Build Coastguard Worker   return code_point;
183*6777b538SAndroid Build Coastguard Worker }
184*6777b538SAndroid Build Coastguard Worker 
185*6777b538SAndroid Build Coastguard Worker }  // namespace
186*6777b538SAndroid Build Coastguard Worker 
IsFilenameLegal(const std::u16string & file_name)187*6777b538SAndroid Build Coastguard Worker bool IsFilenameLegal(const std::u16string& file_name) {
188*6777b538SAndroid Build Coastguard Worker   return IllegalCharacters::GetInstance()->IsAllowedName(file_name);
189*6777b538SAndroid Build Coastguard Worker }
190*6777b538SAndroid Build Coastguard Worker 
ReplaceIllegalCharactersInPath(FilePath::StringType * file_name,char replace_char)191*6777b538SAndroid Build Coastguard Worker void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,
192*6777b538SAndroid Build Coastguard Worker                                     char replace_char) {
193*6777b538SAndroid Build Coastguard Worker   IllegalCharacters* illegal = IllegalCharacters::GetInstance();
194*6777b538SAndroid Build Coastguard Worker 
195*6777b538SAndroid Build Coastguard Worker   DCHECK(!(illegal->IsDisallowedEverywhere(replace_char)));
196*6777b538SAndroid Build Coastguard Worker   const bool is_replace_char_illegal_at_ends =
197*6777b538SAndroid Build Coastguard Worker       illegal->IsDisallowedLeadingOrTrailing(replace_char);
198*6777b538SAndroid Build Coastguard Worker #if BUILDFLAG(IS_WIN)
199*6777b538SAndroid Build Coastguard Worker   bool could_be_short_name =
200*6777b538SAndroid Build Coastguard Worker       file_name->size() <= 12 &&
201*6777b538SAndroid Build Coastguard Worker       illegal->HasValidDotPositionForShortName<FilePath::StringType>(
202*6777b538SAndroid Build Coastguard Worker           *file_name);
203*6777b538SAndroid Build Coastguard Worker #endif
204*6777b538SAndroid Build Coastguard Worker   // Keep track of the earliest and latest legal begin/end characters and file-
205*6777b538SAndroid Build Coastguard Worker   // extension separator encountered, -1 if none yet.
206*6777b538SAndroid Build Coastguard Worker   int unreplaced_legal_range_begin = -1;
207*6777b538SAndroid Build Coastguard Worker   int unreplaced_legal_range_end = -1;
208*6777b538SAndroid Build Coastguard Worker   int last_extension_separator = -1;
209*6777b538SAndroid Build Coastguard Worker   static const UChar32 kExtensionSeparator =
210*6777b538SAndroid Build Coastguard Worker       checked_cast<UChar32>(FilePath::kExtensionSeparator);
211*6777b538SAndroid Build Coastguard Worker 
212*6777b538SAndroid Build Coastguard Worker   int cursor = 0;  // The ICU macros expect an int.
213*6777b538SAndroid Build Coastguard Worker 
214*6777b538SAndroid Build Coastguard Worker #if BUILDFLAG(IS_WIN)
215*6777b538SAndroid Build Coastguard Worker   // Loop through the file name, looking for any characters which are invalid in
216*6777b538SAndroid Build Coastguard Worker   // an 8.3 short file name. If any of these characters exist, it's not an 8.3
217*6777b538SAndroid Build Coastguard Worker   // file name and we don't need to replace the '~' character.
218*6777b538SAndroid Build Coastguard Worker   while (could_be_short_name && cursor < static_cast<int>(file_name->size())) {
219*6777b538SAndroid Build Coastguard Worker     const UChar32 code_point = GetNextCodePoint(file_name, cursor);
220*6777b538SAndroid Build Coastguard Worker     could_be_short_name = !illegal->IsDisallowedShortNameCharacter(code_point);
221*6777b538SAndroid Build Coastguard Worker   }
222*6777b538SAndroid Build Coastguard Worker #endif
223*6777b538SAndroid Build Coastguard Worker 
224*6777b538SAndroid Build Coastguard Worker   cursor = 0;
225*6777b538SAndroid Build Coastguard Worker   while (cursor < static_cast<int>(file_name->size())) {
226*6777b538SAndroid Build Coastguard Worker     int char_begin = cursor;
227*6777b538SAndroid Build Coastguard Worker     const UChar32 code_point = GetNextCodePoint(file_name, cursor);
228*6777b538SAndroid Build Coastguard Worker 
229*6777b538SAndroid Build Coastguard Worker     const bool is_illegal_at_ends =
230*6777b538SAndroid Build Coastguard Worker         illegal->IsDisallowedLeadingOrTrailing(code_point);
231*6777b538SAndroid Build Coastguard Worker 
232*6777b538SAndroid Build Coastguard Worker     if (illegal->IsDisallowedEverywhere(code_point) ||
233*6777b538SAndroid Build Coastguard Worker #if BUILDFLAG(IS_WIN)
234*6777b538SAndroid Build Coastguard Worker         (could_be_short_name &&
235*6777b538SAndroid Build Coastguard Worker          illegal->IsDisallowedIfMayBeShortName(code_point)) ||
236*6777b538SAndroid Build Coastguard Worker #endif
237*6777b538SAndroid Build Coastguard Worker         ((char_begin == 0 || cursor == static_cast<int>(file_name->length())) &&
238*6777b538SAndroid Build Coastguard Worker          is_illegal_at_ends && !is_replace_char_illegal_at_ends)) {
239*6777b538SAndroid Build Coastguard Worker       file_name->replace(char_begin, cursor - char_begin, 1, replace_char);
240*6777b538SAndroid Build Coastguard Worker       // We just made the potentially multi-byte/word char into one that only
241*6777b538SAndroid Build Coastguard Worker       // takes one byte/word, so need to adjust the cursor to point to the next
242*6777b538SAndroid Build Coastguard Worker       // character again.
243*6777b538SAndroid Build Coastguard Worker       cursor = char_begin + 1;
244*6777b538SAndroid Build Coastguard Worker     } else if (!is_illegal_at_ends) {
245*6777b538SAndroid Build Coastguard Worker       if (unreplaced_legal_range_begin == -1)
246*6777b538SAndroid Build Coastguard Worker         unreplaced_legal_range_begin = char_begin;
247*6777b538SAndroid Build Coastguard Worker       unreplaced_legal_range_end = cursor;
248*6777b538SAndroid Build Coastguard Worker     }
249*6777b538SAndroid Build Coastguard Worker 
250*6777b538SAndroid Build Coastguard Worker     if (code_point == kExtensionSeparator)
251*6777b538SAndroid Build Coastguard Worker       last_extension_separator = char_begin;
252*6777b538SAndroid Build Coastguard Worker   }
253*6777b538SAndroid Build Coastguard Worker 
254*6777b538SAndroid Build Coastguard Worker   // If |replace_char| is not a legal starting/ending character, ensure that
255*6777b538SAndroid Build Coastguard Worker   // |replace_char| is not the first nor last character in |file_name|.
256*6777b538SAndroid Build Coastguard Worker   if (is_replace_char_illegal_at_ends) {
257*6777b538SAndroid Build Coastguard Worker     if (unreplaced_legal_range_begin == -1) {
258*6777b538SAndroid Build Coastguard Worker       // |file_name| has no characters that are legal at ends; enclose in '_'s.
259*6777b538SAndroid Build Coastguard Worker       file_name->insert(file_name->begin(), FILE_PATH_LITERAL('_'));
260*6777b538SAndroid Build Coastguard Worker       file_name->append(FILE_PATH_LITERAL("_"));
261*6777b538SAndroid Build Coastguard Worker     } else {
262*6777b538SAndroid Build Coastguard Worker       // Trim trailing instances of |replace_char| and other characters that are
263*6777b538SAndroid Build Coastguard Worker       // illegal at ends.
264*6777b538SAndroid Build Coastguard Worker       file_name->erase(unreplaced_legal_range_end, FilePath::StringType::npos);
265*6777b538SAndroid Build Coastguard Worker 
266*6777b538SAndroid Build Coastguard Worker       // Trim leading instances of |replace_char| and other characters that are
267*6777b538SAndroid Build Coastguard Worker       // illegal at ends, while ensuring that the file-extension separator is
268*6777b538SAndroid Build Coastguard Worker       // not removed if present. The file-extension separator is considered the
269*6777b538SAndroid Build Coastguard Worker       // last '.' in |file_name| followed by a legal character.
270*6777b538SAndroid Build Coastguard Worker       if (last_extension_separator != -1 &&
271*6777b538SAndroid Build Coastguard Worker           last_extension_separator == unreplaced_legal_range_begin - 1) {
272*6777b538SAndroid Build Coastguard Worker         // If the file-extension separator is at the start of the resulting
273*6777b538SAndroid Build Coastguard Worker         // |file_name|, prepend '_' instead of trimming it, e.g.,
274*6777b538SAndroid Build Coastguard Worker         // "***.txt" -> "_.txt".
275*6777b538SAndroid Build Coastguard Worker         file_name->erase(0, last_extension_separator);
276*6777b538SAndroid Build Coastguard Worker         file_name->insert(file_name->begin(), FILE_PATH_LITERAL('_'));
277*6777b538SAndroid Build Coastguard Worker       } else {
278*6777b538SAndroid Build Coastguard Worker         file_name->erase(0, unreplaced_legal_range_begin);
279*6777b538SAndroid Build Coastguard Worker       }
280*6777b538SAndroid Build Coastguard Worker     }
281*6777b538SAndroid Build Coastguard Worker     DCHECK(!file_name->empty());
282*6777b538SAndroid Build Coastguard Worker   }
283*6777b538SAndroid Build Coastguard Worker }
284*6777b538SAndroid Build Coastguard Worker 
LocaleAwareCompareFilenames(const FilePath & a,const FilePath & b)285*6777b538SAndroid Build Coastguard Worker bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) {
286*6777b538SAndroid Build Coastguard Worker   UErrorCode error_code = U_ZERO_ERROR;
287*6777b538SAndroid Build Coastguard Worker   // Use the default collator. The default locale should have been properly
288*6777b538SAndroid Build Coastguard Worker   // set by the time this constructor is called.
289*6777b538SAndroid Build Coastguard Worker   std::unique_ptr<icu::Collator> collator(
290*6777b538SAndroid Build Coastguard Worker       icu::Collator::createInstance(error_code));
291*6777b538SAndroid Build Coastguard Worker   DCHECK(U_SUCCESS(error_code));
292*6777b538SAndroid Build Coastguard Worker   // Make it case-sensitive.
293*6777b538SAndroid Build Coastguard Worker   collator->setStrength(icu::Collator::TERTIARY);
294*6777b538SAndroid Build Coastguard Worker 
295*6777b538SAndroid Build Coastguard Worker #if BUILDFLAG(IS_WIN)
296*6777b538SAndroid Build Coastguard Worker   return CompareString16WithCollator(*collator, AsStringPiece16(a.value()),
297*6777b538SAndroid Build Coastguard Worker                                      AsStringPiece16(b.value())) == UCOL_LESS;
298*6777b538SAndroid Build Coastguard Worker 
299*6777b538SAndroid Build Coastguard Worker #elif BUILDFLAG(IS_POSIX) || BUILDFLAG(IS_FUCHSIA)
300*6777b538SAndroid Build Coastguard Worker   // On linux, the file system encoding is not defined. We assume
301*6777b538SAndroid Build Coastguard Worker   // SysNativeMBToWide takes care of it.
302*6777b538SAndroid Build Coastguard Worker   return CompareString16WithCollator(
303*6777b538SAndroid Build Coastguard Worker              *collator, WideToUTF16(SysNativeMBToWide(a.value())),
304*6777b538SAndroid Build Coastguard Worker              WideToUTF16(SysNativeMBToWide(b.value()))) == UCOL_LESS;
305*6777b538SAndroid Build Coastguard Worker #endif
306*6777b538SAndroid Build Coastguard Worker }
307*6777b538SAndroid Build Coastguard Worker 
NormalizeFileNameEncoding(FilePath * file_name)308*6777b538SAndroid Build Coastguard Worker void NormalizeFileNameEncoding(FilePath* file_name) {
309*6777b538SAndroid Build Coastguard Worker #if BUILDFLAG(IS_CHROMEOS_ASH)
310*6777b538SAndroid Build Coastguard Worker   std::string normalized_str;
311*6777b538SAndroid Build Coastguard Worker   if (ConvertToUtf8AndNormalize(file_name->BaseName().value(), kCodepageUTF8,
312*6777b538SAndroid Build Coastguard Worker                                 &normalized_str) &&
313*6777b538SAndroid Build Coastguard Worker       !normalized_str.empty()) {
314*6777b538SAndroid Build Coastguard Worker     *file_name = file_name->DirName().Append(FilePath(normalized_str));
315*6777b538SAndroid Build Coastguard Worker   }
316*6777b538SAndroid Build Coastguard Worker #endif
317*6777b538SAndroid Build Coastguard Worker }
318*6777b538SAndroid Build Coastguard Worker 
319*6777b538SAndroid Build Coastguard Worker }  // namespace i18n
320*6777b538SAndroid Build Coastguard Worker }  // namespace base
321