xref: /aosp_15_r20/external/lzma/CPP/Common/StringConvert.cpp (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
1 // Common/StringConvert.cpp
2 
3 #include "StdAfx.h"
4 
5 #include "StringConvert.h"
6 
7 #ifndef _WIN32
8 // #include <stdio.h>
9 #include <stdlib.h>
10 #endif
11 
12 #if !defined(_WIN32) || defined(ENV_HAVE_LOCALE)
13 #include "UTFConvert.h"
14 #endif
15 
16 #ifdef ENV_HAVE_LOCALE
17 #include <locale.h>
18 #endif
19 
// Substitute character that the convenience overloads in this file (the ones
// that take no explicit defaultChar) pass to the Unicode -> multi-byte converter.
static const char k_DefultChar = '_';
21 
22 #ifdef _WIN32
23 
24 /*
25 MultiByteToWideChar(CodePage, DWORD dwFlags,
26     LPCSTR lpMultiByteStr, int cbMultiByte,
27     LPWSTR lpWideCharStr, int cchWideChar)
28 
29   if (cbMultiByte == 0)
30     return: 0. ERR: ERROR_INVALID_PARAMETER
31 
32   if (cchWideChar == 0)
33     return: the required buffer size in characters.
34 
35   if (supplied buffer size was not large enough)
36     return: 0. ERR: ERROR_INSUFFICIENT_BUFFER
37     The number of filled characters in lpWideCharStr can be smaller than cchWideChar (if last character is complex)
38 
39   If there are illegal characters:
40     if MB_ERR_INVALID_CHARS is set in dwFlags:
41       - the function stops conversion on illegal character.
42       - Return: 0. ERR: ERROR_NO_UNICODE_TRANSLATION.
43 
44     if MB_ERR_INVALID_CHARS is NOT set in dwFlags:
45       before Vista: illegal character is dropped (skipped). WinXP-64: GetLastError() returns 0.
46       in Vista+:    illegal character is not dropped (MSDN). Undocumented: illegal
47                     character is converted to U+FFFD, which is REPLACEMENT CHARACTER.
48 */
49 
50 
MultiByteToUnicodeString2(UString & dest,const AString & src,UINT codePage)51 void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
52 {
53   dest.Empty();
54   if (src.IsEmpty())
55     return;
56   {
57     /*
58     wchar_t *d = dest.GetBuf(src.Len());
59     const char *s = (const char *)src;
60     unsigned i;
61 
62     for (i = 0;;)
63     {
64       Byte c = (Byte)s[i];
65       if (c >= 0x80 || c == 0)
66         break;
67       d[i++] = (wchar_t)c;
68     }
69 
70     if (i != src.Len())
71     {
72       unsigned len = MultiByteToWideChar(codePage, 0, s + i,
73           src.Len() - i, d + i,
74           src.Len() + 1 - i);
75       if (len == 0)
76         throw 282228;
77       i += len;
78     }
79 
80     d[i] = 0;
81     dest.ReleaseBuf_SetLen(i);
82     */
83     unsigned len = (unsigned)MultiByteToWideChar(codePage, 0, src, (int)src.Len(), NULL, 0);
84     if (len == 0)
85     {
86       if (GetLastError() != 0)
87         throw 282228;
88     }
89     else
90     {
91       len = (unsigned)MultiByteToWideChar(codePage, 0, src, (int)src.Len(), dest.GetBuf(len), (int)len);
92       if (len == 0)
93         throw 282228;
94       dest.ReleaseBuf_SetEnd(len);
95     }
96   }
97 }
98 
99 /*
100   int WideCharToMultiByte(
101       UINT CodePage, DWORD dwFlags,
102       LPCWSTR lpWideCharStr, int cchWideChar,
103       LPSTR lpMultiByteStr, int cbMultiByte,
104       LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar);
105 
106 if (lpDefaultChar == NULL),
107   - it uses system default value.
108 
109 if (CodePage == CP_UTF7 || CodePage == CP_UTF8)
110   if (lpDefaultChar != NULL || lpUsedDefaultChar != NULL)
111     return: 0. ERR: ERROR_INVALID_PARAMETER.
112 
113 The function operates most efficiently, if (lpDefaultChar == NULL && lpUsedDefaultChar == NULL)
114 
115 */
116 
UnicodeStringToMultiByte2(AString & dest,const UString & src,UINT codePage,char defaultChar,bool & defaultCharWasUsed)117 static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
118 {
119   dest.Empty();
120   defaultCharWasUsed = false;
121   if (src.IsEmpty())
122     return;
123   {
124     /*
125     unsigned numRequiredBytes = src.Len() * 2;
126     char *d = dest.GetBuf(numRequiredBytes);
127     const wchar_t *s = (const wchar_t *)src;
128     unsigned i;
129 
130     for (i = 0;;)
131     {
132       wchar_t c = s[i];
133       if (c >= 0x80 || c == 0)
134         break;
135       d[i++] = (char)c;
136     }
137 
138     if (i != src.Len())
139     {
140       BOOL defUsed = FALSE;
141       defaultChar = defaultChar;
142 
143       bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
144       unsigned len = WideCharToMultiByte(codePage, 0, s + i, src.Len() - i,
145           d + i, numRequiredBytes + 1 - i,
146           (isUtf ? NULL : &defaultChar),
147           (isUtf ? NULL : &defUsed));
148       defaultCharWasUsed = (defUsed != FALSE);
149       if (len == 0)
150         throw 282229;
151       i += len;
152     }
153 
154     d[i] = 0;
155     dest.ReleaseBuf_SetLen(i);
156     */
157 
158     /*
159     if (codePage != CP_UTF7)
160     {
161       const wchar_t *s = (const wchar_t *)src;
162       unsigned i;
163       for (i = 0;; i++)
164       {
165         wchar_t c = s[i];
166         if (c >= 0x80 || c == 0)
167           break;
168       }
169 
170       if (s[i] == 0)
171       {
172         char *d = dest.GetBuf(src.Len());
173         for (i = 0;;)
174         {
175           wchar_t c = s[i];
176           if (c == 0)
177             break;
178           d[i++] = (char)c;
179         }
180         d[i] = 0;
181         dest.ReleaseBuf_SetLen(i);
182         return;
183       }
184     }
185     */
186 
187     unsigned len = (unsigned)WideCharToMultiByte(codePage, 0, src, (int)src.Len(), NULL, 0, NULL, NULL);
188     if (len == 0)
189     {
190       if (GetLastError() != 0)
191         throw 282228;
192     }
193     else
194     {
195       BOOL defUsed = FALSE;
196       bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
197       // defaultChar = defaultChar;
198       len = (unsigned)WideCharToMultiByte(codePage, 0, src, (int)src.Len(),
199           dest.GetBuf(len), (int)len,
200           (isUtf ? NULL : &defaultChar),
201           (isUtf ? NULL : &defUsed)
202           );
203       if (!isUtf)
204         defaultCharWasUsed = (defUsed != FALSE);
205       if (len == 0)
206         throw 282228;
207       dest.ReleaseBuf_SetEnd(len);
208     }
209   }
210 }
211 
212 /*
213 #ifndef UNDER_CE
214 AString SystemStringToOemString(const CSysString &src)
215 {
216   AString dest;
217   const unsigned len = src.Len() * 2;
218   CharToOem(src, dest.GetBuf(len));
219   dest.ReleaseBuf_CalcLen(len);
220   return dest;
221 }
222 #endif
223 */
224 
225 #else // _WIN32
226 
227 // #include <stdio.h>
/*
  if (wchar_t is 32-bit (#if WCHAR_MAX > 0xffff),
      and utf-8 string contains big unicode character > 0xffff),
  then we still use 16-bit surrogate pair in UString.
  It simplifies other code where utf-16 encoding is used.
  So we use surrogate-conversion code only in this file.
*/
235 
/*
   mbstowcs() returns error if there is an error in utf-8 stream,
   mbstowcs() returns error if there is a single surrogate code point (d800-dfff) in utf-8 stream
*/
240 
241 /*
242 static void MultiByteToUnicodeString2_Native(UString &dest, const AString &src)
243 {
244   dest.Empty();
245   if (src.IsEmpty())
246     return;
247 
248   const size_t limit = ((size_t)src.Len() + 1) * 2;
249   wchar_t *d = dest.GetBuf((unsigned)limit);
250   const size_t len = mbstowcs(d, src, limit);
251   if (len != (size_t)-1)
252   {
253     dest.ReleaseBuf_SetEnd((unsigned)len);
254     return;
255   }
256   dest.ReleaseBuf_SetEnd(0);
257 }
258 */
259 
// When true, the non-Windows converters in this file use the UTF-8 conversion
// routines regardless of the codePage argument.
// Set_ForceToUTF8() (ENV_HAVE_LOCALE builds) assigns this flag.
bool g_ForceToUTF8 = true; // false;
261 
MultiByteToUnicodeString2(UString & dest,const AString & src,UINT codePage)262 void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
263 {
264   dest.Empty();
265   if (src.IsEmpty())
266     return;
267 
268   if (codePage == CP_UTF8 || g_ForceToUTF8)
269   {
270 #if 1
271     ConvertUTF8ToUnicode(src, dest);
272     return;
273 #endif
274   }
275 
276   const size_t limit = ((size_t)src.Len() + 1) * 2;
277   wchar_t *d = dest.GetBuf((unsigned)limit);
278   const size_t len = mbstowcs(d, src, limit);
279   if (len != (size_t)-1)
280   {
281     dest.ReleaseBuf_SetEnd((unsigned)len);
282 
283 #if WCHAR_MAX > 0xffff
284     d = dest.GetBuf();
285     for (size_t i = 0;; i++)
286     {
287       wchar_t c = d[i];
288       // printf("\ni=%2d c = %4x\n", (unsigned)i, (unsigned)c);
289       if (c == 0)
290         break;
291       if (c >= 0x10000 && c < 0x110000)
292       {
293         UString tempString = d + i;
294         const wchar_t *t = tempString.Ptr();
295 
296         for (;;)
297         {
298           wchar_t w = *t++;
299           // printf("\nchar=%x\n", w);
300           if (w == 0)
301             break;
302           if (i == limit)
303             break; // unexpected error
304           if (w >= 0x10000 && w < 0x110000)
305           {
306 #if 1
307             if (i + 1 == limit)
308               break; // unexpected error
309             w -= 0x10000;
310             d[i++] = (unsigned)0xd800 + (((unsigned)w >> 10) & 0x3ff);
311             w = 0xdc00 + (w & 0x3ff);
312 #else
313             // w = '_'; // for debug
314 #endif
315           }
316           d[i++] = w;
317         }
318         dest.ReleaseBuf_SetEnd((unsigned)i);
319         break;
320       }
321     }
322 
323 #endif
324 
325     /*
326     printf("\nMultiByteToUnicodeString2 (%d) %s\n", (int)src.Len(),  src.Ptr());
327     printf("char:    ");
328     for (unsigned i = 0; i < src.Len(); i++)
329       printf (" %02x", (int)(Byte)src[i]);
330     printf("\n");
331     printf("\n-> (%d) %ls\n", (int)dest.Len(), dest.Ptr());
332     printf("wchar_t: ");
333     for (unsigned i = 0; i < dest.Len(); i++)
334     {
335       printf (" %02x", (int)dest[i]);
336     }
337     printf("\n");
338     */
339 
340     return;
341   }
342 
343   /* if there is mbstowcs() error, we have two ways:
344 
345      1) change 0x80+ characters to some character: '_'
346         in that case we lose data, but we have correct UString()
347         and that scheme can show errors to user in early stages,
348         when file converted back to mbs() cannot be found
349 
350      2) transfer bad characters in some UTF-16 range.
351         it can be non-original Unicode character.
352         but later we still can restore original character.
353   */
354 
355 
356   // printf("\nmbstowcs  ERROR !!!!!! s=%s\n", src.Ptr());
357   {
358     unsigned i;
359     const char *s = (const char *)src;
360     for (i = 0;;)
361     {
362       Byte c = (Byte)s[i];
363       if (c == 0)
364         break;
365       // we can use ascii compatibilty character '_'
366       // if (c > 0x7F) c = '_'; // we replace "bad: character
367       d[i++] = (wchar_t)c;
368     }
369     d[i] = 0;
370     dest.ReleaseBuf_SetLen(i);
371   }
372 }
373 
UnicodeStringToMultiByte2_Native(AString & dest,const UString & src)374 static void UnicodeStringToMultiByte2_Native(AString &dest, const UString &src)
375 {
376   dest.Empty();
377   if (src.IsEmpty())
378     return;
379 
380   const size_t limit = ((size_t)src.Len() + 1) * 6;
381   char *d = dest.GetBuf((unsigned)limit);
382 
383   const size_t len = wcstombs(d, src, limit);
384 
385   if (len != (size_t)-1)
386   {
387     dest.ReleaseBuf_SetEnd((unsigned)len);
388     return;
389   }
390   dest.ReleaseBuf_SetEnd(0);
391 }
392 
393 
UnicodeStringToMultiByte2(AString & dest,const UString & src2,UINT codePage,char defaultChar,bool & defaultCharWasUsed)394 static void UnicodeStringToMultiByte2(AString &dest, const UString &src2, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
395 {
396   // if (codePage == 1234567) // for debug purposes
397   if (codePage == CP_UTF8 || g_ForceToUTF8)
398   {
399 #if 1
400     defaultCharWasUsed = false;
401     ConvertUnicodeToUTF8(src2, dest);
402     return;
403 #endif
404   }
405 
406   UString src = src2;
407 #if WCHAR_MAX > 0xffff
408   {
409     src.Empty();
410     for (unsigned i = 0; i < src2.Len();)
411     {
412       wchar_t c = src2[i++];
413       if (c >= 0xd800 && c < 0xdc00 && i != src2.Len())
414       {
415         const wchar_t c2 = src2[i];
416         if (c2 >= 0xdc00 && c2 < 0xe000)
417         {
418 #if 1
419           // printf("\nSurragate [%d]: %4x %4x -> ", i, (int)c, (int)c2);
420           c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
421           // printf("%4x\n", (int)c);
422           i++;
423 #else
424           // c = '_'; // for debug
425 #endif
426         }
427       }
428       src += c;
429     }
430   }
431 #endif
432 
433   dest.Empty();
434   defaultCharWasUsed = false;
435   if (src.IsEmpty())
436     return;
437 
438   const size_t len = wcstombs(NULL, src, 0);
439 
440   if (len != (size_t)-1)
441   {
442     const unsigned limit = ((unsigned)len);
443     if (limit == len)
444     {
445       char *d = dest.GetBuf(limit);
446 
447       /*
448       {
449         printf("\nwcstombs; len = %d %ls \n", (int)src.Len(), src.Ptr());
450         for (unsigned i = 0; i < src.Len(); i++)
451           printf (" %02x", (int)src[i]);
452         printf("\n");
453         printf("\ndest Limit = %d \n", limit);
454       }
455       */
456 
457       const size_t len2 = wcstombs(d, src, len + 1);
458 
459       if (len2 != (size_t)-1 && len2 <= limit)
460       {
461         /*
462         printf("\nOK : destLen = %d : %s\n", (int)len, dest.Ptr());
463         for (unsigned i = 0; i < len2; i++)
464           printf(" %02x", (int)(Byte)dest[i]);
465         printf("\n");
466         */
467         dest.ReleaseBuf_SetEnd((unsigned)len2);
468         return;
469       }
470     }
471   }
472 
473   {
474     const wchar_t *s = (const wchar_t *)src;
475     char *d = dest.GetBuf(src.Len());
476 
477     unsigned i;
478     for (i = 0;;)
479     {
480       wchar_t c = s[i];
481       if (c == 0)
482         break;
483       if (c >=
484             0x100
485             // 0x80
486           )
487       {
488         c = defaultChar;
489         defaultCharWasUsed = true;
490       }
491 
492       d[i++] = (char)c;
493     }
494     d[i] = 0;
495     dest.ReleaseBuf_SetLen(i);
496     /*
497     printf("\nUnicodeStringToMultiByte2; len = %d \n", (int)src.Len());
498     printf("ERROR: %s\n", dest.Ptr());
499     */
500   }
501 }
502 
503 #endif // _WIN32
504 
505 
MultiByteToUnicodeString(const AString & src,UINT codePage)506 UString MultiByteToUnicodeString(const AString &src, UINT codePage)
507 {
508   UString dest;
509   MultiByteToUnicodeString2(dest, src, codePage);
510   return dest;
511 }
512 
MultiByteToUnicodeString(const char * src,UINT codePage)513 UString MultiByteToUnicodeString(const char *src, UINT codePage)
514 {
515   return MultiByteToUnicodeString(AString(src), codePage);
516 }
517 
518 
UnicodeStringToMultiByte2(AString & dest,const UString & src,UINT codePage)519 void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage)
520 {
521   bool defaultCharWasUsed;
522   UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
523 }
524 
UnicodeStringToMultiByte(const UString & src,UINT codePage,char defaultChar,bool & defaultCharWasUsed)525 AString UnicodeStringToMultiByte(const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
526 {
527   AString dest;
528   UnicodeStringToMultiByte2(dest, src, codePage, defaultChar, defaultCharWasUsed);
529   return dest;
530 }
531 
UnicodeStringToMultiByte(const UString & src,UINT codePage)532 AString UnicodeStringToMultiByte(const UString &src, UINT codePage)
533 {
534   AString dest;
535   bool defaultCharWasUsed;
536   UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
537   return dest;
538 }
539 
540 
541 
542 
543 #if !defined(_WIN32) || defined(ENV_HAVE_LOCALE)
544 
545 #ifdef _WIN32
546 #define U_to_A(a, b, c)  UnicodeStringToMultiByte2
547 // #define A_to_U(a, b, c)  MultiByteToUnicodeString2
548 #else
549 // void MultiByteToUnicodeString2_Native(UString &dest, const AString &src);
550 #define U_to_A(a, b, c)  UnicodeStringToMultiByte2_Native(a, b)
551 // #define A_to_U(a, b, c)  MultiByteToUnicodeString2_Native(a, b)
552 #endif
553 
IsNativeUTF8()554 bool IsNativeUTF8()
555 {
556   UString u;
557   AString a, a2;
558   // for (unsigned c = 0x80; c < (UInt32)0x10000; c += (c >> 9) + 1)
559   for (unsigned c = 0x80; c < (UInt32)0xD000; c += (c >> 2) + 1)
560   {
561     u.Empty();
562     u += (wchar_t)c;
563     /*
564     if (Unicode_Is_There_Utf16SurrogateError(u))
565       continue;
566     #ifndef _WIN32
567     if (Unicode_Is_There_BmpEscape(u))
568       continue;
569     #endif
570     */
571     ConvertUnicodeToUTF8(u, a);
572     U_to_A(a2, u, CP_OEMCP);
573     if (a != a2)
574       return false;
575   }
576   return true;
577 }
578 
579 #endif
580 
581 
582 #ifdef ENV_HAVE_LOCALE
583 
// Returns the name of the program's current LC_CTYPE locale.
// The result is never NULL: "C" is returned if setlocale() gives no name.
const char *GetLocale(void)
{
  #ifdef ENV_HAVE_LOCALE
    // a NULL locale argument only queries the current setting
    const char *current = setlocale(LC_CTYPE, NULL);
    // ubuntu returns "C" after program start
    return current ? current : "C";
  #elif defined(LOCALE_IS_UTF8)
    return "utf8";
  #else
    return "C";
  #endif
}
606 
607 #ifdef _WIN32
Set_ForceToUTF8(bool)608   static void Set_ForceToUTF8(bool) {}
609 #else
Set_ForceToUTF8(bool val)610   static void Set_ForceToUTF8(bool val) { g_ForceToUTF8 = val; }
611 #endif
612 
Is_Default_Basic_Locale(const char * locale)613 static bool Is_Default_Basic_Locale(const char *locale)
614 {
615   const AString a (locale);
616   if (a.IsEqualTo_Ascii_NoCase("")
617       || a.IsEqualTo_Ascii_NoCase("C")
618       || a.IsEqualTo_Ascii_NoCase("POSIX"))
619       return true;
620   return false;
621 }
622 
Is_Default_Basic_Locale()623 static bool Is_Default_Basic_Locale()
624 {
625   return Is_Default_Basic_Locale(GetLocale());
626 }
627 
628 
// Selects the process locale and decides whether the converters in this file
// must be forced to UTF-8 (see Set_ForceToUTF8()).
//
// Up to three setlocale(LC_ALL, ...) attempts are made:
//   attempt 0: ""  - locale taken from the environment
//   attempt 1: an explicit UTF-8 locale ("C.UTF-8", or "en_US.UTF-8" on macOS)
//   attempt 2: ""  - back to the environment locale
// After each attempt:
//   - if IsNativeUTF8() reports UTF-8, UTF-8 is forced and the function returns;
//   - if the current locale is not a basic one ("", "C", "POSIX"), the search
//     stops, because a non-default non-UTF locale is one we want to use.
// Finally UTF-8 is forced if the native conversion is UTF-8 or the current
// locale is a basic one; otherwise the native locale conversion is used.
void MY_SetLocale()
{
  #ifdef ENV_HAVE_LOCALE

  const unsigned start = 0;
  // const unsigned lim = 0;
  const unsigned lim = 3;

  for (unsigned i = start; i < lim; i++)
  {
    /*
    man7: "If locale is an empty string, "", each part of the locale that
    should be modified is set according to the environment variables.
    for glibc: first from the user's environment variables:
      1) the environment variable LC_ALL,
      2) environment variable with the same name as the category
      3) the environment variable LANG
    The locale "C" or "POSIX" is a portable locale; it exists on all conforming systems.

    for WIN32 : MSDN :
      Sets the locale to the default, which is the user-default
      ANSI code page obtained from the operating system.
      The locale name is set to the value returned by GetUserDefaultLocaleName.
      The code page is set to the value returned by GetACP
    */
    const char *newLocale = "";

    #ifdef __APPLE__

    /* look also CFLocale
       there is no C.UTF-8 in macos
       macos has UTF-8 locale only with some language like en_US.UTF-8
       what is best way to set UTF-8 locale in macos? */
    if (i == 1)
      newLocale = "en_US.UTF-8";

    /* file open with non-utf8 sequences returns
      #define EILSEQ    92    // "Illegal byte sequence"
    */
    #else
    if (i == 1)
    {
      newLocale = "C.UTF-8";    // main UTF-8 locale in ubuntu
      // newLocale = ".utf8";    // supported in new Windows 10 build 17134 (April 2018 Update), the Universal C Runtime
      // newLocale = "en_US.utf8"; // supported by ubuntu ?
      // newLocale = "en_US.UTF-8";
      /* setlocale() in ubuntu allows locales with minor character changes in strings
        "en_US.UTF-8" /  "en_US.utf8" */
    }
    #endif

    // The return value is not checked here: the resulting locale is
    // requested again via GetLocale() right below.
    setlocale(LC_ALL, newLocale);

    // request current locale of program
    const char *locale = GetLocale();
    if (locale)
    {
      if (IsNativeUTF8())
      {
        Set_ForceToUTF8(true);
        return;
      }
      if (!Is_Default_Basic_Locale(locale))
      {
        // if there is some non-default and non-utf locale, we want to use it
        break; // comment it for debug
      }
    }
  }

  // Short-circuit order matters: Is_Default_Basic_Locale() is only
  // evaluated if the native conversion is not UTF-8.
  Set_ForceToUTF8(IsNativeUTF8() || Is_Default_Basic_Locale());

  #elif defined(LOCALE_IS_UTF8)
    // assume LC_CTYPE="utf8"
  #else
    // assume LC_CTYPE="C"
  #endif
}
762 #endif
763