xref: /aosp_15_r20/external/icu/icu4c/source/test/cintltst/ulocbuildertst.c (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2023 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "cintltst.h"
5 #include "cstring.h"
6 #include "unicode/uloc.h"
7 #include "unicode/ulocbuilder.h"
8 #include "unicode/ulocale.h"
9 
// Stringification helpers. LINE() stringizes its argument exactly as written;
// XLINE() adds one level of expansion first, so XLINE(__LINE__) yields the
// line number as a string literal rather than the text "__LINE__".
#define LINE(s) #s
#define XLINE(s) LINE(s)
// "file:line " string literal identifying the location where WHERE is expanded.
#define WHERE __FILE__ ":" XLINE(__LINE__) " "

#ifndef UPRV_LENGTHOF
// Number of elements of a true array (not a pointer), as an int32_t.
// The whole expansion is parenthesized so it stays a single operand when the
// macro is used inside a larger expression (e.g. as the operand of sizeof).
#define UPRV_LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
#endif
17 void addLocaleBuilderTest(TestNode** root);
18 
// Checks the current state of a builder against an expected BCP 47 tag.
//
// bld      - builder under test.
// expected - language tag that ulocbld_buildLanguageTag() must produce.
// msg      - printf-style format with a single %s, used to report the name of
//            an unexpected error code.
//
// Logs (via log_err) when the builder reports an error, when the status from
// building disagrees with the status from ulocbld_copyErrorTo(), or when the
// produced tag differs from `expected`.
static void Verify(ULocaleBuilder* bld, const char* expected, const char* msg) {
    UErrorCode status = U_ZERO_ERROR;
    UErrorCode copyStatus = U_ZERO_ERROR;
    UErrorCode errorStatus = U_ILLEGAL_ARGUMENT_ERROR;
    char tag[ULOC_FULLNAME_CAPACITY];
    // Start from an empty string so that the strcmp() below never reads
    // indeterminate bytes if the build call leaves the buffer unwritten.
    tag[0] = '\0';

    // Starting from a clean status, a true return means the builder is in error.
    if (ulocbld_copyErrorTo(bld, &copyStatus)) {
        log_err(msg, u_errorName(copyStatus));
    }
    // Passing in a status that already holds a failure must be reported as true.
    if (!ulocbld_copyErrorTo(bld, &errorStatus)) {
        log_err("Should always get the previous error and return false");
    }

    ulocbld_buildLanguageTag(bld, tag, ULOC_FULLNAME_CAPACITY, &status);

    if (U_FAILURE(status)) {
        log_err(msg, u_errorName(status));
    }
    if (status != copyStatus) {
        log_err(msg, u_errorName(status));
    }
    if (strcmp(tag, expected) != 0) {
        log_err("should get \"%s\", but got \"%s\"\n", expected, tag);
    }
}
42 
TestLocaleBuilder(void)43 static void TestLocaleBuilder(void) {
44 
45     // The following test data are copy from
46     // icu4j/main/core/src/test/java/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
47     // "L": +1 = language
48     // "S": +1 = script
49     // "R": +1 = region
50     // "V": +1 = variant
51     // "K": +1 = Unicode locale key / +2 = Unicode locale type
52     // "A": +1 = Unicode locale attribute
53     // "E": +1 = extension letter / +2 = extension value
54     // "P": +1 = private use
55     // "U": +1 = ULocale
56     // "B": +1 = BCP47 language tag
57     // "C": Clear all
58     // "N": Clear extensions
59     // "D": +1 = Unicode locale attribute to be removed
60     // "X": indicates an exception must be thrown
61     // "T": +1 = expected language tag / +2 = expected locale string
62     const char* TESTCASES[][14] = {
63         {"L", "en", "R", "us", "T", "en-US", "en_US"},
64         {"L", "en", "R", "CA", "L", NULL, "T", "und-CA", "_CA"},
65         {"L", "en", "R", "CA", "L", "", "T", "und-CA", "_CA"},
66         {"L", "en", "R", "FR", "L", "fr", "T", "fr-FR", "fr_FR"},
67         {"L", "123", "X"},
68         {"R", "us", "T", "und-US", "_US"},
69         {"R", "usa", "X"},
70         {"R", "123", "L", "it", "R", NULL, "T", "it", "it"},
71         {"R", "123", "L", "it", "R", "", "T", "it", "it"},
72         {"R", "123", "L", "en", "T", "en-123", "en_123"},
73         {"S", "LATN", "L", "DE", "T", "de-Latn", "de_Latn"},
74         {"L", "De", "S", "latn", "R", "de", "S", "", "T", "de-DE", "de_DE"},
75         {"L", "De", "S", "Arab", "R", "de", "S", NULL, "T", "de-DE", "de_DE"},
76         {"S", "latin", "X"},
77         {"V", "1234", "L", "en", "T", "en-1234", "en__1234"},
78         {"V", "1234", "L", "en", "V", "5678", "T", "en-5678", "en__5678"},
79         {"V", "1234", "L", "en", "V", NULL, "T", "en", "en"},
80         {"V", "1234", "L", "en", "V", "", "T", "en", "en"},
81         {"V", "123", "X"},
82         {"U", "en_US", "T", "en-US", "en_US"},
83         {"U", "en_US_WIN", "X"},
84         {"B", "fr-FR-1606nict-u-ca-gregory-x-test", "T",
85           "fr-FR-1606nict-u-ca-gregory-x-test",
86           "fr_FR_1606NICT@calendar=gregorian;x=test"},
87         {"B", "ab-cde-fghij", "T", "cde-fghij", "cde__FGHIJ"},
88         {"B", "und-CA", "T", "und-CA", "_CA"},
89         // Blocked by ICU-20327
90         // {"B", "en-US-x-test-lvariant-var", "T", "en-US-x-test-lvariant-var",
91         // "en_US_VAR@x=test"},
92         {"B", "en-US-VAR", "X"},
93         {"U", "ja_JP@calendar=japanese;currency=JPY", "L", "ko", "T",
94           "ko-JP-u-ca-japanese-cu-jpy", "ko_JP@calendar=japanese;currency=JPY"},
95         {"U", "ja_JP@calendar=japanese;currency=JPY", "K", "ca", NULL, "T",
96           "ja-JP-u-cu-jpy", "ja_JP@currency=JPY"},
97         {"U", "ja_JP@calendar=japanese;currency=JPY", "E", "u",
98           "attr1-ca-gregory", "T", "ja-JP-u-attr1-ca-gregory",
99           "ja_JP@attribute=attr1;calendar=gregorian"},
100         {"U", "en@colnumeric=yes", "K", "kn", "true", "T", "en-u-kn",
101           "en@colnumeric=yes"},
102         {"L", "th", "R", "th", "K", "nu", "thai", "T", "th-TH-u-nu-thai",
103           "th_TH@numbers=thai"},
104         {"U", "zh_Hans", "R", "sg", "K", "ca", "badcalendar", "X"},
105         {"U", "zh_Hans", "R", "sg", "K", "cal", "gregory", "X"},
106         {"E", "z", "ExtZ", "L", "en", "T", "en-z-extz", "en@z=extz"},
107         {"E", "z", "ExtZ", "L", "en", "E", "z", "", "T", "en", "en"},
108         {"E", "z", "ExtZ", "L", "en", "E", "z", NULL, "T", "en", "en"},
109         {"E", "a", "x", "X"},
110         {"E", "a", "abc_def", "T", "und-a-abc-def", "@a=abc-def"},
111         // Design limitation - typeless u extension keyword 0a below is interpreted as a boolean value true/yes.
112         // With the legacy keyword syntax, "yes" is used for such boolean value instead of "true".
113         // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown,
114         // so "yes" is preserved - not mapped to "true". We could change the code to automatically transform
115         // key = alphanum alpha
116         {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a",
117          "en@0a=yes;attribute=aaa-bbb"},
118         {"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu",
119           "fr_FR@x=yoshito-icu"},
120         {"L", "ja", "R", "jp", "K", "ca", "japanese", "T", "ja-JP-u-ca-japanese",
121           "ja_JP@calendar=japanese"},
122         {"K", "co", "PHONEBK", "K", "ca", "gregory", "L", "De", "T",
123           "de-u-ca-gregory-co-phonebk", "de@calendar=gregorian;collation=phonebook"},
124         {"E", "o", "OPQR", "E", "a", "aBcD", "T", "und-a-abcd-o-opqr", "@a=abcd;o=opqr"},
125         {"E", "u", "nu-thai-ca-gregory", "L", "TH", "T", "th-u-ca-gregory-nu-thai",
126           "th@calendar=gregorian;numbers=thai"},
127         {"L", "en", "K", "tz", "usnyc", "R", "US", "T", "en-US-u-tz-usnyc",
128           "en_US@timezone=America/New_York"},
129         {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk",
130           "true", "T", "de-u-co-phonebk-kk-ks-level1",
131           "de@collation=phonebook;colnormalization=yes;colstrength=primary"},
132         {"L", "en", "R", "US", "K", "ca", "gregory", "T", "en-US-u-ca-gregory",
133           "en_US@calendar=gregorian"},
134         {"L", "en", "R", "US", "K", "cal", "gregory", "X"},
135         {"L", "en", "R", "US", "K", "ca", "gregorian", "X"},
136         {"L", "en", "R", "US", "K", "kn", "true", "T", "en-US-u-kn",
137           "en_US@colnumeric=yes"},
138         {"B", "de-DE-u-co-phonebk", "C", "L", "pt", "T", "pt", "pt"},
139         {"B", "ja-jp-u-ca-japanese", "N", "T", "ja-JP", "ja_JP"},
140         {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "T",
141           "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
142         {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "D", "def", "T",
143           "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
144         {"L", "en", "A", "aa", "X"},
145         {"B", "fr-u-attr1-cu-eur", "D", "attribute1", "X"},
146     };
147     UErrorCode status = U_ZERO_ERROR;
148     ULocaleBuilder* bld = ulocbld_open();
149     char tag[ULOC_FULLNAME_CAPACITY];
150     char locale[ULOC_FULLNAME_CAPACITY];
151     for (int tidx = 0; tidx < UPRV_LENGTHOF(TESTCASES); tidx++) {
152         char actions[1000];
153         actions[0] = '\0';
154         for (int p = 0; p < UPRV_LENGTHOF(TESTCASES[tidx]); p++) {
155              if (TESTCASES[tidx][p] == NULL) {
156                  strcpy(actions, " (nullptr)");
157                  break;
158              }
159              if (p > 0) strcpy(actions, " ");
160              strcpy(actions, TESTCASES[tidx][p]);
161         }
162         int i = 0;
163         const char* method;
164         status = U_ZERO_ERROR;
165         ulocbld_clear(bld);
166         while (true) {
167             status = U_ZERO_ERROR;
168             UErrorCode copyStatus = U_ZERO_ERROR;
169             method = TESTCASES[tidx][i++];
170             if (strcmp("L", method) == 0) {
171                 ulocbld_setLanguage(bld, TESTCASES[tidx][i++], -1);
172                 ulocbld_copyErrorTo(bld, &copyStatus);
173                 ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
174             } else if (strcmp("S", method) == 0) {
175                 ulocbld_setScript(bld, TESTCASES[tidx][i++], -1);
176                 ulocbld_copyErrorTo(bld, &copyStatus);
177                 ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
178             } else if (strcmp("R", method) == 0) {
179                 ulocbld_setRegion(bld, TESTCASES[tidx][i++], -1);
180                 ulocbld_copyErrorTo(bld, &copyStatus);
181                 ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
182             } else if (strcmp("V", method) == 0) {
183                 ulocbld_setVariant(bld, TESTCASES[tidx][i++], -1);
184                 ulocbld_copyErrorTo(bld, &copyStatus);
185                 ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
186             } else if (strcmp("K", method) == 0) {
187                 const char* key = TESTCASES[tidx][i++];
188                 const char* type = TESTCASES[tidx][i++];
189                 ulocbld_setUnicodeLocaleKeyword(bld, key, -1, type, -1);
190                 ulocbld_copyErrorTo(bld, &copyStatus);
191                 ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
192             } else if (strcmp("A", method) == 0) {
193                 ulocbld_addUnicodeLocaleAttribute(bld, TESTCASES[tidx][i++], -1);
194                 ulocbld_copyErrorTo(bld, &copyStatus);
195                 ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
196             } else if (strcmp("E", method) == 0) {
197                 const char* key = TESTCASES[tidx][i++];
198                 const char* value = TESTCASES[tidx][i++];
199                 ulocbld_setExtension(bld, key[0], value, -1);
200                 ulocbld_copyErrorTo(bld, &copyStatus);
201                 ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
202             } else if (strcmp("P", method) == 0) {
203                 ulocbld_setExtension(bld, 'x', TESTCASES[tidx][i++], -1);
204                 ulocbld_copyErrorTo(bld, &copyStatus);
205                 ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
206             } else if (strcmp("U", method) == 0) {
207                 ulocbld_setLocale(bld, TESTCASES[tidx][i++], -1);
208                 ulocbld_copyErrorTo(bld, &copyStatus);
209                 ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
210             } else if (strcmp("B", method) == 0) {
211                 ulocbld_setLanguageTag(bld, TESTCASES[tidx][i++], -1);
212                 ulocbld_copyErrorTo(bld, &copyStatus);
213                 ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
214             }
215             // clear / remove
216             else if (strcmp("C", method) == 0) {
217                 ulocbld_clear(bld);
218                 ulocbld_copyErrorTo(bld, &copyStatus);
219                 ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
220             } else if (strcmp("N", method) == 0) {
221                 ulocbld_clearExtensions(bld);
222                 ulocbld_copyErrorTo(bld, &copyStatus);
223                 ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
224             } else if (strcmp("D", method) == 0) {
225                 ulocbld_removeUnicodeLocaleAttribute(bld, TESTCASES[tidx][i++], -1);
226                 ulocbld_copyErrorTo(bld, &copyStatus);
227                 ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
228             }
229             // result
230             else if (strcmp("X", method) == 0) {
231                 if (U_SUCCESS(status)) {
232                     log_err("FAIL: No error return - test case: %s", actions);
233                 }
234             } else if (strcmp("T", method) == 0) {
235                 status = U_ZERO_ERROR;
236                 ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
237                 if (status != copyStatus) {
238                     log_err("copyErrorTo not matching");
239                 }
240                 if (U_FAILURE(status) ||
241                     strcmp(locale, TESTCASES[tidx][i + 1]) != 0) {
242                     log_err("FAIL: Wrong locale ID - %s %s %s", locale,
243                             " for test case: ", actions);
244                 }
245                 ulocbld_buildLanguageTag(bld, tag, ULOC_FULLNAME_CAPACITY, &status);
246                 if (U_FAILURE(status) || strcmp(tag, TESTCASES[tidx][i]) != 0) {
247                     log_err("FAIL: Wrong language tag - %s %s %s", tag,
248                             " for test case: ", actions);
249                 }
250                 break;
251             } else {
252                 // Unknown test method
253                 log_err("Unknown test case method: There is an error in the test case data.");
254                 break;
255             }
256             if (status != copyStatus) {
257                 log_err("copyErrorTo not matching");
258             }
259             if (U_FAILURE(status)) {
260                 if (strcmp("X", TESTCASES[tidx][i]) == 0) {
261                     // This failure is expected
262                     break;
263                 } else {
264                     log_err("FAIL: U_ILLEGAL_ARGUMENT_ERROR at offset %d %s %s", i,
265                           " in test case: ", actions);
266                     break;
267                 }
268             }
269             if (strcmp("T", method) == 0) {
270                 break;
271             }
272         }  // while(true)
273     }  // for TESTCASES
274     ulocbld_close(bld);
275 }
276 
TestLocaleBuilderBasic(void)277 static void TestLocaleBuilderBasic(void) {
278     ULocaleBuilder* bld = ulocbld_open();
279     ulocbld_setLanguage(bld, "zh", -1);
280     Verify(bld, "zh", "ulocbld_setLanguage('zh') got Error: %s\n");
281     ulocbld_setScript(bld, "Hant", -1);
282     Verify(bld, "zh-Hant", "ulocbld_setScript('Hant') got Error: %s\n");
283 
284     ulocbld_setRegion(bld, "SG", -1);
285     Verify(bld, "zh-Hant-SG", "ulocbld_setRegion('SG') got Error: %s\n");
286 
287     ulocbld_setRegion(bld, "HK", -1);
288     ulocbld_setScript(bld, "Hans", -1);
289 
290     Verify(bld, "zh-Hans-HK",
291            "ulocbld_setRegion('HK') and ulocbld_setScript('Hans') got Error: %s\n");
292 
293     ulocbld_setVariant(bld, "revised###", 7);
294     Verify(bld, "zh-Hans-HK-revised",
295            "ulocbld_setVariant('revised') got Error: %s\n");
296 
297     ulocbld_setUnicodeLocaleKeyword(bld, "nu", -1, "thai###", 4);
298     Verify(bld, "zh-Hans-HK-revised-u-nu-thai",
299            "ulocbld_setUnicodeLocaleKeyword('nu', 'thai'') got Error: %s\n");
300 
301     ulocbld_setUnicodeLocaleKeyword(bld, "co###", 2, "pinyin", -1);
302     Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-thai",
303            "ulocbld_setUnicodeLocaleKeyword('co', 'pinyin'') got Error: %s\n");
304 
305     ulocbld_setUnicodeLocaleKeyword(bld, "nu", -1, "latn###", 4);
306     Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-latn",
307            "ulocbld_setUnicodeLocaleKeyword('nu', 'latn'') got Error: %s\n");
308 
309     ulocbld_setUnicodeLocaleKeyword(bld, "nu", -1, "latn", -1);
310     ulocbld_setUnicodeLocaleKeyword(bld, "nu", -1, NULL, 0);
311     Verify(bld, "zh-Hans-HK-revised-u-co-pinyin",
312            "ulocbld_setUnicodeLocaleKeyword('nu', ''') got Error: %s\n");
313 
314 
315     ulocbld_setUnicodeLocaleKeyword(bld, "co", -1, NULL, 0);
316     Verify(bld, "zh-Hans-HK-revised",
317            "ulocbld_setUnicodeLocaleKeyword('nu', NULL) got Error: %s\n");
318 
319     ulocbld_setScript(bld, "", -1);
320     Verify(bld, "zh-HK-revised",
321            "ulocbld_setScript('') got Error: %s\n");
322 
323     ulocbld_setVariant(bld, "", -1);
324     Verify(bld, "zh-HK",
325            "ulocbld_setVariant('') got Error: %s\n");
326 
327     ulocbld_setRegion(bld, "", -1);
328     Verify(bld, "zh",
329            "ulocbld_setRegion('') got Error: %s\n");
330 
331     ulocbld_close(bld);
332 }
333 
TestLocaleBuilderBasicWithExtensionsOnDefaultLocale(void)334 static void TestLocaleBuilderBasicWithExtensionsOnDefaultLocale(void) {
335     // Change the default locale to one with extension tags.
336     UErrorCode status = U_ZERO_ERROR;
337     char originalDefault[ULOC_FULLNAME_CAPACITY];
338     strcpy(originalDefault, uloc_getDefault());
339     uloc_setDefault("en-US-u-hc-h12", &status);
340     if (U_FAILURE(status)) {
341         log_err("ERROR: Could not change the default locale");
342         return;
343     }
344 
345     // Invoke the basic test now that the default locale has been changed.
346     TestLocaleBuilderBasic();
347 
348     uloc_setDefault(originalDefault, &status);
349     if (U_FAILURE(status)) {
350         log_err("ERROR: Could not restore the default locale");
351     }
352 }
353 
TestSetLanguageWellFormed(void)354 static void TestSetLanguageWellFormed(void) {
355     // http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag
356     // unicode_language_subtag = alpha{2,3} | alpha{5,8};
357     // ICUTC decided also support alpha{4}
358     static const char* wellFormedLanguages[] = {
359         "",
360 
361         // alpha{2}
362         "en",
363         "NE",
364         "eN",
365         "Ne",
366 
367         // alpha{3}
368         "aNe",
369         "zzz",
370         "AAA",
371 
372         // alpha{4}
373         "ABCD",
374         "abcd",
375 
376         // alpha{5}
377         "efgij",
378         "AbCAD",
379         "ZAASD",
380 
381         // alpha{6}
382         "efgijk",
383         "AADGFE",
384         "AkDfFz",
385 
386         // alpha{7}
387         "asdfads",
388         "ADSFADF",
389         "piSFkDk",
390 
391         // alpha{8}
392         "oieradfz",
393         "IADSFJKR",
394         "kkDSFJkR",
395     };
396     for(int32_t i=0;i<UPRV_LENGTHOF(wellFormedLanguages);i++) {
397         const char* lang = wellFormedLanguages[i];
398         UErrorCode status = U_ZERO_ERROR;
399         ULocaleBuilder* bld = ulocbld_open();
400         ulocbld_setLanguage(bld, lang, -1);
401         char buffer[ULOC_FULLNAME_CAPACITY];
402         ulocbld_buildLocaleID(bld, buffer, ULOC_FULLNAME_CAPACITY, &status);
403         if (U_FAILURE(status)) {
404             log_err("setLanguage(\"%s\") got Error: %s\n",
405                   lang, u_errorName(status));
406         }
407         ulocbld_close(bld);
408     }
409 }
410 
TestSetLanguageIllFormed(void)411 static void TestSetLanguageIllFormed(void) {
412     static const char* illFormed[] = {
413         "a",
414         "z",
415         "A",
416         "F",
417         "2",
418         "0",
419         "9",
420         "{",
421         ".",
422         "[",
423         "]",
424         "\\",
425 
426         "e1",
427         "N2",
428         "3N",
429         "4e",
430         "e:",
431         "43",
432         "a9",
433 
434         "aN0",
435         "z1z",
436         "2zz",
437         "3A3",
438         "456",
439         "af)",
440 
441         // Per 2019-01-23 ICUTC, we still accept 4alpha as tlang. see ICU-20321.
442         // "latn",
443         // "Arab",
444         // "LATN",
445 
446         "e)gij",
447         "Ab3AD",
448         "ZAAS8",
449 
450         "efgi[]",
451         "AA9GFE",
452         "7kD3Fz",
453         "as8fads",
454         "0DSFADF",
455         "'iSFkDk",
456 
457         "oieradf+",
458         "IADSFJK-",
459         "kkDSFJk0",
460 
461         // alpha{9}
462         "oieradfab",
463         "IADSFJKDE",
464         "kkDSFJkzf",
465     };
466     for(int32_t i=0;i<UPRV_LENGTHOF(illFormed);i++) {
467         const char* ill = illFormed[i];
468         UErrorCode status = U_ZERO_ERROR;
469         ULocaleBuilder* bld = ulocbld_open();
470         ulocbld_setLanguage(bld, ill, -1);
471         char buffer[ULOC_FULLNAME_CAPACITY];
472         ulocbld_buildLocaleID(bld, buffer, ULOC_FULLNAME_CAPACITY, &status);
473         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
474             log_err("setLanguage(\"%s\") should fail but has no Error\n", ill);
475         }
476         ulocbld_close(bld);
477     }
478 }
479 
TestSetScriptWellFormed(void)480 static void TestSetScriptWellFormed(void) {
481     // http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag
482     // unicode_script_subtag = alpha{4} ;
483     static const char* wellFormedScripts[] = {
484         "",
485 
486         "Latn",
487         "latn",
488         "lATN",
489         "laTN",
490         "arBN",
491         "ARbn",
492         "adsf",
493         "aADF",
494         "BSVS",
495         "LATn",
496     };
497     for(int32_t i=0;i<UPRV_LENGTHOF(wellFormedScripts);i++) {
498         const char* script = wellFormedScripts[i];
499         UErrorCode status = U_ZERO_ERROR;
500         ULocaleBuilder* bld = ulocbld_open();
501         ulocbld_setScript(bld, script, -1);
502         char buffer[ULOC_FULLNAME_CAPACITY];
503         ulocbld_buildLocaleID(bld, buffer, ULOC_FULLNAME_CAPACITY, &status);
504         if (U_FAILURE(status)) {
505             log_err("setScript(\"%s\") got Error: %s\n",
506                   script, u_errorName(status));
507         }
508         ulocbld_close(bld);
509     }
510 }
TestSetScriptIllFormed(void)511 static void TestSetScriptIllFormed(void) {
512     static const char* illFormed[] = {
513         "a",
514         "z",
515         "A",
516         "F",
517         "2",
518         "0",
519         "9",
520         "{",
521         ".",
522         "[",
523         "]",
524         "\\",
525 
526         "e1",
527         "N2",
528         "3N",
529         "4e",
530         "e:",
531         "43",
532         "a9",
533 
534         "aN0",
535         "z1z",
536         "2zz",
537         "3A3",
538         "456",
539         "af)",
540 
541         "0atn",
542         "l1tn",
543         "lA2N",
544         "la4N",
545         "arB5",
546         "1234",
547 
548         "e)gij",
549         "Ab3AD",
550         "ZAAS8",
551 
552         "efgi[]",
553         "AA9GFE",
554         "7kD3Fz",
555 
556         "as8fads",
557         "0DSFADF",
558         "'iSFkDk",
559 
560         "oieradf+",
561         "IADSFJK-",
562         "kkDSFJk0",
563 
564         // alpha{9}
565         "oieradfab",
566         "IADSFJKDE",
567         "kkDSFJkzf",
568     };
569     for(int32_t i=0;i<UPRV_LENGTHOF(illFormed);i++) {
570         const char* ill = illFormed[i];
571         UErrorCode status = U_ZERO_ERROR;
572         ULocaleBuilder* bld = ulocbld_open();
573         ulocbld_setScript(bld, ill, -1);
574         char buffer[ULOC_FULLNAME_CAPACITY];
575         ulocbld_buildLocaleID(bld, buffer, ULOC_FULLNAME_CAPACITY, &status);
576         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
577             log_err("setScript(\"%s\") should fail but has no Error\n", ill);
578         }
579         ulocbld_close(bld);
580     }
581 }
582 
TestSetRegionWellFormed(void)583 static void TestSetRegionWellFormed(void) {
584     // http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag
585     // unicode_region_subtag = (alpha{2} | digit{3})
586     static const char* wellFormedRegions[] = {
587         "",
588 
589         // alpha{2}
590         "en",
591         "NE",
592         "eN",
593         "Ne",
594 
595         // digit{3}
596         "000",
597         "999",
598         "123",
599         "987"
600     };
601     for(int32_t i=0;i<UPRV_LENGTHOF(wellFormedRegions);i++) {
602         const char* region = wellFormedRegions[i];
603         UErrorCode status = U_ZERO_ERROR;
604         ULocaleBuilder* bld = ulocbld_open();
605         ulocbld_setRegion(bld, region, -1);
606         char buffer[ULOC_FULLNAME_CAPACITY];
607         ulocbld_buildLocaleID(bld, buffer, ULOC_FULLNAME_CAPACITY, &status);
608         if (U_FAILURE(status)) {
609             log_err("setRegion(\"%s\") got Error: %s\n",
610                   region, u_errorName(status));
611         }
612         ulocbld_close(bld);
613     }
614 }
615 
TestSetRegionIllFormed(void)616 static void TestSetRegionIllFormed(void) {
617     static const char* illFormed[] = {
618         "a",
619         "z",
620         "A",
621         "F",
622         "2",
623         "0",
624         "9",
625         "{",
626         ".",
627         "[",
628         "]",
629         "\\",
630 
631         "e1",
632         "N2",
633         "3N",
634         "4e",
635         "e:",
636         "43",
637         "a9",
638 
639         "aN0",
640         "z1z",
641         "2zz",
642         "3A3",
643         "4.6",
644         "af)",
645 
646         "0atn",
647         "l1tn",
648         "lA2N",
649         "la4N",
650         "arB5",
651         "1234",
652 
653         "e)gij",
654         "Ab3AD",
655         "ZAAS8",
656 
657         "efgi[]",
658         "AA9GFE",
659         "7kD3Fz",
660 
661         "as8fads",
662         "0DSFADF",
663         "'iSFkDk",
664 
665         "oieradf+",
666         "IADSFJK-",
667         "kkDSFJk0",
668 
669         // alpha{9}
670         "oieradfab",
671         "IADSFJKDE",
672         "kkDSFJkzf",
673     };
674     for(int32_t i=0;i<UPRV_LENGTHOF(illFormed);i++) {
675         const char* ill = illFormed[i];
676         UErrorCode status = U_ZERO_ERROR;
677         ULocaleBuilder* bld = ulocbld_open();
678         ulocbld_setRegion(bld, ill, -1);
679         char buffer[ULOC_FULLNAME_CAPACITY];
680         ulocbld_buildLocaleID(bld, buffer, ULOC_FULLNAME_CAPACITY, &status);
681         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
682             log_err("setRegion(\"%s\") should fail but has no Error\n", ill);
683         }
684         ulocbld_close(bld);
685     }
686 }
687 
TestSetVariantWellFormed(void)688 static void TestSetVariantWellFormed(void) {
689     // http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag
690     // (sep unicode_variant_subtag)*
691     // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
692     static const char* wellFormedVariants[] = {
693         "",
694 
695         // alphanum{5}
696         "efgij",
697         "AbCAD",
698         "ZAASD",
699         "0AASD",
700         "A1CAD",
701         "ef2ij",
702         "ads3X",
703         "owqF4",
704 
705         // alphanum{6}
706         "efgijk",
707         "AADGFE",
708         "AkDfFz",
709         "0ADGFE",
710         "A9DfFz",
711         "AADG7E",
712 
713         // alphanum{7}
714         "asdfads",
715         "ADSFADF",
716         "piSFkDk",
717         "a0dfads",
718         "ADSF3DF",
719         "piSFkD9",
720 
721         // alphanum{8}
722         "oieradfz",
723         "IADSFJKR",
724         "kkDSFJkR",
725         "0ADSFJKR",
726         "12345679",
727 
728         // digit alphanum{3}
729         "0123",
730         "1abc",
731         "20EF",
732         "30EF",
733         "8A03",
734         "3Ax3",
735         "9Axy",
736 
737         // (sep unicode_variant_subtag)*
738         "0123-4567",
739         "0ab3-ABCDE",
740         "9ax3-xByD9",
741         "9ax3-xByD9-adfk934a",
742 
743         "0123_4567",
744         "0ab3_ABCDE",
745         "9ax3_xByD9",
746         "9ax3_xByD9_adfk934a",
747 
748         "9ax3-xByD9_adfk934a",
749         "9ax3_xByD9-adfk934a",
750     };
751     for(int32_t i=0;i<UPRV_LENGTHOF(wellFormedVariants);i++) {
752         const char* variant = wellFormedVariants[i];
753         UErrorCode status = U_ZERO_ERROR;
754         ULocaleBuilder* bld = ulocbld_open();
755         ulocbld_setVariant(bld, variant, -1);
756         char buffer[ULOC_FULLNAME_CAPACITY];
757         ulocbld_buildLocaleID(bld, buffer, ULOC_FULLNAME_CAPACITY, &status);
758         if (U_FAILURE(status)) {
759             log_err("setVariant(\"%s\") got Error: %s\n",
760                   variant, u_errorName(status));
761         }
762         ulocbld_close(bld);
763     }
764 }
765 
TestSetVariantIllFormed(void)766 static void TestSetVariantIllFormed(void) {
767     static const char* illFormed[] = {
768         "a",
769         "z",
770         "A",
771         "F",
772         "2",
773         "0",
774         "9",
775         "{",
776         ".",
777         "[",
778         "]",
779         "\\",
780 
781         "e1",
782         "N2",
783         "3N",
784         "4e",
785         "e:",
786         "43",
787         "a9",
788         "en",
789         "NE",
790         "eN",
791         "Ne",
792 
793         "aNe",
794         "zzz",
795         "AAA",
796         "aN0",
797         "z1z",
798         "2zz",
799         "3A3",
800         "4.6",
801         "af)",
802         "345",
803         "923",
804 
805         "Latn",
806         "latn",
807         "lATN",
808         "laTN",
809         "arBN",
810         "ARbn",
811         "adsf",
812         "aADF",
813         "BSVS",
814         "LATn",
815         "l1tn",
816         "lA2N",
817         "la4N",
818         "arB5",
819         "abc3",
820         "A3BC",
821 
822         "e)gij",
823         "A+3AD",
824         "ZAA=8",
825 
826         "efgi[]",
827         "AA9]FE",
828         "7k[3Fz",
829 
830         "as8f/ds",
831         "0DSFAD{",
832         "'iSFkDk",
833 
834         "oieradf+",
835         "IADSFJK-",
836         "k}DSFJk0",
837 
838         // alpha{9}
839         "oieradfab",
840         "IADSFJKDE",
841         "kkDSFJkzf",
842         "123456789",
843 
844         "-0123",
845         "-0123-4567",
846         "0123-4567-",
847         "-123-4567",
848         "_0123",
849         "_0123_4567",
850         "0123_4567_",
851         "_123_4567",
852 
853         "-abcde-figjk",
854         "abcde-figjk-",
855         "-abcde-figjk-",
856         "_abcde_figjk",
857         "abcde_figjk_",
858         "_abcde_figjk_",
859     };
860     for(int32_t i=0;i<UPRV_LENGTHOF(illFormed);i++) {
861         const char* ill = illFormed[i];
862         UErrorCode status = U_ZERO_ERROR;
863         ULocaleBuilder* bld = ulocbld_open();
864         ulocbld_setVariant(bld, ill, -1);
865         char buffer[ULOC_FULLNAME_CAPACITY];
866         ulocbld_buildLocaleID(bld, buffer, ULOC_FULLNAME_CAPACITY, &status);
867         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
868             log_err("setVariant(\"%s\") should fail but has no Error\n", ill);
869         }
870         ulocbld_close(bld);
871     }
872 }
873 
TestSetUnicodeLocaleKeywordWellFormed(void)874 static void TestSetUnicodeLocaleKeywordWellFormed(void) {
875     // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
876     // keyword = key (sep type)? ;
877     // key = alphanum alpha ;
878     // type = alphanum{3,8} (sep alphanum{3,8})* ;
879     static const char* wellFormed_key_value[] = {
880         "aa", "123",
881         "3b", "zyzbcdef",
882         "0Z", "1ZB30zk9-abc",
883         "cZ", "2ck30zfZ-adsf023-234kcZ",
884         "ZZ", "Lant",
885         "ko", "",
886     };
887     for (int i = 0; i < UPRV_LENGTHOF(wellFormed_key_value); i += 2) {
888         UErrorCode status = U_ZERO_ERROR;
889         ULocaleBuilder* bld = ulocbld_open();
890         ulocbld_setUnicodeLocaleKeyword(bld, wellFormed_key_value[i], -1,
891                                         wellFormed_key_value[i + 1], -1);
892         char buffer[ULOC_FULLNAME_CAPACITY];
893         ulocbld_buildLocaleID(bld, buffer, ULOC_FULLNAME_CAPACITY, &status);
894         if (U_FAILURE(status)) {
895             log_err("setUnicodeLocaleKeyword(\"%s\", \"%s\") got Error: %s\n",
896                   wellFormed_key_value[i],
897                   wellFormed_key_value[i + 1],
898                   u_errorName(status));
899         }
900         ulocbld_close(bld);
901     }
902 }
903 
TestSetUnicodeLocaleKeywordIllFormedKey(void)904 static void TestSetUnicodeLocaleKeywordIllFormedKey(void) {
905     static const char* illFormed[] = {
906         "34",
907         "ab-cde",
908         "123",
909         "b3",
910         "zyzabcdef",
911         "Z0",
912     };
913     for (int i = 0; i < UPRV_LENGTHOF(illFormed); i++) {
914         const char* ill = illFormed[i];
915         UErrorCode status = U_ZERO_ERROR;
916         ULocaleBuilder* bld = ulocbld_open();
917         ulocbld_setUnicodeLocaleKeyword(bld, ill, -1, "abc", 3);
918         char buffer[ULOC_FULLNAME_CAPACITY];
919         ulocbld_buildLocaleID(bld, buffer, ULOC_FULLNAME_CAPACITY, &status);
920         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
921             log_err("setUnicodeLocaleKeyword(\"%s\", \"abc\") should fail but has no Error\n",
922                   ill);
923         }
924         ulocbld_close(bld);
925     }
926 }
927 
TestSetUnicodeLocaleKeywordIllFormedValue(void)928 static void TestSetUnicodeLocaleKeywordIllFormedValue(void) {
929     static const char* illFormed[] = {
930         "34",
931         "ab-",
932         "-cd",
933         "-ef-",
934         "zyzabcdef",
935         "ab-abc",
936         "1ZB30zfk9-abc",
937         "2ck30zfk9-adsf023-234kcZ",
938     };
939     ULocaleBuilder* bld = ulocbld_open();
940     for (int i = 0; i < UPRV_LENGTHOF(illFormed); i++) {
941         const char* ill = illFormed[i];
942         ulocbld_clear(bld);
943         UErrorCode status = U_ZERO_ERROR;
944         ulocbld_setUnicodeLocaleKeyword(bld, "ab", 2, ill, -1);
945         char buffer[ULOC_FULLNAME_CAPACITY];
946         ulocbld_buildLocaleID(bld, buffer, ULOC_FULLNAME_CAPACITY, &status);
947         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
948             log_err("setUnicodeLocaleKeyword(\"ab\", \"%s\") should fail but has no Error\n",
949                   ill);
950         }
951     }
952     ulocbld_close(bld);
953 }
954 
TestAddRemoveUnicodeLocaleAttribute(void)955 static void TestAddRemoveUnicodeLocaleAttribute(void) {
956     ULocaleBuilder* bld = ulocbld_open();
957     ulocbld_setLanguage(bld, "fr", -1);
958     ulocbld_addUnicodeLocaleAttribute(bld, "abc", -1);
959     ulocbld_addUnicodeLocaleAttribute(bld, "aBc", -1);
960     ulocbld_addUnicodeLocaleAttribute(bld, "EFG123", 3);
961     ulocbld_addUnicodeLocaleAttribute(bld, "efghi###", 5);
962     ulocbld_addUnicodeLocaleAttribute(bld, "efgh", -1);
963     ulocbld_addUnicodeLocaleAttribute(bld, "efGhi", -1);
964     ulocbld_addUnicodeLocaleAttribute(bld, "EFg", -1);
965     ulocbld_addUnicodeLocaleAttribute(bld, "hijk", -1);
966     ulocbld_addUnicodeLocaleAttribute(bld, "EFG", -1);
967     ulocbld_addUnicodeLocaleAttribute(bld, "HiJK", -1);
968     ulocbld_addUnicodeLocaleAttribute(bld, "aBc", -1);
969     UErrorCode status = U_ZERO_ERROR;
970     char locale[ULOC_FULLNAME_CAPACITY];
971     ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
972     if (U_FAILURE(status)) {
973         log_err("addUnicodeLocaleAttribute() got Error: %s\n",
974               u_errorName(status));
975     }
976     char languageTag[ULOC_FULLNAME_CAPACITY];
977     ulocbld_buildLanguageTag(bld, languageTag, ULOC_FULLNAME_CAPACITY, &status);
978     const char* expected = "fr-u-abc-efg-efgh-efghi-hijk";
979     if (U_FAILURE(status) || strcmp(languageTag, expected) != 0) {
980         log_err("Should get \"%s\" but get \"%s\"\n", expected, languageTag);
981     }
982 
983     // remove "efgh" in the middle with different casing.
984     ulocbld_removeUnicodeLocaleAttribute(bld, "eFgH", -1);
985     ulocbld_buildLanguageTag(bld, languageTag, ULOC_FULLNAME_CAPACITY, &status);
986     expected = "fr-u-abc-efg-efghi-hijk";
987     if (U_FAILURE(status) || strcmp(languageTag, expected) != 0) {
988         log_err("removeUnicodeLocaleAttribute() got Error: %s\n",
989               u_errorName(status));
990         log_err("Should get \"%s\" but get \"%s\"\n", expected, languageTag);
991     }
992 
993     // remove non-existing attributes.
994     ulocbld_removeUnicodeLocaleAttribute(bld, "efgh", -1);
995     ulocbld_buildLanguageTag(bld, languageTag, ULOC_FULLNAME_CAPACITY, &status);
996     if (U_FAILURE(status) || strcmp(languageTag, expected) != 0) {
997         log_err("removeUnicodeLocaleAttribute() got Error: %s\n",
998               u_errorName(status));
999         log_err("Should get \"%s\" but get \"%s\"\n", expected, languageTag);
1000     }
1001 
1002     // remove "abc" in the beginning with different casing.
1003     ulocbld_removeUnicodeLocaleAttribute(bld, "ABC", -1);
1004     ulocbld_buildLanguageTag(bld, languageTag, ULOC_FULLNAME_CAPACITY, &status);
1005     expected = "fr-u-efg-efghi-hijk";
1006     if (U_FAILURE(status) || strcmp(languageTag, expected) != 0) {
1007         log_err("removeUnicodeLocaleAttribute() got Error: %s\n",
1008               u_errorName(status));
1009         log_err("Should get \"%s\" but get \"%s\"\n", expected, languageTag);
1010     }
1011 
1012     // remove non-existing substring in the end.
1013     ulocbld_removeUnicodeLocaleAttribute(bld, "hij", -1);
1014     ulocbld_buildLanguageTag(bld, languageTag, ULOC_FULLNAME_CAPACITY, &status);
1015     if (U_FAILURE(status) || strcmp(languageTag, expected) != 0) {
1016         log_err("removeUnicodeLocaleAttribute() got Error: %s\n",
1017               u_errorName(status));
1018         log_err("Should get \"%s\" but get \"%s\"\n", expected, languageTag);
1019     }
1020 
1021     // remove "hijk" in the end with different casing.
1022     ulocbld_removeUnicodeLocaleAttribute(bld, "hIJK", -1);
1023     ulocbld_buildLanguageTag(bld, languageTag, ULOC_FULLNAME_CAPACITY, &status);
1024     expected = "fr-u-efg-efghi";
1025     if (U_FAILURE(status) || strcmp(languageTag, expected) != 0) {
1026         log_err("removeUnicodeLocaleAttribute() got Error: %s\n",
1027               u_errorName(status));
1028         log_err("Should get \"%s\" but get \"%s\"\n", expected, languageTag);
1029     }
1030 
1031     // remove "efghi" in the end with different casing.
1032     ulocbld_removeUnicodeLocaleAttribute(bld, "EFGhi", -1);
1033     ulocbld_buildLanguageTag(bld, languageTag, ULOC_FULLNAME_CAPACITY, &status);
1034     expected = "fr-u-efg";
1035     if (U_FAILURE(status) || strcmp(languageTag, expected) != 0) {
1036         log_err("removeUnicodeLocaleAttribute() got Error: %s\n",
1037               u_errorName(status));
1038         log_err("Should get \"%s\" but get \"%s\"\n", expected, languageTag);
1039     }
1040 
1041     // remove "efg" in as the only one, with different casing.
1042     ulocbld_removeUnicodeLocaleAttribute(bld, "EFG", -1);
1043     ulocbld_buildLanguageTag(bld, languageTag, ULOC_FULLNAME_CAPACITY, &status);
1044     expected = "fr";
1045     if (U_FAILURE(status) || strcmp(languageTag, expected) != 0) {
1046         log_err("removeUnicodeLocaleAttribute() got Error: %s\n",
1047               u_errorName(status));
1048         log_err("Should get \"%s\" but get \"%s\"\n", expected, languageTag);
1049     }
1050     ulocbld_close(bld);
1051 }
1052 
TestAddRemoveUnicodeLocaleAttributeWellFormed(void)1053 static void TestAddRemoveUnicodeLocaleAttributeWellFormed(void) {
1054     // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
1055     // attribute = alphanum{3,8} ;
1056     static const char* wellFormedAttributes[] = {
1057         // alphanum{3}
1058         "AbC",
1059         "ZAA",
1060         "0AA",
1061         "x3A",
1062         "xa8",
1063 
1064         // alphanum{4}
1065         "AbCA",
1066         "ZASD",
1067         "0ASD",
1068         "A3a4",
1069         "zK90",
1070 
1071         // alphanum{5}
1072         "efgij",
1073         "AbCAD",
1074         "ZAASD",
1075         "0AASD",
1076         "A1CAD",
1077         "ef2ij",
1078         "ads3X",
1079         "owqF4",
1080 
1081         // alphanum{6}
1082         "efgijk",
1083         "AADGFE",
1084         "AkDfFz",
1085         "0ADGFE",
1086         "A9DfFz",
1087         "AADG7E",
1088 
1089         // alphanum{7}
1090         "asdfads",
1091         "ADSFADF",
1092         "piSFkDk",
1093         "a0dfads",
1094         "ADSF3DF",
1095         "piSFkD9",
1096 
1097         // alphanum{8}
1098         "oieradfz",
1099         "IADSFJKR",
1100         "kkDSFJkR",
1101     };
1102     ULocaleBuilder* bld = ulocbld_open();
1103     for (int i = 0; i < UPRV_LENGTHOF(wellFormedAttributes); i++) {
1104         if (i % 5 == 0) {
1105             ulocbld_clear(bld);
1106         }
1107         ulocbld_addUnicodeLocaleAttribute(bld, wellFormedAttributes[i], -1);
1108         UErrorCode status = U_ZERO_ERROR;
1109         if (ulocbld_copyErrorTo(bld, &status)) {
1110             log_err("addUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1111                   wellFormedAttributes[i], u_errorName(status));
1112         }
1113         if (i > 2) {
1114             ulocbld_removeUnicodeLocaleAttribute(bld, wellFormedAttributes[i - 1], -1);
1115             if (ulocbld_copyErrorTo(bld, &status)) {
1116                 log_err("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1117                       wellFormedAttributes[i - 1], u_errorName(status));
1118             }
1119             ulocbld_removeUnicodeLocaleAttribute(bld, wellFormedAttributes[i - 3], -1);
1120             if (ulocbld_copyErrorTo(bld, &status)) {
1121                 log_err("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1122                       wellFormedAttributes[i - 3], u_errorName(status));
1123             }
1124         }
1125     }
1126     ulocbld_close(bld);
1127 }
1128 
TestAddUnicodeLocaleAttributeIllFormed(void)1129 static void TestAddUnicodeLocaleAttributeIllFormed(void) {
1130     static const char* illFormed[] = {
1131         "aa",
1132         "34",
1133         "ab-",
1134         "-cd",
1135         "-ef-",
1136         "zyzabcdef",
1137         "123456789",
1138         "ab-abc",
1139         "1ZB30zfk9-abc",
1140         "2ck30zfk9-adsf023-234kcZ",
1141     };
1142     for (int i = 0; i < UPRV_LENGTHOF(illFormed); i++) {
1143         const char* ill = illFormed[i];
1144         UErrorCode status = U_ZERO_ERROR;
1145         ULocaleBuilder* bld = ulocbld_open();
1146         ulocbld_addUnicodeLocaleAttribute(bld, ill, -1);
1147         char buffer[ULOC_FULLNAME_CAPACITY];
1148         ulocbld_buildLocaleID(bld, buffer, ULOC_FULLNAME_CAPACITY, &status);
1149         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1150             log_err("addUnicodeLocaleAttribute(\"%s\") should fail but has no Error\n",
1151                   ill);
1152         }
1153         ulocbld_close(bld);
1154     }
1155 }
1156 
TestSetExtensionU(void)1157 static void TestSetExtensionU(void) {
1158     ULocaleBuilder* bld = ulocbld_open();
1159     ulocbld_setLanguage(bld, "zhABC", 2);
1160     Verify(bld, "zh",
1161            "ulocbld_setLanguage(\"zh\") got Error: %s\n");
1162 
1163     ulocbld_setExtension(bld, 'u', "co-stroke", -1);
1164     Verify(bld, "zh-u-co-stroke",
1165            "ulocbld_setExtension('u', \"co-stroke\") got Error: %s\n");
1166 
1167     ulocbld_setExtension(bld, 'U', "ca-islamicABCDE", 10);
1168     Verify(bld, "zh-u-ca-islamic",
1169            "ulocbld_setExtension('U', \"zh-u-ca-islamic\") got Error: %s\n");
1170 
1171     ulocbld_setExtension(bld, 'u', "ca-chinese", 10);
1172     Verify(bld, "zh-u-ca-chinese",
1173            "ulocbld_setExtension('u', \"ca-chinese\") got Error: %s\n");
1174 
1175     ulocbld_setExtension(bld, 'U', "co-pinyin1234", 9);
1176     Verify(bld, "zh-u-co-pinyin",
1177            "ulocbld_setExtension('U', \"co-pinyin\") got Error: %s\n");
1178 
1179     ulocbld_setRegion(bld, "TW123", 2);
1180     Verify(bld, "zh-TW-u-co-pinyin",
1181            "ulocbld_setRegion(\"TW\") got Error: %s\n");
1182 
1183     ulocbld_setExtension(bld, 'U', "", 0);
1184     Verify(bld, "zh-TW",
1185            "ulocbld_setExtension('U', \"\") got Error: %s\n");
1186 
1187     ulocbld_setExtension(bld, 'u', "abc-defg-kr-face", -1);
1188     Verify(bld, "zh-TW-u-abc-defg-kr-face",
1189            "ulocbld_setExtension('u', \"abc-defg-kr-face\") got Error: %s\n");
1190 
1191     ulocbld_setExtension(bld, 'U', "ca-japanese", -1);
1192     Verify(bld, "zh-TW-u-ca-japanese",
1193            "ulocbld_setExtension('U', \"ca-japanese\") got Error: %s\n");
1194 
1195     ulocbld_close(bld);
1196 }
1197 
TestSetExtensionValidateUWellFormed(void)1198 static void TestSetExtensionValidateUWellFormed(void) {
1199     static const char* wellFormedExtensions[] = {
1200         // keyword
1201         //   keyword = key (sep type)? ;
1202         //   key = alphanum alpha ;
1203         //   type = alphanum{3,8} (sep alphanum{3,8})* ;
1204         "3A",
1205         "ZA",
1206         "az-abc",
1207         "zz-123",
1208         "7z-12345678",
1209         "kb-A234567Z",
1210         // (sep keyword)+
1211         "1z-ZZ",
1212         "2z-ZZ-123",
1213         "3z-ZZ-123-cd",
1214         "0z-ZZ-123-cd-efghijkl",
1215         // attribute
1216         "abc",
1217         "456",
1218         "87654321",
1219         "ZABADFSD",
1220         // (sep attribute)+
1221         "abc-ZABADFSD",
1222         "123-ZABADFSD",
1223         "K2K-12345678",
1224         "K2K-12345678-zzz",
1225         // (sep attribute)+ (sep keyword)*
1226         "K2K-12345678-zz",
1227         "K2K-12345678-zz-0z",
1228         "K2K-12345678-9z-AZ-abc",
1229         "K2K-12345678-zz-9A-234",
1230         "K2K-12345678-zk0-abc-efg-zz-9k-234",
1231     };
1232     for (int i = 0; i < UPRV_LENGTHOF(wellFormedExtensions); i++) {
1233         const char* extension = wellFormedExtensions[i];
1234         UErrorCode status = U_ZERO_ERROR;
1235         ULocaleBuilder* bld = ulocbld_open();
1236         ulocbld_setExtension(bld, 'u', extension, -1);
1237         char buffer[ULOC_FULLNAME_CAPACITY];
1238         ulocbld_buildLocaleID(bld, buffer, ULOC_FULLNAME_CAPACITY, &status);
1239         if (U_FAILURE(status)) {
1240             log_err("setExtension('u', \"%s\") got Error: %s\n",
1241                   extension, u_errorName(status));
1242         }
1243         ulocbld_close(bld);
1244     }
1245 }
1246 
TestSetExtensionValidateUIllFormed(void)1247 static void TestSetExtensionValidateUIllFormed(void) {
1248     static const char* illFormed[] = {
1249         // bad key
1250         "-",
1251         "-ab",
1252         "ab-",
1253         "abc-",
1254         "-abc",
1255         "0",
1256         "a",
1257         "A0",
1258         "z9",
1259         "09",
1260         "90",
1261         // bad keyword
1262         "AB-A0",
1263         "AB-efg-A0",
1264         "xy-123456789",
1265         "AB-Aa-",
1266         "AB-Aac-",
1267         // bad attribute
1268         "abcdefghi",
1269         "abcdefgh-",
1270         "abcdefgh-abcdefghi",
1271         "abcdefgh-1",
1272         "abcdefgh-a",
1273         "abcdefgh-a2345678z",
1274     };
1275     for (int i = 0; i < UPRV_LENGTHOF(illFormed); i++) {
1276         const char* ill = illFormed[i];
1277         UErrorCode status = U_ZERO_ERROR;
1278         ULocaleBuilder* bld = ulocbld_open();
1279         ulocbld_setExtension(bld, 'u', ill, -1);
1280         char buffer[ULOC_FULLNAME_CAPACITY];
1281         ulocbld_buildLocaleID(bld, buffer, ULOC_FULLNAME_CAPACITY, &status);
1282         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1283             log_err("setExtension('u', \"%s\") should fail but has no Error\n",
1284                   ill);
1285         }
1286         ulocbld_close(bld);
1287     }
1288 }
1289 
TestSetExtensionT(void)1290 static void TestSetExtensionT(void) {
1291     ULocaleBuilder* bld = ulocbld_open();
1292     ulocbld_setLanguage(bld, "fr", 2);
1293     Verify(bld, "fr",
1294            "ulocbld_setLanguage(\"fr\") got Error: %s\n");
1295 
1296     ulocbld_setExtension(bld, 'T', "zh", -1);
1297     Verify(bld, "fr-t-zh",
1298            "ulocbld_setExtension('T', \"zh\") got Error: %s\n");
1299 
1300     ulocbld_setExtension(bld, 't', "zh-Hant-TW-1234-A9-123-456ABCDE", -1);
1301     Verify(bld, "fr-t-zh-hant-tw-1234-a9-123-456abcde",
1302            "ulocbld_setExtension('t', \"zh-Hant-TW-1234-A9-123-456ABCDE\") got Error: %s\n");
1303 
1304     ulocbld_setExtension(bld, 'T', "a9-123", -1);
1305     Verify(bld, "fr-t-a9-123",
1306            "ulocbld_setExtension('T', \"a9-123\") got Error: %s\n");
1307 
1308     ulocbld_setRegion(bld, "MX###", 2);
1309     Verify(bld, "fr-MX-t-a9-123",
1310            "ulocbld_setRegion(\"MX\") got Error: %s\n");
1311 
1312     ulocbld_setScript(bld, "Hans##", 4);
1313     Verify(bld, "fr-Hans-MX-t-a9-123",
1314            "ulocbld_setScript(\"Hans\") got Error: %s\n");
1315 
1316     ulocbld_setVariant(bld, "9abc-abcde1234", 10 );
1317     Verify(bld, "fr-Hans-MX-9abc-abcde-t-a9-123",
1318            "ulocbld_setVariant(\"9abc-abcde\") got Error: %s\n");
1319 
1320     ulocbld_setExtension(bld, 'T', "", 0);
1321     Verify(bld, "fr-Hans-MX-9abc-abcde",
1322            "ulocbld_setExtension('T', \"\") got Error: %s\n");
1323 
1324     ulocbld_close(bld);
1325 }
1326 
TestSetExtensionValidateTWellFormed(void)1327 static void TestSetExtensionValidateTWellFormed(void) {
1328     // ((sep tlang (sep tfield)*) | (sep tfield)+)
1329     static const char* wellFormedExtensions[] = {
1330         // tlang
1331         //  tlang = unicode_language_subtag (sep unicode_script_subtag)?
1332         //          (sep unicode_region_subtag)?  (sep unicode_variant_subtag)* ;
1333         // unicode_language_subtag
1334         "en",
1335         "abc",
1336         "abcde",
1337         "ABCDEFGH",
1338         // unicode_language_subtag sep unicode_script_subtag
1339         "en-latn",
1340         "abc-arab",
1341         "ABCDEFGH-Thai",
1342         // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1343         "en-latn-ME",
1344         "abc-arab-RU",
1345         "ABCDEFGH-Thai-TH",
1346         "en-latn-409",
1347         "abc-arab-123",
1348         "ABCDEFGH-Thai-456",
1349         // unicode_language_subtag sep unicode_region_subtag
1350         "en-ME",
1351         "abc-RU",
1352         "ABCDEFGH-TH",
1353         "en-409",
1354         "abc-123",
1355         "ABCDEFGH-456",
1356         // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1357         // sep (sep unicode_variant_subtag)*
1358         "en-latn-ME-abcde",
1359         "abc-arab-RU-3abc-abcdef",
1360         "ABCDEFGH-Thai-TH-ADSFS-9xyz-abcdef",
1361         "en-latn-409-xafsa",
1362         "abc-arab-123-ADASDF",
1363         "ABCDEFGH-Thai-456-9sdf-ADASFAS",
1364         // (sep tfield)+
1365         "A0-abcde",
1366         "z9-abcde123",
1367         "z9-abcde123-a1-abcde",
1368         // tlang (sep tfield)*
1369         "fr-A0-abcde",
1370         "fr-FR-A0-abcde",
1371         "fr-123-z9-abcde123-a1-abcde",
1372         "fr-Latn-FR-z9-abcde123-a1-abcde",
1373         "gab-Thai-TH-abcde-z9-abcde123-a1-abcde",
1374         "gab-Thai-TH-0bde-z9-abcde123-a1-abcde",
1375     };
1376     ULocaleBuilder* bld = ulocbld_open();
1377     for (int i = 0; i < UPRV_LENGTHOF(wellFormedExtensions); i++) {
1378         ulocbld_clear(bld);
1379         const char* extension = wellFormedExtensions[i];
1380         ulocbld_setExtension(bld, 't', extension, -1);
1381         UErrorCode status = U_ZERO_ERROR;
1382         if (ulocbld_copyErrorTo(bld, &status)) {
1383             log_err("ulocbld_setExtension('t', \"%s\") got Error: %s\n",
1384                   extension, u_errorName(status));
1385         }
1386     }
1387     ulocbld_close(bld);
1388 }
1389 
TestSetExtensionValidateTIllFormed(void)1390 static void TestSetExtensionValidateTIllFormed(void) {
1391     static const char* illFormed[] = {
1392         "a",
1393         "a-",
1394         "0",
1395         "9-",
1396         "-9",
1397         "-z",
1398         "Latn",
1399         "Latn-",
1400         "en-",
1401         "nob-",
1402         "-z9",
1403         "a3",
1404         "a3-",
1405         "3a",
1406         "0z-",
1407         "en-123-a1",
1408         "en-TH-a1",
1409         "gab-TH-a1",
1410         "gab-Thai-a1",
1411         "gab-Thai-TH-a1",
1412         "gab-Thai-TH-0bde-a1",
1413         "gab-Thai-TH-0bde-3b",
1414         "gab-Thai-TH-0bde-z9-a1",
1415         "gab-Thai-TH-0bde-z9-3b",
1416         "gab-Thai-TH-0bde-z9-abcde123-3b",
1417         "gab-Thai-TH-0bde-z9-abcde123-ab",
1418         "gab-Thai-TH-0bde-z9-abcde123-ab",
1419         "gab-Thai-TH-0bde-z9-abcde123-a1",
1420         "gab-Thai-TH-0bde-z9-abcde123-a1-",
1421         "gab-Thai-TH-0bde-z9-abcde123-a1-a",
1422         "gab-Thai-TH-0bde-z9-abcde123-a1-ab",
1423         // ICU-21408
1424         "root",
1425     };
1426     ULocaleBuilder* bld = ulocbld_open();
1427     for (int i = 0; i < UPRV_LENGTHOF(illFormed); i++) {
1428         ulocbld_clear(bld);
1429         const char* ill = illFormed[i];
1430         UErrorCode status = U_ZERO_ERROR;
1431         ulocbld_setExtension(bld, 't', ill, -1);
1432         if (!ulocbld_copyErrorTo(bld, &status) || status != U_ILLEGAL_ARGUMENT_ERROR) {
1433             log_err("setExtension('t', \"%s\") should fail but has no Error\n",
1434                   ill);
1435         }
1436     }
1437     ulocbld_close(bld);
1438 }
1439 
TestSetExtensionPU(void)1440 static void TestSetExtensionPU(void) {
1441     ULocaleBuilder* bld = ulocbld_open();
1442     ulocbld_setLanguage(bld, "ar123", 2);
1443     Verify(bld, "ar",
1444            "ulocbld_setLanguage(\"ar\") got Error: %s\n");
1445 
1446     ulocbld_setExtension(bld, 'X', "a-b-c-d-e12345", 9);
1447     Verify(bld, "ar-x-a-b-c-d-e",
1448            "ulocbld_setExtension('X', \"a-b-c-d-e\") got Error: %s\n");
1449 
1450     ulocbld_setExtension(bld, 'x', "0-1-2-3", -1);
1451     Verify(bld, "ar-x-0-1-2-3",
1452            "ulocbld_setExtension('x', \"0-1-2-3\") got Error: %s\n");
1453 
1454     ulocbld_setExtension(bld, 'X', "0-12345678-x-x", -1);
1455     Verify(bld, "ar-x-0-12345678-x-x",
1456            "ulocbld_setExtension('x', \"ar-x-0-12345678-x-x\") got Error: %s\n");
1457 
1458     ulocbld_setRegion(bld, "TH123", 2);
1459     Verify(bld, "ar-TH-x-0-12345678-x-x",
1460            "ulocbld_setRegion(\"TH\") got Error: %s\n");
1461 
1462     ulocbld_setExtension(bld, 'X', "", -1);
1463     Verify(bld, "ar-TH",
1464            "ulocbld_setExtension(\"X\") got Error: %s\n");
1465     ulocbld_close(bld);
1466 }
1467 
TestSetExtensionValidatePUWellFormed(void)1468 static void TestSetExtensionValidatePUWellFormed(void) {
1469     // ((sep tlang (sep tfield)*) | (sep tfield)+)
1470     static const char* wellFormedExtensions[] = {
1471         "a",  // Short subtag
1472         "z",  // Short subtag
1473         "0",  // Short subtag, digit
1474         "9",  // Short subtag, digit
1475         "a-0",  // Two short subtag, alpha and digit
1476         "9-z",  // Two short subtag, digit and alpha
1477         "ab",
1478         "abc",
1479         "abcefghi",  // Long subtag
1480         "87654321",
1481         "01",
1482         "234",
1483         "0a-ab-87654321",  // Three subtags
1484         "87654321-ab-00-3A",  // Four subtabs
1485         "a-9-87654321",  // Three subtags with short and long subtags
1486         "87654321-ab-0-3A",
1487     };
1488     ULocaleBuilder* bld = ulocbld_open();
1489     for (int i = 0; i < UPRV_LENGTHOF(wellFormedExtensions); i++) {
1490         ulocbld_clear(bld);
1491         const char* extension = wellFormedExtensions[i];
1492         UErrorCode status = U_ZERO_ERROR;
1493         ulocbld_setExtension(bld, 'x', extension, -1);
1494         if (ulocbld_copyErrorTo(bld, &status) || U_FAILURE(status)) {
1495             log_err("setExtension('x', \"%s\") got Error: %s\n",
1496                   extension, u_errorName(status));
1497         }
1498     }
1499     ulocbld_close(bld);
1500 }
1501 
TestSetExtensionValidatePUIllFormed(void)1502 static void TestSetExtensionValidatePUIllFormed(void) {
1503     static const char* illFormed[] = {
1504         "123456789",  // Too long
1505         "abcdefghi",  // Too long
1506         "ab-123456789",  // Second subtag too long
1507         "abcdefghi-12",  // First subtag too long
1508         "a-ab-987654321",  // Third subtag too long
1509         "987654321-a-0-3",  // First subtag too long
1510     };
1511     ULocaleBuilder* bld = ulocbld_open();
1512     for (int i = 0; i < UPRV_LENGTHOF(illFormed); i++) {
1513         const char* ill = illFormed[i];
1514         ulocbld_clear(bld);
1515         ulocbld_setExtension(bld, 'x', ill, -1);
1516         UErrorCode status = U_ZERO_ERROR;
1517         if (!ulocbld_copyErrorTo(bld, &status) ||status != U_ILLEGAL_ARGUMENT_ERROR) {
1518             log_err("ulocbld_setExtension('x', \"%s\") should fail but has no Error\n",
1519                   ill);
1520         }
1521     }
1522     ulocbld_close(bld);
1523 }
1524 
TestSetExtensionOthers(void)1525 static void TestSetExtensionOthers(void) {
1526     ULocaleBuilder* bld = ulocbld_open();
1527     ulocbld_setLanguage(bld, "fr", -1);
1528     Verify(bld, "fr",
1529            "ulocbld_setLanguage(\"fr\") got Error: %s\n");
1530 
1531     ulocbld_setExtension(bld, 'Z', "ab1234", 2);
1532     Verify(bld, "fr-z-ab",
1533            "ulocbld_setExtension('Z', \"ab\") got Error: %s\n");
1534 
1535     ulocbld_setExtension(bld, '0', "xyz12345-abcdefg", -1);
1536     Verify(bld, "fr-0-xyz12345-abcdefg-z-ab",
1537            "ulocbld_setExtension('0', \"xyz12345-abcdefg\") got Error: %s\n");
1538 
1539     ulocbld_setExtension(bld, 'a', "01-12345678-ABcdef", -1);
1540     Verify(bld, "fr-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1541            "ulocbld_setExtension('a', \"01-12345678-ABcdef\") got Error: %s\n");
1542 
1543     ulocbld_setRegion(bld, "TH1234", 2);
1544     Verify(bld, "fr-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1545            "ulocbld_setRegion(\"TH\") got Error: %s\n");
1546 
1547     ulocbld_setScript(bld, "Arab", -1);
1548     Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1549            "ulocbld_setRegion(\"Arab\") got Error: %s\n");
1550 
1551     ulocbld_setExtension(bld, 'A', "97", 2);
1552     Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-97-z-ab",
1553            "ulocbld_setExtension('a', \"97\") got Error: %s\n");
1554 
1555     ulocbld_setExtension(bld, 'a', "", 0);
1556     Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-z-ab",
1557            "ulocbld_setExtension('a', \"\") got Error: %s\n");
1558 
1559     ulocbld_setExtension(bld, '0', "", -1);
1560     Verify(bld, "fr-Arab-TH-z-ab",
1561            "ulocbld_setExtension('0', \"\") got Error: %s\n");
1562     ulocbld_close(bld);
1563 }
1564 
TestSetExtensionValidateOthersWellFormed(void)1565 static void TestSetExtensionValidateOthersWellFormed(void) {
1566     static const char* wellFormedExtensions[] = {
1567         "ab",
1568         "abc",
1569         "abcefghi",
1570         "01",
1571         "234",
1572         "87654321",
1573         "0a-ab-87654321",
1574         "87654321-ab-00-3A",
1575     };
1576 
1577     const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1578     const int32_t aToZLen = strlen(aToZ);
1579     ULocaleBuilder* bld = ulocbld_open();
1580     for (int i = 0; i < UPRV_LENGTHOF(wellFormedExtensions); i++) {
1581         const char* extension = wellFormedExtensions[i];
1582         ulocbld_clear(bld);
1583         char ch = aToZ[i];
1584         i = (i + 1) % aToZLen;
1585         UErrorCode status = U_ZERO_ERROR;
1586         ulocbld_setExtension(bld, ch, extension, -1);
1587         if (ulocbld_copyErrorTo(bld, &status) || U_FAILURE(status)) {
1588             log_err("ulocbld_setExtension('%c', \"%s\") got Error: %s\n",
1589                   ch, extension, u_errorName(status));
1590         }
1591     }
1592 
1593     const char* someChars =
1594         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+;:,.<>?";
1595     const int32_t someCharsLen = strlen(someChars);
1596     for (int32_t i = 0; i < someCharsLen; i++) {
1597         char ch = someChars[i];
1598         UErrorCode status = U_ZERO_ERROR;
1599         ulocbld_clear(bld);
1600         ulocbld_setExtension(bld, ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)], -1);
1601         if (uprv_isASCIILetter(ch) || ('0' <= ch && ch <= '9')) {
1602             if (ch != 't' && ch != 'T' && ch != 'u' && ch != 'U' && ch != 'x' && ch != 'X') {
1603                 if (ulocbld_copyErrorTo(bld, &status) || U_FAILURE(status)) {
1604                     log_err("setExtension('%c', \"%s\") got Error: %s\n",
1605                           ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)], u_errorName(status));
1606                 }
1607             }
1608         } else {
1609             if (!ulocbld_copyErrorTo(bld, &status) || status != U_ILLEGAL_ARGUMENT_ERROR) {
1610                 log_err("setExtension('%c', \"%s\") should fail but has no Error\n",
1611                       ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
1612             }
1613         }
1614 
1615     }
1616     ulocbld_close(bld);
1617 }
1618 
TestSetExtensionValidateOthersIllFormed(void)1619 static void TestSetExtensionValidateOthersIllFormed(void) {
1620     static const char* illFormed[] = {
1621         "0",  // Too short
1622         "a",  // Too short
1623         "123456789",  // Too long
1624         "abcdefghi",  // Too long
1625         "ab-123456789",  // Second subtag too long
1626         "abcdefghi-12",  // First subtag too long
1627         "a-ab-87654321",  // Third subtag too long
1628         "87654321-a-0-3",  // First subtag too long
1629     };
1630     const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1631     const int32_t aToZLen = strlen(aToZ);
1632     ULocaleBuilder* bld = ulocbld_open();
1633     for (int i = 0; i < UPRV_LENGTHOF(illFormed); i++) {
1634         const char* ill = illFormed[i];
1635         char ch = aToZ[i];
1636         ulocbld_clear(bld);
1637         i = (i + 1) % aToZLen;
1638         UErrorCode status = U_ZERO_ERROR;
1639         ulocbld_setExtension(bld, ch, ill, -1);
1640         if (!ulocbld_copyErrorTo(bld, &status) || status != U_ILLEGAL_ARGUMENT_ERROR) {
1641             log_err("setExtension('%c', \"%s\") should fail but has no Error\n",
1642                   ch, ill);
1643         }
1644     }
1645     ulocbld_close(bld);
1646 }
1647 
TestSetLocale(void)1648 static void TestSetLocale(void) {
1649     ULocaleBuilder* bld1 = ulocbld_open();
1650     ULocaleBuilder* bld2 = ulocbld_open();
1651     UErrorCode status = U_ZERO_ERROR;
1652 
1653     ulocbld_setLanguage(bld1, "en", -1);
1654     ulocbld_setScript(bld1, "Latn", -1);
1655     ulocbld_setRegion(bld1, "MX", -1);
1656     ulocbld_setVariant(bld1, "3456-abcde", -1);
1657     ulocbld_addUnicodeLocaleAttribute(bld1, "456", -1);
1658     ulocbld_addUnicodeLocaleAttribute(bld1, "123", -1);
1659     ulocbld_setUnicodeLocaleKeyword(bld1, "nu", -1, "thai", -1);
1660     ulocbld_setUnicodeLocaleKeyword(bld1, "co", -1, "stroke", -1);
1661     ulocbld_setUnicodeLocaleKeyword(bld1, "ca", -1, "chinese", -1);
1662     char locale1[ULOC_FULLNAME_CAPACITY];
1663     ulocbld_buildLocaleID(bld1, locale1, ULOC_FULLNAME_CAPACITY, &status);
1664 
1665     if (U_FAILURE(status)) {
1666         log_err("build got Error: %s\n", u_errorName(status));
1667     }
1668     ulocbld_setLocale(bld2, locale1, -1);
1669     char locale2[ULOC_FULLNAME_CAPACITY];
1670     ulocbld_buildLocaleID(bld2, locale2, ULOC_FULLNAME_CAPACITY, &status);
1671     if (U_FAILURE(status)) {
1672         log_err("build got Error: %s\n", u_errorName(status));
1673     }
1674     if (strcmp(locale1, locale2) != 0) {
1675         log_err("Two locales should be the same, but one is '%s' and the other is '%s'",
1676               locale1, locale2);
1677     }
1678     ulocbld_close(bld1);
1679     ulocbld_close(bld2);
1680 }
1681 
TestBuildULocale(void)1682 static void TestBuildULocale(void) {
1683     ULocaleBuilder* bld1 = ulocbld_open();
1684     UErrorCode status = U_ZERO_ERROR;
1685 
1686     ulocbld_setLanguage(bld1, "fr", -1);
1687     ULocale* fr = ulocbld_buildULocale(bld1, &status);
1688     if (assertSuccess(WHERE "ulocbld_buildULocale()", &status)) {
1689         assertEquals(WHERE "ulocale_getLanguage()", "fr", ulocale_getLanguage(fr));
1690     }
1691 
1692     ulocbld_setLanguage(bld1, "ar", -1);
1693     ulocbld_setScript(bld1, "Arab", -1);
1694     ulocbld_setRegion(bld1, "EG", -1);
1695     ulocbld_setVariant(bld1, "3456-abcde", -1);
1696     ulocbld_setUnicodeLocaleKeyword(bld1, "nu", -1, "thai", -1);
1697     ulocbld_setUnicodeLocaleKeyword(bld1, "co", -1, "stroke", -1);
1698     ulocbld_setUnicodeLocaleKeyword(bld1, "ca", -1, "chinese", -1);
1699     ULocale* l = ulocbld_buildULocale(bld1, &status);
1700 
1701     if (assertSuccess(WHERE "ulocbld_buildULocale()", &status)) {
1702         assertEquals(WHERE "ulocale_getLanguage()", "ar", ulocale_getLanguage(l));
1703         assertEquals(WHERE "ulocale_getScript()", "Arab", ulocale_getScript(l));
1704         assertEquals(WHERE "ulocale_getRegion()", "EG", ulocale_getRegion(l));
1705         assertEquals(WHERE "ulocale_getVariant()", "3456_ABCDE", ulocale_getVariant(l));
1706         char buf[ULOC_FULLNAME_CAPACITY];
1707         assertIntEquals(WHERE "ulocale_getUnicodeKeywordValue(\"nu\")", 4,
1708                      ulocale_getUnicodeKeywordValue(l, "nu", -1, buf, ULOC_FULLNAME_CAPACITY, &status));
1709         if (assertSuccess(WHERE "ulocale_getUnicodeKeywordValue(\"nu\")", &status)) {
1710             assertEquals(WHERE "ulocale_getUnicodeKeywordValue(\"nu\")", "thai", buf);
1711         }
1712 
1713         status = U_ZERO_ERROR;
1714         assertIntEquals(WHERE "ulocale_getUnicodeKeywordValue(\"co\")", 6,
1715                      ulocale_getUnicodeKeywordValue(l, "co", -1, buf, ULOC_FULLNAME_CAPACITY, &status));
1716         if (assertSuccess(WHERE "ulocale_getUnicodeKeywordValue(\"co\")", &status)) {
1717             assertEquals(WHERE "ulocale_getUnicodeKeywordValue(\"co\")", "stroke", buf);
1718         }
1719 
1720         status = U_ZERO_ERROR;
1721         assertIntEquals(WHERE "ulocale_getUnicodeKeywordValue(\"ca\")", 7,
1722                      ulocale_getUnicodeKeywordValue(l, "ca", -1, buf, ULOC_FULLNAME_CAPACITY, &status));
1723         if (assertSuccess(WHERE "ulocale_getUnicodeKeywordValue(\"ca\")", &status)) {
1724             assertEquals(WHERE "ulocale_getUnicodeKeywordValue(\"ca\")", "chinese", buf);
1725         }
1726         ulocale_close(l);
1727     }
1728     ulocbld_adoptULocale(bld1, fr);
1729     char buf[ULOC_FULLNAME_CAPACITY];
1730     ulocbld_buildLocaleID(bld1, buf, ULOC_FULLNAME_CAPACITY, &status);
1731     if (assertSuccess(WHERE "ulocbld_buildULocale()", &status)) {
1732         assertEquals(WHERE "ulocbld_buildULocale()", "fr", buf);
1733     }
1734     ulocbld_close(bld1);
1735 }
1736 
1737 
TestPosixCases(void)1738 static void TestPosixCases(void) {
1739     UErrorCode status = U_ZERO_ERROR;
1740     ULocaleBuilder* bld = ulocbld_open();
1741     ulocbld_setLanguage(bld, "en", -1);
1742     ulocbld_setRegion(bld, "MX", -1);
1743     ulocbld_setScript(bld, "Arab", -1);
1744     ulocbld_setUnicodeLocaleKeyword(bld, "nu", -1, "Thai", -1);
1745     ulocbld_setExtension(bld, 'x', "1", -1);
1746     // All of above should be cleared by the setLocale call.
1747     const char* posix = "en_US_POSIX";
1748     ulocbld_setLocale(bld, posix, -1);
1749     char locale[ULOC_FULLNAME_CAPACITY];
1750     ulocbld_buildLocaleID(bld, locale, ULOC_FULLNAME_CAPACITY, &status);
1751     if (U_FAILURE(status)) {
1752         log_err("build got Error: %s\n", u_errorName(status));
1753     }
1754     if (strcmp(posix, locale) != 0) {
1755         log_err("The result locale should be the set as the setLocale %s but got %s\n",
1756                 posix, locale);
1757     }
1758     ulocbld_close(bld);
1759 }
1760 
1761 #define TESTCASE(name) addTest(root, &name, "tsutil/ulocbuildertst/" #name)
addLocaleBuilderTest(TestNode ** root)1762 void addLocaleBuilderTest(TestNode** root)
1763 {
1764     TESTCASE(TestLocaleBuilder);
1765     TESTCASE(TestLocaleBuilderBasic);
1766     TESTCASE(TestLocaleBuilderBasicWithExtensionsOnDefaultLocale);
1767     TESTCASE(TestSetLanguageWellFormed);
1768     TESTCASE(TestSetLanguageIllFormed);
1769     TESTCASE(TestSetScriptWellFormed);
1770     TESTCASE(TestSetScriptIllFormed);
1771     TESTCASE(TestSetRegionWellFormed);
1772     TESTCASE(TestSetRegionIllFormed);
1773     TESTCASE(TestSetVariantWellFormed);
1774     TESTCASE(TestSetVariantIllFormed);
1775     TESTCASE(TestSetUnicodeLocaleKeywordWellFormed);
1776     TESTCASE(TestSetUnicodeLocaleKeywordIllFormedKey);
1777     TESTCASE(TestSetUnicodeLocaleKeywordIllFormedValue);
1778     TESTCASE(TestAddRemoveUnicodeLocaleAttribute);
1779     TESTCASE(TestAddRemoveUnicodeLocaleAttributeWellFormed);
1780     TESTCASE(TestAddUnicodeLocaleAttributeIllFormed);
1781     TESTCASE(TestSetExtensionU);
1782     TESTCASE(TestSetExtensionValidateUWellFormed);
1783     TESTCASE(TestSetExtensionValidateUIllFormed);
1784     TESTCASE(TestSetExtensionT);
1785     TESTCASE(TestSetExtensionValidateTWellFormed);
1786     TESTCASE(TestSetExtensionValidateTIllFormed);
1787     TESTCASE(TestSetExtensionPU);
1788     TESTCASE(TestSetExtensionValidatePUWellFormed);
1789     TESTCASE(TestSetExtensionValidatePUIllFormed);
1790     TESTCASE(TestSetExtensionOthers);
1791     TESTCASE(TestSetExtensionValidateOthersWellFormed);
1792     TESTCASE(TestSetExtensionValidateOthersIllFormed);
1793     TESTCASE(TestSetLocale);
1794     TESTCASE(TestBuildULocale);
1795     TESTCASE(TestPosixCases);
1796 }
1797