xref: /aosp_15_r20/external/icu/libicu/cts_headers/ulocimp.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2004-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 */
9 
10 #ifndef ULOCIMP_H
11 #define ULOCIMP_H
12 
13 #include <cstddef>
14 
15 #include "unicode/bytestream.h"
16 #include "unicode/uloc.h"
17 
18 #include "charstr.h"
19 
20 /**
21  * Create an iterator over the specified keywords list
22  * @param keywordList double-null terminated list. Will be copied.
23  * @param keywordListSize size in bytes of keywordList
24  * @param status err code
25  * @return enumeration (owned by caller) of the keyword list.
26  * @internal ICU 3.0
27  */
28 U_CAPI UEnumeration* U_EXPORT2
29 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);
30 
31 /**
32  * Look up a resource bundle table item with fallback on the table level.
33  * This is accessible so it can be called by C++ code.
34  */
35 U_CAPI const UChar * U_EXPORT2
36 uloc_getTableStringWithFallback(
37     const char *path,
38     const char *locale,
39     const char *tableKey,
40     const char *subTableKey,
41     const char *itemKey,
42     int32_t *pLength,
43     UErrorCode *pErrorCode);
44 
namespace {
/**
 * Tests whether a character is a locale ID separator.
 * @param a the character to test
 * @return true if a is '_' or '-', false otherwise
 */
inline bool _isIDSeparator(char a) {
    switch (a) {
    case '_':
    case '-':
        return true;
    default:
        return false;
    }
}
}  // namespace
49 
50 U_CFUNC const char*
51 uloc_getCurrentCountryID(const char* oldID);
52 
53 U_CFUNC const char*
54 uloc_getCurrentLanguageID(const char* oldID);
55 
56 U_EXPORT icu::CharString
57 ulocimp_getKeywords(const char* localeID,
58                     char prev,
59                     bool valuesToo,
60                     UErrorCode& status);
61 
62 U_EXPORT void
63 ulocimp_getKeywords(const char* localeID,
64                     char prev,
65                     icu::ByteSink& sink,
66                     bool valuesToo,
67                     UErrorCode& status);
68 
69 U_EXPORT icu::CharString
70 ulocimp_getName(const char* localeID,
71                 UErrorCode& err);
72 
73 U_EXPORT void
74 ulocimp_getName(const char* localeID,
75                 icu::ByteSink& sink,
76                 UErrorCode& err);
77 
78 U_EXPORT icu::CharString
79 ulocimp_getBaseName(const char* localeID,
80                     UErrorCode& err);
81 
82 U_EXPORT void
83 ulocimp_getBaseName(const char* localeID,
84                     icu::ByteSink& sink,
85                     UErrorCode& err);
86 
87 U_EXPORT icu::CharString
88 ulocimp_canonicalize(const char* localeID,
89                      UErrorCode& err);
90 
91 U_EXPORT void
92 ulocimp_canonicalize(const char* localeID,
93                      icu::ByteSink& sink,
94                      UErrorCode& err);
95 
96 U_EXPORT icu::CharString
97 ulocimp_getKeywordValue(const char* localeID,
98                         const char* keywordName,
99                         UErrorCode& status);
100 
101 U_EXPORT void
102 ulocimp_getKeywordValue(const char* localeID,
103                         const char* keywordName,
104                         icu::ByteSink& sink,
105                         UErrorCode& status);
106 
107 U_EXPORT icu::CharString
108 ulocimp_getLanguage(const char* localeID, UErrorCode& status);
109 
110 U_EXPORT icu::CharString
111 ulocimp_getScript(const char* localeID, UErrorCode& status);
112 
113 U_EXPORT icu::CharString
114 ulocimp_getRegion(const char* localeID, UErrorCode& status);
115 
116 U_EXPORT icu::CharString
117 ulocimp_getVariant(const char* localeID, UErrorCode& status);
118 
119 U_EXPORT void
120 ulocimp_setKeywordValue(const char* keywordName,
121                         const char* keywordValue,
122                         icu::CharString& localeID,
123                         UErrorCode& status);
124 
125 U_EXPORT int32_t
126 ulocimp_setKeywordValue(const char* keywords,
127                         const char* keywordName,
128                         const char* keywordValue,
129                         icu::ByteSink& sink,
130                         UErrorCode& status);
131 
132 U_EXPORT void
133 ulocimp_getSubtags(
134         const char* localeID,
135         icu::CharString* language,
136         icu::CharString* script,
137         icu::CharString* region,
138         icu::CharString* variant,
139         const char** pEnd,
140         UErrorCode& status);
141 
142 U_EXPORT void
143 ulocimp_getSubtags(
144         const char* localeID,
145         icu::ByteSink* language,
146         icu::ByteSink* script,
147         icu::ByteSink* region,
148         icu::ByteSink* variant,
149         const char** pEnd,
150         UErrorCode& status);
151 
152 inline void
ulocimp_getSubtags(const char * localeID,std::nullptr_t,std::nullptr_t,std::nullptr_t,std::nullptr_t,const char ** pEnd,UErrorCode & status)153 ulocimp_getSubtags(
154         const char* localeID,
155         std::nullptr_t,
156         std::nullptr_t,
157         std::nullptr_t,
158         std::nullptr_t,
159         const char** pEnd,
160         UErrorCode& status) {
161     ulocimp_getSubtags(
162             localeID,
163             static_cast<icu::ByteSink*>(nullptr),
164             static_cast<icu::ByteSink*>(nullptr),
165             static_cast<icu::ByteSink*>(nullptr),
166             static_cast<icu::ByteSink*>(nullptr),
167             pEnd,
168             status);
169 }
170 
171 U_EXPORT icu::CharString
172 ulocimp_getParent(const char* localeID,
173                   UErrorCode& err);
174 
175 U_EXPORT void
176 ulocimp_getParent(const char* localeID,
177                   icu::ByteSink& sink,
178                   UErrorCode& err);
179 
180 U_EXPORT icu::CharString
181 ulocimp_toLanguageTag(const char* localeID,
182                       bool strict,
183                       UErrorCode& status);
184 
185 /**
186  * Writes a well-formed language tag for this locale ID.
187  *
188  * **Note**: When `strict` is false, any locale fields which do not satisfy the
189  * BCP47 syntax requirement will be omitted from the result.  When `strict` is
190  * true, this function sets `err` to U_ILLEGAL_ARGUMENT_ERROR if any locale
191  * fields do not satisfy the BCP47 syntax requirement.
192  *
193  * @param localeID  the input locale ID
194  * @param sink      the output sink receiving the BCP47 language
195  *                  tag for this Locale.
196  * @param strict    boolean value indicating if the function returns
197  *                  an error for an ill-formed input locale ID.
198  * @param err       error information if receiving the language
199  *                  tag failed.
200  * (No return value; the language tag is written to `sink`.)
201  *
202  * @internal ICU 64
203  */
204 U_EXPORT void
205 ulocimp_toLanguageTag(const char* localeID,
206                       icu::ByteSink& sink,
207                       bool strict,
208                       UErrorCode& err);
209 
210 U_EXPORT icu::CharString
211 ulocimp_forLanguageTag(const char* langtag,
212                        int32_t tagLen,
213                        int32_t* parsedLength,
214                        UErrorCode& status);
215 
216 /**
217  * Returns a locale ID for the specified BCP47 language tag string.
218  * If the specified language tag contains any ill-formed subtags,
219  * the first such subtag and all following subtags are ignored.
220  * <p>
221  * This implements the 'Language-Tag' production of BCP 47, and so
222  * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
223  * (regular and irregular) as well as private use language tags.
224  *
225  * Private use tags are represented as 'x-whatever',
226  * and legacy tags are converted to their canonical replacements where they exist.
227  *
228  * Note that a few legacy tags have no modern replacement;
229  * these will be converted using the fallback described in
230  * the first paragraph, so some information might be lost.
231  *
232  * @param langtag   the input BCP47 language tag.
233  * @param tagLen    the length of langtag, or -1 to call uprv_strlen().
234  * @param sink      the output sink receiving a locale ID for the
235  *                  specified BCP47 language tag.
236  * @param parsedLength  if not NULL, successfully parsed length
237  *                      for the input language tag is set.
238  * @param err       error information if receiving the locale ID
239  *                  failed.
240  * @internal ICU 63
241  */
242 U_EXPORT void
243 ulocimp_forLanguageTag(const char* langtag,
244                        int32_t tagLen,
245                        icu::ByteSink& sink,
246                        int32_t* parsedLength,
247                        UErrorCode& err);
248 
249 /**
250  * Get the region to use for supplemental data lookup. Uses
251  * (1) any region specified by locale tag "rg"; if none then
252  * (2) any unicode_region_tag in the locale ID; if none then
253  * (3) if inferRegion is true, the region suggested by
254  * getLikelySubtags on the localeID.
255  * If no region is found, returns an empty string.
256  *
257  * @param localeID
258  *     The complete locale ID (with keywords) from which
259  *     to get the region to use for supplemental data.
260  * @param inferRegion
261  *     If true, will try to infer region from localeID if
262  *     no other region is found.
263  * @param status
264  *     Reference to in/out UErrorCode value for latest status.
265  * @return
266  *     The region code found, empty if none found.
267  * @internal ICU 57
268  */
269 U_EXPORT icu::CharString
270 ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
271                                      UErrorCode& status);
272 
273 U_EXPORT icu::CharString
274 ulocimp_addLikelySubtags(const char* localeID,
275                          UErrorCode& status);
276 
277 /**
278  * Add the likely subtags for a provided locale ID, per the algorithm described
279  * in the following CLDR technical report:
280  *
281  *   http://www.unicode.org/reports/tr35/#Likely_Subtags
282  *
283  * If localeID is already in the maximal form, or there is no data available
284  * for maximization, it will be copied to the output buffer.  For example,
285  * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
286  *
287  * Examples:
288  *
289  * "en" maximizes to "en_Latn_US"
290  *
291  * "de" maximizes to "de_Latn_DE"
292  *
293  * "sr" maximizes to "sr_Cyrl_RS"
294  *
295  * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
296  *
297  * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
298  *
299  * @param localeID The locale to maximize
300  * @param sink The output sink receiving the maximized locale
301  * @param err Error information if maximizing the locale failed.  If the length
302  * of the localeID and the null-terminator is greater than the maximum allowed size,
303  * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
304  * @internal ICU 64
305  */
306 U_EXPORT void
307 ulocimp_addLikelySubtags(const char* localeID,
308                          icu::ByteSink& sink,
309                          UErrorCode& err);
310 
311 U_EXPORT icu::CharString
312 ulocimp_minimizeSubtags(const char* localeID,
313                         bool favorScript,
314                         UErrorCode& status);
315 
316 /**
317  * Minimize the subtags for a provided locale ID, per the algorithm described
318  * in the following CLDR technical report:
319  *
320  *   http://www.unicode.org/reports/tr35/#Likely_Subtags
321  *
322  * If localeID is already in the minimal form, or there is no data available
323  * for minimization, it will be copied to the output buffer.  Since the
324  * minimization algorithm relies on proper maximization, see the comments
325  * for ulocimp_addLikelySubtags for reasons why there might not be any data.
326  *
327  * Examples:
328  *
329  * "en_Latn_US" minimizes to "en"
330  *
331  * "de_Latn_DE" minimizes to "de"
332  *
333  * "sr_Cyrl_RS" minimizes to "sr"
334  *
335  * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
336  * script, and minimizing to "zh" would imply "zh_Hans_CN".)
337  *
338  * @param localeID The locale to minimize
339  * @param sink The output sink receiving the minimized locale
340  * @param favorScript favor to keep script if true, region if false.
341  * @param err Error information if minimizing the locale failed.  If the length
342  * of the localeID and the null-terminator is greater than the maximum allowed size,
343  * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
344  * @internal ICU 64
345  */
346 U_EXPORT void
347 ulocimp_minimizeSubtags(const char* localeID,
348                         icu::ByteSink& sink,
349                         bool favorScript,
350                         UErrorCode& err);
351 
352 U_CAPI const char * U_EXPORT2
353 locale_getKeywordsStart(const char *localeID);
354 
355 bool
356 ultag_isExtensionSubtags(const char* s, int32_t len);
357 
358 bool
359 ultag_isLanguageSubtag(const char* s, int32_t len);
360 
361 bool
362 ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
363 
364 bool
365 ultag_isRegionSubtag(const char* s, int32_t len);
366 
367 bool
368 ultag_isScriptSubtag(const char* s, int32_t len);
369 
370 bool
371 ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
372 
373 bool
374 ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
375 
376 bool
377 ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
378 
379 bool
380 ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
381 
382 bool
383 ultag_isUnicodeLocaleKey(const char* s, int32_t len);
384 
385 bool
386 ultag_isUnicodeLocaleType(const char* s, int32_t len);
387 
388 bool
389 ultag_isVariantSubtags(const char* s, int32_t len);
390 
391 const char*
392 ultag_getTKeyStart(const char* localeID);
393 
394 U_EXPORT const char*
395 ulocimp_toBcpKey(const char* key);
396 
397 U_EXPORT const char*
398 ulocimp_toLegacyKey(const char* key);
399 
400 U_EXPORT const char*
401 ulocimp_toBcpType(const char* key, const char* type, bool* isKnownKey, bool* isSpecialType);
402 
403 U_EXPORT const char*
404 ulocimp_toLegacyType(const char* key, const char* type, bool* isKnownKey, bool* isSpecialType);
405 
406 /* Function for testing purpose */
407 U_EXPORT const char* const*
408 ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length);
409 
410 // Return true if the value is already canonicalized.
411 U_EXPORT bool
412 ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
413 
414 #endif
415