1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2004-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 */
9
10 #ifndef ULOCIMP_H
11 #define ULOCIMP_H
12
13 #include <cstddef>
14
15 #include "unicode/bytestream.h"
16 #include "unicode/uloc.h"
17
18 #include "charstr.h"
19
20 /**
21 * Create an iterator over the specified keywords list
22 * @param keywordList double-null terminated list. Will be copied.
23 * @param keywordListSize size in bytes of keywordList
24 * @param status err code
25 * @return enumeration (owned by caller) of the keyword list.
26 * @internal ICU 3.0
27 */
28 U_CAPI UEnumeration* U_EXPORT2
29 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);
30
31 /**
32 * Look up a resource bundle table item with fallback on the table level.
33 * This is accessible so it can be called by C++ code.
34 */
35 U_CAPI const UChar * U_EXPORT2
36 uloc_getTableStringWithFallback(
37 const char *path,
38 const char *locale,
39 const char *tableKey,
40 const char *subTableKey,
41 const char *itemKey,
42 int32_t *pLength,
43 UErrorCode *pErrorCode);
44
namespace {
/**
 * Tests whether a character is a locale ID separator.
 *
 * @param a the character to test.
 * @return true if `a` is '_' or '-', false otherwise.
 */
constexpr bool _isIDSeparator(char a) { return a == '_' || a == '-'; }
}  // namespace
49
50 U_CFUNC const char*
51 uloc_getCurrentCountryID(const char* oldID);
52
53 U_CFUNC const char*
54 uloc_getCurrentLanguageID(const char* oldID);
55
56 U_EXPORT icu::CharString
57 ulocimp_getKeywords(const char* localeID,
58 char prev,
59 bool valuesToo,
60 UErrorCode& status);
61
62 U_EXPORT void
63 ulocimp_getKeywords(const char* localeID,
64 char prev,
65 icu::ByteSink& sink,
66 bool valuesToo,
67 UErrorCode& status);
68
69 U_EXPORT icu::CharString
70 ulocimp_getName(const char* localeID,
71 UErrorCode& err);
72
73 U_EXPORT void
74 ulocimp_getName(const char* localeID,
75 icu::ByteSink& sink,
76 UErrorCode& err);
77
78 U_EXPORT icu::CharString
79 ulocimp_getBaseName(const char* localeID,
80 UErrorCode& err);
81
82 U_EXPORT void
83 ulocimp_getBaseName(const char* localeID,
84 icu::ByteSink& sink,
85 UErrorCode& err);
86
87 U_EXPORT icu::CharString
88 ulocimp_canonicalize(const char* localeID,
89 UErrorCode& err);
90
91 U_EXPORT void
92 ulocimp_canonicalize(const char* localeID,
93 icu::ByteSink& sink,
94 UErrorCode& err);
95
96 U_EXPORT icu::CharString
97 ulocimp_getKeywordValue(const char* localeID,
98 const char* keywordName,
99 UErrorCode& status);
100
101 U_EXPORT void
102 ulocimp_getKeywordValue(const char* localeID,
103 const char* keywordName,
104 icu::ByteSink& sink,
105 UErrorCode& status);
106
107 U_EXPORT icu::CharString
108 ulocimp_getLanguage(const char* localeID, UErrorCode& status);
109
110 U_EXPORT icu::CharString
111 ulocimp_getScript(const char* localeID, UErrorCode& status);
112
113 U_EXPORT icu::CharString
114 ulocimp_getRegion(const char* localeID, UErrorCode& status);
115
116 U_EXPORT icu::CharString
117 ulocimp_getVariant(const char* localeID, UErrorCode& status);
118
119 U_EXPORT void
120 ulocimp_setKeywordValue(const char* keywordName,
121 const char* keywordValue,
122 icu::CharString& localeID,
123 UErrorCode& status);
124
125 U_EXPORT int32_t
126 ulocimp_setKeywordValue(const char* keywords,
127 const char* keywordName,
128 const char* keywordValue,
129 icu::ByteSink& sink,
130 UErrorCode& status);
131
132 U_EXPORT void
133 ulocimp_getSubtags(
134 const char* localeID,
135 icu::CharString* language,
136 icu::CharString* script,
137 icu::CharString* region,
138 icu::CharString* variant,
139 const char** pEnd,
140 UErrorCode& status);
141
142 U_EXPORT void
143 ulocimp_getSubtags(
144 const char* localeID,
145 icu::ByteSink* language,
146 icu::ByteSink* script,
147 icu::ByteSink* region,
148 icu::ByteSink* variant,
149 const char** pEnd,
150 UErrorCode& status);
151
152 inline void
ulocimp_getSubtags(const char * localeID,std::nullptr_t,std::nullptr_t,std::nullptr_t,std::nullptr_t,const char ** pEnd,UErrorCode & status)153 ulocimp_getSubtags(
154 const char* localeID,
155 std::nullptr_t,
156 std::nullptr_t,
157 std::nullptr_t,
158 std::nullptr_t,
159 const char** pEnd,
160 UErrorCode& status) {
161 ulocimp_getSubtags(
162 localeID,
163 static_cast<icu::ByteSink*>(nullptr),
164 static_cast<icu::ByteSink*>(nullptr),
165 static_cast<icu::ByteSink*>(nullptr),
166 static_cast<icu::ByteSink*>(nullptr),
167 pEnd,
168 status);
169 }
170
171 U_EXPORT icu::CharString
172 ulocimp_getParent(const char* localeID,
173 UErrorCode& err);
174
175 U_EXPORT void
176 ulocimp_getParent(const char* localeID,
177 icu::ByteSink& sink,
178 UErrorCode& err);
179
180 U_EXPORT icu::CharString
181 ulocimp_toLanguageTag(const char* localeID,
182 bool strict,
183 UErrorCode& status);
184
185 /**
186 * Writes a well-formed language tag for this locale ID.
187 *
 * **Note**: When `strict` is false, any locale fields which do not satisfy the
 * BCP47 syntax requirement will be omitted from the result. When `strict` is
 * true, this function sets `err` to U_ILLEGAL_ARGUMENT_ERROR if any locale
 * field does not satisfy the BCP47 syntax requirement.
192 *
193 * @param localeID the input locale ID
194 * @param sink the output sink receiving the BCP47 language
195 * tag for this Locale.
196 * @param strict boolean value indicating if the function returns
197 * an error for an ill-formed input locale ID.
198 * @param err error information if receiving the language
199 * tag failed.
 * @return Nothing; the BCP47 language tag is written to `sink`.
201 *
202 * @internal ICU 64
203 */
204 U_EXPORT void
205 ulocimp_toLanguageTag(const char* localeID,
206 icu::ByteSink& sink,
207 bool strict,
208 UErrorCode& err);
209
210 U_EXPORT icu::CharString
211 ulocimp_forLanguageTag(const char* langtag,
212 int32_t tagLen,
213 int32_t* parsedLength,
214 UErrorCode& status);
215
216 /**
217 * Returns a locale ID for the specified BCP47 language tag string.
218 * If the specified language tag contains any ill-formed subtags,
219 * the first such subtag and all following subtags are ignored.
220 * <p>
221 * This implements the 'Language-Tag' production of BCP 47, and so
222 * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
223 * (regular and irregular) as well as private use language tags.
224 *
225 * Private use tags are represented as 'x-whatever',
226 * and legacy tags are converted to their canonical replacements where they exist.
227 *
228 * Note that a few legacy tags have no modern replacement;
229 * these will be converted using the fallback described in
230 * the first paragraph, so some information might be lost.
231 *
232 * @param langtag the input BCP47 language tag.
233 * @param tagLen the length of langtag, or -1 to call uprv_strlen().
234 * @param sink the output sink receiving a locale ID for the
235 * specified BCP47 language tag.
236 * @param parsedLength if not NULL, successfully parsed length
237 * for the input language tag is set.
 * @param err error information if receiving the locale ID
239 * failed.
240 * @internal ICU 63
241 */
242 U_EXPORT void
243 ulocimp_forLanguageTag(const char* langtag,
244 int32_t tagLen,
245 icu::ByteSink& sink,
246 int32_t* parsedLength,
247 UErrorCode& err);
248
249 /**
250 * Get the region to use for supplemental data lookup. Uses
251 * (1) any region specified by locale tag "rg"; if none then
252 * (2) any unicode_region_tag in the locale ID; if none then
253 * (3) if inferRegion is true, the region suggested by
254 * getLikelySubtags on the localeID.
255 * If no region is found, returns an empty string.
256 *
257 * @param localeID
258 * The complete locale ID (with keywords) from which
259 * to get the region to use for supplemental data.
260 * @param inferRegion
261 * If true, will try to infer region from localeID if
262 * no other region is found.
263 * @param status
 * Reference to in/out UErrorCode value for latest status.
265 * @return
266 * The region code found, empty if none found.
267 * @internal ICU 57
268 */
269 U_EXPORT icu::CharString
270 ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
271 UErrorCode& status);
272
273 U_EXPORT icu::CharString
274 ulocimp_addLikelySubtags(const char* localeID,
275 UErrorCode& status);
276
277 /**
278 * Add the likely subtags for a provided locale ID, per the algorithm described
279 * in the following CLDR technical report:
280 *
281 * http://www.unicode.org/reports/tr35/#Likely_Subtags
282 *
 * If localeID is already in the maximal form, or there is no data available
 * for maximization, it will be copied to the output sink. For example,
 * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
286 *
287 * Examples:
288 *
289 * "en" maximizes to "en_Latn_US"
290 *
 * "de" maximizes to "de_Latn_DE"
292 *
293 * "sr" maximizes to "sr_Cyrl_RS"
294 *
295 * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
296 *
297 * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
298 *
299 * @param localeID The locale to maximize
300 * @param sink The output sink receiving the maximized locale
301 * @param err Error information if maximizing the locale failed. If the length
302 * of the localeID and the null-terminator is greater than the maximum allowed size,
303 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
304 * @internal ICU 64
305 */
306 U_EXPORT void
307 ulocimp_addLikelySubtags(const char* localeID,
308 icu::ByteSink& sink,
309 UErrorCode& err);
310
311 U_EXPORT icu::CharString
312 ulocimp_minimizeSubtags(const char* localeID,
313 bool favorScript,
314 UErrorCode& status);
315
316 /**
317 * Minimize the subtags for a provided locale ID, per the algorithm described
318 * in the following CLDR technical report:
319 *
320 * http://www.unicode.org/reports/tr35/#Likely_Subtags
321 *
 * If localeID is already in the minimal form, or there is no data available
 * for minimization, it will be copied to the output sink. Since the
 * minimization algorithm relies on proper maximization, see the comments
 * for ulocimp_addLikelySubtags for reasons why there might not be any data.
326 *
327 * Examples:
328 *
329 * "en_Latn_US" minimizes to "en"
330 *
 * "de_Latn_DE" minimizes to "de"
332 *
333 * "sr_Cyrl_RS" minimizes to "sr"
334 *
335 * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
336 * script, and minimizing to "zh" would imply "zh_Hans_CN".)
337 *
338 * @param localeID The locale to minimize
 * @param sink The output sink receiving the minimized locale
340 * @param favorScript favor to keep script if true, region if false.
341 * @param err Error information if minimizing the locale failed. If the length
342 * of the localeID and the null-terminator is greater than the maximum allowed size,
343 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
344 * @internal ICU 64
345 */
346 U_EXPORT void
347 ulocimp_minimizeSubtags(const char* localeID,
348 icu::ByteSink& sink,
349 bool favorScript,
350 UErrorCode& err);
351
352 U_CAPI const char * U_EXPORT2
353 locale_getKeywordsStart(const char *localeID);
354
355 bool
356 ultag_isExtensionSubtags(const char* s, int32_t len);
357
358 bool
359 ultag_isLanguageSubtag(const char* s, int32_t len);
360
361 bool
362 ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
363
364 bool
365 ultag_isRegionSubtag(const char* s, int32_t len);
366
367 bool
368 ultag_isScriptSubtag(const char* s, int32_t len);
369
370 bool
371 ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
372
373 bool
374 ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
375
376 bool
377 ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
378
379 bool
380 ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
381
382 bool
383 ultag_isUnicodeLocaleKey(const char* s, int32_t len);
384
385 bool
386 ultag_isUnicodeLocaleType(const char* s, int32_t len);
387
388 bool
389 ultag_isVariantSubtags(const char* s, int32_t len);
390
391 const char*
392 ultag_getTKeyStart(const char* localeID);
393
394 U_EXPORT const char*
395 ulocimp_toBcpKey(const char* key);
396
397 U_EXPORT const char*
398 ulocimp_toLegacyKey(const char* key);
399
400 U_EXPORT const char*
401 ulocimp_toBcpType(const char* key, const char* type, bool* isKnownKey, bool* isSpecialType);
402
403 U_EXPORT const char*
404 ulocimp_toLegacyType(const char* key, const char* type, bool* isKnownKey, bool* isSpecialType);
405
406 /* Function for testing purpose */
407 U_EXPORT const char* const*
408 ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length);
409
410 // Return true if the value is already canonicalized.
411 U_EXPORT bool
412 ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
413
414 #endif
415