xref: /aosp_15_r20/external/cronet/third_party/icu/source/i18n/dayperiodrules.cpp (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2016, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 * dayperiodrules.cpp
9 *
10 * created on: 2016-01-20
11 * created by: kazede
12 */
13 
14 #include "dayperiodrules.h"
15 
16 #include "unicode/ures.h"
17 #include "bytesinkutil.h"
18 #include "charstr.h"
19 #include "cstring.h"
20 #include "ucln_in.h"
21 #include "uhash.h"
22 #include "ulocimp.h"
23 #include "umutex.h"
24 #include "uresimp.h"
25 
26 
27 U_NAMESPACE_BEGIN
28 
29 namespace {
30 
31 struct DayPeriodRulesData : public UMemory {
DayPeriodRulesData__anona3341eb60111::DayPeriodRulesData32     DayPeriodRulesData() : localeToRuleSetNumMap(nullptr), rules(nullptr), maxRuleSetNum(0) {}
33 
34     UHashtable *localeToRuleSetNumMap;
35     DayPeriodRules *rules;
36     int32_t maxRuleSetNum;
37 } *data = nullptr;
38 
// Kind of boundary named by a key in a day period definition
// ("before", "after", "from", "at").
// The non-negative values are used as bit positions (1 << type) in
// DayPeriodRulesDataSink::cutoffs[], so they are spelled out explicitly.
enum CutoffType {
    CUTOFF_TYPE_UNKNOWN = -1,
    CUTOFF_TYPE_BEFORE = 0,
    CUTOFF_TYPE_AFTER = 1,  // TODO: AFTER is deprecated in CLDR 29. Remove.
    CUTOFF_TYPE_FROM = 2,
    CUTOFF_TYPE_AT = 3
};
46 
47 } // namespace
48 
49 struct DayPeriodRulesDataSink : public ResourceSink {
DayPeriodRulesDataSinkDayPeriodRulesDataSink50     DayPeriodRulesDataSink() {
51         for (int32_t i = 0; i < UPRV_LENGTHOF(cutoffs); ++i) { cutoffs[i] = 0; }
52     }
53     virtual ~DayPeriodRulesDataSink();
54 
putDayPeriodRulesDataSink55     virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) override {
56         ResourceTable dayPeriodData = value.getTable(errorCode);
57         if (U_FAILURE(errorCode)) { return; }
58 
59         for (int32_t i = 0; dayPeriodData.getKeyAndValue(i, key, value); ++i) {
60             if (uprv_strcmp(key, "locales") == 0) {
61                 ResourceTable locales = value.getTable(errorCode);
62                 if (U_FAILURE(errorCode)) { return; }
63 
64                 for (int32_t j = 0; locales.getKeyAndValue(j, key, value); ++j) {
65                     UnicodeString setNum_str = value.getUnicodeString(errorCode);
66                     int32_t setNum = parseSetNum(setNum_str, errorCode);
67                     uhash_puti(data->localeToRuleSetNumMap, const_cast<char *>(key), setNum, &errorCode);
68                 }
69             } else if (uprv_strcmp(key, "rules") == 0) {
70                 // Allocate one more than needed to skip [0]. See comment in parseSetNum().
71                 data->rules = new DayPeriodRules[data->maxRuleSetNum + 1];
72                 if (data->rules == nullptr) {
73                     errorCode = U_MEMORY_ALLOCATION_ERROR;
74                     return;
75                 }
76                 ResourceTable rules = value.getTable(errorCode);
77                 processRules(rules, key, value, errorCode);
78                 if (U_FAILURE(errorCode)) { return; }
79             }
80         }
81     }
82 
processRulesDayPeriodRulesDataSink83     void processRules(const ResourceTable &rules, const char *key,
84                       ResourceValue &value, UErrorCode &errorCode) {
85         if (U_FAILURE(errorCode)) { return; }
86 
87         for (int32_t i = 0; rules.getKeyAndValue(i, key, value); ++i) {
88             ruleSetNum = parseSetNum(key, errorCode);
89             ResourceTable ruleSet = value.getTable(errorCode);
90             if (U_FAILURE(errorCode)) { return; }
91 
92             for (int32_t j = 0; ruleSet.getKeyAndValue(j, key, value); ++j) {
93                 period = DayPeriodRules::getDayPeriodFromString(key);
94                 if (period == DayPeriodRules::DAYPERIOD_UNKNOWN) {
95                     errorCode = U_INVALID_FORMAT_ERROR;
96                     return;
97                 }
98                 ResourceTable periodDefinition = value.getTable(errorCode);
99                 if (U_FAILURE(errorCode)) { return; }
100 
101                 for (int32_t k = 0; periodDefinition.getKeyAndValue(k, key, value); ++k) {
102                     if (value.getType() == URES_STRING) {
103                         // Key-value pairs (e.g. before{6:00}).
104                         CutoffType type = getCutoffTypeFromString(key);
105                         addCutoff(type, value.getUnicodeString(errorCode), errorCode);
106                         if (U_FAILURE(errorCode)) { return; }
107                     } else {
108                         // Arrays (e.g. before{6:00, 24:00}).
109                         cutoffType = getCutoffTypeFromString(key);
110                         ResourceArray cutoffArray = value.getArray(errorCode);
111                         if (U_FAILURE(errorCode)) { return; }
112 
113                         int32_t length = cutoffArray.getSize();
114                         for (int32_t l = 0; l < length; ++l) {
115                             cutoffArray.getValue(l, value);
116                             addCutoff(cutoffType, value.getUnicodeString(errorCode), errorCode);
117                             if (U_FAILURE(errorCode)) { return; }
118                         }
119                     }
120                 }
121                 setDayPeriodForHoursFromCutoffs(errorCode);
122                 for (int32_t k = 0; k < UPRV_LENGTHOF(cutoffs); ++k) {
123                     cutoffs[k] = 0;
124                 }
125             }
126 
127             if (!data->rules[ruleSetNum].allHoursAreSet()) {
128                 errorCode = U_INVALID_FORMAT_ERROR;
129                 return;
130             }
131         }
132     }
133 
134     // Members.
135     int32_t cutoffs[25];  // [0] thru [24]: 24 is allowed in "before 24".
136 
137     // "Path" to data.
138     int32_t ruleSetNum;
139     DayPeriodRules::DayPeriod period;
140     CutoffType cutoffType;
141 
142     // Helpers.
parseSetNumDayPeriodRulesDataSink143     static int32_t parseSetNum(const UnicodeString &setNumStr, UErrorCode &errorCode) {
144         CharString cs;
145         cs.appendInvariantChars(setNumStr, errorCode);
146         return parseSetNum(cs.data(), errorCode);
147     }
148 
parseSetNumDayPeriodRulesDataSink149     static int32_t parseSetNum(const char *setNumStr, UErrorCode &errorCode) {
150         if (U_FAILURE(errorCode)) { return -1; }
151 
152         if (uprv_strncmp(setNumStr, "set", 3) != 0) {
153             errorCode = U_INVALID_FORMAT_ERROR;
154             return -1;
155         }
156 
157         int32_t i = 3;
158         int32_t setNum = 0;
159         while (setNumStr[i] != 0) {
160             int32_t digit = setNumStr[i] - '0';
161             if (digit < 0 || 9 < digit) {
162                 errorCode = U_INVALID_FORMAT_ERROR;
163                 return -1;
164             }
165             setNum = 10 * setNum + digit;
166             ++i;
167         }
168 
169         // Rule set number must not be zero. (0 is used to indicate "not found" by hashmap.)
170         // Currently ICU data conveniently starts numbering rule sets from 1.
171         if (setNum == 0) {
172             errorCode = U_INVALID_FORMAT_ERROR;
173             return -1;
174         } else {
175             return setNum;
176         }
177     }
178 
addCutoffDayPeriodRulesDataSink179     void addCutoff(CutoffType type, const UnicodeString &hour_str, UErrorCode &errorCode) {
180         if (U_FAILURE(errorCode)) { return; }
181 
182         if (type == CUTOFF_TYPE_UNKNOWN) {
183             errorCode = U_INVALID_FORMAT_ERROR;
184             return;
185         }
186 
187         int32_t hour = parseHour(hour_str, errorCode);
188         if (U_FAILURE(errorCode)) { return; }
189 
190         cutoffs[hour] |= 1 << type;
191     }
192 
193     // Translate the cutoffs[] array to day period rules.
setDayPeriodForHoursFromCutoffsDayPeriodRulesDataSink194     void setDayPeriodForHoursFromCutoffs(UErrorCode &errorCode) {
195         DayPeriodRules &rule = data->rules[ruleSetNum];
196 
197         for (int32_t startHour = 0; startHour <= 24; ++startHour) {
198             // AT cutoffs must be either midnight or noon.
199             if (cutoffs[startHour] & (1 << CUTOFF_TYPE_AT)) {
200                 if (startHour == 0 && period == DayPeriodRules::DAYPERIOD_MIDNIGHT) {
201                     rule.fHasMidnight = true;
202                 } else if (startHour == 12 && period == DayPeriodRules::DAYPERIOD_NOON) {
203                     rule.fHasNoon = true;
204                 } else {
205                     errorCode = U_INVALID_FORMAT_ERROR;  // Bad data.
206                     return;
207                 }
208             }
209 
210             // FROM/AFTER and BEFORE must come in a pair.
211             if (cutoffs[startHour] & (1 << CUTOFF_TYPE_FROM) ||
212                     cutoffs[startHour] & (1 << CUTOFF_TYPE_AFTER)) {
213                 for (int32_t hour = startHour + 1;; ++hour) {
214                     if (hour == startHour) {
215                         // We've gone around the array once and can't find a BEFORE.
216                         errorCode = U_INVALID_FORMAT_ERROR;
217                         return;
218                     }
219                     if (hour == 25) { hour = 0; }
220                     if (cutoffs[hour] & (1 << CUTOFF_TYPE_BEFORE)) {
221                         rule.add(startHour, hour, period);
222                         break;
223                     }
224                 }
225             }
226         }
227     }
228 
229     // Translate "before" to CUTOFF_TYPE_BEFORE, for example.
getCutoffTypeFromStringDayPeriodRulesDataSink230     static CutoffType getCutoffTypeFromString(const char *type_str) {
231         if (uprv_strcmp(type_str, "from") == 0) {
232             return CUTOFF_TYPE_FROM;
233         } else if (uprv_strcmp(type_str, "before") == 0) {
234             return CUTOFF_TYPE_BEFORE;
235         } else if (uprv_strcmp(type_str, "after") == 0) {
236             return CUTOFF_TYPE_AFTER;
237         } else if (uprv_strcmp(type_str, "at") == 0) {
238             return CUTOFF_TYPE_AT;
239         } else {
240             return CUTOFF_TYPE_UNKNOWN;
241         }
242     }
243 
244     // Gets the numerical value of the hour from the Unicode string.
parseHourDayPeriodRulesDataSink245     static int32_t parseHour(const UnicodeString &time, UErrorCode &errorCode) {
246         if (U_FAILURE(errorCode)) {
247             return 0;
248         }
249 
250         int32_t hourLimit = time.length() - 3;
251         // `time` must look like "x:00" or "xx:00".
252         // If length is wrong or `time` doesn't end with ":00", error out.
253         if ((hourLimit != 1 && hourLimit != 2) ||
254                 time[hourLimit] != 0x3A || time[hourLimit + 1] != 0x30 ||
255                 time[hourLimit + 2] != 0x30) {
256             errorCode = U_INVALID_FORMAT_ERROR;
257             return 0;
258         }
259 
260         // If `time` doesn't begin with a number in [0, 24], error out.
261         // Note: "24:00" is possible in "before 24:00".
262         int32_t hour = time[0] - 0x30;
263         if (hour < 0 || 9 < hour) {
264             errorCode = U_INVALID_FORMAT_ERROR;
265             return 0;
266         }
267         if (hourLimit == 2) {
268             int32_t hourDigit2 = time[1] - 0x30;
269             if (hourDigit2 < 0 || 9 < hourDigit2) {
270                 errorCode = U_INVALID_FORMAT_ERROR;
271                 return 0;
272             }
273             hour = hour * 10 + hourDigit2;
274             if (hour > 24) {
275                 errorCode = U_INVALID_FORMAT_ERROR;
276                 return 0;
277             }
278         }
279 
280         return hour;
281     }
282 };  // struct DayPeriodRulesDataSink
283 
284 struct DayPeriodRulesCountSink : public ResourceSink {
285     virtual ~DayPeriodRulesCountSink();
286 
putDayPeriodRulesCountSink287     virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) override {
288         ResourceTable rules = value.getTable(errorCode);
289         if (U_FAILURE(errorCode)) { return; }
290 
291         for (int32_t i = 0; rules.getKeyAndValue(i, key, value); ++i) {
292             int32_t setNum = DayPeriodRulesDataSink::parseSetNum(key, errorCode);
293             if (setNum > data->maxRuleSetNum) {
294                 data->maxRuleSetNum = setNum;
295             }
296         }
297     }
298 };
299 
300 // Out-of-line virtual destructors.
~DayPeriodRulesDataSink()301 DayPeriodRulesDataSink::~DayPeriodRulesDataSink() {}
~DayPeriodRulesCountSink()302 DayPeriodRulesCountSink::~DayPeriodRulesCountSink() {}
303 
304 namespace {
305 
306 UInitOnce initOnce {};
307 
dayPeriodRulesCleanup()308 U_CFUNC UBool U_CALLCONV dayPeriodRulesCleanup() {
309     delete[] data->rules;
310     uhash_close(data->localeToRuleSetNumMap);
311     delete data;
312     data = nullptr;
313     return true;
314 }
315 
316 }  // namespace
317 
load(UErrorCode & errorCode)318 void U_CALLCONV DayPeriodRules::load(UErrorCode &errorCode) {
319     if (U_FAILURE(errorCode)) {
320         return;
321     }
322 
323     data = new DayPeriodRulesData();
324     data->localeToRuleSetNumMap = uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &errorCode);
325     LocalUResourceBundlePointer rb_dayPeriods(ures_openDirect(nullptr, "dayPeriods", &errorCode));
326 
327     // Get the largest rule set number (so we allocate enough objects).
328     DayPeriodRulesCountSink countSink;
329     ures_getAllItemsWithFallback(rb_dayPeriods.getAlias(), "rules", countSink, errorCode);
330 
331     // Populate rules.
332     DayPeriodRulesDataSink sink;
333     ures_getAllItemsWithFallback(rb_dayPeriods.getAlias(), "", sink, errorCode);
334 
335     ucln_i18n_registerCleanup(UCLN_I18N_DAYPERIODRULES, dayPeriodRulesCleanup);
336 }
337 
getInstance(const Locale & locale,UErrorCode & errorCode)338 const DayPeriodRules *DayPeriodRules::getInstance(const Locale &locale, UErrorCode &errorCode) {
339     umtx_initOnce(initOnce, DayPeriodRules::load, errorCode);
340 
341     // If the entire day period rules data doesn't conform to spec (even if the part we want
342     // does), return nullptr.
343     if(U_FAILURE(errorCode)) { return nullptr; }
344 
345     const char *localeCode = locale.getBaseName();
346     char name[ULOC_FULLNAME_CAPACITY];
347 
348     if (uprv_strlen(localeCode) < ULOC_FULLNAME_CAPACITY) {
349         uprv_strcpy(name, localeCode);
350 
351         // Treat empty string as root.
352         if (*name == '\0') {
353             uprv_strcpy(name, "root");
354         }
355     } else {
356         errorCode = U_BUFFER_OVERFLOW_ERROR;
357         return nullptr;
358     }
359 
360     int32_t ruleSetNum = 0;  // NB there is no rule set 0 and 0 is returned upon lookup failure.
361     while (*name != '\0') {
362         ruleSetNum = uhash_geti(data->localeToRuleSetNumMap, name);
363         if (ruleSetNum == 0) {
364             CharString parent;
365             CharStringByteSink sink(&parent);
366             ulocimp_getParent(name, sink, &errorCode);
367             if (parent.isEmpty()) {
368                 // Saves a lookup in the hash table.
369                 break;
370             }
371             parent.extract(name, UPRV_LENGTHOF(name), errorCode);
372         } else {
373             break;
374         }
375     }
376 
377     if (ruleSetNum <= 0 || data->rules[ruleSetNum].getDayPeriodForHour(0) == DAYPERIOD_UNKNOWN) {
378         // If day period for hour 0 is UNKNOWN then day period for all hours are UNKNOWN.
379         // Data doesn't exist even with fallback.
380         return nullptr;
381     } else {
382         return &data->rules[ruleSetNum];
383     }
384 }
385 
DayPeriodRules()386 DayPeriodRules::DayPeriodRules() : fHasMidnight(false), fHasNoon(false) {
387     for (int32_t i = 0; i < 24; ++i) {
388         fDayPeriodForHour[i] = DayPeriodRules::DAYPERIOD_UNKNOWN;
389     }
390 }
391 
getMidPointForDayPeriod(DayPeriodRules::DayPeriod dayPeriod,UErrorCode & errorCode) const392 double DayPeriodRules::getMidPointForDayPeriod(
393         DayPeriodRules::DayPeriod dayPeriod, UErrorCode &errorCode) const {
394     if (U_FAILURE(errorCode)) { return -1; }
395 
396     int32_t startHour = getStartHourForDayPeriod(dayPeriod, errorCode);
397     int32_t endHour = getEndHourForDayPeriod(dayPeriod, errorCode);
398     // Can't obtain startHour or endHour; bail out.
399     if (U_FAILURE(errorCode)) { return -1; }
400 
401     double midPoint = (startHour + endHour) / 2.0;
402 
403     if (startHour > endHour) {
404         // dayPeriod wraps around midnight. Shift midPoint by 12 hours, in the direction that
405         // lands it in [0, 24).
406         midPoint += 12;
407         if (midPoint >= 24) {
408             midPoint -= 24;
409         }
410     }
411 
412     return midPoint;
413 }
414 
getStartHourForDayPeriod(DayPeriodRules::DayPeriod dayPeriod,UErrorCode & errorCode) const415 int32_t DayPeriodRules::getStartHourForDayPeriod(
416         DayPeriodRules::DayPeriod dayPeriod, UErrorCode &errorCode) const {
417     if (U_FAILURE(errorCode)) { return -1; }
418 
419     if (dayPeriod == DAYPERIOD_MIDNIGHT) { return 0; }
420     if (dayPeriod == DAYPERIOD_NOON) { return 12; }
421 
422     if (fDayPeriodForHour[0] == dayPeriod && fDayPeriodForHour[23] == dayPeriod) {
423         // dayPeriod wraps around midnight. Start hour is later than end hour.
424         for (int32_t i = 22; i >= 1; --i) {
425             if (fDayPeriodForHour[i] != dayPeriod) {
426                 return (i + 1);
427             }
428         }
429     } else {
430         for (int32_t i = 0; i <= 23; ++i) {
431             if (fDayPeriodForHour[i] == dayPeriod) {
432                 return i;
433             }
434         }
435     }
436 
437     // dayPeriod doesn't exist in rule set; set error and exit.
438     errorCode = U_ILLEGAL_ARGUMENT_ERROR;
439     return -1;
440 }
441 
getEndHourForDayPeriod(DayPeriodRules::DayPeriod dayPeriod,UErrorCode & errorCode) const442 int32_t DayPeriodRules::getEndHourForDayPeriod(
443         DayPeriodRules::DayPeriod dayPeriod, UErrorCode &errorCode) const {
444     if (U_FAILURE(errorCode)) { return -1; }
445 
446     if (dayPeriod == DAYPERIOD_MIDNIGHT) { return 0; }
447     if (dayPeriod == DAYPERIOD_NOON) { return 12; }
448 
449     if (fDayPeriodForHour[0] == dayPeriod && fDayPeriodForHour[23] == dayPeriod) {
450         // dayPeriod wraps around midnight. End hour is before start hour.
451         for (int32_t i = 1; i <= 22; ++i) {
452             if (fDayPeriodForHour[i] != dayPeriod) {
453                 // i o'clock is when a new period starts, therefore when the old period ends.
454                 return i;
455             }
456         }
457     } else {
458         for (int32_t i = 23; i >= 0; --i) {
459             if (fDayPeriodForHour[i] == dayPeriod) {
460                 return (i + 1);
461             }
462         }
463     }
464 
465     // dayPeriod doesn't exist in rule set; set error and exit.
466     errorCode = U_ILLEGAL_ARGUMENT_ERROR;
467     return -1;
468 }
469 
getDayPeriodFromString(const char * type_str)470 DayPeriodRules::DayPeriod DayPeriodRules::getDayPeriodFromString(const char *type_str) {
471     if (uprv_strcmp(type_str, "midnight") == 0) {
472         return DAYPERIOD_MIDNIGHT;
473     } else if (uprv_strcmp(type_str, "noon") == 0) {
474         return DAYPERIOD_NOON;
475     } else if (uprv_strcmp(type_str, "morning1") == 0) {
476         return DAYPERIOD_MORNING1;
477     } else if (uprv_strcmp(type_str, "afternoon1") == 0) {
478         return DAYPERIOD_AFTERNOON1;
479     } else if (uprv_strcmp(type_str, "evening1") == 0) {
480         return DAYPERIOD_EVENING1;
481     } else if (uprv_strcmp(type_str, "night1") == 0) {
482         return DAYPERIOD_NIGHT1;
483     } else if (uprv_strcmp(type_str, "morning2") == 0) {
484         return DAYPERIOD_MORNING2;
485     } else if (uprv_strcmp(type_str, "afternoon2") == 0) {
486         return DAYPERIOD_AFTERNOON2;
487     } else if (uprv_strcmp(type_str, "evening2") == 0) {
488         return DAYPERIOD_EVENING2;
489     } else if (uprv_strcmp(type_str, "night2") == 0) {
490         return DAYPERIOD_NIGHT2;
491     } else if (uprv_strcmp(type_str, "am") == 0) {
492         return DAYPERIOD_AM;
493     } else if (uprv_strcmp(type_str, "pm") == 0) {
494         return DAYPERIOD_PM;
495     } else {
496         return DAYPERIOD_UNKNOWN;
497     }
498 }
499 
add(int32_t startHour,int32_t limitHour,DayPeriod period)500 void DayPeriodRules::add(int32_t startHour, int32_t limitHour, DayPeriod period) {
501     for (int32_t i = startHour; i != limitHour; ++i) {
502         if (i == 24) { i = 0; }
503         fDayPeriodForHour[i] = period;
504     }
505 }
506 
allHoursAreSet()507 UBool DayPeriodRules::allHoursAreSet() {
508     for (int32_t i = 0; i < 24; ++i) {
509         if (fDayPeriodForHour[i] == DAYPERIOD_UNKNOWN) { return false; }
510     }
511 
512     return true;
513 }
514 
515 
516 
517 U_NAMESPACE_END
518