xref: /aosp_15_r20/external/cronet/third_party/icu/source/i18n/smpdtfmt.cpp (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 1997-2016, International Business Machines Corporation and    *
6 * others. All Rights Reserved.                                                *
7 *******************************************************************************
8 *
9 * File SMPDTFMT.CPP
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   02/19/97    aliu        Converted from java.
15 *   03/31/97    aliu        Modified extensively to work with 50 locales.
16 *   04/01/97    aliu        Added support for centuries.
17 *   07/09/97    helena      Made ParsePosition into a class.
18 *   07/21/98    stephen     Added initializeDefaultCentury.
19 *                             Removed getZoneIndex (added in DateFormatSymbols)
20 *                             Removed subParseLong
21 *                             Removed chk
22 *   02/22/99    stephen     Removed character literals for EBCDIC safety
23 *   10/14/99    aliu        Updated 2-digit year parsing so that only "00" thru
24 *                           "99" are recognized. {j28 4182066}
25 *   11/15/99    weiv        Added support for week of year/day of week format
26 ********************************************************************************
27 */
28 
29 #define ZID_KEY_MAX 128
30 
31 #include "unicode/utypes.h"
32 
33 #if !UCONFIG_NO_FORMATTING
34 #include "unicode/smpdtfmt.h"
35 #include "unicode/dtfmtsym.h"
36 #include "unicode/ures.h"
37 #include "unicode/msgfmt.h"
38 #include "unicode/calendar.h"
39 #include "unicode/gregocal.h"
40 #include "unicode/timezone.h"
41 #include "unicode/decimfmt.h"
42 #include "unicode/dcfmtsym.h"
43 #include "unicode/uchar.h"
44 #include "unicode/uniset.h"
45 #include "unicode/ustring.h"
46 #include "unicode/basictz.h"
47 #include "unicode/simpleformatter.h"
48 #include "unicode/simplenumberformatter.h"
49 #include "unicode/simpletz.h"
50 #include "unicode/rbtz.h"
51 #include "unicode/tzfmt.h"
52 #include "unicode/ucasemap.h"
53 #include "unicode/utf16.h"
54 #include "unicode/vtzone.h"
55 #include "unicode/udisplaycontext.h"
56 #include "unicode/brkiter.h"
57 #include "unicode/rbnf.h"
58 #include "unicode/dtptngen.h"
59 #include "uresimp.h"
60 #include "olsontz.h"
61 #include "patternprops.h"
62 #include "fphdlimp.h"
63 #include "hebrwcal.h"
64 #include "cstring.h"
65 #include "uassert.h"
66 #include "cmemory.h"
67 #include "umutex.h"
68 #include "mutex.h"
69 #include <float.h>
70 #include "smpdtfst.h"
71 #include "sharednumberformat.h"
72 #include "ucasemap_imp.h"
73 #include "ustr_imp.h"
74 #include "charstr.h"
75 #include "uvector.h"
76 #include "cstr.h"
77 #include "dayperiodrules.h"
78 #include "tznames_impl.h"   // ZONE_NAME_U16_MAX
79 #include "number_utypes.h"
80 
81 #if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL)
82 #include <stdio.h>
83 #endif
84 
85 // *****************************************************************************
86 // class SimpleDateFormat
87 // *****************************************************************************
88 
89 U_NAMESPACE_BEGIN
90 
91 /**
92  * Last-resort string to use for "GMT" when constructing time zone strings.
93  */
94 // For time zones that have no names, use strings GMT+minutes and
95 // GMT-minutes. For instance, in France the time zone is GMT+60.
96 // Also accepted are GMT+H:MM or GMT-H:MM.
97 // Currently not being used
98 //static const char16_t gGmt[]      = {0x0047, 0x004D, 0x0054, 0x0000};         // "GMT"
99 //static const char16_t gGmtPlus[]  = {0x0047, 0x004D, 0x0054, 0x002B, 0x0000}; // "GMT+"
100 //static const char16_t gGmtMinus[] = {0x0047, 0x004D, 0x0054, 0x002D, 0x0000}; // "GMT-"
101 //static const char16_t gDefGmtPat[]       = {0x0047, 0x004D, 0x0054, 0x007B, 0x0030, 0x007D, 0x0000}; /* GMT{0} */
102 //static const char16_t gDefGmtNegHmsPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* -HH:mm:ss */
103 //static const char16_t gDefGmtNegHmPat[]  = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* -HH:mm */
104 //static const char16_t gDefGmtPosHmsPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* +HH:mm:ss */
105 //static const char16_t gDefGmtPosHmPat[]  = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* +HH:mm */
106 //static const char16_t gUt[]       = {0x0055, 0x0054, 0x0000};  // "UT"
//static const char16_t gUtc[]      = {0x0055, 0x0054, 0x0043, 0x0000};  // "UTC"
108 
// Lengths associated with the GMT/UT pattern strings.
// NOTE(review): the strings these lengths belong to are commented out
// just above; confirm whether any of these enumerators are still used.
enum GmtPatSize {
    kGmtLen    = 3,
    kGmtPatLen = 6,
    kNegHmsLen = 9,
    kNegHmLen  = 6,
    kPosHmsLen = 9,
    kPosHmLen  = 6,
    kUtLen     = 2,
    kUtcLen    = 3
};
119 
120 // Stuff needed for numbering system overrides
121 
// Selects which group of fields a numbering-system override string applies to:
// the date fields, the time fields, or both.
enum OvrStrType {
    kOvrStrDate = 0,
    kOvrStrTime = 1,
    kOvrStrBoth = 2
};
127 
128 static const UDateFormatField kDateFields[] = {
129     UDAT_YEAR_FIELD,
130     UDAT_MONTH_FIELD,
131     UDAT_DATE_FIELD,
132     UDAT_DAY_OF_YEAR_FIELD,
133     UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
134     UDAT_WEEK_OF_YEAR_FIELD,
135     UDAT_WEEK_OF_MONTH_FIELD,
136     UDAT_YEAR_WOY_FIELD,
137     UDAT_EXTENDED_YEAR_FIELD,
138     UDAT_JULIAN_DAY_FIELD,
139     UDAT_STANDALONE_DAY_FIELD,
140     UDAT_STANDALONE_MONTH_FIELD,
141     UDAT_QUARTER_FIELD,
142     UDAT_STANDALONE_QUARTER_FIELD,
143     UDAT_YEAR_NAME_FIELD,
144     UDAT_RELATED_YEAR_FIELD };
145 static const int8_t kDateFieldsCount = 16;
146 
147 static const UDateFormatField kTimeFields[] = {
148     UDAT_HOUR_OF_DAY1_FIELD,
149     UDAT_HOUR_OF_DAY0_FIELD,
150     UDAT_MINUTE_FIELD,
151     UDAT_SECOND_FIELD,
152     UDAT_FRACTIONAL_SECOND_FIELD,
153     UDAT_HOUR1_FIELD,
154     UDAT_HOUR0_FIELD,
155     UDAT_MILLISECONDS_IN_DAY_FIELD,
156     UDAT_TIMEZONE_RFC_FIELD,
157     UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD };
158 static const int8_t kTimeFieldsCount = 10;
159 
160 
// Pattern of last resort, used when no usable pattern can be loaded from a
// resource. A UTF-16 literal always yields the same code units regardless of
// the platform's narrow character set.
static const char16_t gDefaultPattern[] = u"yMMdd hh:mm a";
167 
// A prefix chosen so that it NEVER MATCHES real text; its purpose is to
// suppress the parsing of negative numbers. Pick a different code point if
// U+AB00 ever becomes valid Unicode.
static const char16_t SUPPRESS_NEGATIVE_PREFIX[] = { 0xAB00, 0x0000 };
172 
/**
 * The single-quote character (U+0027).
 */
static const char16_t QUOTE = u'\'';
178 
/*
 * Range-check bias, one entry per UDateFormatField, in field order.
 *
 * The bias is added to a field's minimum and maximum values before the
 * parsed number is compared against them. Example: the calendar stores
 * months zero-based while parsed month numbers start at 1, hence bias 1.
 *
 * An entry of -1 disables the range check for that field.
 */
static const int32_t gFieldRangeBias[] = {
    -1,  // UDAT_ERA_FIELD                            'G'
    -1,  // UDAT_YEAR_FIELD                           'y'
     1,  // UDAT_MONTH_FIELD                          'M'
     0,  // UDAT_DATE_FIELD                           'd'
    -1,  // UDAT_HOUR_OF_DAY1_FIELD                   'k'
    -1,  // UDAT_HOUR_OF_DAY0_FIELD                   'H'
     0,  // UDAT_MINUTE_FIELD                         'm'
     0,  // UDAT_SECOND_FIELD                         's'
    -1,  // UDAT_FRACTIONAL_SECOND_FIELD              'S'  (0-999?)
    -1,  // UDAT_DAY_OF_WEEK_FIELD                    'E'  (1-7?)
    -1,  // UDAT_DAY_OF_YEAR_FIELD                    'D'  (1-366?)
    -1,  // UDAT_DAY_OF_WEEK_IN_MONTH_FIELD           'F'  (1-5?)
    -1,  // UDAT_WEEK_OF_YEAR_FIELD                   'w'  (1-52?)
    -1,  // UDAT_WEEK_OF_MONTH_FIELD                  'W'  (1-5?)
    -1,  // UDAT_AM_PM_FIELD                          'a'
    -1,  // UDAT_HOUR1_FIELD                          'h'
    -1,  // UDAT_HOUR0_FIELD                          'K'
    -1,  // UDAT_TIMEZONE_FIELD                       'z'
    -1,  // UDAT_YEAR_WOY_FIELD                       'Y'
    -1,  // UDAT_DOW_LOCAL_FIELD                      'e'
    -1,  // UDAT_EXTENDED_YEAR_FIELD                  'u'
    -1,  // UDAT_JULIAN_DAY_FIELD                     'g'
    -1,  // UDAT_MILLISECONDS_IN_DAY_FIELD            'A'
    -1,  // UDAT_TIMEZONE_RFC_FIELD                   'Z'
    -1,  // UDAT_TIMEZONE_GENERIC_FIELD               'v'
     0,  // UDAT_STANDALONE_DAY_FIELD                 'c'
     1,  // UDAT_STANDALONE_MONTH_FIELD               'L'
    -1,  // UDAT_QUARTER_FIELD                        'Q'  (1-4?)
    -1,  // UDAT_STANDALONE_QUARTER_FIELD             'q'
    -1,  // UDAT_TIMEZONE_SPECIAL_FIELD               'V'
    -1,  // UDAT_YEAR_NAME_FIELD                      'U'
    -1,  // UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD  'O'
    -1,  // UDAT_TIMEZONE_ISO_FIELD                   'X'
    -1,  // UDAT_TIMEZONE_ISO_LOCAL_FIELD             'x'
    -1,  // UDAT_RELATED_YEAR_FIELD                   'r'
    // UDAT_TIME_SEPARATOR_FIELD: pattern character ':' when
    // UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR is set, otherwise it has no
    // pattern character. The bias is -1 in both configurations.
    -1,
};
230 
// With hebr numbering (i.e. he@calendar=hebrew), years inside the current
// millennium are offset down to the range 1-999. These are the bounds of
// that millennium.
static constexpr int32_t HEBREW_CAL_CUR_MILLENIUM_START_YEAR = 5000;
static constexpr int32_t HEBREW_CAL_CUR_MILLENIUM_END_YEAR   = 6000;
235 
/**
 * Upper bound on the range searched for a daylight offset when a parsed time
 * zone string indicates daylight saving time but the detected time zone does
 * not observe daylight saving time at the parsed date.
 * The value is 30 * 365 days expressed in milliseconds.
 */
static const double MAX_DAYLIGHT_DETECTION_RANGE = 30.0 * 365 * 24 * 60 * 60 * 1000;
242 
243 static UMutex LOCK;
244 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat)245 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat)
246 
247 SimpleDateFormat::NSOverride::~NSOverride() {
248     if (snf != nullptr) {
249         snf->removeRef();
250     }
251 }
252 
253 
free()254 void SimpleDateFormat::NSOverride::free() {
255     NSOverride *cur = this;
256     while (cur) {
257         NSOverride *next_temp = cur->next;
258         delete cur;
259         cur = next_temp;
260     }
261 }
262 
263 // no matter what the locale's default number format looked like, we want
264 // to modify it so that it doesn't use thousands separators, doesn't always
265 // show the decimal point, and recognizes integers only when parsing
fixNumberFormatForDates(NumberFormat & nf)266 static void fixNumberFormatForDates(NumberFormat &nf) {
267     nf.setGroupingUsed(false);
268     DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(&nf);
269     if (decfmt != nullptr) {
270         decfmt->setDecimalSeparatorAlwaysShown(false);
271     }
272     nf.setParseIntegerOnly(true);
273     nf.setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00"
274 }
275 
createSharedNumberFormat(NumberFormat * nfToAdopt)276 static const SharedNumberFormat *createSharedNumberFormat(
277         NumberFormat *nfToAdopt) {
278     fixNumberFormatForDates(*nfToAdopt);
279     const SharedNumberFormat *result = new SharedNumberFormat(nfToAdopt);
280     if (result == nullptr) {
281         delete nfToAdopt;
282     }
283     return result;
284 }
285 
createSharedNumberFormat(const Locale & loc,UErrorCode & status)286 static const SharedNumberFormat *createSharedNumberFormat(
287         const Locale &loc, UErrorCode &status) {
288     NumberFormat *nf = NumberFormat::createInstance(loc, status);
289     if (U_FAILURE(status)) {
290         return nullptr;
291     }
292     const SharedNumberFormat *result = createSharedNumberFormat(nf);
293     if (result == nullptr) {
294         status = U_MEMORY_ALLOCATION_ERROR;
295     }
296     return result;
297 }
298 
allocSharedNumberFormatters()299 static const SharedNumberFormat **allocSharedNumberFormatters() {
300     const SharedNumberFormat **result = (const SharedNumberFormat**)
301             uprv_malloc(UDAT_FIELD_COUNT * sizeof(const SharedNumberFormat*));
302     if (result == nullptr) {
303         return nullptr;
304     }
305     for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) {
306         result[i] = nullptr;
307     }
308     return result;
309 }
310 
freeSharedNumberFormatters(const SharedNumberFormat ** list)311 static void freeSharedNumberFormatters(const SharedNumberFormat ** list) {
312     for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) {
313         SharedObject::clearPtr(list[i]);
314     }
315     uprv_free(list);
316 }
317 
getNumberFormatByIndex(UDateFormatField index) const318 const NumberFormat *SimpleDateFormat::getNumberFormatByIndex(
319         UDateFormatField index) const {
320     if (fSharedNumberFormatters == nullptr ||
321         fSharedNumberFormatters[index] == nullptr) {
322         return fNumberFormat;
323     }
324     return &(**fSharedNumberFormatters[index]);
325 }
326 
327 //----------------------------------------------------------------------
328 
~SimpleDateFormat()329 SimpleDateFormat::~SimpleDateFormat()
330 {
331     delete fSymbols;
332     if (fSharedNumberFormatters) {
333         freeSharedNumberFormatters(fSharedNumberFormatters);
334     }
335     if (fTimeZoneFormat) {
336         delete fTimeZoneFormat;
337     }
338     delete fSimpleNumberFormatter;
339 
340 #if !UCONFIG_NO_BREAK_ITERATION
341     delete fCapitalizationBrkIter;
342 #endif
343 }
344 
345 //----------------------------------------------------------------------
346 
SimpleDateFormat(UErrorCode & status)347 SimpleDateFormat::SimpleDateFormat(UErrorCode& status)
348   :   fLocale(Locale::getDefault())
349 {
350     initializeBooleanAttributes();
351     construct(kShort, (EStyle) (kShort + kDateOffset), fLocale, status);
352     initializeDefaultCentury();
353 }
354 
355 //----------------------------------------------------------------------
356 
SimpleDateFormat(const UnicodeString & pattern,UErrorCode & status)357 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
358                                    UErrorCode &status)
359 :   fPattern(pattern),
360     fLocale(Locale::getDefault())
361 {
362     fDateOverride.setToBogus();
363     fTimeOverride.setToBogus();
364     initializeBooleanAttributes();
365     initializeCalendar(nullptr,fLocale,status);
366     fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
367     initialize(fLocale, status);
368     initializeDefaultCentury();
369 
370 }
371 //----------------------------------------------------------------------
372 
SimpleDateFormat(const UnicodeString & pattern,const UnicodeString & override,UErrorCode & status)373 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
374                                    const UnicodeString& override,
375                                    UErrorCode &status)
376 :   fPattern(pattern),
377     fLocale(Locale::getDefault())
378 {
379     fDateOverride.setTo(override);
380     fTimeOverride.setToBogus();
381     initializeBooleanAttributes();
382     initializeCalendar(nullptr,fLocale,status);
383     fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
384     initialize(fLocale, status);
385     initializeDefaultCentury();
386 
387     processOverrideString(fLocale,override,kOvrStrBoth,status);
388 
389 }
390 
391 //----------------------------------------------------------------------
392 
SimpleDateFormat(const UnicodeString & pattern,const Locale & locale,UErrorCode & status)393 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
394                                    const Locale& locale,
395                                    UErrorCode& status)
396 :   fPattern(pattern),
397     fLocale(locale)
398 {
399 
400     fDateOverride.setToBogus();
401     fTimeOverride.setToBogus();
402     initializeBooleanAttributes();
403 
404     initializeCalendar(nullptr,fLocale,status);
405     fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
406     initialize(fLocale, status);
407     initializeDefaultCentury();
408 }
409 
410 //----------------------------------------------------------------------
411 
SimpleDateFormat(const UnicodeString & pattern,const UnicodeString & override,const Locale & locale,UErrorCode & status)412 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
413                                    const UnicodeString& override,
414                                    const Locale& locale,
415                                    UErrorCode& status)
416 :   fPattern(pattern),
417     fLocale(locale)
418 {
419 
420     fDateOverride.setTo(override);
421     fTimeOverride.setToBogus();
422     initializeBooleanAttributes();
423 
424     initializeCalendar(nullptr,fLocale,status);
425     fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
426     initialize(fLocale, status);
427     initializeDefaultCentury();
428 
429     processOverrideString(locale,override,kOvrStrBoth,status);
430 
431 }
432 
433 //----------------------------------------------------------------------
434 
SimpleDateFormat(const UnicodeString & pattern,DateFormatSymbols * symbolsToAdopt,UErrorCode & status)435 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
436                                    DateFormatSymbols* symbolsToAdopt,
437                                    UErrorCode& status)
438 :   fPattern(pattern),
439     fLocale(Locale::getDefault()),
440     fSymbols(symbolsToAdopt)
441 {
442 
443     fDateOverride.setToBogus();
444     fTimeOverride.setToBogus();
445     initializeBooleanAttributes();
446 
447     initializeCalendar(nullptr,fLocale,status);
448     initialize(fLocale, status);
449     initializeDefaultCentury();
450 }
451 
452 //----------------------------------------------------------------------
453 
SimpleDateFormat(const UnicodeString & pattern,const DateFormatSymbols & symbols,UErrorCode & status)454 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
455                                    const DateFormatSymbols& symbols,
456                                    UErrorCode& status)
457 :   fPattern(pattern),
458     fLocale(Locale::getDefault()),
459     fSymbols(new DateFormatSymbols(symbols))
460 {
461 
462     fDateOverride.setToBogus();
463     fTimeOverride.setToBogus();
464     initializeBooleanAttributes();
465 
466     initializeCalendar(nullptr, fLocale, status);
467     initialize(fLocale, status);
468     initializeDefaultCentury();
469 }
470 
471 //----------------------------------------------------------------------
472 
473 // Not for public consumption; used by DateFormat
SimpleDateFormat(EStyle timeStyle,EStyle dateStyle,const Locale & locale,UErrorCode & status)474 SimpleDateFormat::SimpleDateFormat(EStyle timeStyle,
475                                    EStyle dateStyle,
476                                    const Locale& locale,
477                                    UErrorCode& status)
478 :   fLocale(locale)
479 {
480     initializeBooleanAttributes();
481     construct(timeStyle, dateStyle, fLocale, status);
482     if(U_SUCCESS(status)) {
483       initializeDefaultCentury();
484     }
485 }
486 
487 //----------------------------------------------------------------------
488 
489 /**
490  * Not for public consumption; used by DateFormat.  This constructor
491  * never fails.  If the resource data is not available, it uses the
492  * the last resort symbols.
493  */
SimpleDateFormat(const Locale & locale,UErrorCode & status)494 SimpleDateFormat::SimpleDateFormat(const Locale& locale,
495                                    UErrorCode& status)
496 :   fPattern(gDefaultPattern),
497     fLocale(locale)
498 {
499     if (U_FAILURE(status)) return;
500     initializeBooleanAttributes();
501     initializeCalendar(nullptr, fLocale, status);
502     fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
503     if (U_FAILURE(status))
504     {
505         status = U_ZERO_ERROR;
506         delete fSymbols;
507         // This constructor doesn't fail; it uses last resort data
508         fSymbols = new DateFormatSymbols(status);
509         /* test for nullptr */
510         if (fSymbols == 0) {
511             status = U_MEMORY_ALLOCATION_ERROR;
512             return;
513         }
514     }
515 
516     fDateOverride.setToBogus();
517     fTimeOverride.setToBogus();
518 
519     initialize(fLocale, status);
520     if(U_SUCCESS(status)) {
521       initializeDefaultCentury();
522     }
523 }
524 
525 //----------------------------------------------------------------------
526 
// Copy constructor: sets up the base class, locale and boolean attributes,
// then delegates the rest of the copy to the assignment operator.
SimpleDateFormat::SimpleDateFormat(const SimpleDateFormat& other)
:   DateFormat(other),
    fLocale(other.fLocale)
{
    initializeBooleanAttributes();
    operator=(other);
}
534 
535 //----------------------------------------------------------------------
536 
operator =(const SimpleDateFormat & other)537 SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other)
538 {
539     if (this == &other) {
540         return *this;
541     }
542 
543     // fSimpleNumberFormatter references fNumberFormatter, delete it
544     // before we call the = operator which may invalidate fNumberFormatter
545     delete fSimpleNumberFormatter;
546     fSimpleNumberFormatter = nullptr;
547 
548     DateFormat::operator=(other);
549     fDateOverride = other.fDateOverride;
550     fTimeOverride = other.fTimeOverride;
551 
552     delete fSymbols;
553     fSymbols = nullptr;
554 
555     if (other.fSymbols)
556         fSymbols = new DateFormatSymbols(*other.fSymbols);
557 
558     fDefaultCenturyStart         = other.fDefaultCenturyStart;
559     fDefaultCenturyStartYear     = other.fDefaultCenturyStartYear;
560     fHaveDefaultCentury          = other.fHaveDefaultCentury;
561 
562     fPattern = other.fPattern;
563     fHasMinute = other.fHasMinute;
564     fHasSecond = other.fHasSecond;
565 
566     fLocale = other.fLocale;
567 
568     // TimeZoneFormat can now be set independently via setter.
569     // If it is nullptr, it will be lazily initialized from locale.
570     delete fTimeZoneFormat;
571     fTimeZoneFormat = nullptr;
572     TimeZoneFormat *otherTZFormat;
573     {
574         // Synchronization is required here, when accessing other.fTimeZoneFormat,
575         // because another thread may be concurrently executing other.tzFormat(),
576         // a logically const function that lazily creates other.fTimeZoneFormat.
577         //
578         // Without synchronization, reordered memory writes could allow us
579         // to see a non-null fTimeZoneFormat before the object itself was
580         // fully initialized. In case of a race, it doesn't matter whether
581         // we see a null or a fully initialized other.fTimeZoneFormat,
582         // only that we avoid seeing a partially initialized object.
583         //
584         // Once initialized, no const function can modify fTimeZoneFormat,
585         // meaning that once we have safely grabbed the other.fTimeZoneFormat
586         // pointer, continued synchronization is not required to use it.
587         Mutex m(&LOCK);
588         otherTZFormat = other.fTimeZoneFormat;
589     }
590     if (otherTZFormat) {
591         fTimeZoneFormat = new TimeZoneFormat(*otherTZFormat);
592     }
593 
594 #if !UCONFIG_NO_BREAK_ITERATION
595     if (other.fCapitalizationBrkIter != nullptr) {
596         fCapitalizationBrkIter = (other.fCapitalizationBrkIter)->clone();
597     }
598 #endif
599 
600     if (fSharedNumberFormatters != nullptr) {
601         freeSharedNumberFormatters(fSharedNumberFormatters);
602         fSharedNumberFormatters = nullptr;
603     }
604     if (other.fSharedNumberFormatters != nullptr) {
605         fSharedNumberFormatters = allocSharedNumberFormatters();
606         if (fSharedNumberFormatters) {
607             for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) {
608                 SharedObject::copyPtr(
609                         other.fSharedNumberFormatters[i],
610                         fSharedNumberFormatters[i]);
611             }
612         }
613     }
614 
615     UErrorCode localStatus = U_ZERO_ERROR;
616     // SimpleNumberFormatter does not have a copy constructor. Furthermore,
617     // it references data from an internal field, fNumberFormatter,
618     // so we must rematerialize that reference after copying over the number formatter.
619     initSimpleNumberFormatter(localStatus);
620     return *this;
621 }
622 
623 //----------------------------------------------------------------------
624 
625 SimpleDateFormat*
clone() const626 SimpleDateFormat::clone() const
627 {
628     return new SimpleDateFormat(*this);
629 }
630 
631 //----------------------------------------------------------------------
632 
633 bool
operator ==(const Format & other) const634 SimpleDateFormat::operator==(const Format& other) const
635 {
636     if (DateFormat::operator==(other)) {
637         // The DateFormat::operator== check for fCapitalizationContext equality above
638         //   is sufficient to check equality of all derived context-related data.
639         // DateFormat::operator== guarantees following cast is safe
640         SimpleDateFormat* that = (SimpleDateFormat*)&other;
641         return (fPattern             == that->fPattern &&
642                 fSymbols             != nullptr && // Check for pathological object
643                 that->fSymbols       != nullptr && // Check for pathological object
644                 *fSymbols            == *that->fSymbols &&
645                 fHaveDefaultCentury  == that->fHaveDefaultCentury &&
646                 fDefaultCenturyStart == that->fDefaultCenturyStart);
647     }
648     return false;
649 }
650 
651 //----------------------------------------------------------------------
// Time skeletons, indexed by time style from kFull through kShort.
static const char16_t* timeSkeletons[4] = {
    /* kFull   */ u"jmmsszzzz",
    /* kLong   */ u"jmmssz",
    /* kMedium */ u"jmmss",
    /* kShort  */ u"jmm",
};
658 
construct(EStyle timeStyle,EStyle dateStyle,const Locale & locale,UErrorCode & status)659 void SimpleDateFormat::construct(EStyle timeStyle,
660                                  EStyle dateStyle,
661                                  const Locale& locale,
662                                  UErrorCode& status)
663 {
664     // called by several constructors to load pattern data from the resources
665     if (U_FAILURE(status)) return;
666 
667     // We will need the calendar to know what type of symbols to load.
668     initializeCalendar(nullptr, locale, status);
669     if (U_FAILURE(status)) return;
670 
671     // Load date time patterns directly from resources.
672     const char* cType = fCalendar ? fCalendar->getType() : nullptr;
673     LocalUResourceBundlePointer bundle(ures_open(nullptr, locale.getBaseName(), &status));
674     if (U_FAILURE(status)) return;
675 
676     UBool cTypeIsGregorian = true;
677     LocalUResourceBundlePointer dateTimePatterns;
678     if (cType != nullptr && uprv_strcmp(cType, "gregorian") != 0) {
679         CharString resourcePath("calendar/", status);
680         resourcePath.append(cType, status).append("/DateTimePatterns", status);
681         dateTimePatterns.adoptInstead(
682             ures_getByKeyWithFallback(bundle.getAlias(), resourcePath.data(),
683                                       (UResourceBundle*)nullptr, &status));
684         cTypeIsGregorian = false;
685     }
686 
687     // Check for "gregorian" fallback.
688     if (cTypeIsGregorian || status == U_MISSING_RESOURCE_ERROR) {
689         status = U_ZERO_ERROR;
690         dateTimePatterns.adoptInstead(
691             ures_getByKeyWithFallback(bundle.getAlias(),
692                                       "calendar/gregorian/DateTimePatterns",
693                                       (UResourceBundle*)nullptr, &status));
694     }
695     if (U_FAILURE(status)) return;
696 
697     LocalUResourceBundlePointer currentBundle;
698 
699     if (ures_getSize(dateTimePatterns.getAlias()) <= kDateTime)
700     {
701         status = U_INVALID_FORMAT_ERROR;
702         return;
703     }
704 
705     setLocaleIDs(ures_getLocaleByType(dateTimePatterns.getAlias(), ULOC_VALID_LOCALE, &status),
706                  ures_getLocaleByType(dateTimePatterns.getAlias(), ULOC_ACTUAL_LOCALE, &status));
707 
708     // create a symbols object from the locale
709     fSymbols = DateFormatSymbols::createForLocale(locale, status);
710     if (U_FAILURE(status)) return;
711     /* test for nullptr */
712     if (fSymbols == 0) {
713         status = U_MEMORY_ALLOCATION_ERROR;
714         return;
715     }
716 
717     const char16_t *resStr,*ovrStr;
718     int32_t resStrLen,ovrStrLen = 0;
719     fDateOverride.setToBogus();
720     fTimeOverride.setToBogus();
721 
722     UnicodeString timePattern;
723     if (timeStyle >= kFull && timeStyle <= kShort) {
724         bool hasRgOrHcSubtag = false;
725         // also use DTPG if the locale has the "rg" or "hc" ("hours") subtag-- even if the overriding region
726         // or hour cycle is the same as the one we get by default, we go through the DateTimePatternGenerator
727         UErrorCode dummyErr1 = U_ZERO_ERROR, dummyErr2 = U_ZERO_ERROR;
728         if (locale.getKeywordValue("rg", nullptr, 0, dummyErr1) > 0 || locale.getKeywordValue("hours", nullptr, 0, dummyErr2) > 0) {
729             hasRgOrHcSubtag = true;
730         }
731 
732         const char* baseLocID = locale.getBaseName();
733         if (baseLocID[0]!=0 && uprv_strcmp(baseLocID,"und")!=0) {
734             UErrorCode useStatus = U_ZERO_ERROR;
735             Locale baseLoc(baseLocID);
736             Locale validLoc(getLocale(ULOC_VALID_LOCALE, useStatus));
737             if (hasRgOrHcSubtag || (U_SUCCESS(useStatus) && validLoc!=baseLoc)) {
738                 bool useDTPG = hasRgOrHcSubtag;
739                 const char* baseReg = baseLoc.getCountry(); // empty string if no region
740                 if ((baseReg[0]!=0 && uprv_strncmp(baseReg,validLoc.getCountry(),ULOC_COUNTRY_CAPACITY)!=0)
741                         || uprv_strncmp(baseLoc.getLanguage(),validLoc.getLanguage(),ULOC_LANG_CAPACITY)!=0) {
742                     // use DTPG if
743                     // * baseLoc has a region and validLoc does not have the same one (or has none), OR
744                     // * validLoc has a different language code than baseLoc
745                     // * the original locale has the rg or hc subtag
746                     useDTPG = true;
747                 }
748                 if (useDTPG) {
749                     // The standard time formats may have the wrong time cycle, because:
750                     // the valid locale differs in important ways (region, language) from
751                     // the base locale.
752                     // We could *also* check whether they do actually have a mismatch with
753                     // the time cycle preferences for the region, but that is a lot more
754                     // work for little or no additional benefit, since just going ahead
755                     // and always synthesizing the time format as per the following should
756                     // create a locale-appropriate pattern with cycle that matches the
757                     // region preferences anyway.
758                     LocalPointer<DateTimePatternGenerator> dtpg(DateTimePatternGenerator::createInstanceNoStdPat(locale, useStatus));
759                     if (U_SUCCESS(useStatus)) {
760                         UnicodeString timeSkeleton(true, timeSkeletons[timeStyle], -1);
761                         timePattern = dtpg->getBestPattern(timeSkeleton, useStatus);
762                     }
763                 }
764             }
765         }
766     }
767 
768     // if the pattern should include both date and time information, use the date/time
769     // pattern string as a guide to tell use how to glue together the appropriate date
770     // and time pattern strings.
771     if ((timeStyle != kNone) && (dateStyle != kNone))
772     {
773         UnicodeString tempus1(timePattern);
774         if (tempus1.length() == 0) {
775             currentBundle.adoptInstead(
776                     ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, nullptr, &status));
777             if (U_FAILURE(status)) {
778                status = U_INVALID_FORMAT_ERROR;
779                return;
780             }
781             switch (ures_getType(currentBundle.getAlias())) {
782                 case URES_STRING: {
783                    resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
784                    break;
785                 }
786                 case URES_ARRAY: {
787                    resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
788                    ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
789                    fTimeOverride.setTo(true, ovrStr, ovrStrLen);
790                    break;
791                 }
792                 default: {
793                    status = U_INVALID_FORMAT_ERROR;
794                    return;
795                 }
796             }
797 
798             tempus1.setTo(true, resStr, resStrLen);
799         }
800 
801         currentBundle.adoptInstead(
802                 ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)dateStyle, nullptr, &status));
803         if (U_FAILURE(status)) {
804            status = U_INVALID_FORMAT_ERROR;
805            return;
806         }
807         switch (ures_getType(currentBundle.getAlias())) {
808             case URES_STRING: {
809                resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
810                break;
811             }
812             case URES_ARRAY: {
813                resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
814                ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
815                fDateOverride.setTo(true, ovrStr, ovrStrLen);
816                break;
817             }
818             default: {
819                status = U_INVALID_FORMAT_ERROR;
820                return;
821             }
822         }
823 
824         UnicodeString tempus2(true, resStr, resStrLen);
825 
826         // Currently, for compatibility with pre-CLDR-42 data, we default to the "atTime"
827         // combining patterns. Depending on guidance in CLDR 42 spec and on DisplayOptions,
828         // we may change this.
829         LocalUResourceBundlePointer dateAtTimePatterns;
830         if (!cTypeIsGregorian) {
831             CharString resourcePath("calendar/", status);
832             resourcePath.append(cType, status).append("/DateTimePatterns%atTime", status);
833             dateAtTimePatterns.adoptInstead(
834                 ures_getByKeyWithFallback(bundle.getAlias(), resourcePath.data(),
835                                           nullptr, &status));
836         }
837         if (cTypeIsGregorian || status == U_MISSING_RESOURCE_ERROR) {
838             status = U_ZERO_ERROR;
839             dateAtTimePatterns.adoptInstead(
840                 ures_getByKeyWithFallback(bundle.getAlias(),
841                                           "calendar/gregorian/DateTimePatterns%atTime",
842                                           nullptr, &status));
843         }
844         if (U_SUCCESS(status) && ures_getSize(dateAtTimePatterns.getAlias()) >= 4) {
845             resStr = ures_getStringByIndex(dateAtTimePatterns.getAlias(), dateStyle - kDateOffset, &resStrLen, &status);
846         } else {
847             status = U_ZERO_ERROR;
848             int32_t glueIndex = kDateTime;
849             int32_t patternsSize = ures_getSize(dateTimePatterns.getAlias());
850             if (patternsSize >= (kDateTimeOffset + kShort + 1)) {
851                 // Get proper date time format
852                 glueIndex = (int32_t)(kDateTimeOffset + (dateStyle - kDateOffset));
853             }
854 
855             resStr = ures_getStringByIndex(dateTimePatterns.getAlias(), glueIndex, &resStrLen, &status);
856         }
857         SimpleFormatter(UnicodeString(true, resStr, resStrLen), 2, 2, status).
858                 format(tempus1, tempus2, fPattern, status);
859     }
860     // if the pattern includes just time data or just date date, load the appropriate
861     // pattern string from the resources
862     // setTo() - see DateFormatSymbols::assignArray comments
863     else if (timeStyle != kNone) {
864         fPattern.setTo(timePattern);
865         if (fPattern.length() == 0) {
866             currentBundle.adoptInstead(
867                     ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, nullptr, &status));
868             if (U_FAILURE(status)) {
869                status = U_INVALID_FORMAT_ERROR;
870                return;
871             }
872             switch (ures_getType(currentBundle.getAlias())) {
873                 case URES_STRING: {
874                    resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
875                    break;
876                 }
877                 case URES_ARRAY: {
878                    resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
879                    ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
880                    fDateOverride.setTo(true, ovrStr, ovrStrLen);
881                    break;
882                 }
883                 default: {
884                    status = U_INVALID_FORMAT_ERROR;
885                    return;
886                 }
887             }
888             fPattern.setTo(true, resStr, resStrLen);
889         }
890     }
891     else if (dateStyle != kNone) {
892         currentBundle.adoptInstead(
893                 ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)dateStyle, nullptr, &status));
894         if (U_FAILURE(status)) {
895            status = U_INVALID_FORMAT_ERROR;
896            return;
897         }
898         switch (ures_getType(currentBundle.getAlias())) {
899             case URES_STRING: {
900                resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
901                break;
902             }
903             case URES_ARRAY: {
904                resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
905                ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
906                fDateOverride.setTo(true, ovrStr, ovrStrLen);
907                break;
908             }
909             default: {
910                status = U_INVALID_FORMAT_ERROR;
911                return;
912             }
913         }
914         fPattern.setTo(true, resStr, resStrLen);
915     }
916 
917     // and if it includes _neither_, that's an error
918     else
919         status = U_INVALID_FORMAT_ERROR;
920 
921     // finally, finish initializing by creating a Calendar and a NumberFormat
922     initialize(locale, status);
923 }
924 
925 //----------------------------------------------------------------------
926 
927 Calendar*
initializeCalendar(TimeZone * adoptZone,const Locale & locale,UErrorCode & status)928 SimpleDateFormat::initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status)
929 {
930     if(!U_FAILURE(status)) {
931         fCalendar = Calendar::createInstance(
932             adoptZone ? adoptZone : TimeZone::forLocaleOrDefault(locale), locale, status);
933     }
934     return fCalendar;
935 }
936 
937 void
initialize(const Locale & locale,UErrorCode & status)938 SimpleDateFormat::initialize(const Locale& locale,
939                              UErrorCode& status)
940 {
941     if (U_FAILURE(status)) return;
942 
943     parsePattern(); // Need this before initNumberFormatters(), to set fHasHanYearChar
944 
945     // Simple-minded hack to force Gannen year numbering for ja@calendar=japanese
946     // if format is non-numeric (includes 年) and fDateOverride is not already specified.
947     // Now this does get updated if applyPattern subsequently changes the pattern type.
948     if (fDateOverride.isBogus() && fHasHanYearChar &&
949             fCalendar != nullptr && uprv_strcmp(fCalendar->getType(),"japanese") == 0 &&
950             uprv_strcmp(fLocale.getLanguage(),"ja") == 0) {
951         fDateOverride.setTo(u"y=jpanyear", -1);
952     }
953 
954     // We don't need to check that the row count is >= 1, since all 2d arrays have at
955     // least one row
956     fNumberFormat = NumberFormat::createInstance(locale, status);
957     if (fNumberFormat != nullptr && U_SUCCESS(status))
958     {
959         fixNumberFormatForDates(*fNumberFormat);
960         //fNumberFormat->setLenient(true); // Java uses a custom DateNumberFormat to format/parse
961 
962         initNumberFormatters(locale, status);
963         initSimpleNumberFormatter(status);
964 
965     }
966     else if (U_SUCCESS(status))
967     {
968         status = U_MISSING_RESOURCE_ERROR;
969     }
970 }
971 
972 /* Initialize the fields we use to disambiguate ambiguous years. Separate
973  * so we can call it from readObject().
974  */
initializeDefaultCentury()975 void SimpleDateFormat::initializeDefaultCentury()
976 {
977   if(fCalendar) {
978     fHaveDefaultCentury = fCalendar->haveDefaultCentury();
979     if(fHaveDefaultCentury) {
980       fDefaultCenturyStart = fCalendar->defaultCenturyStart();
981       fDefaultCenturyStartYear = fCalendar->defaultCenturyStartYear();
982     } else {
983       fDefaultCenturyStart = DBL_MIN;
984       fDefaultCenturyStartYear = -1;
985     }
986   }
987 }
988 
989 /*
990  * Initialize the boolean attributes. Separate so we can call it from all constructors.
991  */
initializeBooleanAttributes()992 void SimpleDateFormat::initializeBooleanAttributes()
993 {
994     UErrorCode status = U_ZERO_ERROR;
995 
996     setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status);
997     setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status);
998     setBooleanAttribute(UDAT_PARSE_PARTIAL_LITERAL_MATCH, true, status);
999     setBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, true, status);
1000 }
1001 
1002 /* Define one-century window into which to disambiguate dates using
1003  * two-digit years. Make public in JDK 1.2.
1004  */
parseAmbiguousDatesAsAfter(UDate startDate,UErrorCode & status)1005 void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status)
1006 {
1007     if(U_FAILURE(status)) {
1008         return;
1009     }
1010     if(!fCalendar) {
1011       status = U_ILLEGAL_ARGUMENT_ERROR;
1012       return;
1013     }
1014 
1015     fCalendar->setTime(startDate, status);
1016     if(U_SUCCESS(status)) {
1017         fHaveDefaultCentury = true;
1018         fDefaultCenturyStart = startDate;
1019         fDefaultCenturyStartYear = fCalendar->get(UCAL_YEAR, status);
1020     }
1021 }
1022 
1023 //----------------------------------------------------------------------
1024 
1025 UnicodeString&
format(Calendar & cal,UnicodeString & appendTo,FieldPosition & pos) const1026 SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, FieldPosition& pos) const
1027 {
1028   UErrorCode status = U_ZERO_ERROR;
1029   FieldPositionOnlyHandler handler(pos);
1030   return _format(cal, appendTo, handler, status);
1031 }
1032 
1033 //----------------------------------------------------------------------
1034 
1035 UnicodeString&
format(Calendar & cal,UnicodeString & appendTo,FieldPositionIterator * posIter,UErrorCode & status) const1036 SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo,
1037                          FieldPositionIterator* posIter, UErrorCode& status) const
1038 {
1039   FieldPositionIteratorHandler handler(posIter, status);
1040   return _format(cal, appendTo, handler, status);
1041 }
1042 
1043 //----------------------------------------------------------------------
1044 
1045 UnicodeString&
_format(Calendar & cal,UnicodeString & appendTo,FieldPositionHandler & handler,UErrorCode & status) const1046 SimpleDateFormat::_format(Calendar& cal, UnicodeString& appendTo,
1047                             FieldPositionHandler& handler, UErrorCode& status) const
1048 {
1049     if ( U_FAILURE(status) ) {
1050        return appendTo;
1051     }
1052     Calendar* workCal = &cal;
1053     Calendar* calClone = nullptr;
1054     if (&cal != fCalendar && uprv_strcmp(cal.getType(), fCalendar->getType()) != 0) {
1055         // Different calendar type
1056         // We use the time and time zone from the input calendar, but
1057         // do not use the input calendar for field calculation.
1058         calClone = fCalendar->clone();
1059         if (calClone != nullptr) {
1060             UDate t = cal.getTime(status);
1061             calClone->setTime(t, status);
1062             calClone->setTimeZone(cal.getTimeZone());
1063             workCal = calClone;
1064         } else {
1065             status = U_MEMORY_ALLOCATION_ERROR;
1066             return appendTo;
1067         }
1068     }
1069 
1070     UBool inQuote = false;
1071     char16_t prevCh = 0;
1072     int32_t count = 0;
1073     int32_t fieldNum = 0;
1074     UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1075 
1076     // loop through the pattern string character by character
1077     for (int32_t i = 0; i < fPattern.length() && U_SUCCESS(status); ++i) {
1078         char16_t ch = fPattern[i];
1079 
1080         // Use subFormat() to format a repeated pattern character
1081         // when a different pattern or non-pattern character is seen
1082         if (ch != prevCh && count > 0) {
1083             subFormat(appendTo, prevCh, count, capitalizationContext, fieldNum++,
1084                       prevCh, handler, *workCal, status);
1085             count = 0;
1086         }
1087         if (ch == QUOTE) {
1088             // Consecutive single quotes are a single quote literal,
1089             // either outside of quotes or between quotes
1090             if ((i+1) < fPattern.length() && fPattern[i+1] == QUOTE) {
1091                 appendTo += (char16_t)QUOTE;
1092                 ++i;
1093             } else {
1094                 inQuote = ! inQuote;
1095             }
1096         }
1097         else if (!inQuote && isSyntaxChar(ch)) {
1098             // ch is a date-time pattern character to be interpreted
1099             // by subFormat(); count the number of times it is repeated
1100             prevCh = ch;
1101             ++count;
1102         }
1103         else {
1104             // Append quoted characters and unquoted non-pattern characters
1105             appendTo += ch;
1106         }
1107     }
1108 
1109     // Format the last item in the pattern, if any
1110     if (count > 0) {
1111         subFormat(appendTo, prevCh, count, capitalizationContext, fieldNum++,
1112                   prevCh, handler, *workCal, status);
1113     }
1114 
1115     if (calClone != nullptr) {
1116         delete calClone;
1117     }
1118 
1119     return appendTo;
1120 }
1121 
1122 //----------------------------------------------------------------------
1123 
/* Map calendar field into calendar field level.
 * The larger the level, the smaller the field unit.
 * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10,
 * UCAL_MONTH level is 20.
 *
 * The table is positional: entry i is the level of calendar field number i,
 * so the order of the values must not be changed.
 * NOTE(review): the marker comment at the start of each row appears to list
 * the pattern letters of the fields in that row, with '?' and '.' standing
 * in for fields that have no letter -- confirm.
 *
 * NOTE: if new fields are added, this table needs to be updated.
 */
const int32_t
SimpleDateFormat::fgCalendarFieldToLevel[] =
{
    /*GyM*/ 0, 10, 20,
    /*wW*/ 20, 30,
    /*dDEF*/ 30, 20, 30, 30,
    /*ahHm*/ 40, 50, 50, 60,
    /*sS*/ 70, 80,
    /*z?Y*/ 0, 0, 10,
    /*eug*/ 30, 10, 0,
    /*A?.*/ 40, 0, 0
};
1142 
getLevelFromChar(char16_t ch)1143 int32_t SimpleDateFormat::getLevelFromChar(char16_t ch) {
1144     // Map date field LETTER into calendar field level.
1145     // the larger the level, the smaller the field unit.
1146     // NOTE: if new fields adds in, the table needs to update.
1147     static const int32_t mapCharToLevel[] = {
1148             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1149         //
1150             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1151         //       !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /
1152             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1153 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
1154         //   0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?
1155             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0, -1, -1, -1, -1, -1,
1156 #else
1157         //   0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?
1158             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1159 #endif
1160         //   @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O
1161             -1, 40, -1, -1, 20, 30, 30,  0, 50, -1, -1, 50, 20, 20, -1,  0,
1162         //   P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _
1163             -1, 20, -1, 80, -1, 10,  0, 30,  0, 10,  0, -1, -1, -1, -1, -1,
1164         //   `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o
1165             -1, 40, -1, 30, 30, 30, -1,  0, 50, -1, -1, 50,  0, 60, -1, -1,
1166         //   p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~
1167             -1, 20, 10, 70, -1, 10,  0, 20,  0, 10,  0, -1, -1, -1, -1, -1
1168     };
1169 
1170     return ch < UPRV_LENGTHOF(mapCharToLevel) ? mapCharToLevel[ch] : -1;
1171 }
1172 
isSyntaxChar(char16_t ch)1173 UBool SimpleDateFormat::isSyntaxChar(char16_t ch) {
1174     static const UBool mapCharToIsSyntax[] = {
1175         //
1176         false, false, false, false, false, false, false, false,
1177         //
1178         false, false, false, false, false, false, false, false,
1179         //
1180         false, false, false, false, false, false, false, false,
1181         //
1182         false, false, false, false, false, false, false, false,
1183         //         !      "      #      $      %      &      '
1184         false, false, false, false, false, false, false, false,
1185         //  (      )      *      +      ,      -      .      /
1186         false, false, false, false, false, false, false, false,
1187         //  0      1      2      3      4      5      6      7
1188         false, false, false, false, false, false, false, false,
1189 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
1190         //  8      9      :      ;      <      =      >      ?
1191         false, false,  true, false, false, false, false, false,
1192 #else
1193         //  8      9      :      ;      <      =      >      ?
1194         false, false, false, false, false, false, false, false,
1195 #endif
1196         //  @      A      B      C      D      E      F      G
1197         false,  true,  true,  true,  true,  true,  true,  true,
1198         //  H      I      J      K      L      M      N      O
1199          true,  true,  true,  true,  true,  true,  true,  true,
1200         //  P      Q      R      S      T      U      V      W
1201          true,  true,  true,  true,  true,  true,  true,  true,
1202         //  X      Y      Z      [      \      ]      ^      _
1203          true,  true,  true, false, false, false, false, false,
1204         //  `      a      b      c      d      e      f      g
1205         false,  true,  true,  true,  true,  true,  true,  true,
1206         //  h      i      j      k      l      m      n      o
1207          true,  true,  true,  true,  true,  true,  true,  true,
1208         //  p      q      r      s      t      u      v      w
1209          true,  true,  true,  true,  true,  true,  true,  true,
1210         //  x      y      z      {      |      }      ~
1211          true,  true,  true, false, false, false, false, false
1212     };
1213 
1214     return ch < UPRV_LENGTHOF(mapCharToIsSyntax) ? mapCharToIsSyntax[ch] : false;
1215 }
1216 
// Map index into pattern character string to Calendar field number.
// The table is positional: the marker comment at the start of each row lists
// the pattern characters whose entries follow, so the order must not change.
// UCAL_FIELD_COUNT marks a pattern character that has no calendar field.
const UCalendarDateFields
SimpleDateFormat::fgPatternIndexToCalendarField[] =
{
    /*GyM*/ UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
    /*dkH*/ UCAL_DATE, UCAL_HOUR_OF_DAY, UCAL_HOUR_OF_DAY,
    /*msS*/ UCAL_MINUTE, UCAL_SECOND, UCAL_MILLISECOND,
    /*EDF*/ UCAL_DAY_OF_WEEK, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK_IN_MONTH,
    /*wWa*/ UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_AM_PM,
    /*hKz*/ UCAL_HOUR, UCAL_HOUR, UCAL_ZONE_OFFSET,
    /*Yeu*/ UCAL_YEAR_WOY, UCAL_DOW_LOCAL, UCAL_EXTENDED_YEAR,
    /*gAZ*/ UCAL_JULIAN_DAY, UCAL_MILLISECONDS_IN_DAY, UCAL_ZONE_OFFSET,
    /*v*/   UCAL_ZONE_OFFSET,
    /*c*/   UCAL_DOW_LOCAL,
    /*L*/   UCAL_MONTH,
    /*Q*/   UCAL_MONTH,
    /*q*/   UCAL_MONTH,
    /*V*/   UCAL_ZONE_OFFSET,
    /*U*/   UCAL_YEAR,
    /*O*/   UCAL_ZONE_OFFSET,
    /*Xx*/  UCAL_ZONE_OFFSET, UCAL_ZONE_OFFSET,
    /*r*/   UCAL_EXTENDED_YEAR,
    /*bB*/   UCAL_FIELD_COUNT, UCAL_FIELD_COUNT,  // no mappings to calendar fields
    // The time-separator slot holds the same value in both configurations;
    // only the explanatory comment differs.
#if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
    /*:*/   UCAL_FIELD_COUNT, /* => no useful mapping to any calendar field */
#else
    /*no pattern char for UDAT_TIME_SEPARATOR_FIELD*/   UCAL_FIELD_COUNT, /* => no useful mapping to any calendar field */
#endif
};
1246 
// Map index into pattern character string to DateFormat field number.
// The table is positional: the marker comment at the start of each row lists
// the pattern characters whose entries follow, so the order must not change.
const UDateFormatField
SimpleDateFormat::fgPatternIndexToDateFormatField[] = {
    /*GyM*/ UDAT_ERA_FIELD, UDAT_YEAR_FIELD, UDAT_MONTH_FIELD,
    /*dkH*/ UDAT_DATE_FIELD, UDAT_HOUR_OF_DAY1_FIELD, UDAT_HOUR_OF_DAY0_FIELD,
    /*msS*/ UDAT_MINUTE_FIELD, UDAT_SECOND_FIELD, UDAT_FRACTIONAL_SECOND_FIELD,
    /*EDF*/ UDAT_DAY_OF_WEEK_FIELD, UDAT_DAY_OF_YEAR_FIELD, UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
    /*wWa*/ UDAT_WEEK_OF_YEAR_FIELD, UDAT_WEEK_OF_MONTH_FIELD, UDAT_AM_PM_FIELD,
    /*hKz*/ UDAT_HOUR1_FIELD, UDAT_HOUR0_FIELD, UDAT_TIMEZONE_FIELD,
    /*Yeu*/ UDAT_YEAR_WOY_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_EXTENDED_YEAR_FIELD,
    /*gAZ*/ UDAT_JULIAN_DAY_FIELD, UDAT_MILLISECONDS_IN_DAY_FIELD, UDAT_TIMEZONE_RFC_FIELD,
    /*v*/   UDAT_TIMEZONE_GENERIC_FIELD,
    /*c*/   UDAT_STANDALONE_DAY_FIELD,
    /*L*/   UDAT_STANDALONE_MONTH_FIELD,
    /*Q*/   UDAT_QUARTER_FIELD,
    /*q*/   UDAT_STANDALONE_QUARTER_FIELD,
    /*V*/   UDAT_TIMEZONE_SPECIAL_FIELD,
    /*U*/   UDAT_YEAR_NAME_FIELD,
    /*O*/   UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD,
    /*Xx*/  UDAT_TIMEZONE_ISO_FIELD, UDAT_TIMEZONE_ISO_LOCAL_FIELD,
    /*r*/   UDAT_RELATED_YEAR_FIELD,
    /*bB*/  UDAT_AM_PM_MIDNIGHT_NOON_FIELD, UDAT_FLEXIBLE_DAY_PERIOD_FIELD,
    // The time-separator slot holds UDAT_TIME_SEPARATOR_FIELD in both
    // configurations; only the explanatory comment differs.
#if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
    /*:*/   UDAT_TIME_SEPARATOR_FIELD,
#else
    /*no pattern char for UDAT_TIME_SEPARATOR_FIELD*/   UDAT_TIME_SEPARATOR_FIELD,
#endif
};
1275 
1276 //----------------------------------------------------------------------
1277 
1278 /**
1279  * Append symbols[value] to dst.  Make sure the array index is not out
1280  * of bounds.
1281  */
1282 static inline void
_appendSymbol(UnicodeString & dst,int32_t value,const UnicodeString * symbols,int32_t symbolsCount)1283 _appendSymbol(UnicodeString& dst,
1284               int32_t value,
1285               const UnicodeString* symbols,
1286               int32_t symbolsCount) {
1287     U_ASSERT(0 <= value && value < symbolsCount);
1288     if (0 <= value && value < symbolsCount) {
1289         dst += symbols[value];
1290     }
1291 }
1292 
1293 static inline void
_appendSymbolWithMonthPattern(UnicodeString & dst,int32_t value,const UnicodeString * symbols,int32_t symbolsCount,const UnicodeString * monthPattern,UErrorCode & status)1294 _appendSymbolWithMonthPattern(UnicodeString& dst, int32_t value, const UnicodeString* symbols, int32_t symbolsCount,
1295               const UnicodeString* monthPattern, UErrorCode& status) {
1296     U_ASSERT(0 <= value && value < symbolsCount);
1297     if (0 <= value && value < symbolsCount) {
1298         if (monthPattern == nullptr) {
1299             dst += symbols[value];
1300         } else {
1301             SimpleFormatter(*monthPattern, 1, 1, status).format(symbols[value], dst, status);
1302         }
1303     }
1304 }
1305 
1306 //----------------------------------------------------------------------
1307 
1308 void
initSimpleNumberFormatter(UErrorCode & status)1309 SimpleDateFormat::initSimpleNumberFormatter(UErrorCode &status) {
1310     if (U_FAILURE(status)) {
1311         return;
1312     }
1313     auto* df = dynamic_cast<const DecimalFormat*>(fNumberFormat);
1314     if (df == nullptr) {
1315         return;
1316     }
1317     const DecimalFormatSymbols* syms = df->getDecimalFormatSymbols();
1318     if (syms == nullptr) {
1319         return;
1320     }
1321     fSimpleNumberFormatter = new number::SimpleNumberFormatter(
1322         number::SimpleNumberFormatter::forLocaleAndSymbolsAndGroupingStrategy(
1323             fLocale, *syms, UNUM_GROUPING_OFF, status
1324         )
1325     );
1326     if (fSimpleNumberFormatter == nullptr) {
1327         status = U_MEMORY_ALLOCATION_ERROR;
1328     }
1329 }
1330 
1331 void
initNumberFormatters(const Locale & locale,UErrorCode & status)1332 SimpleDateFormat::initNumberFormatters(const Locale &locale,UErrorCode &status) {
1333     if (U_FAILURE(status)) {
1334         return;
1335     }
1336     if ( fDateOverride.isBogus() && fTimeOverride.isBogus() ) {
1337         return;
1338     }
1339     umtx_lock(&LOCK);
1340     if (fSharedNumberFormatters == nullptr) {
1341         fSharedNumberFormatters = allocSharedNumberFormatters();
1342         if (fSharedNumberFormatters == nullptr) {
1343             status = U_MEMORY_ALLOCATION_ERROR;
1344         }
1345     }
1346     umtx_unlock(&LOCK);
1347 
1348     if (U_FAILURE(status)) {
1349         return;
1350     }
1351 
1352     processOverrideString(locale,fDateOverride,kOvrStrDate,status);
1353     processOverrideString(locale,fTimeOverride,kOvrStrTime,status);
1354 }
1355 
1356 void
processOverrideString(const Locale & locale,const UnicodeString & str,int8_t type,UErrorCode & status)1357 SimpleDateFormat::processOverrideString(const Locale &locale, const UnicodeString &str, int8_t type, UErrorCode &status) {
1358     if (str.isBogus() || U_FAILURE(status)) {
1359         return;
1360     }
1361 
1362     int32_t start = 0;
1363     int32_t len;
1364     UnicodeString nsName;
1365     UnicodeString ovrField;
1366     UBool moreToProcess = true;
1367     NSOverride *overrideList = nullptr;
1368 
1369     while (moreToProcess) {
1370         int32_t delimiterPosition = str.indexOf((char16_t)ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE,start);
1371         if (delimiterPosition == -1) {
1372             moreToProcess = false;
1373             len = str.length() - start;
1374         } else {
1375             len = delimiterPosition - start;
1376         }
1377         UnicodeString currentString(str,start,len);
1378         int32_t equalSignPosition = currentString.indexOf((char16_t)ULOC_KEYWORD_ASSIGN_UNICODE,0);
1379         if (equalSignPosition == -1) { // Simple override string such as "hebrew"
1380             nsName.setTo(currentString);
1381             ovrField.setToBogus();
1382         } else { // Field specific override string such as "y=hebrew"
1383             nsName.setTo(currentString,equalSignPosition+1);
1384             ovrField.setTo(currentString,0,1); // We just need the first character.
1385         }
1386 
1387         int32_t nsNameHash = nsName.hashCode();
1388         // See if the numbering system is in the override list, if not, then add it.
1389         NSOverride *curr = overrideList;
1390         const SharedNumberFormat *snf = nullptr;
1391         UBool found = false;
1392         while ( curr && !found ) {
1393             if ( curr->hash == nsNameHash ) {
1394                 snf = curr->snf;
1395                 found = true;
1396             }
1397             curr = curr->next;
1398         }
1399 
1400         if (!found) {
1401            LocalPointer<NSOverride> cur(new NSOverride);
1402            if (!cur.isNull()) {
1403                char kw[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1404                uprv_strcpy(kw,"numbers=");
1405                nsName.extract(0,len,kw+8,ULOC_KEYWORD_AND_VALUES_CAPACITY-8,US_INV);
1406 
1407                Locale ovrLoc(locale.getLanguage(),locale.getCountry(),locale.getVariant(),kw);
1408                cur->hash = nsNameHash;
1409                cur->next = overrideList;
1410                SharedObject::copyPtr(
1411                        createSharedNumberFormat(ovrLoc, status), cur->snf);
1412                if (U_FAILURE(status)) {
1413                    if (overrideList) {
1414                        overrideList->free();
1415                    }
1416                    return;
1417                }
1418                snf = cur->snf;
1419                overrideList = cur.orphan();
1420            } else {
1421                status = U_MEMORY_ALLOCATION_ERROR;
1422                if (overrideList) {
1423                    overrideList->free();
1424                }
1425                return;
1426            }
1427         }
1428 
1429         // Now that we have an appropriate number formatter, fill in the appropriate spaces in the
1430         // number formatters table.
1431         if (ovrField.isBogus()) {
1432             switch (type) {
1433                 case kOvrStrDate:
1434                 case kOvrStrBoth: {
1435                     for ( int8_t i=0 ; i<kDateFieldsCount; i++ ) {
1436                         SharedObject::copyPtr(snf, fSharedNumberFormatters[kDateFields[i]]);
1437                     }
1438                     if (type==kOvrStrDate) {
1439                         break;
1440                     }
1441                     U_FALLTHROUGH;
1442                 }
1443                 case kOvrStrTime : {
1444                     for ( int8_t i=0 ; i<kTimeFieldsCount; i++ ) {
1445                         SharedObject::copyPtr(snf, fSharedNumberFormatters[kTimeFields[i]]);
1446                     }
1447                     break;
1448                 }
1449             }
1450         } else {
1451            // if the pattern character is unrecognized, signal an error and bail out
1452            UDateFormatField patternCharIndex =
1453               DateFormatSymbols::getPatternCharIndex(ovrField.charAt(0));
1454            if (patternCharIndex == UDAT_FIELD_COUNT) {
1455                status = U_INVALID_FORMAT_ERROR;
1456                if (overrideList) {
1457                    overrideList->free();
1458                }
1459                return;
1460            }
1461            SharedObject::copyPtr(snf, fSharedNumberFormatters[patternCharIndex]);
1462         }
1463 
1464         start = delimiterPosition + 1;
1465     }
1466     if (overrideList) {
1467         overrideList->free();
1468     }
1469 }
1470 
1471 //---------------------------------------------------------------------
1472 void
subFormat(UnicodeString & appendTo,char16_t ch,int32_t count,UDisplayContext capitalizationContext,int32_t fieldNum,char16_t fieldToOutput,FieldPositionHandler & handler,Calendar & cal,UErrorCode & status) const1473 SimpleDateFormat::subFormat(UnicodeString &appendTo,
1474                             char16_t ch,
1475                             int32_t count,
1476                             UDisplayContext capitalizationContext,
1477                             int32_t fieldNum,
1478                             char16_t fieldToOutput,
1479                             FieldPositionHandler& handler,
1480                             Calendar& cal,
1481                             UErrorCode& status) const
1482 {
1483     if (U_FAILURE(status)) {
1484         return;
1485     }
1486 
1487     // this function gets called by format() to produce the appropriate substitution
1488     // text for an individual pattern symbol (e.g., "HH" or "yyyy")
         //
         // appendTo              - receives the formatted text, appended at its current end
         // ch                    - pattern character that selects the field to format
         // count                 - how many times ch is repeated in the pattern; chooses the
         //                         width/style (numeric padding, abbreviated, wide, narrow, ...)
         // capitalizationContext - consulted only by the titlecasing step after the switch
         // fieldNum              - titlecasing is attempted only when this is 0 (first field)
         // fieldToOutput         - pattern character whose field index is reported to handler;
         //                         it differs from ch when a 'b'/'B' request is re-formatted as 'a'
         // handler               - receives the field index plus the begin/end offsets of the
         //                         text appended by this call
         // cal                   - calendar supplying the field values
         // status                - nothing is done if it is already a failure; set to
         //                         U_INVALID_FORMAT_ERROR for an unrecognized pattern character
         //                         and to U_INTERNAL_PROGRAM_ERROR if no number format is found
1489 
1490     UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch);
1491     const int32_t maxIntCount = 10;
         // start of the text this call appends; used for titlecasing and for the handler range
1492     int32_t beginOffset = appendTo.length();
1493     const NumberFormat *currentNumberFormat;
1494     DateFormatSymbols::ECapitalizationContextUsageType capContextUsageType = DateFormatSymbols::kCapContextUsageOther;
1495 
         // Calendar-type flags (compared by type name): they select the Hebrew month
         // adjustments and the numeric era used for chinese/dangi calendars below.
1496     UBool isHebrewCalendar = (uprv_strcmp(cal.getType(),"hebrew") == 0);
1497     UBool isChineseCalendar = (uprv_strcmp(cal.getType(),"chinese") == 0 || uprv_strcmp(cal.getType(),"dangi") == 0);
1498 
1499     // if the pattern character is unrecognized, signal an error and dump out
1500     if (patternCharIndex == UDAT_FIELD_COUNT)
1501     {
1502         if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored
1503             status = U_INVALID_FORMAT_ERROR;
1504         }
1505         return;
1506     }
1507 
1508     UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
1509     int32_t value = 0;
1510     // Don't get value unless it is useful
         // (the related-year field reads cal.getRelatedYear() instead of a plain calendar field)
1511     if (field < UCAL_FIELD_COUNT) {
1512         value = (patternCharIndex != UDAT_RELATED_YEAR_FIELD)? cal.get(field, status): cal.getRelatedYear(status);
1513     }
1514     if (U_FAILURE(status)) {
1515         return;
1516     }
1517 
         // Number format registered for this field; every numeric branch below needs one,
         // so a missing format is reported as an internal error.
1518     currentNumberFormat = getNumberFormatByIndex(patternCharIndex);
1519     if (currentNumberFormat == nullptr) {
1520         status = U_INTERNAL_PROGRAM_ERROR;
1521         return;
1522     }
         // "hebr": compared with fDateOverride in the year cases below
1523     UnicodeString hebr("hebr", 4, US_INV);
1524 
1525     switch (patternCharIndex) {
1526 
1527     // for any "G" symbol, write out the appropriate era string
1528     // "GGGG" is wide era name, "GGGGG" is narrow era name, anything else is abbreviated name
         // (chinese/dangi calendars write the era as a number instead)
1529     case UDAT_ERA_FIELD:
1530         if (isChineseCalendar) {
1531             zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, 9); // as in ICU4J
1532         } else {
1533             if (count == 5) {
1534                 _appendSymbol(appendTo, value, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount);
1535                 capContextUsageType = DateFormatSymbols::kCapContextUsageEraNarrow;
1536             } else if (count == 4) {
1537                 _appendSymbol(appendTo, value, fSymbols->fEraNames, fSymbols->fEraNamesCount);
1538                 capContextUsageType = DateFormatSymbols::kCapContextUsageEraWide;
1539             } else {
1540                 _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount);
1541                 capContextUsageType = DateFormatSymbols::kCapContextUsageEraAbbrev;
1542             }
1543         }
1544         break;
1545 
          // cyclic year name: used only when short year names exist and value does not exceed
          // their count; otherwise the year is formatted numerically by the next case
1546      case UDAT_YEAR_NAME_FIELD:
1547         if (fSymbols->fShortYearNames != nullptr && value <= fSymbols->fShortYearNamesCount) {
1548             // the Calendar YEAR field runs 1 through 60 for cyclic years
1549             _appendSymbol(appendTo, value - 1, fSymbols->fShortYearNames, fSymbols->fShortYearNamesCount);
1550             break;
1551         }
1552         // else fall through to numeric year handling, do not break here
1553         U_FALLTHROUGH;
1554 
1555    // OLD: for "yyyy", write out the whole year; for "yy", write out the last 2 digits
1556     // NEW: UTS#35:
1557 //Year         y     yy     yyy     yyyy     yyyyy
1558 //AD 1         1     01     001     0001     00001
1559 //AD 12       12     12     012     0012     00012
1560 //AD 123     123     23     123     0123     00123
1561 //AD 1234   1234     34    1234     1234     01234
1562 //AD 12345 12345     45   12345    12345     12345
1563     case UDAT_YEAR_FIELD:
1564     case UDAT_YEAR_WOY_FIELD:
             // With a "hebr" date override, a year strictly inside the current Hebrew
             // millennium is reduced by the millennium's start year before formatting.
1565         if (fDateOverride.compare(hebr)==0 && value>HEBREW_CAL_CUR_MILLENIUM_START_YEAR && value<HEBREW_CAL_CUR_MILLENIUM_END_YEAR) {
1566             value-=HEBREW_CAL_CUR_MILLENIUM_START_YEAR;
1567         }
             // count == 2 limits the output to exactly two digits (see the "yy" column above);
             // any other count is used as the minimum digit count
1568         if(count == 2)
1569             zeroPaddingNumber(currentNumberFormat, appendTo, value, 2, 2);
1570         else
1571             zeroPaddingNumber(currentNumberFormat, appendTo, value, count, maxIntCount);
1572         break;
1573 
1574     // for "MMMM"/"LLLL", write out the whole month name, for "MMM"/"LLL", write out the month
1575     // abbreviation, for "M"/"L" or "MM"/"LL", write out the month as a number with the
1576     // appropriate number of digits
1577     // for "MMMMM"/"LLLLL", use the narrow form
1578     case UDAT_MONTH_FIELD:
1579     case UDAT_STANDALONE_MONTH_FIELD:
1580         if ( isHebrewCalendar ) {
                // Downcast justified only by the getType() == "hebrew" check above;
                // no run-time type check is made here.
1581            HebrewCalendar *hc = (HebrewCalendar*)&cal;
1582            if (hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value == 6 && count >= 3 )
1583                value = 13; // Show alternate form for Adar II in leap years in Hebrew calendar.
1584            if (!hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value >= 6 && count < 3 )
1585                value--; // Adjust the month number down 1 in Hebrew non-leap years, i.e. Adar is 6, not 7.
1586         }
1587         {
                 // UCAL_IS_LEAP_MONTH is read only when the symbols carry a full set of
                 // leap-month patterns; otherwise isLeapMonth stays 0 and no pattern is passed.
1588             int32_t isLeapMonth = (fSymbols->fLeapMonthPatterns != nullptr && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount)?
1589                         cal.get(UCAL_IS_LEAP_MONTH, status): 0;
1590             // should consolidate the next section by using arrays of pointers & counts for the right symbols...
1591             if (count == 5) {
1592                 if (patternCharIndex == UDAT_MONTH_FIELD) {
1593                     _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fNarrowMonths, fSymbols->fNarrowMonthsCount,
1594                             (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatNarrow]): nullptr, status);
1595                 } else {
1596                     _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneNarrowMonths, fSymbols->fStandaloneNarrowMonthsCount,
1597                             (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneNarrow]): nullptr, status);
1598                 }
1599                 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthNarrow;
1600             } else if (count == 4) {
1601                 if (patternCharIndex == UDAT_MONTH_FIELD) {
1602                     _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fMonths, fSymbols->fMonthsCount,
1603                             (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide]): nullptr, status);
1604                     capContextUsageType = DateFormatSymbols::kCapContextUsageMonthFormat;
1605                 } else {
1606                     _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount,
1607                             (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide]): nullptr, status);
1608                     capContextUsageType = DateFormatSymbols::kCapContextUsageMonthStandalone;
1609                 }
1610             } else if (count == 3) {
1611                 if (patternCharIndex == UDAT_MONTH_FIELD) {
1612                     _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fShortMonths, fSymbols->fShortMonthsCount,
1613                             (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev]): nullptr, status);
1614                     capContextUsageType = DateFormatSymbols::kCapContextUsageMonthFormat;
1615                 } else {
1616                     _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount,
1617                             (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev]): nullptr, status);
1618                     capContextUsageType = DateFormatSymbols::kCapContextUsageMonthStandalone;
1619                 }
1620             } else {
                     // numeric month: format value + 1 into a temporary string, then append it as a
                     // one-element symbol table, passing the numeric leap-month pattern when
                     // isLeapMonth is non-zero
1621                 UnicodeString monthNumber;
1622                 zeroPaddingNumber(currentNumberFormat,monthNumber, value + 1, count, maxIntCount);
1623                 _appendSymbolWithMonthPattern(appendTo, 0, &monthNumber, 1,
1624                         (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric]): nullptr, status);
1625             }
1626         }
1627         break;
1628 
1629     // for "k" and "kk", write out the hour, adjusting midnight to appear as "24"
1630     case UDAT_HOUR_OF_DAY1_FIELD:
1631         if (value == 0)
1632             zeroPaddingNumber(currentNumberFormat,appendTo, cal.getMaximum(UCAL_HOUR_OF_DAY) + 1, count, maxIntCount);
1633         else
1634             zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1635         break;
1636 
1637     case UDAT_FRACTIONAL_SECOND_FIELD:
1638         // Fractional seconds left-justify
             // value is handled as a three-digit quantity (presumably milliseconds - TODO confirm):
             // for count 1 or 2 it is truncated to its leading 1 or 2 digits; for count > 3,
             // three digits are written followed by count - 3 zeros.
1639         {
1640             int32_t minDigits = (count > 3) ? 3 : count;
1641             if (count == 1) {
1642                 value /= 100;
1643             } else if (count == 2) {
1644                 value /= 10;
1645             }
1646             zeroPaddingNumber(currentNumberFormat, appendTo, value, minDigits, maxIntCount);
1647             if (count > 3) {
1648                 zeroPaddingNumber(currentNumberFormat, appendTo, 0, count - 3, maxIntCount);
1649             }
1650         }
1651         break;
1652 
1653     // for "ee" or "e", use local numeric day-of-the-week
1654     // for "EEEEEE" or "eeeeee", write out the short day-of-the-week name
1655     // for "EEEEE" or "eeeee", write out the narrow day-of-the-week name
1656     // for "EEEE" or "eeee", write out the wide day-of-the-week name
1657     // for "EEE" or "EE" or "E" or "eee", write out the abbreviated day-of-the-week name
1658     case UDAT_DOW_LOCAL_FIELD:
1659         if ( count < 3 ) {
1660             zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1661             break;
1662         }
1663         // fall through to EEEEE-EEE handling, but for that we don't want local day-of-week,
1664         // we want standard day-of-week, so first fix value to work for EEEEE-EEE.
1665         value = cal.get(UCAL_DAY_OF_WEEK, status);
1666         if (U_FAILURE(status)) {
1667             return;
1668         }
1669         // fall through, do not break here
1670         U_FALLTHROUGH;
1671     case UDAT_DAY_OF_WEEK_FIELD:
1672         if (count == 5) {
1673             _appendSymbol(appendTo, value, fSymbols->fNarrowWeekdays,
1674                           fSymbols->fNarrowWeekdaysCount);
1675             capContextUsageType = DateFormatSymbols::kCapContextUsageDayNarrow;
1676         } else if (count == 4) {
1677             _appendSymbol(appendTo, value, fSymbols->fWeekdays,
1678                           fSymbols->fWeekdaysCount);
1679             capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat;
1680         } else if (count == 6) {
1681             _appendSymbol(appendTo, value, fSymbols->fShorterWeekdays,
1682                           fSymbols->fShorterWeekdaysCount);
1683             capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat;
1684         } else {
1685             _appendSymbol(appendTo, value, fSymbols->fShortWeekdays,
1686                           fSymbols->fShortWeekdaysCount);
1687             capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat;
1688         }
1689         break;
1690 
         // for "c" or "cc", write out the numeric day-of-the-week (minimum of one digit)
1691     // for "ccc", write out the abbreviated day-of-the-week name
1692     // for "cccc", write out the wide day-of-the-week name
1693     // for "ccccc", use the narrow day-of-the-week name
1694     // for "cccccc", use the short day-of-the-week name
1695     case UDAT_STANDALONE_DAY_FIELD:
1696         if ( count < 3 ) {
1697             zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, maxIntCount);
1698             break;
1699         }
1700         // continue with alpha DOW handling (in this same case, not a case fall-through); for
1701         // that we don't want local day-of-week, we want standard day-of-week, so first fix value.
1702         value = cal.get(UCAL_DAY_OF_WEEK, status);
1703         if (U_FAILURE(status)) {
1704             return;
1705         }
1706         if (count == 5) {
1707             _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowWeekdays,
1708                           fSymbols->fStandaloneNarrowWeekdaysCount);
1709             capContextUsageType = DateFormatSymbols::kCapContextUsageDayNarrow;
1710         } else if (count == 4) {
1711             _appendSymbol(appendTo, value, fSymbols->fStandaloneWeekdays,
1712                           fSymbols->fStandaloneWeekdaysCount);
1713             capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone;
1714         } else if (count == 6) {
1715             _appendSymbol(appendTo, value, fSymbols->fStandaloneShorterWeekdays,
1716                           fSymbols->fStandaloneShorterWeekdaysCount);
1717             capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone;
1718         } else { // count == 3
1719             _appendSymbol(appendTo, value, fSymbols->fStandaloneShortWeekdays,
1720                           fSymbols->fStandaloneShortWeekdaysCount);
1721             capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone;
1722         }
1723         break;
1724 
1725     // for "a" symbol, write out the whole AM/PM string
         // (a count of 5 or more selects the narrow AM/PM strings)
1726     case UDAT_AM_PM_FIELD:
1727         if (count < 5) {
1728             _appendSymbol(appendTo, value, fSymbols->fAmPms,
1729                           fSymbols->fAmPmsCount);
1730         } else {
1731             _appendSymbol(appendTo, value, fSymbols->fNarrowAmPms,
1732                           fSymbols->fNarrowAmPmsCount);
1733         }
1734         break;
1735 
1736     // if we see pattern character for UDAT_TIME_SEPARATOR_FIELD (none currently defined),
1737     // write out the time separator string. Leave support in for future definition.
1738     case UDAT_TIME_SEPARATOR_FIELD:
1739         {
1740             UnicodeString separator;
1741             appendTo += fSymbols->getTimeSeparatorString(separator);
1742         }
1743         break;
1744 
1745     // for "h" and "hh", write out the hour, adjusting noon and midnight to show up
1746     // as "12"
1747     case UDAT_HOUR1_FIELD:
1748         if (value == 0)
1749             zeroPaddingNumber(currentNumberFormat,appendTo, cal.getLeastMaximum(UCAL_HOUR) + 1, count, maxIntCount);
1750         else
1751             zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1752         break;
1753 
         // All time zone pattern characters share one block: the zone text is built in
         // zoneString by the TimeZoneFormat style chosen from (pattern character, count).
         // A count with no matching branch, or a failed status, leaves zoneString empty,
         // so nothing is appended for it.
1754     case UDAT_TIMEZONE_FIELD: // 'z'
1755     case UDAT_TIMEZONE_RFC_FIELD: // 'Z'
1756     case UDAT_TIMEZONE_GENERIC_FIELD: // 'v'
1757     case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V'
1758     case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: // 'O'
1759     case UDAT_TIMEZONE_ISO_FIELD: // 'X'
1760     case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x'
1761         {
                 // zoneString is constructed over the local buffer zsbuf
1762             char16_t zsbuf[ZONE_NAME_U16_MAX];
1763             UnicodeString zoneString(zsbuf, 0, UPRV_LENGTHOF(zsbuf));
1764             const TimeZone& tz = cal.getTimeZone();
1765             UDate date = cal.getTime(status);
1766             const TimeZoneFormat *tzfmt = tzFormat(status);
1767             if (U_SUCCESS(status)) {
1768                 if (patternCharIndex == UDAT_TIMEZONE_FIELD) {
1769                     if (count < 4) {
1770                         // "z", "zz", "zzz"
1771                         tzfmt->format(UTZFMT_STYLE_SPECIFIC_SHORT, tz, date, zoneString);
1772                         capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneShort;
1773                     } else {
1774                         // "zzzz" or longer
1775                         tzfmt->format(UTZFMT_STYLE_SPECIFIC_LONG, tz, date, zoneString);
1776                         capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong;
1777                     }
1778                 }
1779                 else if (patternCharIndex == UDAT_TIMEZONE_RFC_FIELD) {
1780                     if (count < 4) {
1781                         // "Z", "ZZ", "ZZZ"
1782                         tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, tz, date, zoneString);
1783                     } else if (count == 5) {
1784                         // "ZZZZZ"
1785                         tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FULL, tz, date, zoneString);
1786                     } else {
1787                         // "ZZZZ" (and any count above 5)
1788                         tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString);
1789                     }
1790                 }
1791                 else if (patternCharIndex == UDAT_TIMEZONE_GENERIC_FIELD) {
1792                     if (count == 1) {
1793                         // "v"
1794                         tzfmt->format(UTZFMT_STYLE_GENERIC_SHORT, tz, date, zoneString);
1795                         capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneShort;
1796                     } else if (count == 4) {
1797                         // "vvvv"
1798                         tzfmt->format(UTZFMT_STYLE_GENERIC_LONG, tz, date, zoneString);
1799                         capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong;
1800                     }
1801                 }
1802                 else if (patternCharIndex == UDAT_TIMEZONE_SPECIAL_FIELD) {
1803                     if (count == 1) {
1804                         // "V"
1805                         tzfmt->format(UTZFMT_STYLE_ZONE_ID_SHORT, tz, date, zoneString);
1806                     } else if (count == 2) {
1807                         // "VV"
1808                         tzfmt->format(UTZFMT_STYLE_ZONE_ID, tz, date, zoneString);
1809                     } else if (count == 3) {
1810                         // "VVV"
1811                         tzfmt->format(UTZFMT_STYLE_EXEMPLAR_LOCATION, tz, date, zoneString);
1812                     } else if (count == 4) {
1813                         // "VVVV"
1814                         tzfmt->format(UTZFMT_STYLE_GENERIC_LOCATION, tz, date, zoneString);
1815                         capContextUsageType = DateFormatSymbols::kCapContextUsageZoneLong;
1816                     }
1817                 }
1818                 else if (patternCharIndex == UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD) {
1819                     if (count == 1) {
1820                         // "O"
1821                         tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT_SHORT, tz, date, zoneString);
1822                     } else if (count == 4) {
1823                         // "OOOO"
1824                         tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString);
1825                     }
1826                 }
1827                 else if (patternCharIndex == UDAT_TIMEZONE_ISO_FIELD) {
1828                     if (count == 1) {
1829                         // "X"
1830                         tzfmt->format(UTZFMT_STYLE_ISO_BASIC_SHORT, tz, date, zoneString);
1831                     } else if (count == 2) {
1832                         // "XX"
1833                         tzfmt->format(UTZFMT_STYLE_ISO_BASIC_FIXED, tz, date, zoneString);
1834                     } else if (count == 3) {
1835                         // "XXX"
1836                         tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FIXED, tz, date, zoneString);
1837                     } else if (count == 4) {
1838                         // "XXXX"
1839                         tzfmt->format(UTZFMT_STYLE_ISO_BASIC_FULL, tz, date, zoneString);
1840                     } else if (count == 5) {
1841                         // "XXXXX"
1842                         tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FULL, tz, date, zoneString);
1843                     }
1844                 }
1845                 else if (patternCharIndex == UDAT_TIMEZONE_ISO_LOCAL_FIELD) {
1846                     if (count == 1) {
1847                         // "x"
1848                         tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT, tz, date, zoneString);
1849                     } else if (count == 2) {
1850                         // "xx"
1851                         tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED, tz, date, zoneString);
1852                     } else if (count == 3) {
1853                         // "xxx"
1854                         tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED, tz, date, zoneString);
1855                     } else if (count == 4) {
1856                         // "xxxx"
1857                         tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, tz, date, zoneString);
1858                     } else if (count == 5) {
1859                         // "xxxxx"
1860                         tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL, tz, date, zoneString);
1861                     }
1862                 }
1863                 else {
                         // every case label of this block is handled above
1864                     UPRV_UNREACHABLE_EXIT;
1865                 }
1866             }
1867             appendTo += zoneString;
1868         }
1869         break;
1870 
         // quarter: count >= 5 uses fNarrowQuarters, 4 uses fQuarters, 3 uses fShortQuarters,
         // anything shorter writes the quarter number. value / 3 is the zero-based quarter index
         // (value is presumably the month field - the field mapping table is outside this view).
1871     case UDAT_QUARTER_FIELD:
1872         if (count >= 5)
1873             _appendSymbol(appendTo, value/3, fSymbols->fNarrowQuarters,
1874                           fSymbols->fNarrowQuartersCount);
1875          else if (count == 4)
1876             _appendSymbol(appendTo, value/3, fSymbols->fQuarters,
1877                           fSymbols->fQuartersCount);
1878         else if (count == 3)
1879             _appendSymbol(appendTo, value/3, fSymbols->fShortQuarters,
1880                           fSymbols->fShortQuartersCount);
1881         else
1882             zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount);
1883         break;
1884 
         // same selection as the quarter case above, using the standalone quarter symbols
1885     case UDAT_STANDALONE_QUARTER_FIELD:
1886         if (count >= 5)
1887             _appendSymbol(appendTo, value/3, fSymbols->fStandaloneNarrowQuarters,
1888                           fSymbols->fStandaloneNarrowQuartersCount);
1889         else if (count == 4)
1890             _appendSymbol(appendTo, value/3, fSymbols->fStandaloneQuarters,
1891                           fSymbols->fStandaloneQuartersCount);
1892         else if (count == 3)
1893             _appendSymbol(appendTo, value/3, fSymbols->fStandaloneShortQuarters,
1894                           fSymbols->fStandaloneShortQuartersCount);
1895         else
1896             zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount);
1897         break;
1898 
1899     case UDAT_AM_PM_MIDNIGHT_NOON_FIELD:
1900     {
1901         const UnicodeString *toAppend = nullptr;
1902         int32_t hour = cal.get(UCAL_HOUR_OF_DAY, status);
1903 
1904         // Note: "midnight" can be ambiguous as to whether it refers to beginning of day or end of day.
1905         // For ICU 57 output of "midnight" is temporarily suppressed.
1906 
1907         // For "midnight" and "noon":
1908         // Time, as displayed, must be exactly noon or midnight.
1909         // This means minutes and seconds, if present, must be zero.
1910         if ((/*hour == 0 ||*/ hour == 12) &&
1911                 (!fHasMinute || cal.get(UCAL_MINUTE, status) == 0) &&
1912                 (!fHasSecond || cal.get(UCAL_SECOND, status) == 0)) {
1913             // Stealing am/pm value to use as our array index.
1914             // It works out: am/midnight are both 0, pm/noon are both 1,
1915             // 12 am is 12 midnight, and 12 pm is 12 noon.
1916             int32_t val = cal.get(UCAL_AM_PM, status);
1917 
1918             if (count <= 3) {
1919                 toAppend = &fSymbols->fAbbreviatedDayPeriods[val];
1920             } else if (count == 4 || count > 5) {
1921                 toAppend = &fSymbols->fWideDayPeriods[val];
1922             } else { // count == 5
1923                 toAppend = &fSymbols->fNarrowDayPeriods[val];
1924             }
1925         }
1926 
1927         // toAppend is nullptr if time isn't exactly midnight or noon (as displayed).
1928         // toAppend is bogus if time is midnight or noon, but no localized string exists.
1929         // In either case, fall back to am/pm.
1930         if (toAppend == nullptr || toAppend->isBogus()) {
1931             // Reformat with identical arguments except ch, now changed to 'a'.
1932             // We are passing a different fieldToOutput because we want to add
1933             // 'b' to field position. This makes this fallback stable when
1934             // there is a data change on locales.
                 // The nested call does its own titlecasing and handler reporting, hence the
                 // immediate return instead of a break.
1935             subFormat(appendTo, u'a', count, capitalizationContext, fieldNum, u'b', handler, cal, status);
1936             return;
1937         } else {
1938             appendTo += *toAppend;
1939         }
1940 
1941         break;
1942     }
1943 
1944     case UDAT_FLEXIBLE_DAY_PERIOD_FIELD:
1945     {
1946         // TODO: Maybe fetch the DayperiodRules during initialization (instead of at the first
1947         // loading of an instance) if a relevant pattern character (b or B) is used.
1948         const DayPeriodRules *ruleSet = DayPeriodRules::getInstance(this->getSmpFmtLocale(), status);
1949         if (U_FAILURE(status)) {
1950             // Data doesn't conform to spec, therefore loading failed.
                 // NOTE(review): this is a break, not a return, so the code after the switch
                 // still runs and handler.addAttribute() is called with status already failed.
1951             break;
1952         }
1953         if (ruleSet == nullptr) {
1954             // Data doesn't exist for the locale we're looking for.
1955             // Falling back to am/pm.
1956             // We are passing a different fieldToOutput because we want to add
1957             // 'B' to field position. This makes this fallback stable when
1958             // there is a data change on locales.
1959             subFormat(appendTo, u'a', count, capitalizationContext, fieldNum, u'B', handler, cal, status);
1960             return;
1961         }
1962 
1963         // Get current display time.
             // (minute and second count as 0 when the pattern does not show them)
1964         int32_t hour = cal.get(UCAL_HOUR_OF_DAY, status);
1965         int32_t minute = 0;
1966         if (fHasMinute) {
1967             minute = cal.get(UCAL_MINUTE, status);
1968         }
1969         int32_t second = 0;
1970         if (fHasSecond) {
1971             second = cal.get(UCAL_SECOND, status);
1972         }
1973 
1974         // Determine day period.
1975         DayPeriodRules::DayPeriod periodType;
1976         if (hour == 0 && minute == 0 && second == 0 && ruleSet->hasMidnight()) {
1977             periodType = DayPeriodRules::DAYPERIOD_MIDNIGHT;
1978         } else if (hour == 12 && minute == 0 && second == 0 && ruleSet->hasNoon()) {
1979             periodType = DayPeriodRules::DAYPERIOD_NOON;
1980         } else {
1981             periodType = ruleSet->getDayPeriodForHour(hour);
1982         }
1983 
1984         // Rule set exists, therefore periodType can't be UNKNOWN.
1985         // Get localized string.
1986         U_ASSERT(periodType != DayPeriodRules::DAYPERIOD_UNKNOWN);
1987         UnicodeString *toAppend = nullptr;
1988         int32_t index;
1989 
1990         // Note: "midnight" can be ambiguous as to whether it refers to beginning of day or end of day.
1991         // For ICU 57 output of "midnight" is temporarily suppressed.
1992 
             // AM, PM and (suppressed) MIDNIGHT get no direct string here, so toAppend stays
             // nullptr for them; every other period type indexes the day-period arrays directly.
1993         if (periodType != DayPeriodRules::DAYPERIOD_AM &&
1994                 periodType != DayPeriodRules::DAYPERIOD_PM &&
1995                 periodType != DayPeriodRules::DAYPERIOD_MIDNIGHT) {
1996             index = (int32_t)periodType;
1997             if (count <= 3) {
1998                 toAppend = &fSymbols->fAbbreviatedDayPeriods[index];  // i.e. short
1999             } else if (count == 4 || count > 5) {
2000                 toAppend = &fSymbols->fWideDayPeriods[index];
2001             } else {  // count == 5
2002                 toAppend = &fSymbols->fNarrowDayPeriods[index];
2003             }
2004         }
2005 
2006         // Fallback schedule:
2007         // Midnight/Noon -> General Periods -> AM/PM.
2008 
2009         // Midnight/Noon -> General Periods.
2010         if ((toAppend == nullptr || toAppend->isBogus()) &&
2011                 (periodType == DayPeriodRules::DAYPERIOD_MIDNIGHT ||
2012                  periodType == DayPeriodRules::DAYPERIOD_NOON)) {
2013             periodType = ruleSet->getDayPeriodForHour(hour);
2014             index = (int32_t)periodType;
2015 
2016             if (count <= 3) {
2017                 toAppend = &fSymbols->fAbbreviatedDayPeriods[index];  // i.e. short
2018             } else if (count == 4 || count > 5) {
2019                 toAppend = &fSymbols->fWideDayPeriods[index];
2020             } else {  // count == 5
2021                 toAppend = &fSymbols->fNarrowDayPeriods[index];
2022             }
2023         }
2024 
2025         // General Periods -> AM/PM.
             // toAppend can still be nullptr only when periodType is AM or PM; the two
             // periodType tests come first, so toAppend->isBogus() is never reached with nullptr.
2026         if (periodType == DayPeriodRules::DAYPERIOD_AM ||
2027             periodType == DayPeriodRules::DAYPERIOD_PM ||
2028             toAppend->isBogus()) {
2029             // We are passing a different fieldToOutput because we want to add
2030             // 'B' to field position iterator. This makes this fallback stable when
2031             // there is a data change on locales.
2032             subFormat(appendTo, u'a', count, capitalizationContext, fieldNum, u'B', handler, cal, status);
2033             return;
2034         }
2035         else {
2036             appendTo += *toAppend;
2037         }
2038 
2039         break;
2040     }
2041 
2042     // all of the other pattern symbols can be formatted as simple numbers with
2043     // appropriate zero padding
2044     default:
2045         zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
2046         break;
2047     }
2048 #if !UCONFIG_NO_BREAK_ITERATION
2049     // if first field, check to see whether we need to and are able to titlecase it
         // (requires a capitalization break iterator, some appended text, and a lowercase
         // first code point in that text)
2050     if (fieldNum == 0 && fCapitalizationBrkIter != nullptr && appendTo.length() > beginOffset &&
2051             u_islower(appendTo.char32At(beginOffset))) {
2052         UBool titlecase = false;
2053         switch (capitalizationContext) {
2054             case UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE:
2055                 titlecase = true;
2056                 break;
                 // for the next two contexts the decision is looked up in fCapitalization using
                 // the usage type recorded by the case above: column 0 for UI list/menu,
                 // column 1 for standalone
2057             case UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU:
2058                 titlecase = fSymbols->fCapitalization[capContextUsageType][0];
2059                 break;
2060             case UDISPCTX_CAPITALIZATION_FOR_STANDALONE:
2061                 titlecase = fSymbols->fCapitalization[capContextUsageType][1];
2062                 break;
2063             default:
2064                 // titlecase = false;
2065                 break;
2066         }
2067         if (titlecase) {
                 // Work on a clone of the break iterator (presumably because toTitle needs a
                 // mutable iterator while this method is const); only the text appended by
                 // this call is titlecased and written back.
2068             BreakIterator* const mutableCapitalizationBrkIter = fCapitalizationBrkIter->clone();
2069             UnicodeString firstField(appendTo, beginOffset);
2070             firstField.toTitle(mutableCapitalizationBrkIter, fLocale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
2071             appendTo.replaceBetween(beginOffset, appendTo.length(), firstField);
2072             delete mutableCapitalizationBrkIter;
2073         }
2074     }
2075 #endif
2076 
         // Report the range of text appended by this call, tagged with the field index of
         // fieldToOutput (which is not necessarily that of ch).
2077     handler.addAttribute(DateFormatSymbols::getPatternCharIndex(fieldToOutput), beginOffset, appendTo.length());
2078 }
2079 
2080 //----------------------------------------------------------------------
2081 
adoptNumberFormat(NumberFormat * formatToAdopt)2082 void SimpleDateFormat::adoptNumberFormat(NumberFormat *formatToAdopt) {
2083     // Null out the fast formatter, it references fNumberFormat which we're
2084     // about to invalidate
2085     delete fSimpleNumberFormatter;
2086     fSimpleNumberFormatter = nullptr;
2087 
2088     fixNumberFormatForDates(*formatToAdopt);
2089     delete fNumberFormat;
2090     fNumberFormat = formatToAdopt;
2091 
2092     // We successfully set the default number format. Now delete the overrides
2093     // (can't fail).
2094     if (fSharedNumberFormatters) {
2095         freeSharedNumberFormatters(fSharedNumberFormatters);
2096         fSharedNumberFormatters = nullptr;
2097     }
2098 
2099     // Recompute fSimpleNumberFormatter if necessary
2100     UErrorCode localStatus = U_ZERO_ERROR;
2101     initSimpleNumberFormatter(localStatus);
2102 }
2103 
adoptNumberFormat(const UnicodeString & fields,NumberFormat * formatToAdopt,UErrorCode & status)2104 void SimpleDateFormat::adoptNumberFormat(const UnicodeString& fields, NumberFormat *formatToAdopt, UErrorCode &status){
2105     fixNumberFormatForDates(*formatToAdopt);
2106     LocalPointer<NumberFormat> fmt(formatToAdopt);
2107     if (U_FAILURE(status)) {
2108         return;
2109     }
2110 
2111     // We must ensure fSharedNumberFormatters is allocated.
2112     if (fSharedNumberFormatters == nullptr) {
2113         fSharedNumberFormatters = allocSharedNumberFormatters();
2114         if (fSharedNumberFormatters == nullptr) {
2115             status = U_MEMORY_ALLOCATION_ERROR;
2116             return;
2117         }
2118     }
2119     const SharedNumberFormat *newFormat = createSharedNumberFormat(fmt.orphan());
2120     if (newFormat == nullptr) {
2121         status = U_MEMORY_ALLOCATION_ERROR;
2122         return;
2123     }
2124     for (int i=0; i<fields.length(); i++) {
2125         char16_t field = fields.charAt(i);
2126         // if the pattern character is unrecognized, signal an error and bail out
2127         UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(field);
2128         if (patternCharIndex == UDAT_FIELD_COUNT) {
2129             status = U_INVALID_FORMAT_ERROR;
2130             newFormat->deleteIfZeroRefCount();
2131             return;
2132         }
2133 
2134         // Set the number formatter in the table
2135         SharedObject::copyPtr(
2136                 newFormat, fSharedNumberFormatters[patternCharIndex]);
2137     }
2138     newFormat->deleteIfZeroRefCount();
2139 }
2140 
2141 const NumberFormat *
getNumberFormatForField(char16_t field) const2142 SimpleDateFormat::getNumberFormatForField(char16_t field) const {
2143     UDateFormatField index = DateFormatSymbols::getPatternCharIndex(field);
2144     if (index == UDAT_FIELD_COUNT) {
2145         return nullptr;
2146     }
2147     return getNumberFormatByIndex(index);
2148 }
2149 
2150 //----------------------------------------------------------------------
2151 void
zeroPaddingNumber(const NumberFormat * currentNumberFormat,UnicodeString & appendTo,int32_t value,int32_t minDigits,int32_t maxDigits) const2152 SimpleDateFormat::zeroPaddingNumber(
2153         const NumberFormat *currentNumberFormat,
2154         UnicodeString &appendTo,
2155         int32_t value, int32_t minDigits, int32_t maxDigits) const
2156 {
2157 
2158     if (currentNumberFormat == fNumberFormat && fSimpleNumberFormatter) {
2159         // Can use fast path
2160         UErrorCode localStatus = U_ZERO_ERROR;
2161         number::SimpleNumber number = number::SimpleNumber::forInt64(value, localStatus);
2162         number.setMinimumIntegerDigits(minDigits, localStatus);
2163         number.truncateStart(maxDigits, localStatus);
2164 
2165         number::FormattedNumber result = fSimpleNumberFormatter->format(std::move(number), localStatus);
2166         if (U_FAILURE(localStatus)) {
2167             return;
2168         }
2169         appendTo.append(result.toTempString(localStatus));
2170         return;
2171     }
2172 
2173     // Check for RBNF (no clone necessary)
2174     auto* rbnf = dynamic_cast<const RuleBasedNumberFormat*>(currentNumberFormat);
2175     if (rbnf != nullptr) {
2176         FieldPosition pos(FieldPosition::DONT_CARE);
2177         rbnf->format(value, appendTo, pos);  // 3rd arg is there to speed up processing
2178         return;
2179     }
2180 
2181     // Fall back to slow path (clone and mutate the NumberFormat)
2182     if (currentNumberFormat != nullptr) {
2183         FieldPosition pos(FieldPosition::DONT_CARE);
2184         LocalPointer<NumberFormat> nf(currentNumberFormat->clone());
2185         nf->setMinimumIntegerDigits(minDigits);
2186         nf->setMaximumIntegerDigits(maxDigits);
2187         nf->format(value, appendTo, pos);  // 3rd arg is there to speed up processing
2188     }
2189 }
2190 
2191 //----------------------------------------------------------------------
2192 
2193 /**
2194  * Return true if the given format character, occurring count
2195  * times, represents a numeric field.
2196  */
isNumeric(char16_t formatChar,int32_t count)2197 UBool SimpleDateFormat::isNumeric(char16_t formatChar, int32_t count) {
2198     return DateFormatSymbols::isNumericPatternChar(formatChar, count);
2199 }
2200 
2201 UBool
isAtNumericField(const UnicodeString & pattern,int32_t patternOffset)2202 SimpleDateFormat::isAtNumericField(const UnicodeString &pattern, int32_t patternOffset) {
2203     if (patternOffset >= pattern.length()) {
2204         // not at any field
2205         return false;
2206     }
2207     char16_t ch = pattern.charAt(patternOffset);
2208     UDateFormatField f = DateFormatSymbols::getPatternCharIndex(ch);
2209     if (f == UDAT_FIELD_COUNT) {
2210         // not at any field
2211         return false;
2212     }
2213     int32_t i = patternOffset;
2214     while (pattern.charAt(++i) == ch) {}
2215     return DateFormatSymbols::isNumericField(f, i - patternOffset);
2216 }
2217 
2218 UBool
isAfterNonNumericField(const UnicodeString & pattern,int32_t patternOffset)2219 SimpleDateFormat::isAfterNonNumericField(const UnicodeString &pattern, int32_t patternOffset) {
2220     if (patternOffset <= 0) {
2221         // not after any field
2222         return false;
2223     }
2224     char16_t ch = pattern.charAt(--patternOffset);
2225     UDateFormatField f = DateFormatSymbols::getPatternCharIndex(ch);
2226     if (f == UDAT_FIELD_COUNT) {
2227         // not after any field
2228         return false;
2229     }
2230     int32_t i = patternOffset;
2231     while (pattern.charAt(--i) == ch) {}
2232     return !DateFormatSymbols::isNumericField(f, patternOffset - i);
2233 }
2234 
// Parses `text` against fPattern, starting at parsePos.getIndex(), and records
// the parsed fields on a calendar.
//  - A negative start index sets the error index to 0 and returns immediately.
//  - On success, parsePos's index is set to the offset reached in `text`.
//  - If `status` ends up as a failure (parse error, allocation failure, or a
//    failed Calendar call), the error index is set to the offset reached and
//    the index is reset to the starting offset.
// All failure paths jump to the ExitParse label, which also releases the
// locally allocated calendar clone and leap-month formatter.
2235 void
parse(const UnicodeString & text,Calendar & cal,ParsePosition & parsePos) const2236 SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& parsePos) const
2237 {
2238     UErrorCode status = U_ZERO_ERROR;
2239     int32_t pos = parsePos.getIndex();
2240     if(parsePos.getIndex() < 0) {
2241         parsePos.setErrorIndex(0);
2242         return;
2243     }
2244     int32_t start = pos;
2245 
2246     // Hold the day period until everything else is parsed, because we need
2247     // the hour to interpret time correctly.
2248     int32_t dayPeriodInt = -1;
2249 
// Single-element array so subParse() can report an ambiguous two-digit year
// back through it.
2250     UBool ambiguousYear[] = { false };
2251     int32_t saveHebrewMonth = -1;
2252     int32_t count = 0;
2253     UTimeZoneFormatTimeType tzTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
2254 
2255     // For parsing abutting numeric fields. 'abutPat' is the
2256     // offset into 'pattern' of the first of 2 or more abutting
2257     // numeric fields.  'abutStart' is the offset into 'text'
2258     // where parsing the fields begins. 'abutPass' starts off as 0
2259     // and increments each time we try to parse the fields.
2260     int32_t abutPat = -1; // If >=0, we are in a run of abutting numeric fields
2261     int32_t abutStart = 0;
2262     int32_t abutPass = 0;
2263     UBool inQuote = false;
2264 
2265     MessageFormat * numericLeapMonthFormatter = nullptr;
2266 
// workCal is the calendar handed to subParse(): `cal` itself, or a clone of
// fCalendar when `cal` is a different object with a different calendar type.
2267     Calendar* calClone = nullptr;
2268     Calendar *workCal = &cal;
2269     if (&cal != fCalendar && uprv_strcmp(cal.getType(), fCalendar->getType()) != 0) {
2270         // Different calendar type
2271         // We use the time/zone from the input calendar, but
2272         // do not use the input calendar for field calculation.
2273         calClone = fCalendar->clone();
2274         if (calClone != nullptr) {
2275             calClone->setTime(cal.getTime(status),status);
2276             if (U_FAILURE(status)) {
2277                 goto ExitParse;
2278             }
2279             calClone->setTimeZone(cal.getTimeZone());
2280             workCal = calClone;
2281         } else {
2282             status = U_MEMORY_ALLOCATION_ERROR;
2283             goto ExitParse;
2284         }
2285     }
2286 
// Build the numeric leap-month formatter only when the symbols provide a
// complete set of leap month patterns; otherwise it stays null.
2287     if (fSymbols->fLeapMonthPatterns != nullptr && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) {
2288         numericLeapMonthFormatter = new MessageFormat(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric], fLocale, status);
2289         if (numericLeapMonthFormatter == nullptr) {
2290              status = U_MEMORY_ALLOCATION_ERROR;
2291              goto ExitParse;
2292         } else if (U_FAILURE(status)) {
2293              goto ExitParse; // this will delete numericLeapMonthFormatter
2294         }
2295     }
2296 
// Main loop: walk the pattern, consuming one field specifier or one run of
// literal text per iteration.
2297     for (int32_t i=0; i<fPattern.length(); ++i) {
2298         char16_t ch = fPattern.charAt(i);
2299 
2300         // Handle alphabetic field characters.
2301         if (!inQuote && isSyntaxChar(ch)) {
2302             int32_t fieldPat = i;
2303 
2304             // Count the length of this field specifier
2305             count = 1;
2306             while ((i+1)<fPattern.length() &&
2307                    fPattern.charAt(i+1) == ch) {
2308                 ++count;
2309                 ++i;
2310             }
2311 
2312             if (isNumeric(ch, count)) {
2313                 if (abutPat < 0) {
2314                     // Determine if there is an abutting numeric field.
2315                     // Record the start of a set of abutting numeric fields.
2316                     if (isAtNumericField(fPattern, i + 1)) {
2317                         abutPat = fieldPat;
2318                         abutStart = pos;
2319                         abutPass = 0;
2320                     }
2321                 }
2322             } else {
2323                 abutPat = -1; // End of any abutting fields
2324             }
2325 
2326             // Handle fields within a run of abutting numeric fields.  Take
2327             // the pattern "HHmmss" as an example. We will try to parse
2328             // 2/2/2 characters of the input text, then if that fails,
2329             // 1/2/2.  We only adjust the width of the leftmost field; the
2330             // others remain fixed.  This allows "123456" => 12:34:56, but
2331             // "12345" => 1:23:45.  Likewise, for the pattern "yyyyMMdd" we
2332             // try 4/2/2, 3/2/2, 2/2/2, and finally 1/2/2.
2333             if (abutPat >= 0) {
2334                 // If we are at the start of a run of abutting fields, then
2335                 // shorten this field in each pass.  If we can't shorten
2336                 // this field any more, then the parse of this set of
2337                 // abutting numeric fields has failed.
2338                 if (fieldPat == abutPat) {
2339                     count -= abutPass++;
2340                     if (count == 0) {
2341                         status = U_PARSE_ERROR;
2342                         goto ExitParse;
2343                     }
2344                 }
2345 
2346                 pos = subParse(text, pos, ch, count,
2347                                true, false, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter, &tzTimeType);
2348 
2349                 // If the parse fails anywhere in the run, back up to the
2350                 // start of the run and retry.
2351                 if (pos < 0) {
// The loop's ++i brings i back to abutPat for the next pass.
2352                     i = abutPat - 1;
2353                     pos = abutStart;
2354                     continue;
2355                 }
2356             }
2357 
2358             // Handle non-numeric fields and non-abutting numeric
2359             // fields.
2360             else if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored
2361                 int32_t s = subParse(text, pos, ch, count,
2362                                false, true, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter, &tzTimeType, &dayPeriodInt);
2363 
2364                 if (s == -pos-1) {
2365                     // era not present, in special cases allow this to continue
2366                     // from the position where the era was expected
2367                     s = pos;
2368 
2369                     if (i+1 < fPattern.length()) {
2370                         // move to next pattern character
2371                         char16_t c = fPattern.charAt(i+1);
2372 
2373                         // check for whitespace
2374                         if (PatternProps::isWhiteSpace(c)) {
2375                             i++;
2376                             // Advance over run in pattern
2377                             while ((i+1)<fPattern.length() &&
2378                                    PatternProps::isWhiteSpace(fPattern.charAt(i+1))) {
2379                                 ++i;
2380                             }
2381                         }
2382                     }
2383                 }
2384                 else if (s <= 0) {
2385                     status = U_PARSE_ERROR;
2386                     goto ExitParse;
2387                 }
2388                 pos = s;
2389             }
2390         }
2391 
2392         // Handle literal pattern characters.  These are any
2393         // quoted characters and non-alphabetic unquoted
2394         // characters.
2395         else {
2396 
2397             abutPat = -1; // End of any abutting fields
2398 
// matchLiterals() takes i and pos by reference and advances both.
2399             if (! matchLiterals(fPattern, i, text, pos, getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status), getBooleanAttribute(UDAT_PARSE_PARTIAL_LITERAL_MATCH, status), isLenient())) {
2400                 status = U_PARSE_ERROR;
2401                 goto ExitParse;
2402             }
2403         }
2404     }
2405 
2406     // Special hack for trailing "." after non-numeric field.
2407     if (text.charAt(pos) == 0x2e && getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) {
2408         // only do if the last field is not numeric
2409         if (isAfterNonNumericField(fPattern, fPattern.length())) {
2410             pos++; // skip the extra "."
2411         }
2412     }
2413 
// NOTE(review): this block and the ambiguous-year / time-zone fix-ups below
// read and set fields on `cal`, while subParse() above wrote to *workCal.
// When a clone is in use, ExitParse afterwards replaces cal's time zone and
// time from workCal — confirm this is intended.
2414     // If dayPeriod is set, use it in conjunction with hour-of-day to determine am/pm.
2415     if (dayPeriodInt >= 0) {
2416         DayPeriodRules::DayPeriod dayPeriod = (DayPeriodRules::DayPeriod)dayPeriodInt;
// NOTE(review): ruleSet is used below without a null check; presumably
// getInstance() only returns null together with a failing status that the
// callee tests first — confirm.
2417         const DayPeriodRules *ruleSet = DayPeriodRules::getInstance(this->getSmpFmtLocale(), status);
2418 
2419         if (!cal.isSet(UCAL_HOUR) && !cal.isSet(UCAL_HOUR_OF_DAY)) {
2420             // If hour is not set, set time to the midpoint of current day period, overwriting
2421             // minutes if it's set.
2422             double midPoint = ruleSet->getMidPointForDayPeriod(dayPeriod, status);
2423 
2424             // If we can't get midPoint we do nothing.
2425             if (U_SUCCESS(status)) {
2426                 // Truncate midPoint toward zero to get the hour.
2427                 // Any leftover means it was a half-hour.
2428                 int32_t midPointHour = (int32_t) midPoint;
2429                 int32_t midPointMinute = (midPoint - midPointHour) > 0 ? 30 : 0;
2430 
2431                 // No need to set am/pm because hour-of-day is set last therefore takes precedence.
2432                 cal.set(UCAL_HOUR_OF_DAY, midPointHour);
2433                 cal.set(UCAL_MINUTE, midPointMinute);
2434             }
2435         } else {
2436             int hourOfDay;
2437 
2438             if (cal.isSet(UCAL_HOUR_OF_DAY)) {  // Hour is parsed in 24-hour format.
2439                 hourOfDay = cal.get(UCAL_HOUR_OF_DAY, status);
2440             } else {  // Hour is parsed in 12-hour format.
2441                 hourOfDay = cal.get(UCAL_HOUR, status);
2442                 // cal.get() turns 12 to 0 for 12-hour time; change 0 to 12
2443                 // so 0 unambiguously means a 24-hour time from above.
2444                 if (hourOfDay == 0) { hourOfDay = 12; }
2445             }
2446             U_ASSERT(0 <= hourOfDay && hourOfDay <= 23);
2447 
2448 
2449             // If hour-of-day is 0 or 13 thru 23 then input time in unambiguously in 24-hour format.
2450             if (hourOfDay == 0 || (13 <= hourOfDay && hourOfDay <= 23)) {
2451                 // Make hour-of-day take precedence over (hour + am/pm) by setting it again.
2452                 cal.set(UCAL_HOUR_OF_DAY, hourOfDay);
2453             } else {
2454                 // We have a 12-hour time and need to choose between am and pm.
2455                 // Behave as if dayPeriod spanned 6 hours each way from its center point.
2456                 // This will parse correctly for consistent time + period (e.g. 10 at night) as
2457                 // well as provide a reasonable recovery for inconsistent time + period (e.g.
2458                 // 9 in the afternoon).
2459 
2460                 // Assume current time is in the AM.
2461                 // - Change 12 back to 0 for easier handling of 12am.
2462                 // - Append minutes as fractional hours because e.g. 8:15 and 8:45 could be parsed
2463                 // into different half-days if center of dayPeriod is at 14:30.
2464                 // - cal.get(MINUTE) will return 0 if MINUTE is unset, which works.
2465                 if (hourOfDay == 12) { hourOfDay = 0; }
2466                 double currentHour = hourOfDay + (cal.get(UCAL_MINUTE, status)) / 60.0;
2467                 double midPointHour = ruleSet->getMidPointForDayPeriod(dayPeriod, status);
2468 
2469                 if (U_SUCCESS(status)) {
2470                     double hoursAheadMidPoint = currentHour - midPointHour;
2471 
2472                     // Assume current time is in the AM.
2473                     if (-6 <= hoursAheadMidPoint && hoursAheadMidPoint < 6) {
2474                         // Assumption holds; set time as such.
2475                         cal.set(UCAL_AM_PM, 0);
2476                     } else {
2477                         cal.set(UCAL_AM_PM, 1);
2478                     }
2479                 }
2480             }
2481         }
2482     }
2483 
2484     // At this point the fields of Calendar have been set.  Calendar
2485     // will fill in default values for missing fields when the time
2486     // is computed.
2487 
// Provisional: the failure handling after ExitParse resets the index to
// `start` if status is (or later becomes) a failure.
2488     parsePos.setIndex(pos);
2489 
2490     // This part is a problem:  When we call parsedDate.after, we compute the time.
2491     // Take the date April 3 2004 at 2:30 am.  When this is first set up, the year
2492     // will be wrong if we're parsing a 2-digit year pattern.  It will be 1904.
2493     // April 3 1904 is a Sunday (unlike 2004) so it is the DST onset day.  2:30 am
2494     // is therefore an "impossible" time, since the time goes from 1:59 to 3:00 am
2495     // on that day.  It is therefore parsed out to fields as 3:30 am.  Then we
2496     // add 100 years, and get April 3 2004 at 3:30 am.  Note that April 3 2004 is
2497     // a Saturday, so it can have a 2:30 am -- and it should. [LIU]
2498     /*
2499         UDate parsedDate = calendar.getTime();
2500         if( ambiguousYear[0] && !parsedDate.after(fDefaultCenturyStart) ) {
2501             calendar.add(Calendar.YEAR, 100);
2502             parsedDate = calendar.getTime();
2503         }
2504     */
2505     // Because of the above condition, save off the fields in case we need to readjust.
2506     // The procedure we use here is not particularly efficient, but there is no other
2507     // way to do this given the API restrictions present in Calendar.  We minimize
2508     // inefficiency by only performing this computation when it might apply, that is,
2509     // when the two-digit year is equal to the start year, and thus might fall at the
2510     // front or the back of the default century.  This only works because we adjust
2511     // the year correctly to start with in other cases -- see subParse().
2512     if (ambiguousYear[0] || tzTimeType != UTZFMT_TIME_TYPE_UNKNOWN) // If this is true then the two-digit year == the default start year
2513     {
2514         // We need a copy of the fields, and we need to avoid triggering a call to
2515         // complete(), which will recalculate the fields.  Since we can't access
2516         // the fields[] array in Calendar, we clone the entire object.  This will
2517         // stop working if Calendar.clone() is ever rewritten to call complete().
2518         Calendar *copy;
2519         if (ambiguousYear[0]) {
2520             copy = cal.clone();
2521             // Check for failed cloning.
2522             if (copy == nullptr) {
2523                 status = U_MEMORY_ALLOCATION_ERROR;
2524                 goto ExitParse;
2525             }
2526             UDate parsedDate = copy->getTime(status);
2527             // {sfb} check internalGetDefaultCenturyStart
2528             if (fHaveDefaultCentury && (parsedDate < fDefaultCenturyStart)) {
2529                 // We can't use add here because that does a complete() first.
2530                 cal.set(UCAL_YEAR, fDefaultCenturyStartYear + 100);
2531             }
2532             delete copy;
2533         }
2534 
// A zone was parsed as explicitly standard or daylight: recompute the zone
// and DST offsets so the result agrees with that parsed type.
2535         if (tzTimeType != UTZFMT_TIME_TYPE_UNKNOWN) {
2536             copy = cal.clone();
2537             // Check for failed cloning.
2538             if (copy == nullptr) {
2539                 status = U_MEMORY_ALLOCATION_ERROR;
2540                 goto ExitParse;
2541             }
2542             const TimeZone & tz = cal.getTimeZone();
2543             BasicTimeZone *btz = nullptr;
2544 
// btz stays null unless tz is one of the four concrete zone classes tested
// here; the null case falls back to the plain TimeZone API below.
2545             if (dynamic_cast<const OlsonTimeZone *>(&tz) != nullptr
2546                 || dynamic_cast<const SimpleTimeZone *>(&tz) != nullptr
2547                 || dynamic_cast<const RuleBasedTimeZone *>(&tz) != nullptr
2548                 || dynamic_cast<const VTimeZone *>(&tz) != nullptr) {
2549                 btz = (BasicTimeZone*)&tz;
2550             }
2551 
2552             // Get local millis
2553             copy->set(UCAL_ZONE_OFFSET, 0);
2554             copy->set(UCAL_DST_OFFSET, 0);
2555             UDate localMillis = copy->getTime(status);
2556 
2557             // Make sure parsed time zone type (Standard or Daylight)
2558             // matches the rule used by the parsed time zone.
2559             int32_t raw, dst;
2560             if (btz != nullptr) {
2561                 if (tzTimeType == UTZFMT_TIME_TYPE_STANDARD) {
2562                     btz->getOffsetFromLocal(localMillis,
2563                         UCAL_TZ_LOCAL_STANDARD_FORMER, UCAL_TZ_LOCAL_STANDARD_LATTER, raw, dst, status);
2564                 } else {
2565                     btz->getOffsetFromLocal(localMillis,
2566                         UCAL_TZ_LOCAL_DAYLIGHT_FORMER, UCAL_TZ_LOCAL_DAYLIGHT_LATTER, raw, dst, status);
2567                 }
2568             } else {
2569                 // No good way to resolve ambiguous time at transition,
2570                 // but following code work in most case.
2571                 tz.getOffset(localMillis, true, raw, dst, status);
2572             }
2573 
2574             // Now, compare the results with parsed type, either standard or daylight saving time
2575             int32_t resolvedSavings = dst;
2576             if (tzTimeType == UTZFMT_TIME_TYPE_STANDARD) {
2577                 if (dst != 0) {
2578                     // Override DST_OFFSET = 0 in the result calendar
2579                     resolvedSavings = 0;
2580                 }
2581             } else { // tztype == TZTYPE_DST
2582                 if (dst == 0) {
2583                     if (btz != nullptr) {
2584                         // This implementation resolves daylight saving time offset
2585                         // closest rule after the given time.
2586                         UDate baseTime = localMillis + raw;
2587                         UDate time = baseTime;
2588                         UDate limit = baseTime + MAX_DAYLIGHT_DETECTION_RANGE;
2589                         TimeZoneTransition trs;
2590                         UBool trsAvail;
2591 
2592                         // Search for DST rule after the given time
2593                         while (time < limit) {
2594                             trsAvail = btz->getNextTransition(time, false, trs);
2595                             if (!trsAvail) {
2596                                 break;
2597                             }
2598                             resolvedSavings = trs.getTo()->getDSTSavings();
2599                             if (resolvedSavings != 0) {
2600                                 break;
2601                             }
2602                             time = trs.getTime();
2603                         }
2604 
2605                         if (resolvedSavings == 0) {
2606                             // If no DST rule after the given time was found, search for
2607                             // DST rule before.
2608                             time = baseTime;
2609                             limit = baseTime - MAX_DAYLIGHT_DETECTION_RANGE;
2610                             while (time > limit) {
2611                                 trsAvail = btz->getPreviousTransition(time, true, trs);
2612                                 if (!trsAvail) {
2613                                     break;
2614                                 }
2615                                 resolvedSavings = trs.getFrom()->getDSTSavings();
2616                                 if (resolvedSavings != 0) {
2617                                     break;
2618                                 }
2619                                 time = trs.getTime() - 1;
2620                             }
2621 
2622                             if (resolvedSavings == 0) {
2623                                 resolvedSavings = btz->getDSTSavings();
2624                             }
2625                         }
2626                     } else {
2627                         resolvedSavings = tz.getDSTSavings();
2628                     }
2629                     if (resolvedSavings == 0) {
2630                         // final fallback
2631                         resolvedSavings = U_MILLIS_PER_HOUR;
2632                     }
2633                 }
2634             }
2635             cal.set(UCAL_ZONE_OFFSET, raw);
2636             cal.set(UCAL_DST_OFFSET, resolvedSavings);
2637             delete copy;
2638         }
2639     }
// Common exit: every goto above lands here, as does normal completion.
2640 ExitParse:
2641     // Set the parsed result if local calendar is used
2642     // instead of the input calendar
2643     if (U_SUCCESS(status) && workCal != &cal) {
2644         cal.setTimeZone(workCal->getTimeZone());
2645         cal.setTime(workCal->getTime(status), status);
2646     }
2647 
2648     if (numericLeapMonthFormatter != nullptr) {
2649         delete numericLeapMonthFormatter;
2650     }
2651     if (calClone != nullptr) {
2652         delete calClone;
2653     }
2654 
2655     // If any Calendar calls failed, we pretend that we
2656     // couldn't parse the string, when in reality this isn't quite accurate--
2657     // we did parse it; the Calendar calls just failed.
2658     if (U_FAILURE(status)) {
2659         parsePos.setErrorIndex(pos);
2660         parsePos.setIndex(start);
2661     }
2662 }
2663 
2664 //----------------------------------------------------------------------
2665 
// Forward declaration of a file-local (static) helper used by the string
// matchers that follow. Those callers treat the return value as a match
// length for `data` against `text` at `index`: they keep the candidate with
// the largest result and add it to the start offset.
// NOTE(review): the definition is outside this excerpt — confirm the exact
// contract (including the "optional dot" handling) there.
2666 static int32_t
2667 matchStringWithOptionalDot(const UnicodeString &text,
2668                             int32_t index,
2669                             const UnicodeString &data);
2670 
matchQuarterString(const UnicodeString & text,int32_t start,UCalendarDateFields field,const UnicodeString * data,int32_t dataCount,Calendar & cal) const2671 int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
2672                               int32_t start,
2673                               UCalendarDateFields field,
2674                               const UnicodeString* data,
2675                               int32_t dataCount,
2676                               Calendar& cal) const
2677 {
2678     int32_t i = 0;
2679     int32_t count = dataCount;
2680 
2681     // There may be multiple strings in the data[] array which begin with
2682     // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
2683     // We keep track of the longest match, and return that.  Note that this
2684     // unfortunately requires us to test all array elements.
2685     int32_t bestMatchLength = 0, bestMatch = -1;
2686     UnicodeString bestMatchName;
2687 
2688     for (; i < count; ++i) {
2689         int32_t matchLength = 0;
2690         if ((matchLength = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) {
2691             bestMatchLength = matchLength;
2692             bestMatch = i;
2693         }
2694     }
2695 
2696     if (bestMatch >= 0) {
2697         cal.set(field, bestMatch * 3);
2698         return start + bestMatchLength;
2699     }
2700 
2701     return -start;
2702 }
2703 
matchDayPeriodStrings(const UnicodeString & text,int32_t start,const UnicodeString * data,int32_t dataCount,int32_t & dayPeriod) const2704 int32_t SimpleDateFormat::matchDayPeriodStrings(const UnicodeString& text, int32_t start,
2705                               const UnicodeString* data, int32_t dataCount,
2706                               int32_t &dayPeriod) const
2707 {
2708 
2709     int32_t bestMatchLength = 0, bestMatch = -1;
2710 
2711     for (int32_t i = 0; i < dataCount; ++i) {
2712         int32_t matchLength = 0;
2713         if ((matchLength = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) {
2714             bestMatchLength = matchLength;
2715             bestMatch = i;
2716         }
2717     }
2718 
2719     if (bestMatch >= 0) {
2720         dayPeriod = bestMatch;
2721         return start + bestMatchLength;
2722     }
2723 
2724     return -start;
2725 }
2726 
2727 //----------------------------------------------------------------------
matchLiterals(const UnicodeString & pattern,int32_t & patternOffset,const UnicodeString & text,int32_t & textOffset,UBool whitespaceLenient,UBool partialMatchLenient,UBool oldLeniency)2728 UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern,
2729                                       int32_t &patternOffset,
2730                                       const UnicodeString &text,
2731                                       int32_t &textOffset,
2732                                       UBool whitespaceLenient,
2733                                       UBool partialMatchLenient,
2734                                       UBool oldLeniency)
2735 {
2736     UBool inQuote = false;
2737     UnicodeString literal;
2738     int32_t i = patternOffset;
2739 
2740     // scan pattern looking for contiguous literal characters
2741     for ( ; i < pattern.length(); i += 1) {
2742         char16_t ch = pattern.charAt(i);
2743 
2744         if (!inQuote && isSyntaxChar(ch)) {
2745             break;
2746         }
2747 
2748         if (ch == QUOTE) {
2749             // Match a quote literal ('') inside OR outside of quotes
2750             if ((i + 1) < pattern.length() && pattern.charAt(i + 1) == QUOTE) {
2751                 i += 1;
2752             } else {
2753                 inQuote = !inQuote;
2754                 continue;
2755             }
2756         }
2757 
2758         literal += ch;
2759     }
2760 
2761     // at this point, literal contains the literal text
2762     // and i is the index of the next non-literal pattern character.
2763     int32_t p;
2764     int32_t t = textOffset;
2765 
2766     if (whitespaceLenient) {
2767         // trim leading, trailing whitespace from
2768         // the literal text
2769         literal.trim();
2770 
2771         // ignore any leading whitespace in the text
2772         while (t < text.length() && u_isWhitespace(text.charAt(t))) {
2773             t += 1;
2774         }
2775     }
2776 
2777     for (p = 0; p < literal.length() && t < text.length();) {
2778         UBool needWhitespace = false;
2779 
2780         while (p < literal.length() && PatternProps::isWhiteSpace(literal.charAt(p))) {
2781             needWhitespace = true;
2782             p += 1;
2783         }
2784 
2785         if (needWhitespace) {
2786             int32_t tStart = t;
2787 
2788             while (t < text.length()) {
2789                 char16_t tch = text.charAt(t);
2790 
2791                 if (!u_isUWhiteSpace(tch) && !PatternProps::isWhiteSpace(tch)) {
2792                     break;
2793                 }
2794 
2795                 t += 1;
2796             }
2797 
2798             // TODO: should we require internal spaces
2799             // in lenient mode? (There won't be any
2800             // leading or trailing spaces)
2801             if (!whitespaceLenient && t == tStart) {
2802                 // didn't find matching whitespace:
2803                 // an error in strict mode
2804                 return false;
2805             }
2806 
2807             // In strict mode, this run of whitespace
2808             // may have been at the end.
2809             if (p >= literal.length()) {
2810                 break;
2811             }
2812         }
2813         if (t >= text.length() || literal.charAt(p) != text.charAt(t)) {
2814             // Ran out of text, or found a non-matching character:
2815             // OK in lenient mode, an error in strict mode.
2816             if (whitespaceLenient) {
2817                 if (t == textOffset && text.charAt(t) == 0x2e &&
2818                         isAfterNonNumericField(pattern, patternOffset)) {
2819                     // Lenient mode and the literal input text begins with a "." and
2820                     // we are after a non-numeric field: We skip the "."
2821                     ++t;
2822                     continue;  // Do not update p.
2823                 }
2824                 // if it is actual whitespace and we're whitespace lenient it's OK
2825 
2826                 char16_t wsc = text.charAt(t);
2827                 if(PatternProps::isWhiteSpace(wsc)) {
2828                     // Lenient mode and it's just whitespace we skip it
2829                     ++t;
2830                     continue;  // Do not update p.
2831                 }
2832             }
2833             // hack around oldleniency being a bit of a catch-all bucket and we're just adding support specifically for partial matches
2834             if(partialMatchLenient && oldLeniency) {
2835                 break;
2836             }
2837 
2838             return false;
2839         }
2840         ++p;
2841         ++t;
2842     }
2843 
2844     // At this point if we're in strict mode we have a complete match.
2845     // If we're in lenient mode we may have a partial match, or no
2846     // match at all.
2847     if (p <= 0) {
2848         // no match. Pretend it matched a run of whitespace
2849         // and ignorables in the text.
2850         const  UnicodeSet *ignorables = nullptr;
2851         UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(pattern.charAt(i));
2852         if (patternCharIndex != UDAT_FIELD_COUNT) {
2853             ignorables = SimpleDateFormatStaticSets::getIgnorables(patternCharIndex);
2854         }
2855 
2856         for (t = textOffset; t < text.length(); t += 1) {
2857             char16_t ch = text.charAt(t);
2858 
2859             if (ignorables == nullptr || !ignorables->contains(ch)) {
2860                 break;
2861             }
2862         }
2863     }
2864 
2865     // if we get here, we've got a complete match.
2866     patternOffset = i - 1;
2867     textOffset = t;
2868 
2869     return true;
2870 }
2871 
2872 //----------------------------------------------------------------------
2873 // check both wide and abbrev months.
2874 // Does not currently handle monthPattern.
2875 // UCalendarDateFields field = UCAL_MONTH
2876 
matchAlphaMonthStrings(const UnicodeString & text,int32_t start,const UnicodeString * wideData,const UnicodeString * shortData,int32_t dataCount,Calendar & cal) const2877 int32_t SimpleDateFormat::matchAlphaMonthStrings(const UnicodeString& text,
2878                               int32_t start,
2879                               const UnicodeString* wideData,
2880                               const UnicodeString* shortData,
2881                               int32_t dataCount,
2882                               Calendar& cal) const
2883 {
2884     int32_t i;
2885     int32_t bestMatchLength = 0, bestMatch = -1;
2886 
2887     for (i = 0; i < dataCount; ++i) {
2888         int32_t matchLen = 0;
2889         if ((matchLen = matchStringWithOptionalDot(text, start, wideData[i])) > bestMatchLength) {
2890             bestMatch = i;
2891             bestMatchLength = matchLen;
2892         }
2893     }
2894     for (i = 0; i < dataCount; ++i) {
2895         int32_t matchLen = 0;
2896         if ((matchLen = matchStringWithOptionalDot(text, start, shortData[i])) > bestMatchLength) {
2897             bestMatch = i;
2898             bestMatchLength = matchLen;
2899         }
2900     }
2901 
2902     if (bestMatch >= 0) {
2903         // Adjustment for Hebrew Calendar month Adar II
2904         if (!strcmp(cal.getType(),"hebrew") && bestMatch==13) {
2905             cal.set(UCAL_MONTH,6);
2906         } else {
2907             cal.set(UCAL_MONTH, bestMatch);
2908         }
2909         return start + bestMatchLength;
2910     }
2911 
2912     return -start;
2913 }
2914 
2915 //----------------------------------------------------------------------
2916 
matchString(const UnicodeString & text,int32_t start,UCalendarDateFields field,const UnicodeString * data,int32_t dataCount,const UnicodeString * monthPattern,Calendar & cal) const2917 int32_t SimpleDateFormat::matchString(const UnicodeString& text,
2918                               int32_t start,
2919                               UCalendarDateFields field,
2920                               const UnicodeString* data,
2921                               int32_t dataCount,
2922                               const UnicodeString* monthPattern,
2923                               Calendar& cal) const
2924 {
2925     int32_t i = 0;
2926     int32_t count = dataCount;
2927 
2928     if (field == UCAL_DAY_OF_WEEK) i = 1;
2929 
2930     // There may be multiple strings in the data[] array which begin with
2931     // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
2932     // We keep track of the longest match, and return that.  Note that this
2933     // unfortunately requires us to test all array elements.
2934     // But this does not really work for cases such as Chuvash in which
2935     // May is "ҫу" and August is "ҫурла"/"ҫур.", hence matchAlphaMonthStrings.
2936     int32_t bestMatchLength = 0, bestMatch = -1;
2937     UnicodeString bestMatchName;
2938     int32_t isLeapMonth = 0;
2939 
2940     for (; i < count; ++i) {
2941         int32_t matchLen = 0;
2942         if ((matchLen = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) {
2943             bestMatch = i;
2944             bestMatchLength = matchLen;
2945         }
2946 
2947         if (monthPattern != nullptr) {
2948             UErrorCode status = U_ZERO_ERROR;
2949             UnicodeString leapMonthName;
2950             SimpleFormatter(*monthPattern, 1, 1, status).format(data[i], leapMonthName, status);
2951             if (U_SUCCESS(status)) {
2952                 if ((matchLen = matchStringWithOptionalDot(text, start, leapMonthName)) > bestMatchLength) {
2953                     bestMatch = i;
2954                     bestMatchLength = matchLen;
2955                     isLeapMonth = 1;
2956                 }
2957             }
2958         }
2959     }
2960 
2961     if (bestMatch >= 0) {
2962         if (field < UCAL_FIELD_COUNT) {
2963             // Adjustment for Hebrew Calendar month Adar II
2964             if (!strcmp(cal.getType(),"hebrew") && field==UCAL_MONTH && bestMatch==13) {
2965                 cal.set(field,6);
2966             } else {
2967                 if (field == UCAL_YEAR) {
2968                     bestMatch++; // only get here for cyclic year names, which match 1-based years 1-60
2969                 }
2970                 cal.set(field, bestMatch);
2971             }
2972             if (monthPattern != nullptr) {
2973                 cal.set(UCAL_IS_LEAP_MONTH, isLeapMonth);
2974             }
2975         }
2976 
2977         return start + bestMatchLength;
2978     }
2979 
2980     return -start;
2981 }
2982 
2983 static int32_t
matchStringWithOptionalDot(const UnicodeString & text,int32_t index,const UnicodeString & data)2984 matchStringWithOptionalDot(const UnicodeString &text,
2985                             int32_t index,
2986                             const UnicodeString &data) {
2987     UErrorCode sts = U_ZERO_ERROR;
2988     int32_t matchLenText = 0;
2989     int32_t matchLenData = 0;
2990 
2991     u_caseInsensitivePrefixMatch(text.getBuffer() + index, text.length() - index,
2992                                  data.getBuffer(), data.length(),
2993                                  0 /* default case option */,
2994                                  &matchLenText, &matchLenData,
2995                                  &sts);
2996     U_ASSERT (U_SUCCESS(sts));
2997 
2998     if (matchLenData == data.length() /* normal match */
2999         || (data.charAt(data.length() - 1) == 0x2e
3000             && matchLenData == data.length() - 1 /* match without trailing dot */)) {
3001         return matchLenText;
3002     }
3003 
3004     return 0;
3005 }
3006 
3007 //----------------------------------------------------------------------
3008 
3009 void
set2DigitYearStart(UDate d,UErrorCode & status)3010 SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status)
3011 {
3012     parseAmbiguousDatesAsAfter(d, status);
3013 }
3014 
3015 /**
3016  * Private member function that converts the parsed date strings into
3017  * timeFields. Returns -start (for ParsePosition) if failed.
3018  */
subParse(const UnicodeString & text,int32_t & start,char16_t ch,int32_t count,UBool obeyCount,UBool allowNegative,UBool ambiguousYear[],int32_t & saveHebrewMonth,Calendar & cal,int32_t patLoc,MessageFormat * numericLeapMonthFormatter,UTimeZoneFormatTimeType * tzTimeType,int32_t * dayPeriod) const3019 int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, char16_t ch, int32_t count,
3020                            UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal,
3021                            int32_t patLoc, MessageFormat * numericLeapMonthFormatter, UTimeZoneFormatTimeType *tzTimeType,
3022                            int32_t *dayPeriod) const
3023 {
3024     Formattable number;
3025     int32_t value = 0;
3026     int32_t i;
3027     int32_t ps = 0;
3028     UErrorCode status = U_ZERO_ERROR;
3029     ParsePosition pos(0);
3030     UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch);
3031     const NumberFormat *currentNumberFormat;
3032     UnicodeString temp;
3033     UBool gotNumber = false;
3034 
3035 #if defined (U_DEBUG_CAL)
3036     //fprintf(stderr, "%s:%d - [%c]  st=%d \n", __FILE__, __LINE__, (char) ch, start);
3037 #endif
3038 
3039     if (patternCharIndex == UDAT_FIELD_COUNT) {
3040         return -start;
3041     }
3042 
3043     currentNumberFormat = getNumberFormatByIndex(patternCharIndex);
3044     if (currentNumberFormat == nullptr) {
3045         return -start;
3046     }
3047     UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex]; // UCAL_FIELD_COUNT if irrelevant
3048     UnicodeString hebr("hebr", 4, US_INV);
3049 
3050     if (numericLeapMonthFormatter != nullptr) {
3051         numericLeapMonthFormatter->setFormats((const Format **)&currentNumberFormat, 1);
3052     }
3053     UBool isChineseCalendar = (uprv_strcmp(cal.getType(),"chinese") == 0 || uprv_strcmp(cal.getType(),"dangi") == 0);
3054 
3055     // If there are any spaces here, skip over them.  If we hit the end
3056     // of the string, then fail.
3057     for (;;) {
3058         if (start >= text.length()) {
3059             return -start;
3060         }
3061         UChar32 c = text.char32At(start);
3062         if (!u_isUWhiteSpace(c) /*||*/ && !PatternProps::isWhiteSpace(c)) {
3063             break;
3064         }
3065         start += U16_LENGTH(c);
3066     }
3067     pos.setIndex(start);
3068 
3069     // We handle a few special cases here where we need to parse
3070     // a number value.  We handle further, more generic cases below.  We need
3071     // to handle some of them here because some fields require extra processing on
3072     // the parsed value.
3073     if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD ||                       // k
3074         patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD ||                       // H
3075         patternCharIndex == UDAT_HOUR1_FIELD ||                              // h
3076         patternCharIndex == UDAT_HOUR0_FIELD ||                              // K
3077         (patternCharIndex == UDAT_DOW_LOCAL_FIELD && count <= 2) ||          // e
3078         (patternCharIndex == UDAT_STANDALONE_DAY_FIELD && count <= 2) ||     // c
3079         (patternCharIndex == UDAT_MONTH_FIELD && count <= 2) ||              // M
3080         (patternCharIndex == UDAT_STANDALONE_MONTH_FIELD && count <= 2) ||   // L
3081         (patternCharIndex == UDAT_QUARTER_FIELD && count <= 2) ||            // Q
3082         (patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD && count <= 2) || // q
3083         patternCharIndex == UDAT_YEAR_FIELD ||                               // y
3084         patternCharIndex == UDAT_YEAR_WOY_FIELD ||                           // Y
3085         patternCharIndex == UDAT_YEAR_NAME_FIELD ||                          // U (falls back to numeric)
3086         (patternCharIndex == UDAT_ERA_FIELD && isChineseCalendar) ||         // G
3087         patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD)                    // S
3088     {
3089         int32_t parseStart = pos.getIndex();
3090         // It would be good to unify this with the obeyCount logic below,
3091         // but that's going to be difficult.
3092         const UnicodeString* src;
3093 
3094         UBool parsedNumericLeapMonth = false;
3095         if (numericLeapMonthFormatter != nullptr && (patternCharIndex == UDAT_MONTH_FIELD || patternCharIndex == UDAT_STANDALONE_MONTH_FIELD)) {
3096             int32_t argCount;
3097             Formattable * args = numericLeapMonthFormatter->parse(text, pos, argCount);
3098             if (args != nullptr && argCount == 1 && pos.getIndex() > parseStart && args[0].isNumeric()) {
3099                 parsedNumericLeapMonth = true;
3100                 number.setLong(args[0].getLong());
3101                 cal.set(UCAL_IS_LEAP_MONTH, 1);
3102                 delete[] args;
3103             } else {
3104                 pos.setIndex(parseStart);
3105                 cal.set(UCAL_IS_LEAP_MONTH, 0);
3106             }
3107         }
3108 
3109         if (!parsedNumericLeapMonth) {
3110             if (obeyCount) {
3111                 if ((start+count) > text.length()) {
3112                     return -start;
3113                 }
3114 
3115                 text.extractBetween(0, start + count, temp);
3116                 src = &temp;
3117             } else {
3118                 src = &text;
3119             }
3120 
3121             parseInt(*src, number, pos, allowNegative,currentNumberFormat);
3122         }
3123 
3124         int32_t txtLoc = pos.getIndex();
3125 
3126         if (txtLoc > parseStart) {
3127             value = number.getLong();
3128             gotNumber = true;
3129 
3130             // suffix processing
3131             if (value < 0 ) {
3132                 txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, true);
3133                 if (txtLoc != pos.getIndex()) {
3134                     value *= -1;
3135                 }
3136             }
3137             else {
3138                 txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, false);
3139             }
3140 
3141             if (!getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) {
3142                 // Check the range of the value
3143                 int32_t bias = gFieldRangeBias[patternCharIndex];
3144                 if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {
3145                     return -start;
3146                 }
3147             }
3148 
3149             pos.setIndex(txtLoc);
3150         }
3151     }
3152 
3153     // Make sure that we got a number if
3154     // we want one, and didn't get one
3155     // if we don't want one.
3156     switch (patternCharIndex) {
3157         case UDAT_HOUR_OF_DAY1_FIELD:
3158         case UDAT_HOUR_OF_DAY0_FIELD:
3159         case UDAT_HOUR1_FIELD:
3160         case UDAT_HOUR0_FIELD:
3161             // special range check for hours:
3162             if (value < 0 || value > 24) {
3163                 return -start;
3164             }
3165 
3166             // fall through to gotNumber check
3167             U_FALLTHROUGH;
3168         case UDAT_YEAR_FIELD:
3169         case UDAT_YEAR_WOY_FIELD:
3170         case UDAT_FRACTIONAL_SECOND_FIELD:
3171             // these must be a number
3172             if (! gotNumber) {
3173                 return -start;
3174             }
3175 
3176             break;
3177 
3178         default:
3179             // we check the rest of the fields below.
3180             break;
3181     }
3182 
3183     switch (patternCharIndex) {
3184     case UDAT_ERA_FIELD:
3185         if (isChineseCalendar) {
3186             if (!gotNumber) {
3187                 return -start;
3188             }
3189             cal.set(UCAL_ERA, value);
3190             return pos.getIndex();
3191         }
3192         if (count == 5) {
3193             ps = matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, nullptr, cal);
3194         } else if (count == 4) {
3195             ps = matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, nullptr, cal);
3196         } else {
3197             ps = matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, nullptr, cal);
3198         }
3199 
3200         // check return position, if it equals -start, then matchString error
3201         // special case the return code so we don't necessarily fail out until we
3202         // verify no year information also
3203         if (ps == -start)
3204             ps--;
3205 
3206         return ps;
3207 
3208     case UDAT_YEAR_FIELD:
3209         // If there are 3 or more YEAR pattern characters, this indicates
3210         // that the year value is to be treated literally, without any
3211         // two-digit year adjustments (e.g., from "01" to 2001).  Otherwise
3212         // we made adjustments to place the 2-digit year in the proper
3213         // century, for parsed strings from "00" to "99".  Any other string
3214         // is treated literally:  "2250", "-1", "1", "002".
3215         if (fDateOverride.compare(hebr)==0 && value < 1000) {
3216             value += HEBREW_CAL_CUR_MILLENIUM_START_YEAR;
3217         } else if (text.moveIndex32(start, 2) == pos.getIndex() && !isChineseCalendar
3218             && u_isdigit(text.char32At(start))
3219             && u_isdigit(text.char32At(text.moveIndex32(start, 1))))
3220         {
3221             // only adjust year for patterns less than 3.
3222             if(count < 3) {
3223                 // Assume for example that the defaultCenturyStart is 6/18/1903.
3224                 // This means that two-digit years will be forced into the range
3225                 // 6/18/1903 to 6/17/2003.  As a result, years 00, 01, and 02
3226                 // correspond to 2000, 2001, and 2002.  Years 04, 05, etc. correspond
3227                 // to 1904, 1905, etc.  If the year is 03, then it is 2003 if the
3228                 // other fields specify a date before 6/18, or 1903 if they specify a
3229                 // date afterwards.  As a result, 03 is an ambiguous year.  All other
3230                 // two-digit years are unambiguous.
3231                 if(fHaveDefaultCentury) { // check if this formatter even has a pivot year
3232                     int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
3233                     ambiguousYear[0] = (value == ambiguousTwoDigitYear);
3234                     value += (fDefaultCenturyStartYear/100)*100 +
3235                             (value < ambiguousTwoDigitYear ? 100 : 0);
3236                 }
3237             }
3238         }
3239         cal.set(UCAL_YEAR, value);
3240 
3241         // Delayed checking for adjustment of Hebrew month numbers in non-leap years.
3242         if (saveHebrewMonth >= 0) {
3243             HebrewCalendar *hc = (HebrewCalendar*)&cal;
3244             if (!hc->isLeapYear(value) && saveHebrewMonth >= 6) {
3245                cal.set(UCAL_MONTH,saveHebrewMonth);
3246             } else {
3247                cal.set(UCAL_MONTH,saveHebrewMonth-1);
3248             }
3249             saveHebrewMonth = -1;
3250         }
3251         return pos.getIndex();
3252 
3253     case UDAT_YEAR_WOY_FIELD:
3254         // Comment is the same as for UDAT_Year_FIELDs - look above
3255         if (fDateOverride.compare(hebr)==0 && value < 1000) {
3256             value += HEBREW_CAL_CUR_MILLENIUM_START_YEAR;
3257         } else if (text.moveIndex32(start, 2) == pos.getIndex()
3258             && u_isdigit(text.char32At(start))
3259             && u_isdigit(text.char32At(text.moveIndex32(start, 1)))
3260             && fHaveDefaultCentury )
3261         {
3262             int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
3263             ambiguousYear[0] = (value == ambiguousTwoDigitYear);
3264             value += (fDefaultCenturyStartYear/100)*100 +
3265                 (value < ambiguousTwoDigitYear ? 100 : 0);
3266         }
3267         cal.set(UCAL_YEAR_WOY, value);
3268         return pos.getIndex();
3269 
3270     case UDAT_YEAR_NAME_FIELD:
3271         if (fSymbols->fShortYearNames != nullptr) {
3272             int32_t newStart = matchString(text, start, UCAL_YEAR, fSymbols->fShortYearNames, fSymbols->fShortYearNamesCount, nullptr, cal);
3273             if (newStart > 0) {
3274                 return newStart;
3275             }
3276         }
3277         if (gotNumber && (getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC,status) || value > fSymbols->fShortYearNamesCount)) {
3278             cal.set(UCAL_YEAR, value);
3279             return pos.getIndex();
3280         }
3281         return -start;
3282 
3283     case UDAT_MONTH_FIELD:
3284     case UDAT_STANDALONE_MONTH_FIELD:
3285         if (gotNumber) // i.e., M or MM.
3286         {
3287             // When parsing month numbers from the Hebrew Calendar, we might need to adjust the month depending on whether
3288             // or not it was a leap year.  We may or may not yet know what year it is, so might have to delay checking until
3289             // the year is parsed.
3290             if (!strcmp(cal.getType(),"hebrew")) {
3291                 HebrewCalendar *hc = (HebrewCalendar*)&cal;
3292                 if (cal.isSet(UCAL_YEAR)) {
3293                    UErrorCode monthStatus = U_ZERO_ERROR;
3294                    if (!hc->isLeapYear(hc->get(UCAL_YEAR, monthStatus)) && value >= 6) {
3295                        cal.set(UCAL_MONTH, value);
3296                    } else {
3297                        cal.set(UCAL_MONTH, value - 1);
3298                    }
3299                 } else {
3300                     saveHebrewMonth = value;
3301                 }
3302             } else {
3303                 // Don't want to parse the month if it is a string
3304                 // while pattern uses numeric style: M/MM, L/LL
3305                 // [We computed 'value' above.]
3306                 cal.set(UCAL_MONTH, value - 1);
3307             }
3308             return pos.getIndex();
3309         } else {
3310             // count >= 3 // i.e., MMM/MMMM, LLL/LLLL
3311             // Want to be able to parse both short and long forms.
3312             // Try count == 4 first:
3313             UnicodeString * wideMonthPat = nullptr;
3314             UnicodeString * shortMonthPat = nullptr;
3315             if (fSymbols->fLeapMonthPatterns != nullptr && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) {
3316                 if (patternCharIndex==UDAT_MONTH_FIELD) {
3317                     wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide];
3318                     shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev];
3319                 } else {
3320                     wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide];
3321                     shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev];
3322                 }
3323             }
3324             int32_t newStart = 0;
3325             if (patternCharIndex==UDAT_MONTH_FIELD) {
3326                 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) && count>=3 && count <=4 &&
3327                         fSymbols->fLeapMonthPatterns==nullptr && fSymbols->fMonthsCount==fSymbols->fShortMonthsCount) {
3328                     // single function to check both wide and short, an experiment
3329                     newStart = matchAlphaMonthStrings(text, start, fSymbols->fMonths, fSymbols->fShortMonths, fSymbols->fMonthsCount, cal); // try MMMM,MMM
3330                     if (newStart > 0) {
3331                         return newStart;
3332                     }
3333                 }
3334                 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3335                     newStart = matchString(text, start, UCAL_MONTH, fSymbols->fMonths, fSymbols->fMonthsCount, wideMonthPat, cal); // try MMMM
3336                     if (newStart > 0) {
3337                         return newStart;
3338                     }
3339                 }
3340                 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3341                     newStart = matchString(text, start, UCAL_MONTH, fSymbols->fShortMonths, fSymbols->fShortMonthsCount, shortMonthPat, cal); // try MMM
3342                 }
3343             } else {
3344                 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) && count>=3 && count <=4 &&
3345                         fSymbols->fLeapMonthPatterns==nullptr && fSymbols->fStandaloneMonthsCount==fSymbols->fStandaloneShortMonthsCount) {
3346                     // single function to check both wide and short, an experiment
3347                     newStart = matchAlphaMonthStrings(text, start, fSymbols->fStandaloneMonths, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneMonthsCount, cal); // try MMMM,MMM
3348                     if (newStart > 0) {
3349                         return newStart;
3350                     }
3351                 }
3352                 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3353                     newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, wideMonthPat, cal); // try LLLL
3354                     if (newStart > 0) {
3355                         return newStart;
3356                     }
3357                 }
3358                 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3359                     newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, shortMonthPat, cal); // try LLL
3360                 }
3361             }
3362             if (newStart > 0 || !getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))  // currently we do not try to parse MMMMM/LLLLL: #8860
3363                 return newStart;
3364             // else we allowing parsing as number, below
3365         }
3366         break;
3367 
3368     case UDAT_HOUR_OF_DAY1_FIELD:
3369         // [We computed 'value' above.]
3370         if (value == cal.getMaximum(UCAL_HOUR_OF_DAY) + 1)
3371             value = 0;
3372 
3373         // fall through to set field
3374         U_FALLTHROUGH;
3375     case UDAT_HOUR_OF_DAY0_FIELD:
3376         cal.set(UCAL_HOUR_OF_DAY, value);
3377         return pos.getIndex();
3378 
3379     case UDAT_FRACTIONAL_SECOND_FIELD:
3380         // Fractional seconds left-justify
3381         i = countDigits(text, start, pos.getIndex());
3382         if (i < 3) {
3383             while (i < 3) {
3384                 value *= 10;
3385                 i++;
3386             }
3387         } else {
3388             int32_t a = 1;
3389             while (i > 3) {
3390                 a *= 10;
3391                 i--;
3392             }
3393             value /= a;
3394         }
3395         cal.set(UCAL_MILLISECOND, value);
3396         return pos.getIndex();
3397 
3398     case UDAT_DOW_LOCAL_FIELD:
3399         if (gotNumber) // i.e., e or ee
3400         {
3401             // [We computed 'value' above.]
3402             cal.set(UCAL_DOW_LOCAL, value);
3403             return pos.getIndex();
3404         }
3405         // else for eee-eeeee fall through to handling of EEE-EEEEE
3406         // fall through, do not break here
3407         U_FALLTHROUGH;
3408     case UDAT_DAY_OF_WEEK_FIELD:
3409         {
3410             // Want to be able to parse both short and long forms.
3411             // Try count == 4 (EEEE) wide first:
3412             int32_t newStart = 0;
3413             if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3414                 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3415                                           fSymbols->fWeekdays, fSymbols->fWeekdaysCount, nullptr, cal)) > 0)
3416                     return newStart;
3417             }
3418             // EEEE wide failed, now try EEE abbreviated
3419             if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3420                 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3421                                        fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, nullptr, cal)) > 0)
3422                     return newStart;
3423             }
3424             // EEE abbreviated failed, now try EEEEEE short
3425             if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 6) {
3426                 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3427                                        fSymbols->fShorterWeekdays, fSymbols->fShorterWeekdaysCount, nullptr, cal)) > 0)
3428                     return newStart;
3429             }
3430             // EEEEEE short failed, now try EEEEE narrow
3431             if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3432                 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3433                                        fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, nullptr, cal)) > 0)
3434                     return newStart;
3435             }
3436             if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status) || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD)
3437                 return newStart;
3438             // else we allowing parsing as number, below
3439         }
3440         break;
3441 
3442     case UDAT_STANDALONE_DAY_FIELD:
3443         {
3444             if (gotNumber) // c or cc
3445             {
3446                 // [We computed 'value' above.]
3447                 cal.set(UCAL_DOW_LOCAL, value);
3448                 return pos.getIndex();
3449             }
3450             // Want to be able to parse both short and long forms.
3451             // Try count == 4 (cccc) first:
3452             int32_t newStart = 0;
3453             if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3454                 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3455                                       fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, nullptr, cal)) > 0)
3456                     return newStart;
3457             }
3458             if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3459                 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3460                                           fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, nullptr, cal)) > 0)
3461                     return newStart;
3462             }
3463             if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 6) {
3464                 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3465                                           fSymbols->fStandaloneShorterWeekdays, fSymbols->fStandaloneShorterWeekdaysCount, nullptr, cal)) > 0)
3466                     return newStart;
3467             }
3468             if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))
3469                 return newStart;
3470             // else we allowing parsing as number, below
3471         }
3472         break;
3473 
3474     case UDAT_AM_PM_FIELD:
3475         {
3476             // optionally try both wide/abbrev and narrow forms
3477             int32_t newStart = 0;
3478             // try wide/abbrev
3479             if( getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count < 5 ) {
3480                 if ((newStart = matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, nullptr, cal)) > 0) {
3481                     return newStart;
3482                 }
3483             }
3484             // try narrow
3485             if( getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count >= 5 ) {
3486                 if ((newStart = matchString(text, start, UCAL_AM_PM, fSymbols->fNarrowAmPms, fSymbols->fNarrowAmPmsCount, nullptr, cal)) > 0) {
3487                     return newStart;
3488                 }
3489             }
3490             // no matches for given options
3491             return -start;
3492         }
3493 
3494     case UDAT_HOUR1_FIELD:
3495         // [We computed 'value' above.]
3496         if (value == cal.getLeastMaximum(UCAL_HOUR)+1)
3497             value = 0;
3498 
3499         // fall through to set field
3500         U_FALLTHROUGH;
3501     case UDAT_HOUR0_FIELD:
3502         cal.set(UCAL_HOUR, value);
3503         return pos.getIndex();
3504 
3505     case UDAT_QUARTER_FIELD:
3506         if (gotNumber) // i.e., Q or QQ.
3507         {
3508             // Don't want to parse the month if it is a string
3509             // while pattern uses numeric style: Q or QQ.
3510             // [We computed 'value' above.]
3511             cal.set(UCAL_MONTH, (value - 1) * 3);
3512             return pos.getIndex();
3513         } else {
3514             // count >= 3 // i.e., QQQ or QQQQ
3515             // Want to be able to parse short, long, and narrow forms.
3516             // Try count == 4 first:
3517             int32_t newStart = 0;
3518 
3519             if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3520                 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3521                                       fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0)
3522                     return newStart;
3523             }
3524             if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3525                 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3526                                           fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal)) > 0)
3527                     return newStart;
3528             }
3529             if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3530                 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3531                                       fSymbols->fNarrowQuarters, fSymbols->fNarrowQuartersCount, cal)) > 0)
3532                     return newStart;
3533             }
3534             if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))
3535                 return newStart;
3536             // else we allowing parsing as number, below
3537             if(!getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status))
3538                 return -start;
3539         }
3540         break;
3541 
3542     case UDAT_STANDALONE_QUARTER_FIELD:
3543         if (gotNumber) // i.e., q or qq.
3544         {
3545             // Don't want to parse the month if it is a string
3546             // while pattern uses numeric style: q or q.
3547             // [We computed 'value' above.]
3548             cal.set(UCAL_MONTH, (value - 1) * 3);
3549             return pos.getIndex();
3550         } else {
3551             // count >= 3 // i.e., qqq or qqqq
3552             // Want to be able to parse both short and long forms.
3553             // Try count == 4 first:
3554             int32_t newStart = 0;
3555 
3556             if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3557                 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3558                                       fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0)
3559                     return newStart;
3560             }
3561             if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3562                 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3563                                           fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal)) > 0)
3564                     return newStart;
3565             }
3566             if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3567                 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3568                                           fSymbols->fStandaloneNarrowQuarters, fSymbols->fStandaloneNarrowQuartersCount, cal)) > 0)
3569                     return newStart;
3570             }
3571             if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))
3572                 return newStart;
3573             // else we allowing parsing as number, below
3574             if(!getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status))
3575                 return -start;
3576         }
3577         break;
3578 
3579     case UDAT_TIMEZONE_FIELD: // 'z'
3580         {
3581             UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_SPECIFIC_SHORT : UTZFMT_STYLE_SPECIFIC_LONG;
3582             const TimeZoneFormat *tzfmt = tzFormat(status);
3583             if (U_SUCCESS(status)) {
3584                 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3585                 if (tz != nullptr) {
3586                     cal.adoptTimeZone(tz);
3587                     return pos.getIndex();
3588                 }
3589             }
3590             return -start;
3591     }
3592         break;
3593     case UDAT_TIMEZONE_RFC_FIELD: // 'Z'
3594         {
3595             UTimeZoneFormatStyle style = (count < 4) ?
3596                 UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL : ((count == 5) ? UTZFMT_STYLE_ISO_EXTENDED_FULL: UTZFMT_STYLE_LOCALIZED_GMT);
3597             const TimeZoneFormat *tzfmt = tzFormat(status);
3598             if (U_SUCCESS(status)) {
3599                 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3600                 if (tz != nullptr) {
3601                     cal.adoptTimeZone(tz);
3602                     return pos.getIndex();
3603                 }
3604             }
3605             return -start;
3606         }
3607     case UDAT_TIMEZONE_GENERIC_FIELD: // 'v'
3608         {
3609             UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_GENERIC_SHORT : UTZFMT_STYLE_GENERIC_LONG;
3610             const TimeZoneFormat *tzfmt = tzFormat(status);
3611             if (U_SUCCESS(status)) {
3612                 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3613                 if (tz != nullptr) {
3614                     cal.adoptTimeZone(tz);
3615                     return pos.getIndex();
3616                 }
3617             }
3618             return -start;
3619         }
3620     case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V'
3621         {
3622             UTimeZoneFormatStyle style;
3623             switch (count) {
3624             case 1:
3625                 style = UTZFMT_STYLE_ZONE_ID_SHORT;
3626                 break;
3627             case 2:
3628                 style = UTZFMT_STYLE_ZONE_ID;
3629                 break;
3630             case 3:
3631                 style = UTZFMT_STYLE_EXEMPLAR_LOCATION;
3632                 break;
3633             default:
3634                 style = UTZFMT_STYLE_GENERIC_LOCATION;
3635                 break;
3636             }
3637             const TimeZoneFormat *tzfmt = tzFormat(status);
3638             if (U_SUCCESS(status)) {
3639                 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3640                 if (tz != nullptr) {
3641                     cal.adoptTimeZone(tz);
3642                     return pos.getIndex();
3643                 }
3644             }
3645             return -start;
3646         }
3647     case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: // 'O'
3648         {
3649             UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_LOCALIZED_GMT_SHORT : UTZFMT_STYLE_LOCALIZED_GMT;
3650             const TimeZoneFormat *tzfmt = tzFormat(status);
3651             if (U_SUCCESS(status)) {
3652                 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3653                 if (tz != nullptr) {
3654                     cal.adoptTimeZone(tz);
3655                     return pos.getIndex();
3656                 }
3657             }
3658             return -start;
3659         }
3660     case UDAT_TIMEZONE_ISO_FIELD: // 'X'
3661         {
3662             UTimeZoneFormatStyle style;
3663             switch (count) {
3664             case 1:
3665                 style = UTZFMT_STYLE_ISO_BASIC_SHORT;
3666                 break;
3667             case 2:
3668                 style = UTZFMT_STYLE_ISO_BASIC_FIXED;
3669                 break;
3670             case 3:
3671                 style = UTZFMT_STYLE_ISO_EXTENDED_FIXED;
3672                 break;
3673             case 4:
3674                 style = UTZFMT_STYLE_ISO_BASIC_FULL;
3675                 break;
3676             default:
3677                 style = UTZFMT_STYLE_ISO_EXTENDED_FULL;
3678                 break;
3679             }
3680             const TimeZoneFormat *tzfmt = tzFormat(status);
3681             if (U_SUCCESS(status)) {
3682                 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3683                 if (tz != nullptr) {
3684                     cal.adoptTimeZone(tz);
3685                     return pos.getIndex();
3686                 }
3687             }
3688             return -start;
3689         }
3690     case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x'
3691         {
3692             UTimeZoneFormatStyle style;
3693             switch (count) {
3694             case 1:
3695                 style = UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT;
3696                 break;
3697             case 2:
3698                 style = UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED;
3699                 break;
3700             case 3:
3701                 style = UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED;
3702                 break;
3703             case 4:
3704                 style = UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL;
3705                 break;
3706             default:
3707                 style = UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL;
3708                 break;
3709             }
3710             const TimeZoneFormat *tzfmt = tzFormat(status);
3711             if (U_SUCCESS(status)) {
3712                 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3713                 if (tz != nullptr) {
3714                     cal.adoptTimeZone(tz);
3715                     return pos.getIndex();
3716                 }
3717             }
3718             return -start;
3719         }
3720     // currently no pattern character is defined for UDAT_TIME_SEPARATOR_FIELD
3721     // so we should not get here. Leave support in for future definition.
3722     case UDAT_TIME_SEPARATOR_FIELD:
3723         {
3724             static const char16_t def_sep = DateFormatSymbols::DEFAULT_TIME_SEPARATOR;
3725             static const char16_t alt_sep = DateFormatSymbols::ALTERNATE_TIME_SEPARATOR;
3726 
3727             // Try matching a time separator.
3728             int32_t count_sep = 1;
3729             UnicodeString data[3];
3730             fSymbols->getTimeSeparatorString(data[0]);
3731 
3732             // Add the default, if different from the locale.
3733             if (data[0].compare(&def_sep, 1) != 0) {
3734                 data[count_sep++].setTo(def_sep);
3735             }
3736 
3737             // If lenient, add also the alternate, if different from the locale.
3738             if (isLenient() && data[0].compare(&alt_sep, 1) != 0) {
3739                 data[count_sep++].setTo(alt_sep);
3740             }
3741 
3742             return matchString(text, start, UCAL_FIELD_COUNT /* => nothing to set */, data, count_sep, nullptr, cal);
3743         }
3744 
3745     case UDAT_AM_PM_MIDNIGHT_NOON_FIELD:
3746     {
3747         U_ASSERT(dayPeriod != nullptr);
3748         int32_t ampmStart = subParse(text, start, 0x61, count,
3749                            obeyCount, allowNegative, ambiguousYear, saveHebrewMonth, cal,
3750                            patLoc, numericLeapMonthFormatter, tzTimeType);
3751 
3752         if (ampmStart > 0) {
3753             return ampmStart;
3754         } else {
3755             int32_t newStart = 0;
3756 
3757             // Only match the first two strings from the day period strings array.
3758             if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3759                 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fAbbreviatedDayPeriods,
3760                                                         2, *dayPeriod)) > 0) {
3761                     return newStart;
3762                 }
3763             }
3764             if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3765                 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fNarrowDayPeriods,
3766                                                         2, *dayPeriod)) > 0) {
3767                     return newStart;
3768                 }
3769             }
3770             // count == 4, but allow other counts
3771             if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status)) {
3772                 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fWideDayPeriods,
3773                                                         2, *dayPeriod)) > 0) {
3774                     return newStart;
3775                 }
3776             }
3777 
3778             return -start;
3779         }
3780     }
3781 
3782     case UDAT_FLEXIBLE_DAY_PERIOD_FIELD:
3783     {
3784         U_ASSERT(dayPeriod != nullptr);
3785         int32_t newStart = 0;
3786 
3787         if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3788             if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fAbbreviatedDayPeriods,
3789                                 fSymbols->fAbbreviatedDayPeriodsCount, *dayPeriod)) > 0) {
3790                 return newStart;
3791             }
3792         }
3793         if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3794             if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fNarrowDayPeriods,
3795                                 fSymbols->fNarrowDayPeriodsCount, *dayPeriod)) > 0) {
3796                 return newStart;
3797             }
3798         }
3799         if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3800             if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fWideDayPeriods,
3801                                 fSymbols->fWideDayPeriodsCount, *dayPeriod)) > 0) {
3802                 return newStart;
3803             }
3804         }
3805 
3806         return -start;
3807     }
3808 
3809     default:
3810         // Handle "generic" fields
3811         // this is now handled below, outside the switch block
3812         break;
3813     }
3814     // Handle "generic" fields:
3815     // switch default case now handled here (outside switch block) to allow
3816     // parsing of some string fields as digits for lenient case
3817 
3818     int32_t parseStart = pos.getIndex();
3819     const UnicodeString* src;
3820     if (obeyCount) {
3821         if ((start+count) > text.length()) {
3822             return -start;
3823         }
3824         text.extractBetween(0, start + count, temp);
3825         src = &temp;
3826     } else {
3827         src = &text;
3828     }
3829     parseInt(*src, number, pos, allowNegative,currentNumberFormat);
3830     if (obeyCount && !isLenient() && pos.getIndex() < start + count) {
3831         return -start;
3832     }
3833     if (pos.getIndex() != parseStart) {
3834         int32_t val = number.getLong();
3835 
3836         // Don't need suffix processing here (as in number processing at the beginning of the function);
3837         // the new fields being handled as numeric values (month, weekdays, quarters) should not have suffixes.
3838 
3839         if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) {
3840             // Check the range of the value
3841             int32_t bias = gFieldRangeBias[patternCharIndex];
3842             if (bias >= 0 && (val > cal.getMaximum(field) + bias || val < cal.getMinimum(field) + bias)) {
3843                 return -start;
3844             }
3845         }
3846 
3847         // For the following, need to repeat some of the "if (gotNumber)" code above:
3848         // UDAT_[STANDALONE_]MONTH_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_STANDALONE_DAY_FIELD,
3849         // UDAT_[STANDALONE_]QUARTER_FIELD
3850         switch (patternCharIndex) {
3851         case UDAT_MONTH_FIELD:
3852             // See notes under UDAT_MONTH_FIELD case above
3853             if (!strcmp(cal.getType(),"hebrew")) {
3854                 HebrewCalendar *hc = (HebrewCalendar*)&cal;
3855                 if (cal.isSet(UCAL_YEAR)) {
3856                    UErrorCode monthStatus = U_ZERO_ERROR;
3857                    if (!hc->isLeapYear(hc->get(UCAL_YEAR, monthStatus)) && val >= 6) {
3858                        cal.set(UCAL_MONTH, val);
3859                    } else {
3860                        cal.set(UCAL_MONTH, val - 1);
3861                    }
3862                 } else {
3863                     saveHebrewMonth = val;
3864                 }
3865             } else {
3866                 cal.set(UCAL_MONTH, val - 1);
3867             }
3868             break;
3869         case UDAT_STANDALONE_MONTH_FIELD:
3870             cal.set(UCAL_MONTH, val - 1);
3871             break;
3872         case UDAT_DOW_LOCAL_FIELD:
3873         case UDAT_STANDALONE_DAY_FIELD:
3874             cal.set(UCAL_DOW_LOCAL, val);
3875             break;
3876         case UDAT_QUARTER_FIELD:
3877         case UDAT_STANDALONE_QUARTER_FIELD:
3878              cal.set(UCAL_MONTH, (val - 1) * 3);
3879              break;
3880         case UDAT_RELATED_YEAR_FIELD:
3881             cal.setRelatedYear(val);
3882             break;
3883         default:
3884             cal.set(field, val);
3885             break;
3886         }
3887         return pos.getIndex();
3888     }
3889     return -start;
3890 }
3891 
3892 /**
3893  * Parse an integer using fNumberFormat.  This method is semantically
3894  * const, but actually may modify fNumberFormat.
3895  */
parseInt(const UnicodeString & text,Formattable & number,ParsePosition & pos,UBool allowNegative,const NumberFormat * fmt) const3896 void SimpleDateFormat::parseInt(const UnicodeString& text,
3897                                 Formattable& number,
3898                                 ParsePosition& pos,
3899                                 UBool allowNegative,
3900                                 const NumberFormat *fmt) const {
3901     parseInt(text, number, -1, pos, allowNegative,fmt);
3902 }
3903 
3904 /**
3905  * Parse an integer using fNumberFormat up to maxDigits.
3906  */
parseInt(const UnicodeString & text,Formattable & number,int32_t maxDigits,ParsePosition & pos,UBool allowNegative,const NumberFormat * fmt) const3907 void SimpleDateFormat::parseInt(const UnicodeString& text,
3908                                 Formattable& number,
3909                                 int32_t maxDigits,
3910                                 ParsePosition& pos,
3911                                 UBool allowNegative,
3912                                 const NumberFormat *fmt) const {
3913     UnicodeString oldPrefix;
3914     auto* fmtAsDF = dynamic_cast<const DecimalFormat*>(fmt);
3915     LocalPointer<DecimalFormat> df;
3916     if (!allowNegative && fmtAsDF != nullptr) {
3917         df.adoptInstead(fmtAsDF->clone());
3918         if (df.isNull()) {
3919             // Memory allocation error
3920             return;
3921         }
3922         df->setNegativePrefix(UnicodeString(true, SUPPRESS_NEGATIVE_PREFIX, -1));
3923         fmt = df.getAlias();
3924     }
3925     int32_t oldPos = pos.getIndex();
3926     fmt->parse(text, number, pos);
3927 
3928     if (maxDigits > 0) {
3929         // adjust the result to fit into
3930         // the maxDigits and move the position back
3931         int32_t nDigits = pos.getIndex() - oldPos;
3932         if (nDigits > maxDigits) {
3933             int32_t val = number.getLong();
3934             nDigits -= maxDigits;
3935             while (nDigits > 0) {
3936                 val /= 10;
3937                 nDigits--;
3938             }
3939             pos.setIndex(oldPos + maxDigits);
3940             number.setLong(val);
3941         }
3942     }
3943 }
3944 
countDigits(const UnicodeString & text,int32_t start,int32_t end) const3945 int32_t SimpleDateFormat::countDigits(const UnicodeString& text, int32_t start, int32_t end) const {
3946     int32_t numDigits = 0;
3947     int32_t idx = start;
3948     while (idx < end) {
3949         UChar32 cp = text.char32At(idx);
3950         if (u_isdigit(cp)) {
3951             numDigits++;
3952         }
3953         idx += U16_LENGTH(cp);
3954     }
3955     return numDigits;
3956 }
3957 
3958 //----------------------------------------------------------------------
3959 
translatePattern(const UnicodeString & originalPattern,UnicodeString & translatedPattern,const UnicodeString & from,const UnicodeString & to,UErrorCode & status)3960 void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern,
3961                                         UnicodeString& translatedPattern,
3962                                         const UnicodeString& from,
3963                                         const UnicodeString& to,
3964                                         UErrorCode& status)
3965 {
3966     // run through the pattern and convert any pattern symbols from the version
3967     // in "from" to the corresponding character in "to".  This code takes
3968     // quoted strings into account (it doesn't try to translate them), and it signals
3969     // an error if a particular "pattern character" doesn't appear in "from".
3970     // Depending on the values of "from" and "to" this can convert from generic
3971     // to localized patterns or localized to generic.
3972     if (U_FAILURE(status)) {
3973         return;
3974     }
3975 
3976     translatedPattern.remove();
3977     UBool inQuote = false;
3978     for (int32_t i = 0; i < originalPattern.length(); ++i) {
3979         char16_t c = originalPattern[i];
3980         if (inQuote) {
3981             if (c == QUOTE) {
3982                 inQuote = false;
3983             }
3984         } else {
3985             if (c == QUOTE) {
3986                 inQuote = true;
3987             } else if (isSyntaxChar(c)) {
3988                 int32_t ci = from.indexOf(c);
3989                 if (ci == -1) {
3990                     status = U_INVALID_FORMAT_ERROR;
3991                     return;
3992                 }
3993                 c = to[ci];
3994             }
3995         }
3996         translatedPattern += c;
3997     }
3998     if (inQuote) {
3999         status = U_INVALID_FORMAT_ERROR;
4000         return;
4001     }
4002 }
4003 
4004 //----------------------------------------------------------------------
4005 
4006 UnicodeString&
toPattern(UnicodeString & result) const4007 SimpleDateFormat::toPattern(UnicodeString& result) const
4008 {
4009     result = fPattern;
4010     return result;
4011 }
4012 
4013 //----------------------------------------------------------------------
4014 
4015 UnicodeString&
toLocalizedPattern(UnicodeString & result,UErrorCode & status) const4016 SimpleDateFormat::toLocalizedPattern(UnicodeString& result,
4017                                      UErrorCode& status) const
4018 {
4019     translatePattern(fPattern, result,
4020                      UnicodeString(DateFormatSymbols::getPatternUChars()),
4021                      fSymbols->fLocalPatternChars, status);
4022     return result;
4023 }
4024 
4025 //----------------------------------------------------------------------
4026 
4027 void
applyPattern(const UnicodeString & pattern)4028 SimpleDateFormat::applyPattern(const UnicodeString& pattern)
4029 {
4030     fPattern = pattern;
4031     parsePattern();
4032 
4033     // Hack to update use of Gannen year numbering for ja@calendar=japanese -
4034     // use only if format is non-numeric (includes 年) and no other fDateOverride.
4035     if (fCalendar != nullptr && uprv_strcmp(fCalendar->getType(),"japanese") == 0 &&
4036             uprv_strcmp(fLocale.getLanguage(),"ja") == 0) {
4037         if (fDateOverride==UnicodeString(u"y=jpanyear") && !fHasHanYearChar) {
4038             // Gannen numbering is set but new pattern should not use it, unset;
4039             // use procedure from adoptNumberFormat to clear overrides
4040             if (fSharedNumberFormatters) {
4041                 freeSharedNumberFormatters(fSharedNumberFormatters);
4042                 fSharedNumberFormatters = nullptr;
4043             }
4044             fDateOverride.setToBogus(); // record status
4045         } else if (fDateOverride.isBogus() && fHasHanYearChar) {
4046             // No current override (=> no Gannen numbering) but new pattern needs it;
4047             // use procedures from initNUmberFormatters / adoptNumberFormat
4048             umtx_lock(&LOCK);
4049             if (fSharedNumberFormatters == nullptr) {
4050                 fSharedNumberFormatters = allocSharedNumberFormatters();
4051             }
4052             umtx_unlock(&LOCK);
4053             if (fSharedNumberFormatters != nullptr) {
4054                 Locale ovrLoc(fLocale.getLanguage(),fLocale.getCountry(),fLocale.getVariant(),"numbers=jpanyear");
4055                 UErrorCode status = U_ZERO_ERROR;
4056                 const SharedNumberFormat *snf = createSharedNumberFormat(ovrLoc, status);
4057                 if (U_SUCCESS(status)) {
4058                     // Now that we have an appropriate number formatter, fill in the
4059                     // appropriate slot in the number formatters table.
4060                     UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(u'y');
4061                     SharedObject::copyPtr(snf, fSharedNumberFormatters[patternCharIndex]);
4062                     snf->deleteIfZeroRefCount();
4063                     fDateOverride.setTo(u"y=jpanyear", -1); // record status
4064                 }
4065             }
4066         }
4067     }
4068 }
4069 
4070 //----------------------------------------------------------------------
4071 
4072 void
applyLocalizedPattern(const UnicodeString & pattern,UErrorCode & status)4073 SimpleDateFormat::applyLocalizedPattern(const UnicodeString& pattern,
4074                                         UErrorCode &status)
4075 {
4076     translatePattern(pattern, fPattern,
4077                      fSymbols->fLocalPatternChars,
4078                      UnicodeString(DateFormatSymbols::getPatternUChars()), status);
4079 }
4080 
4081 //----------------------------------------------------------------------
4082 
4083 const DateFormatSymbols*
getDateFormatSymbols() const4084 SimpleDateFormat::getDateFormatSymbols() const
4085 {
4086     return fSymbols;
4087 }
4088 
4089 //----------------------------------------------------------------------
4090 
4091 void
adoptDateFormatSymbols(DateFormatSymbols * newFormatSymbols)4092 SimpleDateFormat::adoptDateFormatSymbols(DateFormatSymbols* newFormatSymbols)
4093 {
4094     delete fSymbols;
4095     fSymbols = newFormatSymbols;
4096 }
4097 
4098 //----------------------------------------------------------------------
4099 void
setDateFormatSymbols(const DateFormatSymbols & newFormatSymbols)4100 SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols)
4101 {
4102     delete fSymbols;
4103     fSymbols = new DateFormatSymbols(newFormatSymbols);
4104 }
4105 
4106 //----------------------------------------------------------------------
4107 const TimeZoneFormat*
getTimeZoneFormat() const4108 SimpleDateFormat::getTimeZoneFormat() const {
4109     // TimeZoneFormat initialization might fail when out of memory.
4110     // If we always initialize TimeZoneFormat instance, we can return
4111     // such status there. For now, this implementation lazily instantiates
4112     // a TimeZoneFormat for performance optimization reasons, but cannot
4113     // propagate such error (probably just out of memory case) to the caller.
4114     UErrorCode status = U_ZERO_ERROR;
4115     return (const TimeZoneFormat*)tzFormat(status);
4116 }
4117 
4118 //----------------------------------------------------------------------
4119 void
adoptTimeZoneFormat(TimeZoneFormat * timeZoneFormatToAdopt)4120 SimpleDateFormat::adoptTimeZoneFormat(TimeZoneFormat* timeZoneFormatToAdopt)
4121 {
4122     delete fTimeZoneFormat;
4123     fTimeZoneFormat = timeZoneFormatToAdopt;
4124 }
4125 
4126 //----------------------------------------------------------------------
4127 void
setTimeZoneFormat(const TimeZoneFormat & newTimeZoneFormat)4128 SimpleDateFormat::setTimeZoneFormat(const TimeZoneFormat& newTimeZoneFormat)
4129 {
4130     delete fTimeZoneFormat;
4131     fTimeZoneFormat = new TimeZoneFormat(newTimeZoneFormat);
4132 }
4133 
4134 //----------------------------------------------------------------------
4135 
4136 
adoptCalendar(Calendar * calendarToAdopt)4137 void SimpleDateFormat::adoptCalendar(Calendar* calendarToAdopt)
4138 {
4139   UErrorCode status = U_ZERO_ERROR;
4140   Locale calLocale(fLocale);
4141   calLocale.setKeywordValue("calendar", calendarToAdopt->getType(), status);
4142   DateFormatSymbols *newSymbols =
4143           DateFormatSymbols::createForLocale(calLocale, status);
4144   if (U_FAILURE(status)) {
4145       delete calendarToAdopt;
4146       return;
4147   }
4148   DateFormat::adoptCalendar(calendarToAdopt);
4149   delete fSymbols;
4150   fSymbols = newSymbols;
4151   initializeDefaultCentury();  // we need a new century (possibly)
4152 }
4153 
4154 
4155 //----------------------------------------------------------------------
4156 
4157 
4158 // override the DateFormat implementation in order to
4159 // lazily initialize fCapitalizationBrkIter
4160 void
setContext(UDisplayContext value,UErrorCode & status)4161 SimpleDateFormat::setContext(UDisplayContext value, UErrorCode& status)
4162 {
4163     DateFormat::setContext(value, status);
4164 #if !UCONFIG_NO_BREAK_ITERATION
4165     if (U_SUCCESS(status)) {
4166         if ( fCapitalizationBrkIter == nullptr && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
4167                 value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE) ) {
4168             status = U_ZERO_ERROR;
4169             fCapitalizationBrkIter = BreakIterator::createSentenceInstance(fLocale, status);
4170             if (U_FAILURE(status)) {
4171                 delete fCapitalizationBrkIter;
4172                 fCapitalizationBrkIter = nullptr;
4173             }
4174         }
4175     }
4176 #endif
4177 }
4178 
4179 
4180 //----------------------------------------------------------------------
4181 
4182 
4183 UBool
isFieldUnitIgnored(UCalendarDateFields field) const4184 SimpleDateFormat::isFieldUnitIgnored(UCalendarDateFields field) const {
4185     return isFieldUnitIgnored(fPattern, field);
4186 }
4187 
4188 
4189 UBool
isFieldUnitIgnored(const UnicodeString & pattern,UCalendarDateFields field)4190 SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern,
4191                                      UCalendarDateFields field) {
4192     int32_t fieldLevel = fgCalendarFieldToLevel[field];
4193     int32_t level;
4194     char16_t ch;
4195     UBool inQuote = false;
4196     char16_t prevCh = 0;
4197     int32_t count = 0;
4198 
4199     for (int32_t i = 0; i < pattern.length(); ++i) {
4200         ch = pattern[i];
4201         if (ch != prevCh && count > 0) {
4202             level = getLevelFromChar(prevCh);
4203             // the larger the level, the smaller the field unit.
4204             if (fieldLevel <= level) {
4205                 return false;
4206             }
4207             count = 0;
4208         }
4209         if (ch == QUOTE) {
4210             if ((i+1) < pattern.length() && pattern[i+1] == QUOTE) {
4211                 ++i;
4212             } else {
4213                 inQuote = ! inQuote;
4214             }
4215         }
4216         else if (!inQuote && isSyntaxChar(ch)) {
4217             prevCh = ch;
4218             ++count;
4219         }
4220     }
4221     if (count > 0) {
4222         // last item
4223         level = getLevelFromChar(prevCh);
4224         if (fieldLevel <= level) {
4225             return false;
4226         }
4227     }
4228     return true;
4229 }
4230 
4231 //----------------------------------------------------------------------
4232 
4233 const Locale&
getSmpFmtLocale() const4234 SimpleDateFormat::getSmpFmtLocale() const {
4235     return fLocale;
4236 }
4237 
4238 //----------------------------------------------------------------------
4239 
4240 int32_t
checkIntSuffix(const UnicodeString & text,int32_t start,int32_t patLoc,UBool isNegative) const4241 SimpleDateFormat::checkIntSuffix(const UnicodeString& text, int32_t start,
4242                                  int32_t patLoc, UBool isNegative) const {
4243     // local variables
4244     UnicodeString suf;
4245     int32_t patternMatch;
4246     int32_t textPreMatch;
4247     int32_t textPostMatch;
4248 
4249     // check that we are still in range
4250     if ( (start > text.length()) ||
4251          (start < 0) ||
4252          (patLoc < 0) ||
4253          (patLoc > fPattern.length())) {
4254         // out of range, don't advance location in text
4255         return start;
4256     }
4257 
4258     // get the suffix
4259     DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fNumberFormat);
4260     if (decfmt != nullptr) {
4261         if (isNegative) {
4262             suf = decfmt->getNegativeSuffix(suf);
4263         }
4264         else {
4265             suf = decfmt->getPositiveSuffix(suf);
4266         }
4267     }
4268 
4269     // check for suffix
4270     if (suf.length() <= 0) {
4271         return start;
4272     }
4273 
4274     // check suffix will be encountered in the pattern
4275     patternMatch = compareSimpleAffix(suf,fPattern,patLoc);
4276 
4277     // check if a suffix will be encountered in the text
4278     textPreMatch = compareSimpleAffix(suf,text,start);
4279 
4280     // check if a suffix was encountered in the text
4281     textPostMatch = compareSimpleAffix(suf,text,start-suf.length());
4282 
4283     // check for suffix match
4284     if ((textPreMatch >= 0) && (patternMatch >= 0) && (textPreMatch == patternMatch)) {
4285         return start;
4286     }
4287     else if ((textPostMatch >= 0) && (patternMatch >= 0) && (textPostMatch == patternMatch)) {
4288         return  start - suf.length();
4289     }
4290 
4291     // should not get here
4292     return start;
4293 }
4294 
4295 //----------------------------------------------------------------------
4296 
4297 int32_t
compareSimpleAffix(const UnicodeString & affix,const UnicodeString & input,int32_t pos) const4298 SimpleDateFormat::compareSimpleAffix(const UnicodeString& affix,
4299                    const UnicodeString& input,
4300                    int32_t pos) const {
4301     int32_t start = pos;
4302     for (int32_t i=0; i<affix.length(); ) {
4303         UChar32 c = affix.char32At(i);
4304         int32_t len = U16_LENGTH(c);
4305         if (PatternProps::isWhiteSpace(c)) {
4306             // We may have a pattern like: \u200F \u0020
4307             //        and input text like: \u200F \u0020
4308             // Note that U+200F and U+0020 are Pattern_White_Space but only
4309             // U+0020 is UWhiteSpace.  So we have to first do a direct
4310             // match of the run of Pattern_White_Space in the pattern,
4311             // then match any extra characters.
4312             UBool literalMatch = false;
4313             while (pos < input.length() &&
4314                    input.char32At(pos) == c) {
4315                 literalMatch = true;
4316                 i += len;
4317                 pos += len;
4318                 if (i == affix.length()) {
4319                     break;
4320                 }
4321                 c = affix.char32At(i);
4322                 len = U16_LENGTH(c);
4323                 if (!PatternProps::isWhiteSpace(c)) {
4324                     break;
4325                 }
4326             }
4327 
4328             // Advance over run in pattern
4329             i = skipPatternWhiteSpace(affix, i);
4330 
4331             // Advance over run in input text
4332             // Must see at least one white space char in input,
4333             // unless we've already matched some characters literally.
4334             int32_t s = pos;
4335             pos = skipUWhiteSpace(input, pos);
4336             if (pos == s && !literalMatch) {
4337                 return -1;
4338             }
4339 
4340             // If we skip UWhiteSpace in the input text, we need to skip it in the pattern.
4341             // Otherwise, the previous lines may have skipped over text (such as U+00A0) that
4342             // is also in the affix.
4343             i = skipUWhiteSpace(affix, i);
4344         } else {
4345             if (pos < input.length() &&
4346                 input.char32At(pos) == c) {
4347                 i += len;
4348                 pos += len;
4349             } else {
4350                 return -1;
4351             }
4352         }
4353     }
4354     return pos - start;
4355 }
4356 
4357 //----------------------------------------------------------------------
4358 
4359 int32_t
skipPatternWhiteSpace(const UnicodeString & text,int32_t pos) const4360 SimpleDateFormat::skipPatternWhiteSpace(const UnicodeString& text, int32_t pos) const {
4361     const char16_t* s = text.getBuffer();
4362     return (int32_t)(PatternProps::skipWhiteSpace(s + pos, text.length() - pos) - s);
4363 }
4364 
4365 //----------------------------------------------------------------------
4366 
4367 int32_t
skipUWhiteSpace(const UnicodeString & text,int32_t pos) const4368 SimpleDateFormat::skipUWhiteSpace(const UnicodeString& text, int32_t pos) const {
4369     while (pos < text.length()) {
4370         UChar32 c = text.char32At(pos);
4371         if (!u_isUWhiteSpace(c)) {
4372             break;
4373         }
4374         pos += U16_LENGTH(c);
4375     }
4376     return pos;
4377 }
4378 
4379 //----------------------------------------------------------------------
4380 
4381 // Lazy TimeZoneFormat instantiation, semantically const.
4382 TimeZoneFormat *
tzFormat(UErrorCode & status) const4383 SimpleDateFormat::tzFormat(UErrorCode &status) const {
4384     Mutex m(&LOCK);
4385     if (fTimeZoneFormat == nullptr && U_SUCCESS(status)) {
4386         const_cast<SimpleDateFormat *>(this)->fTimeZoneFormat =
4387                 TimeZoneFormat::createInstance(fLocale, status);
4388     }
4389     return fTimeZoneFormat;
4390 }
4391 
parsePattern()4392 void SimpleDateFormat::parsePattern() {
4393     fHasMinute = false;
4394     fHasSecond = false;
4395     fHasHanYearChar = false;
4396 
4397     int len = fPattern.length();
4398     UBool inQuote = false;
4399     for (int32_t i = 0; i < len; ++i) {
4400         char16_t ch = fPattern[i];
4401         if (ch == QUOTE) {
4402             inQuote = !inQuote;
4403         }
4404         if (ch == 0x5E74) { // don't care whether this is inside quotes
4405             fHasHanYearChar = true;
4406         }
4407         if (!inQuote) {
4408             if (ch == 0x6D) {  // 0x6D == 'm'
4409                 fHasMinute = true;
4410             }
4411             if (ch == 0x73) {  // 0x73 == 's'
4412                 fHasSecond = true;
4413             }
4414         }
4415     }
4416 }
4417 
4418 U_NAMESPACE_END
4419 
4420 #endif /* #if !UCONFIG_NO_FORMATTING */
4421 
4422 //eof
4423