1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 1997-2016, International Business Machines Corporation and *
6 * others. All Rights Reserved. *
7 *******************************************************************************
8 *
9 * File SMPDTFMT.CPP
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 02/19/97 aliu Converted from java.
15 * 03/31/97 aliu Modified extensively to work with 50 locales.
16 * 04/01/97 aliu Added support for centuries.
17 * 07/09/97 helena Made ParsePosition into a class.
18 * 07/21/98 stephen Added initializeDefaultCentury.
19 * Removed getZoneIndex (added in DateFormatSymbols)
20 * Removed subParseLong
21 * Removed chk
22 * 02/22/99 stephen Removed character literals for EBCDIC safety
23 * 10/14/99 aliu Updated 2-digit year parsing so that only "00" thru
24 * "99" are recognized. {j28 4182066}
25 * 11/15/99 weiv Added support for week of year/day of week format
26 ********************************************************************************
27 */
28
29 #define ZID_KEY_MAX 128
30
31 #include "unicode/utypes.h"
32
33 #if !UCONFIG_NO_FORMATTING
34 #include "unicode/smpdtfmt.h"
35 #include "unicode/dtfmtsym.h"
36 #include "unicode/ures.h"
37 #include "unicode/msgfmt.h"
38 #include "unicode/calendar.h"
39 #include "unicode/gregocal.h"
40 #include "unicode/timezone.h"
41 #include "unicode/decimfmt.h"
42 #include "unicode/dcfmtsym.h"
43 #include "unicode/uchar.h"
44 #include "unicode/uniset.h"
45 #include "unicode/ustring.h"
46 #include "unicode/basictz.h"
47 #include "unicode/simpleformatter.h"
48 #include "unicode/simplenumberformatter.h"
49 #include "unicode/simpletz.h"
50 #include "unicode/rbtz.h"
51 #include "unicode/tzfmt.h"
52 #include "unicode/ucasemap.h"
53 #include "unicode/utf16.h"
54 #include "unicode/vtzone.h"
55 #include "unicode/udisplaycontext.h"
56 #include "unicode/brkiter.h"
57 #include "unicode/rbnf.h"
58 #include "unicode/dtptngen.h"
59 #include "uresimp.h"
60 #include "olsontz.h"
61 #include "patternprops.h"
62 #include "fphdlimp.h"
63 #include "hebrwcal.h"
64 #include "cstring.h"
65 #include "uassert.h"
66 #include "cmemory.h"
67 #include "umutex.h"
68 #include "mutex.h"
69 #include <float.h>
70 #include "smpdtfst.h"
71 #include "sharednumberformat.h"
72 #include "ucasemap_imp.h"
73 #include "ustr_imp.h"
74 #include "charstr.h"
75 #include "uvector.h"
76 #include "cstr.h"
77 #include "dayperiodrules.h"
78 #include "tznames_impl.h" // ZONE_NAME_U16_MAX
79 #include "number_utypes.h"
80
81 #if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL)
82 #include <stdio.h>
83 #endif
84
85 // *****************************************************************************
86 // class SimpleDateFormat
87 // *****************************************************************************
88
89 U_NAMESPACE_BEGIN
90
91 /**
92 * Last-resort string to use for "GMT" when constructing time zone strings.
93 */
94 // For time zones that have no names, use strings GMT+minutes and
95 // GMT-minutes. For instance, in France the time zone is GMT+60.
96 // Also accepted are GMT+H:MM or GMT-H:MM.
97 // Currently not being used
98 //static const char16_t gGmt[] = {0x0047, 0x004D, 0x0054, 0x0000}; // "GMT"
99 //static const char16_t gGmtPlus[] = {0x0047, 0x004D, 0x0054, 0x002B, 0x0000}; // "GMT+"
100 //static const char16_t gGmtMinus[] = {0x0047, 0x004D, 0x0054, 0x002D, 0x0000}; // "GMT-"
101 //static const char16_t gDefGmtPat[] = {0x0047, 0x004D, 0x0054, 0x007B, 0x0030, 0x007D, 0x0000}; /* GMT{0} */
102 //static const char16_t gDefGmtNegHmsPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* -HH:mm:ss */
103 //static const char16_t gDefGmtNegHmPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* -HH:mm */
104 //static const char16_t gDefGmtPosHmsPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* +HH:mm:ss */
105 //static const char16_t gDefGmtPosHmPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* +HH:mm */
106 //static const char16_t gUt[] = {0x0055, 0x0054, 0x0000}; // "UT"
//static const char16_t gUtc[] = {0x0055, 0x0054, 0x0043, 0x0000}; // "UTC"
108
/**
 * Lengths, in UTF-16 code units and excluding the terminator, of the
 * last-resort GMT/UT strings listed (commented out) above.
 */
enum GmtPatSize {
    kGmtLen    = 3,     // "GMT"
    kGmtPatLen = 6,     // "GMT{0}"
    kNegHmsLen = 9,     // "-HH:mm:ss"
    kNegHmLen  = 6,     // "-HH:mm"
    kPosHmsLen = 9,     // "+HH:mm:ss"
    kPosHmLen  = 6,     // "+HH:mm"
    kUtLen     = 2,     // "UT"
    kUtcLen    = 3      // "UTC"
};
119
120 // Stuff needed for numbering system overrides
121
/**
 * Tag handed to processOverrideString() together with a numbering-system
 * override string.
 */
enum OvrStrType {
    kOvrStrDate = 0,
    kOvrStrTime = 1,
    kOvrStrBoth = 2
};
127
128 static const UDateFormatField kDateFields[] = {
129 UDAT_YEAR_FIELD,
130 UDAT_MONTH_FIELD,
131 UDAT_DATE_FIELD,
132 UDAT_DAY_OF_YEAR_FIELD,
133 UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
134 UDAT_WEEK_OF_YEAR_FIELD,
135 UDAT_WEEK_OF_MONTH_FIELD,
136 UDAT_YEAR_WOY_FIELD,
137 UDAT_EXTENDED_YEAR_FIELD,
138 UDAT_JULIAN_DAY_FIELD,
139 UDAT_STANDALONE_DAY_FIELD,
140 UDAT_STANDALONE_MONTH_FIELD,
141 UDAT_QUARTER_FIELD,
142 UDAT_STANDALONE_QUARTER_FIELD,
143 UDAT_YEAR_NAME_FIELD,
144 UDAT_RELATED_YEAR_FIELD };
145 static const int8_t kDateFieldsCount = 16;
146
147 static const UDateFormatField kTimeFields[] = {
148 UDAT_HOUR_OF_DAY1_FIELD,
149 UDAT_HOUR_OF_DAY0_FIELD,
150 UDAT_MINUTE_FIELD,
151 UDAT_SECOND_FIELD,
152 UDAT_FRACTIONAL_SECOND_FIELD,
153 UDAT_HOUR1_FIELD,
154 UDAT_HOUR0_FIELD,
155 UDAT_MILLISECONDS_IN_DAY_FIELD,
156 UDAT_TIMEZONE_RFC_FIELD,
157 UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD };
158 static const int8_t kTimeFieldsCount = 10;
159
160
// Pattern of last resort, used when no usable pattern can be loaded from a
// resource.
static const char16_t gDefaultPattern[] = u"yMMdd hh:mm a";

// This prefix is designed to NEVER MATCH real text, in order to
// suppress the parsing of negative numbers.  Adjust as needed (if
// this becomes valid Unicode).
static const char16_t SUPPRESS_NEGATIVE_PREFIX[] = u"\uAB00";

/** Single quote (U+0027). */
static const char16_t QUOTE = u'\'';
178
179 /*
180 * The field range check bias for each UDateFormatField.
181 * The bias is added to the minimum and maximum values
182 * before they are compared to the parsed number.
183 * For example, the calendar stores zero-based month numbers
184 * but the parsed month numbers start at 1, so the bias is 1.
185 *
186 * A value of -1 means that the value is not checked.
187 */
// Indexed by UDateFormatField; one entry per field, in enum order.
// NOTE(review): the table holds 36 entries. The code that subscripts it is
// outside this section -- confirm every index used is below 36.
static const int32_t gFieldRangeBias[] = {
    /*  0 'G' UDAT_ERA_FIELD                           */ -1,
    /*  1 'y' UDAT_YEAR_FIELD                          */ -1,
    /*  2 'M' UDAT_MONTH_FIELD                         */  1,
    /*  3 'd' UDAT_DATE_FIELD                          */  0,
    /*  4 'k' UDAT_HOUR_OF_DAY1_FIELD                  */ -1,
    /*  5 'H' UDAT_HOUR_OF_DAY0_FIELD                  */ -1,
    /*  6 'm' UDAT_MINUTE_FIELD                        */  0,
    /*  7 's' UDAT_SECOND_FIELD                        */  0,
    /*  8 'S' UDAT_FRACTIONAL_SECOND_FIELD (0-999?)    */ -1,
    /*  9 'E' UDAT_DAY_OF_WEEK_FIELD (1-7?)            */ -1,
    /* 10 'D' UDAT_DAY_OF_YEAR_FIELD (1 - 366?)        */ -1,
    /* 11 'F' UDAT_DAY_OF_WEEK_IN_MONTH_FIELD (1-5?)   */ -1,
    /* 12 'w' UDAT_WEEK_OF_YEAR_FIELD (1-52?)          */ -1,
    /* 13 'W' UDAT_WEEK_OF_MONTH_FIELD (1-5?)          */ -1,
    /* 14 'a' UDAT_AM_PM_FIELD                         */ -1,
    /* 15 'h' UDAT_HOUR1_FIELD                         */ -1,
    /* 16 'K' UDAT_HOUR0_FIELD                         */ -1,
    /* 17 'z' UDAT_TIMEZONE_FIELD                      */ -1,
    /* 18 'Y' UDAT_YEAR_WOY_FIELD                      */ -1,
    /* 19 'e' UDAT_DOW_LOCAL_FIELD                     */ -1,
    /* 20 'u' UDAT_EXTENDED_YEAR_FIELD                 */ -1,
    /* 21 'g' UDAT_JULIAN_DAY_FIELD                    */ -1,
    /* 22 'A' UDAT_MILLISECONDS_IN_DAY_FIELD           */ -1,
    /* 23 'Z' UDAT_TIMEZONE_RFC_FIELD                  */ -1,
    /* 24 'v' UDAT_TIMEZONE_GENERIC_FIELD              */ -1,
    /* 25 'c' UDAT_STANDALONE_DAY_FIELD                */  0,
    /* 26 'L' UDAT_STANDALONE_MONTH_FIELD              */  1,
    /* 27 'Q' UDAT_QUARTER_FIELD (1-4?)                */ -1,
    /* 28 'q' UDAT_STANDALONE_QUARTER_FIELD            */ -1,
    /* 29 'V' UDAT_TIMEZONE_SPECIAL_FIELD              */ -1,
    /* 30 'U' UDAT_YEAR_NAME_FIELD                     */ -1,
    /* 31 'O' UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD */ -1,
    /* 32 'X' UDAT_TIMEZONE_ISO_FIELD                  */ -1,
    /* 33 'x' UDAT_TIMEZONE_ISO_LOCAL_FIELD            */ -1,
    /* 34 'r' UDAT_RELATED_YEAR_FIELD                  */ -1,
    // UDAT_TIME_SEPARATOR_FIELD: pattern character ':' when
    // UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR is set, otherwise none.
    // The bias is -1 in both configurations.
    /* 35                                              */ -1,
};
230
// When calendar uses hebr numbering (i.e. he@calendar=hebrew),
// offset the years within the current millennium down to 1-999
static constexpr int32_t HEBREW_CAL_CUR_MILLENIUM_START_YEAR = 5000;
static constexpr int32_t HEBREW_CAL_CUR_MILLENIUM_END_YEAR   = 6000;

/**
 * Maximum range for detecting the daylight offset of a time zone when the
 * parsed time zone string indicates daylight saving time, but the detected
 * time zone does not observe daylight saving time at the parsed date.
 * Value: 30 years of 365 days, expressed in milliseconds.
 */
static constexpr double MAX_DAYLIGHT_DETECTION_RANGE =
    30.0 * 365 * 24 * 60 * 60 * 1000;
242
243 static UMutex LOCK;
244
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat)245 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat)
246
247 SimpleDateFormat::NSOverride::~NSOverride() {
248 if (snf != nullptr) {
249 snf->removeRef();
250 }
251 }
252
253
free()254 void SimpleDateFormat::NSOverride::free() {
255 NSOverride *cur = this;
256 while (cur) {
257 NSOverride *next_temp = cur->next;
258 delete cur;
259 cur = next_temp;
260 }
261 }
262
263 // no matter what the locale's default number format looked like, we want
264 // to modify it so that it doesn't use thousands separators, doesn't always
265 // show the decimal point, and recognizes integers only when parsing
fixNumberFormatForDates(NumberFormat & nf)266 static void fixNumberFormatForDates(NumberFormat &nf) {
267 nf.setGroupingUsed(false);
268 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(&nf);
269 if (decfmt != nullptr) {
270 decfmt->setDecimalSeparatorAlwaysShown(false);
271 }
272 nf.setParseIntegerOnly(true);
273 nf.setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00"
274 }
275
createSharedNumberFormat(NumberFormat * nfToAdopt)276 static const SharedNumberFormat *createSharedNumberFormat(
277 NumberFormat *nfToAdopt) {
278 fixNumberFormatForDates(*nfToAdopt);
279 const SharedNumberFormat *result = new SharedNumberFormat(nfToAdopt);
280 if (result == nullptr) {
281 delete nfToAdopt;
282 }
283 return result;
284 }
285
createSharedNumberFormat(const Locale & loc,UErrorCode & status)286 static const SharedNumberFormat *createSharedNumberFormat(
287 const Locale &loc, UErrorCode &status) {
288 NumberFormat *nf = NumberFormat::createInstance(loc, status);
289 if (U_FAILURE(status)) {
290 return nullptr;
291 }
292 const SharedNumberFormat *result = createSharedNumberFormat(nf);
293 if (result == nullptr) {
294 status = U_MEMORY_ALLOCATION_ERROR;
295 }
296 return result;
297 }
298
allocSharedNumberFormatters()299 static const SharedNumberFormat **allocSharedNumberFormatters() {
300 const SharedNumberFormat **result = (const SharedNumberFormat**)
301 uprv_malloc(UDAT_FIELD_COUNT * sizeof(const SharedNumberFormat*));
302 if (result == nullptr) {
303 return nullptr;
304 }
305 for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) {
306 result[i] = nullptr;
307 }
308 return result;
309 }
310
freeSharedNumberFormatters(const SharedNumberFormat ** list)311 static void freeSharedNumberFormatters(const SharedNumberFormat ** list) {
312 for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) {
313 SharedObject::clearPtr(list[i]);
314 }
315 uprv_free(list);
316 }
317
getNumberFormatByIndex(UDateFormatField index) const318 const NumberFormat *SimpleDateFormat::getNumberFormatByIndex(
319 UDateFormatField index) const {
320 if (fSharedNumberFormatters == nullptr ||
321 fSharedNumberFormatters[index] == nullptr) {
322 return fNumberFormat;
323 }
324 return &(**fSharedNumberFormatters[index]);
325 }
326
327 //----------------------------------------------------------------------
328
~SimpleDateFormat()329 SimpleDateFormat::~SimpleDateFormat()
330 {
331 delete fSymbols;
332 if (fSharedNumberFormatters) {
333 freeSharedNumberFormatters(fSharedNumberFormatters);
334 }
335 if (fTimeZoneFormat) {
336 delete fTimeZoneFormat;
337 }
338 delete fSimpleNumberFormatter;
339
340 #if !UCONFIG_NO_BREAK_ITERATION
341 delete fCapitalizationBrkIter;
342 #endif
343 }
344
345 //----------------------------------------------------------------------
346
SimpleDateFormat(UErrorCode & status)347 SimpleDateFormat::SimpleDateFormat(UErrorCode& status)
348 : fLocale(Locale::getDefault())
349 {
350 initializeBooleanAttributes();
351 construct(kShort, (EStyle) (kShort + kDateOffset), fLocale, status);
352 initializeDefaultCentury();
353 }
354
355 //----------------------------------------------------------------------
356
SimpleDateFormat(const UnicodeString & pattern,UErrorCode & status)357 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
358 UErrorCode &status)
359 : fPattern(pattern),
360 fLocale(Locale::getDefault())
361 {
362 fDateOverride.setToBogus();
363 fTimeOverride.setToBogus();
364 initializeBooleanAttributes();
365 initializeCalendar(nullptr,fLocale,status);
366 fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
367 initialize(fLocale, status);
368 initializeDefaultCentury();
369
370 }
371 //----------------------------------------------------------------------
372
SimpleDateFormat(const UnicodeString & pattern,const UnicodeString & override,UErrorCode & status)373 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
374 const UnicodeString& override,
375 UErrorCode &status)
376 : fPattern(pattern),
377 fLocale(Locale::getDefault())
378 {
379 fDateOverride.setTo(override);
380 fTimeOverride.setToBogus();
381 initializeBooleanAttributes();
382 initializeCalendar(nullptr,fLocale,status);
383 fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
384 initialize(fLocale, status);
385 initializeDefaultCentury();
386
387 processOverrideString(fLocale,override,kOvrStrBoth,status);
388
389 }
390
391 //----------------------------------------------------------------------
392
SimpleDateFormat(const UnicodeString & pattern,const Locale & locale,UErrorCode & status)393 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
394 const Locale& locale,
395 UErrorCode& status)
396 : fPattern(pattern),
397 fLocale(locale)
398 {
399
400 fDateOverride.setToBogus();
401 fTimeOverride.setToBogus();
402 initializeBooleanAttributes();
403
404 initializeCalendar(nullptr,fLocale,status);
405 fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
406 initialize(fLocale, status);
407 initializeDefaultCentury();
408 }
409
410 //----------------------------------------------------------------------
411
SimpleDateFormat(const UnicodeString & pattern,const UnicodeString & override,const Locale & locale,UErrorCode & status)412 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
413 const UnicodeString& override,
414 const Locale& locale,
415 UErrorCode& status)
416 : fPattern(pattern),
417 fLocale(locale)
418 {
419
420 fDateOverride.setTo(override);
421 fTimeOverride.setToBogus();
422 initializeBooleanAttributes();
423
424 initializeCalendar(nullptr,fLocale,status);
425 fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
426 initialize(fLocale, status);
427 initializeDefaultCentury();
428
429 processOverrideString(locale,override,kOvrStrBoth,status);
430
431 }
432
433 //----------------------------------------------------------------------
434
SimpleDateFormat(const UnicodeString & pattern,DateFormatSymbols * symbolsToAdopt,UErrorCode & status)435 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
436 DateFormatSymbols* symbolsToAdopt,
437 UErrorCode& status)
438 : fPattern(pattern),
439 fLocale(Locale::getDefault()),
440 fSymbols(symbolsToAdopt)
441 {
442
443 fDateOverride.setToBogus();
444 fTimeOverride.setToBogus();
445 initializeBooleanAttributes();
446
447 initializeCalendar(nullptr,fLocale,status);
448 initialize(fLocale, status);
449 initializeDefaultCentury();
450 }
451
452 //----------------------------------------------------------------------
453
SimpleDateFormat(const UnicodeString & pattern,const DateFormatSymbols & symbols,UErrorCode & status)454 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
455 const DateFormatSymbols& symbols,
456 UErrorCode& status)
457 : fPattern(pattern),
458 fLocale(Locale::getDefault()),
459 fSymbols(new DateFormatSymbols(symbols))
460 {
461
462 fDateOverride.setToBogus();
463 fTimeOverride.setToBogus();
464 initializeBooleanAttributes();
465
466 initializeCalendar(nullptr, fLocale, status);
467 initialize(fLocale, status);
468 initializeDefaultCentury();
469 }
470
471 //----------------------------------------------------------------------
472
473 // Not for public consumption; used by DateFormat
SimpleDateFormat(EStyle timeStyle,EStyle dateStyle,const Locale & locale,UErrorCode & status)474 SimpleDateFormat::SimpleDateFormat(EStyle timeStyle,
475 EStyle dateStyle,
476 const Locale& locale,
477 UErrorCode& status)
478 : fLocale(locale)
479 {
480 initializeBooleanAttributes();
481 construct(timeStyle, dateStyle, fLocale, status);
482 if(U_SUCCESS(status)) {
483 initializeDefaultCentury();
484 }
485 }
486
487 //----------------------------------------------------------------------
488
489 /**
490 * Not for public consumption; used by DateFormat. This constructor
491 * never fails. If the resource data is not available, it uses the
492 * the last resort symbols.
493 */
SimpleDateFormat(const Locale & locale,UErrorCode & status)494 SimpleDateFormat::SimpleDateFormat(const Locale& locale,
495 UErrorCode& status)
496 : fPattern(gDefaultPattern),
497 fLocale(locale)
498 {
499 if (U_FAILURE(status)) return;
500 initializeBooleanAttributes();
501 initializeCalendar(nullptr, fLocale, status);
502 fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
503 if (U_FAILURE(status))
504 {
505 status = U_ZERO_ERROR;
506 delete fSymbols;
507 // This constructor doesn't fail; it uses last resort data
508 fSymbols = new DateFormatSymbols(status);
509 /* test for nullptr */
510 if (fSymbols == 0) {
511 status = U_MEMORY_ALLOCATION_ERROR;
512 return;
513 }
514 }
515
516 fDateOverride.setToBogus();
517 fTimeOverride.setToBogus();
518
519 initialize(fLocale, status);
520 if(U_SUCCESS(status)) {
521 initializeDefaultCentury();
522 }
523 }
524
525 //----------------------------------------------------------------------
526
/** Copy constructor; the member-wise copy is delegated to operator=. */
SimpleDateFormat::SimpleDateFormat(const SimpleDateFormat& other)
:   DateFormat(other),
    fLocale(other.fLocale)
{
    initializeBooleanAttributes();
    SimpleDateFormat::operator=(other);
}
534
535 //----------------------------------------------------------------------
536
/**
 * Copy assignment. Replaces every owned sub-object with a copy of (or, for
 * the shared number formatters, an extra reference to) the one held by
 * `other`.
 *
 * Allocation failures are not reported: the affected pointer is left null,
 * and the status from rebuilding the simple number formatter is discarded.
 *
 * @param other the formatter to copy from.
 * @return *this.
 */
SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other)
{
    if (this == &other) {
        return *this;
    }

    // fSimpleNumberFormatter references the number formatter; delete it
    // before the base-class assignment, which may invalidate that formatter.
    delete fSimpleNumberFormatter;
    fSimpleNumberFormatter = nullptr;

    DateFormat::operator=(other);
    fDateOverride = other.fDateOverride;
    fTimeOverride = other.fTimeOverride;

    delete fSymbols;
    fSymbols = nullptr;

    if (other.fSymbols) {
        fSymbols = new DateFormatSymbols(*other.fSymbols);
    }

    fDefaultCenturyStart         = other.fDefaultCenturyStart;
    fDefaultCenturyStartYear     = other.fDefaultCenturyStartYear;
    fHaveDefaultCentury          = other.fHaveDefaultCentury;

    fPattern = other.fPattern;
    fHasMinute = other.fHasMinute;
    fHasSecond = other.fHasSecond;

    fLocale = other.fLocale;

    // TimeZoneFormat can now be set independently via setter.
    // If it is nullptr, it will be lazily initialized from locale.
    delete fTimeZoneFormat;
    fTimeZoneFormat = nullptr;
    TimeZoneFormat *otherTZFormat;
    {
        // Synchronization is required here, when accessing other.fTimeZoneFormat,
        // because another thread may be concurrently executing other.tzFormat(),
        // a logically const function that lazily creates other.fTimeZoneFormat.
        //
        // Without synchronization, reordered memory writes could allow us
        // to see a non-null fTimeZoneFormat before the object itself was
        // fully initialized. In case of a race, it doesn't matter whether
        // we see a null or a fully initialized other.fTimeZoneFormat,
        // only that we avoid seeing a partially initialized object.
        //
        // Once initialized, no const function can modify fTimeZoneFormat,
        // meaning that once we have safely grabbed the other.fTimeZoneFormat
        // pointer, continued synchronization is not required to use it.
        Mutex m(&LOCK);
        otherTZFormat = other.fTimeZoneFormat;
    }
    if (otherTZFormat) {
        fTimeZoneFormat = new TimeZoneFormat(*otherTZFormat);
    }

#if !UCONFIG_NO_BREAK_ITERATION
    // Release the iterator this object already owns before taking a clone.
    // Otherwise it would leak when `other` has one, and a stale iterator
    // would survive when `other` has none. This relies on the pointer being
    // initialized (possibly to nullptr) before operator= runs, exactly as
    // the deletes of fSymbols and fTimeZoneFormat above already do.
    delete fCapitalizationBrkIter;
    fCapitalizationBrkIter = nullptr;
    if (other.fCapitalizationBrkIter != nullptr) {
        fCapitalizationBrkIter = (other.fCapitalizationBrkIter)->clone();
    }
#endif

    if (fSharedNumberFormatters != nullptr) {
        freeSharedNumberFormatters(fSharedNumberFormatters);
        fSharedNumberFormatters = nullptr;
    }
    if (other.fSharedNumberFormatters != nullptr) {
        fSharedNumberFormatters = allocSharedNumberFormatters();
        if (fSharedNumberFormatters) {
            for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) {
                SharedObject::copyPtr(
                        other.fSharedNumberFormatters[i],
                        fSharedNumberFormatters[i]);
            }
        }
    }

    UErrorCode localStatus = U_ZERO_ERROR;
    // SimpleNumberFormatter does not have a copy constructor. Furthermore,
    // it references data from the number formatter, so that reference must
    // be rematerialized after the number formatter has been copied over.
    initSimpleNumberFormatter(localStatus);
    return *this;
}
622
623 //----------------------------------------------------------------------
624
625 SimpleDateFormat*
clone() const626 SimpleDateFormat::clone() const
627 {
628 return new SimpleDateFormat(*this);
629 }
630
631 //----------------------------------------------------------------------
632
633 bool
operator ==(const Format & other) const634 SimpleDateFormat::operator==(const Format& other) const
635 {
636 if (DateFormat::operator==(other)) {
637 // The DateFormat::operator== check for fCapitalizationContext equality above
638 // is sufficient to check equality of all derived context-related data.
639 // DateFormat::operator== guarantees following cast is safe
640 SimpleDateFormat* that = (SimpleDateFormat*)&other;
641 return (fPattern == that->fPattern &&
642 fSymbols != nullptr && // Check for pathological object
643 that->fSymbols != nullptr && // Check for pathological object
644 *fSymbols == *that->fSymbols &&
645 fHaveDefaultCentury == that->fHaveDefaultCentury &&
646 fDefaultCenturyStart == that->fDefaultCenturyStart);
647 }
648 return false;
649 }
650
651 //----------------------------------------------------------------------
652 static const char16_t* timeSkeletons[4] = {
653 u"jmmsszzzz", // kFull
654 u"jmmssz", // kLong
655 u"jmmss", // kMedium
656 u"jmm", // kShort
657 };
658
construct(EStyle timeStyle,EStyle dateStyle,const Locale & locale,UErrorCode & status)659 void SimpleDateFormat::construct(EStyle timeStyle,
660 EStyle dateStyle,
661 const Locale& locale,
662 UErrorCode& status)
663 {
664 // called by several constructors to load pattern data from the resources
665 if (U_FAILURE(status)) return;
666
667 // We will need the calendar to know what type of symbols to load.
668 initializeCalendar(nullptr, locale, status);
669 if (U_FAILURE(status)) return;
670
671 // Load date time patterns directly from resources.
672 const char* cType = fCalendar ? fCalendar->getType() : nullptr;
673 LocalUResourceBundlePointer bundle(ures_open(nullptr, locale.getBaseName(), &status));
674 if (U_FAILURE(status)) return;
675
676 UBool cTypeIsGregorian = true;
677 LocalUResourceBundlePointer dateTimePatterns;
678 if (cType != nullptr && uprv_strcmp(cType, "gregorian") != 0) {
679 CharString resourcePath("calendar/", status);
680 resourcePath.append(cType, status).append("/DateTimePatterns", status);
681 dateTimePatterns.adoptInstead(
682 ures_getByKeyWithFallback(bundle.getAlias(), resourcePath.data(),
683 (UResourceBundle*)nullptr, &status));
684 cTypeIsGregorian = false;
685 }
686
687 // Check for "gregorian" fallback.
688 if (cTypeIsGregorian || status == U_MISSING_RESOURCE_ERROR) {
689 status = U_ZERO_ERROR;
690 dateTimePatterns.adoptInstead(
691 ures_getByKeyWithFallback(bundle.getAlias(),
692 "calendar/gregorian/DateTimePatterns",
693 (UResourceBundle*)nullptr, &status));
694 }
695 if (U_FAILURE(status)) return;
696
697 LocalUResourceBundlePointer currentBundle;
698
699 if (ures_getSize(dateTimePatterns.getAlias()) <= kDateTime)
700 {
701 status = U_INVALID_FORMAT_ERROR;
702 return;
703 }
704
705 setLocaleIDs(ures_getLocaleByType(dateTimePatterns.getAlias(), ULOC_VALID_LOCALE, &status),
706 ures_getLocaleByType(dateTimePatterns.getAlias(), ULOC_ACTUAL_LOCALE, &status));
707
708 // create a symbols object from the locale
709 fSymbols = DateFormatSymbols::createForLocale(locale, status);
710 if (U_FAILURE(status)) return;
711 /* test for nullptr */
712 if (fSymbols == 0) {
713 status = U_MEMORY_ALLOCATION_ERROR;
714 return;
715 }
716
717 const char16_t *resStr,*ovrStr;
718 int32_t resStrLen,ovrStrLen = 0;
719 fDateOverride.setToBogus();
720 fTimeOverride.setToBogus();
721
722 UnicodeString timePattern;
723 if (timeStyle >= kFull && timeStyle <= kShort) {
724 bool hasRgOrHcSubtag = false;
725 // also use DTPG if the locale has the "rg" or "hc" ("hours") subtag-- even if the overriding region
726 // or hour cycle is the same as the one we get by default, we go through the DateTimePatternGenerator
727 UErrorCode dummyErr1 = U_ZERO_ERROR, dummyErr2 = U_ZERO_ERROR;
728 if (locale.getKeywordValue("rg", nullptr, 0, dummyErr1) > 0 || locale.getKeywordValue("hours", nullptr, 0, dummyErr2) > 0) {
729 hasRgOrHcSubtag = true;
730 }
731
732 const char* baseLocID = locale.getBaseName();
733 if (baseLocID[0]!=0 && uprv_strcmp(baseLocID,"und")!=0) {
734 UErrorCode useStatus = U_ZERO_ERROR;
735 Locale baseLoc(baseLocID);
736 Locale validLoc(getLocale(ULOC_VALID_LOCALE, useStatus));
737 if (hasRgOrHcSubtag || (U_SUCCESS(useStatus) && validLoc!=baseLoc)) {
738 bool useDTPG = hasRgOrHcSubtag;
739 const char* baseReg = baseLoc.getCountry(); // empty string if no region
740 if ((baseReg[0]!=0 && uprv_strncmp(baseReg,validLoc.getCountry(),ULOC_COUNTRY_CAPACITY)!=0)
741 || uprv_strncmp(baseLoc.getLanguage(),validLoc.getLanguage(),ULOC_LANG_CAPACITY)!=0) {
742 // use DTPG if
743 // * baseLoc has a region and validLoc does not have the same one (or has none), OR
744 // * validLoc has a different language code than baseLoc
745 // * the original locale has the rg or hc subtag
746 useDTPG = true;
747 }
748 if (useDTPG) {
749 // The standard time formats may have the wrong time cycle, because:
750 // the valid locale differs in important ways (region, language) from
751 // the base locale.
752 // We could *also* check whether they do actually have a mismatch with
753 // the time cycle preferences for the region, but that is a lot more
754 // work for little or no additional benefit, since just going ahead
755 // and always synthesizing the time format as per the following should
756 // create a locale-appropriate pattern with cycle that matches the
757 // region preferences anyway.
758 LocalPointer<DateTimePatternGenerator> dtpg(DateTimePatternGenerator::createInstanceNoStdPat(locale, useStatus));
759 if (U_SUCCESS(useStatus)) {
760 UnicodeString timeSkeleton(true, timeSkeletons[timeStyle], -1);
761 timePattern = dtpg->getBestPattern(timeSkeleton, useStatus);
762 }
763 }
764 }
765 }
766 }
767
768 // if the pattern should include both date and time information, use the date/time
769 // pattern string as a guide to tell use how to glue together the appropriate date
770 // and time pattern strings.
771 if ((timeStyle != kNone) && (dateStyle != kNone))
772 {
773 UnicodeString tempus1(timePattern);
774 if (tempus1.length() == 0) {
775 currentBundle.adoptInstead(
776 ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, nullptr, &status));
777 if (U_FAILURE(status)) {
778 status = U_INVALID_FORMAT_ERROR;
779 return;
780 }
781 switch (ures_getType(currentBundle.getAlias())) {
782 case URES_STRING: {
783 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
784 break;
785 }
786 case URES_ARRAY: {
787 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
788 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
789 fTimeOverride.setTo(true, ovrStr, ovrStrLen);
790 break;
791 }
792 default: {
793 status = U_INVALID_FORMAT_ERROR;
794 return;
795 }
796 }
797
798 tempus1.setTo(true, resStr, resStrLen);
799 }
800
801 currentBundle.adoptInstead(
802 ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)dateStyle, nullptr, &status));
803 if (U_FAILURE(status)) {
804 status = U_INVALID_FORMAT_ERROR;
805 return;
806 }
807 switch (ures_getType(currentBundle.getAlias())) {
808 case URES_STRING: {
809 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
810 break;
811 }
812 case URES_ARRAY: {
813 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
814 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
815 fDateOverride.setTo(true, ovrStr, ovrStrLen);
816 break;
817 }
818 default: {
819 status = U_INVALID_FORMAT_ERROR;
820 return;
821 }
822 }
823
824 UnicodeString tempus2(true, resStr, resStrLen);
825
826 // Currently, for compatibility with pre-CLDR-42 data, we default to the "atTime"
827 // combining patterns. Depending on guidance in CLDR 42 spec and on DisplayOptions,
828 // we may change this.
829 LocalUResourceBundlePointer dateAtTimePatterns;
830 if (!cTypeIsGregorian) {
831 CharString resourcePath("calendar/", status);
832 resourcePath.append(cType, status).append("/DateTimePatterns%atTime", status);
833 dateAtTimePatterns.adoptInstead(
834 ures_getByKeyWithFallback(bundle.getAlias(), resourcePath.data(),
835 nullptr, &status));
836 }
837 if (cTypeIsGregorian || status == U_MISSING_RESOURCE_ERROR) {
838 status = U_ZERO_ERROR;
839 dateAtTimePatterns.adoptInstead(
840 ures_getByKeyWithFallback(bundle.getAlias(),
841 "calendar/gregorian/DateTimePatterns%atTime",
842 nullptr, &status));
843 }
844 if (U_SUCCESS(status) && ures_getSize(dateAtTimePatterns.getAlias()) >= 4) {
845 resStr = ures_getStringByIndex(dateAtTimePatterns.getAlias(), dateStyle - kDateOffset, &resStrLen, &status);
846 } else {
847 status = U_ZERO_ERROR;
848 int32_t glueIndex = kDateTime;
849 int32_t patternsSize = ures_getSize(dateTimePatterns.getAlias());
850 if (patternsSize >= (kDateTimeOffset + kShort + 1)) {
851 // Get proper date time format
852 glueIndex = (int32_t)(kDateTimeOffset + (dateStyle - kDateOffset));
853 }
854
855 resStr = ures_getStringByIndex(dateTimePatterns.getAlias(), glueIndex, &resStrLen, &status);
856 }
857 SimpleFormatter(UnicodeString(true, resStr, resStrLen), 2, 2, status).
858 format(tempus1, tempus2, fPattern, status);
859 }
860 // if the pattern includes just time data or just date date, load the appropriate
861 // pattern string from the resources
862 // setTo() - see DateFormatSymbols::assignArray comments
863 else if (timeStyle != kNone) {
864 fPattern.setTo(timePattern);
865 if (fPattern.length() == 0) {
866 currentBundle.adoptInstead(
867 ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, nullptr, &status));
868 if (U_FAILURE(status)) {
869 status = U_INVALID_FORMAT_ERROR;
870 return;
871 }
872 switch (ures_getType(currentBundle.getAlias())) {
873 case URES_STRING: {
874 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
875 break;
876 }
877 case URES_ARRAY: {
878 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
879 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
880 fDateOverride.setTo(true, ovrStr, ovrStrLen);
881 break;
882 }
883 default: {
884 status = U_INVALID_FORMAT_ERROR;
885 return;
886 }
887 }
888 fPattern.setTo(true, resStr, resStrLen);
889 }
890 }
891 else if (dateStyle != kNone) {
892 currentBundle.adoptInstead(
893 ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)dateStyle, nullptr, &status));
894 if (U_FAILURE(status)) {
895 status = U_INVALID_FORMAT_ERROR;
896 return;
897 }
898 switch (ures_getType(currentBundle.getAlias())) {
899 case URES_STRING: {
900 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
901 break;
902 }
903 case URES_ARRAY: {
904 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
905 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
906 fDateOverride.setTo(true, ovrStr, ovrStrLen);
907 break;
908 }
909 default: {
910 status = U_INVALID_FORMAT_ERROR;
911 return;
912 }
913 }
914 fPattern.setTo(true, resStr, resStrLen);
915 }
916
917 // and if it includes _neither_, that's an error
918 else
919 status = U_INVALID_FORMAT_ERROR;
920
921 // finally, finish initializing by creating a Calendar and a NumberFormat
922 initialize(locale, status);
923 }
924
925 //----------------------------------------------------------------------
926
927 Calendar*
initializeCalendar(TimeZone * adoptZone,const Locale & locale,UErrorCode & status)928 SimpleDateFormat::initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status)
929 {
930 if(!U_FAILURE(status)) {
931 fCalendar = Calendar::createInstance(
932 adoptZone ? adoptZone : TimeZone::forLocaleOrDefault(locale), locale, status);
933 }
934 return fCalendar;
935 }
936
937 void
initialize(const Locale & locale,UErrorCode & status)938 SimpleDateFormat::initialize(const Locale& locale,
939 UErrorCode& status)
940 {
941 if (U_FAILURE(status)) return;
942
943 parsePattern(); // Need this before initNumberFormatters(), to set fHasHanYearChar
944
945 // Simple-minded hack to force Gannen year numbering for ja@calendar=japanese
946 // if format is non-numeric (includes 年) and fDateOverride is not already specified.
947 // Now this does get updated if applyPattern subsequently changes the pattern type.
948 if (fDateOverride.isBogus() && fHasHanYearChar &&
949 fCalendar != nullptr && uprv_strcmp(fCalendar->getType(),"japanese") == 0 &&
950 uprv_strcmp(fLocale.getLanguage(),"ja") == 0) {
951 fDateOverride.setTo(u"y=jpanyear", -1);
952 }
953
954 // We don't need to check that the row count is >= 1, since all 2d arrays have at
955 // least one row
956 fNumberFormat = NumberFormat::createInstance(locale, status);
957 if (fNumberFormat != nullptr && U_SUCCESS(status))
958 {
959 fixNumberFormatForDates(*fNumberFormat);
960 //fNumberFormat->setLenient(true); // Java uses a custom DateNumberFormat to format/parse
961
962 initNumberFormatters(locale, status);
963 initSimpleNumberFormatter(status);
964
965 }
966 else if (U_SUCCESS(status))
967 {
968 status = U_MISSING_RESOURCE_ERROR;
969 }
970 }
971
972 /* Initialize the fields we use to disambiguate ambiguous years. Separate
973 * so we can call it from readObject().
974 */
initializeDefaultCentury()975 void SimpleDateFormat::initializeDefaultCentury()
976 {
977 if(fCalendar) {
978 fHaveDefaultCentury = fCalendar->haveDefaultCentury();
979 if(fHaveDefaultCentury) {
980 fDefaultCenturyStart = fCalendar->defaultCenturyStart();
981 fDefaultCenturyStartYear = fCalendar->defaultCenturyStartYear();
982 } else {
983 fDefaultCenturyStart = DBL_MIN;
984 fDefaultCenturyStartYear = -1;
985 }
986 }
987 }
988
989 /*
990 * Initialize the boolean attributes. Separate so we can call it from all constructors.
991 */
initializeBooleanAttributes()992 void SimpleDateFormat::initializeBooleanAttributes()
993 {
994 UErrorCode status = U_ZERO_ERROR;
995
996 setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status);
997 setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status);
998 setBooleanAttribute(UDAT_PARSE_PARTIAL_LITERAL_MATCH, true, status);
999 setBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, true, status);
1000 }
1001
1002 /* Define one-century window into which to disambiguate dates using
1003 * two-digit years. Make public in JDK 1.2.
1004 */
parseAmbiguousDatesAsAfter(UDate startDate,UErrorCode & status)1005 void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status)
1006 {
1007 if(U_FAILURE(status)) {
1008 return;
1009 }
1010 if(!fCalendar) {
1011 status = U_ILLEGAL_ARGUMENT_ERROR;
1012 return;
1013 }
1014
1015 fCalendar->setTime(startDate, status);
1016 if(U_SUCCESS(status)) {
1017 fHaveDefaultCentury = true;
1018 fDefaultCenturyStart = startDate;
1019 fDefaultCenturyStartYear = fCalendar->get(UCAL_YEAR, status);
1020 }
1021 }
1022
1023 //----------------------------------------------------------------------
1024
1025 UnicodeString&
format(Calendar & cal,UnicodeString & appendTo,FieldPosition & pos) const1026 SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, FieldPosition& pos) const
1027 {
1028 UErrorCode status = U_ZERO_ERROR;
1029 FieldPositionOnlyHandler handler(pos);
1030 return _format(cal, appendTo, handler, status);
1031 }
1032
1033 //----------------------------------------------------------------------
1034
1035 UnicodeString&
format(Calendar & cal,UnicodeString & appendTo,FieldPositionIterator * posIter,UErrorCode & status) const1036 SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo,
1037 FieldPositionIterator* posIter, UErrorCode& status) const
1038 {
1039 FieldPositionIteratorHandler handler(posIter, status);
1040 return _format(cal, appendTo, handler, status);
1041 }
1042
1043 //----------------------------------------------------------------------
1044
1045 UnicodeString&
_format(Calendar & cal,UnicodeString & appendTo,FieldPositionHandler & handler,UErrorCode & status) const1046 SimpleDateFormat::_format(Calendar& cal, UnicodeString& appendTo,
1047 FieldPositionHandler& handler, UErrorCode& status) const
1048 {
1049 if ( U_FAILURE(status) ) {
1050 return appendTo;
1051 }
1052 Calendar* workCal = &cal;
1053 Calendar* calClone = nullptr;
1054 if (&cal != fCalendar && uprv_strcmp(cal.getType(), fCalendar->getType()) != 0) {
1055 // Different calendar type
1056 // We use the time and time zone from the input calendar, but
1057 // do not use the input calendar for field calculation.
1058 calClone = fCalendar->clone();
1059 if (calClone != nullptr) {
1060 UDate t = cal.getTime(status);
1061 calClone->setTime(t, status);
1062 calClone->setTimeZone(cal.getTimeZone());
1063 workCal = calClone;
1064 } else {
1065 status = U_MEMORY_ALLOCATION_ERROR;
1066 return appendTo;
1067 }
1068 }
1069
1070 UBool inQuote = false;
1071 char16_t prevCh = 0;
1072 int32_t count = 0;
1073 int32_t fieldNum = 0;
1074 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1075
1076 // loop through the pattern string character by character
1077 for (int32_t i = 0; i < fPattern.length() && U_SUCCESS(status); ++i) {
1078 char16_t ch = fPattern[i];
1079
1080 // Use subFormat() to format a repeated pattern character
1081 // when a different pattern or non-pattern character is seen
1082 if (ch != prevCh && count > 0) {
1083 subFormat(appendTo, prevCh, count, capitalizationContext, fieldNum++,
1084 prevCh, handler, *workCal, status);
1085 count = 0;
1086 }
1087 if (ch == QUOTE) {
1088 // Consecutive single quotes are a single quote literal,
1089 // either outside of quotes or between quotes
1090 if ((i+1) < fPattern.length() && fPattern[i+1] == QUOTE) {
1091 appendTo += (char16_t)QUOTE;
1092 ++i;
1093 } else {
1094 inQuote = ! inQuote;
1095 }
1096 }
1097 else if (!inQuote && isSyntaxChar(ch)) {
1098 // ch is a date-time pattern character to be interpreted
1099 // by subFormat(); count the number of times it is repeated
1100 prevCh = ch;
1101 ++count;
1102 }
1103 else {
1104 // Append quoted characters and unquoted non-pattern characters
1105 appendTo += ch;
1106 }
1107 }
1108
1109 // Format the last item in the pattern, if any
1110 if (count > 0) {
1111 subFormat(appendTo, prevCh, count, capitalizationContext, fieldNum++,
1112 prevCh, handler, *workCal, status);
1113 }
1114
1115 if (calClone != nullptr) {
1116 delete calClone;
1117 }
1118
1119 return appendTo;
1120 }
1121
1122 //----------------------------------------------------------------------
1123
1124 /* Map calendar field into calendar field level.
1125 * the larger the level, the smaller the field unit.
1126 * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10,
1127 * UCAL_MONTH level is 20.
1128 * NOTE: if new fields adds in, the table needs to update.
1129 */
1130 const int32_t
1131 SimpleDateFormat::fgCalendarFieldToLevel[] =
1132 {
1133 /*GyM*/ 0, 10, 20,
1134 /*wW*/ 20, 30,
1135 /*dDEF*/ 30, 20, 30, 30,
1136 /*ahHm*/ 40, 50, 50, 60,
1137 /*sS*/ 70, 80,
1138 /*z?Y*/ 0, 0, 10,
1139 /*eug*/ 30, 10, 0,
1140 /*A?.*/ 40, 0, 0
1141 };
1142
getLevelFromChar(char16_t ch)1143 int32_t SimpleDateFormat::getLevelFromChar(char16_t ch) {
1144 // Map date field LETTER into calendar field level.
1145 // the larger the level, the smaller the field unit.
1146 // NOTE: if new fields adds in, the table needs to update.
1147 static const int32_t mapCharToLevel[] = {
1148 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1149 //
1150 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1151 // ! " # $ % & ' ( ) * + , - . /
1152 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1153 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
1154 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
1155 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1,
1156 #else
1157 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
1158 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1159 #endif
1160 // @ A B C D E F G H I J K L M N O
1161 -1, 40, -1, -1, 20, 30, 30, 0, 50, -1, -1, 50, 20, 20, -1, 0,
1162 // P Q R S T U V W X Y Z [ \ ] ^ _
1163 -1, 20, -1, 80, -1, 10, 0, 30, 0, 10, 0, -1, -1, -1, -1, -1,
1164 // ` a b c d e f g h i j k l m n o
1165 -1, 40, -1, 30, 30, 30, -1, 0, 50, -1, -1, 50, 0, 60, -1, -1,
1166 // p q r s t u v w x y z { | } ~
1167 -1, 20, 10, 70, -1, 10, 0, 20, 0, 10, 0, -1, -1, -1, -1, -1
1168 };
1169
1170 return ch < UPRV_LENGTHOF(mapCharToLevel) ? mapCharToLevel[ch] : -1;
1171 }
1172
isSyntaxChar(char16_t ch)1173 UBool SimpleDateFormat::isSyntaxChar(char16_t ch) {
1174 static const UBool mapCharToIsSyntax[] = {
1175 //
1176 false, false, false, false, false, false, false, false,
1177 //
1178 false, false, false, false, false, false, false, false,
1179 //
1180 false, false, false, false, false, false, false, false,
1181 //
1182 false, false, false, false, false, false, false, false,
1183 // ! " # $ % & '
1184 false, false, false, false, false, false, false, false,
1185 // ( ) * + , - . /
1186 false, false, false, false, false, false, false, false,
1187 // 0 1 2 3 4 5 6 7
1188 false, false, false, false, false, false, false, false,
1189 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
1190 // 8 9 : ; < = > ?
1191 false, false, true, false, false, false, false, false,
1192 #else
1193 // 8 9 : ; < = > ?
1194 false, false, false, false, false, false, false, false,
1195 #endif
1196 // @ A B C D E F G
1197 false, true, true, true, true, true, true, true,
1198 // H I J K L M N O
1199 true, true, true, true, true, true, true, true,
1200 // P Q R S T U V W
1201 true, true, true, true, true, true, true, true,
1202 // X Y Z [ \ ] ^ _
1203 true, true, true, false, false, false, false, false,
1204 // ` a b c d e f g
1205 false, true, true, true, true, true, true, true,
1206 // h i j k l m n o
1207 true, true, true, true, true, true, true, true,
1208 // p q r s t u v w
1209 true, true, true, true, true, true, true, true,
1210 // x y z { | } ~
1211 true, true, true, false, false, false, false, false
1212 };
1213
1214 return ch < UPRV_LENGTHOF(mapCharToIsSyntax) ? mapCharToIsSyntax[ch] : false;
1215 }
1216
1217 // Map index into pattern character string to Calendar field number.
1218 const UCalendarDateFields
1219 SimpleDateFormat::fgPatternIndexToCalendarField[] =
1220 {
1221 /*GyM*/ UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
1222 /*dkH*/ UCAL_DATE, UCAL_HOUR_OF_DAY, UCAL_HOUR_OF_DAY,
1223 /*msS*/ UCAL_MINUTE, UCAL_SECOND, UCAL_MILLISECOND,
1224 /*EDF*/ UCAL_DAY_OF_WEEK, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK_IN_MONTH,
1225 /*wWa*/ UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_AM_PM,
1226 /*hKz*/ UCAL_HOUR, UCAL_HOUR, UCAL_ZONE_OFFSET,
1227 /*Yeu*/ UCAL_YEAR_WOY, UCAL_DOW_LOCAL, UCAL_EXTENDED_YEAR,
1228 /*gAZ*/ UCAL_JULIAN_DAY, UCAL_MILLISECONDS_IN_DAY, UCAL_ZONE_OFFSET,
1229 /*v*/ UCAL_ZONE_OFFSET,
1230 /*c*/ UCAL_DOW_LOCAL,
1231 /*L*/ UCAL_MONTH,
1232 /*Q*/ UCAL_MONTH,
1233 /*q*/ UCAL_MONTH,
1234 /*V*/ UCAL_ZONE_OFFSET,
1235 /*U*/ UCAL_YEAR,
1236 /*O*/ UCAL_ZONE_OFFSET,
1237 /*Xx*/ UCAL_ZONE_OFFSET, UCAL_ZONE_OFFSET,
1238 /*r*/ UCAL_EXTENDED_YEAR,
1239 /*bB*/ UCAL_FIELD_COUNT, UCAL_FIELD_COUNT, // no mappings to calendar fields
1240 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
1241 /*:*/ UCAL_FIELD_COUNT, /* => no useful mapping to any calendar field */
1242 #else
1243 /*no pattern char for UDAT_TIME_SEPARATOR_FIELD*/ UCAL_FIELD_COUNT, /* => no useful mapping to any calendar field */
1244 #endif
1245 };
1246
1247 // Map index into pattern character string to DateFormat field number
1248 const UDateFormatField
1249 SimpleDateFormat::fgPatternIndexToDateFormatField[] = {
1250 /*GyM*/ UDAT_ERA_FIELD, UDAT_YEAR_FIELD, UDAT_MONTH_FIELD,
1251 /*dkH*/ UDAT_DATE_FIELD, UDAT_HOUR_OF_DAY1_FIELD, UDAT_HOUR_OF_DAY0_FIELD,
1252 /*msS*/ UDAT_MINUTE_FIELD, UDAT_SECOND_FIELD, UDAT_FRACTIONAL_SECOND_FIELD,
1253 /*EDF*/ UDAT_DAY_OF_WEEK_FIELD, UDAT_DAY_OF_YEAR_FIELD, UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
1254 /*wWa*/ UDAT_WEEK_OF_YEAR_FIELD, UDAT_WEEK_OF_MONTH_FIELD, UDAT_AM_PM_FIELD,
1255 /*hKz*/ UDAT_HOUR1_FIELD, UDAT_HOUR0_FIELD, UDAT_TIMEZONE_FIELD,
1256 /*Yeu*/ UDAT_YEAR_WOY_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_EXTENDED_YEAR_FIELD,
1257 /*gAZ*/ UDAT_JULIAN_DAY_FIELD, UDAT_MILLISECONDS_IN_DAY_FIELD, UDAT_TIMEZONE_RFC_FIELD,
1258 /*v*/ UDAT_TIMEZONE_GENERIC_FIELD,
1259 /*c*/ UDAT_STANDALONE_DAY_FIELD,
1260 /*L*/ UDAT_STANDALONE_MONTH_FIELD,
1261 /*Q*/ UDAT_QUARTER_FIELD,
1262 /*q*/ UDAT_STANDALONE_QUARTER_FIELD,
1263 /*V*/ UDAT_TIMEZONE_SPECIAL_FIELD,
1264 /*U*/ UDAT_YEAR_NAME_FIELD,
1265 /*O*/ UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD,
1266 /*Xx*/ UDAT_TIMEZONE_ISO_FIELD, UDAT_TIMEZONE_ISO_LOCAL_FIELD,
1267 /*r*/ UDAT_RELATED_YEAR_FIELD,
1268 /*bB*/ UDAT_AM_PM_MIDNIGHT_NOON_FIELD, UDAT_FLEXIBLE_DAY_PERIOD_FIELD,
1269 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
1270 /*:*/ UDAT_TIME_SEPARATOR_FIELD,
1271 #else
1272 /*no pattern char for UDAT_TIME_SEPARATOR_FIELD*/ UDAT_TIME_SEPARATOR_FIELD,
1273 #endif
1274 };
1275
1276 //----------------------------------------------------------------------
1277
1278 /**
1279 * Append symbols[value] to dst. Make sure the array index is not out
1280 * of bounds.
1281 */
1282 static inline void
_appendSymbol(UnicodeString & dst,int32_t value,const UnicodeString * symbols,int32_t symbolsCount)1283 _appendSymbol(UnicodeString& dst,
1284 int32_t value,
1285 const UnicodeString* symbols,
1286 int32_t symbolsCount) {
1287 U_ASSERT(0 <= value && value < symbolsCount);
1288 if (0 <= value && value < symbolsCount) {
1289 dst += symbols[value];
1290 }
1291 }
1292
1293 static inline void
_appendSymbolWithMonthPattern(UnicodeString & dst,int32_t value,const UnicodeString * symbols,int32_t symbolsCount,const UnicodeString * monthPattern,UErrorCode & status)1294 _appendSymbolWithMonthPattern(UnicodeString& dst, int32_t value, const UnicodeString* symbols, int32_t symbolsCount,
1295 const UnicodeString* monthPattern, UErrorCode& status) {
1296 U_ASSERT(0 <= value && value < symbolsCount);
1297 if (0 <= value && value < symbolsCount) {
1298 if (monthPattern == nullptr) {
1299 dst += symbols[value];
1300 } else {
1301 SimpleFormatter(*monthPattern, 1, 1, status).format(symbols[value], dst, status);
1302 }
1303 }
1304 }
1305
1306 //----------------------------------------------------------------------
1307
1308 void
initSimpleNumberFormatter(UErrorCode & status)1309 SimpleDateFormat::initSimpleNumberFormatter(UErrorCode &status) {
1310 if (U_FAILURE(status)) {
1311 return;
1312 }
1313 auto* df = dynamic_cast<const DecimalFormat*>(fNumberFormat);
1314 if (df == nullptr) {
1315 return;
1316 }
1317 const DecimalFormatSymbols* syms = df->getDecimalFormatSymbols();
1318 if (syms == nullptr) {
1319 return;
1320 }
1321 fSimpleNumberFormatter = new number::SimpleNumberFormatter(
1322 number::SimpleNumberFormatter::forLocaleAndSymbolsAndGroupingStrategy(
1323 fLocale, *syms, UNUM_GROUPING_OFF, status
1324 )
1325 );
1326 if (fSimpleNumberFormatter == nullptr) {
1327 status = U_MEMORY_ALLOCATION_ERROR;
1328 }
1329 }
1330
1331 void
initNumberFormatters(const Locale & locale,UErrorCode & status)1332 SimpleDateFormat::initNumberFormatters(const Locale &locale,UErrorCode &status) {
1333 if (U_FAILURE(status)) {
1334 return;
1335 }
1336 if ( fDateOverride.isBogus() && fTimeOverride.isBogus() ) {
1337 return;
1338 }
1339 umtx_lock(&LOCK);
1340 if (fSharedNumberFormatters == nullptr) {
1341 fSharedNumberFormatters = allocSharedNumberFormatters();
1342 if (fSharedNumberFormatters == nullptr) {
1343 status = U_MEMORY_ALLOCATION_ERROR;
1344 }
1345 }
1346 umtx_unlock(&LOCK);
1347
1348 if (U_FAILURE(status)) {
1349 return;
1350 }
1351
1352 processOverrideString(locale,fDateOverride,kOvrStrDate,status);
1353 processOverrideString(locale,fTimeOverride,kOvrStrTime,status);
1354 }
1355
1356 void
processOverrideString(const Locale & locale,const UnicodeString & str,int8_t type,UErrorCode & status)1357 SimpleDateFormat::processOverrideString(const Locale &locale, const UnicodeString &str, int8_t type, UErrorCode &status) {
1358 if (str.isBogus() || U_FAILURE(status)) {
1359 return;
1360 }
1361
1362 int32_t start = 0;
1363 int32_t len;
1364 UnicodeString nsName;
1365 UnicodeString ovrField;
1366 UBool moreToProcess = true;
1367 NSOverride *overrideList = nullptr;
1368
1369 while (moreToProcess) {
1370 int32_t delimiterPosition = str.indexOf((char16_t)ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE,start);
1371 if (delimiterPosition == -1) {
1372 moreToProcess = false;
1373 len = str.length() - start;
1374 } else {
1375 len = delimiterPosition - start;
1376 }
1377 UnicodeString currentString(str,start,len);
1378 int32_t equalSignPosition = currentString.indexOf((char16_t)ULOC_KEYWORD_ASSIGN_UNICODE,0);
1379 if (equalSignPosition == -1) { // Simple override string such as "hebrew"
1380 nsName.setTo(currentString);
1381 ovrField.setToBogus();
1382 } else { // Field specific override string such as "y=hebrew"
1383 nsName.setTo(currentString,equalSignPosition+1);
1384 ovrField.setTo(currentString,0,1); // We just need the first character.
1385 }
1386
1387 int32_t nsNameHash = nsName.hashCode();
1388 // See if the numbering system is in the override list, if not, then add it.
1389 NSOverride *curr = overrideList;
1390 const SharedNumberFormat *snf = nullptr;
1391 UBool found = false;
1392 while ( curr && !found ) {
1393 if ( curr->hash == nsNameHash ) {
1394 snf = curr->snf;
1395 found = true;
1396 }
1397 curr = curr->next;
1398 }
1399
1400 if (!found) {
1401 LocalPointer<NSOverride> cur(new NSOverride);
1402 if (!cur.isNull()) {
1403 char kw[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1404 uprv_strcpy(kw,"numbers=");
1405 nsName.extract(0,len,kw+8,ULOC_KEYWORD_AND_VALUES_CAPACITY-8,US_INV);
1406
1407 Locale ovrLoc(locale.getLanguage(),locale.getCountry(),locale.getVariant(),kw);
1408 cur->hash = nsNameHash;
1409 cur->next = overrideList;
1410 SharedObject::copyPtr(
1411 createSharedNumberFormat(ovrLoc, status), cur->snf);
1412 if (U_FAILURE(status)) {
1413 if (overrideList) {
1414 overrideList->free();
1415 }
1416 return;
1417 }
1418 snf = cur->snf;
1419 overrideList = cur.orphan();
1420 } else {
1421 status = U_MEMORY_ALLOCATION_ERROR;
1422 if (overrideList) {
1423 overrideList->free();
1424 }
1425 return;
1426 }
1427 }
1428
1429 // Now that we have an appropriate number formatter, fill in the appropriate spaces in the
1430 // number formatters table.
1431 if (ovrField.isBogus()) {
1432 switch (type) {
1433 case kOvrStrDate:
1434 case kOvrStrBoth: {
1435 for ( int8_t i=0 ; i<kDateFieldsCount; i++ ) {
1436 SharedObject::copyPtr(snf, fSharedNumberFormatters[kDateFields[i]]);
1437 }
1438 if (type==kOvrStrDate) {
1439 break;
1440 }
1441 U_FALLTHROUGH;
1442 }
1443 case kOvrStrTime : {
1444 for ( int8_t i=0 ; i<kTimeFieldsCount; i++ ) {
1445 SharedObject::copyPtr(snf, fSharedNumberFormatters[kTimeFields[i]]);
1446 }
1447 break;
1448 }
1449 }
1450 } else {
1451 // if the pattern character is unrecognized, signal an error and bail out
1452 UDateFormatField patternCharIndex =
1453 DateFormatSymbols::getPatternCharIndex(ovrField.charAt(0));
1454 if (patternCharIndex == UDAT_FIELD_COUNT) {
1455 status = U_INVALID_FORMAT_ERROR;
1456 if (overrideList) {
1457 overrideList->free();
1458 }
1459 return;
1460 }
1461 SharedObject::copyPtr(snf, fSharedNumberFormatters[patternCharIndex]);
1462 }
1463
1464 start = delimiterPosition + 1;
1465 }
1466 if (overrideList) {
1467 overrideList->free();
1468 }
1469 }
1470
1471 //---------------------------------------------------------------------
1472 void
subFormat(UnicodeString & appendTo,char16_t ch,int32_t count,UDisplayContext capitalizationContext,int32_t fieldNum,char16_t fieldToOutput,FieldPositionHandler & handler,Calendar & cal,UErrorCode & status) const1473 SimpleDateFormat::subFormat(UnicodeString &appendTo,
1474 char16_t ch,
1475 int32_t count,
1476 UDisplayContext capitalizationContext,
1477 int32_t fieldNum,
1478 char16_t fieldToOutput,
1479 FieldPositionHandler& handler,
1480 Calendar& cal,
1481 UErrorCode& status) const
1482 {
1483 if (U_FAILURE(status)) {
1484 return;
1485 }
1486
1487 // this function gets called by format() to produce the appropriate substitution
1488 // text for an individual pattern symbol (e.g., "HH" or "yyyy")
1489
1490 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch);
1491 const int32_t maxIntCount = 10;
1492 int32_t beginOffset = appendTo.length();
1493 const NumberFormat *currentNumberFormat;
1494 DateFormatSymbols::ECapitalizationContextUsageType capContextUsageType = DateFormatSymbols::kCapContextUsageOther;
1495
1496 UBool isHebrewCalendar = (uprv_strcmp(cal.getType(),"hebrew") == 0);
1497 UBool isChineseCalendar = (uprv_strcmp(cal.getType(),"chinese") == 0 || uprv_strcmp(cal.getType(),"dangi") == 0);
1498
1499 // if the pattern character is unrecognized, signal an error and dump out
1500 if (patternCharIndex == UDAT_FIELD_COUNT)
1501 {
1502 if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored
1503 status = U_INVALID_FORMAT_ERROR;
1504 }
1505 return;
1506 }
1507
1508 UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
1509 int32_t value = 0;
1510 // Don't get value unless it is useful
1511 if (field < UCAL_FIELD_COUNT) {
1512 value = (patternCharIndex != UDAT_RELATED_YEAR_FIELD)? cal.get(field, status): cal.getRelatedYear(status);
1513 }
1514 if (U_FAILURE(status)) {
1515 return;
1516 }
1517
1518 currentNumberFormat = getNumberFormatByIndex(patternCharIndex);
1519 if (currentNumberFormat == nullptr) {
1520 status = U_INTERNAL_PROGRAM_ERROR;
1521 return;
1522 }
1523 UnicodeString hebr("hebr", 4, US_INV);
1524
1525 switch (patternCharIndex) {
1526
1527 // for any "G" symbol, write out the appropriate era string
1528 // "GGGG" is wide era name, "GGGGG" is narrow era name, anything else is abbreviated name
1529 case UDAT_ERA_FIELD:
1530 if (isChineseCalendar) {
1531 zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, 9); // as in ICU4J
1532 } else {
1533 if (count == 5) {
1534 _appendSymbol(appendTo, value, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount);
1535 capContextUsageType = DateFormatSymbols::kCapContextUsageEraNarrow;
1536 } else if (count == 4) {
1537 _appendSymbol(appendTo, value, fSymbols->fEraNames, fSymbols->fEraNamesCount);
1538 capContextUsageType = DateFormatSymbols::kCapContextUsageEraWide;
1539 } else {
1540 _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount);
1541 capContextUsageType = DateFormatSymbols::kCapContextUsageEraAbbrev;
1542 }
1543 }
1544 break;
1545
1546 case UDAT_YEAR_NAME_FIELD:
1547 if (fSymbols->fShortYearNames != nullptr && value <= fSymbols->fShortYearNamesCount) {
1548 // the Calendar YEAR field runs 1 through 60 for cyclic years
1549 _appendSymbol(appendTo, value - 1, fSymbols->fShortYearNames, fSymbols->fShortYearNamesCount);
1550 break;
1551 }
1552 // else fall through to numeric year handling, do not break here
1553 U_FALLTHROUGH;
1554
1555 // OLD: for "yyyy", write out the whole year; for "yy", write out the last 2 digits
1556 // NEW: UTS#35:
1557 //Year y yy yyy yyyy yyyyy
1558 //AD 1 1 01 001 0001 00001
1559 //AD 12 12 12 012 0012 00012
1560 //AD 123 123 23 123 0123 00123
1561 //AD 1234 1234 34 1234 1234 01234
1562 //AD 12345 12345 45 12345 12345 12345
1563 case UDAT_YEAR_FIELD:
1564 case UDAT_YEAR_WOY_FIELD:
1565 if (fDateOverride.compare(hebr)==0 && value>HEBREW_CAL_CUR_MILLENIUM_START_YEAR && value<HEBREW_CAL_CUR_MILLENIUM_END_YEAR) {
1566 value-=HEBREW_CAL_CUR_MILLENIUM_START_YEAR;
1567 }
1568 if(count == 2)
1569 zeroPaddingNumber(currentNumberFormat, appendTo, value, 2, 2);
1570 else
1571 zeroPaddingNumber(currentNumberFormat, appendTo, value, count, maxIntCount);
1572 break;
1573
1574 // for "MMMM"/"LLLL", write out the whole month name, for "MMM"/"LLL", write out the month
1575 // abbreviation, for "M"/"L" or "MM"/"LL", write out the month as a number with the
1576 // appropriate number of digits
1577 // for "MMMMM"/"LLLLL", use the narrow form
1578 case UDAT_MONTH_FIELD:
1579 case UDAT_STANDALONE_MONTH_FIELD:
1580 if ( isHebrewCalendar ) {
1581 HebrewCalendar *hc = (HebrewCalendar*)&cal;
1582 if (hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value == 6 && count >= 3 )
1583 value = 13; // Show alternate form for Adar II in leap years in Hebrew calendar.
1584 if (!hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value >= 6 && count < 3 )
1585 value--; // Adjust the month number down 1 in Hebrew non-leap years, i.e. Adar is 6, not 7.
1586 }
1587 {
1588 int32_t isLeapMonth = (fSymbols->fLeapMonthPatterns != nullptr && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount)?
1589 cal.get(UCAL_IS_LEAP_MONTH, status): 0;
1590 // should consolidate the next section by using arrays of pointers & counts for the right symbols...
1591 if (count == 5) {
1592 if (patternCharIndex == UDAT_MONTH_FIELD) {
1593 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fNarrowMonths, fSymbols->fNarrowMonthsCount,
1594 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatNarrow]): nullptr, status);
1595 } else {
1596 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneNarrowMonths, fSymbols->fStandaloneNarrowMonthsCount,
1597 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneNarrow]): nullptr, status);
1598 }
1599 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthNarrow;
1600 } else if (count == 4) {
1601 if (patternCharIndex == UDAT_MONTH_FIELD) {
1602 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fMonths, fSymbols->fMonthsCount,
1603 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide]): nullptr, status);
1604 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthFormat;
1605 } else {
1606 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount,
1607 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide]): nullptr, status);
1608 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthStandalone;
1609 }
1610 } else if (count == 3) {
1611 if (patternCharIndex == UDAT_MONTH_FIELD) {
1612 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fShortMonths, fSymbols->fShortMonthsCount,
1613 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev]): nullptr, status);
1614 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthFormat;
1615 } else {
1616 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount,
1617 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev]): nullptr, status);
1618 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthStandalone;
1619 }
1620 } else {
1621 UnicodeString monthNumber;
1622 zeroPaddingNumber(currentNumberFormat,monthNumber, value + 1, count, maxIntCount);
1623 _appendSymbolWithMonthPattern(appendTo, 0, &monthNumber, 1,
1624 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric]): nullptr, status);
1625 }
1626 }
1627 break;
1628
1629 // for "k" and "kk", write out the hour, adjusting midnight to appear as "24"
1630 case UDAT_HOUR_OF_DAY1_FIELD:
1631 if (value == 0)
1632 zeroPaddingNumber(currentNumberFormat,appendTo, cal.getMaximum(UCAL_HOUR_OF_DAY) + 1, count, maxIntCount);
1633 else
1634 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1635 break;
1636
1637 case UDAT_FRACTIONAL_SECOND_FIELD:
1638 // Fractional seconds left-justify
1639 {
1640 int32_t minDigits = (count > 3) ? 3 : count;
1641 if (count == 1) {
1642 value /= 100;
1643 } else if (count == 2) {
1644 value /= 10;
1645 }
1646 zeroPaddingNumber(currentNumberFormat, appendTo, value, minDigits, maxIntCount);
1647 if (count > 3) {
1648 zeroPaddingNumber(currentNumberFormat, appendTo, 0, count - 3, maxIntCount);
1649 }
1650 }
1651 break;
1652
1653 // for "ee" or "e", use local numeric day-of-the-week
1654 // for "EEEEEE" or "eeeeee", write out the short day-of-the-week name
1655 // for "EEEEE" or "eeeee", write out the narrow day-of-the-week name
1656 // for "EEEE" or "eeee", write out the wide day-of-the-week name
1657 // for "EEE" or "EE" or "E" or "eee", write out the abbreviated day-of-the-week name
1658 case UDAT_DOW_LOCAL_FIELD:
1659 if ( count < 3 ) {
1660 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1661 break;
1662 }
1663 // fall through to EEEEE-EEE handling, but for that we don't want local day-of-week,
1664 // we want standard day-of-week, so first fix value to work for EEEEE-EEE.
1665 value = cal.get(UCAL_DAY_OF_WEEK, status);
1666 if (U_FAILURE(status)) {
1667 return;
1668 }
1669 // fall through, do not break here
1670 U_FALLTHROUGH;
1671 case UDAT_DAY_OF_WEEK_FIELD:
1672 if (count == 5) {
1673 _appendSymbol(appendTo, value, fSymbols->fNarrowWeekdays,
1674 fSymbols->fNarrowWeekdaysCount);
1675 capContextUsageType = DateFormatSymbols::kCapContextUsageDayNarrow;
1676 } else if (count == 4) {
1677 _appendSymbol(appendTo, value, fSymbols->fWeekdays,
1678 fSymbols->fWeekdaysCount);
1679 capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat;
1680 } else if (count == 6) {
1681 _appendSymbol(appendTo, value, fSymbols->fShorterWeekdays,
1682 fSymbols->fShorterWeekdaysCount);
1683 capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat;
1684 } else {
1685 _appendSymbol(appendTo, value, fSymbols->fShortWeekdays,
1686 fSymbols->fShortWeekdaysCount);
1687 capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat;
1688 }
1689 break;
1690
1691 // for "ccc", write out the abbreviated day-of-the-week name
1692 // for "cccc", write out the wide day-of-the-week name
1693 // for "ccccc", use the narrow day-of-the-week name
    // for "cccccc", use the short day-of-the-week name
1695 case UDAT_STANDALONE_DAY_FIELD:
1696 if ( count < 3 ) {
1697 zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, maxIntCount);
1698 break;
1699 }
1700 // fall through to alpha DOW handling, but for that we don't want local day-of-week,
1701 // we want standard day-of-week, so first fix value.
1702 value = cal.get(UCAL_DAY_OF_WEEK, status);
1703 if (U_FAILURE(status)) {
1704 return;
1705 }
1706 if (count == 5) {
1707 _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowWeekdays,
1708 fSymbols->fStandaloneNarrowWeekdaysCount);
1709 capContextUsageType = DateFormatSymbols::kCapContextUsageDayNarrow;
1710 } else if (count == 4) {
1711 _appendSymbol(appendTo, value, fSymbols->fStandaloneWeekdays,
1712 fSymbols->fStandaloneWeekdaysCount);
1713 capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone;
1714 } else if (count == 6) {
1715 _appendSymbol(appendTo, value, fSymbols->fStandaloneShorterWeekdays,
1716 fSymbols->fStandaloneShorterWeekdaysCount);
1717 capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone;
1718 } else { // count == 3
1719 _appendSymbol(appendTo, value, fSymbols->fStandaloneShortWeekdays,
1720 fSymbols->fStandaloneShortWeekdaysCount);
1721 capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone;
1722 }
1723 break;
1724
1725 // for "a" symbol, write out the whole AM/PM string
1726 case UDAT_AM_PM_FIELD:
1727 if (count < 5) {
1728 _appendSymbol(appendTo, value, fSymbols->fAmPms,
1729 fSymbols->fAmPmsCount);
1730 } else {
1731 _appendSymbol(appendTo, value, fSymbols->fNarrowAmPms,
1732 fSymbols->fNarrowAmPmsCount);
1733 }
1734 break;
1735
1736 // if we see pattern character for UDAT_TIME_SEPARATOR_FIELD (none currently defined),
1737 // write out the time separator string. Leave support in for future definition.
1738 case UDAT_TIME_SEPARATOR_FIELD:
1739 {
1740 UnicodeString separator;
1741 appendTo += fSymbols->getTimeSeparatorString(separator);
1742 }
1743 break;
1744
1745 // for "h" and "hh", write out the hour, adjusting noon and midnight to show up
1746 // as "12"
1747 case UDAT_HOUR1_FIELD:
1748 if (value == 0)
1749 zeroPaddingNumber(currentNumberFormat,appendTo, cal.getLeastMaximum(UCAL_HOUR) + 1, count, maxIntCount);
1750 else
1751 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1752 break;
1753
1754 case UDAT_TIMEZONE_FIELD: // 'z'
1755 case UDAT_TIMEZONE_RFC_FIELD: // 'Z'
1756 case UDAT_TIMEZONE_GENERIC_FIELD: // 'v'
1757 case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V'
1758 case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: // 'O'
1759 case UDAT_TIMEZONE_ISO_FIELD: // 'X'
1760 case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x'
1761 {
1762 char16_t zsbuf[ZONE_NAME_U16_MAX];
1763 UnicodeString zoneString(zsbuf, 0, UPRV_LENGTHOF(zsbuf));
1764 const TimeZone& tz = cal.getTimeZone();
1765 UDate date = cal.getTime(status);
1766 const TimeZoneFormat *tzfmt = tzFormat(status);
1767 if (U_SUCCESS(status)) {
1768 if (patternCharIndex == UDAT_TIMEZONE_FIELD) {
1769 if (count < 4) {
1770 // "z", "zz", "zzz"
1771 tzfmt->format(UTZFMT_STYLE_SPECIFIC_SHORT, tz, date, zoneString);
1772 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneShort;
1773 } else {
1774 // "zzzz" or longer
1775 tzfmt->format(UTZFMT_STYLE_SPECIFIC_LONG, tz, date, zoneString);
1776 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong;
1777 }
1778 }
1779 else if (patternCharIndex == UDAT_TIMEZONE_RFC_FIELD) {
1780 if (count < 4) {
1781 // "Z"
1782 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, tz, date, zoneString);
1783 } else if (count == 5) {
1784 // "ZZZZZ"
1785 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FULL, tz, date, zoneString);
1786 } else {
1787 // "ZZ", "ZZZ", "ZZZZ"
1788 tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString);
1789 }
1790 }
1791 else if (patternCharIndex == UDAT_TIMEZONE_GENERIC_FIELD) {
1792 if (count == 1) {
1793 // "v"
1794 tzfmt->format(UTZFMT_STYLE_GENERIC_SHORT, tz, date, zoneString);
1795 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneShort;
1796 } else if (count == 4) {
1797 // "vvvv"
1798 tzfmt->format(UTZFMT_STYLE_GENERIC_LONG, tz, date, zoneString);
1799 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong;
1800 }
1801 }
1802 else if (patternCharIndex == UDAT_TIMEZONE_SPECIAL_FIELD) {
1803 if (count == 1) {
1804 // "V"
1805 tzfmt->format(UTZFMT_STYLE_ZONE_ID_SHORT, tz, date, zoneString);
1806 } else if (count == 2) {
1807 // "VV"
1808 tzfmt->format(UTZFMT_STYLE_ZONE_ID, tz, date, zoneString);
1809 } else if (count == 3) {
1810 // "VVV"
1811 tzfmt->format(UTZFMT_STYLE_EXEMPLAR_LOCATION, tz, date, zoneString);
1812 } else if (count == 4) {
1813 // "VVVV"
1814 tzfmt->format(UTZFMT_STYLE_GENERIC_LOCATION, tz, date, zoneString);
1815 capContextUsageType = DateFormatSymbols::kCapContextUsageZoneLong;
1816 }
1817 }
1818 else if (patternCharIndex == UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD) {
1819 if (count == 1) {
1820 // "O"
1821 tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT_SHORT, tz, date, zoneString);
1822 } else if (count == 4) {
1823 // "OOOO"
1824 tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString);
1825 }
1826 }
1827 else if (patternCharIndex == UDAT_TIMEZONE_ISO_FIELD) {
1828 if (count == 1) {
1829 // "X"
1830 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_SHORT, tz, date, zoneString);
1831 } else if (count == 2) {
1832 // "XX"
1833 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_FIXED, tz, date, zoneString);
1834 } else if (count == 3) {
1835 // "XXX"
1836 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FIXED, tz, date, zoneString);
1837 } else if (count == 4) {
1838 // "XXXX"
1839 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_FULL, tz, date, zoneString);
1840 } else if (count == 5) {
1841 // "XXXXX"
1842 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FULL, tz, date, zoneString);
1843 }
1844 }
1845 else if (patternCharIndex == UDAT_TIMEZONE_ISO_LOCAL_FIELD) {
1846 if (count == 1) {
1847 // "x"
1848 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT, tz, date, zoneString);
1849 } else if (count == 2) {
1850 // "xx"
1851 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED, tz, date, zoneString);
1852 } else if (count == 3) {
1853 // "xxx"
1854 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED, tz, date, zoneString);
1855 } else if (count == 4) {
1856 // "xxxx"
1857 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, tz, date, zoneString);
1858 } else if (count == 5) {
1859 // "xxxxx"
1860 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL, tz, date, zoneString);
1861 }
1862 }
1863 else {
1864 UPRV_UNREACHABLE_EXIT;
1865 }
1866 }
1867 appendTo += zoneString;
1868 }
1869 break;
1870
1871 case UDAT_QUARTER_FIELD:
1872 if (count >= 5)
1873 _appendSymbol(appendTo, value/3, fSymbols->fNarrowQuarters,
1874 fSymbols->fNarrowQuartersCount);
1875 else if (count == 4)
1876 _appendSymbol(appendTo, value/3, fSymbols->fQuarters,
1877 fSymbols->fQuartersCount);
1878 else if (count == 3)
1879 _appendSymbol(appendTo, value/3, fSymbols->fShortQuarters,
1880 fSymbols->fShortQuartersCount);
1881 else
1882 zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount);
1883 break;
1884
1885 case UDAT_STANDALONE_QUARTER_FIELD:
1886 if (count >= 5)
1887 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneNarrowQuarters,
1888 fSymbols->fStandaloneNarrowQuartersCount);
1889 else if (count == 4)
1890 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneQuarters,
1891 fSymbols->fStandaloneQuartersCount);
1892 else if (count == 3)
1893 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneShortQuarters,
1894 fSymbols->fStandaloneShortQuartersCount);
1895 else
1896 zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount);
1897 break;
1898
1899 case UDAT_AM_PM_MIDNIGHT_NOON_FIELD:
1900 {
1901 const UnicodeString *toAppend = nullptr;
1902 int32_t hour = cal.get(UCAL_HOUR_OF_DAY, status);
1903
1904 // Note: "midnight" can be ambiguous as to whether it refers to beginning of day or end of day.
1905 // For ICU 57 output of "midnight" is temporarily suppressed.
1906
1907 // For "midnight" and "noon":
1908 // Time, as displayed, must be exactly noon or midnight.
1909 // This means minutes and seconds, if present, must be zero.
1910 if ((/*hour == 0 ||*/ hour == 12) &&
1911 (!fHasMinute || cal.get(UCAL_MINUTE, status) == 0) &&
1912 (!fHasSecond || cal.get(UCAL_SECOND, status) == 0)) {
1913 // Stealing am/pm value to use as our array index.
1914 // It works out: am/midnight are both 0, pm/noon are both 1,
1915 // 12 am is 12 midnight, and 12 pm is 12 noon.
1916 int32_t val = cal.get(UCAL_AM_PM, status);
1917
1918 if (count <= 3) {
1919 toAppend = &fSymbols->fAbbreviatedDayPeriods[val];
1920 } else if (count == 4 || count > 5) {
1921 toAppend = &fSymbols->fWideDayPeriods[val];
1922 } else { // count == 5
1923 toAppend = &fSymbols->fNarrowDayPeriods[val];
1924 }
1925 }
1926
1927 // toAppend is nullptr if time isn't exactly midnight or noon (as displayed).
1928 // toAppend is bogus if time is midnight or noon, but no localized string exists.
1929 // In either case, fall back to am/pm.
1930 if (toAppend == nullptr || toAppend->isBogus()) {
1931 // Reformat with identical arguments except ch, now changed to 'a'.
1932 // We are passing a different fieldToOutput because we want to add
1933 // 'b' to field position. This makes this fallback stable when
1934 // there is a data change on locales.
1935 subFormat(appendTo, u'a', count, capitalizationContext, fieldNum, u'b', handler, cal, status);
1936 return;
1937 } else {
1938 appendTo += *toAppend;
1939 }
1940
1941 break;
1942 }
1943
1944 case UDAT_FLEXIBLE_DAY_PERIOD_FIELD:
1945 {
1946 // TODO: Maybe fetch the DayperiodRules during initialization (instead of at the first
1947 // loading of an instance) if a relevant pattern character (b or B) is used.
1948 const DayPeriodRules *ruleSet = DayPeriodRules::getInstance(this->getSmpFmtLocale(), status);
1949 if (U_FAILURE(status)) {
1950 // Data doesn't conform to spec, therefore loading failed.
1951 break;
1952 }
1953 if (ruleSet == nullptr) {
1954 // Data doesn't exist for the locale we're looking for.
1955 // Falling back to am/pm.
1956 // We are passing a different fieldToOutput because we want to add
1957 // 'B' to field position. This makes this fallback stable when
1958 // there is a data change on locales.
1959 subFormat(appendTo, u'a', count, capitalizationContext, fieldNum, u'B', handler, cal, status);
1960 return;
1961 }
1962
1963 // Get current display time.
1964 int32_t hour = cal.get(UCAL_HOUR_OF_DAY, status);
1965 int32_t minute = 0;
1966 if (fHasMinute) {
1967 minute = cal.get(UCAL_MINUTE, status);
1968 }
1969 int32_t second = 0;
1970 if (fHasSecond) {
1971 second = cal.get(UCAL_SECOND, status);
1972 }
1973
1974 // Determine day period.
1975 DayPeriodRules::DayPeriod periodType;
1976 if (hour == 0 && minute == 0 && second == 0 && ruleSet->hasMidnight()) {
1977 periodType = DayPeriodRules::DAYPERIOD_MIDNIGHT;
1978 } else if (hour == 12 && minute == 0 && second == 0 && ruleSet->hasNoon()) {
1979 periodType = DayPeriodRules::DAYPERIOD_NOON;
1980 } else {
1981 periodType = ruleSet->getDayPeriodForHour(hour);
1982 }
1983
1984 // Rule set exists, therefore periodType can't be UNKNOWN.
1985 // Get localized string.
1986 U_ASSERT(periodType != DayPeriodRules::DAYPERIOD_UNKNOWN);
1987 UnicodeString *toAppend = nullptr;
1988 int32_t index;
1989
1990 // Note: "midnight" can be ambiguous as to whether it refers to beginning of day or end of day.
1991 // For ICU 57 output of "midnight" is temporarily suppressed.
1992
1993 if (periodType != DayPeriodRules::DAYPERIOD_AM &&
1994 periodType != DayPeriodRules::DAYPERIOD_PM &&
1995 periodType != DayPeriodRules::DAYPERIOD_MIDNIGHT) {
1996 index = (int32_t)periodType;
1997 if (count <= 3) {
1998 toAppend = &fSymbols->fAbbreviatedDayPeriods[index]; // i.e. short
1999 } else if (count == 4 || count > 5) {
2000 toAppend = &fSymbols->fWideDayPeriods[index];
2001 } else { // count == 5
2002 toAppend = &fSymbols->fNarrowDayPeriods[index];
2003 }
2004 }
2005
2006 // Fallback schedule:
2007 // Midnight/Noon -> General Periods -> AM/PM.
2008
2009 // Midnight/Noon -> General Periods.
2010 if ((toAppend == nullptr || toAppend->isBogus()) &&
2011 (periodType == DayPeriodRules::DAYPERIOD_MIDNIGHT ||
2012 periodType == DayPeriodRules::DAYPERIOD_NOON)) {
2013 periodType = ruleSet->getDayPeriodForHour(hour);
2014 index = (int32_t)periodType;
2015
2016 if (count <= 3) {
2017 toAppend = &fSymbols->fAbbreviatedDayPeriods[index]; // i.e. short
2018 } else if (count == 4 || count > 5) {
2019 toAppend = &fSymbols->fWideDayPeriods[index];
2020 } else { // count == 5
2021 toAppend = &fSymbols->fNarrowDayPeriods[index];
2022 }
2023 }
2024
2025 // General Periods -> AM/PM.
2026 if (periodType == DayPeriodRules::DAYPERIOD_AM ||
2027 periodType == DayPeriodRules::DAYPERIOD_PM ||
2028 toAppend->isBogus()) {
2029 // We are passing a different fieldToOutput because we want to add
2030 // 'B' to field position iterator. This makes this fallback stable when
2031 // there is a data change on locales.
2032 subFormat(appendTo, u'a', count, capitalizationContext, fieldNum, u'B', handler, cal, status);
2033 return;
2034 }
2035 else {
2036 appendTo += *toAppend;
2037 }
2038
2039 break;
2040 }
2041
2042 // all of the other pattern symbols can be formatted as simple numbers with
2043 // appropriate zero padding
2044 default:
2045 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
2046 break;
2047 }
2048 #if !UCONFIG_NO_BREAK_ITERATION
2049 // if first field, check to see whether we need to and are able to titlecase it
2050 if (fieldNum == 0 && fCapitalizationBrkIter != nullptr && appendTo.length() > beginOffset &&
2051 u_islower(appendTo.char32At(beginOffset))) {
2052 UBool titlecase = false;
2053 switch (capitalizationContext) {
2054 case UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE:
2055 titlecase = true;
2056 break;
2057 case UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU:
2058 titlecase = fSymbols->fCapitalization[capContextUsageType][0];
2059 break;
2060 case UDISPCTX_CAPITALIZATION_FOR_STANDALONE:
2061 titlecase = fSymbols->fCapitalization[capContextUsageType][1];
2062 break;
2063 default:
2064 // titlecase = false;
2065 break;
2066 }
2067 if (titlecase) {
2068 BreakIterator* const mutableCapitalizationBrkIter = fCapitalizationBrkIter->clone();
2069 UnicodeString firstField(appendTo, beginOffset);
2070 firstField.toTitle(mutableCapitalizationBrkIter, fLocale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
2071 appendTo.replaceBetween(beginOffset, appendTo.length(), firstField);
2072 delete mutableCapitalizationBrkIter;
2073 }
2074 }
2075 #endif
2076
2077 handler.addAttribute(DateFormatSymbols::getPatternCharIndex(fieldToOutput), beginOffset, appendTo.length());
2078 }
2079
2080 //----------------------------------------------------------------------
2081
adoptNumberFormat(NumberFormat * formatToAdopt)2082 void SimpleDateFormat::adoptNumberFormat(NumberFormat *formatToAdopt) {
2083 // Null out the fast formatter, it references fNumberFormat which we're
2084 // about to invalidate
2085 delete fSimpleNumberFormatter;
2086 fSimpleNumberFormatter = nullptr;
2087
2088 fixNumberFormatForDates(*formatToAdopt);
2089 delete fNumberFormat;
2090 fNumberFormat = formatToAdopt;
2091
2092 // We successfully set the default number format. Now delete the overrides
2093 // (can't fail).
2094 if (fSharedNumberFormatters) {
2095 freeSharedNumberFormatters(fSharedNumberFormatters);
2096 fSharedNumberFormatters = nullptr;
2097 }
2098
2099 // Recompute fSimpleNumberFormatter if necessary
2100 UErrorCode localStatus = U_ZERO_ERROR;
2101 initSimpleNumberFormatter(localStatus);
2102 }
2103
adoptNumberFormat(const UnicodeString & fields,NumberFormat * formatToAdopt,UErrorCode & status)2104 void SimpleDateFormat::adoptNumberFormat(const UnicodeString& fields, NumberFormat *formatToAdopt, UErrorCode &status){
2105 fixNumberFormatForDates(*formatToAdopt);
2106 LocalPointer<NumberFormat> fmt(formatToAdopt);
2107 if (U_FAILURE(status)) {
2108 return;
2109 }
2110
2111 // We must ensure fSharedNumberFormatters is allocated.
2112 if (fSharedNumberFormatters == nullptr) {
2113 fSharedNumberFormatters = allocSharedNumberFormatters();
2114 if (fSharedNumberFormatters == nullptr) {
2115 status = U_MEMORY_ALLOCATION_ERROR;
2116 return;
2117 }
2118 }
2119 const SharedNumberFormat *newFormat = createSharedNumberFormat(fmt.orphan());
2120 if (newFormat == nullptr) {
2121 status = U_MEMORY_ALLOCATION_ERROR;
2122 return;
2123 }
2124 for (int i=0; i<fields.length(); i++) {
2125 char16_t field = fields.charAt(i);
2126 // if the pattern character is unrecognized, signal an error and bail out
2127 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(field);
2128 if (patternCharIndex == UDAT_FIELD_COUNT) {
2129 status = U_INVALID_FORMAT_ERROR;
2130 newFormat->deleteIfZeroRefCount();
2131 return;
2132 }
2133
2134 // Set the number formatter in the table
2135 SharedObject::copyPtr(
2136 newFormat, fSharedNumberFormatters[patternCharIndex]);
2137 }
2138 newFormat->deleteIfZeroRefCount();
2139 }
2140
2141 const NumberFormat *
getNumberFormatForField(char16_t field) const2142 SimpleDateFormat::getNumberFormatForField(char16_t field) const {
2143 UDateFormatField index = DateFormatSymbols::getPatternCharIndex(field);
2144 if (index == UDAT_FIELD_COUNT) {
2145 return nullptr;
2146 }
2147 return getNumberFormatByIndex(index);
2148 }
2149
2150 //----------------------------------------------------------------------
2151 void
zeroPaddingNumber(const NumberFormat * currentNumberFormat,UnicodeString & appendTo,int32_t value,int32_t minDigits,int32_t maxDigits) const2152 SimpleDateFormat::zeroPaddingNumber(
2153 const NumberFormat *currentNumberFormat,
2154 UnicodeString &appendTo,
2155 int32_t value, int32_t minDigits, int32_t maxDigits) const
2156 {
2157
2158 if (currentNumberFormat == fNumberFormat && fSimpleNumberFormatter) {
2159 // Can use fast path
2160 UErrorCode localStatus = U_ZERO_ERROR;
2161 number::SimpleNumber number = number::SimpleNumber::forInt64(value, localStatus);
2162 number.setMinimumIntegerDigits(minDigits, localStatus);
2163 number.truncateStart(maxDigits, localStatus);
2164
2165 number::FormattedNumber result = fSimpleNumberFormatter->format(std::move(number), localStatus);
2166 if (U_FAILURE(localStatus)) {
2167 return;
2168 }
2169 appendTo.append(result.toTempString(localStatus));
2170 return;
2171 }
2172
2173 // Check for RBNF (no clone necessary)
2174 auto* rbnf = dynamic_cast<const RuleBasedNumberFormat*>(currentNumberFormat);
2175 if (rbnf != nullptr) {
2176 FieldPosition pos(FieldPosition::DONT_CARE);
2177 rbnf->format(value, appendTo, pos); // 3rd arg is there to speed up processing
2178 return;
2179 }
2180
2181 // Fall back to slow path (clone and mutate the NumberFormat)
2182 if (currentNumberFormat != nullptr) {
2183 FieldPosition pos(FieldPosition::DONT_CARE);
2184 LocalPointer<NumberFormat> nf(currentNumberFormat->clone());
2185 nf->setMinimumIntegerDigits(minDigits);
2186 nf->setMaximumIntegerDigits(maxDigits);
2187 nf->format(value, appendTo, pos); // 3rd arg is there to speed up processing
2188 }
2189 }
2190
2191 //----------------------------------------------------------------------
2192
2193 /**
2194 * Return true if the given format character, occurring count
2195 * times, represents a numeric field.
2196 */
isNumeric(char16_t formatChar,int32_t count)2197 UBool SimpleDateFormat::isNumeric(char16_t formatChar, int32_t count) {
2198 return DateFormatSymbols::isNumericPatternChar(formatChar, count);
2199 }
2200
2201 UBool
isAtNumericField(const UnicodeString & pattern,int32_t patternOffset)2202 SimpleDateFormat::isAtNumericField(const UnicodeString &pattern, int32_t patternOffset) {
2203 if (patternOffset >= pattern.length()) {
2204 // not at any field
2205 return false;
2206 }
2207 char16_t ch = pattern.charAt(patternOffset);
2208 UDateFormatField f = DateFormatSymbols::getPatternCharIndex(ch);
2209 if (f == UDAT_FIELD_COUNT) {
2210 // not at any field
2211 return false;
2212 }
2213 int32_t i = patternOffset;
2214 while (pattern.charAt(++i) == ch) {}
2215 return DateFormatSymbols::isNumericField(f, i - patternOffset);
2216 }
2217
2218 UBool
isAfterNonNumericField(const UnicodeString & pattern,int32_t patternOffset)2219 SimpleDateFormat::isAfterNonNumericField(const UnicodeString &pattern, int32_t patternOffset) {
2220 if (patternOffset <= 0) {
2221 // not after any field
2222 return false;
2223 }
2224 char16_t ch = pattern.charAt(--patternOffset);
2225 UDateFormatField f = DateFormatSymbols::getPatternCharIndex(ch);
2226 if (f == UDAT_FIELD_COUNT) {
2227 // not after any field
2228 return false;
2229 }
2230 int32_t i = patternOffset;
2231 while (pattern.charAt(--i) == ch) {}
2232 return !DateFormatSymbols::isNumericField(f, patternOffset - i);
2233 }
2234
2235 void
parse(const UnicodeString & text,Calendar & cal,ParsePosition & parsePos) const2236 SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& parsePos) const
2237 {
2238 UErrorCode status = U_ZERO_ERROR;
2239 int32_t pos = parsePos.getIndex();
2240 if(parsePos.getIndex() < 0) {
2241 parsePos.setErrorIndex(0);
2242 return;
2243 }
2244 int32_t start = pos;
2245
2246 // Hold the day period until everything else is parsed, because we need
2247 // the hour to interpret time correctly.
2248 int32_t dayPeriodInt = -1;
2249
2250 UBool ambiguousYear[] = { false };
2251 int32_t saveHebrewMonth = -1;
2252 int32_t count = 0;
2253 UTimeZoneFormatTimeType tzTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
2254
2255 // For parsing abutting numeric fields. 'abutPat' is the
2256 // offset into 'pattern' of the first of 2 or more abutting
2257 // numeric fields. 'abutStart' is the offset into 'text'
2258 // where parsing the fields begins. 'abutPass' starts off as 0
2259 // and increments each time we try to parse the fields.
2260 int32_t abutPat = -1; // If >=0, we are in a run of abutting numeric fields
2261 int32_t abutStart = 0;
2262 int32_t abutPass = 0;
2263 UBool inQuote = false;
2264
2265 MessageFormat * numericLeapMonthFormatter = nullptr;
2266
2267 Calendar* calClone = nullptr;
2268 Calendar *workCal = &cal;
2269 if (&cal != fCalendar && uprv_strcmp(cal.getType(), fCalendar->getType()) != 0) {
2270 // Different calendar type
2271 // We use the time/zone from the input calendar, but
2272 // do not use the input calendar for field calculation.
2273 calClone = fCalendar->clone();
2274 if (calClone != nullptr) {
2275 calClone->setTime(cal.getTime(status),status);
2276 if (U_FAILURE(status)) {
2277 goto ExitParse;
2278 }
2279 calClone->setTimeZone(cal.getTimeZone());
2280 workCal = calClone;
2281 } else {
2282 status = U_MEMORY_ALLOCATION_ERROR;
2283 goto ExitParse;
2284 }
2285 }
2286
2287 if (fSymbols->fLeapMonthPatterns != nullptr && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) {
2288 numericLeapMonthFormatter = new MessageFormat(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric], fLocale, status);
2289 if (numericLeapMonthFormatter == nullptr) {
2290 status = U_MEMORY_ALLOCATION_ERROR;
2291 goto ExitParse;
2292 } else if (U_FAILURE(status)) {
2293 goto ExitParse; // this will delete numericLeapMonthFormatter
2294 }
2295 }
2296
2297 for (int32_t i=0; i<fPattern.length(); ++i) {
2298 char16_t ch = fPattern.charAt(i);
2299
2300 // Handle alphabetic field characters.
2301 if (!inQuote && isSyntaxChar(ch)) {
2302 int32_t fieldPat = i;
2303
2304 // Count the length of this field specifier
2305 count = 1;
2306 while ((i+1)<fPattern.length() &&
2307 fPattern.charAt(i+1) == ch) {
2308 ++count;
2309 ++i;
2310 }
2311
2312 if (isNumeric(ch, count)) {
2313 if (abutPat < 0) {
2314 // Determine if there is an abutting numeric field.
2315 // Record the start of a set of abutting numeric fields.
2316 if (isAtNumericField(fPattern, i + 1)) {
2317 abutPat = fieldPat;
2318 abutStart = pos;
2319 abutPass = 0;
2320 }
2321 }
2322 } else {
2323 abutPat = -1; // End of any abutting fields
2324 }
2325
2326 // Handle fields within a run of abutting numeric fields. Take
2327 // the pattern "HHmmss" as an example. We will try to parse
2328 // 2/2/2 characters of the input text, then if that fails,
2329 // 1/2/2. We only adjust the width of the leftmost field; the
2330 // others remain fixed. This allows "123456" => 12:34:56, but
2331 // "12345" => 1:23:45. Likewise, for the pattern "yyyyMMdd" we
2332 // try 4/2/2, 3/2/2, 2/2/2, and finally 1/2/2.
2333 if (abutPat >= 0) {
2334 // If we are at the start of a run of abutting fields, then
2335 // shorten this field in each pass. If we can't shorten
2336 // this field any more, then the parse of this set of
2337 // abutting numeric fields has failed.
2338 if (fieldPat == abutPat) {
2339 count -= abutPass++;
2340 if (count == 0) {
2341 status = U_PARSE_ERROR;
2342 goto ExitParse;
2343 }
2344 }
2345
2346 pos = subParse(text, pos, ch, count,
2347 true, false, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter, &tzTimeType);
2348
2349 // If the parse fails anywhere in the run, back up to the
2350 // start of the run and retry.
2351 if (pos < 0) {
2352 i = abutPat - 1;
2353 pos = abutStart;
2354 continue;
2355 }
2356 }
2357
2358 // Handle non-numeric fields and non-abutting numeric
2359 // fields.
2360 else if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored
2361 int32_t s = subParse(text, pos, ch, count,
2362 false, true, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter, &tzTimeType, &dayPeriodInt);
2363
2364 if (s == -pos-1) {
2365 // era not present, in special cases allow this to continue
2366 // from the position where the era was expected
2367 s = pos;
2368
2369 if (i+1 < fPattern.length()) {
2370 // move to next pattern character
2371 char16_t c = fPattern.charAt(i+1);
2372
2373 // check for whitespace
2374 if (PatternProps::isWhiteSpace(c)) {
2375 i++;
2376 // Advance over run in pattern
2377 while ((i+1)<fPattern.length() &&
2378 PatternProps::isWhiteSpace(fPattern.charAt(i+1))) {
2379 ++i;
2380 }
2381 }
2382 }
2383 }
2384 else if (s <= 0) {
2385 status = U_PARSE_ERROR;
2386 goto ExitParse;
2387 }
2388 pos = s;
2389 }
2390 }
2391
2392 // Handle literal pattern characters. These are any
2393 // quoted characters and non-alphabetic unquoted
2394 // characters.
2395 else {
2396
2397 abutPat = -1; // End of any abutting fields
2398
2399 if (! matchLiterals(fPattern, i, text, pos, getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status), getBooleanAttribute(UDAT_PARSE_PARTIAL_LITERAL_MATCH, status), isLenient())) {
2400 status = U_PARSE_ERROR;
2401 goto ExitParse;
2402 }
2403 }
2404 }
2405
2406 // Special hack for trailing "." after non-numeric field.
2407 if (text.charAt(pos) == 0x2e && getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) {
2408 // only do if the last field is not numeric
2409 if (isAfterNonNumericField(fPattern, fPattern.length())) {
2410 pos++; // skip the extra "."
2411 }
2412 }
2413
2414 // If dayPeriod is set, use it in conjunction with hour-of-day to determine am/pm.
2415 if (dayPeriodInt >= 0) {
2416 DayPeriodRules::DayPeriod dayPeriod = (DayPeriodRules::DayPeriod)dayPeriodInt;
2417 const DayPeriodRules *ruleSet = DayPeriodRules::getInstance(this->getSmpFmtLocale(), status);
2418
2419 if (!cal.isSet(UCAL_HOUR) && !cal.isSet(UCAL_HOUR_OF_DAY)) {
2420 // If hour is not set, set time to the midpoint of current day period, overwriting
2421 // minutes if it's set.
2422 double midPoint = ruleSet->getMidPointForDayPeriod(dayPeriod, status);
2423
2424 // If we can't get midPoint we do nothing.
2425 if (U_SUCCESS(status)) {
2426 // Truncate midPoint toward zero to get the hour.
2427 // Any leftover means it was a half-hour.
2428 int32_t midPointHour = (int32_t) midPoint;
2429 int32_t midPointMinute = (midPoint - midPointHour) > 0 ? 30 : 0;
2430
2431 // No need to set am/pm because hour-of-day is set last therefore takes precedence.
2432 cal.set(UCAL_HOUR_OF_DAY, midPointHour);
2433 cal.set(UCAL_MINUTE, midPointMinute);
2434 }
2435 } else {
2436 int hourOfDay;
2437
2438 if (cal.isSet(UCAL_HOUR_OF_DAY)) { // Hour is parsed in 24-hour format.
2439 hourOfDay = cal.get(UCAL_HOUR_OF_DAY, status);
2440 } else { // Hour is parsed in 12-hour format.
2441 hourOfDay = cal.get(UCAL_HOUR, status);
2442 // cal.get() turns 12 to 0 for 12-hour time; change 0 to 12
2443 // so 0 unambiguously means a 24-hour time from above.
2444 if (hourOfDay == 0) { hourOfDay = 12; }
2445 }
2446 U_ASSERT(0 <= hourOfDay && hourOfDay <= 23);
2447
2448
            // If hour-of-day is 0 or 13 thru 23 then input time is unambiguously in 24-hour format.
2450 if (hourOfDay == 0 || (13 <= hourOfDay && hourOfDay <= 23)) {
2451 // Make hour-of-day take precedence over (hour + am/pm) by setting it again.
2452 cal.set(UCAL_HOUR_OF_DAY, hourOfDay);
2453 } else {
2454 // We have a 12-hour time and need to choose between am and pm.
2455 // Behave as if dayPeriod spanned 6 hours each way from its center point.
2456 // This will parse correctly for consistent time + period (e.g. 10 at night) as
2457 // well as provide a reasonable recovery for inconsistent time + period (e.g.
2458 // 9 in the afternoon).
2459
2460 // Assume current time is in the AM.
2461 // - Change 12 back to 0 for easier handling of 12am.
2462 // - Append minutes as fractional hours because e.g. 8:15 and 8:45 could be parsed
2463 // into different half-days if center of dayPeriod is at 14:30.
2464 // - cal.get(MINUTE) will return 0 if MINUTE is unset, which works.
2465 if (hourOfDay == 12) { hourOfDay = 0; }
2466 double currentHour = hourOfDay + (cal.get(UCAL_MINUTE, status)) / 60.0;
2467 double midPointHour = ruleSet->getMidPointForDayPeriod(dayPeriod, status);
2468
2469 if (U_SUCCESS(status)) {
2470 double hoursAheadMidPoint = currentHour - midPointHour;
2471
2472 // Assume current time is in the AM.
2473 if (-6 <= hoursAheadMidPoint && hoursAheadMidPoint < 6) {
2474 // Assumption holds; set time as such.
2475 cal.set(UCAL_AM_PM, 0);
2476 } else {
2477 cal.set(UCAL_AM_PM, 1);
2478 }
2479 }
2480 }
2481 }
2482 }
2483
2484 // At this point the fields of Calendar have been set. Calendar
2485 // will fill in default values for missing fields when the time
2486 // is computed.
2487
2488 parsePos.setIndex(pos);
2489
2490 // This part is a problem: When we call parsedDate.after, we compute the time.
2491 // Take the date April 3 2004 at 2:30 am. When this is first set up, the year
2492 // will be wrong if we're parsing a 2-digit year pattern. It will be 1904.
2493 // April 3 1904 is a Sunday (unlike 2004) so it is the DST onset day. 2:30 am
2494 // is therefore an "impossible" time, since the time goes from 1:59 to 3:00 am
2495 // on that day. It is therefore parsed out to fields as 3:30 am. Then we
2496 // add 100 years, and get April 3 2004 at 3:30 am. Note that April 3 2004 is
2497 // a Saturday, so it can have a 2:30 am -- and it should. [LIU]
2498 /*
2499 UDate parsedDate = calendar.getTime();
2500 if( ambiguousYear[0] && !parsedDate.after(fDefaultCenturyStart) ) {
2501 calendar.add(Calendar.YEAR, 100);
2502 parsedDate = calendar.getTime();
2503 }
2504 */
2505 // Because of the above condition, save off the fields in case we need to readjust.
2506 // The procedure we use here is not particularly efficient, but there is no other
2507 // way to do this given the API restrictions present in Calendar. We minimize
2508 // inefficiency by only performing this computation when it might apply, that is,
2509 // when the two-digit year is equal to the start year, and thus might fall at the
2510 // front or the back of the default century. This only works because we adjust
2511 // the year correctly to start with in other cases -- see subParse().
2512 if (ambiguousYear[0] || tzTimeType != UTZFMT_TIME_TYPE_UNKNOWN) // If this is true then the two-digit year == the default start year
2513 {
2514 // We need a copy of the fields, and we need to avoid triggering a call to
2515 // complete(), which will recalculate the fields. Since we can't access
2516 // the fields[] array in Calendar, we clone the entire object. This will
2517 // stop working if Calendar.clone() is ever rewritten to call complete().
2518 Calendar *copy;
2519 if (ambiguousYear[0]) {
2520 copy = cal.clone();
2521 // Check for failed cloning.
2522 if (copy == nullptr) {
2523 status = U_MEMORY_ALLOCATION_ERROR;
2524 goto ExitParse;
2525 }
2526 UDate parsedDate = copy->getTime(status);
2527 // {sfb} check internalGetDefaultCenturyStart
2528 if (fHaveDefaultCentury && (parsedDate < fDefaultCenturyStart)) {
2529 // We can't use add here because that does a complete() first.
2530 cal.set(UCAL_YEAR, fDefaultCenturyStartYear + 100);
2531 }
2532 delete copy;
2533 }
2534
2535 if (tzTimeType != UTZFMT_TIME_TYPE_UNKNOWN) {
2536 copy = cal.clone();
2537 // Check for failed cloning.
2538 if (copy == nullptr) {
2539 status = U_MEMORY_ALLOCATION_ERROR;
2540 goto ExitParse;
2541 }
2542 const TimeZone & tz = cal.getTimeZone();
2543 BasicTimeZone *btz = nullptr;
2544
2545 if (dynamic_cast<const OlsonTimeZone *>(&tz) != nullptr
2546 || dynamic_cast<const SimpleTimeZone *>(&tz) != nullptr
2547 || dynamic_cast<const RuleBasedTimeZone *>(&tz) != nullptr
2548 || dynamic_cast<const VTimeZone *>(&tz) != nullptr) {
2549 btz = (BasicTimeZone*)&tz;
2550 }
2551
2552 // Get local millis
2553 copy->set(UCAL_ZONE_OFFSET, 0);
2554 copy->set(UCAL_DST_OFFSET, 0);
2555 UDate localMillis = copy->getTime(status);
2556
2557 // Make sure parsed time zone type (Standard or Daylight)
2558 // matches the rule used by the parsed time zone.
2559 int32_t raw, dst;
2560 if (btz != nullptr) {
2561 if (tzTimeType == UTZFMT_TIME_TYPE_STANDARD) {
2562 btz->getOffsetFromLocal(localMillis,
2563 UCAL_TZ_LOCAL_STANDARD_FORMER, UCAL_TZ_LOCAL_STANDARD_LATTER, raw, dst, status);
2564 } else {
2565 btz->getOffsetFromLocal(localMillis,
2566 UCAL_TZ_LOCAL_DAYLIGHT_FORMER, UCAL_TZ_LOCAL_DAYLIGHT_LATTER, raw, dst, status);
2567 }
2568 } else {
2569 // No good way to resolve ambiguous time at transition,
2570 // but following code work in most case.
2571 tz.getOffset(localMillis, true, raw, dst, status);
2572 }
2573
2574 // Now, compare the results with parsed type, either standard or daylight saving time
2575 int32_t resolvedSavings = dst;
2576 if (tzTimeType == UTZFMT_TIME_TYPE_STANDARD) {
2577 if (dst != 0) {
2578 // Override DST_OFFSET = 0 in the result calendar
2579 resolvedSavings = 0;
2580 }
2581 } else { // tztype == TZTYPE_DST
2582 if (dst == 0) {
2583 if (btz != nullptr) {
2584 // This implementation resolves daylight saving time offset
2585 // closest rule after the given time.
2586 UDate baseTime = localMillis + raw;
2587 UDate time = baseTime;
2588 UDate limit = baseTime + MAX_DAYLIGHT_DETECTION_RANGE;
2589 TimeZoneTransition trs;
2590 UBool trsAvail;
2591
2592 // Search for DST rule after the given time
2593 while (time < limit) {
2594 trsAvail = btz->getNextTransition(time, false, trs);
2595 if (!trsAvail) {
2596 break;
2597 }
2598 resolvedSavings = trs.getTo()->getDSTSavings();
2599 if (resolvedSavings != 0) {
2600 break;
2601 }
2602 time = trs.getTime();
2603 }
2604
2605 if (resolvedSavings == 0) {
2606 // If no DST rule after the given time was found, search for
2607 // DST rule before.
2608 time = baseTime;
2609 limit = baseTime - MAX_DAYLIGHT_DETECTION_RANGE;
2610 while (time > limit) {
2611 trsAvail = btz->getPreviousTransition(time, true, trs);
2612 if (!trsAvail) {
2613 break;
2614 }
2615 resolvedSavings = trs.getFrom()->getDSTSavings();
2616 if (resolvedSavings != 0) {
2617 break;
2618 }
2619 time = trs.getTime() - 1;
2620 }
2621
2622 if (resolvedSavings == 0) {
2623 resolvedSavings = btz->getDSTSavings();
2624 }
2625 }
2626 } else {
2627 resolvedSavings = tz.getDSTSavings();
2628 }
2629 if (resolvedSavings == 0) {
2630 // final fallback
2631 resolvedSavings = U_MILLIS_PER_HOUR;
2632 }
2633 }
2634 }
2635 cal.set(UCAL_ZONE_OFFSET, raw);
2636 cal.set(UCAL_DST_OFFSET, resolvedSavings);
2637 delete copy;
2638 }
2639 }
2640 ExitParse:
2641 // Set the parsed result if local calendar is used
2642 // instead of the input calendar
2643 if (U_SUCCESS(status) && workCal != &cal) {
2644 cal.setTimeZone(workCal->getTimeZone());
2645 cal.setTime(workCal->getTime(status), status);
2646 }
2647
2648 if (numericLeapMonthFormatter != nullptr) {
2649 delete numericLeapMonthFormatter;
2650 }
2651 if (calClone != nullptr) {
2652 delete calClone;
2653 }
2654
2655 // If any Calendar calls failed, we pretend that we
2656 // couldn't parse the string, when in reality this isn't quite accurate--
2657 // we did parse it; the Calendar calls just failed.
2658 if (U_FAILURE(status)) {
2659 parsePos.setErrorIndex(pos);
2660 parsePos.setIndex(start);
2661 }
2662 }
2663
2664 //----------------------------------------------------------------------
2665
2666 static int32_t
2667 matchStringWithOptionalDot(const UnicodeString &text,
2668 int32_t index,
2669 const UnicodeString &data);
2670
matchQuarterString(const UnicodeString & text,int32_t start,UCalendarDateFields field,const UnicodeString * data,int32_t dataCount,Calendar & cal) const2671 int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
2672 int32_t start,
2673 UCalendarDateFields field,
2674 const UnicodeString* data,
2675 int32_t dataCount,
2676 Calendar& cal) const
2677 {
2678 int32_t i = 0;
2679 int32_t count = dataCount;
2680
2681 // There may be multiple strings in the data[] array which begin with
2682 // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
2683 // We keep track of the longest match, and return that. Note that this
2684 // unfortunately requires us to test all array elements.
2685 int32_t bestMatchLength = 0, bestMatch = -1;
2686 UnicodeString bestMatchName;
2687
2688 for (; i < count; ++i) {
2689 int32_t matchLength = 0;
2690 if ((matchLength = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) {
2691 bestMatchLength = matchLength;
2692 bestMatch = i;
2693 }
2694 }
2695
2696 if (bestMatch >= 0) {
2697 cal.set(field, bestMatch * 3);
2698 return start + bestMatchLength;
2699 }
2700
2701 return -start;
2702 }
2703
matchDayPeriodStrings(const UnicodeString & text,int32_t start,const UnicodeString * data,int32_t dataCount,int32_t & dayPeriod) const2704 int32_t SimpleDateFormat::matchDayPeriodStrings(const UnicodeString& text, int32_t start,
2705 const UnicodeString* data, int32_t dataCount,
2706 int32_t &dayPeriod) const
2707 {
2708
2709 int32_t bestMatchLength = 0, bestMatch = -1;
2710
2711 for (int32_t i = 0; i < dataCount; ++i) {
2712 int32_t matchLength = 0;
2713 if ((matchLength = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) {
2714 bestMatchLength = matchLength;
2715 bestMatch = i;
2716 }
2717 }
2718
2719 if (bestMatch >= 0) {
2720 dayPeriod = bestMatch;
2721 return start + bestMatchLength;
2722 }
2723
2724 return -start;
2725 }
2726
2727 //----------------------------------------------------------------------
/**
 * Matches the run of literal (non-field) pattern characters that starts at
 * patternOffset against the input text starting at textOffset.
 *
 * The literal run is first extracted from the pattern (honouring quoting),
 * then compared with the text. Both offsets are written only on success,
 * immediately before returning true; every `return false` path leaves them
 * unchanged.
 *
 * @param pattern             the date pattern
 * @param patternOffset       in: index of the first literal character;
 *                            out (on success): index of the next syntax
 *                            character (or pattern.length()) minus one
 * @param text                the text being parsed
 * @param textOffset          in: index in text where matching starts;
 *                            out (on success): index just past the matched text
 * @param whitespaceLenient   if true, the literal is trimmed, leading
 *                            whitespace in the text is skipped, and stray
 *                            whitespace or a leading '.' in the text may be
 *                            skipped instead of causing a failure
 * @param partialMatchLenient together with oldLeniency: if both are true, a
 *                            mismatch stops the comparison and keeps whatever
 *                            was matched so far instead of failing
 * @param oldLeniency         see partialMatchLenient
 * @return true if the literal is considered matched, false otherwise.
 */
UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern,
                                      int32_t &patternOffset,
                                      const UnicodeString &text,
                                      int32_t &textOffset,
                                      UBool whitespaceLenient,
                                      UBool partialMatchLenient,
                                      UBool oldLeniency)
{
    UBool inQuote = false;
    UnicodeString literal;
    int32_t i = patternOffset;

    // scan pattern looking for contiguous literal characters
    for ( ; i < pattern.length(); i += 1) {
        char16_t ch = pattern.charAt(i);

        // An unquoted syntax character ends the literal run.
        if (!inQuote && isSyntaxChar(ch)) {
            break;
        }

        if (ch == QUOTE) {
            // Match a quote literal ('') inside OR outside of quotes
            if ((i + 1) < pattern.length() && pattern.charAt(i + 1) == QUOTE) {
                // Doubled quote: consume the second one and fall through so
                // that a single quote character is appended to the literal.
                i += 1;
            } else {
                // Lone quote: toggles quoting and contributes no character.
                inQuote = !inQuote;
                continue;
            }
        }

        literal += ch;
    }

    // at this point, literal contains the literal text
    // and i is the index of the next non-literal pattern character.
    // p indexes into literal, t indexes into text.
    int32_t p;
    int32_t t = textOffset;

    if (whitespaceLenient) {
        // trim leading, trailing whitespace from
        // the literal text
        literal.trim();

        // ignore any leading whitespace in the text
        while (t < text.length() && u_isWhitespace(text.charAt(t))) {
            t += 1;
        }
    }

    // Note: the loop header does not advance p or t; each iteration either
    // advances both at the bottom, or uses `continue` after advancing t only.
    for (p = 0; p < literal.length() && t < text.length();) {
        UBool needWhitespace = false;

        // Collapse a run of pattern whitespace in the literal.
        while (p < literal.length() && PatternProps::isWhiteSpace(literal.charAt(p))) {
            needWhitespace = true;
            p += 1;
        }

        if (needWhitespace) {
            int32_t tStart = t;

            // Consume the corresponding run of whitespace in the text.
            while (t < text.length()) {
                char16_t tch = text.charAt(t);

                if (!u_isUWhiteSpace(tch) && !PatternProps::isWhiteSpace(tch)) {
                    break;
                }

                t += 1;
            }

            // TODO: should we require internal spaces
            // in lenient mode? (There won't be any
            // leading or trailing spaces)
            if (!whitespaceLenient && t == tStart) {
                // didn't find matching whitespace:
                // an error in strict mode
                return false;
            }

            // In strict mode, this run of whitespace
            // may have been at the end.
            if (p >= literal.length()) {
                break;
            }
        }
        if (t >= text.length() || literal.charAt(p) != text.charAt(t)) {
            // Ran out of text, or found a non-matching character:
            // OK in lenient mode, an error in strict mode.
            if (whitespaceLenient) {
                if (t == textOffset && text.charAt(t) == 0x2e &&
                        isAfterNonNumericField(pattern, patternOffset)) {
                    // Lenient mode and the literal input text begins with a "." and
                    // we are after a non-numeric field: We skip the "."
                    ++t;
                    continue;  // Do not update p.
                }
                // if it is actual whitespace and we're whitespace lenient it's OK

                // NOTE(review): t can equal text.length() here (the whitespace
                // run above may have consumed the rest of the text); this
                // relies on charAt() tolerating an out-of-range index - confirm.
                char16_t wsc = text.charAt(t);
                if(PatternProps::isWhiteSpace(wsc)) {
                    // Lenient mode and it's just whitespace we skip it
                    ++t;
                    continue;  // Do not update p.
                }
            }
            // hack around oldleniency being a bit of a catch-all bucket and we're just adding support specifically for partial matches
            if(partialMatchLenient && oldLeniency) {
                break;
            }

            return false;
        }
        ++p;
        ++t;
    }

    // At this point if we're in strict mode we have a complete match.
    // If we're in lenient mode we may have a partial match, or no
    // match at all.
    if (p <= 0) {
        // no match. Pretend it matched a run of whitespace
        // and ignorables in the text.
        const UnicodeSet *ignorables = nullptr;
        // NOTE(review): i may equal pattern.length() when the literal run
        // reaches the end of the pattern; this relies on charAt() tolerating
        // that index and on the result not mapping to a pattern field - confirm.
        UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(pattern.charAt(i));
        if (patternCharIndex != UDAT_FIELD_COUNT) {
            ignorables = SimpleDateFormatStaticSets::getIgnorables(patternCharIndex);
        }

        // t restarts from textOffset here, so any whitespace skipped above is
        // re-examined and kept only if it belongs to the ignorable set.
        for (t = textOffset; t < text.length(); t += 1) {
            char16_t ch = text.charAt(t);

            if (ignorables == nullptr || !ignorables->contains(ch)) {
                break;
            }
        }
    }

    // if we get here, we've got a complete match.
    // i - 1 is the last pattern index consumed by the literal run
    // (presumably the caller advances past it - verify against caller).
    patternOffset = i - 1;
    textOffset = t;

    return true;
}
2871
2872 //----------------------------------------------------------------------
2873 // check both wide and abbrev months.
2874 // Does not currently handle monthPattern.
2875 // UCalendarDateFields field = UCAL_MONTH
2876
matchAlphaMonthStrings(const UnicodeString & text,int32_t start,const UnicodeString * wideData,const UnicodeString * shortData,int32_t dataCount,Calendar & cal) const2877 int32_t SimpleDateFormat::matchAlphaMonthStrings(const UnicodeString& text,
2878 int32_t start,
2879 const UnicodeString* wideData,
2880 const UnicodeString* shortData,
2881 int32_t dataCount,
2882 Calendar& cal) const
2883 {
2884 int32_t i;
2885 int32_t bestMatchLength = 0, bestMatch = -1;
2886
2887 for (i = 0; i < dataCount; ++i) {
2888 int32_t matchLen = 0;
2889 if ((matchLen = matchStringWithOptionalDot(text, start, wideData[i])) > bestMatchLength) {
2890 bestMatch = i;
2891 bestMatchLength = matchLen;
2892 }
2893 }
2894 for (i = 0; i < dataCount; ++i) {
2895 int32_t matchLen = 0;
2896 if ((matchLen = matchStringWithOptionalDot(text, start, shortData[i])) > bestMatchLength) {
2897 bestMatch = i;
2898 bestMatchLength = matchLen;
2899 }
2900 }
2901
2902 if (bestMatch >= 0) {
2903 // Adjustment for Hebrew Calendar month Adar II
2904 if (!strcmp(cal.getType(),"hebrew") && bestMatch==13) {
2905 cal.set(UCAL_MONTH,6);
2906 } else {
2907 cal.set(UCAL_MONTH, bestMatch);
2908 }
2909 return start + bestMatchLength;
2910 }
2911
2912 return -start;
2913 }
2914
2915 //----------------------------------------------------------------------
2916
matchString(const UnicodeString & text,int32_t start,UCalendarDateFields field,const UnicodeString * data,int32_t dataCount,const UnicodeString * monthPattern,Calendar & cal) const2917 int32_t SimpleDateFormat::matchString(const UnicodeString& text,
2918 int32_t start,
2919 UCalendarDateFields field,
2920 const UnicodeString* data,
2921 int32_t dataCount,
2922 const UnicodeString* monthPattern,
2923 Calendar& cal) const
2924 {
2925 int32_t i = 0;
2926 int32_t count = dataCount;
2927
2928 if (field == UCAL_DAY_OF_WEEK) i = 1;
2929
2930 // There may be multiple strings in the data[] array which begin with
2931 // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
2932 // We keep track of the longest match, and return that. Note that this
2933 // unfortunately requires us to test all array elements.
2934 // But this does not really work for cases such as Chuvash in which
2935 // May is "ҫу" and August is "ҫурла"/"ҫур.", hence matchAlphaMonthStrings.
2936 int32_t bestMatchLength = 0, bestMatch = -1;
2937 UnicodeString bestMatchName;
2938 int32_t isLeapMonth = 0;
2939
2940 for (; i < count; ++i) {
2941 int32_t matchLen = 0;
2942 if ((matchLen = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) {
2943 bestMatch = i;
2944 bestMatchLength = matchLen;
2945 }
2946
2947 if (monthPattern != nullptr) {
2948 UErrorCode status = U_ZERO_ERROR;
2949 UnicodeString leapMonthName;
2950 SimpleFormatter(*monthPattern, 1, 1, status).format(data[i], leapMonthName, status);
2951 if (U_SUCCESS(status)) {
2952 if ((matchLen = matchStringWithOptionalDot(text, start, leapMonthName)) > bestMatchLength) {
2953 bestMatch = i;
2954 bestMatchLength = matchLen;
2955 isLeapMonth = 1;
2956 }
2957 }
2958 }
2959 }
2960
2961 if (bestMatch >= 0) {
2962 if (field < UCAL_FIELD_COUNT) {
2963 // Adjustment for Hebrew Calendar month Adar II
2964 if (!strcmp(cal.getType(),"hebrew") && field==UCAL_MONTH && bestMatch==13) {
2965 cal.set(field,6);
2966 } else {
2967 if (field == UCAL_YEAR) {
2968 bestMatch++; // only get here for cyclic year names, which match 1-based years 1-60
2969 }
2970 cal.set(field, bestMatch);
2971 }
2972 if (monthPattern != nullptr) {
2973 cal.set(UCAL_IS_LEAP_MONTH, isLeapMonth);
2974 }
2975 }
2976
2977 return start + bestMatchLength;
2978 }
2979
2980 return -start;
2981 }
2982
2983 static int32_t
matchStringWithOptionalDot(const UnicodeString & text,int32_t index,const UnicodeString & data)2984 matchStringWithOptionalDot(const UnicodeString &text,
2985 int32_t index,
2986 const UnicodeString &data) {
2987 UErrorCode sts = U_ZERO_ERROR;
2988 int32_t matchLenText = 0;
2989 int32_t matchLenData = 0;
2990
2991 u_caseInsensitivePrefixMatch(text.getBuffer() + index, text.length() - index,
2992 data.getBuffer(), data.length(),
2993 0 /* default case option */,
2994 &matchLenText, &matchLenData,
2995 &sts);
2996 U_ASSERT (U_SUCCESS(sts));
2997
2998 if (matchLenData == data.length() /* normal match */
2999 || (data.charAt(data.length() - 1) == 0x2e
3000 && matchLenData == data.length() - 1 /* match without trailing dot */)) {
3001 return matchLenText;
3002 }
3003
3004 return 0;
3005 }
3006
3007 //----------------------------------------------------------------------
3008
/**
 * Forwards `d` and `status` unchanged to parseAmbiguousDatesAsAfter().
 * Presumably `d` becomes the start of the 100-year window used to resolve
 * two-digit years - confirm against parseAmbiguousDatesAsAfter().
 *
 * @param d      the date handed to parseAmbiguousDatesAsAfter()
 * @param status error code handed to parseAmbiguousDatesAsAfter()
 */
void
SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status)
{
    parseAmbiguousDatesAsAfter(d, status);
}
3014
3015 /**
3016 * Private member function that converts the parsed date strings into
3017 * timeFields. Returns -start (for ParsePosition) if failed.
3018 */
subParse(const UnicodeString & text,int32_t & start,char16_t ch,int32_t count,UBool obeyCount,UBool allowNegative,UBool ambiguousYear[],int32_t & saveHebrewMonth,Calendar & cal,int32_t patLoc,MessageFormat * numericLeapMonthFormatter,UTimeZoneFormatTimeType * tzTimeType,int32_t * dayPeriod) const3019 int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, char16_t ch, int32_t count,
3020 UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal,
3021 int32_t patLoc, MessageFormat * numericLeapMonthFormatter, UTimeZoneFormatTimeType *tzTimeType,
3022 int32_t *dayPeriod) const
3023 {
3024 Formattable number;
3025 int32_t value = 0;
3026 int32_t i;
3027 int32_t ps = 0;
3028 UErrorCode status = U_ZERO_ERROR;
3029 ParsePosition pos(0);
3030 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch);
3031 const NumberFormat *currentNumberFormat;
3032 UnicodeString temp;
3033 UBool gotNumber = false;
3034
3035 #if defined (U_DEBUG_CAL)
3036 //fprintf(stderr, "%s:%d - [%c] st=%d \n", __FILE__, __LINE__, (char) ch, start);
3037 #endif
3038
3039 if (patternCharIndex == UDAT_FIELD_COUNT) {
3040 return -start;
3041 }
3042
3043 currentNumberFormat = getNumberFormatByIndex(patternCharIndex);
3044 if (currentNumberFormat == nullptr) {
3045 return -start;
3046 }
3047 UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex]; // UCAL_FIELD_COUNT if irrelevant
3048 UnicodeString hebr("hebr", 4, US_INV);
3049
3050 if (numericLeapMonthFormatter != nullptr) {
3051 numericLeapMonthFormatter->setFormats((const Format **)¤tNumberFormat, 1);
3052 }
3053 UBool isChineseCalendar = (uprv_strcmp(cal.getType(),"chinese") == 0 || uprv_strcmp(cal.getType(),"dangi") == 0);
3054
3055 // If there are any spaces here, skip over them. If we hit the end
3056 // of the string, then fail.
3057 for (;;) {
3058 if (start >= text.length()) {
3059 return -start;
3060 }
3061 UChar32 c = text.char32At(start);
3062 if (!u_isUWhiteSpace(c) /*||*/ && !PatternProps::isWhiteSpace(c)) {
3063 break;
3064 }
3065 start += U16_LENGTH(c);
3066 }
3067 pos.setIndex(start);
3068
3069 // We handle a few special cases here where we need to parse
3070 // a number value. We handle further, more generic cases below. We need
3071 // to handle some of them here because some fields require extra processing on
3072 // the parsed value.
3073 if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD || // k
3074 patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD || // H
3075 patternCharIndex == UDAT_HOUR1_FIELD || // h
3076 patternCharIndex == UDAT_HOUR0_FIELD || // K
3077 (patternCharIndex == UDAT_DOW_LOCAL_FIELD && count <= 2) || // e
3078 (patternCharIndex == UDAT_STANDALONE_DAY_FIELD && count <= 2) || // c
3079 (patternCharIndex == UDAT_MONTH_FIELD && count <= 2) || // M
3080 (patternCharIndex == UDAT_STANDALONE_MONTH_FIELD && count <= 2) || // L
3081 (patternCharIndex == UDAT_QUARTER_FIELD && count <= 2) || // Q
3082 (patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD && count <= 2) || // q
3083 patternCharIndex == UDAT_YEAR_FIELD || // y
3084 patternCharIndex == UDAT_YEAR_WOY_FIELD || // Y
3085 patternCharIndex == UDAT_YEAR_NAME_FIELD || // U (falls back to numeric)
3086 (patternCharIndex == UDAT_ERA_FIELD && isChineseCalendar) || // G
3087 patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD) // S
3088 {
3089 int32_t parseStart = pos.getIndex();
3090 // It would be good to unify this with the obeyCount logic below,
3091 // but that's going to be difficult.
3092 const UnicodeString* src;
3093
3094 UBool parsedNumericLeapMonth = false;
3095 if (numericLeapMonthFormatter != nullptr && (patternCharIndex == UDAT_MONTH_FIELD || patternCharIndex == UDAT_STANDALONE_MONTH_FIELD)) {
3096 int32_t argCount;
3097 Formattable * args = numericLeapMonthFormatter->parse(text, pos, argCount);
3098 if (args != nullptr && argCount == 1 && pos.getIndex() > parseStart && args[0].isNumeric()) {
3099 parsedNumericLeapMonth = true;
3100 number.setLong(args[0].getLong());
3101 cal.set(UCAL_IS_LEAP_MONTH, 1);
3102 delete[] args;
3103 } else {
3104 pos.setIndex(parseStart);
3105 cal.set(UCAL_IS_LEAP_MONTH, 0);
3106 }
3107 }
3108
3109 if (!parsedNumericLeapMonth) {
3110 if (obeyCount) {
3111 if ((start+count) > text.length()) {
3112 return -start;
3113 }
3114
3115 text.extractBetween(0, start + count, temp);
3116 src = &temp;
3117 } else {
3118 src = &text;
3119 }
3120
3121 parseInt(*src, number, pos, allowNegative,currentNumberFormat);
3122 }
3123
3124 int32_t txtLoc = pos.getIndex();
3125
3126 if (txtLoc > parseStart) {
3127 value = number.getLong();
3128 gotNumber = true;
3129
3130 // suffix processing
3131 if (value < 0 ) {
3132 txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, true);
3133 if (txtLoc != pos.getIndex()) {
3134 value *= -1;
3135 }
3136 }
3137 else {
3138 txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, false);
3139 }
3140
3141 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) {
3142 // Check the range of the value
3143 int32_t bias = gFieldRangeBias[patternCharIndex];
3144 if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {
3145 return -start;
3146 }
3147 }
3148
3149 pos.setIndex(txtLoc);
3150 }
3151 }
3152
3153 // Make sure that we got a number if
3154 // we want one, and didn't get one
3155 // if we don't want one.
3156 switch (patternCharIndex) {
3157 case UDAT_HOUR_OF_DAY1_FIELD:
3158 case UDAT_HOUR_OF_DAY0_FIELD:
3159 case UDAT_HOUR1_FIELD:
3160 case UDAT_HOUR0_FIELD:
3161 // special range check for hours:
3162 if (value < 0 || value > 24) {
3163 return -start;
3164 }
3165
3166 // fall through to gotNumber check
3167 U_FALLTHROUGH;
3168 case UDAT_YEAR_FIELD:
3169 case UDAT_YEAR_WOY_FIELD:
3170 case UDAT_FRACTIONAL_SECOND_FIELD:
3171 // these must be a number
3172 if (! gotNumber) {
3173 return -start;
3174 }
3175
3176 break;
3177
3178 default:
3179 // we check the rest of the fields below.
3180 break;
3181 }
3182
3183 switch (patternCharIndex) {
3184 case UDAT_ERA_FIELD:
3185 if (isChineseCalendar) {
3186 if (!gotNumber) {
3187 return -start;
3188 }
3189 cal.set(UCAL_ERA, value);
3190 return pos.getIndex();
3191 }
3192 if (count == 5) {
3193 ps = matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, nullptr, cal);
3194 } else if (count == 4) {
3195 ps = matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, nullptr, cal);
3196 } else {
3197 ps = matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, nullptr, cal);
3198 }
3199
3200 // check return position, if it equals -start, then matchString error
3201 // special case the return code so we don't necessarily fail out until we
3202 // verify no year information also
3203 if (ps == -start)
3204 ps--;
3205
3206 return ps;
3207
3208 case UDAT_YEAR_FIELD:
3209 // If there are 3 or more YEAR pattern characters, this indicates
3210 // that the year value is to be treated literally, without any
3211 // two-digit year adjustments (e.g., from "01" to 2001). Otherwise
3212 // we made adjustments to place the 2-digit year in the proper
3213 // century, for parsed strings from "00" to "99". Any other string
3214 // is treated literally: "2250", "-1", "1", "002".
3215 if (fDateOverride.compare(hebr)==0 && value < 1000) {
3216 value += HEBREW_CAL_CUR_MILLENIUM_START_YEAR;
3217 } else if (text.moveIndex32(start, 2) == pos.getIndex() && !isChineseCalendar
3218 && u_isdigit(text.char32At(start))
3219 && u_isdigit(text.char32At(text.moveIndex32(start, 1))))
3220 {
3221 // only adjust year for patterns less than 3.
3222 if(count < 3) {
3223 // Assume for example that the defaultCenturyStart is 6/18/1903.
3224 // This means that two-digit years will be forced into the range
3225 // 6/18/1903 to 6/17/2003. As a result, years 00, 01, and 02
3226 // correspond to 2000, 2001, and 2002. Years 04, 05, etc. correspond
3227 // to 1904, 1905, etc. If the year is 03, then it is 2003 if the
3228 // other fields specify a date before 6/18, or 1903 if they specify a
3229 // date afterwards. As a result, 03 is an ambiguous year. All other
3230 // two-digit years are unambiguous.
3231 if(fHaveDefaultCentury) { // check if this formatter even has a pivot year
3232 int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
3233 ambiguousYear[0] = (value == ambiguousTwoDigitYear);
3234 value += (fDefaultCenturyStartYear/100)*100 +
3235 (value < ambiguousTwoDigitYear ? 100 : 0);
3236 }
3237 }
3238 }
3239 cal.set(UCAL_YEAR, value);
3240
3241 // Delayed checking for adjustment of Hebrew month numbers in non-leap years.
3242 if (saveHebrewMonth >= 0) {
3243 HebrewCalendar *hc = (HebrewCalendar*)&cal;
3244 if (!hc->isLeapYear(value) && saveHebrewMonth >= 6) {
3245 cal.set(UCAL_MONTH,saveHebrewMonth);
3246 } else {
3247 cal.set(UCAL_MONTH,saveHebrewMonth-1);
3248 }
3249 saveHebrewMonth = -1;
3250 }
3251 return pos.getIndex();
3252
3253 case UDAT_YEAR_WOY_FIELD:
3254 // Comment is the same as for UDAT_Year_FIELDs - look above
3255 if (fDateOverride.compare(hebr)==0 && value < 1000) {
3256 value += HEBREW_CAL_CUR_MILLENIUM_START_YEAR;
3257 } else if (text.moveIndex32(start, 2) == pos.getIndex()
3258 && u_isdigit(text.char32At(start))
3259 && u_isdigit(text.char32At(text.moveIndex32(start, 1)))
3260 && fHaveDefaultCentury )
3261 {
3262 int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
3263 ambiguousYear[0] = (value == ambiguousTwoDigitYear);
3264 value += (fDefaultCenturyStartYear/100)*100 +
3265 (value < ambiguousTwoDigitYear ? 100 : 0);
3266 }
3267 cal.set(UCAL_YEAR_WOY, value);
3268 return pos.getIndex();
3269
3270 case UDAT_YEAR_NAME_FIELD:
3271 if (fSymbols->fShortYearNames != nullptr) {
3272 int32_t newStart = matchString(text, start, UCAL_YEAR, fSymbols->fShortYearNames, fSymbols->fShortYearNamesCount, nullptr, cal);
3273 if (newStart > 0) {
3274 return newStart;
3275 }
3276 }
3277 if (gotNumber && (getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC,status) || value > fSymbols->fShortYearNamesCount)) {
3278 cal.set(UCAL_YEAR, value);
3279 return pos.getIndex();
3280 }
3281 return -start;
3282
3283 case UDAT_MONTH_FIELD:
3284 case UDAT_STANDALONE_MONTH_FIELD:
3285 if (gotNumber) // i.e., M or MM.
3286 {
3287 // When parsing month numbers from the Hebrew Calendar, we might need to adjust the month depending on whether
3288 // or not it was a leap year. We may or may not yet know what year it is, so might have to delay checking until
3289 // the year is parsed.
3290 if (!strcmp(cal.getType(),"hebrew")) {
3291 HebrewCalendar *hc = (HebrewCalendar*)&cal;
3292 if (cal.isSet(UCAL_YEAR)) {
3293 UErrorCode monthStatus = U_ZERO_ERROR;
3294 if (!hc->isLeapYear(hc->get(UCAL_YEAR, monthStatus)) && value >= 6) {
3295 cal.set(UCAL_MONTH, value);
3296 } else {
3297 cal.set(UCAL_MONTH, value - 1);
3298 }
3299 } else {
3300 saveHebrewMonth = value;
3301 }
3302 } else {
3303 // Don't want to parse the month if it is a string
3304 // while pattern uses numeric style: M/MM, L/LL
3305 // [We computed 'value' above.]
3306 cal.set(UCAL_MONTH, value - 1);
3307 }
3308 return pos.getIndex();
3309 } else {
3310 // count >= 3 // i.e., MMM/MMMM, LLL/LLLL
3311 // Want to be able to parse both short and long forms.
3312 // Try count == 4 first:
3313 UnicodeString * wideMonthPat = nullptr;
3314 UnicodeString * shortMonthPat = nullptr;
3315 if (fSymbols->fLeapMonthPatterns != nullptr && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) {
3316 if (patternCharIndex==UDAT_MONTH_FIELD) {
3317 wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide];
3318 shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev];
3319 } else {
3320 wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide];
3321 shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev];
3322 }
3323 }
3324 int32_t newStart = 0;
3325 if (patternCharIndex==UDAT_MONTH_FIELD) {
3326 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) && count>=3 && count <=4 &&
3327 fSymbols->fLeapMonthPatterns==nullptr && fSymbols->fMonthsCount==fSymbols->fShortMonthsCount) {
3328 // single function to check both wide and short, an experiment
3329 newStart = matchAlphaMonthStrings(text, start, fSymbols->fMonths, fSymbols->fShortMonths, fSymbols->fMonthsCount, cal); // try MMMM,MMM
3330 if (newStart > 0) {
3331 return newStart;
3332 }
3333 }
3334 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3335 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fMonths, fSymbols->fMonthsCount, wideMonthPat, cal); // try MMMM
3336 if (newStart > 0) {
3337 return newStart;
3338 }
3339 }
3340 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3341 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fShortMonths, fSymbols->fShortMonthsCount, shortMonthPat, cal); // try MMM
3342 }
3343 } else {
3344 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) && count>=3 && count <=4 &&
3345 fSymbols->fLeapMonthPatterns==nullptr && fSymbols->fStandaloneMonthsCount==fSymbols->fStandaloneShortMonthsCount) {
3346 // single function to check both wide and short, an experiment
3347 newStart = matchAlphaMonthStrings(text, start, fSymbols->fStandaloneMonths, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneMonthsCount, cal); // try MMMM,MMM
3348 if (newStart > 0) {
3349 return newStart;
3350 }
3351 }
3352 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3353 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, wideMonthPat, cal); // try LLLL
3354 if (newStart > 0) {
3355 return newStart;
3356 }
3357 }
3358 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3359 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, shortMonthPat, cal); // try LLL
3360 }
3361 }
3362 if (newStart > 0 || !getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) // currently we do not try to parse MMMMM/LLLLL: #8860
3363 return newStart;
3364 // else we allowing parsing as number, below
3365 }
3366 break;
3367
3368 case UDAT_HOUR_OF_DAY1_FIELD:
3369 // [We computed 'value' above.]
3370 if (value == cal.getMaximum(UCAL_HOUR_OF_DAY) + 1)
3371 value = 0;
3372
3373 // fall through to set field
3374 U_FALLTHROUGH;
3375 case UDAT_HOUR_OF_DAY0_FIELD:
3376 cal.set(UCAL_HOUR_OF_DAY, value);
3377 return pos.getIndex();
3378
3379 case UDAT_FRACTIONAL_SECOND_FIELD:
3380 // Fractional seconds left-justify
3381 i = countDigits(text, start, pos.getIndex());
3382 if (i < 3) {
3383 while (i < 3) {
3384 value *= 10;
3385 i++;
3386 }
3387 } else {
3388 int32_t a = 1;
3389 while (i > 3) {
3390 a *= 10;
3391 i--;
3392 }
3393 value /= a;
3394 }
3395 cal.set(UCAL_MILLISECOND, value);
3396 return pos.getIndex();
3397
3398 case UDAT_DOW_LOCAL_FIELD:
3399 if (gotNumber) // i.e., e or ee
3400 {
3401 // [We computed 'value' above.]
3402 cal.set(UCAL_DOW_LOCAL, value);
3403 return pos.getIndex();
3404 }
3405 // else for eee-eeeee fall through to handling of EEE-EEEEE
3406 // fall through, do not break here
3407 U_FALLTHROUGH;
3408 case UDAT_DAY_OF_WEEK_FIELD:
3409 {
3410 // Want to be able to parse both short and long forms.
3411 // Try count == 4 (EEEE) wide first:
3412 int32_t newStart = 0;
3413 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3414 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3415 fSymbols->fWeekdays, fSymbols->fWeekdaysCount, nullptr, cal)) > 0)
3416 return newStart;
3417 }
3418 // EEEE wide failed, now try EEE abbreviated
3419 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3420 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3421 fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, nullptr, cal)) > 0)
3422 return newStart;
3423 }
3424 // EEE abbreviated failed, now try EEEEEE short
3425 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 6) {
3426 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3427 fSymbols->fShorterWeekdays, fSymbols->fShorterWeekdaysCount, nullptr, cal)) > 0)
3428 return newStart;
3429 }
3430 // EEEEEE short failed, now try EEEEE narrow
3431 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3432 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3433 fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, nullptr, cal)) > 0)
3434 return newStart;
3435 }
3436 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status) || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD)
3437 return newStart;
3438 // else we allowing parsing as number, below
3439 }
3440 break;
3441
3442 case UDAT_STANDALONE_DAY_FIELD:
3443 {
3444 if (gotNumber) // c or cc
3445 {
3446 // [We computed 'value' above.]
3447 cal.set(UCAL_DOW_LOCAL, value);
3448 return pos.getIndex();
3449 }
3450 // Want to be able to parse both short and long forms.
3451 // Try count == 4 (cccc) first:
3452 int32_t newStart = 0;
3453 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3454 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3455 fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, nullptr, cal)) > 0)
3456 return newStart;
3457 }
3458 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3459 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3460 fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, nullptr, cal)) > 0)
3461 return newStart;
3462 }
3463 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 6) {
3464 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3465 fSymbols->fStandaloneShorterWeekdays, fSymbols->fStandaloneShorterWeekdaysCount, nullptr, cal)) > 0)
3466 return newStart;
3467 }
3468 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))
3469 return newStart;
3470 // else we allowing parsing as number, below
3471 }
3472 break;
3473
3474 case UDAT_AM_PM_FIELD:
3475 {
3476 // optionally try both wide/abbrev and narrow forms
3477 int32_t newStart = 0;
3478 // try wide/abbrev
3479 if( getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count < 5 ) {
3480 if ((newStart = matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, nullptr, cal)) > 0) {
3481 return newStart;
3482 }
3483 }
3484 // try narrow
3485 if( getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count >= 5 ) {
3486 if ((newStart = matchString(text, start, UCAL_AM_PM, fSymbols->fNarrowAmPms, fSymbols->fNarrowAmPmsCount, nullptr, cal)) > 0) {
3487 return newStart;
3488 }
3489 }
3490 // no matches for given options
3491 return -start;
3492 }
3493
3494 case UDAT_HOUR1_FIELD:
3495 // [We computed 'value' above.]
3496 if (value == cal.getLeastMaximum(UCAL_HOUR)+1)
3497 value = 0;
3498
3499 // fall through to set field
3500 U_FALLTHROUGH;
3501 case UDAT_HOUR0_FIELD:
3502 cal.set(UCAL_HOUR, value);
3503 return pos.getIndex();
3504
3505 case UDAT_QUARTER_FIELD:
3506 if (gotNumber) // i.e., Q or QQ.
3507 {
3508 // Don't want to parse the month if it is a string
3509 // while pattern uses numeric style: Q or QQ.
3510 // [We computed 'value' above.]
3511 cal.set(UCAL_MONTH, (value - 1) * 3);
3512 return pos.getIndex();
3513 } else {
3514 // count >= 3 // i.e., QQQ or QQQQ
3515 // Want to be able to parse short, long, and narrow forms.
3516 // Try count == 4 first:
3517 int32_t newStart = 0;
3518
3519 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3520 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3521 fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0)
3522 return newStart;
3523 }
3524 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3525 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3526 fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal)) > 0)
3527 return newStart;
3528 }
3529 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3530 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3531 fSymbols->fNarrowQuarters, fSymbols->fNarrowQuartersCount, cal)) > 0)
3532 return newStart;
3533 }
3534 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))
3535 return newStart;
3536 // else we allowing parsing as number, below
3537 if(!getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status))
3538 return -start;
3539 }
3540 break;
3541
3542 case UDAT_STANDALONE_QUARTER_FIELD:
3543 if (gotNumber) // i.e., q or qq.
3544 {
3545 // Don't want to parse the month if it is a string
3546 // while pattern uses numeric style: q or q.
3547 // [We computed 'value' above.]
3548 cal.set(UCAL_MONTH, (value - 1) * 3);
3549 return pos.getIndex();
3550 } else {
3551 // count >= 3 // i.e., qqq or qqqq
3552 // Want to be able to parse both short and long forms.
3553 // Try count == 4 first:
3554 int32_t newStart = 0;
3555
3556 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3557 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3558 fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0)
3559 return newStart;
3560 }
3561 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3562 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3563 fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal)) > 0)
3564 return newStart;
3565 }
3566 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3567 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3568 fSymbols->fStandaloneNarrowQuarters, fSymbols->fStandaloneNarrowQuartersCount, cal)) > 0)
3569 return newStart;
3570 }
3571 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))
3572 return newStart;
3573 // else we allowing parsing as number, below
3574 if(!getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status))
3575 return -start;
3576 }
3577 break;
3578
3579 case UDAT_TIMEZONE_FIELD: // 'z'
3580 {
3581 UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_SPECIFIC_SHORT : UTZFMT_STYLE_SPECIFIC_LONG;
3582 const TimeZoneFormat *tzfmt = tzFormat(status);
3583 if (U_SUCCESS(status)) {
3584 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3585 if (tz != nullptr) {
3586 cal.adoptTimeZone(tz);
3587 return pos.getIndex();
3588 }
3589 }
3590 return -start;
3591 }
3592 break;
3593 case UDAT_TIMEZONE_RFC_FIELD: // 'Z'
3594 {
3595 UTimeZoneFormatStyle style = (count < 4) ?
3596 UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL : ((count == 5) ? UTZFMT_STYLE_ISO_EXTENDED_FULL: UTZFMT_STYLE_LOCALIZED_GMT);
3597 const TimeZoneFormat *tzfmt = tzFormat(status);
3598 if (U_SUCCESS(status)) {
3599 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3600 if (tz != nullptr) {
3601 cal.adoptTimeZone(tz);
3602 return pos.getIndex();
3603 }
3604 }
3605 return -start;
3606 }
3607 case UDAT_TIMEZONE_GENERIC_FIELD: // 'v'
3608 {
3609 UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_GENERIC_SHORT : UTZFMT_STYLE_GENERIC_LONG;
3610 const TimeZoneFormat *tzfmt = tzFormat(status);
3611 if (U_SUCCESS(status)) {
3612 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3613 if (tz != nullptr) {
3614 cal.adoptTimeZone(tz);
3615 return pos.getIndex();
3616 }
3617 }
3618 return -start;
3619 }
3620 case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V'
3621 {
3622 UTimeZoneFormatStyle style;
3623 switch (count) {
3624 case 1:
3625 style = UTZFMT_STYLE_ZONE_ID_SHORT;
3626 break;
3627 case 2:
3628 style = UTZFMT_STYLE_ZONE_ID;
3629 break;
3630 case 3:
3631 style = UTZFMT_STYLE_EXEMPLAR_LOCATION;
3632 break;
3633 default:
3634 style = UTZFMT_STYLE_GENERIC_LOCATION;
3635 break;
3636 }
3637 const TimeZoneFormat *tzfmt = tzFormat(status);
3638 if (U_SUCCESS(status)) {
3639 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3640 if (tz != nullptr) {
3641 cal.adoptTimeZone(tz);
3642 return pos.getIndex();
3643 }
3644 }
3645 return -start;
3646 }
3647 case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: // 'O'
3648 {
3649 UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_LOCALIZED_GMT_SHORT : UTZFMT_STYLE_LOCALIZED_GMT;
3650 const TimeZoneFormat *tzfmt = tzFormat(status);
3651 if (U_SUCCESS(status)) {
3652 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3653 if (tz != nullptr) {
3654 cal.adoptTimeZone(tz);
3655 return pos.getIndex();
3656 }
3657 }
3658 return -start;
3659 }
3660 case UDAT_TIMEZONE_ISO_FIELD: // 'X'
3661 {
3662 UTimeZoneFormatStyle style;
3663 switch (count) {
3664 case 1:
3665 style = UTZFMT_STYLE_ISO_BASIC_SHORT;
3666 break;
3667 case 2:
3668 style = UTZFMT_STYLE_ISO_BASIC_FIXED;
3669 break;
3670 case 3:
3671 style = UTZFMT_STYLE_ISO_EXTENDED_FIXED;
3672 break;
3673 case 4:
3674 style = UTZFMT_STYLE_ISO_BASIC_FULL;
3675 break;
3676 default:
3677 style = UTZFMT_STYLE_ISO_EXTENDED_FULL;
3678 break;
3679 }
3680 const TimeZoneFormat *tzfmt = tzFormat(status);
3681 if (U_SUCCESS(status)) {
3682 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3683 if (tz != nullptr) {
3684 cal.adoptTimeZone(tz);
3685 return pos.getIndex();
3686 }
3687 }
3688 return -start;
3689 }
3690 case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x'
3691 {
3692 UTimeZoneFormatStyle style;
3693 switch (count) {
3694 case 1:
3695 style = UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT;
3696 break;
3697 case 2:
3698 style = UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED;
3699 break;
3700 case 3:
3701 style = UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED;
3702 break;
3703 case 4:
3704 style = UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL;
3705 break;
3706 default:
3707 style = UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL;
3708 break;
3709 }
3710 const TimeZoneFormat *tzfmt = tzFormat(status);
3711 if (U_SUCCESS(status)) {
3712 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3713 if (tz != nullptr) {
3714 cal.adoptTimeZone(tz);
3715 return pos.getIndex();
3716 }
3717 }
3718 return -start;
3719 }
3720 // currently no pattern character is defined for UDAT_TIME_SEPARATOR_FIELD
3721 // so we should not get here. Leave support in for future definition.
3722 case UDAT_TIME_SEPARATOR_FIELD:
3723 {
3724 static const char16_t def_sep = DateFormatSymbols::DEFAULT_TIME_SEPARATOR;
3725 static const char16_t alt_sep = DateFormatSymbols::ALTERNATE_TIME_SEPARATOR;
3726
3727 // Try matching a time separator.
3728 int32_t count_sep = 1;
3729 UnicodeString data[3];
3730 fSymbols->getTimeSeparatorString(data[0]);
3731
3732 // Add the default, if different from the locale.
3733 if (data[0].compare(&def_sep, 1) != 0) {
3734 data[count_sep++].setTo(def_sep);
3735 }
3736
3737 // If lenient, add also the alternate, if different from the locale.
3738 if (isLenient() && data[0].compare(&alt_sep, 1) != 0) {
3739 data[count_sep++].setTo(alt_sep);
3740 }
3741
3742 return matchString(text, start, UCAL_FIELD_COUNT /* => nothing to set */, data, count_sep, nullptr, cal);
3743 }
3744
3745 case UDAT_AM_PM_MIDNIGHT_NOON_FIELD:
3746 {
3747 U_ASSERT(dayPeriod != nullptr);
3748 int32_t ampmStart = subParse(text, start, 0x61, count,
3749 obeyCount, allowNegative, ambiguousYear, saveHebrewMonth, cal,
3750 patLoc, numericLeapMonthFormatter, tzTimeType);
3751
3752 if (ampmStart > 0) {
3753 return ampmStart;
3754 } else {
3755 int32_t newStart = 0;
3756
3757 // Only match the first two strings from the day period strings array.
3758 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3759 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fAbbreviatedDayPeriods,
3760 2, *dayPeriod)) > 0) {
3761 return newStart;
3762 }
3763 }
3764 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3765 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fNarrowDayPeriods,
3766 2, *dayPeriod)) > 0) {
3767 return newStart;
3768 }
3769 }
3770 // count == 4, but allow other counts
3771 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status)) {
3772 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fWideDayPeriods,
3773 2, *dayPeriod)) > 0) {
3774 return newStart;
3775 }
3776 }
3777
3778 return -start;
3779 }
3780 }
3781
3782 case UDAT_FLEXIBLE_DAY_PERIOD_FIELD:
3783 {
3784 U_ASSERT(dayPeriod != nullptr);
3785 int32_t newStart = 0;
3786
3787 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3788 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fAbbreviatedDayPeriods,
3789 fSymbols->fAbbreviatedDayPeriodsCount, *dayPeriod)) > 0) {
3790 return newStart;
3791 }
3792 }
3793 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3794 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fNarrowDayPeriods,
3795 fSymbols->fNarrowDayPeriodsCount, *dayPeriod)) > 0) {
3796 return newStart;
3797 }
3798 }
3799 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3800 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fWideDayPeriods,
3801 fSymbols->fWideDayPeriodsCount, *dayPeriod)) > 0) {
3802 return newStart;
3803 }
3804 }
3805
3806 return -start;
3807 }
3808
3809 default:
3810 // Handle "generic" fields
3811 // this is now handled below, outside the switch block
3812 break;
3813 }
3814 // Handle "generic" fields:
3815 // switch default case now handled here (outside switch block) to allow
3816 // parsing of some string fields as digits for lenient case
3817
3818 int32_t parseStart = pos.getIndex();
3819 const UnicodeString* src;
3820 if (obeyCount) {
3821 if ((start+count) > text.length()) {
3822 return -start;
3823 }
3824 text.extractBetween(0, start + count, temp);
3825 src = &temp;
3826 } else {
3827 src = &text;
3828 }
3829 parseInt(*src, number, pos, allowNegative,currentNumberFormat);
3830 if (obeyCount && !isLenient() && pos.getIndex() < start + count) {
3831 return -start;
3832 }
3833 if (pos.getIndex() != parseStart) {
3834 int32_t val = number.getLong();
3835
3836 // Don't need suffix processing here (as in number processing at the beginning of the function);
3837 // the new fields being handled as numeric values (month, weekdays, quarters) should not have suffixes.
3838
3839 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) {
3840 // Check the range of the value
3841 int32_t bias = gFieldRangeBias[patternCharIndex];
3842 if (bias >= 0 && (val > cal.getMaximum(field) + bias || val < cal.getMinimum(field) + bias)) {
3843 return -start;
3844 }
3845 }
3846
3847 // For the following, need to repeat some of the "if (gotNumber)" code above:
3848 // UDAT_[STANDALONE_]MONTH_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_STANDALONE_DAY_FIELD,
3849 // UDAT_[STANDALONE_]QUARTER_FIELD
3850 switch (patternCharIndex) {
3851 case UDAT_MONTH_FIELD:
3852 // See notes under UDAT_MONTH_FIELD case above
3853 if (!strcmp(cal.getType(),"hebrew")) {
3854 HebrewCalendar *hc = (HebrewCalendar*)&cal;
3855 if (cal.isSet(UCAL_YEAR)) {
3856 UErrorCode monthStatus = U_ZERO_ERROR;
3857 if (!hc->isLeapYear(hc->get(UCAL_YEAR, monthStatus)) && val >= 6) {
3858 cal.set(UCAL_MONTH, val);
3859 } else {
3860 cal.set(UCAL_MONTH, val - 1);
3861 }
3862 } else {
3863 saveHebrewMonth = val;
3864 }
3865 } else {
3866 cal.set(UCAL_MONTH, val - 1);
3867 }
3868 break;
3869 case UDAT_STANDALONE_MONTH_FIELD:
3870 cal.set(UCAL_MONTH, val - 1);
3871 break;
3872 case UDAT_DOW_LOCAL_FIELD:
3873 case UDAT_STANDALONE_DAY_FIELD:
3874 cal.set(UCAL_DOW_LOCAL, val);
3875 break;
3876 case UDAT_QUARTER_FIELD:
3877 case UDAT_STANDALONE_QUARTER_FIELD:
3878 cal.set(UCAL_MONTH, (val - 1) * 3);
3879 break;
3880 case UDAT_RELATED_YEAR_FIELD:
3881 cal.setRelatedYear(val);
3882 break;
3883 default:
3884 cal.set(field, val);
3885 break;
3886 }
3887 return pos.getIndex();
3888 }
3889 return -start;
3890 }
3891
3892 /**
3893 * Parse an integer using fNumberFormat. This method is semantically
3894 * const, but actually may modify fNumberFormat.
3895 */
parseInt(const UnicodeString & text,Formattable & number,ParsePosition & pos,UBool allowNegative,const NumberFormat * fmt) const3896 void SimpleDateFormat::parseInt(const UnicodeString& text,
3897 Formattable& number,
3898 ParsePosition& pos,
3899 UBool allowNegative,
3900 const NumberFormat *fmt) const {
3901 parseInt(text, number, -1, pos, allowNegative,fmt);
3902 }
3903
3904 /**
3905 * Parse an integer using fNumberFormat up to maxDigits.
3906 */
parseInt(const UnicodeString & text,Formattable & number,int32_t maxDigits,ParsePosition & pos,UBool allowNegative,const NumberFormat * fmt) const3907 void SimpleDateFormat::parseInt(const UnicodeString& text,
3908 Formattable& number,
3909 int32_t maxDigits,
3910 ParsePosition& pos,
3911 UBool allowNegative,
3912 const NumberFormat *fmt) const {
3913 UnicodeString oldPrefix;
3914 auto* fmtAsDF = dynamic_cast<const DecimalFormat*>(fmt);
3915 LocalPointer<DecimalFormat> df;
3916 if (!allowNegative && fmtAsDF != nullptr) {
3917 df.adoptInstead(fmtAsDF->clone());
3918 if (df.isNull()) {
3919 // Memory allocation error
3920 return;
3921 }
3922 df->setNegativePrefix(UnicodeString(true, SUPPRESS_NEGATIVE_PREFIX, -1));
3923 fmt = df.getAlias();
3924 }
3925 int32_t oldPos = pos.getIndex();
3926 fmt->parse(text, number, pos);
3927
3928 if (maxDigits > 0) {
3929 // adjust the result to fit into
3930 // the maxDigits and move the position back
3931 int32_t nDigits = pos.getIndex() - oldPos;
3932 if (nDigits > maxDigits) {
3933 int32_t val = number.getLong();
3934 nDigits -= maxDigits;
3935 while (nDigits > 0) {
3936 val /= 10;
3937 nDigits--;
3938 }
3939 pos.setIndex(oldPos + maxDigits);
3940 number.setLong(val);
3941 }
3942 }
3943 }
3944
countDigits(const UnicodeString & text,int32_t start,int32_t end) const3945 int32_t SimpleDateFormat::countDigits(const UnicodeString& text, int32_t start, int32_t end) const {
3946 int32_t numDigits = 0;
3947 int32_t idx = start;
3948 while (idx < end) {
3949 UChar32 cp = text.char32At(idx);
3950 if (u_isdigit(cp)) {
3951 numDigits++;
3952 }
3953 idx += U16_LENGTH(cp);
3954 }
3955 return numDigits;
3956 }
3957
3958 //----------------------------------------------------------------------
3959
translatePattern(const UnicodeString & originalPattern,UnicodeString & translatedPattern,const UnicodeString & from,const UnicodeString & to,UErrorCode & status)3960 void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern,
3961 UnicodeString& translatedPattern,
3962 const UnicodeString& from,
3963 const UnicodeString& to,
3964 UErrorCode& status)
3965 {
3966 // run through the pattern and convert any pattern symbols from the version
3967 // in "from" to the corresponding character in "to". This code takes
3968 // quoted strings into account (it doesn't try to translate them), and it signals
3969 // an error if a particular "pattern character" doesn't appear in "from".
3970 // Depending on the values of "from" and "to" this can convert from generic
3971 // to localized patterns or localized to generic.
3972 if (U_FAILURE(status)) {
3973 return;
3974 }
3975
3976 translatedPattern.remove();
3977 UBool inQuote = false;
3978 for (int32_t i = 0; i < originalPattern.length(); ++i) {
3979 char16_t c = originalPattern[i];
3980 if (inQuote) {
3981 if (c == QUOTE) {
3982 inQuote = false;
3983 }
3984 } else {
3985 if (c == QUOTE) {
3986 inQuote = true;
3987 } else if (isSyntaxChar(c)) {
3988 int32_t ci = from.indexOf(c);
3989 if (ci == -1) {
3990 status = U_INVALID_FORMAT_ERROR;
3991 return;
3992 }
3993 c = to[ci];
3994 }
3995 }
3996 translatedPattern += c;
3997 }
3998 if (inQuote) {
3999 status = U_INVALID_FORMAT_ERROR;
4000 return;
4001 }
4002 }
4003
4004 //----------------------------------------------------------------------
4005
4006 UnicodeString&
toPattern(UnicodeString & result) const4007 SimpleDateFormat::toPattern(UnicodeString& result) const
4008 {
4009 result = fPattern;
4010 return result;
4011 }
4012
4013 //----------------------------------------------------------------------
4014
4015 UnicodeString&
toLocalizedPattern(UnicodeString & result,UErrorCode & status) const4016 SimpleDateFormat::toLocalizedPattern(UnicodeString& result,
4017 UErrorCode& status) const
4018 {
4019 translatePattern(fPattern, result,
4020 UnicodeString(DateFormatSymbols::getPatternUChars()),
4021 fSymbols->fLocalPatternChars, status);
4022 return result;
4023 }
4024
4025 //----------------------------------------------------------------------
4026
4027 void
applyPattern(const UnicodeString & pattern)4028 SimpleDateFormat::applyPattern(const UnicodeString& pattern)
4029 {
4030 fPattern = pattern;
4031 parsePattern();
4032
4033 // Hack to update use of Gannen year numbering for ja@calendar=japanese -
4034 // use only if format is non-numeric (includes 年) and no other fDateOverride.
4035 if (fCalendar != nullptr && uprv_strcmp(fCalendar->getType(),"japanese") == 0 &&
4036 uprv_strcmp(fLocale.getLanguage(),"ja") == 0) {
4037 if (fDateOverride==UnicodeString(u"y=jpanyear") && !fHasHanYearChar) {
4038 // Gannen numbering is set but new pattern should not use it, unset;
4039 // use procedure from adoptNumberFormat to clear overrides
4040 if (fSharedNumberFormatters) {
4041 freeSharedNumberFormatters(fSharedNumberFormatters);
4042 fSharedNumberFormatters = nullptr;
4043 }
4044 fDateOverride.setToBogus(); // record status
4045 } else if (fDateOverride.isBogus() && fHasHanYearChar) {
4046 // No current override (=> no Gannen numbering) but new pattern needs it;
4047 // use procedures from initNUmberFormatters / adoptNumberFormat
4048 umtx_lock(&LOCK);
4049 if (fSharedNumberFormatters == nullptr) {
4050 fSharedNumberFormatters = allocSharedNumberFormatters();
4051 }
4052 umtx_unlock(&LOCK);
4053 if (fSharedNumberFormatters != nullptr) {
4054 Locale ovrLoc(fLocale.getLanguage(),fLocale.getCountry(),fLocale.getVariant(),"numbers=jpanyear");
4055 UErrorCode status = U_ZERO_ERROR;
4056 const SharedNumberFormat *snf = createSharedNumberFormat(ovrLoc, status);
4057 if (U_SUCCESS(status)) {
4058 // Now that we have an appropriate number formatter, fill in the
4059 // appropriate slot in the number formatters table.
4060 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(u'y');
4061 SharedObject::copyPtr(snf, fSharedNumberFormatters[patternCharIndex]);
4062 snf->deleteIfZeroRefCount();
4063 fDateOverride.setTo(u"y=jpanyear", -1); // record status
4064 }
4065 }
4066 }
4067 }
4068 }
4069
4070 //----------------------------------------------------------------------
4071
4072 void
applyLocalizedPattern(const UnicodeString & pattern,UErrorCode & status)4073 SimpleDateFormat::applyLocalizedPattern(const UnicodeString& pattern,
4074 UErrorCode &status)
4075 {
4076 translatePattern(pattern, fPattern,
4077 fSymbols->fLocalPatternChars,
4078 UnicodeString(DateFormatSymbols::getPatternUChars()), status);
4079 }
4080
4081 //----------------------------------------------------------------------
4082
4083 const DateFormatSymbols*
getDateFormatSymbols() const4084 SimpleDateFormat::getDateFormatSymbols() const
4085 {
4086 return fSymbols;
4087 }
4088
4089 //----------------------------------------------------------------------
4090
4091 void
adoptDateFormatSymbols(DateFormatSymbols * newFormatSymbols)4092 SimpleDateFormat::adoptDateFormatSymbols(DateFormatSymbols* newFormatSymbols)
4093 {
4094 delete fSymbols;
4095 fSymbols = newFormatSymbols;
4096 }
4097
4098 //----------------------------------------------------------------------
4099 void
setDateFormatSymbols(const DateFormatSymbols & newFormatSymbols)4100 SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols)
4101 {
4102 delete fSymbols;
4103 fSymbols = new DateFormatSymbols(newFormatSymbols);
4104 }
4105
4106 //----------------------------------------------------------------------
4107 const TimeZoneFormat*
getTimeZoneFormat() const4108 SimpleDateFormat::getTimeZoneFormat() const {
4109 // TimeZoneFormat initialization might fail when out of memory.
4110 // If we always initialize TimeZoneFormat instance, we can return
4111 // such status there. For now, this implementation lazily instantiates
4112 // a TimeZoneFormat for performance optimization reasons, but cannot
4113 // propagate such error (probably just out of memory case) to the caller.
4114 UErrorCode status = U_ZERO_ERROR;
4115 return (const TimeZoneFormat*)tzFormat(status);
4116 }
4117
4118 //----------------------------------------------------------------------
4119 void
adoptTimeZoneFormat(TimeZoneFormat * timeZoneFormatToAdopt)4120 SimpleDateFormat::adoptTimeZoneFormat(TimeZoneFormat* timeZoneFormatToAdopt)
4121 {
4122 delete fTimeZoneFormat;
4123 fTimeZoneFormat = timeZoneFormatToAdopt;
4124 }
4125
4126 //----------------------------------------------------------------------
4127 void
setTimeZoneFormat(const TimeZoneFormat & newTimeZoneFormat)4128 SimpleDateFormat::setTimeZoneFormat(const TimeZoneFormat& newTimeZoneFormat)
4129 {
4130 delete fTimeZoneFormat;
4131 fTimeZoneFormat = new TimeZoneFormat(newTimeZoneFormat);
4132 }
4133
4134 //----------------------------------------------------------------------
4135
4136
adoptCalendar(Calendar * calendarToAdopt)4137 void SimpleDateFormat::adoptCalendar(Calendar* calendarToAdopt)
4138 {
4139 UErrorCode status = U_ZERO_ERROR;
4140 Locale calLocale(fLocale);
4141 calLocale.setKeywordValue("calendar", calendarToAdopt->getType(), status);
4142 DateFormatSymbols *newSymbols =
4143 DateFormatSymbols::createForLocale(calLocale, status);
4144 if (U_FAILURE(status)) {
4145 delete calendarToAdopt;
4146 return;
4147 }
4148 DateFormat::adoptCalendar(calendarToAdopt);
4149 delete fSymbols;
4150 fSymbols = newSymbols;
4151 initializeDefaultCentury(); // we need a new century (possibly)
4152 }
4153
4154
4155 //----------------------------------------------------------------------
4156
4157
4158 // override the DateFormat implementation in order to
4159 // lazily initialize fCapitalizationBrkIter
4160 void
setContext(UDisplayContext value,UErrorCode & status)4161 SimpleDateFormat::setContext(UDisplayContext value, UErrorCode& status)
4162 {
4163 DateFormat::setContext(value, status);
4164 #if !UCONFIG_NO_BREAK_ITERATION
4165 if (U_SUCCESS(status)) {
4166 if ( fCapitalizationBrkIter == nullptr && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
4167 value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE) ) {
4168 status = U_ZERO_ERROR;
4169 fCapitalizationBrkIter = BreakIterator::createSentenceInstance(fLocale, status);
4170 if (U_FAILURE(status)) {
4171 delete fCapitalizationBrkIter;
4172 fCapitalizationBrkIter = nullptr;
4173 }
4174 }
4175 }
4176 #endif
4177 }
4178
4179
4180 //----------------------------------------------------------------------
4181
4182
4183 UBool
isFieldUnitIgnored(UCalendarDateFields field) const4184 SimpleDateFormat::isFieldUnitIgnored(UCalendarDateFields field) const {
4185 return isFieldUnitIgnored(fPattern, field);
4186 }
4187
4188
4189 UBool
isFieldUnitIgnored(const UnicodeString & pattern,UCalendarDateFields field)4190 SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern,
4191 UCalendarDateFields field) {
4192 int32_t fieldLevel = fgCalendarFieldToLevel[field];
4193 int32_t level;
4194 char16_t ch;
4195 UBool inQuote = false;
4196 char16_t prevCh = 0;
4197 int32_t count = 0;
4198
4199 for (int32_t i = 0; i < pattern.length(); ++i) {
4200 ch = pattern[i];
4201 if (ch != prevCh && count > 0) {
4202 level = getLevelFromChar(prevCh);
4203 // the larger the level, the smaller the field unit.
4204 if (fieldLevel <= level) {
4205 return false;
4206 }
4207 count = 0;
4208 }
4209 if (ch == QUOTE) {
4210 if ((i+1) < pattern.length() && pattern[i+1] == QUOTE) {
4211 ++i;
4212 } else {
4213 inQuote = ! inQuote;
4214 }
4215 }
4216 else if (!inQuote && isSyntaxChar(ch)) {
4217 prevCh = ch;
4218 ++count;
4219 }
4220 }
4221 if (count > 0) {
4222 // last item
4223 level = getLevelFromChar(prevCh);
4224 if (fieldLevel <= level) {
4225 return false;
4226 }
4227 }
4228 return true;
4229 }
4230
4231 //----------------------------------------------------------------------
4232
4233 const Locale&
getSmpFmtLocale() const4234 SimpleDateFormat::getSmpFmtLocale() const {
4235 return fLocale;
4236 }
4237
4238 //----------------------------------------------------------------------
4239
4240 int32_t
checkIntSuffix(const UnicodeString & text,int32_t start,int32_t patLoc,UBool isNegative) const4241 SimpleDateFormat::checkIntSuffix(const UnicodeString& text, int32_t start,
4242 int32_t patLoc, UBool isNegative) const {
4243 // local variables
4244 UnicodeString suf;
4245 int32_t patternMatch;
4246 int32_t textPreMatch;
4247 int32_t textPostMatch;
4248
4249 // check that we are still in range
4250 if ( (start > text.length()) ||
4251 (start < 0) ||
4252 (patLoc < 0) ||
4253 (patLoc > fPattern.length())) {
4254 // out of range, don't advance location in text
4255 return start;
4256 }
4257
4258 // get the suffix
4259 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fNumberFormat);
4260 if (decfmt != nullptr) {
4261 if (isNegative) {
4262 suf = decfmt->getNegativeSuffix(suf);
4263 }
4264 else {
4265 suf = decfmt->getPositiveSuffix(suf);
4266 }
4267 }
4268
4269 // check for suffix
4270 if (suf.length() <= 0) {
4271 return start;
4272 }
4273
4274 // check suffix will be encountered in the pattern
4275 patternMatch = compareSimpleAffix(suf,fPattern,patLoc);
4276
4277 // check if a suffix will be encountered in the text
4278 textPreMatch = compareSimpleAffix(suf,text,start);
4279
4280 // check if a suffix was encountered in the text
4281 textPostMatch = compareSimpleAffix(suf,text,start-suf.length());
4282
4283 // check for suffix match
4284 if ((textPreMatch >= 0) && (patternMatch >= 0) && (textPreMatch == patternMatch)) {
4285 return start;
4286 }
4287 else if ((textPostMatch >= 0) && (patternMatch >= 0) && (textPostMatch == patternMatch)) {
4288 return start - suf.length();
4289 }
4290
4291 // should not get here
4292 return start;
4293 }
4294
4295 //----------------------------------------------------------------------
4296
4297 int32_t
compareSimpleAffix(const UnicodeString & affix,const UnicodeString & input,int32_t pos) const4298 SimpleDateFormat::compareSimpleAffix(const UnicodeString& affix,
4299 const UnicodeString& input,
4300 int32_t pos) const {
4301 int32_t start = pos;
4302 for (int32_t i=0; i<affix.length(); ) {
4303 UChar32 c = affix.char32At(i);
4304 int32_t len = U16_LENGTH(c);
4305 if (PatternProps::isWhiteSpace(c)) {
4306 // We may have a pattern like: \u200F \u0020
4307 // and input text like: \u200F \u0020
4308 // Note that U+200F and U+0020 are Pattern_White_Space but only
4309 // U+0020 is UWhiteSpace. So we have to first do a direct
4310 // match of the run of Pattern_White_Space in the pattern,
4311 // then match any extra characters.
4312 UBool literalMatch = false;
4313 while (pos < input.length() &&
4314 input.char32At(pos) == c) {
4315 literalMatch = true;
4316 i += len;
4317 pos += len;
4318 if (i == affix.length()) {
4319 break;
4320 }
4321 c = affix.char32At(i);
4322 len = U16_LENGTH(c);
4323 if (!PatternProps::isWhiteSpace(c)) {
4324 break;
4325 }
4326 }
4327
4328 // Advance over run in pattern
4329 i = skipPatternWhiteSpace(affix, i);
4330
4331 // Advance over run in input text
4332 // Must see at least one white space char in input,
4333 // unless we've already matched some characters literally.
4334 int32_t s = pos;
4335 pos = skipUWhiteSpace(input, pos);
4336 if (pos == s && !literalMatch) {
4337 return -1;
4338 }
4339
4340 // If we skip UWhiteSpace in the input text, we need to skip it in the pattern.
4341 // Otherwise, the previous lines may have skipped over text (such as U+00A0) that
4342 // is also in the affix.
4343 i = skipUWhiteSpace(affix, i);
4344 } else {
4345 if (pos < input.length() &&
4346 input.char32At(pos) == c) {
4347 i += len;
4348 pos += len;
4349 } else {
4350 return -1;
4351 }
4352 }
4353 }
4354 return pos - start;
4355 }
4356
4357 //----------------------------------------------------------------------
4358
4359 int32_t
skipPatternWhiteSpace(const UnicodeString & text,int32_t pos) const4360 SimpleDateFormat::skipPatternWhiteSpace(const UnicodeString& text, int32_t pos) const {
4361 const char16_t* s = text.getBuffer();
4362 return (int32_t)(PatternProps::skipWhiteSpace(s + pos, text.length() - pos) - s);
4363 }
4364
4365 //----------------------------------------------------------------------
4366
4367 int32_t
skipUWhiteSpace(const UnicodeString & text,int32_t pos) const4368 SimpleDateFormat::skipUWhiteSpace(const UnicodeString& text, int32_t pos) const {
4369 while (pos < text.length()) {
4370 UChar32 c = text.char32At(pos);
4371 if (!u_isUWhiteSpace(c)) {
4372 break;
4373 }
4374 pos += U16_LENGTH(c);
4375 }
4376 return pos;
4377 }
4378
4379 //----------------------------------------------------------------------
4380
4381 // Lazy TimeZoneFormat instantiation, semantically const.
4382 TimeZoneFormat *
tzFormat(UErrorCode & status) const4383 SimpleDateFormat::tzFormat(UErrorCode &status) const {
4384 Mutex m(&LOCK);
4385 if (fTimeZoneFormat == nullptr && U_SUCCESS(status)) {
4386 const_cast<SimpleDateFormat *>(this)->fTimeZoneFormat =
4387 TimeZoneFormat::createInstance(fLocale, status);
4388 }
4389 return fTimeZoneFormat;
4390 }
4391
parsePattern()4392 void SimpleDateFormat::parsePattern() {
4393 fHasMinute = false;
4394 fHasSecond = false;
4395 fHasHanYearChar = false;
4396
4397 int len = fPattern.length();
4398 UBool inQuote = false;
4399 for (int32_t i = 0; i < len; ++i) {
4400 char16_t ch = fPattern[i];
4401 if (ch == QUOTE) {
4402 inQuote = !inQuote;
4403 }
4404 if (ch == 0x5E74) { // don't care whether this is inside quotes
4405 fHasHanYearChar = true;
4406 }
4407 if (!inQuote) {
4408 if (ch == 0x6D) { // 0x6D == 'm'
4409 fHasMinute = true;
4410 }
4411 if (ch == 0x73) { // 0x73 == 's'
4412 fHasSecond = true;
4413 }
4414 }
4415 }
4416 }
4417
4418 U_NAMESPACE_END
4419
4420 #endif /* #if !UCONFIG_NO_FORMATTING */
4421
4422 //eof
4423