1 /* $OpenBSD: strptime.c,v 1.31 2023/03/02 16:21:51 millert Exp $ */
2 /* $NetBSD: strptime.c,v 1.12 1998/01/20 21:39:40 mycroft Exp $ */
3 /*-
4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code was contributed to The NetBSD Foundation by Klaus Klein.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include "private.h"
32
33 #include <ctype.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <locale.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <time.h>
40
41 #include "localedef.h"
42 #include "tzfile.h"
43
44 // Android: ignore OpenBSD's DEF_WEAK() stuff.
45 #define DEF_WEAK(sym) /* */
46 // Android: this code is not pointer-sign clean.
47 #pragma clang diagnostic ignored "-Wpointer-sign"
48 #pragma clang diagnostic ignored "-Wunused-function"
49 // Android: clang thinks people don't know && has higher precedence than ||.
50 #pragma clang diagnostic ignored "-Wlogical-op-parentheses"
51
/* Shorthand for reading member `x' of the current time locale. */
#define	_ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations.  However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * _ALT_E and _ALT_O record a pending "%E" / "%O" modifier in the caller's
 * local `alt_format'.  _LEGAL_ALT(x) makes the enclosing function return 0
 * (a null pointer) when a modifier outside the mask `x' is pending.  It is
 * wrapped in do { } while (0) so that `_LEGAL_ALT(x);' is exactly one
 * statement and remains safe inside an unbraced if/else.
 */
#define	_ALT_E			0x01
#define	_ALT_O			0x02
#define	_LEGAL_ALT(x)							\
	do {								\
		if (alt_format & ~(x))					\
			return (0);					\
	} while (0)

/*
 * We keep track of some of the fields we set in order to compute missing ones.
 */
#define	FIELD_TM_MON	(1 << 0)
#define	FIELD_TM_MDAY	(1 << 1)
#define	FIELD_TM_WDAY	(1 << 2)
#define	FIELD_TM_YDAY	(1 << 3)
#define	FIELD_TM_YEAR	(1 << 4)
70
/* Names accepted for Universal Time. */
static const char gmt[] = "GMT";
static const char utc[] = "UTC";

/*
 * RFC-822/RFC-2822 North American zone names, ordered east to west.
 * The %z lookup searches only the first four entries of each table; the
 * fifth slot holds an empty filler string.
 */
static const char * const nast[5] = {
	"EST",
	"CST",
	"MST",
	"PST",
	"\0\0\0"
};
static const char * const nadt[5] = {
	"EDT",
	"CDT",
	"MDT",
	"PDT",
	"\0\0\0"
};
80
81 static const int mon_lengths[2][MONSPERYEAR] = {
82 { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
83 { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
84 };
85
86 static int _conv_num(const unsigned char **, int *, int, int);
87 static int epoch_to_tm(const unsigned char **, struct tm *);
88 static int leaps_thru_end_of(const int y);
89 static char *_strptime(const char *, const char *, struct tm *, int);
90 static const u_char *_find_string(const u_char *, int *, const char * const *,
91 const char * const *, int);
92
93
/*
 * Public entry point: parse `buf' according to `fmt' into `tm'.
 *
 * This is a top-level call, so the parser's century / two-digit-year /
 * seen-fields state is reset before parsing (initialize = 1).
 *
 * Returns whatever _strptime() returns: a pointer to the first character
 * of `buf' that was not consumed, or NULL when parsing fails.
 */
char *
strptime(const char *buf, const char *fmt, struct tm *tm)
{
	char *rest = _strptime(buf, fmt, tm, 1);

	return (rest);
}
DEF_WEAK(strptime);
__strong_alias(strptime_l, strptime);
101
102 static char *
_strptime(const char * buf,const char * fmt,struct tm * tm,int initialize)103 _strptime(const char *buf, const char *fmt, struct tm *tm, int initialize)
104 {
105 unsigned char c;
106 const unsigned char *bp, *ep;
107 size_t len;
108 int alt_format, i, offs;
109 int neg = 0;
110 static int century, relyear, fields;
111
112 if (initialize) {
113 century = TM_YEAR_BASE;
114 relyear = -1;
115 fields = 0;
116 }
117
118 bp = (const unsigned char *)buf;
119 while ((c = *fmt) != '\0') {
120 /* Clear `alternate' modifier prior to new conversion. */
121 alt_format = 0;
122
123 /* Eat up white-space. */
124 if (isspace(c)) {
125 while (isspace(*bp))
126 bp++;
127
128 fmt++;
129 continue;
130 }
131
132 if ((c = *fmt++) != '%')
133 goto literal;
134
135
136 again: switch (c = *fmt++) {
137 case '%': /* "%%" is converted to "%". */
138 literal:
139 if (c != *bp++)
140 return (NULL);
141
142 break;
143
144 /*
145 * "Alternative" modifiers. Just set the appropriate flag
146 * and start over again.
147 */
148 case 'E': /* "%E?" alternative conversion modifier. */
149 _LEGAL_ALT(0);
150 alt_format |= _ALT_E;
151 goto again;
152
153 case 'O': /* "%O?" alternative conversion modifier. */
154 _LEGAL_ALT(0);
155 alt_format |= _ALT_O;
156 goto again;
157
158 /*
159 * "Complex" conversion rules, implemented through recursion.
160 */
161 case 'c': /* Date and time, using the locale's format. */
162 _LEGAL_ALT(_ALT_E);
163 if (!(bp = _strptime(bp, _ctloc(d_t_fmt), tm, 0)))
164 return (NULL);
165 break;
166
167 case 'D': /* The date as "%m/%d/%y". */
168 _LEGAL_ALT(0);
169 if (!(bp = _strptime(bp, "%m/%d/%y", tm, 0)))
170 return (NULL);
171 break;
172
173 case 'F': /* The date as "%Y-%m-%d". */
174 _LEGAL_ALT(0);
175 if (!(bp = _strptime(bp, "%Y-%m-%d", tm, 0)))
176 return (NULL);
177 continue;
178
179 case 'R': /* The time as "%H:%M". */
180 _LEGAL_ALT(0);
181 if (!(bp = _strptime(bp, "%H:%M", tm, 0)))
182 return (NULL);
183 break;
184
185 case 'r': /* The time as "%I:%M:%S %p". */
186 _LEGAL_ALT(0);
187 if (!(bp = _strptime(bp, "%I:%M:%S %p", tm, 0)))
188 return (NULL);
189 break;
190
191 case 'T': /* The time as "%H:%M:%S". */
192 _LEGAL_ALT(0);
193 if (!(bp = _strptime(bp, "%H:%M:%S", tm, 0)))
194 return (NULL);
195 break;
196
197 case 'v': /* Android: the date as "%e-%b-%Y" for strftime() compat; glibc does this too. */
198 _LEGAL_ALT(0);
199 if (!(bp = _strptime(bp, "%e-%b-%Y", tm, 0)))
200 return (NULL);
201 break;
202
203 case 'X': /* The time, using the locale's format. */
204 _LEGAL_ALT(_ALT_E);
205 if (!(bp = _strptime(bp, _ctloc(t_fmt), tm, 0)))
206 return (NULL);
207 break;
208
209 case 'x': /* The date, using the locale's format. */
210 _LEGAL_ALT(_ALT_E);
211 if (!(bp = _strptime(bp, _ctloc(d_fmt), tm, 0)))
212 return (NULL);
213 break;
214
215 /*
216 * "Elementary" conversion rules.
217 */
218 case 'A': /* The day of week, using the locale's form. */
219 case 'a':
220 _LEGAL_ALT(0);
221 for (i = 0; i < 7; i++) {
222 /* Full name. */
223 len = strlen(_ctloc(day[i]));
224 if (strncasecmp(_ctloc(day[i]), bp, len) == 0)
225 break;
226
227 /* Abbreviated name. */
228 len = strlen(_ctloc(abday[i]));
229 if (strncasecmp(_ctloc(abday[i]), bp, len) == 0)
230 break;
231 }
232
233 /* Nothing matched. */
234 if (i == 7)
235 return (NULL);
236
237 tm->tm_wday = i;
238 bp += len;
239 fields |= FIELD_TM_WDAY;
240 break;
241
242 case 'B': /* The month, using the locale's form. */
243 case 'b':
244 case 'h':
245 _LEGAL_ALT(0);
246 for (i = 0; i < 12; i++) {
247 /* Full name. */
248 len = strlen(_ctloc(mon[i]));
249 if (strncasecmp(_ctloc(mon[i]), bp, len) == 0)
250 break;
251
252 /* Abbreviated name. */
253 len = strlen(_ctloc(abmon[i]));
254 if (strncasecmp(_ctloc(abmon[i]), bp, len) == 0)
255 break;
256 }
257
258 /* Nothing matched. */
259 if (i == 12)
260 return (NULL);
261
262 tm->tm_mon = i;
263 bp += len;
264 fields |= FIELD_TM_MON;
265 break;
266
267 case 'C': /* The century number. */
268 _LEGAL_ALT(_ALT_E);
269 if (!(_conv_num(&bp, &i, 0, 99)))
270 return (NULL);
271
272 century = i * 100;
273 break;
274
275 case 'e': /* The day of month. */
276 if (isspace(*bp))
277 bp++;
278 /* FALLTHROUGH */
279 case 'd':
280 _LEGAL_ALT(_ALT_O);
281 if (!(_conv_num(&bp, &tm->tm_mday, 1, 31)))
282 return (NULL);
283 fields |= FIELD_TM_MDAY;
284 break;
285
286 case 'k': /* The hour (24-hour clock representation). */
287 _LEGAL_ALT(0);
288 /* FALLTHROUGH */
289 case 'H':
290 _LEGAL_ALT(_ALT_O);
291 if (!(_conv_num(&bp, &tm->tm_hour, 0, 23)))
292 return (NULL);
293 break;
294
295 case 'l': /* The hour (12-hour clock representation). */
296 _LEGAL_ALT(0);
297 /* FALLTHROUGH */
298 case 'I':
299 _LEGAL_ALT(_ALT_O);
300 if (!(_conv_num(&bp, &tm->tm_hour, 1, 12)))
301 return (NULL);
302 break;
303
304 case 'j': /* The day of year. */
305 _LEGAL_ALT(0);
306 if (!(_conv_num(&bp, &tm->tm_yday, 1, 366)))
307 return (NULL);
308 tm->tm_yday--;
309 fields |= FIELD_TM_YDAY;
310 break;
311
312 case 'M': /* The minute. */
313 _LEGAL_ALT(_ALT_O);
314 if (!(_conv_num(&bp, &tm->tm_min, 0, 59)))
315 return (NULL);
316 break;
317
318 case 'm': /* The month. */
319 _LEGAL_ALT(_ALT_O);
320 if (!(_conv_num(&bp, &tm->tm_mon, 1, 12)))
321 return (NULL);
322 tm->tm_mon--;
323 fields |= FIELD_TM_MON;
324 break;
325
326 case 'P': /* Android addition for strftime() compat; glibc does this too. */
327 case 'p': /* The locale's equivalent of AM/PM. */
328 _LEGAL_ALT(0);
329 /* AM? */
330 len = strlen(_ctloc(am_pm[0]));
331 if (strncasecmp(_ctloc(am_pm[0]), bp, len) == 0) {
332 if (tm->tm_hour > 12) /* i.e., 13:00 AM ?! */
333 return (NULL);
334 else if (tm->tm_hour == 12)
335 tm->tm_hour = 0;
336
337 bp += len;
338 break;
339 }
340 /* PM? */
341 len = strlen(_ctloc(am_pm[1]));
342 if (strncasecmp(_ctloc(am_pm[1]), bp, len) == 0) {
343 if (tm->tm_hour > 12) /* i.e., 13:00 PM ?! */
344 return (NULL);
345 else if (tm->tm_hour < 12)
346 tm->tm_hour += 12;
347
348 bp += len;
349 break;
350 }
351
352 /* Nothing matched. */
353 return (NULL);
354
355 case 'S': /* The seconds. */
356 _LEGAL_ALT(_ALT_O);
357 if (!(_conv_num(&bp, &tm->tm_sec, 0, 60)))
358 return (NULL);
359 break;
360 case 's': /* Seconds since epoch. */
361 if (!(epoch_to_tm(&bp, tm)))
362 return (NULL);
363 fields = 0xffff; /* everything */
364 break;
365 case 'U': /* The week of year, beginning on sunday. */
366 case 'W': /* The week of year, beginning on monday. */
367 _LEGAL_ALT(_ALT_O);
368 /*
369 * XXX This is bogus, as we can not assume any valid
370 * information present in the tm structure at this
371 * point to calculate a real value, so just check the
372 * range for now.
373 */
374 if (!(_conv_num(&bp, &i, 0, 53)))
375 return (NULL);
376 break;
377
378 case 'w': /* The day of week, beginning on sunday. */
379 _LEGAL_ALT(_ALT_O);
380 if (!(_conv_num(&bp, &tm->tm_wday, 0, 6)))
381 return (NULL);
382 fields |= FIELD_TM_WDAY;
383 break;
384
385 case 'u': /* The day of week, monday = 1. */
386 _LEGAL_ALT(_ALT_O);
387 if (!(_conv_num(&bp, &i, 1, 7)))
388 return (NULL);
389 tm->tm_wday = i % 7;
390 fields |= FIELD_TM_WDAY;
391 continue;
392
393 case 'g': /* The year corresponding to the ISO week
394 * number but without the century.
395 */
396 if (!(_conv_num(&bp, &i, 0, 99)))
397 return (NULL);
398 continue;
399
400 case 'G': /* The year corresponding to the ISO week
401 * number with century.
402 */
403 do
404 bp++;
405 while (isdigit(*bp));
406 continue;
407
408 case 'V': /* The ISO 8601:1988 week number as decimal */
409 if (!(_conv_num(&bp, &i, 0, 53)))
410 return (NULL);
411 continue;
412
413 case 'Y': /* The year. */
414 _LEGAL_ALT(_ALT_E);
415 if (!(_conv_num(&bp, &i, 0, 9999)))
416 return (NULL);
417
418 relyear = -1;
419 tm->tm_year = i - TM_YEAR_BASE;
420 fields |= FIELD_TM_YEAR;
421 break;
422
423 case 'y': /* The year within the century (2 digits). */
424 _LEGAL_ALT(_ALT_E | _ALT_O);
425 if (!(_conv_num(&bp, &relyear, 0, 99)))
426 return (NULL);
427 break;
428
429 case 'Z':
430 tzset();
431 if (strncmp((const char *)bp, gmt, 3) == 0) {
432 tm->tm_isdst = 0;
433 tm->tm_gmtoff = 0;
434 tm->tm_zone = gmt;
435 bp += 3;
436 } else if (strncmp((const char *)bp, utc, 3) == 0) {
437 tm->tm_isdst = 0;
438 tm->tm_gmtoff = 0;
439 tm->tm_zone = utc;
440 bp += 3;
441 } else {
442 ep = _find_string(bp, &i,
443 (const char * const *)tzname,
444 NULL, 2);
445 if (ep == NULL)
446 return (NULL);
447
448 tm->tm_isdst = i;
449 tm->tm_gmtoff = -(timezone);
450 tm->tm_zone = tzname[i];
451 bp = ep;
452 }
453 continue;
454
455 case 'z':
456 /*
457 * We recognize all ISO 8601 formats:
458 * Z = Zulu time/UTC
459 * [+-]hhmm
460 * [+-]hh:mm
461 * [+-]hh
462 * We recognize all RFC-822/RFC-2822 formats:
463 * UT|GMT
464 * North American : UTC offsets
465 * E[DS]T = Eastern : -4 | -5
466 * C[DS]T = Central : -5 | -6
467 * M[DS]T = Mountain: -6 | -7
468 * P[DS]T = Pacific : -7 | -8
469 */
470 while (isspace(*bp))
471 bp++;
472
473 switch (*bp++) {
474 case 'G':
475 if (*bp++ != 'M')
476 return NULL;
477 /*FALLTHROUGH*/
478 case 'U':
479 if (*bp++ != 'T')
480 return NULL;
481 /*FALLTHROUGH*/
482 case 'Z':
483 tm->tm_isdst = 0;
484 tm->tm_gmtoff = 0;
485 tm->tm_zone = utc;
486 continue;
487 case '+':
488 neg = 0;
489 break;
490 case '-':
491 neg = 1;
492 break;
493 default:
494 --bp;
495 ep = _find_string(bp, &i, nast, NULL, 4);
496 if (ep != NULL) {
497 tm->tm_gmtoff = (-5 - i) * SECSPERHOUR;
498 tm->tm_zone = (char *)nast[i];
499 bp = ep;
500 continue;
501 }
502 ep = _find_string(bp, &i, nadt, NULL, 4);
503 if (ep != NULL) {
504 tm->tm_isdst = 1;
505 tm->tm_gmtoff = (-4 - i) * SECSPERHOUR;
506 tm->tm_zone = (char *)nadt[i];
507 bp = ep;
508 continue;
509 }
510 return NULL;
511 }
512 if (!isdigit(bp[0]) || !isdigit(bp[1]))
513 return NULL;
514 offs = ((bp[0]-'0') * 10 + (bp[1]-'0')) * SECSPERHOUR;
515 bp += 2;
516 if (*bp == ':')
517 bp++;
518 if (isdigit(*bp)) {
519 offs += (*bp++ - '0') * 10 * SECSPERMIN;
520 if (!isdigit(*bp))
521 return NULL;
522 offs += (*bp++ - '0') * SECSPERMIN;
523 }
524 if (neg)
525 offs = -offs;
526 tm->tm_isdst = 0; /* XXX */
527 tm->tm_gmtoff = offs;
528 tm->tm_zone = NULL; /* XXX */
529 continue;
530
531 /*
532 * Miscellaneous conversions.
533 */
534 case 'n': /* Any kind of white-space. */
535 case 't':
536 _LEGAL_ALT(0);
537 while (isspace(*bp))
538 bp++;
539 break;
540
541
542 default: /* Unknown/unsupported conversion. */
543 return (NULL);
544 }
545
546
547 }
548
549 /*
550 * We need to evaluate the two digit year spec (%y)
551 * last as we can get a century spec (%C) at any time.
552 */
553 if (relyear != -1) {
554 if (century == TM_YEAR_BASE) {
555 if (relyear <= 68)
556 tm->tm_year = relyear + 2000 - TM_YEAR_BASE;
557 else
558 tm->tm_year = relyear + 1900 - TM_YEAR_BASE;
559 } else {
560 tm->tm_year = relyear + century - TM_YEAR_BASE;
561 }
562 fields |= FIELD_TM_YEAR;
563 }
564
565 /* Compute some missing values when possible. */
566 if (fields & FIELD_TM_YEAR) {
567 const int year = tm->tm_year + TM_YEAR_BASE;
568 const int *mon_lens = mon_lengths[isleap(year)];
569 if (!(fields & FIELD_TM_YDAY) &&
570 (fields & FIELD_TM_MON) && (fields & FIELD_TM_MDAY)) {
571 tm->tm_yday = tm->tm_mday - 1;
572 for (i = 0; i < tm->tm_mon; i++)
573 tm->tm_yday += mon_lens[i];
574 fields |= FIELD_TM_YDAY;
575 }
576 if (fields & FIELD_TM_YDAY) {
577 int days = tm->tm_yday;
578 if (!(fields & FIELD_TM_WDAY)) {
579 tm->tm_wday = EPOCH_WDAY +
580 ((year - EPOCH_YEAR) % DAYSPERWEEK) *
581 (DAYSPERNYEAR % DAYSPERWEEK) +
582 leaps_thru_end_of(year - 1) -
583 leaps_thru_end_of(EPOCH_YEAR - 1) +
584 tm->tm_yday;
585 tm->tm_wday %= DAYSPERWEEK;
586 if (tm->tm_wday < 0)
587 tm->tm_wday += DAYSPERWEEK;
588 }
589 if (!(fields & FIELD_TM_MON)) {
590 tm->tm_mon = 0;
591 while (tm->tm_mon < MONSPERYEAR && days >= mon_lens[tm->tm_mon])
592 days -= mon_lens[tm->tm_mon++];
593 }
594 if (!(fields & FIELD_TM_MDAY))
595 tm->tm_mday = days + 1;
596 }
597 }
598
599 return ((char *)bp);
600 }
601
602
/*
 * Parse an unsigned decimal number at *buf.
 *
 * Digits are consumed while appending another one could still keep the
 * value within `ulim' and while the digit budget (one digit per decimal
 * digit of `ulim') is not used up.  *buf is advanced past every digit
 * consumed, even when the final range check fails; it is left untouched
 * only when the first character is not a digit.
 *
 * Returns 1 and stores the value in *dest when llim <= value <= ulim,
 * otherwise returns 0 and leaves *dest alone.
 */
static int
_conv_num(const unsigned char **buf, int *dest, int llim, int ulim)
{
	const unsigned char *cur = *buf;
	int budget = ulim;
	int value = 0;

	if (*cur < '0' || *cur > '9')
		return (0);

	for (;;) {
		value = value * 10 + (*cur++ - '0');
		budget /= 10;

		/* Stop once one more digit would overshoot or the budget is spent. */
		if (value * 10 > ulim || budget == 0)
			break;
		if (*cur < '0' || *cur > '9')
			break;
	}
	*buf = cur;

	if (value < llim || value > ulim)
		return (0);

	*dest = value;
	return (1);
}
625
/*
 * Parse a non-negative decimal count of seconds since the epoch at *buf
 * and break it down into `tm' with localtime_r().
 *
 * *buf is always set to where strtoll() stopped (unchanged when no digits
 * were found).  The caller's errno is preserved.
 *
 * Returns 1 on success.  Returns 0 when no number is present, the value is
 * negative, it overflows long long, it cannot be represented in time_t,
 * or localtime_r() fails.
 */
static int
epoch_to_tm(const unsigned char **buf, struct tm *tm)
{
	int saved_errno = errno;
	int ret = 0;
	long long val;
	time_t secs;
	char *ep;

	errno = 0;
	val = strtoll((const char *)*buf, &ep, 10);
	if (*buf == (const unsigned char *)ep)
		goto done;		/* nothing was converted */
	if (val < 0 ||
	    (val == LLONG_MAX && errno == ERANGE))
		goto done;

	/*
	 * Parse into long long first and make sure the value survives the
	 * conversion: where time_t is narrower than long long a large
	 * input would otherwise be silently truncated and accepted.
	 */
	secs = (time_t)val;
	if ((long long)secs != val)
		goto done;

	if (localtime_r(&secs, tm) == NULL)
		goto done;
	ret = 1;
done:
	*buf = (const unsigned char *)ep;
	errno = saved_errno;
	return (ret);
}
649
/*
 * Case-insensitively match the start of `bp' against a list of names.
 *
 * The first `c' entries of `n1' are tried in order, then (if `n1' was not
 * NULL and `n2' is not NULL) the first `c' entries of `n2'.  On the first
 * entry that is a prefix of `bp', its index is stored in *tgt and a pointer
 * just past the matched text is returned.  Returns NULL, leaving *tgt
 * untouched, when nothing matches.
 */
static const u_char *
_find_string(const u_char *bp, int *tgt, const char * const *n1,
    const char * const *n2, int c)
{
	const char * const *lists[2];
	const char * const *names;
	unsigned int len;
	int which, idx;

	lists[0] = n1;
	lists[1] = n2;

	/* check full name - then abbreviated ones */
	for (which = 0; which < 2; which++) {
		names = lists[which];
		if (names == NULL)
			break;

		for (idx = 0; idx < c; idx++) {
			len = strlen(names[idx]);
			if (strncasecmp(names[idx], (const char *)bp, len) != 0)
				continue;

			*tgt = idx;
			return bp + len;
		}
	}

	/* Nothing matched */
	return NULL;
}
671
/*
 * Count the years in 1..y that are divisible by 4, minus those divisible
 * by 100, plus those divisible by 400.  A negative `y' is mirrored onto
 * -(y + 1), and the result is -(count + 1) for that mirrored year.
 */
static int
leaps_thru_end_of(const int y)
{
	const int negative = (y < 0);
	const int n = negative ? -(y + 1) : y;
	const int count = n / 4 - n / 100 + n / 400;

	return (negative ? -(count + 1) : count);
}
678