1 #ifndef Py_BUILD_CORE_BUILTIN
2 # define Py_BUILD_CORE_MODULE 1
3 #endif
4
5 #include "Python.h"
6 #include "pycore_long.h" // _PyLong_GetOne()
7 #include "structmember.h"
8
9 #include <ctype.h>
10 #include <stddef.h>
11 #include <stdint.h>
12
13 #include "datetime.h"
14
15 // Imports
16 static PyObject *io_open = NULL;
17 static PyObject *_tzpath_find_tzfile = NULL;
18 static PyObject *_common_mod = NULL;
19
20 typedef struct TransitionRuleType TransitionRuleType;
21 typedef struct StrongCacheNode StrongCacheNode;
22
23 typedef struct {
24 PyObject *utcoff;
25 PyObject *dstoff;
26 PyObject *tzname;
27 long utcoff_seconds;
28 } _ttinfo;
29
30 typedef struct {
31 _ttinfo std;
32 _ttinfo dst;
33 int dst_diff;
34 TransitionRuleType *start;
35 TransitionRuleType *end;
36 unsigned char std_only;
37 } _tzrule;
38
39 typedef struct {
40 PyDateTime_TZInfo base;
41 PyObject *key;
42 PyObject *file_repr;
43 PyObject *weakreflist;
44 size_t num_transitions;
45 size_t num_ttinfos;
46 int64_t *trans_list_utc;
47 int64_t *trans_list_wall[2];
48 _ttinfo **trans_ttinfos; // References to the ttinfo for each transition
49 _ttinfo *ttinfo_before;
50 _tzrule tzrule_after;
51 _ttinfo *_ttinfos; // Unique array of ttinfos for ease of deallocation
52 unsigned char fixed_offset;
53 unsigned char source;
54 } PyZoneInfo_ZoneInfo;
55
56 struct TransitionRuleType {
57 int64_t (*year_to_timestamp)(TransitionRuleType *, int);
58 };
59
60 typedef struct {
61 TransitionRuleType base;
62 uint8_t month;
63 uint8_t week;
64 uint8_t day;
65 int8_t hour;
66 int8_t minute;
67 int8_t second;
68 } CalendarRule;
69
70 typedef struct {
71 TransitionRuleType base;
72 uint8_t julian;
73 unsigned int day;
74 int8_t hour;
75 int8_t minute;
76 int8_t second;
77 } DayRule;
78
79 struct StrongCacheNode {
80 StrongCacheNode *next;
81 StrongCacheNode *prev;
82 PyObject *key;
83 PyObject *zone;
84 };
85
86 static PyTypeObject PyZoneInfo_ZoneInfoType;
87
88 // Globals
89 static PyObject *TIMEDELTA_CACHE = NULL;
90 static PyObject *ZONEINFO_WEAK_CACHE = NULL;
91 static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL;
92 static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8;
93
94 static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0};
95
96 // Constants
97 static const int EPOCHORDINAL = 719163;
98 static int DAYS_IN_MONTH[] = {
99 -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
100 };
101
102 static int DAYS_BEFORE_MONTH[] = {
103 -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
104 };
105
106 static const int SOURCE_NOCACHE = 0;
107 static const int SOURCE_CACHE = 1;
108 static const int SOURCE_FILE = 2;
109
110 // Forward declarations
111 static int
112 load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj);
113 static void
114 utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
115 unsigned char *isdsts, size_t num_transitions,
116 size_t num_ttinfos);
117 static int
118 ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
119 int64_t *trans_local[2], size_t num_ttinfos,
120 size_t num_transitions);
121
122 static int
123 parse_tz_str(PyObject *tz_str_obj, _tzrule *out);
124
125 static Py_ssize_t
126 parse_abbr(const char *const p, PyObject **abbr);
127 static Py_ssize_t
128 parse_tz_delta(const char *const p, long *total_seconds);
129 static Py_ssize_t
130 parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
131 int8_t *second);
132 static Py_ssize_t
133 parse_transition_rule(const char *const p, TransitionRuleType **out);
134
135 static _ttinfo *
136 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year);
137 static _ttinfo *
138 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
139 unsigned char *fold);
140
141 static int
142 build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out);
143 static void
144 xdecref_ttinfo(_ttinfo *ttinfo);
145 static int
146 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1);
147
148 static int
149 build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
150 long dst_offset, TransitionRuleType *start,
151 TransitionRuleType *end, _tzrule *out);
152 static void
153 free_tzrule(_tzrule *tzrule);
154
155 static PyObject *
156 load_timedelta(long seconds);
157
158 static int
159 get_local_timestamp(PyObject *dt, int64_t *local_ts);
160 static _ttinfo *
161 find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt);
162
163 static int
164 ymd_to_ord(int y, int m, int d);
165 static int
166 is_leap_year(int year);
167
168 static size_t
169 _bisect(const int64_t value, const int64_t *arr, size_t size);
170
171 static int
172 eject_from_strong_cache(const PyTypeObject *const type, PyObject *key);
173 static void
174 clear_strong_cache(const PyTypeObject *const type);
175 static void
176 update_strong_cache(const PyTypeObject *const type, PyObject *key,
177 PyObject *zone);
178 static PyObject *
179 zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key);
180
181 static PyObject *
zoneinfo_new_instance(PyTypeObject * type,PyObject * key)182 zoneinfo_new_instance(PyTypeObject *type, PyObject *key)
183 {
184 PyObject *file_obj = NULL;
185 PyObject *file_path = NULL;
186
187 file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL);
188 if (file_path == NULL) {
189 return NULL;
190 }
191 else if (file_path == Py_None) {
192 file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key);
193 if (file_obj == NULL) {
194 Py_DECREF(file_path);
195 return NULL;
196 }
197 }
198
199 PyObject *self = (PyObject *)(type->tp_alloc(type, 0));
200 if (self == NULL) {
201 goto error;
202 }
203
204 if (file_obj == NULL) {
205 file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb");
206 if (file_obj == NULL) {
207 goto error;
208 }
209 }
210
211 if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) {
212 goto error;
213 }
214
215 PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL);
216 Py_DECREF(file_obj);
217 file_obj = NULL;
218 if (rv == NULL) {
219 goto error;
220 }
221 Py_DECREF(rv);
222
223 ((PyZoneInfo_ZoneInfo *)self)->key = key;
224 Py_INCREF(key);
225
226 goto cleanup;
227 error:
228 Py_XDECREF(self);
229 self = NULL;
230 cleanup:
231 if (file_obj != NULL) {
232 PyObject *exc, *val, *tb;
233 PyErr_Fetch(&exc, &val, &tb);
234 PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL);
235 _PyErr_ChainExceptions(exc, val, tb);
236 if (tmp == NULL) {
237 Py_CLEAR(self);
238 }
239 Py_XDECREF(tmp);
240 Py_DECREF(file_obj);
241 }
242 Py_DECREF(file_path);
243 return self;
244 }
245
246 static PyObject *
get_weak_cache(PyTypeObject * type)247 get_weak_cache(PyTypeObject *type)
248 {
249 if (type == &PyZoneInfo_ZoneInfoType) {
250 return ZONEINFO_WEAK_CACHE;
251 }
252 else {
253 PyObject *cache =
254 PyObject_GetAttrString((PyObject *)type, "_weak_cache");
255 // We are assuming that the type lives at least as long as the function
256 // that calls get_weak_cache, and that it holds a reference to the
257 // cache, so we'll return a "borrowed reference".
258 Py_XDECREF(cache);
259 return cache;
260 }
261 }
262
263 static PyObject *
zoneinfo_new(PyTypeObject * type,PyObject * args,PyObject * kw)264 zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw)
265 {
266 PyObject *key = NULL;
267 static char *kwlist[] = {"key", NULL};
268 if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) {
269 return NULL;
270 }
271
272 PyObject *instance = zone_from_strong_cache(type, key);
273 if (instance != NULL || PyErr_Occurred()) {
274 return instance;
275 }
276
277 PyObject *weak_cache = get_weak_cache(type);
278 instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None);
279 if (instance == NULL) {
280 return NULL;
281 }
282
283 if (instance == Py_None) {
284 Py_DECREF(instance);
285 PyObject *tmp = zoneinfo_new_instance(type, key);
286 if (tmp == NULL) {
287 return NULL;
288 }
289
290 instance =
291 PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp);
292 Py_DECREF(tmp);
293 if (instance == NULL) {
294 return NULL;
295 }
296 ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE;
297 }
298
299 update_strong_cache(type, key, instance);
300 return instance;
301 }
302
303 static void
zoneinfo_dealloc(PyObject * obj_self)304 zoneinfo_dealloc(PyObject *obj_self)
305 {
306 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
307
308 if (self->weakreflist != NULL) {
309 PyObject_ClearWeakRefs(obj_self);
310 }
311
312 if (self->trans_list_utc != NULL) {
313 PyMem_Free(self->trans_list_utc);
314 }
315
316 for (size_t i = 0; i < 2; i++) {
317 if (self->trans_list_wall[i] != NULL) {
318 PyMem_Free(self->trans_list_wall[i]);
319 }
320 }
321
322 if (self->_ttinfos != NULL) {
323 for (size_t i = 0; i < self->num_ttinfos; ++i) {
324 xdecref_ttinfo(&(self->_ttinfos[i]));
325 }
326 PyMem_Free(self->_ttinfos);
327 }
328
329 if (self->trans_ttinfos != NULL) {
330 PyMem_Free(self->trans_ttinfos);
331 }
332
333 free_tzrule(&(self->tzrule_after));
334
335 Py_XDECREF(self->key);
336 Py_XDECREF(self->file_repr);
337
338 Py_TYPE(self)->tp_free((PyObject *)self);
339 }
340
341 static PyObject *
zoneinfo_from_file(PyTypeObject * type,PyObject * args,PyObject * kwargs)342 zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs)
343 {
344 PyObject *file_obj = NULL;
345 PyObject *file_repr = NULL;
346 PyObject *key = Py_None;
347 PyZoneInfo_ZoneInfo *self = NULL;
348
349 static char *kwlist[] = {"", "key", NULL};
350 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj,
351 &key)) {
352 return NULL;
353 }
354
355 PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0));
356 self = (PyZoneInfo_ZoneInfo *)obj_self;
357 if (self == NULL) {
358 return NULL;
359 }
360
361 file_repr = PyUnicode_FromFormat("%R", file_obj);
362 if (file_repr == NULL) {
363 goto error;
364 }
365
366 if (load_data(self, file_obj)) {
367 goto error;
368 }
369
370 self->source = SOURCE_FILE;
371 self->file_repr = file_repr;
372 self->key = key;
373 Py_INCREF(key);
374
375 return obj_self;
376 error:
377 Py_XDECREF(file_repr);
378 Py_XDECREF(self);
379 return NULL;
380 }
381
382 static PyObject *
zoneinfo_no_cache(PyTypeObject * cls,PyObject * args,PyObject * kwargs)383 zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
384 {
385 static char *kwlist[] = {"key", NULL};
386 PyObject *key = NULL;
387 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) {
388 return NULL;
389 }
390
391 PyObject *out = zoneinfo_new_instance(cls, key);
392 if (out != NULL) {
393 ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE;
394 }
395
396 return out;
397 }
398
399 static PyObject *
zoneinfo_clear_cache(PyObject * cls,PyObject * args,PyObject * kwargs)400 zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs)
401 {
402 PyObject *only_keys = NULL;
403 static char *kwlist[] = {"only_keys", NULL};
404
405 if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist,
406 &only_keys))) {
407 return NULL;
408 }
409
410 PyTypeObject *type = (PyTypeObject *)cls;
411 PyObject *weak_cache = get_weak_cache(type);
412
413 if (only_keys == NULL || only_keys == Py_None) {
414 PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL);
415 if (rv != NULL) {
416 Py_DECREF(rv);
417 }
418
419 clear_strong_cache(type);
420 }
421 else {
422 PyObject *item = NULL;
423 PyObject *pop = PyUnicode_FromString("pop");
424 if (pop == NULL) {
425 return NULL;
426 }
427
428 PyObject *iter = PyObject_GetIter(only_keys);
429 if (iter == NULL) {
430 Py_DECREF(pop);
431 return NULL;
432 }
433
434 while ((item = PyIter_Next(iter))) {
435 // Remove from strong cache
436 if (eject_from_strong_cache(type, item) < 0) {
437 Py_DECREF(item);
438 break;
439 }
440
441 // Remove from weak cache
442 PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item,
443 Py_None, NULL);
444
445 Py_DECREF(item);
446 if (tmp == NULL) {
447 break;
448 }
449 Py_DECREF(tmp);
450 }
451 Py_DECREF(iter);
452 Py_DECREF(pop);
453 }
454
455 if (PyErr_Occurred()) {
456 return NULL;
457 }
458
459 Py_RETURN_NONE;
460 }
461
462 static PyObject *
zoneinfo_utcoffset(PyObject * self,PyObject * dt)463 zoneinfo_utcoffset(PyObject *self, PyObject *dt)
464 {
465 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
466 if (tti == NULL) {
467 return NULL;
468 }
469 Py_INCREF(tti->utcoff);
470 return tti->utcoff;
471 }
472
473 static PyObject *
zoneinfo_dst(PyObject * self,PyObject * dt)474 zoneinfo_dst(PyObject *self, PyObject *dt)
475 {
476 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
477 if (tti == NULL) {
478 return NULL;
479 }
480 Py_INCREF(tti->dstoff);
481 return tti->dstoff;
482 }
483
484 static PyObject *
zoneinfo_tzname(PyObject * self,PyObject * dt)485 zoneinfo_tzname(PyObject *self, PyObject *dt)
486 {
487 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
488 if (tti == NULL) {
489 return NULL;
490 }
491 Py_INCREF(tti->tzname);
492 return tti->tzname;
493 }
494
495 #define GET_DT_TZINFO PyDateTime_DATE_GET_TZINFO
496
497 static PyObject *
zoneinfo_fromutc(PyObject * obj_self,PyObject * dt)498 zoneinfo_fromutc(PyObject *obj_self, PyObject *dt)
499 {
500 if (!PyDateTime_Check(dt)) {
501 PyErr_SetString(PyExc_TypeError,
502 "fromutc: argument must be a datetime");
503 return NULL;
504 }
505 if (GET_DT_TZINFO(dt) != obj_self) {
506 PyErr_SetString(PyExc_ValueError,
507 "fromutc: dt.tzinfo "
508 "is not self");
509 return NULL;
510 }
511
512 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
513
514 int64_t timestamp;
515 if (get_local_timestamp(dt, ×tamp)) {
516 return NULL;
517 }
518 size_t num_trans = self->num_transitions;
519
520 _ttinfo *tti = NULL;
521 unsigned char fold = 0;
522
523 if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) {
524 tti = self->ttinfo_before;
525 }
526 else if (num_trans == 0 ||
527 timestamp > self->trans_list_utc[num_trans - 1]) {
528 tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp,
529 PyDateTime_GET_YEAR(dt), &fold);
530
531 // Immediately after the last manual transition, the fold/gap is
532 // between self->trans_ttinfos[num_transitions - 1] and whatever
533 // ttinfo applies immediately after the last transition, not between
534 // the STD and DST rules in the tzrule_after, so we may need to
535 // adjust the fold value.
536 if (num_trans) {
537 _ttinfo *tti_prev = NULL;
538 if (num_trans == 1) {
539 tti_prev = self->ttinfo_before;
540 }
541 else {
542 tti_prev = self->trans_ttinfos[num_trans - 2];
543 }
544 int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds;
545 if (diff > 0 &&
546 timestamp < (self->trans_list_utc[num_trans - 1] + diff)) {
547 fold = 1;
548 }
549 }
550 }
551 else {
552 size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans);
553 _ttinfo *tti_prev = NULL;
554
555 if (idx >= 2) {
556 tti_prev = self->trans_ttinfos[idx - 2];
557 tti = self->trans_ttinfos[idx - 1];
558 }
559 else {
560 tti_prev = self->ttinfo_before;
561 tti = self->trans_ttinfos[0];
562 }
563
564 // Detect fold
565 int64_t shift =
566 (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds);
567 if (shift > (timestamp - self->trans_list_utc[idx - 1])) {
568 fold = 1;
569 }
570 }
571
572 PyObject *tmp = PyNumber_Add(dt, tti->utcoff);
573 if (tmp == NULL) {
574 return NULL;
575 }
576
577 if (fold) {
578 if (PyDateTime_CheckExact(tmp)) {
579 ((PyDateTime_DateTime *)tmp)->fold = 1;
580 dt = tmp;
581 }
582 else {
583 PyObject *replace = PyObject_GetAttrString(tmp, "replace");
584 PyObject *args = PyTuple_New(0);
585 PyObject *kwargs = PyDict_New();
586
587 Py_DECREF(tmp);
588 if (args == NULL || kwargs == NULL || replace == NULL) {
589 Py_XDECREF(args);
590 Py_XDECREF(kwargs);
591 Py_XDECREF(replace);
592 return NULL;
593 }
594
595 dt = NULL;
596 if (!PyDict_SetItemString(kwargs, "fold", _PyLong_GetOne())) {
597 dt = PyObject_Call(replace, args, kwargs);
598 }
599
600 Py_DECREF(args);
601 Py_DECREF(kwargs);
602 Py_DECREF(replace);
603
604 if (dt == NULL) {
605 return NULL;
606 }
607 }
608 }
609 else {
610 dt = tmp;
611 }
612 return dt;
613 }
614
615 static PyObject *
zoneinfo_repr(PyZoneInfo_ZoneInfo * self)616 zoneinfo_repr(PyZoneInfo_ZoneInfo *self)
617 {
618 PyObject *rv = NULL;
619 const char *type_name = Py_TYPE((PyObject *)self)->tp_name;
620 if (!(self->key == Py_None)) {
621 rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key);
622 }
623 else {
624 assert(PyUnicode_Check(self->file_repr));
625 rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name,
626 self->file_repr);
627 }
628
629 return rv;
630 }
631
632 static PyObject *
zoneinfo_str(PyZoneInfo_ZoneInfo * self)633 zoneinfo_str(PyZoneInfo_ZoneInfo *self)
634 {
635 if (!(self->key == Py_None)) {
636 Py_INCREF(self->key);
637 return self->key;
638 }
639 else {
640 return zoneinfo_repr(self);
641 }
642 }
643
644 /* Pickles the ZoneInfo object by key and source.
645 *
646 * ZoneInfo objects are pickled by reference to the TZif file that they came
647 * from, which means that the exact transitions may be different or the file
648 * may not un-pickle if the data has changed on disk in the interim.
649 *
650 * It is necessary to include a bit indicating whether or not the object
651 * was constructed from the cache, because from-cache objects will hit the
652 * unpickling process's cache, whereas no-cache objects will bypass it.
653 *
654 * Objects constructed from ZoneInfo.from_file cannot be pickled.
655 */
656 static PyObject *
zoneinfo_reduce(PyObject * obj_self,PyObject * unused)657 zoneinfo_reduce(PyObject *obj_self, PyObject *unused)
658 {
659 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
660 if (self->source == SOURCE_FILE) {
661 // Objects constructed from files cannot be pickled.
662 PyObject *pickle = PyImport_ImportModule("pickle");
663 if (pickle == NULL) {
664 return NULL;
665 }
666
667 PyObject *pickle_error =
668 PyObject_GetAttrString(pickle, "PicklingError");
669 Py_DECREF(pickle);
670 if (pickle_error == NULL) {
671 return NULL;
672 }
673
674 PyErr_Format(pickle_error,
675 "Cannot pickle a ZoneInfo file from a file stream.");
676 Py_DECREF(pickle_error);
677 return NULL;
678 }
679
680 unsigned char from_cache = self->source == SOURCE_CACHE ? 1 : 0;
681 PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle");
682
683 if (constructor == NULL) {
684 return NULL;
685 }
686
687 PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache);
688 Py_DECREF(constructor);
689 return rv;
690 }
691
692 static PyObject *
zoneinfo__unpickle(PyTypeObject * cls,PyObject * args)693 zoneinfo__unpickle(PyTypeObject *cls, PyObject *args)
694 {
695 PyObject *key;
696 unsigned char from_cache;
697 if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) {
698 return NULL;
699 }
700
701 if (from_cache) {
702 PyObject *val_args = Py_BuildValue("(O)", key);
703 if (val_args == NULL) {
704 return NULL;
705 }
706
707 PyObject *rv = zoneinfo_new(cls, val_args, NULL);
708
709 Py_DECREF(val_args);
710 return rv;
711 }
712 else {
713 return zoneinfo_new_instance(cls, key);
714 }
715 }
716
717 /* It is relatively expensive to construct new timedelta objects, and in most
718 * cases we're looking at a relatively small number of timedeltas, such as
719 * integer number of hours, etc. We will keep a cache so that we construct
720 * a minimal number of these.
721 *
722 * Possibly this should be replaced with an LRU cache so that it's not possible
723 * for the memory usage to explode from this, but in order for this to be a
724 * serious problem, one would need to deliberately craft a malicious time zone
725 * file with many distinct offsets. As of tzdb 2019c, loading every single zone
726 * fills the cache with ~450 timedeltas for a total size of ~12kB.
727 *
728 * This returns a new reference to the timedelta.
729 */
730 static PyObject *
load_timedelta(long seconds)731 load_timedelta(long seconds)
732 {
733 PyObject *rv;
734 PyObject *pyoffset = PyLong_FromLong(seconds);
735 if (pyoffset == NULL) {
736 return NULL;
737 }
738 rv = PyDict_GetItemWithError(TIMEDELTA_CACHE, pyoffset);
739 if (rv == NULL) {
740 if (PyErr_Occurred()) {
741 goto error;
742 }
743 PyObject *tmp = PyDateTimeAPI->Delta_FromDelta(
744 0, seconds, 0, 1, PyDateTimeAPI->DeltaType);
745
746 if (tmp == NULL) {
747 goto error;
748 }
749
750 rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp);
751 Py_DECREF(tmp);
752 }
753
754 Py_XINCREF(rv);
755 Py_DECREF(pyoffset);
756 return rv;
757 error:
758 Py_DECREF(pyoffset);
759 return NULL;
760 }
761
762 /* Constructor for _ttinfo object - this starts by initializing the _ttinfo
763 * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially
764 * initialized _ttinfo objects.
765 */
766 static int
build_ttinfo(long utcoffset,long dstoffset,PyObject * tzname,_ttinfo * out)767 build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out)
768 {
769 out->utcoff = NULL;
770 out->dstoff = NULL;
771 out->tzname = NULL;
772
773 out->utcoff_seconds = utcoffset;
774 out->utcoff = load_timedelta(utcoffset);
775 if (out->utcoff == NULL) {
776 return -1;
777 }
778
779 out->dstoff = load_timedelta(dstoffset);
780 if (out->dstoff == NULL) {
781 return -1;
782 }
783
784 out->tzname = tzname;
785 Py_INCREF(tzname);
786
787 return 0;
788 }
789
790 /* Decrease reference count on any non-NULL members of a _ttinfo */
791 static void
xdecref_ttinfo(_ttinfo * ttinfo)792 xdecref_ttinfo(_ttinfo *ttinfo)
793 {
794 if (ttinfo != NULL) {
795 Py_XDECREF(ttinfo->utcoff);
796 Py_XDECREF(ttinfo->dstoff);
797 Py_XDECREF(ttinfo->tzname);
798 }
799 }
800
801 /* Equality function for _ttinfo. */
802 static int
ttinfo_eq(const _ttinfo * const tti0,const _ttinfo * const tti1)803 ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1)
804 {
805 int rv;
806 if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) <
807 1) {
808 goto end;
809 }
810
811 if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) <
812 1) {
813 goto end;
814 }
815
816 if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) <
817 1) {
818 goto end;
819 }
820 end:
821 return rv;
822 }
823
824 /* Given a file-like object, this populates a ZoneInfo object
825 *
826 * The current version calls into a Python function to read the data from
827 * file into Python objects, and this translates those Python objects into
828 * C values and calculates derived values (e.g. dstoff) in C.
829 *
830 * This returns 0 on success and -1 on failure.
831 *
832 * The function will never return while `self` is partially initialized —
833 * the object only needs to be freed / deallocated if this succeeds.
834 */
835 static int
load_data(PyZoneInfo_ZoneInfo * self,PyObject * file_obj)836 load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj)
837 {
838 PyObject *data_tuple = NULL;
839
840 long *utcoff = NULL;
841 long *dstoff = NULL;
842 size_t *trans_idx = NULL;
843 unsigned char *isdst = NULL;
844
845 self->trans_list_utc = NULL;
846 self->trans_list_wall[0] = NULL;
847 self->trans_list_wall[1] = NULL;
848 self->trans_ttinfos = NULL;
849 self->_ttinfos = NULL;
850 self->file_repr = NULL;
851
852 size_t ttinfos_allocated = 0;
853
854 data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj);
855
856 if (data_tuple == NULL) {
857 goto error;
858 }
859
860 if (!PyTuple_CheckExact(data_tuple)) {
861 PyErr_Format(PyExc_TypeError, "Invalid data result type: %r",
862 data_tuple);
863 goto error;
864 }
865
866 // Unpack the data tuple
867 PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0);
868 if (trans_idx_list == NULL) {
869 goto error;
870 }
871
872 PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1);
873 if (trans_utc == NULL) {
874 goto error;
875 }
876
877 PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2);
878 if (utcoff_list == NULL) {
879 goto error;
880 }
881
882 PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3);
883 if (isdst_list == NULL) {
884 goto error;
885 }
886
887 PyObject *abbr = PyTuple_GetItem(data_tuple, 4);
888 if (abbr == NULL) {
889 goto error;
890 }
891
892 PyObject *tz_str = PyTuple_GetItem(data_tuple, 5);
893 if (tz_str == NULL) {
894 goto error;
895 }
896
897 // Load the relevant sizes
898 Py_ssize_t num_transitions = PyTuple_Size(trans_utc);
899 if (num_transitions < 0) {
900 goto error;
901 }
902
903 Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list);
904 if (num_ttinfos < 0) {
905 goto error;
906 }
907
908 self->num_transitions = (size_t)num_transitions;
909 self->num_ttinfos = (size_t)num_ttinfos;
910
911 // Load the transition indices and list
912 self->trans_list_utc =
913 PyMem_Malloc(self->num_transitions * sizeof(int64_t));
914 if (self->trans_list_utc == NULL) {
915 goto error;
916 }
917 trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t));
918 if (trans_idx == NULL) {
919 goto error;
920 }
921
922 for (size_t i = 0; i < self->num_transitions; ++i) {
923 PyObject *num = PyTuple_GetItem(trans_utc, i);
924 if (num == NULL) {
925 goto error;
926 }
927 self->trans_list_utc[i] = PyLong_AsLongLong(num);
928 if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) {
929 goto error;
930 }
931
932 num = PyTuple_GetItem(trans_idx_list, i);
933 if (num == NULL) {
934 goto error;
935 }
936
937 Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num);
938 if (cur_trans_idx == -1) {
939 goto error;
940 }
941
942 trans_idx[i] = (size_t)cur_trans_idx;
943 if (trans_idx[i] > self->num_ttinfos) {
944 PyErr_Format(
945 PyExc_ValueError,
946 "Invalid transition index found while reading TZif: %zd",
947 cur_trans_idx);
948
949 goto error;
950 }
951 }
952
953 // Load UTC offsets and isdst (size num_ttinfos)
954 utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long));
955 isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char));
956
957 if (utcoff == NULL || isdst == NULL) {
958 goto error;
959 }
960 for (size_t i = 0; i < self->num_ttinfos; ++i) {
961 PyObject *num = PyTuple_GetItem(utcoff_list, i);
962 if (num == NULL) {
963 goto error;
964 }
965
966 utcoff[i] = PyLong_AsLong(num);
967 if (utcoff[i] == -1 && PyErr_Occurred()) {
968 goto error;
969 }
970
971 num = PyTuple_GetItem(isdst_list, i);
972 if (num == NULL) {
973 goto error;
974 }
975
976 int isdst_with_error = PyObject_IsTrue(num);
977 if (isdst_with_error == -1) {
978 goto error;
979 }
980 else {
981 isdst[i] = (unsigned char)isdst_with_error;
982 }
983 }
984
985 dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long));
986 if (dstoff == NULL) {
987 goto error;
988 }
989
990 // Derive dstoff and trans_list_wall from the information we've loaded
991 utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions,
992 self->num_ttinfos);
993
994 if (ts_to_local(trans_idx, self->trans_list_utc, utcoff,
995 self->trans_list_wall, self->num_ttinfos,
996 self->num_transitions)) {
997 goto error;
998 }
999
1000 // Build _ttinfo objects from utcoff, dstoff and abbr
1001 self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo));
1002 if (self->_ttinfos == NULL) {
1003 goto error;
1004 }
1005 for (size_t i = 0; i < self->num_ttinfos; ++i) {
1006 PyObject *tzname = PyTuple_GetItem(abbr, i);
1007 if (tzname == NULL) {
1008 goto error;
1009 }
1010
1011 ttinfos_allocated++;
1012 if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) {
1013 goto error;
1014 }
1015 }
1016
1017 // Build our mapping from transition to the ttinfo that applies
1018 self->trans_ttinfos =
1019 PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *));
1020 if (self->trans_ttinfos == NULL) {
1021 goto error;
1022 }
1023 for (size_t i = 0; i < self->num_transitions; ++i) {
1024 size_t ttinfo_idx = trans_idx[i];
1025 assert(ttinfo_idx < self->num_ttinfos);
1026 self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]);
1027 }
1028
1029 // Set ttinfo_before to the first non-DST transition
1030 for (size_t i = 0; i < self->num_ttinfos; ++i) {
1031 if (!isdst[i]) {
1032 self->ttinfo_before = &(self->_ttinfos[i]);
1033 break;
1034 }
1035 }
1036
1037 // If there are only DST ttinfos, pick the first one, if there are no
1038 // ttinfos at all, set ttinfo_before to NULL
1039 if (self->ttinfo_before == NULL && self->num_ttinfos > 0) {
1040 self->ttinfo_before = &(self->_ttinfos[0]);
1041 }
1042
1043 if (tz_str != Py_None && PyObject_IsTrue(tz_str)) {
1044 if (parse_tz_str(tz_str, &(self->tzrule_after))) {
1045 goto error;
1046 }
1047 }
1048 else {
1049 if (!self->num_ttinfos) {
1050 PyErr_Format(PyExc_ValueError, "No time zone information found.");
1051 goto error;
1052 }
1053
1054 size_t idx;
1055 if (!self->num_transitions) {
1056 idx = self->num_ttinfos - 1;
1057 }
1058 else {
1059 idx = trans_idx[self->num_transitions - 1];
1060 }
1061
1062 _ttinfo *tti = &(self->_ttinfos[idx]);
1063 build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL,
1064 &(self->tzrule_after));
1065
1066 // We've abused the build_tzrule constructor to construct an STD-only
1067 // rule mimicking whatever ttinfo we've picked up, but it's possible
1068 // that the one we've picked up is a DST zone, so we need to make sure
1069 // that the dstoff is set correctly in that case.
1070 if (PyObject_IsTrue(tti->dstoff)) {
1071 _ttinfo *tti_after = &(self->tzrule_after.std);
1072 Py_DECREF(tti_after->dstoff);
1073 tti_after->dstoff = tti->dstoff;
1074 Py_INCREF(tti_after->dstoff);
1075 }
1076 }
1077
1078 // Determine if this is a "fixed offset" zone, meaning that the output of
1079 // the utcoffset, dst and tzname functions does not depend on the specific
1080 // datetime passed.
1081 //
1082 // We make three simplifying assumptions here:
1083 //
1084 // 1. If tzrule_after is not std_only, it has transitions that might occur
1085 // (it is possible to construct TZ strings that specify STD and DST but
1086 // no transitions ever occur, such as AAA0BBB,0/0,J365/25).
1087 // 2. If self->_ttinfos contains more than one _ttinfo object, the objects
1088 // represent different offsets.
1089 // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise
1090 // fixed-offset zone with extra _ttinfos defined may appear to *not* be
1091 // a fixed offset zone).
1092 //
1093 // Violations to these assumptions would be fairly exotic, and exotic
1094 // zones should almost certainly not be used with datetime.time (the
1095 // only thing that would be affected by this).
1096 if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) {
1097 self->fixed_offset = 0;
1098 }
1099 else if (self->num_ttinfos == 0) {
1100 self->fixed_offset = 1;
1101 }
1102 else {
1103 int constant_offset =
1104 ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std);
1105 if (constant_offset < 0) {
1106 goto error;
1107 }
1108 else {
1109 self->fixed_offset = constant_offset;
1110 }
1111 }
1112
1113 int rv = 0;
1114 goto cleanup;
1115 error:
1116 // These resources only need to be freed if we have failed, if we succeed
1117 // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc
1118 // method to free the relevant resources.
1119 if (self->trans_list_utc != NULL) {
1120 PyMem_Free(self->trans_list_utc);
1121 self->trans_list_utc = NULL;
1122 }
1123
1124 for (size_t i = 0; i < 2; ++i) {
1125 if (self->trans_list_wall[i] != NULL) {
1126 PyMem_Free(self->trans_list_wall[i]);
1127 self->trans_list_wall[i] = NULL;
1128 }
1129 }
1130
1131 if (self->_ttinfos != NULL) {
1132 for (size_t i = 0; i < ttinfos_allocated; ++i) {
1133 xdecref_ttinfo(&(self->_ttinfos[i]));
1134 }
1135 PyMem_Free(self->_ttinfos);
1136 self->_ttinfos = NULL;
1137 }
1138
1139 if (self->trans_ttinfos != NULL) {
1140 PyMem_Free(self->trans_ttinfos);
1141 self->trans_ttinfos = NULL;
1142 }
1143
1144 rv = -1;
1145 cleanup:
1146 Py_XDECREF(data_tuple);
1147
1148 if (utcoff != NULL) {
1149 PyMem_Free(utcoff);
1150 }
1151
1152 if (dstoff != NULL) {
1153 PyMem_Free(dstoff);
1154 }
1155
1156 if (isdst != NULL) {
1157 PyMem_Free(isdst);
1158 }
1159
1160 if (trans_idx != NULL) {
1161 PyMem_Free(trans_idx);
1162 }
1163
1164 return rv;
1165 }
1166
1167 /* Function to calculate the local timestamp of a transition from the year. */
1168 int64_t
calendarrule_year_to_timestamp(TransitionRuleType * base_self,int year)1169 calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1170 {
1171 CalendarRule *self = (CalendarRule *)base_self;
1172
1173 // We want (year, month, day of month); we have year and month, but we
1174 // need to turn (week, day-of-week) into day-of-month
1175 //
1176 // Week 1 is the first week in which day `day` (where 0 = Sunday) appears.
1177 // Week 5 represents the last occurrence of day `day`, so we need to know
1178 // the first weekday of the month and the number of days in the month.
1179 int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7;
1180 uint8_t days_in_month = DAYS_IN_MONTH[self->month];
1181 if (self->month == 2 && is_leap_year(year)) {
1182 days_in_month += 1;
1183 }
1184
1185 // This equation seems magical, so I'll break it down:
1186 // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day
1187 // + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent
1188 // because this math is mod 7
1189 // 2. Get first day - desired day mod 7 (adjusting by 7 for negative
1190 // numbers so that -1 % 7 = 6).
1191 // 3. Add 1 because month days are a 1-based index.
1192 int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7;
1193 if (month_day < 0) {
1194 month_day += 7;
1195 }
1196 month_day += 1;
1197
1198 // Now use a 0-based index version of `week` to calculate the w-th
1199 // occurrence of `day`
1200 month_day += ((int8_t)(self->week) - 1) * 7;
1201
1202 // month_day will only be > days_in_month if w was 5, and `w` means "last
1203 // occurrence of `d`", so now we just check if we over-shot the end of the
1204 // month and if so knock off 1 week.
1205 if (month_day > days_in_month) {
1206 month_day -= 7;
1207 }
1208
1209 int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL;
1210 return ((ordinal * 86400) + (int64_t)(self->hour * 3600) +
1211 (int64_t)(self->minute * 60) + (int64_t)(self->second));
1212 }
1213
1214 /* Constructor for CalendarRule. */
1215 int
calendarrule_new(uint8_t month,uint8_t week,uint8_t day,int8_t hour,int8_t minute,int8_t second,CalendarRule * out)1216 calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour,
1217 int8_t minute, int8_t second, CalendarRule *out)
1218 {
1219 // These bounds come from the POSIX standard, which describes an Mm.n.d
1220 // rule as:
1221 //
1222 // The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <=
1223 // 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which
1224 // may occur in either the fourth or the fifth week). Week 1 is the first
1225 // week in which the d'th day occurs. Day zero is Sunday.
1226 if (month <= 0 || month > 12) {
1227 PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]");
1228 return -1;
1229 }
1230
1231 if (week <= 0 || week > 5) {
1232 PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]");
1233 return -1;
1234 }
1235
1236 // If the 'day' parameter type is changed to a signed type,
1237 // "day < 0" check must be added.
1238 if (/* day < 0 || */ day > 6) {
1239 PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]");
1240 return -1;
1241 }
1242
1243 TransitionRuleType base = {&calendarrule_year_to_timestamp};
1244
1245 CalendarRule new_offset = {
1246 .base = base,
1247 .month = month,
1248 .week = week,
1249 .day = day,
1250 .hour = hour,
1251 .minute = minute,
1252 .second = second,
1253 };
1254
1255 *out = new_offset;
1256 return 0;
1257 }
1258
1259 /* Function to calculate the local timestamp of a transition from the year.
1260 *
1261 * This translates the day of the year into a local timestamp — either a
1262 * 1-based Julian day, not including leap days, or the 0-based year-day,
1263 * including leap days.
1264 * */
1265 int64_t
dayrule_year_to_timestamp(TransitionRuleType * base_self,int year)1266 dayrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1267 {
1268 // The function signature requires a TransitionRuleType pointer, but this
1269 // function is only applicable to DayRule* objects.
1270 DayRule *self = (DayRule *)base_self;
1271
1272 // ymd_to_ord calculates the number of days since 0001-01-01, but we want
1273 // to know the number of days since 1970-01-01, so we must subtract off
1274 // the equivalent of ymd_to_ord(1970, 1, 1).
1275 //
1276 // We subtract off an additional 1 day to account for January 1st (we want
1277 // the number of full days *before* the date of the transition - partial
1278 // days are accounted for in the hour, minute and second portions.
1279 int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1;
1280
1281 // The Julian day specification skips over February 29th in leap years,
1282 // from the POSIX standard:
1283 //
1284 // Leap days shall not be counted. That is, in all years-including leap
1285 // years-February 28 is day 59 and March 1 is day 60. It is impossible to
1286 // refer explicitly to the occasional February 29.
1287 //
1288 // This is actually more useful than you'd think — if you want a rule that
1289 // always transitions on a given calendar day (other than February 29th),
1290 // you would use a Julian day, e.g. J91 always refers to April 1st and J365
1291 // always refers to December 31st.
1292 unsigned int day = self->day;
1293 if (self->julian && day >= 59 && is_leap_year(year)) {
1294 day += 1;
1295 }
1296
1297 return ((days_before_year + day) * 86400) + (self->hour * 3600) +
1298 (self->minute * 60) + self->second;
1299 }
1300
1301 /* Constructor for DayRule. */
1302 static int
dayrule_new(uint8_t julian,unsigned int day,int8_t hour,int8_t minute,int8_t second,DayRule * out)1303 dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute,
1304 int8_t second, DayRule *out)
1305 {
1306 // The POSIX standard specifies that Julian days must be in the range (1 <=
1307 // n <= 365) and that non-Julian (they call it "0-based Julian") days must
1308 // be in the range (0 <= n <= 365).
1309 if (day < julian || day > 365) {
1310 PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u",
1311 julian, day);
1312 return -1;
1313 }
1314
1315 TransitionRuleType base = {
1316 &dayrule_year_to_timestamp,
1317 };
1318
1319 DayRule tmp = {
1320 .base = base,
1321 .julian = julian,
1322 .day = day,
1323 .hour = hour,
1324 .minute = minute,
1325 .second = second,
1326 };
1327
1328 *out = tmp;
1329
1330 return 0;
1331 }
1332
1333 /* Calculate the start and end rules for a _tzrule in the given year. */
1334 static void
tzrule_transitions(_tzrule * rule,int year,int64_t * start,int64_t * end)1335 tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end)
1336 {
1337 assert(rule->start != NULL);
1338 assert(rule->end != NULL);
1339 *start = rule->start->year_to_timestamp(rule->start, year);
1340 *end = rule->end->year_to_timestamp(rule->end, year);
1341 }
1342
1343 /* Calculate the _ttinfo that applies at a given local time from a _tzrule.
1344 *
1345 * This takes a local timestamp and fold for disambiguation purposes; the year
1346 * could technically be calculated from the timestamp, but given that the
1347 * callers of this function already have the year information accessible from
1348 * the datetime struct, it is taken as an additional parameter to reduce
1349 * unnecessary calculation.
1350 * */
1351 static _ttinfo *
find_tzrule_ttinfo(_tzrule * rule,int64_t ts,unsigned char fold,int year)1352 find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year)
1353 {
1354 if (rule->std_only) {
1355 return &(rule->std);
1356 }
1357
1358 int64_t start, end;
1359 uint8_t isdst;
1360
1361 tzrule_transitions(rule, year, &start, &end);
1362
1363 // With fold = 0, the period (denominated in local time) with the smaller
1364 // offset starts at the end of the gap and ends at the end of the fold;
1365 // with fold = 1, it runs from the start of the gap to the beginning of the
1366 // fold.
1367 //
1368 // So in order to determine the DST boundaries we need to know both the
1369 // fold and whether DST is positive or negative (rare), and it turns out
1370 // that this boils down to fold XOR is_positive.
1371 if (fold == (rule->dst_diff >= 0)) {
1372 end -= rule->dst_diff;
1373 }
1374 else {
1375 start += rule->dst_diff;
1376 }
1377
1378 if (start < end) {
1379 isdst = (ts >= start) && (ts < end);
1380 }
1381 else {
1382 isdst = (ts < end) || (ts >= start);
1383 }
1384
1385 if (isdst) {
1386 return &(rule->dst);
1387 }
1388 else {
1389 return &(rule->std);
1390 }
1391 }
1392
1393 /* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time.
1394 *
1395 * This function can determine the _ttinfo that applies at a given epoch time,
1396 * (analogous to trans_list_utc), and whether or not the datetime is in a fold.
1397 * This is to be used in the .fromutc() function.
1398 *
1399 * The year is technically a redundant parameter, because it can be calculated
1400 * from the timestamp, but all callers of this function should have the year
1401 * in the datetime struct anyway, so taking it as a parameter saves unnecessary
1402 * calculation.
1403 **/
1404 static _ttinfo *
find_tzrule_ttinfo_fromutc(_tzrule * rule,int64_t ts,int year,unsigned char * fold)1405 find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
1406 unsigned char *fold)
1407 {
1408 if (rule->std_only) {
1409 *fold = 0;
1410 return &(rule->std);
1411 }
1412
1413 int64_t start, end;
1414 uint8_t isdst;
1415 tzrule_transitions(rule, year, &start, &end);
1416 start -= rule->std.utcoff_seconds;
1417 end -= rule->dst.utcoff_seconds;
1418
1419 if (start < end) {
1420 isdst = (ts >= start) && (ts < end);
1421 }
1422 else {
1423 isdst = (ts < end) || (ts >= start);
1424 }
1425
1426 // For positive DST, the ambiguous period is one dst_diff after the end of
1427 // DST; for negative DST, the ambiguous period is one dst_diff before the
1428 // start of DST.
1429 int64_t ambig_start, ambig_end;
1430 if (rule->dst_diff > 0) {
1431 ambig_start = end;
1432 ambig_end = end + rule->dst_diff;
1433 }
1434 else {
1435 ambig_start = start;
1436 ambig_end = start - rule->dst_diff;
1437 }
1438
1439 *fold = (ts >= ambig_start) && (ts < ambig_end);
1440
1441 if (isdst) {
1442 return &(rule->dst);
1443 }
1444 else {
1445 return &(rule->std);
1446 }
1447 }
1448
1449 /* Parse a TZ string in the format specified by the POSIX standard:
1450 *
1451 * std offset[dst[offset],start[/time],end[/time]]
1452 *
1453 * std and dst must be 3 or more characters long and must not contain a
1454 * leading colon, embedded digits, commas, nor a plus or minus signs; The
1455 * spaces between "std" and "offset" are only for display and are not actually
1456 * present in the string.
1457 *
1458 * The format of the offset is ``[+|-]hh[:mm[:ss]]``
1459 *
1460 * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3:
1461 *
1462 * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
1463 */
1464 static int
parse_tz_str(PyObject * tz_str_obj,_tzrule * out)1465 parse_tz_str(PyObject *tz_str_obj, _tzrule *out)
1466 {
1467 PyObject *std_abbr = NULL;
1468 PyObject *dst_abbr = NULL;
1469 TransitionRuleType *start = NULL;
1470 TransitionRuleType *end = NULL;
1471 // Initialize offsets to invalid value (> 24 hours)
1472 long std_offset = 1 << 20;
1473 long dst_offset = 1 << 20;
1474
1475 const char *tz_str = PyBytes_AsString(tz_str_obj);
1476 if (tz_str == NULL) {
1477 return -1;
1478 }
1479 const char *p = tz_str;
1480
1481 // Read the `std` abbreviation, which must be at least 3 characters long.
1482 Py_ssize_t num_chars = parse_abbr(p, &std_abbr);
1483 if (num_chars < 1) {
1484 PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
1485 goto error;
1486 }
1487
1488 p += num_chars;
1489
1490 // Now read the STD offset, which is required
1491 num_chars = parse_tz_delta(p, &std_offset);
1492 if (num_chars < 0) {
1493 PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj);
1494 goto error;
1495 }
1496 p += num_chars;
1497
1498 // If the string ends here, there is no DST, otherwise we must parse the
1499 // DST abbreviation and start and end dates and times.
1500 if (*p == '\0') {
1501 goto complete;
1502 }
1503
1504 num_chars = parse_abbr(p, &dst_abbr);
1505 if (num_chars < 1) {
1506 PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
1507 goto error;
1508 }
1509 p += num_chars;
1510
1511 if (*p == ',') {
1512 // From the POSIX standard:
1513 //
1514 // If no offset follows dst, the alternative time is assumed to be one
1515 // hour ahead of standard time.
1516 dst_offset = std_offset + 3600;
1517 }
1518 else {
1519 num_chars = parse_tz_delta(p, &dst_offset);
1520 if (num_chars < 0) {
1521 PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R",
1522 tz_str_obj);
1523 goto error;
1524 }
1525
1526 p += num_chars;
1527 }
1528
1529 TransitionRuleType **transitions[2] = {&start, &end};
1530 for (size_t i = 0; i < 2; ++i) {
1531 if (*p != ',') {
1532 PyErr_Format(PyExc_ValueError,
1533 "Missing transition rules in TZ string: %R",
1534 tz_str_obj);
1535 goto error;
1536 }
1537 p++;
1538
1539 num_chars = parse_transition_rule(p, transitions[i]);
1540 if (num_chars < 0) {
1541 PyErr_Format(PyExc_ValueError,
1542 "Malformed transition rule in TZ string: %R",
1543 tz_str_obj);
1544 goto error;
1545 }
1546 p += num_chars;
1547 }
1548
1549 if (*p != '\0') {
1550 PyErr_Format(PyExc_ValueError,
1551 "Extraneous characters at end of TZ string: %R",
1552 tz_str_obj);
1553 goto error;
1554 }
1555
1556 complete:
1557 build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out);
1558 Py_DECREF(std_abbr);
1559 Py_XDECREF(dst_abbr);
1560
1561 return 0;
1562 error:
1563 Py_XDECREF(std_abbr);
1564 if (dst_abbr != NULL && dst_abbr != Py_None) {
1565 Py_DECREF(dst_abbr);
1566 }
1567
1568 if (start != NULL) {
1569 PyMem_Free(start);
1570 }
1571
1572 if (end != NULL) {
1573 PyMem_Free(end);
1574 }
1575
1576 return -1;
1577 }
1578
1579 static int
parse_uint(const char * const p,uint8_t * value)1580 parse_uint(const char *const p, uint8_t *value)
1581 {
1582 if (!isdigit(*p)) {
1583 return -1;
1584 }
1585
1586 *value = (*p) - '0';
1587 return 0;
1588 }
1589
1590 /* Parse the STD and DST abbreviations from a TZ string. */
1591 static Py_ssize_t
parse_abbr(const char * const p,PyObject ** abbr)1592 parse_abbr(const char *const p, PyObject **abbr)
1593 {
1594 const char *ptr = p;
1595 char buff = *ptr;
1596 const char *str_start;
1597 const char *str_end;
1598
1599 if (*ptr == '<') {
1600 ptr++;
1601 str_start = ptr;
1602 while ((buff = *ptr) != '>') {
1603 // From the POSIX standard:
1604 //
1605 // In the quoted form, the first character shall be the less-than
1606 // ( '<' ) character and the last character shall be the
1607 // greater-than ( '>' ) character. All characters between these
1608 // quoting characters shall be alphanumeric characters from the
1609 // portable character set in the current locale, the plus-sign (
1610 // '+' ) character, or the minus-sign ( '-' ) character. The std
1611 // and dst fields in this case shall not include the quoting
1612 // characters.
1613 if (!isalpha(buff) && !isdigit(buff) && buff != '+' &&
1614 buff != '-') {
1615 return -1;
1616 }
1617 ptr++;
1618 }
1619 str_end = ptr;
1620 ptr++;
1621 }
1622 else {
1623 str_start = p;
1624 // From the POSIX standard:
1625 //
1626 // In the unquoted form, all characters in these fields shall be
1627 // alphabetic characters from the portable character set in the
1628 // current locale.
1629 while (isalpha(*ptr)) {
1630 ptr++;
1631 }
1632 str_end = ptr;
1633 }
1634
1635 *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start);
1636 if (*abbr == NULL) {
1637 return -1;
1638 }
1639
1640 return ptr - p;
1641 }
1642
1643 /* Parse a UTC offset from a TZ str. */
1644 static Py_ssize_t
parse_tz_delta(const char * const p,long * total_seconds)1645 parse_tz_delta(const char *const p, long *total_seconds)
1646 {
1647 // From the POSIX spec:
1648 //
1649 // Indicates the value added to the local time to arrive at Coordinated
1650 // Universal Time. The offset has the form:
1651 //
1652 // hh[:mm[:ss]]
1653 //
1654 // One or more digits may be used; the value is always interpreted as a
1655 // decimal number.
1656 //
1657 // The POSIX spec says that the values for `hour` must be between 0 and 24
1658 // hours, but RFC 8536 §3.3.1 specifies that the hours part of the
1659 // transition times may be signed and range from -167 to 167.
1660 long sign = -1;
1661 long hours = 0;
1662 long minutes = 0;
1663 long seconds = 0;
1664
1665 const char *ptr = p;
1666 char buff = *ptr;
1667 if (buff == '-' || buff == '+') {
1668 // Negative numbers correspond to *positive* offsets, from the spec:
1669 //
1670 // If preceded by a '-', the timezone shall be east of the Prime
1671 // Meridian; otherwise, it shall be west (which may be indicated by
1672 // an optional preceding '+' ).
1673 if (buff == '-') {
1674 sign = 1;
1675 }
1676
1677 ptr++;
1678 }
1679
1680 // The hour can be 1 or 2 numeric characters
1681 for (size_t i = 0; i < 2; ++i) {
1682 buff = *ptr;
1683 if (!isdigit(buff)) {
1684 if (i == 0) {
1685 return -1;
1686 }
1687 else {
1688 break;
1689 }
1690 }
1691
1692 hours *= 10;
1693 hours += buff - '0';
1694 ptr++;
1695 }
1696
1697 if (hours > 24 || hours < 0) {
1698 return -1;
1699 }
1700
1701 // Minutes and seconds always of the format ":dd"
1702 long *outputs[2] = {&minutes, &seconds};
1703 for (size_t i = 0; i < 2; ++i) {
1704 if (*ptr != ':') {
1705 goto complete;
1706 }
1707 ptr++;
1708
1709 for (size_t j = 0; j < 2; ++j) {
1710 buff = *ptr;
1711 if (!isdigit(buff)) {
1712 return -1;
1713 }
1714 *(outputs[i]) *= 10;
1715 *(outputs[i]) += buff - '0';
1716 ptr++;
1717 }
1718 }
1719
1720 complete:
1721 *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds);
1722
1723 return ptr - p;
1724 }
1725
1726 /* Parse the date portion of a transition rule. */
1727 static Py_ssize_t
parse_transition_rule(const char * const p,TransitionRuleType ** out)1728 parse_transition_rule(const char *const p, TransitionRuleType **out)
1729 {
1730 // The full transition rule indicates when to change back and forth between
1731 // STD and DST, and has the form:
1732 //
1733 // date[/time],date[/time]
1734 //
1735 // This function parses an individual date[/time] section, and returns
1736 // the number of characters that contributed to the transition rule. This
1737 // does not include the ',' at the end of the first rule.
1738 //
1739 // The POSIX spec states that if *time* is not given, the default is 02:00.
1740 const char *ptr = p;
1741 int8_t hour = 2;
1742 int8_t minute = 0;
1743 int8_t second = 0;
1744
1745 // Rules come in one of three flavors:
1746 //
1747 // 1. Jn: Julian day n, with no leap days.
1748 // 2. n: Day of year (0-based, with leap days)
1749 // 3. Mm.n.d: Specifying by month, week and day-of-week.
1750
1751 if (*ptr == 'M') {
1752 uint8_t month, week, day;
1753 ptr++;
1754 if (parse_uint(ptr, &month)) {
1755 return -1;
1756 }
1757 ptr++;
1758 if (*ptr != '.') {
1759 uint8_t tmp;
1760 if (parse_uint(ptr, &tmp)) {
1761 return -1;
1762 }
1763
1764 month *= 10;
1765 month += tmp;
1766 ptr++;
1767 }
1768
1769 uint8_t *values[2] = {&week, &day};
1770 for (size_t i = 0; i < 2; ++i) {
1771 if (*ptr != '.') {
1772 return -1;
1773 }
1774 ptr++;
1775
1776 if (parse_uint(ptr, values[i])) {
1777 return -1;
1778 }
1779 ptr++;
1780 }
1781
1782 if (*ptr == '/') {
1783 ptr++;
1784 Py_ssize_t num_chars =
1785 parse_transition_time(ptr, &hour, &minute, &second);
1786 if (num_chars < 0) {
1787 return -1;
1788 }
1789 ptr += num_chars;
1790 }
1791
1792 CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
1793 if (rv == NULL) {
1794 return -1;
1795 }
1796
1797 if (calendarrule_new(month, week, day, hour, minute, second, rv)) {
1798 PyMem_Free(rv);
1799 return -1;
1800 }
1801
1802 *out = (TransitionRuleType *)rv;
1803 }
1804 else {
1805 uint8_t julian = 0;
1806 unsigned int day = 0;
1807 if (*ptr == 'J') {
1808 julian = 1;
1809 ptr++;
1810 }
1811
1812 for (size_t i = 0; i < 3; ++i) {
1813 if (!isdigit(*ptr)) {
1814 if (i == 0) {
1815 return -1;
1816 }
1817 break;
1818 }
1819 day *= 10;
1820 day += (*ptr) - '0';
1821 ptr++;
1822 }
1823
1824 if (*ptr == '/') {
1825 ptr++;
1826 Py_ssize_t num_chars =
1827 parse_transition_time(ptr, &hour, &minute, &second);
1828 if (num_chars < 0) {
1829 return -1;
1830 }
1831 ptr += num_chars;
1832 }
1833
1834 DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
1835 if (rv == NULL) {
1836 return -1;
1837 }
1838
1839 if (dayrule_new(julian, day, hour, minute, second, rv)) {
1840 PyMem_Free(rv);
1841 return -1;
1842 }
1843 *out = (TransitionRuleType *)rv;
1844 }
1845
1846 return ptr - p;
1847 }
1848
1849 /* Parse the time portion of a transition rule (e.g. following an /) */
1850 static Py_ssize_t
parse_transition_time(const char * const p,int8_t * hour,int8_t * minute,int8_t * second)1851 parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
1852 int8_t *second)
1853 {
1854 // From the spec:
1855 //
1856 // The time has the same format as offset except that no leading sign
1857 // ( '-' or '+' ) is allowed.
1858 //
1859 // The format for the offset is:
1860 //
1861 // h[h][:mm[:ss]]
1862 //
1863 // RFC 8536 also allows transition times to be signed and to range from
1864 // -167 to +167, but the current version only supports [0, 99].
1865 //
1866 // TODO: Support the full range of transition hours.
1867 int8_t *components[3] = {hour, minute, second};
1868 const char *ptr = p;
1869 int8_t sign = 1;
1870
1871 if (*ptr == '-' || *ptr == '+') {
1872 if (*ptr == '-') {
1873 sign = -1;
1874 }
1875 ptr++;
1876 }
1877
1878 for (size_t i = 0; i < 3; ++i) {
1879 if (i > 0) {
1880 if (*ptr != ':') {
1881 break;
1882 }
1883 ptr++;
1884 }
1885
1886 uint8_t buff = 0;
1887 for (size_t j = 0; j < 2; j++) {
1888 if (!isdigit(*ptr)) {
1889 if (i == 0 && j > 0) {
1890 break;
1891 }
1892 return -1;
1893 }
1894
1895 buff *= 10;
1896 buff += (*ptr) - '0';
1897 ptr++;
1898 }
1899
1900 *(components[i]) = sign * buff;
1901 }
1902
1903 return ptr - p;
1904 }
1905
1906 /* Constructor for a _tzrule.
1907 *
1908 * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which
1909 * case `dst_offset` will be ignored and `start` and `end` are expected to be
1910 * NULL as well.
1911 *
1912 * Returns 0 on success.
1913 */
1914 static int
build_tzrule(PyObject * std_abbr,PyObject * dst_abbr,long std_offset,long dst_offset,TransitionRuleType * start,TransitionRuleType * end,_tzrule * out)1915 build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
1916 long dst_offset, TransitionRuleType *start,
1917 TransitionRuleType *end, _tzrule *out)
1918 {
1919 _tzrule rv = {{0}};
1920
1921 rv.start = start;
1922 rv.end = end;
1923
1924 if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) {
1925 goto error;
1926 }
1927
1928 if (dst_abbr != NULL) {
1929 rv.dst_diff = dst_offset - std_offset;
1930 if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) {
1931 goto error;
1932 }
1933 }
1934 else {
1935 rv.std_only = 1;
1936 }
1937
1938 *out = rv;
1939
1940 return 0;
1941 error:
1942 xdecref_ttinfo(&rv.std);
1943 xdecref_ttinfo(&rv.dst);
1944 return -1;
1945 }
1946
1947 /* Destructor for _tzrule. */
1948 static void
free_tzrule(_tzrule * tzrule)1949 free_tzrule(_tzrule *tzrule)
1950 {
1951 xdecref_ttinfo(&(tzrule->std));
1952 if (!tzrule->std_only) {
1953 xdecref_ttinfo(&(tzrule->dst));
1954 }
1955
1956 if (tzrule->start != NULL) {
1957 PyMem_Free(tzrule->start);
1958 }
1959
1960 if (tzrule->end != NULL) {
1961 PyMem_Free(tzrule->end);
1962 }
1963 }
1964
1965 /* Calculate DST offsets from transitions and UTC offsets
1966 *
1967 * This is necessary because each C `ttinfo` only contains the UTC offset,
1968 * time zone abbreviation and an isdst boolean - it does not include the
1969 * amount of the DST offset, but we need the amount for the dst() function.
1970 *
1971 * Thus function uses heuristics to infer what the offset should be, so it
1972 * is not guaranteed that this will work for all zones. If we cannot assign
1973 * a value for a given DST offset, we'll assume it's 1H rather than 0H, so
1974 * bool(dt.dst()) will always match ttinfo.isdst.
1975 */
1976 static void
utcoff_to_dstoff(size_t * trans_idx,long * utcoffs,long * dstoffs,unsigned char * isdsts,size_t num_transitions,size_t num_ttinfos)1977 utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
1978 unsigned char *isdsts, size_t num_transitions,
1979 size_t num_ttinfos)
1980 {
1981 size_t dst_count = 0;
1982 size_t dst_found = 0;
1983 for (size_t i = 0; i < num_ttinfos; ++i) {
1984 dst_count++;
1985 }
1986
1987 for (size_t i = 1; i < num_transitions; ++i) {
1988 if (dst_count == dst_found) {
1989 break;
1990 }
1991
1992 size_t idx = trans_idx[i];
1993 size_t comp_idx = trans_idx[i - 1];
1994
1995 // Only look at DST offsets that have nto been assigned already
1996 if (!isdsts[idx] || dstoffs[idx] != 0) {
1997 continue;
1998 }
1999
2000 long dstoff = 0;
2001 long utcoff = utcoffs[idx];
2002
2003 if (!isdsts[comp_idx]) {
2004 dstoff = utcoff - utcoffs[comp_idx];
2005 }
2006
2007 if (!dstoff && idx < (num_ttinfos - 1)) {
2008 comp_idx = trans_idx[i + 1];
2009
2010 // If the following transition is also DST and we couldn't find
2011 // the DST offset by this point, we're going to have to skip it
2012 // and hope this transition gets assigned later
2013 if (isdsts[comp_idx]) {
2014 continue;
2015 }
2016
2017 dstoff = utcoff - utcoffs[comp_idx];
2018 }
2019
2020 if (dstoff) {
2021 dst_found++;
2022 dstoffs[idx] = dstoff;
2023 }
2024 }
2025
2026 if (dst_found < dst_count) {
2027 // If there are time zones we didn't find a value for, we'll end up
2028 // with dstoff = 0 for something where isdst=1. This is obviously
2029 // wrong — one hour will be a much better guess than 0.
2030 for (size_t idx = 0; idx < num_ttinfos; ++idx) {
2031 if (isdsts[idx] && !dstoffs[idx]) {
2032 dstoffs[idx] = 3600;
2033 }
2034 }
2035 }
2036 }
2037
2038 #define _swap(x, y, buffer) \
2039 buffer = x; \
2040 x = y; \
2041 y = buffer;
2042
2043 /* Calculate transitions in local time from UTC time and offsets.
2044 *
2045 * We want to know when each transition occurs, denominated in the number of
2046 * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in
2047 * *local time* (note: this is *not* equivalent to the output of
2048 * datetime.timestamp, which is the total number of seconds actual elapsed
2049 * since 1970-01-01T00:00:00Z in UTC).
2050 *
2051 * This is an ambiguous question because "local time" can be ambiguous — but it
2052 * is disambiguated by the `fold` parameter, so we allocate two arrays:
2053 *
2054 * trans_local[0]: The wall-time transitions for fold=0
2055 * trans_local[1]: The wall-time transitions for fold=1
2056 *
2057 * This returns 0 on success and a negative number of failure. The trans_local
2058 * arrays must be freed if they are not NULL.
2059 */
2060 static int
ts_to_local(size_t * trans_idx,int64_t * trans_utc,long * utcoff,int64_t * trans_local[2],size_t num_ttinfos,size_t num_transitions)2061 ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
2062 int64_t *trans_local[2], size_t num_ttinfos,
2063 size_t num_transitions)
2064 {
2065 if (num_transitions == 0) {
2066 return 0;
2067 }
2068
2069 // Copy the UTC transitions into each array to be modified in place later
2070 for (size_t i = 0; i < 2; ++i) {
2071 trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t));
2072 if (trans_local[i] == NULL) {
2073 return -1;
2074 }
2075
2076 memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t));
2077 }
2078
2079 int64_t offset_0, offset_1, buff;
2080 if (num_ttinfos > 1) {
2081 offset_0 = utcoff[0];
2082 offset_1 = utcoff[trans_idx[0]];
2083
2084 if (offset_1 > offset_0) {
2085 _swap(offset_0, offset_1, buff);
2086 }
2087 }
2088 else {
2089 offset_0 = utcoff[0];
2090 offset_1 = utcoff[0];
2091 }
2092
2093 trans_local[0][0] += offset_0;
2094 trans_local[1][0] += offset_1;
2095
2096 for (size_t i = 1; i < num_transitions; ++i) {
2097 offset_0 = utcoff[trans_idx[i - 1]];
2098 offset_1 = utcoff[trans_idx[i]];
2099
2100 if (offset_1 > offset_0) {
2101 _swap(offset_1, offset_0, buff);
2102 }
2103
2104 trans_local[0][i] += offset_0;
2105 trans_local[1][i] += offset_1;
2106 }
2107
2108 return 0;
2109 }
2110
2111 /* Simple bisect_right binary search implementation */
2112 static size_t
_bisect(const int64_t value,const int64_t * arr,size_t size)2113 _bisect(const int64_t value, const int64_t *arr, size_t size)
2114 {
2115 size_t lo = 0;
2116 size_t hi = size;
2117 size_t m;
2118
2119 while (lo < hi) {
2120 m = (lo + hi) / 2;
2121 if (arr[m] > value) {
2122 hi = m;
2123 }
2124 else {
2125 lo = m + 1;
2126 }
2127 }
2128
2129 return hi;
2130 }
2131
2132 /* Find the ttinfo rules that apply at a given local datetime. */
2133 static _ttinfo *
find_ttinfo(PyZoneInfo_ZoneInfo * self,PyObject * dt)2134 find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt)
2135 {
2136 // datetime.time has a .tzinfo attribute that passes None as the dt
2137 // argument; it only really has meaning for fixed-offset zones.
2138 if (dt == Py_None) {
2139 if (self->fixed_offset) {
2140 return &(self->tzrule_after.std);
2141 }
2142 else {
2143 return &NO_TTINFO;
2144 }
2145 }
2146
2147 int64_t ts;
2148 if (get_local_timestamp(dt, &ts)) {
2149 return NULL;
2150 }
2151
2152 unsigned char fold = PyDateTime_DATE_GET_FOLD(dt);
2153 assert(fold < 2);
2154 int64_t *local_transitions = self->trans_list_wall[fold];
2155 size_t num_trans = self->num_transitions;
2156
2157 if (num_trans && ts < local_transitions[0]) {
2158 return self->ttinfo_before;
2159 }
2160 else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) {
2161 return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold,
2162 PyDateTime_GET_YEAR(dt));
2163 }
2164 else {
2165 size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1;
2166 assert(idx < self->num_transitions);
2167 return self->trans_ttinfos[idx];
2168 }
2169 }
2170
2171 static int
is_leap_year(int year)2172 is_leap_year(int year)
2173 {
2174 const unsigned int ayear = (unsigned int)year;
2175 return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0);
2176 }
2177
2178 /* Calculates ordinal datetime from year, month and day. */
2179 static int
ymd_to_ord(int y,int m,int d)2180 ymd_to_ord(int y, int m, int d)
2181 {
2182 y -= 1;
2183 int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400);
2184 int yearday = DAYS_BEFORE_MONTH[m];
2185 if (m > 2 && is_leap_year(y + 1)) {
2186 yearday += 1;
2187 }
2188
2189 return days_before_year + yearday + d;
2190 }
2191
2192 /* Calculate the number of seconds since 1970-01-01 in local time.
2193 *
2194 * This gets a datetime in the same "units" as self->trans_list_wall so that we
2195 * can easily determine which transitions a datetime falls between. See the
2196 * comment above ts_to_local for more information.
2197 * */
2198 static int
get_local_timestamp(PyObject * dt,int64_t * local_ts)2199 get_local_timestamp(PyObject *dt, int64_t *local_ts)
2200 {
2201 assert(local_ts != NULL);
2202
2203 int hour, minute, second;
2204 int ord;
2205 if (PyDateTime_CheckExact(dt)) {
2206 int y = PyDateTime_GET_YEAR(dt);
2207 int m = PyDateTime_GET_MONTH(dt);
2208 int d = PyDateTime_GET_DAY(dt);
2209 hour = PyDateTime_DATE_GET_HOUR(dt);
2210 minute = PyDateTime_DATE_GET_MINUTE(dt);
2211 second = PyDateTime_DATE_GET_SECOND(dt);
2212
2213 ord = ymd_to_ord(y, m, d);
2214 }
2215 else {
2216 PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL);
2217 if (num == NULL) {
2218 return -1;
2219 }
2220
2221 ord = PyLong_AsLong(num);
2222 Py_DECREF(num);
2223 if (ord == -1 && PyErr_Occurred()) {
2224 return -1;
2225 }
2226
2227 num = PyObject_GetAttrString(dt, "hour");
2228 if (num == NULL) {
2229 return -1;
2230 }
2231 hour = PyLong_AsLong(num);
2232 Py_DECREF(num);
2233 if (hour == -1) {
2234 return -1;
2235 }
2236
2237 num = PyObject_GetAttrString(dt, "minute");
2238 if (num == NULL) {
2239 return -1;
2240 }
2241 minute = PyLong_AsLong(num);
2242 Py_DECREF(num);
2243 if (minute == -1) {
2244 return -1;
2245 }
2246
2247 num = PyObject_GetAttrString(dt, "second");
2248 if (num == NULL) {
2249 return -1;
2250 }
2251 second = PyLong_AsLong(num);
2252 Py_DECREF(num);
2253 if (second == -1) {
2254 return -1;
2255 }
2256 }
2257
2258 *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 +
2259 (int64_t)(hour * 3600 + minute * 60 + second);
2260
2261 return 0;
2262 }
2263
2264 /////
2265 // Functions for cache handling
2266
2267 /* Constructor for StrongCacheNode */
2268 static StrongCacheNode *
strong_cache_node_new(PyObject * key,PyObject * zone)2269 strong_cache_node_new(PyObject *key, PyObject *zone)
2270 {
2271 StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode));
2272 if (node == NULL) {
2273 return NULL;
2274 }
2275
2276 Py_INCREF(key);
2277 Py_INCREF(zone);
2278
2279 node->next = NULL;
2280 node->prev = NULL;
2281 node->key = key;
2282 node->zone = zone;
2283
2284 return node;
2285 }
2286
2287 /* Destructor for StrongCacheNode */
2288 void
strong_cache_node_free(StrongCacheNode * node)2289 strong_cache_node_free(StrongCacheNode *node)
2290 {
2291 Py_XDECREF(node->key);
2292 Py_XDECREF(node->zone);
2293
2294 PyMem_Free(node);
2295 }
2296
2297 /* Frees all nodes at or after a specified root in the strong cache.
2298 *
2299 * This can be used on the root node to free the entire cache or it can be used
2300 * to clear all nodes that have been expired (which, if everything is going
2301 * right, will actually only be 1 node at a time).
2302 */
2303 void
strong_cache_free(StrongCacheNode * root)2304 strong_cache_free(StrongCacheNode *root)
2305 {
2306 StrongCacheNode *node = root;
2307 StrongCacheNode *next_node;
2308 while (node != NULL) {
2309 next_node = node->next;
2310 strong_cache_node_free(node);
2311
2312 node = next_node;
2313 }
2314 }
2315
2316 /* Removes a node from the cache and update its neighbors.
2317 *
2318 * This is used both when ejecting a node from the cache and when moving it to
2319 * the front of the cache.
2320 */
2321 static void
remove_from_strong_cache(StrongCacheNode * node)2322 remove_from_strong_cache(StrongCacheNode *node)
2323 {
2324 if (ZONEINFO_STRONG_CACHE == node) {
2325 ZONEINFO_STRONG_CACHE = node->next;
2326 }
2327
2328 if (node->prev != NULL) {
2329 node->prev->next = node->next;
2330 }
2331
2332 if (node->next != NULL) {
2333 node->next->prev = node->prev;
2334 }
2335
2336 node->next = NULL;
2337 node->prev = NULL;
2338 }
2339
2340 /* Retrieves the node associated with a key, if it exists.
2341 *
2342 * This traverses the strong cache until it finds a matching key and returns a
2343 * pointer to the relevant node if found. Returns NULL if no node is found.
2344 *
2345 * root may be NULL, indicating an empty cache.
2346 */
2347 static StrongCacheNode *
find_in_strong_cache(const StrongCacheNode * const root,PyObject * const key)2348 find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key)
2349 {
2350 const StrongCacheNode *node = root;
2351 while (node != NULL) {
2352 int rv = PyObject_RichCompareBool(key, node->key, Py_EQ);
2353 if (rv < 0) {
2354 return NULL;
2355 }
2356 if (rv) {
2357 return (StrongCacheNode *)node;
2358 }
2359
2360 node = node->next;
2361 }
2362
2363 return NULL;
2364 }
2365
2366 /* Ejects a given key from the class's strong cache, if applicable.
2367 *
2368 * This function is used to enable the per-key functionality in clear_cache.
2369 */
2370 static int
eject_from_strong_cache(const PyTypeObject * const type,PyObject * key)2371 eject_from_strong_cache(const PyTypeObject *const type, PyObject *key)
2372 {
2373 if (type != &PyZoneInfo_ZoneInfoType) {
2374 return 0;
2375 }
2376
2377 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2378 if (node != NULL) {
2379 remove_from_strong_cache(node);
2380
2381 strong_cache_node_free(node);
2382 }
2383 else if (PyErr_Occurred()) {
2384 return -1;
2385 }
2386 return 0;
2387 }
2388
2389 /* Moves a node to the front of the LRU cache.
2390 *
2391 * The strong cache is an LRU cache, so whenever a given node is accessed, if
2392 * it is not at the front of the cache, it needs to be moved there.
2393 */
2394 static void
move_strong_cache_node_to_front(StrongCacheNode ** root,StrongCacheNode * node)2395 move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node)
2396 {
2397 StrongCacheNode *root_p = *root;
2398 if (root_p == node) {
2399 return;
2400 }
2401
2402 remove_from_strong_cache(node);
2403
2404 node->prev = NULL;
2405 node->next = root_p;
2406
2407 if (root_p != NULL) {
2408 root_p->prev = node;
2409 }
2410
2411 *root = node;
2412 }
2413
2414 /* Retrieves a ZoneInfo from the strong cache if it's present.
2415 *
2416 * This function finds the ZoneInfo by key and if found will move the node to
2417 * the front of the LRU cache and return a new reference to it. It returns NULL
2418 * if the key is not in the cache.
2419 *
2420 * The strong cache is currently only implemented for the base class, so this
2421 * always returns a cache miss for subclasses.
2422 */
2423 static PyObject *
zone_from_strong_cache(const PyTypeObject * const type,PyObject * const key)2424 zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key)
2425 {
2426 if (type != &PyZoneInfo_ZoneInfoType) {
2427 return NULL; // Strong cache currently only implemented for base class
2428 }
2429
2430 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2431
2432 if (node != NULL) {
2433 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node);
2434 Py_INCREF(node->zone);
2435 return node->zone;
2436 }
2437
2438 return NULL; // Cache miss
2439 }
2440
2441 /* Inserts a new key into the strong LRU cache.
2442 *
2443 * This function is only to be used after a cache miss — it creates a new node
2444 * at the front of the cache and ejects any stale entries (keeping the size of
2445 * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE).
2446 */
2447 static void
update_strong_cache(const PyTypeObject * const type,PyObject * key,PyObject * zone)2448 update_strong_cache(const PyTypeObject *const type, PyObject *key,
2449 PyObject *zone)
2450 {
2451 if (type != &PyZoneInfo_ZoneInfoType) {
2452 return;
2453 }
2454
2455 StrongCacheNode *new_node = strong_cache_node_new(key, zone);
2456
2457 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node);
2458
2459 StrongCacheNode *node = new_node->next;
2460 for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) {
2461 if (node == NULL) {
2462 return;
2463 }
2464 node = node->next;
2465 }
2466
2467 // Everything beyond this point needs to be freed
2468 if (node != NULL) {
2469 if (node->prev != NULL) {
2470 node->prev->next = NULL;
2471 }
2472 strong_cache_free(node);
2473 }
2474 }
2475
2476 /* Clears all entries into a type's strong cache.
2477 *
2478 * Because the strong cache is not implemented for subclasses, this is a no-op
2479 * for everything except the base class.
2480 */
2481 void
clear_strong_cache(const PyTypeObject * const type)2482 clear_strong_cache(const PyTypeObject *const type)
2483 {
2484 if (type != &PyZoneInfo_ZoneInfoType) {
2485 return;
2486 }
2487
2488 strong_cache_free(ZONEINFO_STRONG_CACHE);
2489 ZONEINFO_STRONG_CACHE = NULL;
2490 }
2491
2492 static PyObject *
new_weak_cache(void)2493 new_weak_cache(void)
2494 {
2495 PyObject *weakref_module = PyImport_ImportModule("weakref");
2496 if (weakref_module == NULL) {
2497 return NULL;
2498 }
2499
2500 PyObject *weak_cache =
2501 PyObject_CallMethod(weakref_module, "WeakValueDictionary", "");
2502 Py_DECREF(weakref_module);
2503 return weak_cache;
2504 }
2505
2506 static int
initialize_caches(void)2507 initialize_caches(void)
2508 {
2509 // TODO: Move to a PyModule_GetState / PEP 573 based caching system.
2510 if (TIMEDELTA_CACHE == NULL) {
2511 TIMEDELTA_CACHE = PyDict_New();
2512 }
2513 else {
2514 Py_INCREF(TIMEDELTA_CACHE);
2515 }
2516
2517 if (TIMEDELTA_CACHE == NULL) {
2518 return -1;
2519 }
2520
2521 if (ZONEINFO_WEAK_CACHE == NULL) {
2522 ZONEINFO_WEAK_CACHE = new_weak_cache();
2523 }
2524 else {
2525 Py_INCREF(ZONEINFO_WEAK_CACHE);
2526 }
2527
2528 if (ZONEINFO_WEAK_CACHE == NULL) {
2529 return -1;
2530 }
2531
2532 return 0;
2533 }
2534
2535 static PyObject *
zoneinfo_init_subclass(PyTypeObject * cls,PyObject * args,PyObject ** kwargs)2536 zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs)
2537 {
2538 PyObject *weak_cache = new_weak_cache();
2539 if (weak_cache == NULL) {
2540 return NULL;
2541 }
2542
2543 if (PyObject_SetAttrString((PyObject *)cls, "_weak_cache",
2544 weak_cache) < 0) {
2545 Py_DECREF(weak_cache);
2546 return NULL;
2547 }
2548 Py_DECREF(weak_cache);
2549 Py_RETURN_NONE;
2550 }
2551
2552 /////
2553 // Specify the ZoneInfo type
2554 static PyMethodDef zoneinfo_methods[] = {
2555 {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache,
2556 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2557 PyDoc_STR("Clear the ZoneInfo cache.")},
2558 {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache,
2559 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2560 PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")},
2561 {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file,
2562 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2563 PyDoc_STR("Create a ZoneInfo file from a file object.")},
2564 {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O,
2565 PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at "
2566 "the given datetime.")},
2567 {"dst", (PyCFunction)zoneinfo_dst, METH_O,
2568 PyDoc_STR("Retrieve a timedelta representing the amount of DST applied "
2569 "in a zone at the given datetime.")},
2570 {"tzname", (PyCFunction)zoneinfo_tzname, METH_O,
2571 PyDoc_STR("Retrieve a string containing the abbreviation for the time "
2572 "zone that applies in a zone at a given datetime.")},
2573 {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O,
2574 PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted "
2575 "datetime in local time.")},
2576 {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS,
2577 PyDoc_STR("Function for serialization with the pickle protocol.")},
2578 {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS,
2579 PyDoc_STR("Private method used in unpickling.")},
2580 {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass,
2581 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2582 PyDoc_STR("Function to initialize subclasses.")},
2583 {NULL} /* Sentinel */
2584 };
2585
2586 static PyMemberDef zoneinfo_members[] = {
2587 {.name = "key",
2588 .offset = offsetof(PyZoneInfo_ZoneInfo, key),
2589 .type = T_OBJECT_EX,
2590 .flags = READONLY,
2591 .doc = NULL},
2592 {NULL}, /* Sentinel */
2593 };
2594
2595 static PyTypeObject PyZoneInfo_ZoneInfoType = {
2596 PyVarObject_HEAD_INIT(NULL, 0) //
2597 .tp_name = "zoneinfo.ZoneInfo",
2598 .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo),
2599 .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist),
2600 .tp_repr = (reprfunc)zoneinfo_repr,
2601 .tp_str = (reprfunc)zoneinfo_str,
2602 .tp_getattro = PyObject_GenericGetAttr,
2603 .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE),
2604 /* .tp_doc = zoneinfo_doc, */
2605 .tp_methods = zoneinfo_methods,
2606 .tp_members = zoneinfo_members,
2607 .tp_new = zoneinfo_new,
2608 .tp_dealloc = zoneinfo_dealloc,
2609 };
2610
2611 /////
2612 // Specify the _zoneinfo module
2613 static PyMethodDef module_methods[] = {{NULL, NULL}};
2614 static void
module_free(void * m)2615 module_free(void *m)
2616 {
2617 Py_XDECREF(_tzpath_find_tzfile);
2618 _tzpath_find_tzfile = NULL;
2619
2620 Py_XDECREF(_common_mod);
2621 _common_mod = NULL;
2622
2623 Py_XDECREF(io_open);
2624 io_open = NULL;
2625
2626 xdecref_ttinfo(&NO_TTINFO);
2627
2628 if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) {
2629 Py_DECREF(TIMEDELTA_CACHE);
2630 } else {
2631 Py_CLEAR(TIMEDELTA_CACHE);
2632 }
2633
2634 if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) {
2635 Py_DECREF(ZONEINFO_WEAK_CACHE);
2636 } else {
2637 Py_CLEAR(ZONEINFO_WEAK_CACHE);
2638 }
2639
2640 clear_strong_cache(&PyZoneInfo_ZoneInfoType);
2641 }
2642
2643 static int
zoneinfomodule_exec(PyObject * m)2644 zoneinfomodule_exec(PyObject *m)
2645 {
2646 PyDateTime_IMPORT;
2647 if (PyDateTimeAPI == NULL) {
2648 goto error;
2649 }
2650 PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType;
2651 if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) {
2652 goto error;
2653 }
2654
2655 if (PyModule_AddObjectRef(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType) < 0) {
2656 goto error;
2657 }
2658
2659 /* Populate imports */
2660 PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath");
2661 if (_tzpath_module == NULL) {
2662 goto error;
2663 }
2664
2665 _tzpath_find_tzfile =
2666 PyObject_GetAttrString(_tzpath_module, "find_tzfile");
2667 Py_DECREF(_tzpath_module);
2668 if (_tzpath_find_tzfile == NULL) {
2669 goto error;
2670 }
2671
2672 PyObject *io_module = PyImport_ImportModule("io");
2673 if (io_module == NULL) {
2674 goto error;
2675 }
2676
2677 io_open = PyObject_GetAttrString(io_module, "open");
2678 Py_DECREF(io_module);
2679 if (io_open == NULL) {
2680 goto error;
2681 }
2682
2683 _common_mod = PyImport_ImportModule("zoneinfo._common");
2684 if (_common_mod == NULL) {
2685 goto error;
2686 }
2687
2688 if (NO_TTINFO.utcoff == NULL) {
2689 NO_TTINFO.utcoff = Py_None;
2690 NO_TTINFO.dstoff = Py_None;
2691 NO_TTINFO.tzname = Py_None;
2692
2693 for (size_t i = 0; i < 3; ++i) {
2694 Py_INCREF(Py_None);
2695 }
2696 }
2697
2698 if (initialize_caches()) {
2699 goto error;
2700 }
2701
2702 return 0;
2703
2704 error:
2705 return -1;
2706 }
2707
2708 static PyModuleDef_Slot zoneinfomodule_slots[] = {
2709 {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}};
2710
2711 static struct PyModuleDef zoneinfomodule = {
2712 PyModuleDef_HEAD_INIT,
2713 .m_name = "_zoneinfo",
2714 .m_doc = "C implementation of the zoneinfo module",
2715 .m_size = 0,
2716 .m_methods = module_methods,
2717 .m_slots = zoneinfomodule_slots,
2718 .m_free = (freefunc)module_free};
2719
2720 PyMODINIT_FUNC
PyInit__zoneinfo(void)2721 PyInit__zoneinfo(void)
2722 {
2723 return PyModuleDef_Init(&zoneinfomodule);
2724 }
2725