from test import support, seq_tests
import unittest

import gc
import pickle

# For tuple hashes, we normally only run a test to ensure that we get
# the same results across platforms in a handful of cases. If that's
# so, there's no real point to running more. Set RUN_ALL_HASH_TESTS to
# run more anyway. That's usually of real interest only when analyzing,
# or changing, the hash algorithm. In which case it's usually also
# most useful to set JUST_SHOW_HASH_RESULTS, to see all the results
# instead of wrestling with test "failures". See the bottom of the
# file for extensive notes on what we're testing here and why.
RUN_ALL_HASH_TESTS = False
JUST_SHOW_HASH_RESULTS = False  # if RUN_ALL_HASH_TESTS, just display

class TupleTest(seq_tests.CommonTest):
    type2test = tuple

    def test_getitem_error(self):
        t = ()
        msg = "tuple indices must be integers or slices"
        with self.assertRaisesRegex(TypeError, msg):
            t['a']

    def test_constructors(self):
        super().test_constructors()
        # calling built-in types without argument must return empty
        self.assertEqual(tuple(), ())
        t0_3 = (0, 1, 2, 3)
        t0_3_bis = tuple(t0_3)
        self.assertTrue(t0_3 is t0_3_bis)
        self.assertEqual(tuple([]), ())
        self.assertEqual(tuple([0, 1, 2, 3]), (0, 1, 2, 3))
        self.assertEqual(tuple(''), ())
        self.assertEqual(tuple('spam'), ('s', 'p', 'a', 'm'))
        self.assertEqual(tuple(x for x in range(10) if x % 2),
                         (1, 3, 5, 7, 9))

    def test_keyword_args(self):
        with self.assertRaisesRegex(TypeError, 'keyword argument'):
            tuple(sequence=())

    def test_keywords_in_subclass(self):
        class subclass(tuple):
            pass
        u = subclass([1, 2])
        self.assertIs(type(u), subclass)
        self.assertEqual(list(u), [1, 2])
        with self.assertRaises(TypeError):
            subclass(sequence=())

        class subclass_with_init(tuple):
            def __init__(self, arg, newarg=None):
                self.newarg = newarg
        u = subclass_with_init([1, 2], newarg=3)
        self.assertIs(type(u), subclass_with_init)
        self.assertEqual(list(u), [1, 2])
        self.assertEqual(u.newarg, 3)

        class subclass_with_new(tuple):
            def __new__(cls, arg, newarg=None):
                self = super().__new__(cls, arg)
                self.newarg = newarg
                return self
        u = subclass_with_new([1, 2], newarg=3)
        self.assertIs(type(u), subclass_with_new)
        self.assertEqual(list(u), [1, 2])
        self.assertEqual(u.newarg, 3)

    def test_truth(self):
        super().test_truth()
        self.assertTrue(not ())
        self.assertTrue((42, ))

    def test_len(self):
        super().test_len()
        self.assertEqual(len(()), 0)
        self.assertEqual(len((0,)), 1)
        self.assertEqual(len((0, 1, 2)), 3)

    def test_iadd(self):
        super().test_iadd()
        u = (0, 1)
        u2 = u
        u += (2, 3)
        self.assertTrue(u is not u2)

    def test_imul(self):
        super().test_imul()
        u = (0, 1)
        u2 = u
        u *= 3
        self.assertTrue(u is not u2)

    def test_tupleresizebug(self):
        # Check that a specific bug in _PyTuple_Resize() is squashed.
        def f():
            for i in range(1000):
                yield i
        self.assertEqual(list(tuple(f())), list(range(1000)))

    # We expect tuples whose base components have deterministic hashes to
    # have deterministic hashes too - and, indeed, the same hashes across
    # platforms with hash codes of the same bit width.
    def test_hash_exact(self):
        def check_one_exact(t, e32, e64):
            got = hash(t)
            expected = e32 if support.NHASHBITS == 32 else e64
            if got != expected:
                msg = f"FAIL hash({t!r}) == {got} != {expected}"
                self.fail(msg)

        check_one_exact((), 750394483, 5740354900026072187)
        check_one_exact((0,), 1214856301, -8753497827991233192)
        check_one_exact((0, 0), -168982784, -8458139203682520985)
        check_one_exact((0.5,), 2077348973, -408149959306781352)
        check_one_exact((0.5, (), (-2, 3, (4, 6))), 714642271,
                        -1845940830829704396)

    # Various tests for hashing of tuples to check that we get few collisions.
    # Does something only if RUN_ALL_HASH_TESTS is true.
    #
    # Earlier versions of the tuple hash algorithm had massive collisions
    # reported at:
    # - https://bugs.python.org/issue942952
    # - https://bugs.python.org/issue34751
    def test_hash_optional(self):
        from itertools import product

        if not RUN_ALL_HASH_TESTS:
            return

        # If specified, `expected` is a 2-tuple of expected
        # (number_of_collisions, pileup) values, and the test fails if
        # those aren't the values we get. Also if specified, the test
        # fails if z > `zlimit`.
        def tryone_inner(tag, nbins, hashes, expected=None, zlimit=None):
            from collections import Counter

            nballs = len(hashes)
            mean, sdev = support.collision_stats(nbins, nballs)
            c = Counter(hashes)
            collisions = nballs - len(c)
            z = (collisions - mean) / sdev
            pileup = max(c.values()) - 1
            del c
            got = (collisions, pileup)
            failed = False
            prefix = ""
            if zlimit is not None and z > zlimit:
                failed = True
                prefix = f"FAIL z > {zlimit}; "
            if expected is not None and got != expected:
                failed = True
                prefix += f"FAIL {got} != {expected}; "
            if failed or JUST_SHOW_HASH_RESULTS:
                msg = f"{prefix}{tag}; pileup {pileup:,} mean {mean:.1f} "
                msg += f"coll {collisions:,} z {z:+.1f}"
                if JUST_SHOW_HASH_RESULTS:
                    import sys
                    print(msg, file=sys.__stdout__)
                else:
                    self.fail(msg)

        def tryone(tag, xs,
                   native32=None, native64=None, hi32=None, lo32=None,
                   zlimit=None):
            NHASHBITS = support.NHASHBITS
            hashes = list(map(hash, xs))
            tryone_inner(tag + f"; {NHASHBITS}-bit hash codes",
                         1 << NHASHBITS,
                         hashes,
                         native32 if NHASHBITS == 32 else native64,
                         zlimit)

            if NHASHBITS > 32:
                shift = NHASHBITS - 32
                tryone_inner(tag + "; 32-bit upper hash codes",
                             1 << 32,
                             [h >> shift for h in hashes],
                             hi32,
                             zlimit)

                mask = (1 << 32) - 1
                tryone_inner(tag + "; 32-bit lower hash codes",
                             1 << 32,
                             [h & mask for h in hashes],
                             lo32,
                             zlimit)

        # Tuples of smallish positive integers are common - nice if we
        # get "better than random" for these.
        tryone("range(100) by 3", list(product(range(100), repeat=3)),
               (0, 0), (0, 0), (4, 1), (0, 0))

        # A previous hash had systematic problems when mixing integers of
        # similar magnitude but opposite sign, obscurely related to that
        # j ^ -2 == -j when j is odd.
        cands = list(range(-10, -1)) + list(range(9))

        # Note: -1 is omitted because hash(-1) == hash(-2) == -2, and
        # there's nothing the tuple hash can do to avoid collisions
        # inherited from collisions in the tuple components' hashes.
        tryone("-10 .. 8 by 4", list(product(cands, repeat=4)),
               (0, 0), (0, 0), (0, 0), (0, 0))
        del cands

        # The hashes here are a weird mix of values where all the
        # variation is in the lowest bits and across a single high-order
        # bit - the middle bits are all zeroes. A decent hash has to
        # both propagate low bits to the left and high bits to the
        # right. This is also complicated a bit in that there are
        # collisions among the hashes of the integers in L alone.
        L = [n << 60 for n in range(100)]
        tryone("0..99 << 60 by 3", list(product(L, repeat=3)),
               (0, 0), (0, 0), (0, 0), (324, 1))
        del L

        # Used to suffer a massive number of collisions.
        tryone("[-3, 3] by 18", list(product([-3, 3], repeat=18)),
               (7, 1), (0, 0), (7, 1), (6, 1))

        # And even worse. hash(0.5) has only a single bit set, at the
        # high end. A decent hash needs to propagate high bits right.
        tryone("[0, 0.5] by 18", list(product([0, 0.5], repeat=18)),
               (5, 1), (0, 0), (9, 1), (12, 1))

        # Hashes of ints and floats are the same across platforms.
        # String hashes vary even on a single platform across runs, due
        # to hash randomization for strings. So we can't say exactly
        # what this should do. Instead we insist that the # of
        # collisions is no more than 4 sdevs above the theoretically
        # random mean. Even if the tuple hash can't achieve that on its
        # own, the string hash is trying to be decently pseudo-random
        # (in all bit positions) on _its_ own. We can at least test
        # that the tuple hash doesn't systematically ruin that.
        tryone("4-char tuples",
               list(product("abcdefghijklmnopqrstuvwxyz", repeat=4)),
               zlimit=4.0)

        # The "old tuple test". See https://bugs.python.org/issue942952.
        # Ensures, for example, that the hash:
        #   is non-commutative
        #   spreads closely spaced values
        #   doesn't exhibit cancellation in tuples like (x,(x,y))
        N = 50
        base = list(range(N))
        xp = list(product(base, repeat=2))
        inps = base + list(product(base, xp)) + \
               list(product(xp, base)) + xp + list(zip(base))
        tryone("old tuple test", inps,
               (2, 1), (0, 0), (52, 49), (7, 1))
        del base, xp, inps

        # The "new tuple test". See https://bugs.python.org/issue34751.
        # Even more tortured nesting, and a mix of signed ints of very
        # small magnitude.
        n = 5
        A = [x for x in range(-n, n+1) if x != -1]
        B = A + [(a,) for a in A]
        L2 = list(product(A, repeat=2))
        L3 = L2 + list(product(A, repeat=3))
        L4 = L3 + list(product(A, repeat=4))
        # T = list of testcases. These consist of all (possibly nested
        # at most 2 levels deep) tuples containing at most 4 items from
        # the set A.
        T = A
        T += [(a,) for a in B + L4]
        T += product(L3, B)
        T += product(L2, repeat=2)
        T += product(B, L3)
        T += product(B, B, L2)
        T += product(B, L2, B)
        T += product(L2, B, B)
        T += product(B, repeat=4)
        assert len(T) == 345130
        tryone("new tuple test", T,
               (9, 1), (0, 0), (21, 5), (6, 1))

    def test_repr(self):
        l0 = tuple()
        l2 = (0, 1, 2)
        a0 = self.type2test(l0)
        a2 = self.type2test(l2)

        self.assertEqual(str(a0), repr(l0))
        self.assertEqual(str(a2), repr(l2))
        self.assertEqual(repr(a0), "()")
        self.assertEqual(repr(a2), "(0, 1, 2)")

    def _not_tracked(self, t):
        # Nested tuples can take several collections to untrack
        gc.collect()
        gc.collect()
        self.assertFalse(gc.is_tracked(t), t)

    def _tracked(self, t):
        self.assertTrue(gc.is_tracked(t), t)
        gc.collect()
        gc.collect()
        self.assertTrue(gc.is_tracked(t), t)

    @support.cpython_only
    def test_track_literals(self):
        # Test GC-optimization of tuple literals
        x, y, z = 1.5, "a", []

        self._not_tracked(())
        self._not_tracked((1,))
        self._not_tracked((1, 2))
        self._not_tracked((1, 2, "a"))
        self._not_tracked((1, 2, (None, True, False, ()), int))
        self._not_tracked((object(),))
        self._not_tracked(((1, x), y, (2, 3)))

        # Tuples with mutable elements are always tracked, even if those
        # elements are not tracked right now.
        self._tracked(([],))
        self._tracked(([1],))
        self._tracked(({},))
        self._tracked((set(),))
        self._tracked((x, y, z))

    def check_track_dynamic(self, tp, always_track):
        x, y, z = 1.5, "a", []

        check = self._tracked if always_track else self._not_tracked
        check(tp())
        check(tp([]))
        check(tp(set()))
        check(tp([1, x, y]))
        check(tp(obj for obj in [1, x, y]))
        check(tp(set([1, x, y])))
        check(tp(tuple([obj]) for obj in [1, x, y]))
        check(tuple(tp([obj]) for obj in [1, x, y]))

        self._tracked(tp([z]))
        self._tracked(tp([[x, y]]))
        self._tracked(tp([{x: y}]))
        self._tracked(tp(obj for obj in [x, y, z]))
        self._tracked(tp(tuple([obj]) for obj in [x, y, z]))
        self._tracked(tuple(tp([obj]) for obj in [x, y, z]))

    @support.cpython_only
    def test_track_dynamic(self):
        # Test GC-optimization of dynamically constructed tuples.
        self.check_track_dynamic(tuple, False)

    @support.cpython_only
    def test_track_subtypes(self):
        # Tuple subtypes must always be tracked
        class MyTuple(tuple):
            pass
        self.check_track_dynamic(MyTuple, True)

    @support.cpython_only
    def test_bug7466(self):
        # Trying to untrack an unfinished tuple could crash Python
        self._not_tracked(tuple(gc.collect() for i in range(101)))

    def test_repr_large(self):
        # Check the repr of large tuple objects
        def check(n):
            l = (0,) * n
            s = repr(l)
            self.assertEqual(s,
                             '(' + ', '.join(['0'] * n) + ')')
        check(10)       # check our checking code
        check(1000000)

    def test_iterator_pickle(self):
        # Userlist iterators don't support pickling yet since
        # they are based on generators.
        data = self.type2test([4, 5, 6, 7])
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            itorg = iter(data)
            d = pickle.dumps(itorg, proto)
            it = pickle.loads(d)
            self.assertEqual(type(itorg), type(it))
            self.assertEqual(self.type2test(it), self.type2test(data))

            it = pickle.loads(d)
            next(it)
            d = pickle.dumps(it, proto)
            self.assertEqual(self.type2test(it), self.type2test(data)[1:])

    def test_reversed_pickle(self):
        data = self.type2test([4, 5, 6, 7])
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            itorg = reversed(data)
            d = pickle.dumps(itorg, proto)
            it = pickle.loads(d)
            self.assertEqual(type(itorg), type(it))
            self.assertEqual(self.type2test(it), self.type2test(reversed(data)))

            it = pickle.loads(d)
            next(it)
            d = pickle.dumps(it, proto)
            self.assertEqual(self.type2test(it), self.type2test(reversed(data))[1:])

    def test_no_comdat_folding(self):
        # Issue 8847: In the PGO build, the MSVC linker's COMDAT folding
        # optimization causes failures in code that relies on distinct
        # function addresses.
        class T(tuple): pass
        with self.assertRaises(TypeError):
            [3,] + T((1,2))

    def test_lexicographic_ordering(self):
        # Issue 21100
        a = self.type2test([1, 2])
        b = self.type2test([1, 2, 0])
        c = self.type2test([1, 3])
        self.assertLess(a, b)
        self.assertLess(b, c)

# Notes on testing hash codes. The primary thing is that Python doesn't
# care about "random" hash codes. To the contrary, we like them to be
# very regular when possible, so that the low-order bits are as evenly
# distributed as possible. For integers this is easy: hash(i) == i for
# all not-huge i except i==-1.
#
# For tuples of mixed type there's really no hope of that, so we want
# "randomish" here instead. But getting close to pseudo-random in all
# bit positions is more expensive than we've been willing to pay for.
#
# We can tolerate large deviations from random - what we don't want is
# catastrophic pileups on a relative handful of hash codes. The dict
# and set lookup routines remain effective provided that full-width hash
# codes for not-equal objects are distinct.
#
# So we compute various statistics here based on what a "truly random"
# hash would do, but don't automate "pass or fail" based on those
# results. Instead those are viewed as inputs to human judgment, and the
# automated tests merely ensure we get the _same_ results across
# platforms. In fact, we normally don't bother to run them at all -
# set RUN_ALL_HASH_TESTS to force it.
#
# When global JUST_SHOW_HASH_RESULTS is True, the tuple hash statistics
# are just displayed to stdout. A typical output line looks like:
#
# old tuple test; 32-bit upper hash codes; \
#     pileup 49 mean 7.4 coll 52 z +16.4
#
# "old tuple test" is just a string name for the test being run.
#
# "32-bit upper hash codes" means this was run under a 64-bit build and
# we've shifted away the lower 32 bits of the hash codes.
#
# "pileup" is 0 if there were no collisions across those hash codes.
# It's 1 less than the maximum number of times any single hash code was
# seen. So in this case, there was (at least) one hash code that was
# seen 50 times: that hash code "piled up" 49 more times than ideal.
#
# "mean" is the number of collisions a perfectly random hash function
# would have yielded, on average.
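#
# A rough illustration of where "mean" comes from, using the standard
# balls-into-bins model (this is a sketch only - not necessarily the exact
# formula test.support.collision_stats() implements, and the function name
# below is hypothetical, used just for this note):
#
#     def approx_collision_stats(nbins, nballs):
#         # A given bin is still empty after nballs throws with probability
#         # (1 - 1/nbins)**nballs, so this is the expected number of
#         # occupied bins; every ball beyond the first in an occupied bin
#         # counts as one collision.
#         occupied = nbins * (1.0 - (1.0 - 1.0 / nbins) ** nballs)
#         mean = nballs - occupied
#         # When collisions are rare their count is roughly Poisson, so the
#         # standard deviation is about sqrt(mean).
#         return mean, mean ** 0.5
#
# When nballs is much smaller than nbins this is close to
# nballs**2 / (2 * nbins). The "old tuple test" feeds in about 252,600
# tuples with nbins = 2**32, giving mean ~ 7.4 and sdev ~ 2.7 - consistent
# with the sample line above, where coll 52 comes out as z +16.4.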
459# 460# "coll" is the number of collisions actually seen. 461# 462# "z" is "coll - mean" divided by the standard deviation of the number 463# of collisions a perfectly random hash function would suffer. A 464# positive value is "worse than random", and negative value "better than 465# random". Anything of magnitude greater than 3 would be highly suspect 466# for a hash function that claimed to be random. It's essentially 467# impossible that a truly random function would deliver a result 16.4 468# sdevs "worse than random". 469# 470# But we don't care here! That's why the test isn't coded to fail. 471# Knowing something about how the high-order hash code bits behave 472# provides insight, but is irrelevant to how the dict and set lookup 473# code performs. The low-order bits are much more important to that, 474# and on the same test those did "just like random": 475# 476# old tuple test; 32-bit lower hash codes; \ 477# pileup 1 mean 7.4 coll 7 z -0.2 478# 479# So there are always tradeoffs to consider. For another: 480# 481# 0..99 << 60 by 3; 32-bit hash codes; \ 482# pileup 0 mean 116.4 coll 0 z -10.8 483# 484# That was run under a 32-bit build, and is spectacularly "better than 485# random". On a 64-bit build the wider hash codes are fine too: 486# 487# 0..99 << 60 by 3; 64-bit hash codes; \ 488# pileup 0 mean 0.0 coll 0 z -0.0 489# 490# but their lower 32 bits are poor: 491# 492# 0..99 << 60 by 3; 32-bit lower hash codes; \ 493# pileup 1 mean 116.4 coll 324 z +19.2 494# 495# In a statistical sense that's waaaaay too many collisions, but (a) 324 496# collisions out of a million hash codes isn't anywhere near being a 497# real problem; and, (b) the worst pileup on a single hash code is a measly 498# 1 extra. It's a relatively poor case for the tuple hash, but still 499# fine for practical use. 500# 501# This isn't, which is what Python 3.7.1 produced for the hashes of 502# itertools.product([0, 0.5], repeat=18). Even with a fat 64-bit 503# hashcode, the highest pileup was over 16,000 - making a dict/set 504# lookup on one of the colliding values thousands of times slower (on 505# average) than we expect. 506# 507# [0, 0.5] by 18; 64-bit hash codes; \ 508# pileup 16,383 mean 0.0 coll 262,128 z +6073641856.9 509# [0, 0.5] by 18; 32-bit lower hash codes; \ 510# pileup 262,143 mean 8.0 coll 262,143 z +92683.6 511 512if __name__ == "__main__": 513 unittest.main() 514