1# test the invariant that 2# iff a==b then hash(a)==hash(b) 3# 4# Also test that hash implementations are inherited as expected 5 6import datetime 7import os 8import sys 9import unittest 10from test.support.script_helper import assert_python_ok 11from collections.abc import Hashable 12 13IS_64BIT = sys.maxsize > 2**32 14 15def lcg(x, length=16): 16 """Linear congruential generator""" 17 if x == 0: 18 return bytes(length) 19 out = bytearray(length) 20 for i in range(length): 21 x = (214013 * x + 2531011) & 0x7fffffff 22 out[i] = (x >> 16) & 0xff 23 return bytes(out) 24 25def pysiphash(uint64): 26 """Convert SipHash24 output to Py_hash_t 27 """ 28 assert 0 <= uint64 < (1 << 64) 29 # simple unsigned to signed int64 30 if uint64 > (1 << 63) - 1: 31 int64 = uint64 - (1 << 64) 32 else: 33 int64 = uint64 34 # mangle uint64 to uint32 35 uint32 = (uint64 ^ uint64 >> 32) & 0xffffffff 36 # simple unsigned to signed int32 37 if uint32 > (1 << 31) - 1: 38 int32 = uint32 - (1 << 32) 39 else: 40 int32 = uint32 41 return int32, int64 42 43def skip_unless_internalhash(test): 44 """Skip decorator for tests that depend on SipHash24 or FNV""" 45 ok = sys.hash_info.algorithm in {"fnv", "siphash13", "siphash24"} 46 msg = "Requires SipHash13, SipHash24 or FNV" 47 return test if ok else unittest.skip(msg)(test) 48 49 50class HashEqualityTestCase(unittest.TestCase): 51 52 def same_hash(self, *objlist): 53 # Hash each object given and fail if 54 # the hash values are not all the same. 55 hashed = list(map(hash, objlist)) 56 for h in hashed[1:]: 57 if h != hashed[0]: 58 self.fail("hashed values differ: %r" % (objlist,)) 59 60 def test_numeric_literals(self): 61 self.same_hash(1, 1, 1.0, 1.0+0.0j) 62 self.same_hash(0, 0.0, 0.0+0.0j) 63 self.same_hash(-1, -1.0, -1.0+0.0j) 64 self.same_hash(-2, -2.0, -2.0+0.0j) 65 66 def test_coerced_integers(self): 67 self.same_hash(int(1), int(1), float(1), complex(1), 68 int('1'), float('1.0')) 69 self.same_hash(int(-2**31), float(-2**31)) 70 self.same_hash(int(1-2**31), float(1-2**31)) 71 self.same_hash(int(2**31-1), float(2**31-1)) 72 # for 64-bit platforms 73 self.same_hash(int(2**31), float(2**31)) 74 self.same_hash(int(-2**63), float(-2**63)) 75 self.same_hash(int(2**63), float(2**63)) 76 77 def test_coerced_floats(self): 78 self.same_hash(int(1.23e300), float(1.23e300)) 79 self.same_hash(float(0.5), complex(0.5, 0.0)) 80 81 def test_unaligned_buffers(self): 82 # The hash function for bytes-like objects shouldn't have 83 # alignment-dependent results (example in issue #16427). 84 b = b"123456789abcdefghijklmnopqrstuvwxyz" * 128 85 for i in range(16): 86 for j in range(16): 87 aligned = b[i:128+j] 88 unaligned = memoryview(b)[i:128+j] 89 self.assertEqual(hash(aligned), hash(unaligned)) 90 91 92_default_hash = object.__hash__ 93class DefaultHash(object): pass 94 95_FIXED_HASH_VALUE = 42 96class FixedHash(object): 97 def __hash__(self): 98 return _FIXED_HASH_VALUE 99 100class OnlyEquality(object): 101 def __eq__(self, other): 102 return self is other 103 104class OnlyInequality(object): 105 def __ne__(self, other): 106 return self is not other 107 108class InheritedHashWithEquality(FixedHash, OnlyEquality): pass 109class InheritedHashWithInequality(FixedHash, OnlyInequality): pass 110 111class NoHash(object): 112 __hash__ = None 113 114class HashInheritanceTestCase(unittest.TestCase): 115 default_expected = [object(), 116 DefaultHash(), 117 OnlyInequality(), 118 ] 119 fixed_expected = [FixedHash(), 120 InheritedHashWithEquality(), 121 InheritedHashWithInequality(), 122 ] 123 error_expected = [NoHash(), 124 OnlyEquality(), 125 ] 126 127 def test_default_hash(self): 128 for obj in self.default_expected: 129 self.assertEqual(hash(obj), _default_hash(obj)) 130 131 def test_fixed_hash(self): 132 for obj in self.fixed_expected: 133 self.assertEqual(hash(obj), _FIXED_HASH_VALUE) 134 135 def test_error_hash(self): 136 for obj in self.error_expected: 137 self.assertRaises(TypeError, hash, obj) 138 139 def test_hashable(self): 140 objects = (self.default_expected + 141 self.fixed_expected) 142 for obj in objects: 143 self.assertIsInstance(obj, Hashable) 144 145 def test_not_hashable(self): 146 for obj in self.error_expected: 147 self.assertNotIsInstance(obj, Hashable) 148 149 150# Issue #4701: Check that some builtin types are correctly hashable 151class DefaultIterSeq(object): 152 seq = range(10) 153 def __len__(self): 154 return len(self.seq) 155 def __getitem__(self, index): 156 return self.seq[index] 157 158class HashBuiltinsTestCase(unittest.TestCase): 159 hashes_to_check = [enumerate(range(10)), 160 iter(DefaultIterSeq()), 161 iter(lambda: 0, 0), 162 ] 163 164 def test_hashes(self): 165 _default_hash = object.__hash__ 166 for obj in self.hashes_to_check: 167 self.assertEqual(hash(obj), _default_hash(obj)) 168 169class HashRandomizationTests: 170 171 # Each subclass should define a field "repr_", containing the repr() of 172 # an object to be tested 173 174 def get_hash_command(self, repr_): 175 return 'print(hash(eval(%a)))' % repr_ 176 177 def get_hash(self, repr_, seed=None): 178 env = os.environ.copy() 179 env['__cleanenv'] = True # signal to assert_python not to do a copy 180 # of os.environ on its own 181 if seed is not None: 182 env['PYTHONHASHSEED'] = str(seed) 183 else: 184 env.pop('PYTHONHASHSEED', None) 185 out = assert_python_ok( 186 '-c', self.get_hash_command(repr_), 187 **env) 188 stdout = out[1].strip() 189 return int(stdout) 190 191 def test_randomized_hash(self): 192 # two runs should return different hashes 193 run1 = self.get_hash(self.repr_, seed='random') 194 run2 = self.get_hash(self.repr_, seed='random') 195 self.assertNotEqual(run1, run2) 196 197class StringlikeHashRandomizationTests(HashRandomizationTests): 198 repr_ = None 199 repr_long = None 200 201 # 32bit little, 64bit little, 32bit big, 64bit big 202 known_hashes = { 203 'djba33x': [ # only used for small strings 204 # seed 0, 'abc' 205 [193485960, 193485960, 193485960, 193485960], 206 # seed 42, 'abc' 207 [-678966196, 573763426263223372, -820489388, -4282905804826039665], 208 ], 209 'siphash13': [ 210 # NOTE: PyUCS2 layout depends on endianness 211 # seed 0, 'abc' 212 [69611762, -4594863902769663758, 69611762, -4594863902769663758], 213 # seed 42, 'abc' 214 [-975800855, 3869580338025362921, -975800855, 3869580338025362921], 215 # seed 42, 'abcdefghijk' 216 [-595844228, 7764564197781545852, -595844228, 7764564197781545852], 217 # seed 0, 'äú∑ℇ' 218 [-1093288643, -2810468059467891395, -1041341092, 4925090034378237276], 219 # seed 42, 'äú∑ℇ' 220 [-585999602, -2845126246016066802, -817336969, -2219421378907968137], 221 ], 222 'siphash24': [ 223 # NOTE: PyUCS2 layout depends on endianness 224 # seed 0, 'abc' 225 [1198583518, 4596069200710135518, 1198583518, 4596069200710135518], 226 # seed 42, 'abc' 227 [273876886, -4501618152524544106, 273876886, -4501618152524544106], 228 # seed 42, 'abcdefghijk' 229 [-1745215313, 4436719588892876975, -1745215313, 4436719588892876975], 230 # seed 0, 'äú∑ℇ' 231 [493570806, 5749986484189612790, -1006381564, -5915111450199468540], 232 # seed 42, 'äú∑ℇ' 233 [-1677110816, -2947981342227738144, -1860207793, -4296699217652516017], 234 ], 235 'fnv': [ 236 # seed 0, 'abc' 237 [-1600925533, 1453079729188098211, -1600925533, 238 1453079729188098211], 239 # seed 42, 'abc' 240 [-206076799, -4410911502303878509, -1024014457, 241 -3570150969479994130], 242 # seed 42, 'abcdefghijk' 243 [811136751, -5046230049376118746, -77208053 , 244 -4779029615281019666], 245 # seed 0, 'äú∑ℇ' 246 [44402817, 8998297579845987431, -1956240331, 247 -782697888614047887], 248 # seed 42, 'äú∑ℇ' 249 [-283066365, -4576729883824601543, -271871407, 250 -3927695501187247084], 251 ] 252 } 253 254 def get_expected_hash(self, position, length): 255 if length < sys.hash_info.cutoff: 256 algorithm = "djba33x" 257 else: 258 algorithm = sys.hash_info.algorithm 259 if sys.byteorder == 'little': 260 platform = 1 if IS_64BIT else 0 261 else: 262 assert(sys.byteorder == 'big') 263 platform = 3 if IS_64BIT else 2 264 return self.known_hashes[algorithm][position][platform] 265 266 def test_null_hash(self): 267 # PYTHONHASHSEED=0 disables the randomized hash 268 known_hash_of_obj = self.get_expected_hash(0, 3) 269 270 # Randomization is enabled by default: 271 self.assertNotEqual(self.get_hash(self.repr_), known_hash_of_obj) 272 273 # It can also be disabled by setting the seed to 0: 274 self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj) 275 276 @skip_unless_internalhash 277 def test_fixed_hash(self): 278 # test a fixed seed for the randomized hash 279 # Note that all types share the same values: 280 h = self.get_expected_hash(1, 3) 281 self.assertEqual(self.get_hash(self.repr_, seed=42), h) 282 283 @skip_unless_internalhash 284 def test_long_fixed_hash(self): 285 if self.repr_long is None: 286 return 287 h = self.get_expected_hash(2, 11) 288 self.assertEqual(self.get_hash(self.repr_long, seed=42), h) 289 290 291class StrHashRandomizationTests(StringlikeHashRandomizationTests, 292 unittest.TestCase): 293 repr_ = repr('abc') 294 repr_long = repr('abcdefghijk') 295 repr_ucs2 = repr('äú∑ℇ') 296 297 @skip_unless_internalhash 298 def test_empty_string(self): 299 self.assertEqual(hash(""), 0) 300 301 @skip_unless_internalhash 302 def test_ucs2_string(self): 303 h = self.get_expected_hash(3, 6) 304 self.assertEqual(self.get_hash(self.repr_ucs2, seed=0), h) 305 h = self.get_expected_hash(4, 6) 306 self.assertEqual(self.get_hash(self.repr_ucs2, seed=42), h) 307 308class BytesHashRandomizationTests(StringlikeHashRandomizationTests, 309 unittest.TestCase): 310 repr_ = repr(b'abc') 311 repr_long = repr(b'abcdefghijk') 312 313 @skip_unless_internalhash 314 def test_empty_string(self): 315 self.assertEqual(hash(b""), 0) 316 317class MemoryviewHashRandomizationTests(StringlikeHashRandomizationTests, 318 unittest.TestCase): 319 repr_ = "memoryview(b'abc')" 320 repr_long = "memoryview(b'abcdefghijk')" 321 322 @skip_unless_internalhash 323 def test_empty_string(self): 324 self.assertEqual(hash(memoryview(b"")), 0) 325 326class DatetimeTests(HashRandomizationTests): 327 def get_hash_command(self, repr_): 328 return 'import datetime; print(hash(%s))' % repr_ 329 330class DatetimeDateTests(DatetimeTests, unittest.TestCase): 331 repr_ = repr(datetime.date(1066, 10, 14)) 332 333class DatetimeDatetimeTests(DatetimeTests, unittest.TestCase): 334 repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7)) 335 336class DatetimeTimeTests(DatetimeTests, unittest.TestCase): 337 repr_ = repr(datetime.time(0)) 338 339 340class HashDistributionTestCase(unittest.TestCase): 341 342 def test_hash_distribution(self): 343 # check for hash collision 344 base = "abcdefghabcdefg" 345 for i in range(1, len(base)): 346 prefix = base[:i] 347 with self.subTest(prefix=prefix): 348 s15 = set() 349 s255 = set() 350 for c in range(256): 351 h = hash(prefix + chr(c)) 352 s15.add(h & 0xf) 353 s255.add(h & 0xff) 354 # SipHash24 distribution depends on key, usually > 60% 355 self.assertGreater(len(s15), 8, prefix) 356 self.assertGreater(len(s255), 128, prefix) 357 358if __name__ == "__main__": 359 unittest.main() 360