#!/usr/bin/env python3
from pathlib import Path
from urllib.parse import urljoin
import argparse
import json
import os
import random
import shutil
import sys
import textwrap
import unittest
import warnings

try:
    import jsonschema.validators
except ImportError:
    jsonschema = None
    VALIDATORS = {}
else:
    VALIDATORS = {
        "draft3": jsonschema.validators.Draft3Validator,
        "draft4": jsonschema.validators.Draft4Validator,
        "draft6": jsonschema.validators.Draft6Validator,
        "draft7": jsonschema.validators.Draft7Validator,
        "draft2019-09": jsonschema.validators.Draft201909Validator,
        "draft2020-12": jsonschema.validators.Draft202012Validator,
        "latest": jsonschema.validators.Draft202012Validator,
    }


ROOT_DIR = Path(__file__).parent.parent
SUITE_ROOT_DIR = ROOT_DIR / "tests"
OUTPUT_ROOT_DIR = ROOT_DIR / "output-tests"

REMOTES_DIR = ROOT_DIR / "remotes"
REMOTES_BASE_URL = "http://localhost:1234/"

TEST_SCHEMA = json.loads(ROOT_DIR.joinpath("test-schema.json").read_text())
OUTPUT_TEST_SCHEMA = json.loads(
    ROOT_DIR.joinpath("output-test-schema.json").read_text(),
)


def files(paths):
    """
    Each test file in the provided paths, as an array of test cases.
    """
    for path in paths:
        yield path, json.loads(path.read_text())


def cases(paths):
    """
    Each test case within each file in the provided paths.
    """
    for _, test_file in files(paths):
        yield from test_file


def tests(paths):
    """
    Each individual test within all cases within the provided paths.
    """
    for case in cases(paths):
        for test in case["tests"]:
            test["schema"] = case["schema"]
            yield test


def collect(root_dir):
    """
    All of the test file paths within the given root directory, recursively.
    """
    return root_dir.rglob("*.json")


def url_for_path(path):
    """
    Return the assumed remote URL for a file in the remotes/ directory.

    Tests in the refRemote.json file reference this URL, and assume the
    corresponding contents are available at the URL.
    """

    return urljoin(
        REMOTES_BASE_URL,
        str(path.relative_to(REMOTES_DIR)).replace("\\", "/"),  # Windows...
    )


def versions_and_validators():
    """
    All versions we can validate schemas from.
    """

    for version in SUITE_ROOT_DIR.iterdir():
        if not version.is_dir():
            continue

        Validator = VALIDATORS.get(version.name)
        if Validator is None:
            warnings.warn(f"No schema validator for {version.name}")
            continue

        yield version, Validator


class SanityTests(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        print(f"Looking for tests in {SUITE_ROOT_DIR}")
        print(f"Looking for output tests in {OUTPUT_ROOT_DIR}")
        print(f"Looking for remotes in {REMOTES_DIR}")

        cls.test_files = list(collect(SUITE_ROOT_DIR))
        assert cls.test_files, "Didn't find the test files!"
        print(f"Found {len(cls.test_files)} test files")

        cls.output_test_files = [
            each
            for each in collect(OUTPUT_ROOT_DIR)
            if each.name != "output-schema.json"
        ]
        assert cls.output_test_files, "Didn't find the output test files!"
        print(f"Found {len(cls.output_test_files)} output test files")

        cls.remote_files = list(collect(REMOTES_DIR))
        assert cls.remote_files, "Didn't find the remote files!"
        print(f"Found {len(cls.remote_files)} remote files")

    def assertUnique(self, iterable):
        """
        Assert that the elements of an iterable are unique.
134 """ 135 136 seen, duplicated = set(), set() 137 for each in iterable: 138 if each in seen: 139 duplicated.add(each) 140 seen.add(each) 141 self.assertFalse(duplicated, "Elements are not unique.") 142 143 def assertFollowsDescriptionStyle(self, description): 144 """ 145 Instead of saying "test that X frobs" or "X should frob" use "X frobs". 146 147 See e.g. https://jml.io/pages/test-docstrings.html 148 149 This test isn't comprehensive (it doesn't catch all the extra 150 verbiage there), but it's just to catch whatever it manages to 151 cover. 152 """ 153 154 message = ( 155 "In descriptions, don't say 'Test that X frobs' or 'X should " 156 "frob' or 'X should be valid'. Just say 'X frobs' or 'X is " 157 "valid'. It's shorter, and the test suite is entirely about " 158 "what *should* be already. " 159 "See https://jml.io/pages/test-docstrings.html for help." 160 ) 161 self.assertNotRegex(description, r"\bshould\b", message) 162 self.assertNotRegex(description, r"(?i)\btest(s)? that\b", message) 163 164 def test_all_json_files_are_valid(self): 165 """ 166 All files (tests, output tests, remotes, etc.) contain valid JSON. 167 """ 168 for path in collect(ROOT_DIR): 169 with self.subTest(path=path): 170 try: 171 json.loads(path.read_text()) 172 except ValueError as error: 173 self.fail(f"{path} contains invalid JSON ({error})") 174 175 def test_all_case_descriptions_have_reasonable_length(self): 176 """ 177 All cases have reasonably long descriptions. 178 """ 179 for case in cases(self.test_files + self.output_test_files): 180 with self.subTest(description=case["description"]): 181 self.assertLess( 182 len(case["description"]), 183 150, 184 "Description is too long (keep it to less than 150 chars).", 185 ) 186 187 def test_all_test_descriptions_have_reasonable_length(self): 188 """ 189 All tests have reasonably long descriptions. 190 """ 191 for count, test in enumerate( 192 tests(self.test_files + self.output_test_files) 193 ): 194 with self.subTest(description=test["description"]): 195 self.assertLess( 196 len(test["description"]), 197 70, 198 "Description is too long (keep it to less than 70 chars).", 199 ) 200 print(f"Found {count} tests.") 201 202 def test_all_case_descriptions_are_unique(self): 203 """ 204 All cases have unique descriptions in their files. 205 """ 206 for path, cases in files(self.test_files + self.output_test_files): 207 with self.subTest(path=path): 208 self.assertUnique(case["description"] for case in cases) 209 210 def test_all_test_descriptions_are_unique(self): 211 """ 212 All test cases have unique test descriptions in their tests. 
213 """ 214 for count, case in enumerate( 215 cases(self.test_files + self.output_test_files) 216 ): 217 with self.subTest(description=case["description"]): 218 self.assertUnique( 219 test["description"] for test in case["tests"] 220 ) 221 print(f"Found {count} test cases.") 222 223 def test_case_descriptions_do_not_use_modal_verbs(self): 224 for case in cases(self.test_files + self.output_test_files): 225 with self.subTest(description=case["description"]): 226 self.assertFollowsDescriptionStyle(case["description"]) 227 228 def test_test_descriptions_do_not_use_modal_verbs(self): 229 for test in tests(self.test_files + self.output_test_files): 230 with self.subTest(description=test["description"]): 231 self.assertFollowsDescriptionStyle(test["description"]) 232 233 @unittest.skipIf(jsonschema is None, "Validation library not present!") 234 def test_all_schemas_are_valid(self): 235 """ 236 All schemas are valid under their metaschemas. 237 """ 238 for version, Validator in versions_and_validators(): 239 # Valid (optional test) schemas contain regexes which 240 # aren't valid Python regexes, so skip checking it 241 Validator.FORMAT_CHECKER.checkers.pop("regex", None) 242 243 test_files = collect(version) 244 for case in cases(test_files): 245 with self.subTest(case=case): 246 try: 247 Validator.check_schema( 248 case["schema"], 249 format_checker=Validator.FORMAT_CHECKER, 250 ) 251 except jsonschema.SchemaError: 252 self.fail( 253 "Found an invalid schema. " 254 "See the traceback for details on why." 255 ) 256 257 @unittest.skipIf(jsonschema is None, "Validation library not present!") 258 def test_arbitrary_schemas_do_not_use_unknown_keywords(self): 259 """ 260 Test cases do not use unknown keywords. 261 262 (Unless they specifically are testing the specified behavior for 263 unknown keywords). 264 265 This helps prevent accidental leakage of newer keywords into older 266 drafts where they didn't exist. 
267 """ 268 269 KNOWN = { 270 "draft2020-12": { 271 "$anchor", 272 "$comment", 273 "$defs", 274 "$dynamicAnchor", 275 "$dynamicRef", 276 "$id", 277 "$ref", 278 "$schema", 279 "$vocabulary", 280 "additionalProperties", 281 "allOf", 282 "allOf", 283 "anyOf", 284 "const", 285 "contains", 286 "contentEncoding", 287 "contentMediaType", 288 "contentSchema", 289 "dependencies", 290 "dependentRequired", 291 "dependentSchemas", 292 "description", 293 "else", 294 "enum", 295 "exclusiveMaximum", 296 "exclusiveMinimum", 297 "format", 298 "if", 299 "items", 300 "maxContains", 301 "maxItems", 302 "maxItems", 303 "maxLength", 304 "maxProperties", 305 "maximum", 306 "minContains", 307 "minItems", 308 "minLength", 309 "minProperties", 310 "minimum", 311 "multipleOf", 312 "not", 313 "oneOf", 314 "pattern", 315 "patternProperties", 316 "prefixItems", 317 "properties", 318 "propertyNames", 319 "required", 320 "then", 321 "title", 322 "type", 323 "unevaluatedItems", 324 "unevaluatedProperties", 325 "uniqueItems", 326 }, 327 "draft2019-09": { 328 "$anchor", 329 "$comment", 330 "$defs", 331 "$id", 332 "$recursiveAnchor", 333 "$recursiveRef", 334 "$ref", 335 "$schema", 336 "$vocabulary", 337 "additionalItems", 338 "additionalProperties", 339 "allOf", 340 "anyOf", 341 "const", 342 "contains", 343 "contentEncoding", 344 "contentMediaType", 345 "contentSchema", 346 "dependencies", 347 "dependentRequired", 348 "dependentSchemas", 349 "description", 350 "else", 351 "enum", 352 "exclusiveMaximum", 353 "exclusiveMinimum", 354 "format", 355 "if", 356 "items", 357 "maxContains", 358 "maxItems", 359 "maxLength", 360 "maxProperties", 361 "maximum", 362 "minContains", 363 "minItems", 364 "minLength", 365 "minProperties", 366 "minimum", 367 "multipleOf", 368 "not", 369 "oneOf", 370 "pattern", 371 "patternProperties", 372 "properties", 373 "propertyNames", 374 "required", 375 "then", 376 "title", 377 "type", 378 "unevaluatedItems", 379 "unevaluatedProperties", 380 "uniqueItems", 381 }, 382 "draft7": { 383 "$comment", 384 "$id", 385 "$ref", 386 "$schema", 387 "additionalItems", 388 "additionalProperties", 389 "allOf", 390 "anyOf", 391 "const", 392 "contains", 393 "contentEncoding", 394 "contentMediaType", 395 "definitions", 396 "dependencies", 397 "description", 398 "else", 399 "enum", 400 "exclusiveMaximum", 401 "exclusiveMinimum", 402 "format", 403 "if", 404 "items", 405 "maxItems", 406 "maxLength", 407 "maxProperties", 408 "maximum", 409 "minItems", 410 "minLength", 411 "minProperties", 412 "minimum", 413 "multipleOf", 414 "not", 415 "oneOf", 416 "pattern", 417 "patternProperties", 418 "properties", 419 "propertyNames", 420 "required", 421 "then", 422 "title", 423 "type", 424 "type", 425 "uniqueItems", 426 }, 427 "draft6": { 428 "$comment", 429 "$id", 430 "$ref", 431 "$schema", 432 "additionalItems", 433 "additionalProperties", 434 "allOf", 435 "anyOf", 436 "const", 437 "contains", 438 "definitions", 439 "dependencies", 440 "description", 441 "enum", 442 "exclusiveMaximum", 443 "exclusiveMinimum", 444 "format", 445 "items", 446 "maxItems", 447 "maxLength", 448 "maxProperties", 449 "maximum", 450 "minItems", 451 "minLength", 452 "minProperties", 453 "minimum", 454 "multipleOf", 455 "not", 456 "oneOf", 457 "pattern", 458 "patternProperties", 459 "properties", 460 "propertyNames", 461 "required", 462 "title", 463 "type", 464 "uniqueItems", 465 }, 466 "draft4": { 467 "$ref", 468 "$schema", 469 "additionalItems", 470 "additionalItems", 471 "additionalProperties", 472 "allOf", 473 "anyOf", 474 "definitions", 475 "dependencies", 
476 "description", 477 "enum", 478 "exclusiveMaximum", 479 "exclusiveMinimum", 480 "format", 481 "id", 482 "items", 483 "maxItems", 484 "maxLength", 485 "maxProperties", 486 "maximum", 487 "minItems", 488 "minLength", 489 "minProperties", 490 "minimum", 491 "multipleOf", 492 "not", 493 "oneOf", 494 "pattern", 495 "patternProperties", 496 "properties", 497 "required", 498 "title", 499 "type", 500 "uniqueItems", 501 502 # Technically this is wrong, $comment doesn't exist in this 503 # draft, but the point of this test is to detect mistakes by, 504 # test authors, whereas the point of the $comment keyword is 505 # to just standardize a place for a comment, so it's not a 506 # mistake to use it in earlier drafts in tests per se. 507 "$comment", 508 }, 509 "draft3": { 510 "$ref", 511 "$schema", 512 "additionalItems", 513 "additionalProperties", 514 "definitions", 515 "dependencies", 516 "description", 517 "disallow", 518 "divisibleBy", 519 "enum", 520 "exclusiveMaximum", 521 "exclusiveMinimum", 522 "extends", 523 "format", 524 "id", 525 "items", 526 "maxItems", 527 "maxLength", 528 "maximum", 529 "minItems", 530 "minLength", 531 "minimum", 532 "pattern", 533 "patternProperties", 534 "properties", 535 "title", 536 "type", 537 "uniqueItems", 538 539 # Technically this is wrong, $comment doesn't exist in this 540 # draft, but the point of this test is to detect mistakes by, 541 # test authors, whereas the point of the $comment keyword is 542 # to just standardize a place for a comment, so it's not a 543 # mistake to use it in earlier drafts in tests per se. 544 "$comment", 545 }, 546 } 547 548 def missing(d): 549 from collections.abc import Mapping 550 551 class BlowUpForUnknownProperties(Mapping): 552 def __iter__(this): 553 return iter(d) 554 555 def __getitem__(this, k): 556 if k not in KNOWN[version.name]: 557 self.fail( 558 f"{k} is not a known keyword for {version.name}. " 559 "If this test is testing behavior related to " 560 "unknown keywords you may need to add it to the " 561 "allowlist in the jsonschema_suite checker. " 562 "Otherwise it may contain a typo!" 563 ) 564 return d[k] 565 566 def __len__(this): 567 return len(d) 568 569 return BlowUpForUnknownProperties() 570 571 for version, Validator in versions_and_validators(): 572 if version.name == "latest": 573 continue 574 575 self.addCleanup( 576 setattr, Validator, "VALIDATORS", Validator.VALIDATORS, 577 ) 578 Validator.VALIDATORS = missing(dict(Validator.VALIDATORS)) 579 580 test_files = [ 581 each for each in collect(version) 582 if each.stem != "refOfUnknownKeyword" 583 ] 584 for case in cases(test_files): 585 if "unknown keyword" in case["description"]: 586 continue 587 with self.subTest(case=case, version=version.name): 588 try: 589 Validator(case["schema"]).is_valid(12) 590 except jsonschema.exceptions.RefResolutionError: 591 pass 592 593 @unittest.skipIf(jsonschema is None, "Validation library not present!") 594 def test_suites_are_valid(self): 595 """ 596 All test files are valid under test-schema.json. 597 """ 598 Validator = jsonschema.validators.validator_for(TEST_SCHEMA) 599 validator = Validator(TEST_SCHEMA) 600 for path, cases in files(self.test_files): 601 with self.subTest(path=path): 602 try: 603 validator.validate(cases) 604 except jsonschema.ValidationError as error: 605 self.fail(str(error)) 606 607 @unittest.skipIf(jsonschema is None, "Validation library not present!") 608 def test_output_suites_are_valid(self): 609 """ 610 All output test files are valid under output-test-schema.json. 
611 """ 612 Validator = jsonschema.validators.validator_for(OUTPUT_TEST_SCHEMA) 613 validator = Validator(OUTPUT_TEST_SCHEMA) 614 for path, cases in files(self.output_test_files): 615 with self.subTest(path=path): 616 try: 617 validator.validate(cases) 618 except jsonschema.exceptions.RefResolutionError: 619 # python-jsonschema/jsonschema#884 620 pass 621 except jsonschema.ValidationError as error: 622 self.fail(str(error)) 623 624 625def main(arguments): 626 if arguments.command == "check": 627 suite = unittest.TestLoader().loadTestsFromTestCase(SanityTests) 628 result = unittest.TextTestRunner().run(suite) 629 sys.exit(not result.wasSuccessful()) 630 elif arguments.command == "flatten": 631 selected_cases = [case for case in cases(collect(arguments.version))] 632 633 if arguments.randomize: 634 random.shuffle(selected_cases) 635 636 json.dump(selected_cases, sys.stdout, indent=4, sort_keys=True) 637 elif arguments.command == "remotes": 638 remotes = { 639 url_for_path(path): json.loads(path.read_text()) 640 for path in collect(REMOTES_DIR) 641 } 642 json.dump(remotes, sys.stdout, indent=4, sort_keys=True) 643 elif arguments.command == "dump_remotes": 644 if arguments.update: 645 shutil.rmtree(arguments.out_dir, ignore_errors=True) 646 647 try: 648 shutil.copytree(REMOTES_DIR, arguments.out_dir) 649 except FileExistsError: 650 print(f"{arguments.out_dir} already exists. Aborting.") 651 sys.exit(1) 652 elif arguments.command == "serve": 653 try: 654 import flask 655 except ImportError: 656 print( 657 textwrap.dedent( 658 """ 659 The Flask library is required to serve the remote schemas. 660 661 You can install it by running `pip install Flask`. 662 663 Alternatively, see the `jsonschema_suite remotes` or 664 `jsonschema_suite dump_remotes` commands to create static files 665 that can be served with your own web server. 
666 """.strip( 667 "\n" 668 ) 669 ) 670 ) 671 sys.exit(1) 672 673 app = flask.Flask(__name__) 674 675 @app.route("/<path:path>") 676 def serve_path(path): 677 return flask.send_from_directory(REMOTES_DIR, path) 678 679 app.run(port=1234) 680 681 682parser = argparse.ArgumentParser( 683 description="JSON Schema Test Suite utilities", 684) 685subparsers = parser.add_subparsers( 686 help="utility commands", dest="command", metavar="COMMAND" 687) 688subparsers.required = True 689 690check = subparsers.add_parser("check", help="Sanity check the test suite.") 691 692flatten = subparsers.add_parser( 693 "flatten", 694 help="Output a flattened file containing a selected version's test cases.", 695) 696flatten.add_argument( 697 "--randomize", 698 action="store_true", 699 help="Randomize the order of the outputted cases.", 700) 701flatten.add_argument( 702 "version", 703 help="The directory containing the version to output", 704) 705 706remotes = subparsers.add_parser( 707 "remotes", 708 help="Output the expected URLs and their associated schemas for remote " 709 "ref tests as a JSON object.", 710) 711 712dump_remotes = subparsers.add_parser( 713 "dump_remotes", 714 help="Dump the remote ref schemas into a file tree", 715) 716dump_remotes.add_argument( 717 "--update", 718 action="store_true", 719 help="Update the remotes in an existing directory.", 720) 721dump_remotes.add_argument( 722 "--out-dir", 723 default=REMOTES_DIR, 724 type=os.path.abspath, 725 help="The output directory to create as the root of the file tree", 726) 727 728serve = subparsers.add_parser( 729 "serve", 730 help="Start a webserver to serve schemas used by remote ref tests.", 731) 732 733if __name__ == "__main__": 734 main(parser.parse_args()) 735