xref: /aosp_15_r20/external/json-schema-validator/src/test/suite/bin/jsonschema_suite (revision 78c4dd6aa35290980cdcd1623a7e337e8d021c7c)
1#! /usr/bin/env python3
2from pathlib import Path
3from urllib.parse import urljoin
4import argparse
5import json
6import os
7import random
8import shutil
9import sys
10import textwrap
11import unittest
12import warnings
13
14try:
15    import jsonschema.validators
16except ImportError:
17    jsonschema = None
18    VALIDATORS = {}
19else:
20    VALIDATORS = {
21        "draft3": jsonschema.validators.Draft3Validator,
22        "draft4": jsonschema.validators.Draft4Validator,
23        "draft6": jsonschema.validators.Draft6Validator,
24        "draft7": jsonschema.validators.Draft7Validator,
25        "draft2019-09": jsonschema.validators.Draft201909Validator,
26        "draft2020-12": jsonschema.validators.Draft202012Validator,
27        "latest": jsonschema.validators.Draft202012Validator,
28    }
29
30
# Directory two levels above this script; everything else is located
# relative to it.
ROOT_DIR = Path(__file__).parent.parent
SUITE_ROOT_DIR = ROOT_DIR / "tests"
OUTPUT_ROOT_DIR = ROOT_DIR / "output-tests"

REMOTES_DIR = ROOT_DIR / "remotes"
# Base URL under which the files in REMOTES_DIR are assumed to be served.
REMOTES_BASE_URL = "http://localhost:1234/"

# The schemas which the test files themselves are validated against.
# JSON is read explicitly as UTF-8 rather than with the locale's default
# encoding, which is not UTF-8 on every platform.
TEST_SCHEMA = json.loads(
    ROOT_DIR.joinpath("test-schema.json").read_text(encoding="utf-8"),
)
OUTPUT_TEST_SCHEMA = json.loads(
    ROOT_DIR.joinpath("output-test-schema.json").read_text(encoding="utf-8"),
)
42
43
def files(paths):
    """
    Each test file in the provided paths, as an array of test cases.

    Yields ``(path, contents)`` pairs, where ``contents`` is the parsed
    JSON found at ``path``.
    """
    for path in paths:
        # JSON is interchanged as UTF-8, so decode it as such instead of
        # depending on the locale's default encoding.
        yield path, json.loads(path.read_text(encoding="utf-8"))
50
51
def cases(paths):
    """
    Every test case from every file in the provided paths, flattened.
    """
    for _path, contents in files(paths):
        for case in contents:
            yield case
58
59
def tests(paths):
    """
    Every individual test from every case in the provided paths.

    Each yielded test additionally carries its case's schema under the
    ``"schema"`` key.
    """
    for case in cases(paths):
        for test in case["tests"]:
            # Attach the owning case's schema so a test can be used alone.
            test.update(schema=case["schema"])
            yield test
68
69
def collect(root_dir):
    """
    All of the test file paths within the given root directory, recursively.

    ``root_dir`` may be a ``pathlib.Path`` or a plain string (which is what
    command line arguments arrive as). The result is an iterator of paths
    to the ``*.json`` files found anywhere beneath it.
    """
    return Path(root_dir).rglob("*.json")
75
76
def url_for_path(path):
    """
    Return the assumed remote URL for a file in the remotes/ directory.

    The refRemote.json tests refer to this URL and take for granted that
    the file's contents are being served there.
    """

    relative = str(path.relative_to(REMOTES_DIR))
    # Windows separates path components with backslashes; URLs never do.
    return urljoin(REMOTES_BASE_URL, relative.replace("\\", "/"))
89
90
def versions_and_validators():
    """
    Each version directory of the suite, paired with its validator class.

    Anything which isn't a directory is passed over silently, whereas a
    directory with no known validator is reported via a warning and then
    passed over.
    """

    for version in SUITE_ROOT_DIR.iterdir():
        if version.is_dir():
            Validator = VALIDATORS.get(version.name)
            if Validator is not None:
                yield version, Validator
            else:
                warnings.warn(f"No schema validator for {version.name}")
106
107
class SanityTests(unittest.TestCase):
    """
    Sanity checks on the files making up the test suite itself.

    The checks cover the JSON validity of every file, the style, length and
    uniqueness of descriptions and, when the jsonschema library is
    available, the validity of the schemas and of the test files.
    """

    @classmethod
    def setUpClass(cls):
        """
        Locate the test, output test and remote files used by the checks.

        Raises an ``AssertionError`` if any of the three groups is empty.
        """
        print(f"Looking for tests in {SUITE_ROOT_DIR}")
        print(f"Looking for output tests in {OUTPUT_ROOT_DIR}")
        print(f"Looking for remotes in {REMOTES_DIR}")

        # The errors below are raised explicitly rather than via `assert`
        # statements so that they still fire under `python -O`.
        cls.test_files = list(collect(SUITE_ROOT_DIR))
        if not cls.test_files:
            raise AssertionError("Didn't find the test files!")
        print(f"Found {len(cls.test_files)} test files")

        cls.output_test_files = [
            each
            for each in collect(OUTPUT_ROOT_DIR)
            if each.name != "output-schema.json"
        ]
        if not cls.output_test_files:
            raise AssertionError("Didn't find the output test files!")
        print(f"Found {len(cls.output_test_files)} output test files")

        cls.remote_files = list(collect(REMOTES_DIR))
        if not cls.remote_files:
            raise AssertionError("Didn't find the remote files!")
        print(f"Found {len(cls.remote_files)} remote files")

    def assertUnique(self, iterable):
        """
        Assert that the elements of an iterable are unique.

        The elements need to be hashable.
        """

        seen, duplicated = set(), set()
        for each in iterable:
            if each in seen:
                duplicated.add(each)
            seen.add(each)
        self.assertFalse(duplicated, "Elements are not unique.")

    def assertFollowsDescriptionStyle(self, description):
        """
        Instead of saying "test that X frobs" or "X should frob" use "X frobs".

        See e.g. https://jml.io/pages/test-docstrings.html

        This test isn't comprehensive (it doesn't catch all the extra
        verbiage there), but it's just to catch whatever it manages to
        cover.
        """

        message = (
            "In descriptions, don't say 'Test that X frobs' or 'X should "
            "frob' or 'X should be valid'. Just say 'X frobs' or 'X is "
            "valid'. It's shorter, and the test suite is entirely about "
            "what *should* be already. "
            "See https://jml.io/pages/test-docstrings.html for help."
        )
        self.assertNotRegex(description, r"\bshould\b", message)
        self.assertNotRegex(description, r"(?i)\btest(s)? that\b", message)

    def test_all_json_files_are_valid(self):
        """
        All files (tests, output tests, remotes, etc.) contain valid JSON.
        """
        for path in collect(ROOT_DIR):
            with self.subTest(path=path):
                try:
                    # JSON is UTF-8; a file which can't be decoded as such
                    # raises UnicodeDecodeError, itself a ValueError, and
                    # so is reported below as well.
                    json.loads(path.read_text(encoding="utf-8"))
                except ValueError as error:
                    self.fail(f"{path} contains invalid JSON ({error})")

    def test_all_case_descriptions_have_reasonable_length(self):
        """
        All cases have reasonably long descriptions.
        """
        for case in cases(self.test_files + self.output_test_files):
            with self.subTest(description=case["description"]):
                self.assertLess(
                    len(case["description"]),
                    150,
                    "Description is too long (keep it to less than 150 chars).",
                )

    def test_all_test_descriptions_have_reasonable_length(self):
        """
        All tests have reasonably long descriptions.
        """
        # Counting from 1 makes `count` the number of tests seen so far,
        # and the initial value covers there being no tests at all.
        count = 0
        for count, test in enumerate(
            tests(self.test_files + self.output_test_files),
            start=1,
        ):
            with self.subTest(description=test["description"]):
                self.assertLess(
                    len(test["description"]),
                    70,
                    "Description is too long (keep it to less than 70 chars).",
                )
        print(f"Found {count} tests.")

    def test_all_case_descriptions_are_unique(self):
        """
        All cases have unique descriptions in their files.
        """
        for path, cases in files(self.test_files + self.output_test_files):
            with self.subTest(path=path):
                self.assertUnique(case["description"] for case in cases)

    def test_all_test_descriptions_are_unique(self):
        """
        All test cases have unique test descriptions in their tests.
        """
        # Counting from 1 makes `count` the number of cases seen so far,
        # and the initial value covers there being no cases at all.
        count = 0
        for count, case in enumerate(
            cases(self.test_files + self.output_test_files),
            start=1,
        ):
            with self.subTest(description=case["description"]):
                self.assertUnique(
                    test["description"] for test in case["tests"]
                )
        print(f"Found {count} test cases.")

    def test_case_descriptions_do_not_use_modal_verbs(self):
        """
        All case descriptions follow the description style.
        """
        for case in cases(self.test_files + self.output_test_files):
            with self.subTest(description=case["description"]):
                self.assertFollowsDescriptionStyle(case["description"])

    def test_test_descriptions_do_not_use_modal_verbs(self):
        """
        All test descriptions follow the description style.
        """
        for test in tests(self.test_files + self.output_test_files):
            with self.subTest(description=test["description"]):
                self.assertFollowsDescriptionStyle(test["description"])

    @unittest.skipIf(jsonschema is None, "Validation library not present!")
    def test_all_schemas_are_valid(self):
        """
        All schemas are valid under their metaschemas.
        """
        for version, Validator in versions_and_validators():
            # Valid (optional test) schemas contain regexes which
            # aren't valid Python regexes, so skip checking it
            checkers = Validator.FORMAT_CHECKER.checkers
            regex_checker = checkers.pop("regex", None)
            if regex_checker is not None:
                # The format checker belongs to the validator class, so put
                # the entry back afterwards rather than leaving it removed
                # for whatever runs next in this process.
                self.addCleanup(checkers.__setitem__, "regex", regex_checker)

            test_files = collect(version)
            for case in cases(test_files):
                with self.subTest(case=case):
                    try:
                        Validator.check_schema(
                            case["schema"],
                            format_checker=Validator.FORMAT_CHECKER,
                        )
                    except jsonschema.SchemaError:
                        self.fail(
                            "Found an invalid schema. "
                            "See the traceback for details on why."
                        )

    @unittest.skipIf(jsonschema is None, "Validation library not present!")
    def test_arbitrary_schemas_do_not_use_unknown_keywords(self):
        """
        Test cases do not use unknown keywords.

        (Unless they specifically are testing the specified behavior for
        unknown keywords).

        This helps prevent accidental leakage of newer keywords into older
        drafts where they didn't exist.
        """

        KNOWN = {
            "draft2020-12": {
                "$anchor",
                "$comment",
                "$defs",
                "$dynamicAnchor",
                "$dynamicRef",
                "$id",
                "$ref",
                "$schema",
                "$vocabulary",
                "additionalProperties",
                "allOf",
                "anyOf",
                "const",
                "contains",
                "contentEncoding",
                "contentMediaType",
                "contentSchema",
                "dependencies",
                "dependentRequired",
                "dependentSchemas",
                "description",
                "else",
                "enum",
                "exclusiveMaximum",
                "exclusiveMinimum",
                "format",
                "if",
                "items",
                "maxContains",
                "maxItems",
                "maxLength",
                "maxProperties",
                "maximum",
                "minContains",
                "minItems",
                "minLength",
                "minProperties",
                "minimum",
                "multipleOf",
                "not",
                "oneOf",
                "pattern",
                "patternProperties",
                "prefixItems",
                "properties",
                "propertyNames",
                "required",
                "then",
                "title",
                "type",
                "unevaluatedItems",
                "unevaluatedProperties",
                "uniqueItems",
            },
            "draft2019-09": {
                "$anchor",
                "$comment",
                "$defs",
                "$id",
                "$recursiveAnchor",
                "$recursiveRef",
                "$ref",
                "$schema",
                "$vocabulary",
                "additionalItems",
                "additionalProperties",
                "allOf",
                "anyOf",
                "const",
                "contains",
                "contentEncoding",
                "contentMediaType",
                "contentSchema",
                "dependencies",
                "dependentRequired",
                "dependentSchemas",
                "description",
                "else",
                "enum",
                "exclusiveMaximum",
                "exclusiveMinimum",
                "format",
                "if",
                "items",
                "maxContains",
                "maxItems",
                "maxLength",
                "maxProperties",
                "maximum",
                "minContains",
                "minItems",
                "minLength",
                "minProperties",
                "minimum",
                "multipleOf",
                "not",
                "oneOf",
                "pattern",
                "patternProperties",
                "properties",
                "propertyNames",
                "required",
                "then",
                "title",
                "type",
                "unevaluatedItems",
                "unevaluatedProperties",
                "uniqueItems",
            },
            "draft7": {
                "$comment",
                "$id",
                "$ref",
                "$schema",
                "additionalItems",
                "additionalProperties",
                "allOf",
                "anyOf",
                "const",
                "contains",
                "contentEncoding",
                "contentMediaType",
                "definitions",
                "dependencies",
                "description",
                "else",
                "enum",
                "exclusiveMaximum",
                "exclusiveMinimum",
                "format",
                "if",
                "items",
                "maxItems",
                "maxLength",
                "maxProperties",
                "maximum",
                "minItems",
                "minLength",
                "minProperties",
                "minimum",
                "multipleOf",
                "not",
                "oneOf",
                "pattern",
                "patternProperties",
                "properties",
                "propertyNames",
                "required",
                "then",
                "title",
                "type",
                "uniqueItems",
            },
            "draft6": {
                "$comment",
                "$id",
                "$ref",
                "$schema",
                "additionalItems",
                "additionalProperties",
                "allOf",
                "anyOf",
                "const",
                "contains",
                "definitions",
                "dependencies",
                "description",
                "enum",
                "exclusiveMaximum",
                "exclusiveMinimum",
                "format",
                "items",
                "maxItems",
                "maxLength",
                "maxProperties",
                "maximum",
                "minItems",
                "minLength",
                "minProperties",
                "minimum",
                "multipleOf",
                "not",
                "oneOf",
                "pattern",
                "patternProperties",
                "properties",
                "propertyNames",
                "required",
                "title",
                "type",
                "uniqueItems",
            },
            "draft4": {
                "$ref",
                "$schema",
                "additionalItems",
                "additionalProperties",
                "allOf",
                "anyOf",
                "definitions",
                "dependencies",
                "description",
                "enum",
                "exclusiveMaximum",
                "exclusiveMinimum",
                "format",
                "id",
                "items",
                "maxItems",
                "maxLength",
                "maxProperties",
                "maximum",
                "minItems",
                "minLength",
                "minProperties",
                "minimum",
                "multipleOf",
                "not",
                "oneOf",
                "pattern",
                "patternProperties",
                "properties",
                "required",
                "title",
                "type",
                "uniqueItems",

                # Technically this is wrong, $comment doesn't exist in this
                # draft, but the point of this test is to detect mistakes by
                # test authors, whereas the point of the $comment keyword is
                # to just standardize a place for a comment, so it's not a
                # mistake to use it in earlier drafts in tests per se.
                "$comment",
            },
            "draft3": {
                "$ref",
                "$schema",
                "additionalItems",
                "additionalProperties",
                "definitions",
                "dependencies",
                "description",
                "disallow",
                "divisibleBy",
                "enum",
                "exclusiveMaximum",
                "exclusiveMinimum",
                "extends",
                "format",
                "id",
                "items",
                "maxItems",
                "maxLength",
                "maximum",
                "minItems",
                "minLength",
                "minimum",
                "pattern",
                "patternProperties",
                "properties",
                "title",
                "type",
                "uniqueItems",

                # Technically this is wrong, $comment doesn't exist in this
                # draft, but the point of this test is to detect mistakes by
                # test authors, whereas the point of the $comment keyword is
                # to just standardize a place for a comment, so it's not a
                # mistake to use it in earlier drafts in tests per se.
                "$comment",
            },
        }

        def missing(d, version_name):
            """
            Wrap ``d`` so that looking up an unknown keyword fails the test.

            The version name is passed in explicitly so that each wrapper
            keeps reporting against its own version rather than against
            whichever version the loop below has most recently reached.
            """
            from collections.abc import Mapping

            # `this` is the mapping; `self` remains the running test case.
            class BlowUpForUnknownProperties(Mapping):
                def __iter__(this):
                    return iter(d)

                def __getitem__(this, k):
                    if k not in KNOWN[version_name]:
                        self.fail(
                            f"{k} is not a known keyword for {version_name}. "
                            "If this test is testing behavior related to "
                            "unknown keywords you may need to add it to the "
                            "allowlist in the jsonschema_suite checker. "
                            "Otherwise it may contain a typo!"
                        )
                    return d[k]

                def __len__(this):
                    return len(d)

            return BlowUpForUnknownProperties()

        for version, Validator in versions_and_validators():
            if version.name == "latest":
                continue

            # Swap in the checking mapping for the duration of this test
            # only; the original is put back by the cleanup.
            self.addCleanup(
                setattr, Validator, "VALIDATORS", Validator.VALIDATORS,
            )
            Validator.VALIDATORS = missing(
                dict(Validator.VALIDATORS), version.name,
            )

            test_files = [
                each for each in collect(version)
                if each.stem != "refOfUnknownKeyword"
            ]
            for case in cases(test_files):
                if "unknown keyword" in case["description"]:
                    continue
                with self.subTest(case=case, version=version.name):
                    try:
                        # The instance is arbitrary: validating is only a
                        # means of having the schema's keywords looked up.
                        Validator(case["schema"]).is_valid(12)
                    except jsonschema.exceptions.RefResolutionError:
                        pass

    @unittest.skipIf(jsonschema is None, "Validation library not present!")
    def test_suites_are_valid(self):
        """
        All test files are valid under test-schema.json.
        """
        Validator = jsonschema.validators.validator_for(TEST_SCHEMA)
        validator = Validator(TEST_SCHEMA)
        for path, cases in files(self.test_files):
            with self.subTest(path=path):
                try:
                    validator.validate(cases)
                except jsonschema.ValidationError as error:
                    self.fail(str(error))

    @unittest.skipIf(jsonschema is None, "Validation library not present!")
    def test_output_suites_are_valid(self):
        """
        All output test files are valid under output-test-schema.json.
        """
        Validator = jsonschema.validators.validator_for(OUTPUT_TEST_SCHEMA)
        validator = Validator(OUTPUT_TEST_SCHEMA)
        for path, cases in files(self.output_test_files):
            with self.subTest(path=path):
                try:
                    validator.validate(cases)
                except jsonschema.exceptions.RefResolutionError:
                    # python-jsonschema/jsonschema#884
                    pass
                except jsonschema.ValidationError as error:
                    self.fail(str(error))
623
624
def main(arguments):
    """
    Run the utility command selected on the command line.

    ``arguments`` is the namespace of parsed command line arguments; its
    ``command`` attribute selects what happens:

    * ``check`` runs the sanity tests and exits non-zero if any fail
    * ``flatten`` writes all of a version's test cases to stdout as JSON
    * ``remotes`` writes a JSON object of remote URLs to schemas to stdout
    * ``dump_remotes`` copies the remotes directory to ``out_dir``
    * ``serve`` serves the remotes directory over HTTP on port 1234
    """
    if arguments.command == "check":
        suite = unittest.TestLoader().loadTestsFromTestCase(SanityTests)
        result = unittest.TextTestRunner().run(suite)
        sys.exit(not result.wasSuccessful())
    elif arguments.command == "flatten":
        # The version arrives from the command line as a plain string,
        # whereas collecting the files is done via pathlib.
        selected_cases = list(cases(collect(Path(arguments.version))))

        if arguments.randomize:
            random.shuffle(selected_cases)

        json.dump(selected_cases, sys.stdout, indent=4, sort_keys=True)
    elif arguments.command == "remotes":
        remotes = {
            url_for_path(path): json.loads(path.read_text(encoding="utf-8"))
            for path in collect(REMOTES_DIR)
        }
        json.dump(remotes, sys.stdout, indent=4, sort_keys=True)
    elif arguments.command == "dump_remotes":
        if arguments.update:
            # Updating deletes the output directory first. Refuse to do so
            # when that would delete the very remotes about to be copied,
            # which is the case for the default output directory.
            source = REMOTES_DIR.resolve()
            target = Path(arguments.out_dir).resolve()
            if target == source or target in source.parents:
                print(
                    f"{arguments.out_dir} contains the remotes being "
                    "dumped, so it can't be replaced. Aborting."
                )
                sys.exit(1)

            shutil.rmtree(arguments.out_dir, ignore_errors=True)

        try:
            shutil.copytree(REMOTES_DIR, arguments.out_dir)
        except FileExistsError:
            print(f"{arguments.out_dir} already exists. Aborting.")
            sys.exit(1)
    elif arguments.command == "serve":
        try:
            import flask
        except ImportError:
            print(
                textwrap.dedent(
                    """
                The Flask library is required to serve the remote schemas.

                You can install it by running `pip install Flask`.

                Alternatively, see the `jsonschema_suite remotes` or
                `jsonschema_suite dump_remotes` commands to create static files
                that can be served with your own web server.
            """.strip("\n")
                )
            )
            sys.exit(1)

        app = flask.Flask(__name__)

        @app.route("/<path:path>")
        def serve_path(path):
            return flask.send_from_directory(REMOTES_DIR, path)

        app.run(port=1234)
680
681
# Command line interface: one subcommand per utility.
parser = argparse.ArgumentParser(description="JSON Schema Test Suite utilities")
subparsers = parser.add_subparsers(
    dest="command",
    metavar="COMMAND",
    help="utility commands",
)
# A subcommand always has to be given.
subparsers.required = True

# check
check = subparsers.add_parser(name="check", help="Sanity check the test suite.")

# flatten [--randomize] version
flatten = subparsers.add_parser(
    name="flatten",
    help="Output a flattened file containing a selected version's test cases.",
)
flatten.add_argument(
    "--randomize",
    help="Randomize the order of the outputted cases.",
    action="store_true",
)
flatten.add_argument(
    "version", help="The directory containing the version to output",
)

# remotes
remotes = subparsers.add_parser(
    name="remotes",
    help=(
        "Output the expected URLs and their associated schemas for remote "
        "ref tests as a JSON object."
    ),
)

# dump_remotes [--update] [--out-dir OUT_DIR]
dump_remotes = subparsers.add_parser(
    name="dump_remotes", help="Dump the remote ref schemas into a file tree",
)
dump_remotes.add_argument(
    "--update",
    help="Update the remotes in an existing directory.",
    action="store_true",
)
dump_remotes.add_argument(
    "--out-dir",
    type=os.path.abspath,
    default=REMOTES_DIR,
    help="The output directory to create as the root of the file tree",
)

# serve
serve = subparsers.add_parser(
    name="serve",
    help="Start a webserver to serve schemas used by remote ref tests.",
)

if __name__ == "__main__":
    main(parser.parse_args())
735