#!/usr/bin/env python3
# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Generates hash test data for hash_test.cc and the Rust hash tests."""

import datetime
import os
import random

from pw_tokenizer import tokens

# Hash lengths to test; each has a corresponding C macro and header.
HASH_LENGTHS = 80, 96, 128
HASH_MACRO = 'PW_TOKENIZER_65599_FIXED_LENGTH_{}_HASH'
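

# For illustration only, a minimal sketch of the fixed-length 65599 hash that
# tokens.c_hash computes (the canonical implementation lives in
# pw_tokenizer.tokens). The hash is seeded with the full string length; each
# of the first hash_length bytes is then mixed in, weighted by an increasing
# power of 65599, modulo 2**32. _hash_sketch is a hypothetical helper and is
# not used by this script.
def _hash_sketch(data: bytes, hash_length: int) -> int:
    hash_value = len(data)  # The full length always contributes to the hash.
    coefficient = 65599
    for byte in data[:hash_length]:  # Only the first hash_length bytes count.
        hash_value = (hash_value + coefficient * byte) % 2**32
        coefficient = (coefficient * 65599) % 2**32
    return hash_value
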

SHARED_HEADER = """\
// Copyright {year} The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

// AUTOGENERATED - DO NOT EDIT
//
// This file was generated by {script}.
// To make changes, update the script and run it to generate new files.
"""

CPP_HEADER = """\
#pragma once

#include <cstddef>
#include <cstdint>
#include <string_view>

{includes}

namespace pw::tokenizer {{

// Test a series of generated test cases.
inline constexpr struct {{
  std::string_view string;
  size_t hash_length;
  uint32_t python_calculated_hash;
  uint32_t macro_calculated_hash;  // clang-format off
}} kHashTests[] = {{

"""

CPP_FOOTER = """
};  // kHashTests

// clang-format on

}  // namespace pw::tokenizer
"""

_CPP_TEST_CASE = """{{
  std::string_view("{str}", {string_length}u),  // NOLINT(bugprone-string-constructor)
  {hash_length}u,  // fixed hash length
  UINT32_C({hash}),  // Python-calculated hash
  {macro}("{str}"),  // macro-calculated hash
}},
"""

RUST_HEADER = """
fn test_cases() -> Vec<TestCase> {{
    vec![
"""

RUST_FOOTER = """
    ]
}
"""

_RUST_TEST_CASE = """        TestCase{{
            string: b"{str}",
            hash_length: {hash_length},
            hash: {hash},
        }},
"""


def _include_paths(lengths):
    return '\n'.join(
        sorted(
            '#include "pw_tokenizer/internal/'
            'pw_tokenizer_65599_fixed_length_{}_hash_macro.h"'.format(length)
            for length in lengths
        )
    )
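
# For example, _include_paths([80]) produces the single include line:
#   #include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_80_hash_macro.h"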


def _test_case_at_length(test_case_template, data, hash_length):
    """Generates a test case for a particular hash length."""

    if isinstance(data, str):
        data = data.encode()

    # Emit printable ASCII literally, escaping backslashes and double quotes;
    # emit everything else entirely as \xNN escapes.
    if all(ord(' ') <= b <= ord('~') for b in data):
        escaped_str = data.decode().replace('\\', r'\\').replace('"', r'\"')
    else:
        escaped_str = ''.join(r'\x{:02x}'.format(b) for b in data)

    return test_case_template.format(
        str=escaped_str,
        string_length=len(data),
        hash_length=hash_length,
        hash=tokens.c_hash(data, hash_length),
        macro=HASH_MACRO.format(hash_length),
    )


def test_case(test_case_template, data):
    # Generate one test case for each supported hash length.
    return ''.join(
        _test_case_at_length(test_case_template, data, length)
        for length in HASH_LENGTHS
    )
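
# For example, test_case(_CPP_TEST_CASE, 'a') expands to three entries like
# the one below, one per hash length in HASH_LENGTHS, with the elided value
# being the hash computed by tokens.c_hash:
#   {
#     std::string_view("a", 1u),  // NOLINT(bugprone-string-constructor)
#     80u,  // fixed hash length
#     UINT32_C(...),  // Python-calculated hash
#     PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("a"),  // macro-calculated hash
#   },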


def generate_test_cases(test_case_template):
    # Fixed strings: empty, non-ASCII bytes, embedded nulls, quotes, and a
    # string longer than any of the hash lengths.
    yield test_case(test_case_template, '')
    yield test_case(test_case_template, b'\xa1')
    yield test_case(test_case_template, b'\xff')
    yield test_case(test_case_template, '\0')
    yield test_case(test_case_template, '\0\0')
    yield test_case(test_case_template, 'a')
    yield test_case(test_case_template, 'A')
    yield test_case(test_case_template, 'hello, "world"')
    yield test_case(test_case_template, 'YO' * 100)

    # Seed the RNG so repeated runs regenerate identical files.
    random.seed(600613)

    def random_string(size):
        return bytes(random.randrange(256) for _ in range(size))

    for i in range(1, 16):
        yield test_case(test_case_template, random_string(i))
        yield test_case(test_case_template, random_string(i))

    # Cover the boundaries around each fixed hash length.
    for length in HASH_LENGTHS:
        yield test_case(test_case_template, random_string(length - 1))
        yield test_case(test_case_template, random_string(length))
        yield test_case(test_case_template, random_string(length + 1))


def generate_file(
    path_array, header_template, footer_template, test_case_template
):
    """Writes a generated test data file.

    The components of path_array are joined onto this script's directory, so
    the output lands in the same place regardless of the working directory.
    """
    path = os.path.realpath(
        os.path.join(os.path.dirname(__file__), *path_array)
    )

    with open(path, 'w') as output:
        output.write(
            SHARED_HEADER.format(
                year=datetime.date.today().year,
                script=os.path.basename(__file__),
            )
        )
        output.write(
            header_template.format(
                # Templates without an {includes} field ignore this argument.
                includes=_include_paths(HASH_LENGTHS),
            )
        )

        for case in generate_test_cases(test_case_template):
            output.write(case)

        output.write(footer_template)
        print('Wrote test data to', path)


if __name__ == '__main__':
    generate_file(
        [
            '..',
            'pw_tokenizer_private',
            'generated_hash_test_cases.h',
        ],
        CPP_HEADER,
        CPP_FOOTER,
        _CPP_TEST_CASE,
    )
    generate_file(
        [
            '..',
            'rust',
            'pw_tokenizer_core_test_cases.rs',
        ],
        RUST_HEADER,
        RUST_FOOTER,
        _RUST_TEST_CASE,
    )