#!/usr/bin/env python3
# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Generates test data for hash_test.cc."""

import datetime
import os
import random

from pw_tokenizer import tokens

# Fixed hash lengths for which test cases are generated; each length has a
# corresponding PW_TOKENIZER_65599_FIXED_LENGTH_<N>_HASH C macro.
HASH_LENGTHS = 80, 96, 128
HASH_MACRO = 'PW_TOKENIZER_65599_FIXED_LENGTH_{}_HASH'

# License/autogeneration banner shared by all generated output files.
SHARED_HEADER = """\
// Copyright {year} The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

// AUTOGENERATED - DO NOT EDIT
//
// This file was generated by {script}.
// To make changes, update the script and run it to generate new files.
"""

CPP_HEADER = """\
#pragma once

#include <cstddef>
#include <cstdint>
#include <string_view>

{includes}

namespace pw::tokenizer {{

// Test a series of generated test cases.
inline constexpr struct {{
  std::string_view string;
  size_t hash_length;
  uint32_t python_calculated_hash;
  uint32_t macro_calculated_hash;  // clang-format off
}} kHashTests[] = {{

"""

CPP_FOOTER = """
}};  // kHashTests

// clang-format on

}}  // namespace pw::tokenizer
"""

_CPP_TEST_CASE = """{{
    std::string_view("{str}", {string_length}u),  // NOLINT(bugprone-string-constructor)
    {hash_length}u,  // fixed hash length
    UINT32_C({hash}),  // Python-calculated hash
    {macro}("{str}"),  // macro-calculated hash
}},
"""

RUST_HEADER = """
fn test_cases() -> Vec<TestCase> {{
    vec![
"""

RUST_FOOTER = """
    ]
}
"""

_RUST_TEST_CASE = """  TestCase{{
        string: b"{str}",
        hash_length: {hash_length},
        hash: {hash},
      }},
"""


def _include_paths(lengths):
    """Returns sorted #include lines for the per-length hash macro headers."""
    return '\n'.join(
        sorted(
            '#include "pw_tokenizer/internal/'
            'pw_tokenizer_65599_fixed_length_{}_hash_macro.h"'.format(length)
            for length in lengths
        )
    )


def _test_case_at_length(test_case_template, data, hash_length):
    """Generates a test case for a particular hash length."""

    if isinstance(data, str):
        data = data.encode()

    # Data that is entirely printable ASCII is emitted as a quoted string
    # (with embedded quotes escaped); anything else is emitted as \xNN escapes.
    if all(ord(' ') <= b <= ord('~') for b in data):
        escaped_str = data.decode().replace('"', r'\"')
    else:
        escaped_str = ''.join(r'\x{:02x}'.format(b) for b in data)

    return test_case_template.format(
        str=escaped_str,
        string_length=len(data),
        hash_length=hash_length,
        hash=tokens.c_hash(data, hash_length),
        macro=HASH_MACRO.format(hash_length),
    )


def test_case(test_case_template, data):
    """Generates test cases for ``data`` at every supported hash length."""
    # Use the HASH_LENGTHS constant rather than a duplicated literal tuple so
    # the generated cases stay in sync if the supported lengths ever change.
    return ''.join(
        _test_case_at_length(test_case_template, data, length)
        for length in HASH_LENGTHS
    )


def generate_test_cases(test_case_template):
    """Yields formatted test cases: fixed edge cases, then random strings."""
    yield test_case(test_case_template, '')
    yield test_case(test_case_template, b'\xa1')
    yield test_case(test_case_template, b'\xff')
    yield test_case(test_case_template, '\0')
    yield test_case(test_case_template, '\0\0')
    yield test_case(test_case_template, 'a')
    yield test_case(test_case_template, 'A')
    yield test_case(test_case_template, 'hello, "world"')
    yield test_case(test_case_template, 'YO' * 100)

    # Fixed seed so regenerating the files produces identical output.
    random.seed(600613)

    def random_string(size):
        return bytes(random.randrange(256) for _ in range(size))

    for i in range(1, 16):
        yield test_case(test_case_template, random_string(i))
        yield test_case(test_case_template, random_string(i))

    # Strings spanning each hash-length boundary (one below, at, one above).
    for length in HASH_LENGTHS:
        yield test_case(test_case_template, random_string(length - 1))
        yield test_case(test_case_template, random_string(length))
        yield test_case(test_case_template, random_string(length + 1))


def generate_file(
    path_array, header_template, footer_template, test_case_template
):
    """Writes one generated test-data file.

    Args:
      path_array: path components relative to this script's directory.
      header_template: file header; formatted with the include list.
      footer_template: literal text written after the test cases.
      test_case_template: per-case template passed to generate_test_cases.
    """
    path = os.path.realpath(
        os.path.join(os.path.dirname(__file__), *path_array)
    )

    with open(path, 'w') as output:
        output.write(
            SHARED_HEADER.format(
                year=datetime.date.today().year,
                script=os.path.basename(__file__),
            )
        )
        output.write(
            header_template.format(
                includes=_include_paths(HASH_LENGTHS),
            )
        )

        for case in generate_test_cases(test_case_template):
            output.write(case)

        output.write(footer_template)
    print('Wrote test data to', path)


if __name__ == '__main__':
    generate_file(
        [
            '..',
            'pw_tokenizer_private',
            'generated_hash_test_cases.h',
        ],
        CPP_HEADER,
        CPP_FOOTER,
        _CPP_TEST_CASE,
    )
    generate_file(
        [
            '..',
            'rust',
            'pw_tokenizer_core_test_cases.rs',
        ],
        RUST_HEADER,
        RUST_FOOTER,
        _RUST_TEST_CASE,
    )