#!/usr/bin/env python3
#
# Copyright 2011-2018 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

# This script uses the following Unicode tables:
# - DerivedNormalizationProps.txt
# - NormalizationTest.txt
# - UnicodeData.txt
# - StandardizedVariants.txt
#
# Since this should not require frequent updates, we just store this
# out-of-line and check the tables.rs and normalization_tests.rs files into git.
import collections
import sys
import urllib.request

UNICODE_VERSION = "15.0.0"
UCD_URL = "https://www.unicode.org/Public/%s/ucd/" % UNICODE_VERSION

PREAMBLE = """// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly

#![allow(missing_docs)]
"""

NormalizationTest = collections.namedtuple(
    "NormalizationTest",
    ["source", "nfc", "nfd", "nfkc", "nfkd"],
)

# Mapping taken from Table 12 of:
# http://www.unicode.org/reports/tr44/#General_Category_Values
expanded_categories = {
    'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'],
    'Lm': ['L'], 'Lo': ['L'],
    'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'],
    'Nd': ['N'], 'Nl': ['N'], 'No': ['N'],
    'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'],
    'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'],
    'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'],
    'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'],
    'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
}

# Constants from Unicode 9.0.0 Section 3.12 Conjoining Jamo Behavior
# http://www.unicode.org/versions/Unicode9.0.0/ch03.pdf#M9.32468.Heading.310.Combining.Jamo.Behavior
S_BASE, L_COUNT, V_COUNT, T_COUNT = 0xAC00, 19, 21, 28
S_COUNT = L_COUNT * V_COUNT * T_COUNT

class UnicodeData(object):
    def __init__(self):
        self._load_unicode_data()
        self.norm_props = self._load_norm_props()
        self.norm_tests = self._load_norm_tests()

        self.canon_comp = self._compute_canonical_comp()
        self.canon_fully_decomp, self.compat_fully_decomp = self._compute_fully_decomposed()

        self.cjk_compat_variants_fully_decomp = {}
        self._load_cjk_compat_ideograph_variants()

        def stats(name, table):
            count = sum(len(v) for v in table.values())
            print("%s: %d chars => %d decomposed chars" % (name, len(table), count))

        print("Decomposition table stats:")
        stats("Canonical decomp", self.canon_decomp)
        stats("Compatible decomp", self.compat_decomp)
        stats("Canonical fully decomp", self.canon_fully_decomp)
        stats("Compatible fully decomp", self.compat_fully_decomp)
        stats("CJK Compat Variants fully decomp", self.cjk_compat_variants_fully_decomp)

        self.ss_leading, self.ss_trailing = self._compute_stream_safe_tables()

    def _fetch(self, filename):
        resp = urllib.request.urlopen(UCD_URL + filename)
        return resp.read().decode('utf-8')

    def _load_unicode_data(self):
        self.name_to_char_int = {}
        self.combining_classes = {}
        self.compat_decomp = {}
        self.canon_decomp = {}
        self.general_category_mark = []
        self.general_category_public_assigned = []

        assigned_start = 0
        prev_char_int = -1
        prev_name = ""

        for line in self._fetch("UnicodeData.txt").splitlines():
            # See ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html
            pieces = line.split(';')
            assert len(pieces) == 15
            char, name, category, cc, decomp = pieces[0], pieces[1], pieces[2], pieces[3], pieces[5]
            char_int = int(char, 16)

            name = name.strip()
            self.name_to_char_int[name] = char_int

            if cc != '0':
                self.combining_classes[char_int] = cc

            if decomp.startswith('<'):
                self.compat_decomp[char_int] = [int(c, 16) for c in decomp.split()[1:]]
            elif decomp != '':
                self.canon_decomp[char_int] = [int(c, 16) for c in decomp.split()]

            if category == 'M' or 'M' in expanded_categories.get(category, []):
                self.general_category_mark.append(char_int)

            assert category != 'Cn', "Unexpected: Unassigned codepoint in UnicodeData.txt"
            if category not in ['Co', 'Cs']:
                if char_int != prev_char_int + 1 and not is_first_and_last(prev_name, name):
                    self.general_category_public_assigned.append((assigned_start, prev_char_int))
                    assigned_start = char_int
                prev_char_int = char_int
                prev_name = name

        self.general_category_public_assigned.append((assigned_start, prev_char_int))

    def _load_cjk_compat_ideograph_variants(self):
        for line in self._fetch("StandardizedVariants.txt").splitlines():
            strip_comments = line.split('#', 1)[0].strip()
            if not strip_comments:
                continue

            variation_sequence, description, differences = strip_comments.split(';')
            description = description.strip()

            # Don't use variations that only apply in particular shaping environments.
            if differences:
                continue

            # Look for entries where the description field is a codepoint name.
            if description not in self.name_to_char_int:
                continue

            # Only consider the CJK Compatibility Ideographs.
            if not description.startswith('CJK COMPATIBILITY IDEOGRAPH-'):
                continue

            char_int = self.name_to_char_int[description]

            assert char_int not in self.combining_classes, "Unexpected: CJK compat variant with a combining class"
            assert char_int not in self.compat_decomp, "Unexpected: CJK compat variant and compatibility decomposition"
            assert len(self.canon_decomp[char_int]) == 1, "Unexpected: CJK compat variant and non-singleton canonical decomposition"
            # If we ever need to handle Hangul here, we'll need to handle it separately.
            assert not (S_BASE <= char_int < S_BASE + S_COUNT)

            cjk_compat_variant_parts = [int(c, 16) for c in variation_sequence.split()]
            for c in cjk_compat_variant_parts:
                assert c not in self.canon_decomp, "Unexpected: CJK compat variant is unnormalized (canon)"
                assert c not in self.compat_decomp, "Unexpected: CJK compat variant is unnormalized (compat)"
            self.cjk_compat_variants_fully_decomp[char_int] = cjk_compat_variant_parts

    def _load_norm_props(self):
        props = collections.defaultdict(list)

        for line in self._fetch("DerivedNormalizationProps.txt").splitlines():
            (prop_data, _, _) = line.partition("#")
            prop_pieces = prop_data.split(";")

            if len(prop_pieces) < 2:
                continue

            assert len(prop_pieces) <= 3
            (low, _, high) = prop_pieces[0].strip().partition("..")

            prop = prop_pieces[1].strip()

            data = None
            if len(prop_pieces) == 3:
                data = prop_pieces[2].strip()

            props[prop].append((low, high, data))

        return props

    def _load_norm_tests(self):
        tests = []
        for line in self._fetch("NormalizationTest.txt").splitlines():
            (test_data, _, _) = line.partition("#")
            test_pieces = test_data.split(";")

            if len(test_pieces) < 5:
                continue

            source, nfc, nfd, nfkc, nfkd = [[c.strip() for c in p.split()] for p in test_pieces[:5]]
            tests.append(NormalizationTest(source, nfc, nfd, nfkc, nfkd))

        return tests

    def _compute_canonical_comp(self):
        canon_comp = {}
        comp_exclusions = [
            (int(low, 16), int(high or low, 16))
            for low, high, _ in self.norm_props["Full_Composition_Exclusion"]
        ]
        for char_int, decomp in self.canon_decomp.items():
            if any(lo <= char_int <= hi for lo, hi in comp_exclusions):
                continue

            assert len(decomp) == 2
            assert (decomp[0], decomp[1]) not in canon_comp
            canon_comp[(decomp[0], decomp[1])] = char_int

        return canon_comp
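
    # Worked example for the tables computed below (illustrative, from
    # UnicodeData.txt): U+1E9B LATIN SMALL LETTER LONG S WITH DOT ABOVE
    # canonically decomposes to <U+017F, U+0307>, and U+017F compatibility-
    # decomposes further to U+0073, so the fully decomposed forms end up as
    # [0x017F, 0x0307] (canonical) and [0x0073, 0x0307] (compatibility).
    # This is one of the relatively few places where the two tables disagree.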
    def _compute_fully_decomposed(self):
        """
        Even though the decomposition algorithm is recursive, it is possible
        to precompute the recursion at table generation time with a modest
        increase to the table size. Then, for these precomputed tables, we
        note that 1) canonical decomposition is a subset of compatible
        decomposition and 2) they mostly agree on their intersection.
        Therefore, we don't store entries in the compatible table for
        characters that decompose the same way under canonical decomposition.

        Decomposition table stats:
        Canonical decomp: 2060 chars => 3085 decomposed chars
        Compatible decomp: 3662 chars => 5440 decomposed chars
        Canonical fully decomp: 2060 chars => 3404 decomposed chars
        Compatible fully decomp: 3678 chars => 5599 decomposed chars

        The upshot is that decomposition code is very simple and easy to inline
        at a mild code size cost.
        """
        def _decompose(char_int, compatible):
            # 7-bit ASCII never decomposes
            if char_int <= 0x7f:
                yield char_int
                return

            # Assert that we're handling Hangul separately.
            assert not (S_BASE <= char_int < S_BASE + S_COUNT)

            decomp = self.canon_decomp.get(char_int)
            if decomp is not None:
                for decomposed_ch in decomp:
                    for fully_decomposed_ch in _decompose(decomposed_ch, compatible):
                        yield fully_decomposed_ch
                return

            if compatible and char_int in self.compat_decomp:
                for decomposed_ch in self.compat_decomp[char_int]:
                    for fully_decomposed_ch in _decompose(decomposed_ch, compatible):
                        yield fully_decomposed_ch
                return

            yield char_int
            return

        end_codepoint = max(
            max(self.canon_decomp.keys()),
            max(self.compat_decomp.keys()),
        )

        canon_fully_decomp = {}
        compat_fully_decomp = {}

        for char_int in range(0, end_codepoint + 1):
            # Always skip Hangul, since it's more efficient to represent its
            # decomposition programmatically.
            if S_BASE <= char_int < S_BASE + S_COUNT:
                continue

            canon = list(_decompose(char_int, False))
            if not (len(canon) == 1 and canon[0] == char_int):
                canon_fully_decomp[char_int] = canon

            compat = list(_decompose(char_int, True))
            if not (len(compat) == 1 and compat[0] == char_int):
                compat_fully_decomp[char_int] = compat

        # Since canon_fully_decomp is a subset of compat_fully_decomp, we don't
        # need to store their overlap when they agree. When they don't agree,
        # store the decomposition in the compatibility table since we'll check
        # that first when normalizing to NFKD.
        assert set(canon_fully_decomp) <= set(compat_fully_decomp)

        for ch in set(canon_fully_decomp) & set(compat_fully_decomp):
            if canon_fully_decomp[ch] == compat_fully_decomp[ch]:
                del compat_fully_decomp[ch]

        return canon_fully_decomp, compat_fully_decomp

    def _compute_stream_safe_tables(self):
        """
        To make a text stream-safe with the Stream-Safe Text Process (UAX15-D4),
        we need to be able to know the number of contiguous non-starters *after*
        applying compatibility decomposition to each character.

        We can do this incrementally by computing the number of leading and
        trailing non-starters for each character's compatibility decomposition
        with the following rules:

        1) If a character is not affected by compatibility decomposition, look
           up its canonical combining class to find out if it's a non-starter.
        2) All Hangul characters are starters, even under decomposition.
        3) Otherwise, very few decomposing characters have a nonzero count
           of leading or trailing non-starters, so store these characters
           with their associated counts in a separate table.
        """
        leading_nonstarters = {}
        trailing_nonstarters = {}

        for c in set(self.canon_fully_decomp) | set(self.compat_fully_decomp):
            decomposed = self.compat_fully_decomp.get(c) or self.canon_fully_decomp[c]

            num_leading = 0
            for d in decomposed:
                if d not in self.combining_classes:
                    break
                num_leading += 1

            num_trailing = 0
            for d in reversed(decomposed):
                if d not in self.combining_classes:
                    break
                num_trailing += 1

            if num_leading > 0:
                leading_nonstarters[c] = num_leading
            if num_trailing > 0:
                trailing_nonstarters[c] = num_trailing

        return leading_nonstarters, trailing_nonstarters

hexify = lambda c: '{:04X}'.format(c)
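
# Illustrative sketch (not used by the generator): Hangul syllables are skipped
# in the tables above because their canonical decomposition can be computed
# arithmetically (Unicode Section 3.12, Conjoining Jamo Behavior). A consumer
# of the generated tables is expected to derive it roughly as follows; the jamo
# base constants are the standard ones, but they are an assumption here, since
# this script only defines S_BASE and the L/V/T counts.
def _hangul_decompose_sketch(s):
    L_BASE, V_BASE, T_BASE = 0x1100, 0x1161, 0x11A7
    s_index = s - S_BASE
    assert 0 <= s_index < S_COUNT
    l_part = L_BASE + s_index // (V_COUNT * T_COUNT)
    v_part = V_BASE + (s_index % (V_COUNT * T_COUNT)) // T_COUNT
    t_index = s_index % T_COUNT
    # A trailing jamo is only present when the T index is nonzero.
    return [l_part, v_part] if t_index == 0 else [l_part, v_part, T_BASE + t_index]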

# Test whether `first` and `last` are corresponding "<..., First>" and
# "<..., Last>" markers.
def is_first_and_last(first, last):
    if not first.startswith('<') or not first.endswith(', First>'):
        return False
    if not last.startswith('<') or not last.endswith(', Last>'):
        return False
    return first[1:-8] == last[1:-7]

def gen_mph_data(name, d, kv_type, kv_callback):
    # Note: this writes to the module-level `out` opened in __main__.
    (salt, keys) = minimal_perfect_hash(d)
    out.write("pub(crate) const %s_SALT: &[u16] = &[\n" % name.upper())
    for s in salt:
        out.write("    0x{:x},\n".format(s))
    out.write("];\n")
    out.write("pub(crate) const {}_KV: &[{}] = &[\n".format(name.upper(), kv_type))
    for k in keys:
        out.write("    {},\n".format(kv_callback(k)))
    out.write("];\n\n")

def gen_combining_class(combining_classes, out):
    gen_mph_data('canonical_combining_class', combining_classes, 'u32',
        lambda k: "0x{:X}".format(int(combining_classes[k]) | (k << 8)))

def gen_composition_table(canon_comp, out):
    table = {}
    for (c1, c2), c3 in canon_comp.items():
        if c1 < 0x10000 and c2 < 0x10000:
            table[(c1 << 16) | c2] = c3
    gen_mph_data('COMPOSITION_TABLE', table, '(u32, char)',
        lambda k: "(0x%s, '\\u{%s}')" % (hexify(k), hexify(table[k])))

    out.write("pub(crate) fn composition_table_astral(c1: char, c2: char) -> Option<char> {\n")
    out.write("    match (c1, c2) {\n")
    for (c1, c2), c3 in sorted(canon_comp.items()):
        if c1 >= 0x10000 and c2 >= 0x10000:
            out.write("        ('\\u{%s}', '\\u{%s}') => Some('\\u{%s}'),\n" % (hexify(c1), hexify(c2), hexify(c3)))

    out.write("        _ => None,\n")
    out.write("    }\n")
    out.write("}\n")
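
# Illustrative sketch (not part of the generated output): gen_composition_table
# packs each BMP pair into a single u32 key, high half first, which is what the
# generated COMPOSITION_TABLE_KV entries are keyed by. For example, the
# canonical pair (U+0041, U+0300) composes to U+00C0 and is keyed as 0x00410300.
def _bmp_composition_key_sketch(c1, c2):
    assert c1 < 0x10000 and c2 < 0x10000
    return (c1 << 16) | c2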

def gen_decomposition_tables(canon_decomp, compat_decomp, cjk_compat_variants_decomp, out):
    tables = [(canon_decomp, 'canonical'), (compat_decomp, 'compatibility'), (cjk_compat_variants_decomp, 'cjk_compat_variants')]
    for table, name in tables:
        offsets = {}
        offset = 0
        out.write("pub(crate) const %s_DECOMPOSED_CHARS: &[char] = &[\n" % name.upper())
        for k, v in table.items():
            offsets[k] = offset
            offset += len(v)
            for c in v:
                out.write("    '\\u{%s}',\n" % hexify(c))
        # The largest offset must fit in a u16.
        assert offset < 65536
        out.write("];\n")
        gen_mph_data(name + '_decomposed', table, "(u32, (u16, u16))",
            lambda k: "(0x{:x}, ({}, {}))".format(k, offsets[k], len(table[k])))

def gen_qc_match(prop_table, out):
    out.write("    match c {\n")

    for low, high, data in prop_table:
        assert data in ('N', 'M')
        result = "No" if data == 'N' else "Maybe"
        if high:
            out.write(r"        '\u{%s}'...'\u{%s}' => %s," % (low, high, result))
        else:
            out.write(r"        '\u{%s}' => %s," % (low, result))
        out.write("\n")

    out.write("        _ => Yes,\n")
    out.write("    }\n")

def gen_nfc_qc(prop_tables, out):
    out.write("#[inline]\n")
    out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
    out.write("pub fn qc_nfc(c: char) -> IsNormalized {\n")
    gen_qc_match(prop_tables['NFC_QC'], out)
    out.write("}\n")

def gen_nfkc_qc(prop_tables, out):
    out.write("#[inline]\n")
    out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
    out.write("pub fn qc_nfkc(c: char) -> IsNormalized {\n")
    gen_qc_match(prop_tables['NFKC_QC'], out)
    out.write("}\n")

def gen_nfd_qc(prop_tables, out):
    out.write("#[inline]\n")
    out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
    out.write("pub fn qc_nfd(c: char) -> IsNormalized {\n")
    gen_qc_match(prop_tables['NFD_QC'], out)
    out.write("}\n")

def gen_nfkd_qc(prop_tables, out):
    out.write("#[inline]\n")
    out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
    out.write("pub fn qc_nfkd(c: char) -> IsNormalized {\n")
    gen_qc_match(prop_tables['NFKD_QC'], out)
    out.write("}\n")

def gen_combining_mark(general_category_mark, out):
    gen_mph_data('combining_mark', general_category_mark, 'u32',
        lambda k: '0x{:04x}'.format(k))

def gen_public_assigned(general_category_public_assigned, out):
    # This could be done as a hash but the table is somewhat small.
    out.write("#[inline]\n")
    out.write("pub fn is_public_assigned(c: char) -> bool {\n")
    out.write("    match c {\n")

    start = True
    for first, last in general_category_public_assigned:
        if start:
            out.write("        ")
            start = False
        else:
            out.write("        | ")
        if first == last:
            out.write("'\\u{%s}'\n" % hexify(first))
        else:
            out.write("'\\u{%s}'..='\\u{%s}'\n" % (hexify(first), hexify(last)))
    out.write("        => true,\n")

    out.write("        _ => false,\n")
    out.write("    }\n")
    out.write("}\n")
    out.write("\n")
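
# Worked example for the stream-safe tables emitted below (illustrative, derived
# from UnicodeData.txt): U+0344 COMBINING GREEK DIALYTIKA TONOS decomposes to
# <U+0308, U+0301>, both of which have a nonzero canonical combining class, so
# it is recorded with 2 leading and 2 trailing non-starters.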

def gen_stream_safe(leading, trailing, out):
    # This could be done as a hash but the table is very small.
    out.write("#[inline]\n")
    out.write("pub fn stream_safe_leading_nonstarters(c: char) -> usize {\n")
    out.write("    match c {\n")

    for char, num_leading in sorted(leading.items()):
        out.write("        '\\u{%s}' => %d,\n" % (hexify(char), num_leading))

    out.write("        _ => 0,\n")
    out.write("    }\n")
    out.write("}\n")
    out.write("\n")

    gen_mph_data('trailing_nonstarters', trailing, 'u32',
        lambda k: "0x{:X}".format(int(trailing[k]) | (k << 8)))

def gen_tests(tests, out):
    out.write("""#[derive(Debug)]
pub struct NormalizationTest {
    pub source: &'static str,
    pub nfc: &'static str,
    pub nfd: &'static str,
    pub nfkc: &'static str,
    pub nfkd: &'static str,
}

""")

    out.write("pub const NORMALIZATION_TESTS: &[NormalizationTest] = &[\n")
    str_literal = lambda s: '"%s"' % "".join("\\u{%s}" % c for c in s)

    for test in tests:
        out.write("    NormalizationTest {\n")
        out.write("        source: %s,\n" % str_literal(test.source))
        out.write("        nfc: %s,\n" % str_literal(test.nfc))
        out.write("        nfd: %s,\n" % str_literal(test.nfd))
        out.write("        nfkc: %s,\n" % str_literal(test.nfkc))
        out.write("        nfkd: %s,\n" % str_literal(test.nfkd))
        out.write("    },\n")

    out.write("];\n")

# Returns a hash value guaranteed to be less than n.
def my_hash(x, salt, n):
    # This hash is based on the theory that multiplication is efficient.
    mask_32 = 0xffffffff
    y = ((x + salt) * 2654435769) & mask_32
    y ^= (x * 0x31415926) & mask_32
    return (y * n) >> 32

# Compute a minimal perfect hash function; d can be either a dict or a list of keys.
def minimal_perfect_hash(d):
    n = len(d)
    buckets = dict((h, []) for h in range(n))
    for key in d:
        h = my_hash(key, 0, n)
        buckets[h].append(key)
    bsorted = [(len(buckets[h]), h) for h in range(n)]
    bsorted.sort(reverse=True)
    claimed = [False] * n
    salts = [0] * n
    keys = [0] * n
    for (bucket_size, h) in bsorted:
        # Note: the traditional perfect hashing approach would also special-case
        # bucket_size == 1 here and assign any empty slot, rather than iterating
        # until rehash finds an empty slot. But we're not doing that so we can
        # avoid the branch.
        if bucket_size == 0:
            break
        else:
            for salt in range(1, 32768):
                rehashes = [my_hash(key, salt, n) for key in buckets[h]]
                # Make sure there are no rehash collisions within this bucket.
                if all(not claimed[r] for r in rehashes):
                    if len(set(rehashes)) < bucket_size:
                        continue
                    salts[h] = salt
                    for key in buckets[h]:
                        rehash = my_hash(key, salt, n)
                        claimed[rehash] = True
                        keys[rehash] = key
                    break
            if salts[h] == 0:
                print("minimal perfect hashing failed")
                # Note: if this happens (because of unfortunate data), then there are
                # a few things that could be done. First, the hash function could be
                # tweaked. Second, the bucket order could be scrambled (especially the
                # singletons). Right now, the buckets are sorted, which has the advantage
                # of being deterministic.
                #
                # As a more extreme approach, the singleton bucket optimization could be
                # applied (give the direct address for singleton buckets, rather than
                # relying on a rehash). That is definitely the more standard approach in
                # the minimal perfect hashing literature, but in testing the branch was a
                # significant slowdown.
                sys.exit(1)
    return (salts, keys)
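
# Illustrative sketch (not called by this script): looking a key up in the
# (salts, keys) pair produced by minimal_perfect_hash is a two-level hash,
# which is what the generated Rust lookup over the *_SALT and *_KV tables is
# expected to do. The unsalted hash picks a bucket, whose salt then picks the
# final slot; callers still compare against the stored key, since arbitrary
# inputs also land on some slot.
def _mph_lookup_sketch(x, salts, keys):
    n = len(salts)
    salt = salts[my_hash(x, 0, n)]
    index = my_hash(x, salt, n)
    return index if keys[index] == x else None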

if __name__ == '__main__':
    data = UnicodeData()
    with open("tables.rs", "w", newline="\n") as out:
        out.write(PREAMBLE)
        out.write("use crate::quick_check::IsNormalized;\n")
        out.write("use crate::quick_check::IsNormalized::*;\n")
        out.write("\n")

        version = "(%s, %s, %s)" % tuple(UNICODE_VERSION.split("."))
        out.write("#[allow(unused)]\n")
        out.write("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n\n" % version)

        gen_combining_class(data.combining_classes, out)
        out.write("\n")

        gen_composition_table(data.canon_comp, out)
        out.write("\n")

        gen_decomposition_tables(data.canon_fully_decomp, data.compat_fully_decomp, data.cjk_compat_variants_fully_decomp, out)

        gen_combining_mark(data.general_category_mark, out)
        out.write("\n")

        gen_public_assigned(data.general_category_public_assigned, out)
        out.write("\n")

        gen_nfc_qc(data.norm_props, out)
        out.write("\n")

        gen_nfkc_qc(data.norm_props, out)
        out.write("\n")

        gen_nfd_qc(data.norm_props, out)
        out.write("\n")

        gen_nfkd_qc(data.norm_props, out)
        out.write("\n")

        gen_stream_safe(data.ss_leading, data.ss_trailing, out)
        out.write("\n")

    with open("normalization_tests.rs", "w", newline="\n") as out:
        out.write(PREAMBLE)
        gen_tests(data.norm_tests, out)