xref: /aosp_15_r20/external/pcre/maint/GenerateTest26.py (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi#! /usr/bin/python
2*22dc650dSSadaf Ebrahimi
3*22dc650dSSadaf Ebrahimi#                   PCRE2 UNICODE PROPERTY SUPPORT
4*22dc650dSSadaf Ebrahimi#                   ------------------------------
5*22dc650dSSadaf Ebrahimi#
6*22dc650dSSadaf Ebrahimi# This file auto-generates unicode property tests and their expected output.
7*22dc650dSSadaf Ebrahimi# It is recommended to re-run this generator after the unicode files are
8*22dc650dSSadaf Ebrahimi# updated. The names of the generated files are `testinput26` and `testoutput26`
9*22dc650dSSadaf Ebrahimi
10*22dc650dSSadaf Ebrahimiimport re
11*22dc650dSSadaf Ebrahimiimport sys
12*22dc650dSSadaf Ebrahimi
13*22dc650dSSadaf Ebrahimifrom GenerateCommon import \
14*22dc650dSSadaf Ebrahimi  script_names, \
15*22dc650dSSadaf Ebrahimi  script_abbrevs
16*22dc650dSSadaf Ebrahimi
def write_both(text):
  """Append `text` to both generated files.

  The test input file and the expected output file share every line except
  the match results, so the common lines are written through this helper.
  """
  for target in (input_file, output_file):
    target.write(text)
20*22dc650dSSadaf Ebrahimi
def to_string_char(ch_idx):
  """Return the pcre2test spelling of the character with code point `ch_idx`.

  Printable ASCII characters (space up to and including `~`) are returned
  unchanged. Every other code point is returned as a `\\x{...}` escape with
  at least two lower-case hex digits, e.g. `\\x{0a}`, `\\x{7f}`, `\\x{10ffff}`.

  DEL (0x7f) is escaped as well: pcre2test does not treat it as printable
  and prints it as `\\x{7f}`, so emitting the raw byte would produce an
  expected-output line that can never match.
  """
  if 32 <= ch_idx < 127:
    return chr(ch_idx)
  # "%02x" pads values below 16 to two digits and leaves larger ones as is.
  return "\\x{%02x}" % ch_idx
28*22dc650dSSadaf Ebrahimi
# At most one command line argument is accepted: the directory in which the
# generated files are created. Without it, the files are created using bare
# file names, i.e. relative to the current directory.
if len(sys.argv) > 2:
  print('** Too many arguments: just give a directory name')
  sys.exit(1)
elif len(sys.argv) == 2:
  output_directory = sys.argv[1]
  # The file names are appended directly, so make sure a separator is there.
  if output_directory[-1:] != "/":
    output_directory = output_directory + "/"
else:
  output_directory = ""

# Both files stay open as module-level handles; write_both() and the
# generator below write through them.
try:
  input_file = open(output_directory + "testinput26", "w")
  output_file = open(output_directory + "testoutput26", "w")
except IOError:
  print("** Couldn't open output files")
  sys.exit(1)

write_both("# These tests are generated by maint/GenerateTest26.py, do not edit.\n\n")

# ---------------------------------------------------------------------------
#                      UNICODE SCRIPT EXTENSION TESTS
# ---------------------------------------------------------------------------

write_both("# Unicode Script Extension tests.\n\n")
53*22dc650dSSadaf Ebrahimi
def _write_script_test(prop, ch_idx, matches):
  """Write one pattern/subject pair and its expected pcre2test result.

  `prop` is the text placed inside `\\p{...}`, `ch_idx` is the code point
  used as the subject, and `matches` tells whether the expected output is
  the matched character or "No match".
  """
  subject = to_string_char(ch_idx)
  write_both("/^\\p{%s}/utf\n" % prop)
  write_both("  %s\n" % subject)
  if matches:
    output_file.write(" 0: %s\n" % subject)
  else:
    output_file.write("No match\n")
  write_both("\n")

def gen_script_tests():
  """Generate the Script and Script_Extensions tests for every script.

  Reads `Unicode.tables/Scripts.txt` and `Unicode.tables/ScriptExtensions.txt`
  (paths relative to the current directory) and writes the tests through
  write_both() and `output_file`.

  For each entry of `script_names` a five element record is collected:
    [0] first code point of the first range listed for the script
    [1] last code point of the last range listed for the script
    [2] lowest code point listed for the script in ScriptExtensions.txt
    [3] highest code point listed for the script in ScriptExtensions.txt
    [4] first extension code point found whose base script is a different one
  Items 2..4 stay None when the script has no script extension entries.
  """
  script_data = [None] * len(script_names)
  # Base script name of every code point; None where Scripts.txt has no entry.
  char_data = [None] * 0x110000

  property_re = re.compile("^([0-9A-F]{4,6})(?:\\.\\.([0-9A-F]{4,6}))? +; ([A-Za-z_ ]+) #")
  prev_name = ""
  script_idx = -1

  with open("Unicode.tables/Scripts.txt") as f:
    for line in f:
      match_obj = property_re.match(line)
      if match_obj is None:
        continue

      # Consecutive lines usually belong to the same script, so the index
      # lookup is only repeated when the name changes.
      name = match_obj.group(3)
      if name != prev_name:
        script_idx = script_names.index(name)
        prev_name = name

      low = int(match_obj.group(1), 16)
      high = low
      if match_obj.group(2) is not None:
        high = int(match_obj.group(2), 16)

      char_data[low] = name
      for ch_idx in range(low + 1, high + 1):
        char_data[ch_idx] = name

      if script_data[script_idx] is None:
        script_data[script_idx] = [low, None, None, None, None]
      script_data[script_idx][1] = high

  extended_script_indicies = {}

  with open("Unicode.tables/ScriptExtensions.txt") as f:
    for line in f:
      match_obj = property_re.match(line)
      if match_obj is None:
        continue

      low = int(match_obj.group(1), 16)
      high = low
      if match_obj.group(2) is not None:
        high = int(match_obj.group(2), 16)

      # split() without an argument ignores repeated or trailing spaces,
      # which would otherwise produce an empty abbreviation.
      for abbrev in match_obj.group(3).split():
        if abbrev not in extended_script_indicies:
          idx = script_abbrevs.index(abbrev)
          extended_script_indicies[abbrev] = idx
          rec = script_data[idx]
          rec[2] = low
          rec[3] = high
        else:
          idx = extended_script_indicies[abbrev]
          rec = script_data[idx]
          if rec[2] > low:
            rec[2] = low
          if rec[3] < high:
            rec[3] = high

        # Remember the first character which belongs to this script only
        # through its script extension list.
        if rec[4] is None:
          name = script_names[idx]
          for ch_idx in range(low, high + 1):
            if char_data[ch_idx] != name:
              rec[4] = ch_idx
              break

  # Alternate between the short and the long spelling of the property name
  # so that both forms are exercised.
  long_property_name = False

  for idx, rec in enumerate(script_data):
    script_name = script_names[idx]

    if script_name == "Unknown":
      continue

    script_abbrev = script_abbrevs[idx]

    write_both("# Base script check\n")
    _write_script_test("sc=%s" % script_name, rec[0], True)
    _write_script_test("Script=%s" % script_abbrev, rec[1], True)

    if rec[2] is not None:
      property_name = "Script_Extensions" if long_property_name else "scx"

      write_both("# Script extension check\n")
      _write_script_test(script_name, rec[2], True)
      _write_script_test("%s=%s" % (property_name, script_abbrev), rec[3], True)

      long_property_name = not long_property_name

      if rec[4] is not None:
        # The character matches the extension property but not the base one.
        write_both("# Script extension only character\n")
        _write_script_test(script_name, rec[4], True)
        _write_script_test("sc=%s" % script_name, rec[4], False)
      else:
        print("External character has not found for %s" % script_name)

    # The code point right after the highest one recorded for the script
    # (base or extension) is used as the non-matching subject.
    high = rec[1]
    if rec[3] is not None and rec[3] > rec[1]:
      high = rec[3]
    write_both("# Character not in script\n")
    _write_script_test(script_name, high + 1, False)
184*22dc650dSSadaf Ebrahimi
185*22dc650dSSadaf Ebrahimi
gen_script_tests()

write_both("# End of testinput26\n")

# Close both files explicitly so that everything written is flushed to disk
# instead of depending on interpreter shutdown to do it.
input_file.close()
output_file.close()
189