xref: /aosp_15_r20/external/pcre/maint/GenerateUcpHeader.py (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi#! /usr/bin/python
2*22dc650dSSadaf Ebrahimi
3*22dc650dSSadaf Ebrahimi#                   PCRE2 UNICODE PROPERTY SUPPORT
4*22dc650dSSadaf Ebrahimi#                   ------------------------------
5*22dc650dSSadaf Ebrahimi
6*22dc650dSSadaf Ebrahimi# This script generates the pcre2_ucp.h file from Unicode data files. This
7*22dc650dSSadaf Ebrahimi# header uses enumerations to give names to Unicode property types and script
8*22dc650dSSadaf Ebrahimi# names.
9*22dc650dSSadaf Ebrahimi
10*22dc650dSSadaf Ebrahimi# This script was created in December 2021 as part of the Unicode data
11*22dc650dSSadaf Ebrahimi# generation refactoring.
12*22dc650dSSadaf Ebrahimi
13*22dc650dSSadaf Ebrahimi
14*22dc650dSSadaf Ebrahimi# Import common data lists and functions
15*22dc650dSSadaf Ebrahimi
16*22dc650dSSadaf Ebrahimifrom GenerateCommon import \
17*22dc650dSSadaf Ebrahimi  bidi_classes, \
18*22dc650dSSadaf Ebrahimi  bool_properties, \
19*22dc650dSSadaf Ebrahimi  bool_props_list_item_size, \
20*22dc650dSSadaf Ebrahimi  break_properties, \
21*22dc650dSSadaf Ebrahimi  category_names, \
22*22dc650dSSadaf Ebrahimi  general_category_names, \
23*22dc650dSSadaf Ebrahimi  script_list_item_size, \
24*22dc650dSSadaf Ebrahimi  script_names, \
25*22dc650dSSadaf Ebrahimi  open_output
26*22dc650dSSadaf Ebrahimi
# Open the output file. open_output() does not return on failure, and it also
# writes the standard header boilerplate before we get the file back.

f = open_output("pcre2_ucp.h")

# Output this file's heading text: the include guard followed by the comment
# that introduces the enumerations. The text is spelled out one output line per
# literal so that it reads exactly as it will appear in the generated header.

f.write(
  "#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD\n"
  "#define PCRE2_UCP_H_IDEMPOTENT_GUARD\n"
  "\n"
  "/* This file contains definitions of the Unicode property values that are\n"
  "returned by the UCD access macros and used throughout PCRE2.\n"
  "\n"
  "IMPORTANT: The specific values of the first two enums (general and particular\n"
  "character categories) are assumed by the table called catposstab in the file\n"
  "pcre2_auto_possess.c. They are unlikely to change, but should be checked after\n"
  "an update. */\n"
  "\n")
46*22dc650dSSadaf Ebrahimi
# General character categories: one enumerator per entry of
# general_category_names, emitted in list order.

f.write("/* These are the general character categories. */\n\nenum {\n")
for name in general_category_names:
  f.write("  ucp_{},\n".format(name))
f.write("};\n\n")
51*22dc650dSSadaf Ebrahimi
# Particular character categories. category_names is read as a flat sequence
# of pairs: each even slot supplies the enumerator suffix and the following odd
# slot supplies the text placed in the trailing C comment.

f.write("/* These are the particular character categories. */\n\nenum {\n")
for pos in range(0, len(category_names), 2):
  abbrev = category_names[pos]
  meaning = category_names[pos + 1]
  f.write("  ucp_{},    /* {} */\n".format(abbrev, meaning))
f.write("};\n\n")
56*22dc650dSSadaf Ebrahimi
# Boolean properties: one enumerator per entry of bool_properties.

f.write("/* These are Boolean properties. */\n\nenum {\n")
for prop in bool_properties:
  f.write("  ucp_{},\n".format(prop))

# The count enumerator is written after all the properties and closes the enum.

f.write("  /* This must be last */\n  ucp_Bprop_Count\n};\n\n")

# Publish the entry size of ucd_boolprop_sets[] as a preprocessor constant.

f.write("/* Size of entries in ucd_boolprop_sets[] */\n\n")
f.write("#define ucd_boolprop_sets_item_size %d\n\n" % bool_props_list_item_size)
66*22dc650dSSadaf Ebrahimi
# Bidi class values. bidi_classes is read as a flat sequence of pairs: the
# even slot is the enumerator suffix, the odd slot the comment text.

f.write("/* These are the bidi class values. */\n\nenum {\n")
for pos in range(0, len(bidi_classes), 2):
  label = bidi_classes[pos]
  meaning = bidi_classes[pos + 1]
  # Pad "<label>," out to five columns so that the comments line up; labels
  # of four or more characters get no padding.
  f.write("  ucp_bidi{} /* {} */\n".format((label + ",").ljust(5), meaning))
f.write("};\n\n")
72*22dc650dSSadaf Ebrahimi
# Grapheme break properties. break_properties is read as a flat sequence of
# pairs: the even slot is the enumerator suffix, the odd slot the comment text.

f.write(
  "/* These are grapheme break properties. The Extended Pictographic property\n"
  "comes from the emoji-data.txt file. */\n\nenum {\n")
for pos in range(0, len(break_properties), 2):
  label = break_properties[pos]
  meaning = break_properties[pos + 1]
  # Pad "<label>," out to 22 columns so that the comments line up; labels of
  # 21 or more characters get no padding.
  f.write("  ucp_gb{} /* {} */\n".format((label + ",").ljust(22), meaning))
f.write("};\n\n")
79*22dc650dSSadaf Ebrahimi
# Script identifications. The enum is emitted in two commented groups; the
# second group's comment is written immediately before the "Unknown" entry.

f.write(
  "/* These are the script identifications. */\n\nenum {\n"
  "  /* Scripts which has characters in other scripts. */\n")
for script in script_names:
  if script == "Unknown":
    f.write("\n  /* Scripts which has no characters in other scripts. */\n")
  f.write("  ucp_{},\n".format(script))

# A blank line, then the count enumerator, which closes the enum.

f.write("\n  /* This must be last */\n  ucp_Script_Count\n};\n\n")

# Publish the entry size of ucd_script_sets[] as a preprocessor constant.

f.write("/* Size of entries in ucd_script_sets[] */\n\n")
f.write("#define ucd_script_sets_item_size %d\n\n" % script_list_item_size)
92*22dc650dSSadaf Ebrahimi
# Close the include guard, add the end-of-file marker, and finish the output.

f.write(
  "#endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */\n\n"
  "/* End of pcre2_ucp.h */\n")

f.close()
97*22dc650dSSadaf Ebrahimi
98*22dc650dSSadaf Ebrahimi# End
99