#! /usr/bin/python

# PCRE2 UNICODE PROPERTY SUPPORT
# ------------------------------

# This script generates the pcre2_ucp.h file from Unicode data files. This
# header uses enumerations to give names to Unicode property types and script
# names.

# This script was created in December 2021 as part of the Unicode data
# generation refactoring.

# NOTE(review): this text was recovered from a copy in which runs of
# whitespace had been collapsed. The spacing inside the string literals below
# is reproduced exactly as it appeared there; confirm it against the upstream
# script if the precise layout of the generated header matters.


# Import common data lists and functions

from GenerateCommon import \
  bidi_classes, \
  bool_properties, \
  bool_props_list_item_size, \
  break_properties, \
  category_names, \
  general_category_names, \
  script_list_item_size, \
  script_names, \
  open_output

# Open the output file (no return on failure). This call also writes standard
# header boilerplate.

f = open_output("pcre2_ucp.h")

# All generation happens inside try/finally so that the output file is closed
# even when one of the steps below raises (for example an IndexError caused by
# an odd-length name/comment list).

try:

  # Output this file's heading text

  f.write("""\
#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
#define PCRE2_UCP_H_IDEMPOTENT_GUARD

/* This file contains definitions of the Unicode property values that are
returned by the UCD access macros and used throughout PCRE2.

IMPORTANT: The specific values of the first two enums (general and particular
character categories) are assumed by the table called catposstab in the file
pcre2_auto_possess.c. They are unlikely to change, but should be checked after
an update. */
\n""")

  # General categories: one enumerator per list entry.

  f.write("/* These are the general character categories. */\n\nenum {\n")
  for i in general_category_names:
    f.write(" ucp_%s,\n" % i)
  f.write("};\n\n")

  # Particular categories: the list is read two items at a time, the first
  # being the enumerator name and the second the text of its comment.

  f.write("/* These are the particular character categories. */\n\nenum {\n")
  for i in range(0, len(category_names), 2):
    f.write(" ucp_%s, /* %s */\n" % (category_names[i], category_names[i+1]))
  f.write("};\n\n")

  # Boolean properties, followed by a terminating count enumerator.

  f.write("/* These are Boolean properties. */\n\nenum {\n")
  for i in bool_properties:
    f.write(" ucp_%s,\n" % i)

  f.write(" /* This must be last */\n")
  f.write(" ucp_Bprop_Count\n};\n\n")

  f.write("/* Size of entries in ucd_boolprop_sets[] */\n\n")
  f.write("#define ucd_boolprop_sets_item_size %d\n\n" % bool_props_list_item_size)

  # Bidi classes: name/comment pairs. Each name is padded towards 4 characters
  # so the comments line up; a longer name simply gets no padding because a
  # non-positive repeat count yields an empty string.

  f.write("/* These are the bidi class values. */\n\nenum {\n")
  for i in range(0, len(bidi_classes), 2):
    sp = ' ' * (4 - len(bidi_classes[i]))
    f.write(" ucp_bidi%s,%s /* %s */\n" % (bidi_classes[i], sp, bidi_classes[i+1]))
  f.write("};\n\n")

  # Grapheme break properties: name/comment pairs, names padded towards 21
  # characters in the same way as the bidi classes above.

  f.write("/* These are grapheme break properties. The Extended Pictographic "
    "property\ncomes from the emoji-data.txt file. */\n\nenum {\n")
  for i in range(0, len(break_properties), 2):
    sp = ' ' * (21 - len(break_properties[i]))
    f.write(" ucp_gb%s,%s /* %s */\n" % (break_properties[i], sp, break_properties[i+1]))
  f.write("};\n\n")

  # Script identifications. A separating comment is emitted immediately before
  # the "Unknown" entry, splitting the enum into two commented groups. The enum
  # ends with a count enumerator.

  f.write("/* These are the script identifications. */\n\nenum {\n /* Scripts which has characters in other scripts. */\n")
  for i in script_names:
    if i == "Unknown":
      f.write("\n /* Scripts which has no characters in other scripts. */\n")
    f.write(" ucp_%s,\n" % i)
  f.write("\n")

  f.write(" /* This must be last */\n")
  f.write(" ucp_Script_Count\n};\n\n")

  f.write("/* Size of entries in ucd_script_sets[] */\n\n")
  f.write("#define ucd_script_sets_item_size %d\n\n" % script_list_item_size)

  # Close the include guard and finish the file.

  f.write("#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */\n\n")
  f.write("/* End of pcre2_ucp.h */\n")

finally:
  f.close()

# End