#!/usr/bin/env python3

"""Generator of the function to prohibit certain vowel sequences.

It creates ``_hb_preprocess_text_vowel_constraints``, which inserts dotted
circles into sequences prohibited by the USE script development spec.
This function should be used as the ``preprocess_text`` of an
``hb_ot_shaper_t``.

usage: ./gen-vowel-constraints.py ms-use/IndicShapingInvalidCluster.txt Scripts.txt

Input file:
* https://unicode.org/Public/UCD/latest/ucd/Scripts.txt
"""

import collections
import sys


def write (s):
    """Write the string ``s`` to standard output as UTF-8 bytes.

    The text layer of ``sys.stdout`` is flushed first so that the bytes
    written directly to ``sys.stdout.buffer`` stay in order with whatever
    was previously emitted through ``print``.
    """
    sys.stdout.flush ()
    sys.stdout.buffer.write (s.encode ('utf-8'))


class ConstraintSet (object):
    """A set of prohibited code point sequences.

    The set starts out holding the single sequence given to the
    constructor; more sequences are merged in with ``add``.  ``str``
    renders the set as C code that sets the C variable ``matched``.

    Args:
        constraint (List[int]): A prohibited code point sequence.

    """
    def __init__ (self, constraint):
        # Either a list or a dictionary. As a list of code points, it
        # represents a prohibited code point sequence. As a dictionary,
        # it represents a set of prohibited sequences, where each item
        # represents the set of prohibited sequences starting with the
        # key (a code point) concatenated with any of the values
        # (ConstraintSets).
        self._c = constraint

    def add (self, constraint):
        """Add a constraint to this set.

        An empty ``constraint`` is ignored.  While the set still holds a
        single sequence (a list):

        * if ``constraint`` is a prefix of that sequence, the shorter
          ``constraint`` replaces it;
        * if that sequence is a prefix of ``constraint``, nothing changes;
        * otherwise the set is converted to a dictionary keyed by the
          first code point of the stored sequence.

        Once the set is a dictionary, the tail of ``constraint`` is added
        to the child set stored under its first code point, creating that
        child if necessary.
        """
        if not constraint:
            return
        first = constraint[0]
        rest = constraint[1:]
        if isinstance (self._c, list):
            if constraint == self._c[:len (constraint)]:
                self._c = constraint
            elif self._c != constraint[:len (self._c)]:
                self._c = {self._c[0]: ConstraintSet (self._c[1:])}
        # Not an ``else``: the list branch above may just have turned the
        # set into a dictionary, in which case the new constraint still
        # has to be inserted.
        if isinstance (self._c, dict):
            if first in self._c:
                self._c[first].add (rest)
            else:
                self._c[first] = ConstraintSet (rest)

    @staticmethod
    def _indent (depth):
        """Return the leading whitespace for nesting level ``depth``."""
        # NOTE(review): as written this maps every space to a tab, so the
        # result is ``depth`` tab characters.  The whitespace inside the
        # string literals of this file may have been collapsed before it
        # reached review -- confirm against the canonical copy.
        return (' ' * depth).replace (' ', '\t')

    def __str__ (self, index=0, depth=4):
        """Render this set as C code that assigns to ``matched``.

        Args:
            index (int): Offset from the current buffer position at which
                the first code point of this set is looked up.  Offset 0
                is rendered as ``buffer->cur ()``.
            depth (int): Nesting level passed to ``_indent``.

        Returns:
            str: The generated C statements, each terminated by a newline.
        """
        s = []
        indent = self._indent (depth)
        if isinstance (self._c, list):
            if not self._c:
                assert index == 2, 'Cannot use `matched` for this constraint; the general case has not been implemented'
                s.append ('{}matched = true;\n'.format (indent))
            elif len (self._c) == 1:
                assert index == 1, 'Cannot use `matched` for this constraint; the general case has not been implemented'
                s.append ('{}matched = 0x{:04X}u == buffer->cur ({}).codepoint;\n'.format (indent, self._c[0], index or ''))
            else:
                s.append ('{}if (0x{:04X}u == buffer->cur ({}).codepoint &&\n'.format (indent, self._c[0], index or ''))
                if index:
                    # Only emitted for a non-zero starting offset.
                    s.append ('{}buffer->idx + {} < count &&\n'.format (self._indent (depth + 2), index + 1))
                last = len (self._c) - 1
                for i, cp in enumerate (self._c[1:], start=1):
                    s.append ('{}0x{:04X}u == buffer->cur ({}).codepoint{}\n'.format (
                        self._indent (depth + 2), cp, index + i, ')' if i == last else ' &&'))
                s.append ('{}{{\n'.format (indent))
                for _ in range (index):
                    s.append ('{}(void) buffer->next_glyph ();\n'.format (self._indent (depth + 1)))
                s.append ('{}matched = true;\n'.format (self._indent (depth + 1)))
                s.append ('{}}}\n'.format (indent))
        else:
            s.append ('{}switch (buffer->cur ({}).codepoint)\n'.format(indent, index or ''))
            s.append ('{}{{\n'.format (indent))
            # Group the code points whose generated body is textually
            # identical so that they can share one run of ``case`` labels.
            cases = collections.defaultdict (set)
            for first, rest in sorted (self._c.items ()):
                cases[rest.__str__ (index + 1, depth + 2)].add (first)
            # Groups are ordered by their smallest label; labels are
            # written four to a line.
            for body, labels in sorted (cases.items (), key=lambda b_ls: min (b_ls[1])):
                for i, cp in enumerate (sorted (labels)):
                    if i % 4 == 0:
                        s.append (self._indent (depth + 1))
                    else:
                        s.append (' ')
                    s.append ('case 0x{:04X}u:{}'.format (cp, '\n' if i % 4 == 3 else ''))
                if len (labels) % 4 != 0:
                    s.append ('\n')
                s.append (body)
                s.append ('{}break;\n'.format (self._indent (depth + 2)))
            s.append ('{}}}\n'.format (indent))
        return ''.join (s)


def parse_scripts (f):
    """Parse an open ``Scripts.txt``-style text file.

    Args:
        f: A text file object positioned at the start of the file.

    Returns:
        A tuple ``(header, scripts, script_order)``: ``header`` is the
        first two lines exactly as read; ``scripts`` maps every listed
        code point to its script name; ``script_order`` maps each script
        name to the start of the first range in which it appears.
    """
    header = [f.readline () for _ in range (2)]
    scripts = {}
    script_order = {}
    for line in f:
        # Everything from the first '#' on is a comment.
        line = line.partition ('#')[0]
        fields = [x.strip () for x in line.split (';')]
        if len (fields) == 1:
            continue
        uu = fields[0].split ('..')
        start = int (uu[0], 16)
        end = start if len (uu) == 1 else int (uu[1], 16)
        script = fields[1]
        for u in range (start, end + 1):
            scripts[u] = script
        if script not in script_order:
            script_order[script] = start
    return header, scripts, script_order


def parse_constraints (f, scripts):
    """Parse an open file listing prohibited code point sequences.

    The lines up to (not including) the first line that consists of a
    lone ``#`` form the header.  Each following line holds hexadecimal
    code points separated by whitespace; anything after ``#`` or ``;`` is
    ignored.

    Args:
        f: A text file object positioned at the start of the file.
        scripts (Dict[int, str]): Code point to script name, as returned
            by ``parse_scripts``.

    Returns:
        A tuple ``(header, constraints)``: ``header`` is the list of
        stripped header lines and ``constraints`` maps the script name of
        each sequence's first code point to a ``ConstraintSet``.

    Raises:
        ValueError: If a sequence has fewer than two code points, or if
            the file contains no sequence at all.
        KeyError: If the first code point of a sequence is missing from
            ``scripts``.
    """
    header = []
    # ``readline`` returns '' at end of file; stopping there keeps a file
    # without a lone '#' line from being read forever.
    for line in iter (f.readline, ''):
        line = line.strip ()
        if line == '#':
            break
        header.append (line)
    constraints = {}
    for line in f:
        line = line.partition ('#')[0]
        constraint = [int (cp, 16) for cp in line.split (';')[0].split ()]
        if not constraint:
            continue
        if len (constraint) < 2:
            raise ValueError ('Prohibited sequence is too short: {}'.format (constraint))
        script = scripts[constraint[0]]
        if script in constraints:
            constraints[script].add (constraint)
        else:
            constraints[script] = ConstraintSet (constraint)
    if not constraints:
        raise ValueError ('No constraints found')
    return header, constraints


def _print_lines (lines):
    """Print each string of ``lines`` on its own line."""
    for line in lines:
        print (line)


def generate (constraints_header, scripts_header, constraints, script_order):
    """Print the generated C source to standard output.

    Args:
        constraints_header (List[str]): Header lines of the constraints file.
        scripts_header (List[str]): Header lines of the scripts file.
        constraints (Dict[str, ConstraintSet]): Constraint set per script.
        script_order (Dict[str, int]): Sort key per script; one ``case``
            is emitted per script in ascending order of this value.
    """
    _print_lines ((
        '/* == Start of generated functions == */',
        '/*',
        ' * The following functions are generated by running:',
        ' *',
        ' * %s ms-use/IndicShapingInvalidCluster.txt Scripts.txt' % sys.argv[0],
        ' *',
        ' * on files with these headers:',
        ' *',
    ))
    for line in constraints_header:
        print (' * %s' % line.strip ())
    print (' *')
    for line in scripts_header:
        print (' * %s' % line.strip ())
    _print_lines ((
        ' */',
        '',
        '#include "hb.hh"',
        '',
        '#ifndef HB_NO_OT_SHAPE',
        '',
        '#include "hb-ot-shaper-vowel-constraints.hh"',
        '',
        'static void',
        '_output_dotted_circle (hb_buffer_t *buffer)',
        '{',
        ' (void) buffer->output_glyph (0x25CCu);',
        ' _hb_glyph_info_reset_continuation (&buffer->prev());',
        '}',
        '',
        'static void',
        '_output_with_dotted_circle (hb_buffer_t *buffer)',
        '{',
        ' _output_dotted_circle (buffer);',
        ' (void) buffer->next_glyph ();',
        '}',
        '',
        'void',
        '_hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan HB_UNUSED,',
        '\t\t\t\t hb_buffer_t *buffer,',
        '\t\t\t\t hb_font_t *font HB_UNUSED)',
        '{',
        '#ifdef HB_NO_OT_SHAPER_VOWEL_CONSTRAINTS',
        ' return;',
        '#endif',
        ' if (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE)',
        ' return;',
        '',
        ' /* UGLY UGLY UGLY business of adding dotted-circle in the middle of',
        ' * vowel-sequences that look like another vowel. Data for each script',
        ' * collected from the USE script development spec.',
        ' *',
        ' * https://github.com/harfbuzz/harfbuzz/issues/1019',
        ' */',
        ' buffer->clear_output ();',
        ' unsigned int count = buffer->len;',
        ' switch ((unsigned) buffer->props.script)',
        ' {',
    ))

    ordered = sorted (constraints.items (), key=lambda s_c: script_order[s_c[0]])
    for script, constraint_set in ordered:
        print (' case HB_SCRIPT_{}:'.format (script.upper ()))
        print (' for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)')
        print (' {')
        print ('\tbool matched = false;')
        write (str (constraint_set))
        print ('\t(void) buffer->next_glyph ();')
        print ('\tif (matched) _output_with_dotted_circle (buffer);')
        print (' }')
        print (' break;')
        print ()

    _print_lines ((
        ' default:',
        ' break;',
        ' }',
        ' buffer->sync ();',
        '}',
        '',
        '',
        '#endif',
        '/* == End of generated functions == */',
    ))


def main ():
    """Read the two input files named on the command line and print the C code."""
    if len (sys.argv) != 3:
        sys.exit (__doc__)

    with open (sys.argv[2], encoding='utf-8') as f:
        scripts_header, scripts, script_order = parse_scripts (f)

    with open (sys.argv[1], encoding='utf-8') as f:
        constraints_header, constraints = parse_constraints (f, scripts)

    generate (constraints_header, scripts_header, constraints, script_order)


if __name__ == '__main__':
    main ()