# xref: /aosp_15_r20/external/harfbuzz_ng/src/gen-vowel-constraints.py (revision 2d1272b857b1f7575e6e246373e1cb218663db8a)
#!/usr/bin/env python3

"""Generator of the function to prohibit certain vowel sequences.

It creates ``_hb_preprocess_text_vowel_constraints``, which inserts dotted
circles into sequences prohibited by the USE script development spec.
This function should be used as the ``preprocess_text`` of an
``hb_ot_shaper_t``.

usage: ./gen-vowel-constraints.py ms-use/IndicShapingInvalidCluster.txt Scripts.txt

Input files:
* ms-use/IndicShapingInvalidCluster.txt
* https://unicode.org/Public/UCD/latest/ucd/Scripts.txt
"""
15*2d1272b8SAndroid Build Coastguard Worker
import collections
import sys


def write (s):
	"""Write the string ``s`` to standard output encoded as UTF-8.

	The bytes go straight to the binary layer of ``sys.stdout``, so the
	encoding is always UTF-8 whatever encoding the text layer was opened with.

	Args:
		s (str): The text to emit.
	"""
	# Flush the text layer first so that anything already printed through
	# it reaches the underlying stream before these raw bytes do.
	sys.stdout.flush ()
	sys.stdout.buffer.write (s.encode ('utf-8'))
21*2d1272b8SAndroid Build Coastguard Worker
# Exactly two arguments are required: the invalid-cluster data file and
# Scripts.txt. Anything else prints the module docstring and exits.
if len (sys.argv) != 3:
	sys.exit (__doc__)

# Parse Scripts.txt (second argument) into:
#   scripts_header: the first two lines of the file, kept for the banner
#   scripts:        code point -> script name
#   script_order:   script name -> first code point of the first range seen
#                   for that script (used later to order the output)
with open (sys.argv[2], encoding='utf-8') as f:
	scripts_header = [f.readline () for _ in range (2)]
	scripts = {}
	script_order = {}
	for line in f:
		# Drop everything from the first '#' onward (trailing comment).
		j = line.find ('#')
		if j >= 0:
			line = line[:j]
		fields = [x.strip () for x in line.split (';')]
		# A line without ';' (blank or comment-only) carries no data.
		if len (fields) == 1:
			continue
		# The first field is either a single code point or "start..end".
		uu = fields[0].split ('..')
		start = int (uu[0], 16)
		end = start if len (uu) == 1 else int (uu[1], 16)
		script = fields[1]
		for u in range (start, end + 1):
			scripts[u] = script
		# Only the first occurrence of a script determines its order.
		script_order.setdefault (script, start)
47*2d1272b8SAndroid Build Coastguard Worker
class ConstraintSet (object):
	"""A set of prohibited code point sequences.

	Converting an instance to a string yields the C++ statements that set
	the local variable ``matched`` when the text at the buffer's current
	position starts with one of the prohibited sequences.

	Args:
		constraint (List[int]): A prohibited code point sequence.

	"""
	def __init__ (self, constraint):
		# Either a list or a dictionary. As a list of code points, it
		# represents a prohibited code point sequence. As a dictionary,
		# it represents a set of prohibited sequences, where each item
		# represents the set of prohibited sequences starting with the
		# key (a code point) concatenated with any of the values
		# (ConstraintSets).
		self._c = constraint

	def add (self, constraint):
		"""Add a constraint to this set.

		An empty ``constraint`` is ignored.

		Args:
			constraint (List[int]): A prohibited code point sequence.
		"""
		if not constraint:
			return
		first = constraint[0]
		rest = constraint[1:]
		if isinstance (self._c, list):
			if constraint == self._c[:len (constraint)]:
				# The new sequence is a prefix of the stored one, so the
				# shorter sequence replaces it.
				self._c = constraint
			elif self._c != constraint[:len (self._c)]:
				# Neither sequence is a prefix of the other: switch to
				# the dictionary form, keyed by the first code point.
				self._c = {self._c[0]: ConstraintSet (self._c[1:])}
			# Otherwise the stored sequence is a prefix of the new one
			# and nothing needs to change.
		if isinstance (self._c, dict):
			if first in self._c:
				self._c[first].add (rest)
			else:
				self._c[first] = ConstraintSet (rest)

	@staticmethod
	def _indent (depth):
		"""Return ``depth`` two-space indent levels, with each run of
		eight spaces collapsed to a tab."""
		return ('  ' * depth).replace ('        ', '\t')

	def __str__ (self, index=0, depth=4):
		"""Return the C++ code that tests this set.

		Args:
			index (int): Offset from the buffer's current position of
				the code point this set starts matching at.
			depth (int): Indentation depth, in two-space units.
		"""
		if isinstance (self._c, list):
			return self._sequence_code (index, depth)
		return self._switch_code (index, depth)

	def _sequence_code (self, index, depth):
		"""Return the code for the list form (one remaining sequence)."""
		indent = self._indent (depth)
		seq = self._c
		if len (seq) == 0:
			# Nothing left to compare: reaching this point is a match.
			assert index == 2, 'Cannot use `matched` for this constraint; the general case has not been implemented'
			return '{}matched = true;\n'.format (indent)
		if len (seq) == 1:
			assert index == 1, 'Cannot use `matched` for this constraint; the general case has not been implemented'
			return '{}matched = 0x{:04X}u == buffer->cur ({}).codepoint;\n'.format (indent, seq[0], index or '')
		condition_indent = self._indent (depth + 2)
		body_indent = self._indent (depth + 1)
		last = len (seq) - 1
		# An index of 0 is rendered as an empty argument: `buffer->cur ()`.
		s = ['{}if (0x{:04X}u == buffer->cur ({}).codepoint &&\n'.format (indent, seq[0], index or '')]
		if index:
			s.append ('{}buffer->idx + {} < count &&\n'.format (condition_indent, index + 1))
		for i, cp in enumerate (seq[1:], start=1):
			s.append ('{}0x{:04X}u == buffer->cur ({}).codepoint{}\n'.format (
				condition_indent, cp, index + i, ')' if i == last else ' &&'))
		s.append ('{}{{\n'.format (indent))
		# One `next_glyph` call per position already consumed before `index`.
		for _ in range (index):
			s.append ('{}(void) buffer->next_glyph ();\n'.format (body_indent))
		s.append ('{}matched = true;\n'.format (body_indent))
		s.append ('{}}}\n'.format (indent))
		return ''.join (s)

	def _switch_code (self, index, depth):
		"""Return the code for the dictionary form (a C++ ``switch``)."""
		indent = self._indent (depth)
		label_indent = self._indent (depth + 1)
		break_indent = self._indent (depth + 2)
		s = [
			'{}switch (buffer->cur ({}).codepoint)\n'.format(indent, index or ''),
			'{}{{\n'.format (indent),
		]
		# Group the code points whose continuation produces identical code
		# so that they can share one run of `case` labels.
		cases = collections.defaultdict (set)
		for first, rest in sorted (self._c.items ()):
			cases[rest.__str__ (index + 1, depth + 2)].add (first)
		# Emit the groups ordered by their smallest code point.
		for body, labels in sorted (cases.items (), key=lambda b_ls: min (b_ls[1])):
			# Four `case` labels per output line.
			for i, cp in enumerate (sorted (labels)):
				s.append (label_indent if i % 4 == 0 else ' ')
				s.append ('case 0x{:04X}u:{}'.format (cp, '\n' if i % 4 == 3 else ''))
			if len (labels) % 4 != 0:
				s.append ('\n')
			s.append (body)
			s.append ('{}break;\n'.format (break_indent))
		s.append ('{}}}\n'.format (indent))
		return ''.join (s)
126*2d1272b8SAndroid Build Coastguard Worker
# Parse the prohibited-sequence data file (first argument) into:
#   constraints_header: the stripped lines preceding the first line that
#                       consists solely of '#'
#   constraints:        script name -> ConstraintSet of the sequences whose
#                       first code point belongs to that script
constraints = {}
with open (sys.argv[1], encoding='utf-8') as f:
	constraints_header = []
	# Iterating the file stops at end of file, so an input with no lone '#'
	# line ends the header scan instead of looping forever on the empty
	# strings that readline () returns at EOF.
	for line in f:
		line = line.strip ()
		if line == '#':
			break
		constraints_header.append (line)
	# The same file iterator continues with the line after the '#' marker.
	for line in f:
		# Drop everything from the first '#' onward (trailing comment).
		j = line.find ('#')
		if j >= 0:
			line = line[:j]
		# Only the part before the first ';' holds the code point sequence.
		constraint = [int (cp, 16) for cp in line.split (';')[0].split ()]
		if not constraint:
			continue
		assert 2 <= len (constraint), 'Prohibited sequence is too short: {}'.format (constraint)
		script = scripts[constraint[0]]
		if script in constraints:
			constraints[script].add (constraint)
		else:
			constraints[script] = ConstraintSet (constraint)

# Checked once after the whole file has been read, so that an input which
# yields no constraints at all is actually reported.
assert constraints, 'No constraints found'
148*2d1272b8SAndroid Build Coastguard Worker
# Banner comment recording how the output was produced and the header
# lines of the two input files it was produced from.
print ('/* == Start of generated functions == */')
print ('/*')
print (' * The following functions are generated by running:')
print (' *')
print (' *   %s ms-use/IndicShapingInvalidCluster.txt Scripts.txt' % sys.argv[0])
print (' *')
print (' * on files with these headers:')
print (' *')
for line in constraints_header:
	print (' * %s' % line.strip ())
print (' *')
for line in scripts_header:
	print (' * %s' % line.strip ())
print (' */')

# Fixed C++ text: includes, the two dotted-circle helpers, and the start of
# _hb_preprocess_text_vowel_constraints up to the opening brace of its
# per-script switch. An empty string prints a blank line.
for line in (
	'',
	'#include "hb.hh"',
	'',
	'#ifndef HB_NO_OT_SHAPE',
	'',
	'#include "hb-ot-shaper-vowel-constraints.hh"',
	'',
	'static void',
	'_output_dotted_circle (hb_buffer_t *buffer)',
	'{',
	'  (void) buffer->output_glyph (0x25CCu);',
	'  _hb_glyph_info_reset_continuation (&buffer->prev());',
	'}',
	'',
	'static void',
	'_output_with_dotted_circle (hb_buffer_t *buffer)',
	'{',
	'  _output_dotted_circle (buffer);',
	'  (void) buffer->next_glyph ();',
	'}',
	'',
	'void',
	'_hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan HB_UNUSED,',
	'\t\t\t\t       hb_buffer_t              *buffer,',
	'\t\t\t\t       hb_font_t                *font HB_UNUSED)',
	'{',
	'#ifdef HB_NO_OT_SHAPER_VOWEL_CONSTRAINTS',
	'  return;',
	'#endif',
	'  if (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE)',
	'    return;',
	'',
	'  /* UGLY UGLY UGLY business of adding dotted-circle in the middle of',
	'   * vowel-sequences that look like another vowel.  Data for each script',
	'   * collected from the USE script development spec.',
	'   *',
	'   * https://github.com/harfbuzz/harfbuzz/issues/1019',
	'   */',
	'  buffer->clear_output ();',
	'  unsigned int count = buffer->len;',
	'  switch ((unsigned) buffer->props.script)',
	'  {',
):
	print (line)
207*2d1272b8SAndroid Build Coastguard Worker
# One `case` per script, ordered by the first code point recorded for the
# script in script_order. The loop variable has its own name so that it
# does not rebind the `constraints` dictionary being iterated.
for script, constraint_set in sorted (constraints.items (), key=lambda s_c: script_order[s_c[0]]):
	print ('    case HB_SCRIPT_{}:'.format (script.upper ()))
	print ('      for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)')
	print ('      {')
	print ('\tbool matched = false;')
	# The generated matching code is emitted through write (), i.e. as
	# UTF-8 bytes, after the text printed so far has been flushed.
	write (str (constraint_set))
	print ('\t(void) buffer->next_glyph ();')
	print ('\tif (matched) _output_with_dotted_circle (buffer);')
	print ('      }')
	print ('      break;')
	print ()
219*2d1272b8SAndroid Build Coastguard Worker
# Close the per-script switch and the generated function, then the
# HB_NO_OT_SHAPE guard, and mark the end of the generated text.
# An empty string prints a blank line.
for line in (
	'    default:',
	'      break;',
	'  }',
	'  buffer->sync ();',
	'}',
	'',
	'',
	'#endif',
	'/* == End of generated functions == */',
):
	print (line)
230