xref: /aosp_15_r20/external/fonttools/Lib/fontTools/otlLib/optimize/gpos.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1import logging
2import os
3from collections import defaultdict, namedtuple
4from functools import reduce
5from itertools import chain
6from math import log2
7from typing import DefaultDict, Dict, Iterable, List, Sequence, Tuple
8
9from fontTools.config import OPTIONS
10from fontTools.misc.intTools import bit_count, bit_indices
11from fontTools.ttLib import TTFont
12from fontTools.ttLib.tables import otBase, otTables
13
14log = logging.getLogger(__name__)
15
16COMPRESSION_LEVEL = OPTIONS[f"{__name__}:COMPRESSION_LEVEL"]
17
18# Kept because ufo2ft depends on it, to be removed once ufo2ft uses the config instead
19# https://github.com/fonttools/fonttools/issues/2592
20GPOS_COMPACT_MODE_ENV_KEY = "FONTTOOLS_GPOS_COMPACT_MODE"
21GPOS_COMPACT_MODE_DEFAULT = str(COMPRESSION_LEVEL.default)
22
23
24def _compression_level_from_env() -> int:
25    env_level = GPOS_COMPACT_MODE_DEFAULT
26    if GPOS_COMPACT_MODE_ENV_KEY in os.environ:
27        import warnings
28
29        warnings.warn(
30            f"'{GPOS_COMPACT_MODE_ENV_KEY}' environment variable is deprecated. "
31            "Please set the 'fontTools.otlLib.optimize.gpos:COMPRESSION_LEVEL' option "
32            "in TTFont.cfg.",
33            DeprecationWarning,
34        )
35
36        env_level = os.environ[GPOS_COMPACT_MODE_ENV_KEY]
37    if len(env_level) == 1 and env_level in "0123456789":
38        return int(env_level)
39    raise ValueError(f"Bad {GPOS_COMPACT_MODE_ENV_KEY}={env_level}")
40
41
42def compact(font: TTFont, level: int) -> TTFont:
43    # Ideal plan:
44    #  1. Find lookups of Lookup Type 2: Pair Adjustment Positioning Subtable
45    #     https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#lookup-type-2-pair-adjustment-positioning-subtable
46    #  2. Extract glyph-glyph kerning and class-kerning from all present subtables
47    #  3. Regroup into different subtable arrangements
48    #  4. Put back into the lookup
49    #
50    # Actual implementation:
51    #  2. Only class kerning is optimized currently
52    #  3. If the input kerning is already in several subtables, the subtables
53    #     are not grouped together first; instead each subtable is treated
54    #     independently, so currently this step is:
55    #     Split existing subtables into more smaller subtables
56    gpos = font["GPOS"]
57    for lookup in gpos.table.LookupList.Lookup:
58        if lookup.LookupType == 2:
59            compact_lookup(font, level, lookup)
60        elif lookup.LookupType == 9 and lookup.SubTable[0].ExtensionLookupType == 2:
61            compact_ext_lookup(font, level, lookup)
62    return font
63
64
65def compact_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None:
66    new_subtables = compact_pair_pos(font, level, lookup.SubTable)
67    lookup.SubTable = new_subtables
68    lookup.SubTableCount = len(new_subtables)
69
70
71def compact_ext_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None:
72    new_subtables = compact_pair_pos(
73        font, level, [ext_subtable.ExtSubTable for ext_subtable in lookup.SubTable]
74    )
75    new_ext_subtables = []
76    for subtable in new_subtables:
77        ext_subtable = otTables.ExtensionPos()
78        ext_subtable.Format = 1
79        ext_subtable.ExtSubTable = subtable
80        new_ext_subtables.append(ext_subtable)
81    lookup.SubTable = new_ext_subtables
82    lookup.SubTableCount = len(new_ext_subtables)
83
84
85def compact_pair_pos(
86    font: TTFont, level: int, subtables: Sequence[otTables.PairPos]
87) -> Sequence[otTables.PairPos]:
88    new_subtables = []
89    for subtable in subtables:
90        if subtable.Format == 1:
91            # Not doing anything to Format 1 (yet?)
92            new_subtables.append(subtable)
93        elif subtable.Format == 2:
94            new_subtables.extend(compact_class_pairs(font, level, subtable))
95    return new_subtables
96
97
98def compact_class_pairs(
99    font: TTFont, level: int, subtable: otTables.PairPos
100) -> List[otTables.PairPos]:
101    from fontTools.otlLib.builder import buildPairPosClassesSubtable
102
103    subtables = []
104    classes1: DefaultDict[int, List[str]] = defaultdict(list)
105    for g in subtable.Coverage.glyphs:
106        classes1[subtable.ClassDef1.classDefs.get(g, 0)].append(g)
107    classes2: DefaultDict[int, List[str]] = defaultdict(list)
108    for g, i in subtable.ClassDef2.classDefs.items():
109        classes2[i].append(g)
110    all_pairs = {}
111    for i, class1 in enumerate(subtable.Class1Record):
112        for j, class2 in enumerate(class1.Class2Record):
113            if is_really_zero(class2):
114                continue
115            all_pairs[(tuple(sorted(classes1[i])), tuple(sorted(classes2[j])))] = (
116                getattr(class2, "Value1", None),
117                getattr(class2, "Value2", None),
118            )
119    grouped_pairs = cluster_pairs_by_class2_coverage_custom_cost(font, all_pairs, level)
120    for pairs in grouped_pairs:
121        subtables.append(buildPairPosClassesSubtable(pairs, font.getReverseGlyphMap()))
122    return subtables
123
124
125def is_really_zero(class2: otTables.Class2Record) -> bool:
126    v1 = getattr(class2, "Value1", None)
127    v2 = getattr(class2, "Value2", None)
128    return (v1 is None or v1.getEffectiveFormat() == 0) and (
129        v2 is None or v2.getEffectiveFormat() == 0
130    )
131
132
133Pairs = Dict[
134    Tuple[Tuple[str, ...], Tuple[str, ...]],
135    Tuple[otBase.ValueRecord, otBase.ValueRecord],
136]
137
138
139# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L935-L958
140def _getClassRanges(glyphIDs: Iterable[int]):
141    glyphIDs = sorted(glyphIDs)
142    last = glyphIDs[0]
143    ranges = [[last]]
144    for glyphID in glyphIDs[1:]:
145        if glyphID != last + 1:
146            ranges[-1].append(last)
147            ranges.append([glyphID])
148        last = glyphID
149    ranges[-1].append(last)
150    return ranges, glyphIDs[0], glyphIDs[-1]
151
152
153# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L960-L989
154def _classDef_bytes(
155    class_data: List[Tuple[List[Tuple[int, int]], int, int]],
156    class_ids: List[int],
157    coverage=False,
158):
159    if not class_ids:
160        return 0
161    first_ranges, min_glyph_id, max_glyph_id = class_data[class_ids[0]]
162    range_count = len(first_ranges)
163    for i in class_ids[1:]:
164        data = class_data[i]
165        range_count += len(data[0])
166        min_glyph_id = min(min_glyph_id, data[1])
167        max_glyph_id = max(max_glyph_id, data[2])
168    glyphCount = max_glyph_id - min_glyph_id + 1
169    # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-1
170    format1_bytes = 6 + glyphCount * 2
171    # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-2
172    format2_bytes = 4 + range_count * 6
173    return min(format1_bytes, format2_bytes)
174
175
176ClusteringContext = namedtuple(
177    "ClusteringContext",
178    [
179        "lines",
180        "all_class1",
181        "all_class1_data",
182        "all_class2_data",
183        "valueFormat1_bytes",
184        "valueFormat2_bytes",
185    ],
186)
187
188
189class Cluster:
190    # TODO(Python 3.7): Turn this into a dataclass
191    # ctx: ClusteringContext
192    # indices: int
193    # Caches
194    # TODO(Python 3.8): use functools.cached_property instead of the
195    # manually cached properties, and remove the cache fields listed below.
196    # _indices: Optional[List[int]] = None
197    # _column_indices: Optional[List[int]] = None
198    # _cost: Optional[int] = None
199
200    __slots__ = "ctx", "indices_bitmask", "_indices", "_column_indices", "_cost"
201
202    def __init__(self, ctx: ClusteringContext, indices_bitmask: int):
203        self.ctx = ctx
204        self.indices_bitmask = indices_bitmask
205        self._indices = None
206        self._column_indices = None
207        self._cost = None
208
209    @property
210    def indices(self):
211        if self._indices is None:
212            self._indices = bit_indices(self.indices_bitmask)
213        return self._indices
214
215    @property
216    def column_indices(self):
217        if self._column_indices is None:
218            # Indices of columns that have a 1 in at least 1 line
219            #   => binary OR all the lines
220            bitmask = reduce(int.__or__, (self.ctx.lines[i] for i in self.indices))
221            self._column_indices = bit_indices(bitmask)
222        return self._column_indices
223
224    @property
225    def width(self):
226        # Add 1 because Class2=0 cannot be used but needs to be encoded.
227        return len(self.column_indices) + 1
228
229    @property
230    def cost(self):
231        if self._cost is None:
232            self._cost = (
233                # 2 bytes to store the offset to this subtable in the Lookup table above
234                2
235                # Contents of the subtable
236                # From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment
237                # uint16	posFormat	Format identifier: format = 2
238                + 2
239                # Offset16	coverageOffset	Offset to Coverage table, from beginning of PairPos subtable.
240                + 2
241                + self.coverage_bytes
242                # uint16	valueFormat1	ValueRecord definition — for the first glyph of the pair (may be zero).
243                + 2
244                # uint16	valueFormat2	ValueRecord definition — for the second glyph of the pair (may be zero).
245                + 2
246                # Offset16	classDef1Offset	Offset to ClassDef table, from beginning of PairPos subtable — for the first glyph of the pair.
247                + 2
248                + self.classDef1_bytes
249                # Offset16	classDef2Offset	Offset to ClassDef table, from beginning of PairPos subtable — for the second glyph of the pair.
250                + 2
251                + self.classDef2_bytes
252                # uint16	class1Count	Number of classes in classDef1 table — includes Class 0.
253                + 2
254                # uint16	class2Count	Number of classes in classDef2 table — includes Class 0.
255                + 2
256                # Class1Record	class1Records[class1Count]	Array of Class1 records, ordered by classes in classDef1.
257                + (self.ctx.valueFormat1_bytes + self.ctx.valueFormat2_bytes)
258                * len(self.indices)
259                * self.width
260            )
261        return self._cost
262
263    @property
264    def coverage_bytes(self):
265        format1_bytes = (
266            # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-1
267            # uint16	coverageFormat	Format identifier — format = 1
268            # uint16	glyphCount	Number of glyphs in the glyph array
269            4
270            # uint16	glyphArray[glyphCount]	Array of glyph IDs — in numerical order
271            + sum(len(self.ctx.all_class1[i]) for i in self.indices) * 2
272        )
273        ranges = sorted(
274            chain.from_iterable(self.ctx.all_class1_data[i][0] for i in self.indices)
275        )
276        merged_range_count = 0
277        last = None
278        for start, end in ranges:
279            if last is not None and start != last + 1:
280                merged_range_count += 1
281            last = end
282        format2_bytes = (
283            # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-2
284            # uint16	coverageFormat	Format identifier — format = 2
285            # uint16	rangeCount	Number of RangeRecords
286            4
287            # RangeRecord	rangeRecords[rangeCount]	Array of glyph ranges — ordered by startGlyphID.
288            # uint16	startGlyphID	First glyph ID in the range
289            # uint16	endGlyphID	Last glyph ID in the range
290            # uint16	startCoverageIndex	Coverage Index of first glyph ID in range
291            + merged_range_count * 6
292        )
293        return min(format1_bytes, format2_bytes)
294
295    @property
296    def classDef1_bytes(self):
297        # We can skip encoding one of the Class1 definitions, and use
298        # Class1=0 to represent it instead, because Class1 is gated by the
299        # Coverage definition. Use Class1=0 for the highest byte savings.
300        # Going through all options takes too long, pick the biggest class
301        # = what happens in otlLib.builder.ClassDefBuilder.classes()
302        biggest_index = max(self.indices, key=lambda i: len(self.ctx.all_class1[i]))
303        return _classDef_bytes(
304            self.ctx.all_class1_data, [i for i in self.indices if i != biggest_index]
305        )
306
307    @property
308    def classDef2_bytes(self):
309        # All Class2 need to be encoded because we can't use Class2=0
310        return _classDef_bytes(self.ctx.all_class2_data, self.column_indices)
311
312
313def cluster_pairs_by_class2_coverage_custom_cost(
314    font: TTFont,
315    pairs: Pairs,
316    compression: int = 5,
317) -> List[Pairs]:
318    if not pairs:
319        # The subtable was actually empty?
320        return [pairs]
321
322    # Sorted for reproducibility/determinism
323    all_class1 = sorted(set(pair[0] for pair in pairs))
324    all_class2 = sorted(set(pair[1] for pair in pairs))
325
326    # Use Python's big ints for binary vectors representing each line
327    lines = [
328        sum(
329            1 << i if (class1, class2) in pairs else 0
330            for i, class2 in enumerate(all_class2)
331        )
332        for class1 in all_class1
333    ]
334
335    # Map glyph names to ids and work with ints throughout for ClassDef formats
336    name_to_id = font.getReverseGlyphMap()
337    # Each entry in the arrays below is (range_count, min_glyph_id, max_glyph_id)
338    all_class1_data = [
339        _getClassRanges(name_to_id[name] for name in cls) for cls in all_class1
340    ]
341    all_class2_data = [
342        _getClassRanges(name_to_id[name] for name in cls) for cls in all_class2
343    ]
344
345    format1 = 0
346    format2 = 0
347    for pair, value in pairs.items():
348        format1 |= value[0].getEffectiveFormat() if value[0] else 0
349        format2 |= value[1].getEffectiveFormat() if value[1] else 0
350    valueFormat1_bytes = bit_count(format1) * 2
351    valueFormat2_bytes = bit_count(format2) * 2
352
353    ctx = ClusteringContext(
354        lines,
355        all_class1,
356        all_class1_data,
357        all_class2_data,
358        valueFormat1_bytes,
359        valueFormat2_bytes,
360    )
361
362    cluster_cache: Dict[int, Cluster] = {}
363
364    def make_cluster(indices: int) -> Cluster:
365        cluster = cluster_cache.get(indices, None)
366        if cluster is not None:
367            return cluster
368        cluster = Cluster(ctx, indices)
369        cluster_cache[indices] = cluster
370        return cluster
371
372    def merge(cluster: Cluster, other: Cluster) -> Cluster:
373        return make_cluster(cluster.indices_bitmask | other.indices_bitmask)
374
375    # Agglomerative clustering by hand, checking the cost gain of the new
376    # cluster against the previously separate clusters
377    # Start with 1 cluster per line
378    # cluster = set of lines = new subtable
379    clusters = [make_cluster(1 << i) for i in range(len(lines))]
380
381    # Cost of 1 cluster with everything
382    # `(1 << len) - 1` gives a bitmask full of 1's of length `len`
383    cost_before_splitting = make_cluster((1 << len(lines)) - 1).cost
384    log.debug(f"        len(clusters) = {len(clusters)}")
385
386    while len(clusters) > 1:
387        lowest_cost_change = None
388        best_cluster_index = None
389        best_other_index = None
390        best_merged = None
391        for i, cluster in enumerate(clusters):
392            for j, other in enumerate(clusters[i + 1 :]):
393                merged = merge(cluster, other)
394                cost_change = merged.cost - cluster.cost - other.cost
395                if lowest_cost_change is None or cost_change < lowest_cost_change:
396                    lowest_cost_change = cost_change
397                    best_cluster_index = i
398                    best_other_index = i + 1 + j
399                    best_merged = merged
400        assert lowest_cost_change is not None
401        assert best_cluster_index is not None
402        assert best_other_index is not None
403        assert best_merged is not None
404
405        # If the best merge we found is still taking down the file size, then
406        # there's no question: we must do it, because it's beneficial in both
407        # ways (lower file size and lower number of subtables).  However, if the
408        # best merge we found is not reducing file size anymore, then we need to
409        # look at the other stop criteria = the compression factor.
410        if lowest_cost_change > 0:
411            # Stop critera: check whether we should keep merging.
412            # Compute size reduction brought by splitting
413            cost_after_splitting = sum(c.cost for c in clusters)
414            # size_reduction so that after = before * (1 - size_reduction)
415            # E.g. before = 1000, after = 800, 1 - 800/1000 = 0.2
416            size_reduction = 1 - cost_after_splitting / cost_before_splitting
417
418            # Force more merging by taking into account the compression number.
419            # Target behaviour: compression number = 1 to 9, default 5 like gzip
420            #   - 1 = accept to add 1 subtable to reduce size by 50%
421            #   - 5 = accept to add 5 subtables to reduce size by 50%
422            # See https://github.com/harfbuzz/packtab/blob/master/Lib/packTab/__init__.py#L690-L691
423            # Given the size reduction we have achieved so far, compute how many
424            # new subtables are acceptable.
425            max_new_subtables = -log2(1 - size_reduction) * compression
426            log.debug(
427                f"            len(clusters) = {len(clusters):3d}    size_reduction={size_reduction:5.2f}    max_new_subtables={max_new_subtables}",
428            )
429            if compression == 9:
430                # Override level 9 to mean: create any number of subtables
431                max_new_subtables = len(clusters)
432
433            # If we have managed to take the number of new subtables below the
434            # threshold, then we can stop.
435            if len(clusters) <= max_new_subtables + 1:
436                break
437
438        # No reason to stop yet, do the merge and move on to the next.
439        del clusters[best_other_index]
440        clusters[best_cluster_index] = best_merged
441
442    # All clusters are final; turn bitmasks back into the "Pairs" format
443    pairs_by_class1: Dict[Tuple[str, ...], Pairs] = defaultdict(dict)
444    for pair, values in pairs.items():
445        pairs_by_class1[pair[0]][pair] = values
446    pairs_groups: List[Pairs] = []
447    for cluster in clusters:
448        pairs_group: Pairs = dict()
449        for i in cluster.indices:
450            class1 = all_class1[i]
451            pairs_group.update(pairs_by_class1[class1])
452        pairs_groups.append(pairs_group)
453    return pairs_groups
454