xref: /aosp_15_r20/external/fonttools/Lib/fontTools/misc/classifyTools.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1""" fontTools.misc.classifyTools.py -- tools for classifying things.
2"""
3
4
5class Classifier(object):
6    """
7    Main Classifier object, used to classify things into similar sets.
8    """
9
10    def __init__(self, sort=True):
11        self._things = set()  # set of all things known so far
12        self._sets = []  # list of class sets produced so far
13        self._mapping = {}  # map from things to their class set
14        self._dirty = False
15        self._sort = sort
16
17    def add(self, set_of_things):
18        """
19        Add a set to the classifier.  Any iterable is accepted.
20        """
21        if not set_of_things:
22            return
23
24        self._dirty = True
25
26        things, sets, mapping = self._things, self._sets, self._mapping
27
28        s = set(set_of_things)
29        intersection = s.intersection(things)  # existing things
30        s.difference_update(intersection)  # new things
31        difference = s
32        del s
33
34        # Add new class for new things
35        if difference:
36            things.update(difference)
37            sets.append(difference)
38            for thing in difference:
39                mapping[thing] = difference
40        del difference
41
42        while intersection:
43            # Take one item and process the old class it belongs to
44            old_class = mapping[next(iter(intersection))]
45            old_class_intersection = old_class.intersection(intersection)
46
47            # Update old class to remove items from new set
48            old_class.difference_update(old_class_intersection)
49
50            # Remove processed items from todo list
51            intersection.difference_update(old_class_intersection)
52
53            # Add new class for the intersection with old class
54            sets.append(old_class_intersection)
55            for thing in old_class_intersection:
56                mapping[thing] = old_class_intersection
57            del old_class_intersection
58
59    def update(self, list_of_sets):
60        """
61        Add a a list of sets to the classifier.  Any iterable of iterables is accepted.
62        """
63        for s in list_of_sets:
64            self.add(s)
65
66    def _process(self):
67        if not self._dirty:
68            return
69
70        # Do any deferred processing
71        sets = self._sets
72        self._sets = [s for s in sets if s]
73
74        if self._sort:
75            self._sets = sorted(self._sets, key=lambda s: (-len(s), sorted(s)))
76
77        self._dirty = False
78
79    # Output methods
80
81    def getThings(self):
82        """Returns the set of all things known so far.
83
84        The return value belongs to the Classifier object and should NOT
85        be modified while the classifier is still in use.
86        """
87        self._process()
88        return self._things
89
90    def getMapping(self):
91        """Returns the mapping from things to their class set.
92
93        The return value belongs to the Classifier object and should NOT
94        be modified while the classifier is still in use.
95        """
96        self._process()
97        return self._mapping
98
99    def getClasses(self):
100        """Returns the list of class sets.
101
102        The return value belongs to the Classifier object and should NOT
103        be modified while the classifier is still in use.
104        """
105        self._process()
106        return self._sets
107
108
109def classify(list_of_sets, sort=True):
110    """
111    Takes a iterable of iterables (list of sets from here on; but any
112    iterable works.), and returns the smallest list of sets such that
113    each set, is either a subset, or is disjoint from, each of the input
114    sets.
115
116    In other words, this function classifies all the things present in
117    any of the input sets, into similar classes, based on which sets
118    things are a member of.
119
120    If sort=True, return class sets are sorted by decreasing size and
121    their natural sort order within each class size.  Otherwise, class
122    sets are returned in the order that they were identified, which is
123    generally not significant.
124
125    >>> classify([]) == ([], {})
126    True
127    >>> classify([[]]) == ([], {})
128    True
129    >>> classify([[], []]) == ([], {})
130    True
131    >>> classify([[1]]) == ([{1}], {1: {1}})
132    True
133    >>> classify([[1,2]]) == ([{1, 2}], {1: {1, 2}, 2: {1, 2}})
134    True
135    >>> classify([[1],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}})
136    True
137    >>> classify([[1,2],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}})
138    True
139    >>> classify([[1,2],[2,4]]) == ([{1}, {2}, {4}], {1: {1}, 2: {2}, 4: {4}})
140    True
141    >>> classify([[1,2],[2,4,5]]) == (
142    ...     [{4, 5}, {1}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}})
143    True
144    >>> classify([[1,2],[2,4,5]], sort=False) == (
145    ...     [{1}, {4, 5}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}})
146    True
147    >>> classify([[1,2,9],[2,4,5]], sort=False) == (
148    ...     [{1, 9}, {4, 5}, {2}], {1: {1, 9}, 2: {2}, 4: {4, 5}, 5: {4, 5},
149    ...     9: {1, 9}})
150    True
151    >>> classify([[1,2,9,15],[2,4,5]], sort=False) == (
152    ...     [{1, 9, 15}, {4, 5}, {2}], {1: {1, 9, 15}, 2: {2}, 4: {4, 5},
153    ...     5: {4, 5}, 9: {1, 9, 15}, 15: {1, 9, 15}})
154    True
155    >>> classes, mapping = classify([[1,2,9,15],[2,4,5],[15,5]], sort=False)
156    >>> set([frozenset(c) for c in classes]) == set(
157    ...     [frozenset(s) for s in ({1, 9}, {4}, {2}, {5}, {15})])
158    True
159    >>> mapping == {1: {1, 9}, 2: {2}, 4: {4}, 5: {5}, 9: {1, 9}, 15: {15}}
160    True
161    """
162    classifier = Classifier(sort=sort)
163    classifier.update(list_of_sets)
164    return classifier.getClasses(), classifier.getMapping()
165
166
167if __name__ == "__main__":
168    import sys, doctest
169
170    sys.exit(doctest.testmod(optionflags=doctest.ELLIPSIS).failed)
171