1""" fontTools.misc.classifyTools.py -- tools for classifying things. 2""" 3 4 5class Classifier(object): 6 """ 7 Main Classifier object, used to classify things into similar sets. 8 """ 9 10 def __init__(self, sort=True): 11 self._things = set() # set of all things known so far 12 self._sets = [] # list of class sets produced so far 13 self._mapping = {} # map from things to their class set 14 self._dirty = False 15 self._sort = sort 16 17 def add(self, set_of_things): 18 """ 19 Add a set to the classifier. Any iterable is accepted. 20 """ 21 if not set_of_things: 22 return 23 24 self._dirty = True 25 26 things, sets, mapping = self._things, self._sets, self._mapping 27 28 s = set(set_of_things) 29 intersection = s.intersection(things) # existing things 30 s.difference_update(intersection) # new things 31 difference = s 32 del s 33 34 # Add new class for new things 35 if difference: 36 things.update(difference) 37 sets.append(difference) 38 for thing in difference: 39 mapping[thing] = difference 40 del difference 41 42 while intersection: 43 # Take one item and process the old class it belongs to 44 old_class = mapping[next(iter(intersection))] 45 old_class_intersection = old_class.intersection(intersection) 46 47 # Update old class to remove items from new set 48 old_class.difference_update(old_class_intersection) 49 50 # Remove processed items from todo list 51 intersection.difference_update(old_class_intersection) 52 53 # Add new class for the intersection with old class 54 sets.append(old_class_intersection) 55 for thing in old_class_intersection: 56 mapping[thing] = old_class_intersection 57 del old_class_intersection 58 59 def update(self, list_of_sets): 60 """ 61 Add a a list of sets to the classifier. Any iterable of iterables is accepted. 62 """ 63 for s in list_of_sets: 64 self.add(s) 65 66 def _process(self): 67 if not self._dirty: 68 return 69 70 # Do any deferred processing 71 sets = self._sets 72 self._sets = [s for s in sets if s] 73 74 if self._sort: 75 self._sets = sorted(self._sets, key=lambda s: (-len(s), sorted(s))) 76 77 self._dirty = False 78 79 # Output methods 80 81 def getThings(self): 82 """Returns the set of all things known so far. 83 84 The return value belongs to the Classifier object and should NOT 85 be modified while the classifier is still in use. 86 """ 87 self._process() 88 return self._things 89 90 def getMapping(self): 91 """Returns the mapping from things to their class set. 92 93 The return value belongs to the Classifier object and should NOT 94 be modified while the classifier is still in use. 95 """ 96 self._process() 97 return self._mapping 98 99 def getClasses(self): 100 """Returns the list of class sets. 101 102 The return value belongs to the Classifier object and should NOT 103 be modified while the classifier is still in use. 104 """ 105 self._process() 106 return self._sets 107 108 109def classify(list_of_sets, sort=True): 110 """ 111 Takes a iterable of iterables (list of sets from here on; but any 112 iterable works.), and returns the smallest list of sets such that 113 each set, is either a subset, or is disjoint from, each of the input 114 sets. 115 116 In other words, this function classifies all the things present in 117 any of the input sets, into similar classes, based on which sets 118 things are a member of. 119 120 If sort=True, return class sets are sorted by decreasing size and 121 their natural sort order within each class size. Otherwise, class 122 sets are returned in the order that they were identified, which is 123 generally not significant. 124 125 >>> classify([]) == ([], {}) 126 True 127 >>> classify([[]]) == ([], {}) 128 True 129 >>> classify([[], []]) == ([], {}) 130 True 131 >>> classify([[1]]) == ([{1}], {1: {1}}) 132 True 133 >>> classify([[1,2]]) == ([{1, 2}], {1: {1, 2}, 2: {1, 2}}) 134 True 135 >>> classify([[1],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}}) 136 True 137 >>> classify([[1,2],[2]]) == ([{1}, {2}], {1: {1}, 2: {2}}) 138 True 139 >>> classify([[1,2],[2,4]]) == ([{1}, {2}, {4}], {1: {1}, 2: {2}, 4: {4}}) 140 True 141 >>> classify([[1,2],[2,4,5]]) == ( 142 ... [{4, 5}, {1}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}}) 143 True 144 >>> classify([[1,2],[2,4,5]], sort=False) == ( 145 ... [{1}, {4, 5}, {2}], {1: {1}, 2: {2}, 4: {4, 5}, 5: {4, 5}}) 146 True 147 >>> classify([[1,2,9],[2,4,5]], sort=False) == ( 148 ... [{1, 9}, {4, 5}, {2}], {1: {1, 9}, 2: {2}, 4: {4, 5}, 5: {4, 5}, 149 ... 9: {1, 9}}) 150 True 151 >>> classify([[1,2,9,15],[2,4,5]], sort=False) == ( 152 ... [{1, 9, 15}, {4, 5}, {2}], {1: {1, 9, 15}, 2: {2}, 4: {4, 5}, 153 ... 5: {4, 5}, 9: {1, 9, 15}, 15: {1, 9, 15}}) 154 True 155 >>> classes, mapping = classify([[1,2,9,15],[2,4,5],[15,5]], sort=False) 156 >>> set([frozenset(c) for c in classes]) == set( 157 ... [frozenset(s) for s in ({1, 9}, {4}, {2}, {5}, {15})]) 158 True 159 >>> mapping == {1: {1, 9}, 2: {2}, 4: {4}, 5: {5}, 9: {1, 9}, 15: {15}} 160 True 161 """ 162 classifier = Classifier(sort=sort) 163 classifier.update(list_of_sets) 164 return classifier.getClasses(), classifier.getMapping() 165 166 167if __name__ == "__main__": 168 import sys, doctest 169 170 sys.exit(doctest.testmod(optionflags=doctest.ELLIPSIS).failed) 171