# Lint as: python2, python3
import re,string


class reason_counter:
    """Occurrence tally for one generalized failure reason.

    Attributes:
        wording: text shown for this reason. It starts out as whatever was
            passed to the constructor and is overwritten on every update().
        num: how many times the reason has been recorded (starts at 1).
    """

    def __init__(self, wording):
        self.num = 1
        self.wording = wording

    def update(self, new_wording):
        """Record one more occurrence and switch to `new_wording`."""
        self.wording = new_wording
        self.num += 1

    def html(self):
        """Return the display text: bare wording, or 'wording (N+)' if seen more than once.

        No HTML escaping is applied to the wording here.
        """
        if self.num != 1:
            return "%s (%d+)" % (self.wording, self.num)
        return self.wording


def numbers_are_irrelevant(txt):
    """Tell whether numbers in `txt` should be replaced with the generic NN.

    Currently always True. The hook exists so that, if some categories of
    reasons ever need to keep their numbers, it can return False for them.
    """
    return True


def aggregate_reason_fields(reasons_list):
    """Group similar reasons and return their display strings, most frequent first.

    Each entry of `reasons_list` may hold several '|'-separated reasons.
    Reasons are compared after trimming, collapsing whitespace runs to a
    single space and (when numbers_are_irrelevant() agrees) replacing every
    digit run with "NN". Empty reasons are ignored.

    A reason seen once is reported with its original, untouched text; a
    reason seen several times is reported in its generalized form followed
    by " (N+)". Reasons with equal counts keep the dict's key order.
    """
    counters = {}

    # Joining and re-splitting flattens combined entries into single reasons.
    for raw in '|'.join(reasons_list).split('|'):
        # Trim, then squeeze internal whitespace.
        key = re.sub(r"\s+"," ", raw.strip())
        if not key:
            continue  # nothing left: skip empty reasons

        if numbers_are_irrelevant(key):
            # Mask the numbers so reasons differing only in digits coincide.
            key = re.sub(r"\d+","NN", key)

        seen = counters.get(key)
        if seen is None:
            # First sighting: remember the original (non-generalized) text.
            counters[key] = reason_counter(raw)
        else:
            # Repeated sighting: from now on show the generalized form.
            seen.update(key)

    ranked = sorted(counters, key=lambda k: counters[k].num, reverse=True)
    return [counters[k].html() for k in ranked]