# Lint as: python2, python3
import re
import string


# Compiled once at import time so the aggregation loop does not repeat the
# pattern lookup for every reason.
_WHITESPACE_RUN = re.compile(r"\s+")
_DIGIT_RUN = re.compile(r"\d+")


class reason_counter(object):
    """Occurrence count for one generalized reason, plus the text to show.

    Attributes:
        wording: text to display for this reason.
        num: how many times the reason has been seen (starts at 1).
    """

    def __init__(self, wording):
        self.wording = wording
        self.num = 1

    def update(self, new_wording):
        """Record one more occurrence and replace the displayed wording."""
        self.num += 1
        self.wording = new_wording

    def html(self):
        """Return the wording, suffixed with " (N+)" when seen more than once."""
        if self.num == 1:
            return self.wording
        return "%s (%d+)" % (self.wording, self.num)


def numbers_are_irrelevant(txt):
    """Tell whether numbers in the reason `txt` should be replaced with NN.

    Currently always True.  If some categories of reasons later need to keep
    their numbers, this function should return False for those categories.
    """
    return True


def aggregate_reason_fields(reasons_list):
    """Group similar reasons and return their display strings by frequency.

    Each entry of `reasons_list` may itself hold several reasons separated
    by '|'.  Reasons are grouped after trimming, collapsing whitespace runs
    to a single space and (when numbers_are_irrelevant() says so) replacing
    digit runs with "NN".  Empty reasons are ignored.

    Args:
        reasons_list: iterable of strings, each zero or more '|'-separated
            reasons.

    Returns:
        A list of strings, most frequent group first.  A reason seen once
        is shown as given (trimmed); a reason seen several times is shown
        in its generalized form followed by " (N+)".
    """
    reason_htable = {}
    # Join first, then split, so that both separate list entries and
    # '|'-separated parts inside one entry become individual reasons.
    for reason in '|'.join(reasons_list).split('|'):
        reason_trimmed = reason.strip()
        if not reason_trimmed:
            continue  # ignore empty reasons

        reason_reduced = _WHITESPACE_RUN.sub(" ", reason_trimmed)
        if numbers_are_irrelevant(reason_reduced):
            # Generalize by replacing every number with a generic NN.
            reason_reduced = _DIGIT_RUN.sub("NN", reason_reduced)

        counter = reason_htable.get(reason_reduced)
        if counter is None:
            # First occurrence: keep the original (non-generalized) wording,
            # trimmed so no stray whitespace around '|' leaks into the output.
            reason_htable[reason_reduced] = reason_counter(reason_trimmed)
        else:
            # Seen more than once: switch to the generalized wording.
            counter.update(reason_reduced)

    # sorted() is stable, so groups with equal counts keep the dictionary's
    # iteration order.
    generic_reasons = sorted(
        reason_htable,
        key=lambda k: reason_htable[k].num,
        reverse=True,
    )
    return [reason_htable[generic_reason].html()
            for generic_reason in generic_reasons]