#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2019 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Compare perf report files side by side.

Each report file is split into its "# Events:" sections. The differ then
prints tables with one column per report: the report file names, the event
count of every section, and the counts of the top functions of each section.
"""


__author__ = "[email protected] (Ahmad Sharif)"

import argparse
import functools
import re
import sys

from cros_utils import misc
from cros_utils import tabulator


ROWS_TO_SHOW = "Rows_to_show_in_the_perf_table"
TOTAL_EVENTS = "Total_events_of_this_profile"


def GetPerfDictFromReport(report_file):
    """Summarize a perf report file as nested dictionaries.

    Args:
        report_file: Path of the perf report file to parse.

    Returns:
        A dict keyed by section name. Each value maps every function name of
        that section to its count and additionally holds two bookkeeping
        entries: TOTAL_EVENTS (the sum of all function counts) and
        ROWS_TO_SHOW (how many functions have a percentage above 1).
    """
    output = {}
    perf_report = PerfReport(report_file)
    for section_name, section in perf_report.sections.items():
        # Bookkeeping keys go in first so the insertion order of the summary
        # matches what callers have always received.
        summary = {ROWS_TO_SHOW: 0, TOTAL_EVENTS: 0}
        for function in section.functions:
            summary["%s" % (function.name)] = function.count
            summary[TOTAL_EVENTS] += function.count
            if function.percent > 1:
                summary[ROWS_TO_SHOW] += 1
        output[section_name] = summary
    return output


def _SortDictionaryByValue(d):
    """Return the keys of d ordered from largest to smallest value.

    Values for which misc.IsFloat returns a true result are compared as
    float(value); every other value is compared unchanged.
    """

    def GetFloat(x):
        return float(x) if misc.IsFloat(x) else x

    sorted_l = sorted(d.items(), key=lambda item: GetFloat(item[1]))
    # Deliberately an ascending sort followed by reverse() instead of
    # sorted(..., reverse=True): the two differ in how ties are ordered, and
    # the table row order depends on it.
    sorted_l.reverse()
    return [key for key, _ in sorted_l]


class Tabulator:
    """Prints groups of dictionaries as tables, one column per dictionary."""

    def __init__(self, all_dicts):
        # A list of groups; each group is a list of dicts and becomes its own
        # table.
        self._all_dicts = all_dicts

    def PrintTable(self):
        """Print one table for every group of dictionaries."""
        for dicts in self._all_dicts:
            self.PrintTableHelper(dicts)

    def PrintTableHelper(self, dicts):
        """Transform dicts into a single table and print it.

        The table has one row per key found in any of the dicts, ordered by
        the largest value seen for that key, and one column per dict. A dict
        that lacks a key contributes the string "0" to that row.

        Args:
            dicts: List of dictionaries to lay out side by side.
        """
        # Largest value seen for each key across all dicts; used for ordering.
        fields = {}
        for d in dicts:
            for f, value in d.items():
                fields[f] = max(fields[f], value) if f in fields else value

        header = ["name"]
        header.extend(range(len(dicts)))
        table = [header]

        for f in _SortDictionaryByValue(fields):
            table.append([f] + [d.get(f, "0") for d in dicts])

        print(tabulator.GetSimpleTable(table))


class Function:
    """One function entry of a report section: name, count and percentage."""

    def __init__(self):
        self.count = 0
        self.name = ""
        self.percent = 0
class Section:
    """One "# Events:" section of a perf report.

    Attributes:
        name: Text that follows the event count on the "Events:" line.
        count: Event count from the "Events:" line, converted with
            misc.UnitToNumber.
        functions: Parsed function entries in the order they appear.
        raw_contents: The unparsed text of the section.
    """

    def __init__(self, contents):
        self.name = ""
        # Defaults so that a section without a parsable "Events:" line still
        # exposes the attributes that the differ reads.
        self.count = 0
        self.functions = []
        self.raw_contents = contents
        self._ParseSection()

    def _ParseSection(self):
        """Fill in name, count and functions from raw_contents."""
        matches = re.findall(r"Events: (\w+)\s+(.*)", self.raw_contents)
        assert len(matches) <= 1, "More than one event found in 1 section"
        if not matches:
            return
        count_text, self.name = matches[0]
        self.count = misc.UnitToNumber(count_text)

        self.functions = []
        for line in self.raw_contents.splitlines():
            # Function rows carry a percentage; "#" lines are commentary.
            if "%" not in line or line.startswith("#"):
                continue
            # Split on single spaces only, dropping the empty pieces produced
            # by runs of spaces.
            fields = [f for f in line.split(" ") if f]
            function = Function()
            function.percent = float(fields[0].strip("%"))
            function.count = int(fields[1])
            function.name = " ".join(fields[2:])
            self.functions.append(function)


class PerfReport:
    """Parsed contents of one perf report file.

    Attributes:
        perf_file: Path of the report file.
        sections: Dict of section name -> Section.
        metadata: Dict of "key: value" pairs found in the text that precedes
            the first section.
    """

    def __init__(self, perf_file):
        self.perf_file = perf_file
        self._ReadFile()
        self.sections = {}
        self.metadata = {}
        self._section_contents = []
        self._section_header = ""
        self._SplitSections()
        self._ParseSections()
        self._ParseSectionHeader()

    def _ParseSectionHeader(self):
        """Parse a header of a perf report file."""
        # The "captured on" field is inaccurate - this actually refers to when
        # the report was generated, not when the data was captured.
        for line in self._section_header.splitlines():
            # Drop the first two characters of every header line before
            # looking for "key: value".
            line = line[2:]
            if ":" in line:
                key, val = line.strip().split(":", 1)
                self.metadata[key.strip()] = val.strip()

    def _ReadFile(self):
        """Read the whole report file into memory."""
        # Context manager so the file handle is closed deterministically.
        with open(self.perf_file) as report:
            self._perf_contents = report.read()

    def _ParseSections(self):
        """Build a Section for every chunk found by _SplitSections."""
        self.event_counts = {}
        self.sections = {}
        for section_content in self._section_contents:
            section = Section(section_content)
            section.name = self._GetHumanReadableName(section.name)
            self.sections[section.name] = section

    # TODO(asharif): Do this better.
    def _GetHumanReadableName(self, section_name):
        """Map a "raw ..." section name to a name taken from the header.

        Names that do not contain "raw" are returned unchanged. Otherwise the
        last space-separated token of the name is searched for in the header
        lines, and the sixth space-separated token of the first usable
        matching line is returned. If no header line qualifies, the original
        name is returned so that the section keeps a distinct, printable key.
        """
        if "raw" not in section_name:
            return section_name
        raw_number = section_name.strip().split(" ")[-1]
        for line in self._section_header.splitlines():
            if raw_number not in line:
                continue
            tokens = line.strip().split(" ")
            if len(tokens) > 5:
                return tokens[5]
        return section_name

    def _SplitSections(self):
        """Split the file text into a header and per-event section chunks."""
        indices = [
            m.start() for m in re.finditer("# Events:", self._perf_contents)
        ]
        # Sentinel end offset: closes the last section, and makes the header
        # span the whole file when there are no sections at all.
        indices.append(len(self._perf_contents))
        self._section_contents = [
            self._perf_contents[start:end]
            for start, end in zip(indices, indices[1:])
        ]
        self._section_header = self._perf_contents[0 : indices[0]]


class PerfDiffer:
    """Prints side-by-side tables comparing several PerfReport objects."""

    def __init__(self, reports, num_symbols, common_only):
        """Store the diff configuration.

        Args:
            reports: List of PerfReport objects to compare.
            num_symbols: How many leading functions of each section and report
                are considered.
            common_only: When true, functions present in every report that has
                the section also get a "scaled" row.
        """
        self._reports = reports
        self._num_symbols = num_symbols
        self._common_only = common_only
        self._common_function_names = {}

    def DoDiff(self):
        """The function that does the diff.

        Prints a table of file names, a table of per-section event counts and
        then one table of function counts for every section.
        """
        section_names = self._FindAllSections()

        filename_dicts = []
        summary_dicts = []
        for report in self._reports:
            filename_dicts.append({"file": report.perf_file})
            summary_dicts.append(
                {
                    name: report.sections[name].count
                    for name in section_names
                    if name in report.sections
                }
            )

        all_dicts = [filename_dicts, summary_dicts]

        for section_name in section_names:
            # A set makes the per-function membership test below O(1).
            top_functions = set(
                self._GetTopFunctions(section_name, self._num_symbols)
            )
            self._FindCommonFunctions(section_name)
            all_dicts.append(
                [
                    self._GetReportFunctionCounts(
                        report, section_name, top_functions
                    )
                    for report in self._reports
                ]
            )

        mytabulator = Tabulator(all_dicts)
        mytabulator.PrintTable()

    def _GetReportFunctionCounts(self, report, section_name, top_functions):
        """Return the table column of one report for one section.

        Keys are "<section name> <function name>"; in common_only mode the
        functions shared by all reports also get a "<key> scaled" entry. The
        result is empty when the report does not have the section.
        """
        d = {}
        if section_name not in report.sections:
            return d
        section = report.sections[section_name]

        # Get a common scaling factor for this report.
        common_scaling_factor = self._GetCommonScalingFactor(section)

        for function in section.functions:
            if function.name not in top_functions:
                continue
            key = "%s %s" % (section.name, function.name)
            d[key] = function.count
            # Scale the function count by the common factor in common_only
            # mode.
            if self._common_only and (
                function.name in self._common_function_names[section.name]
            ):
                d[key + " scaled"] = common_scaling_factor * function.count
        return d

    def _FindAllSections(self):
        """Return every section name, ordered by its largest event count."""
        sections = {}
        for report in self._reports:
            for section in report.sections.values():
                if section.name in sections:
                    sections[section.name] = max(
                        sections[section.name], section.count
                    )
                else:
                    sections[section.name] = section.count
        return _SortDictionaryByValue(sections)

    def _GetCommonScalingFactor(self, section):
        """Return 100 divided by the summed count of the common functions.

        "Common" functions are the ones recorded for this section by
        _FindCommonFunctions. Returns 0.0 when that sum is zero, e.g. when the
        reports share no function in this section, instead of dividing by
        zero.
        """
        common_names = self._common_function_names[section.name]
        unique_count = self._GetCount(section, lambda x: x in common_names)
        if not unique_count:
            return 0.0
        return 100.0 / unique_count

    def _GetCount(self, section, filter_fun=None):
        """Sum the counts of the section's functions accepted by filter_fun.

        Args:
            section: Object with a `functions` list.
            filter_fun: Optional predicate on the function name; when omitted
                every function is counted.
        """
        return sum(
            int(function.count)
            for function in section.functions
            if not filter_fun or filter_fun(function.name)
        )

    def _FindCommonFunctions(self, section_name):
        """Record the function names shared by all reports with the section.

        Reports that lack the section are ignored. If no report has it, an
        empty set is recorded.
        """
        function_names_list = [
            {f.name for f in report.sections[section_name].functions}
            for report in self._reports
            if section_name in report.sections
        ]
        # reduce() without an initial value raises on an empty sequence.
        if function_names_list:
            common = functools.reduce(set.intersection, function_names_list)
        else:
            common = set()
        self._common_function_names[section_name] = common

    def _GetTopFunctions(self, section_name, num_functions):
        """Return the names of the leading functions of a section.

        Takes the first num_functions entries of the section from every
        report and orders the union by the largest count seen for each name.
        """
        all_functions = {}
        for report in self._reports:
            if section_name not in report.sections:
                continue
            section = report.sections[section_name]
            for f in section.functions[:num_functions]:
                if f.name in all_functions:
                    all_functions[f.name] = max(
                        all_functions[f.name], f.count
                    )
                else:
                    all_functions[f.name] = f.count
        # FIXME(asharif): Don't really need to sort these...
        return _SortDictionaryByValue(all_functions)

    def _GetFunctionsDict(self, section, function_names):
        """Return name -> count for the section's functions in function_names."""
        return {
            function.name: function.count
            for function in section.functions
            if function.name in function_names
        }


def Main(argv):
    """The entry of the main.

    Args:
        argv: Full argument vector, program name included. The first
            positional argument is skipped; the remaining positional
            arguments are taken as perf report file paths.

    Returns:
        0 once the tables have been printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-n",
        "--num_symbols",
        dest="num_symbols",
        default="5",
        help="The number of symbols to show.",
    )
    parser.add_argument(
        "-c",
        "--common_only",
        dest="common_only",
        action="store_true",
        default=False,
        help="Diff common symbols only.",
    )

    options, args = parser.parse_known_args(argv)

    # args[0] is the program name because the complete argv was parsed.
    reports = [PerfReport(report_path) for report_path in args[1:]]
    differ = PerfDiffer(
        reports, int(options.num_symbols), options.common_only
    )
    differ.DoDiff()

    return 0


if __name__ == "__main__":
    sys.exit(Main(sys.argv))