#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases, for
     regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    #
    resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
    resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compareResults(resultsA, resultsB)

"""

import os
import plistlib


def _readPlist(path):
    """Parse the plist file at path and return its top-level object.

    plistlib.readPlist() was removed in Python 3.9, so plistlib.load() is
    used whenever it exists; readPlist() remains the fallback for
    interpreters that do not provide load().
    """
    if hasattr(plistlib, 'load'):
        with open(path, 'rb') as f:
            return plistlib.load(f)
    return plistlib.readPlist(path)


# Information about analysis run:
# path - the analysis output directory
# root - the name of the root directory, which will be disregarded when
# determining the source file name
class SingleRunInfo:
    def __init__(self, path, root="", verboseLog=None):
        self.path = path
        # Trailing separators are dropped so that prefix stripping in
        # AnalysisDiagnostic.getFileName() can assume a bare directory name.
        self.root = root.rstrip("/\\")
        self.verboseLog = verboseLog


class AnalysisDiagnostic:
    """A single diagnostic read from an analyzer plist report.

    data       - the raw diagnostic dictionary (must contain 'location')
    report     - the AnalysisReport this diagnostic belongs to
    htmlReport - file name of the matching HTML report, or None
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        """Return the source file name with the run's root prefix removed.

        The root is only stripped when it matches whole path components, so
        a root of "/src/proj" does not mangle "/src/project/a.c".
        """
        root = self._report.run.root
        fileName = self._report.files[self._loc['file']]
        if root and fileName.startswith(root):
            rest = fileName[len(root):]
            # Either the file name is exactly the root, or the root must be
            # followed by a path separator (which is dropped as well).
            if not rest or rest[0] in "/\\":
                return rest[1:]
        return fileName

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the string used to match this diagnostic across runs.

        It is built from the file name plus, when present in the raw data,
        the issue context and the content-of-line hash.
        """
        ident = self.getFileName() + "+"
        if 'issue_context' in self._data:
            ident += self._data['issue_context'] + "+"
        if 'issue_hash_content_of_line_in_context' in self._data:
            ident += str(self._data['issue_hash_content_of_line_in_context'])
        return ident

    def getReport(self):
        """Return the path of the HTML report, or " " if there is none."""
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
                                     self.getColumn(), self.getCategory(),
                                     self.getDescription())

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        return self._data


class multidict:
    """A dictionary that collects every value assigned to a key in a list."""

    def __init__(self, elts=()):
        self.data = {}
        for key, value in elts:
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        # Assignment appends instead of overwriting.
        if key in self.data:
            self.data[key].append(value)
        else:
            self.data[key] = [value]

    def items(self):
        return self.data.items()

    def values(self):
        return self.data.values()

    def keys(self):
        return self.data.keys()

    def __len__(self):
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)


class CmpOptions:
    """Options consumed by loadResults() and dumpScanBuildResultsDiff()."""

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.rootA = rootA
        self.rootB = rootB
        self.verboseLog = verboseLog


class AnalysisReport:
    """The contents of one plist file: its file table and its diagnostics."""

    def __init__(self, run, files):
        self.run = run
        self.files = files
        self.diagnostics = []


class AnalysisRun:
    """All reports and diagnostics loaded for one SingleRunInfo."""

    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        return self.clang_version

    def readSingleFile(self, p, deleteEmpty):
        """Load the plist file p and record its report and diagnostics.

        p           - path of the plist file to read
        deleteEmpty - when true, a plist whose 'files' list is empty is
                      removed from disk; such a plist is ignored either way
        """
        data = _readPlist(p)

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version (this is always true and is more efficient).
        if 'clang_version' in data:
            version = data.pop('clang_version')
            if self.clang_version is None:
                self.clang_version = version

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            return

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. The list may be empty
        # even when 'files' is not, so guard the look at the first entry.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(self, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Every known top-level key has been consumed by now; anything left
        # means the plist format is not the one this reader expects.
        assert not data

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)


# Backward compatibility API.
def loadResults(path, opts, root = "", deleteEmpty=True):
    return loadResultsFromSingleRun(SingleRunInfo(path, root, opts.verboseLog),
                                    deleteEmpty)

# Load results of the analyzes from a given output folder.
# - info is the SingleRunInfo object
# - deleteEmpty specifies if the empty plist files should be deleted
def loadResultsFromSingleRun(info, deleteEmpty=True):
    path = info.path
    run = AnalysisRun(info)

    if os.path.isfile(path):
        run.readSingleFile(path, deleteEmpty)
    else:
        # Walk the whole output directory and read every plist found in it.
        for (dirpath, dirnames, filenames) in os.walk(path):
            for f in filenames:
                if not f.endswith('plist'):
                    continue
                run.readSingleFile(os.path.join(dirpath, f), deleteEmpty)

    return run


def cmpAnalysisDiagnostic(d):
    """Sort key for diagnostics: their issue identifier."""
    return d.getIssueIdentifier()


def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).
    """

    res = []

    # Quickly eliminate equal elements.
    neqA = []
    neqB = []
    eltsA = sorted(A.diagnostics, key=cmpAnalysisDiagnostic)
    eltsB = sorted(B.diagnostics, key=cmpAnalysisDiagnostic)
    while eltsA and eltsB:
        # Both lists are sorted ascending, so popping from the end always
        # yields the largest remaining identifier of each run.
        a = eltsA.pop()
        b = eltsB.pop()
        idA = a.getIssueIdentifier()
        idB = b.getIssueIdentifier()
        if idA == idB:
            res.append((a, b, 0))
        elif idA > idB:
            # Nothing left in B can match a; keep b for the next round.
            eltsB.append(b)
            neqA.append(a)
        else:
            # Nothing left in A can match b; keep a for the next round.
            eltsA.append(a)
            neqB.append(b)
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    res.extend((a, None, None) for a in neqA)
    res.extend((None, b, None) for b in neqB)

    return res


def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between the analyzer results in dirA and dirB.

    dirA, dirB  - result locations handed to loadResults()
    opts        - object providing rootA, rootB and verboseLog
    deleteEmpty - forwarded to loadResults()

    Returns a tuple (number of differences, number of diagnostics in A,
    number of diagnostics in B). When opts.verboseLog is set, a more
    detailed record is also written to that file.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given. Text mode keeps the writes below valid
    # on both Python 2 and Python 3.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    def log(line):
        # Write one line to the verbose log; a no-op without a log file.
        if auxLog:
            auxLog.write(line + "\n")

    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                log("('ADDED', %r, %r)" % (b.getReadableName(),
                                           b.getReport()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                log("('REMOVED', %r, %r)" % (a.getReadableName(),
                                             a.getReport()))
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                log("('CHANGED', %r, %r, %r, %r)"
                    % (a.getReadableName(),
                       b.getReadableName(),
                       a.getReport(),
                       b.getReport()))
            # A confidence of 0 marks an exact match: nothing to report.

        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        log("('TOTAL NEW REPORTS', %r)" % TotalReports)
        log("('TOTAL DIFFERENCES', %r)" % foundDiffs)
    finally:
        # Close the log even if reporting fails part-way through.
        if auxLog:
            auxLog.close()

    return foundDiffs, len(resultsA.diagnostics), len(resultsB.diagnostics)


def main():
    """Command-line entry point: compare the two result directories given."""
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--rootA", dest="rootA",
                      help="Prefix to ignore on source files for directory A",
                      action="store", type=str, default="")
    parser.add_option("", "--rootB", dest="rootB",
                      help="Prefix to ignore on source files for directory B",
                      action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    dumpScanBuildResultsDiff(dirA, dirB, opts)


if __name__ == '__main__':
    main()