xref: /aosp_15_r20/external/clang/utils/analyzer/CmpRuns.py (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li#!/usr/bin/env python
2*67e74705SXin Li
3*67e74705SXin Li"""
4*67e74705SXin LiCmpRuns - A simple tool for comparing two static analyzer runs to determine
5*67e74705SXin Liwhich reports have been added, removed, or changed.
6*67e74705SXin Li
7*67e74705SXin LiThis is designed to support automated testing using the static analyzer, from
8*67e74705SXin Litwo perspectives:
9*67e74705SXin Li  1. To monitor changes in the static analyzer's reports on real code bases, for
10*67e74705SXin Li     regression testing.
11*67e74705SXin Li
12*67e74705SXin Li  2. For use by end users who want to integrate regular static analyzer testing
13*67e74705SXin Li     into a buildbot like environment.
14*67e74705SXin Li
15*67e74705SXin LiUsage:
16*67e74705SXin Li
17*67e74705SXin Li    # Load the results of both runs, to obtain lists of the corresponding
18*67e74705SXin Li    # AnalysisDiagnostic objects.
19*67e74705SXin Li    #
20*67e74705SXin Li    resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
21*67e74705SXin Li    resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)
22*67e74705SXin Li
23*67e74705SXin Li    # Generate a relation from diagnostics in run A to diagnostics in run B
24*67e74705SXin Li    # to obtain a list of triples (a, b, confidence).
25*67e74705SXin Li    diff = compareResults(resultsA, resultsB)
26*67e74705SXin Li
27*67e74705SXin Li"""
28*67e74705SXin Li
29*67e74705SXin Liimport os
30*67e74705SXin Liimport plistlib
31*67e74705SXin Liimport CmpRuns
32*67e74705SXin Li
class SingleRunInfo:
    """Information about one analysis run.

    path       - the analysis output directory (or a single plist file).
    root       - the name of the root directory, which will be disregarded
                 when determining the source file name.
    verboseLog - optional name of a log file; stored as given.
    """
    def __init__(self, path, root="", verboseLog=None):
        # Trailing '/' and '\' characters are removed from the root so that
        # "dir" and "dir/" describe the same prefix.
        trimmedRoot = root.rstrip("/\\")
        self.verboseLog = verboseLog
        self.root = trimmedRoot
        self.path = path
42*67e74705SXin Li
class AnalysisDiagnostic:
    """A single diagnostic taken from an analyzer plist report.

    data       - the diagnostic's dictionary; it must contain 'location'
                 (with 'file', 'line' and 'col'), and the accessors below
                 read 'category' and 'description' from it.
    report     - the owning report; its `files` list and `run` attribute
                 (`run.root`, `run.path`) are consulted.
    htmlReport - name of the matching HTML report, or None.
    """
    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        """Return the source file name with the run's root prefix removed.

        The root is only stripped when it is followed by a path separator,
        so a root of "/foo" shortens "/foo/a.c" to "a.c" but leaves
        "/foobar/a.c" untouched. Without a root, or when the root does not
        match, the file name is returned unchanged.
        """
        root = self._report.run.root
        fileName = self._report.files[self._loc['file']]
        if root and fileName.startswith(root):
            rest = fileName[len(root):]
            # A bare string-prefix match is not enough: require the root to
            # end on a path-component boundary before dropping it.
            if rest[:1] in ("/", "\\"):
                return rest[1:]
        return fileName

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the string used to match this diagnostic across runs.

        It is built from the (root-relative) file name, then the
        'issue_context' and 'issue_hash_content_of_line_in_context' entries
        of the diagnostic when they are present.
        """
        ident = self.getFileName() + "+"
        if 'issue_context' in self._data:
            ident += self._data['issue_context'] + "+"
        if 'issue_hash_content_of_line_in_context' in self._data:
            ident += str(self._data['issue_hash_content_of_line_in_context'])
        return ident

    def getReport(self):
        """Return the path of the HTML report, or " " when there is none."""
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        """Return "file:line:col, category: description" for display."""
        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
                                     self.getColumn(), self.getCategory(),
                                     self.getDescription())

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        return self._data
91*67e74705SXin Li
class multidict:
    """Dictionary-like container that keeps every value assigned to a key.

    Assigning to a key appends to that key's list instead of replacing it;
    reading a key returns the list of all values assigned so far.
    """
    def __init__(self, elts=()):
        self.data = {}
        # Route each pair through __setitem__ so repeated keys accumulate.
        for pair in elts:
            k, v = pair
            self[k] = v

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        # Create the list on first use of the key, then append to it.
        self.data.setdefault(key, []).append(value)

    def items(self):
        return self.data.items()

    def values(self):
        return self.data.values()

    def keys(self):
        return self.data.keys()

    def __len__(self):
        # Number of distinct keys, not number of stored values.
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)
115*67e74705SXin Li
class CmpOptions:
    """Plain holder for the comparison settings.

    verboseLog - name of the verbose log file, or None.
    rootA      - prefix to ignore on source files of the first run.
    rootB      - prefix to ignore on source files of the second run.
    """
    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.verboseLog = verboseLog
        self.rootA, self.rootB = rootA, rootB
121*67e74705SXin Li
class AnalysisReport:
    """One report: the run it belongs to, its file list, its diagnostics."""
    def __init__(self, run, files):
        # Diagnostics start out empty; whoever builds the report fills them.
        self.diagnostics = []
        self.files = files
        self.run = run
127*67e74705SXin Li
class AnalysisRun:
    """All reports and diagnostics loaded for a single analyzer run.

    info is an object providing `path` and `root` attributes; both are
    copied onto the run and the object itself is kept as `info`.
    """
    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        """Return the clang version seen in the plists, or None."""
        return self.clang_version

    def readSingleFile(self, p, deleteEmpty):
        """Load the plist file `p` and record its report and diagnostics.

        p           - path of the plist file to read.
        deleteEmpty - when true, a plist whose 'files' list is empty is
                      removed from disk.

        A plist with an empty 'files' list adds no report. Any top-level
        key other than 'clang_version', 'files' and 'diagnostics' trips the
        format assertion at the end.
        """
        # plistlib.readPlist is gone from newer Python releases, while older
        # ones have no plistlib.load; use whichever this interpreter offers.
        if hasattr(plistlib, 'load'):
            with open(p, 'rb') as plistFile:
                data = plistlib.load(plistFile)
        else:
            data = plistlib.readPlist(p)

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version (this is always true and is more efficient).
        if 'clang_version' in data:
            version = data.pop('clang_version')
            if self.clang_version is None:
                self.clang_version = version

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            return

        # Extract the HTML reports, if they exist. The first diagnostic
        # decides whether HTML names are present; an empty diagnostics list
        # simply yields no HTML names instead of an IndexError.
        rawDiagnostics = data['diagnostics']
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(self, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(data.pop('diagnostics'),
                                       htmlFiles)]

        # Every known key has been popped; anything left is unexpected.
        assert not data

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)
180*67e74705SXin Li
181*67e74705SXin Li
# Backward compatibility API.
def loadResults(path, opts, root = "", deleteEmpty=True):
    """Load one run from `path`, taking the verbose log name from `opts`.

    Wraps the arguments in a SingleRunInfo and defers to
    loadResultsFromSingleRun.
    """
    runInfo = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(runInfo, deleteEmpty)
186*67e74705SXin Li
# Load the results of the analyses from a given output folder.
# - info is the SingleRunInfo object
# - deleteEmpty specifies if the empty plist files should be deleted
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """Build an AnalysisRun from the plist file(s) found at info.path.

    When info.path names a file, only that file is read. Otherwise the
    path is walked recursively and every file whose name ends in 'plist'
    is read.
    """
    run = AnalysisRun(info)
    location = info.path

    # A single plist file was given: read it and stop.
    if os.path.isfile(location):
        run.readSingleFile(location, deleteEmpty)
        return run

    for directory, _subdirs, names in os.walk(location):
        for name in names:
            if name.endswith('plist'):
                run.readSingleFile(os.path.join(directory, name), deleteEmpty)

    return run
205*67e74705SXin Li
def cmpAnalysisDiagnostic(d):
    """Sort key for a diagnostic: its issue identifier."""
    issueId = d.getIssueIdentifier()
    return issueId
208*67e74705SXin Li
def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    Returns a list of triples (a, b, confidence). A diagnostic present in
    both runs (same issue identifier) gives (a, b, 0); one present only in
    A gives (a, None, None); one present only in B gives (None, b, None).
    The matched triples come first, then the A-only ones, then the B-only
    ones.
    """

    matched = []
    onlyA = []
    onlyB = []

    # Quickly eliminate equal elements: order both sides by identifier and
    # walk them from the largest identifier downwards.
    sortedA = sorted(A.diagnostics, key=cmpAnalysisDiagnostic)
    sortedB = sorted(B.diagnostics, key=cmpAnalysisDiagnostic)
    i = len(sortedA) - 1
    j = len(sortedB) - 1
    while i >= 0 and j >= 0:
        a = sortedA[i]
        b = sortedB[j]
        idA = a.getIssueIdentifier()
        idB = b.getIssueIdentifier()
        if idA == idB:
            matched.append((a, b, 0))
            i -= 1
            j -= 1
        elif idA > idB:
            # Nothing left in B can be as large as a, so a is unmatched.
            onlyA.append(a)
            i -= 1
        else:
            # Symmetric case: b has no counterpart in A.
            onlyB.append(b)
            j -= 1

    # Whatever was not visited on either side is unmatched as well.
    onlyA.extend(sortedA[:i + 1])
    onlyB.extend(sortedB[:j + 1])

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    res = matched
    res.extend([(a, None, None) for a in onlyA])
    res.extend([(None, b, None) for b in onlyB])
    return res
255*67e74705SXin Li
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between the analyzer runs in dirA and dirB.

    dirA, dirB  - locations of the two runs, passed to loadResults.
    opts        - object providing `rootA`, `rootB` and `verboseLog`.
    deleteEmpty - forwarded to loadResults.

    Each added, removed or changed diagnostic is printed to standard
    output, followed by the totals. When opts.verboseLog is set, matching
    tuple-formatted lines are also written to that file, which is closed
    before returning even if an error occurs.

    Returns (number of differences, number of diagnostics in A, number of
    diagnostics in B).
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given. Only text is written to it, so it is
    # opened in text mode.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)\n"
                                 % (b.getReadableName(), b.getReport()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)\n"
                                 % (a.getReadableName(), a.getReport()))
            elif confidence:
                # Only reached for a non-zero confidence; exact matches
                # (confidence 0) are not reported.
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)\n"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReport(),
                                    b.getReport()))

        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)\n" % TotalReports)
            auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)
    finally:
        # Release the log file handle on every exit path.
        if auxLog:
            auxLog.close()

    return foundDiffs, len(resultsA.diagnostics), len(resultsB.diagnostics)
304*67e74705SXin Li
def main():
    """Command-line entry point: compare the two runs named in argv.

    Accepts --rootA, --rootB and --verbose-log plus exactly two positional
    arguments, and reports a usage error for any other argument count.
    """
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")

    # The two root options differ only in name and help text.
    rootOptions = (
        ("--rootA", "rootA",
         "Prefix to ignore on source files for directory A"),
        ("--rootB", "rootB",
         "Prefix to ignore on source files for directory B"),
    )
    for flag, destination, helpText in rootOptions:
        parser.add_option("", flag, dest=destination, help=helpText,
                          action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")

    options, positional = parser.parse_args()
    if len(positional) != 2:
        parser.error("invalid number of arguments")

    firstDir, secondDir = positional
    dumpScanBuildResultsDiff(firstDir, secondDir, options)
326*67e74705SXin Li
# Run the comparison only when this file is executed as a script.
if "__main__" == __name__:
    main()
329