# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for generating 'index.html' for the report.

The input for this step is the output directory, where the individual reports
can be found. It parses those reports and generates 'index.html'. """

import re
import os
import os.path
import sys
import shutil
import plistlib
import glob
import json
import logging
import datetime
from libscanbuild import duplicate_check
from libscanbuild.clang import get_version

__all__ = ["document"]


def document(args):
    """Generates the cover report and returns the number of bugs/crashes."""

    html_reports_available = args.output_format in {"html", "plist-html", "sarif-html"}
    sarif_reports_available = args.output_format in {"sarif", "sarif-html"}

    logging.debug("count crashes and bugs")
    crash_count = sum(1 for _ in read_crashes(args.output))
    bug_counter = create_counters()
    for bug in read_bugs(args.output, html_reports_available):
        bug_counter(bug)
    result = crash_count + bug_counter.total

    if html_reports_available and result:
        use_cdb = os.path.exists(args.cdb)

        logging.debug("generate index.html file")
        # common prefix for source files to have shorter paths
        prefix = commonprefix_from(args.cdb) if use_cdb else os.getcwd()
        # assemble the cover from multiple fragments
        fragments = []
        try:
            if bug_counter.total:
                fragments.append(bug_summary(args.output, bug_counter))
                fragments.append(bug_report(args.output, prefix))
            if crash_count:
                fragments.append(crash_report(args.output, prefix))
            assemble_cover(args, prefix, fragments)
            # copy additional files to the report
            copy_resource_files(args.output)
            if use_cdb:
                shutil.copy(args.cdb, args.output)
        finally:
            for fragment in fragments:
                os.remove(fragment)

    if sarif_reports_available:
        logging.debug("merging sarif files")
        merge_sarif_files(args.output)

    return result


def assemble_cover(args, prefix, fragments):
    """Put together the fragments into a final report."""

    import getpass
    import socket

    if args.html_title is None:
        args.html_title = os.path.basename(prefix) + " - analyzer results"

    with open(os.path.join(args.output, "index.html"), "w") as handle:
        indent = 0
        handle.write(
            reindent(
                """
        |<!DOCTYPE html>
        |<html>
        |  <head>
        |    <title>{html_title}</title>
        |    <link type="text/css" rel="stylesheet" href="scanview.css"/>
        |    <script type='text/javascript' src="sorttable.js"></script>
        |    <script type='text/javascript' src='selectable.js'></script>
        |  </head>""",
                indent,
            ).format(html_title=args.html_title)
        )
        handle.write(comment("SUMMARYENDHEAD"))
        handle.write(
            reindent(
                """
        |  <body>
        |    <h1>{html_title}</h1>
        |    <table>
        |      <tr><th>User:</th><td>{user_name}@{host_name}</td></tr>
        |      <tr><th>Working Directory:</th><td>{current_dir}</td></tr>
        |      <tr><th>Command Line:</th><td>{cmd_args}</td></tr>
        |      <tr><th>Clang Version:</th><td>{clang_version}</td></tr>
        |      <tr><th>Date:</th><td>{date}</td></tr>
        |    </table>""",
                indent,
            ).format(
                html_title=args.html_title,
                user_name=getpass.getuser(),
                host_name=socket.gethostname(),
                current_dir=prefix,
                cmd_args=" ".join(sys.argv),
                clang_version=get_version(args.clang),
                date=datetime.datetime.today().strftime("%c"),
            )
        )
        for fragment in fragments:
            # copy the content of fragments
            with open(fragment, "r") as input_handle:
                shutil.copyfileobj(input_handle, handle)
        handle.write(
            reindent(
                """
        |  </body>
        |</html>""",
                indent,
            )
        )


def bug_summary(output_dir, bug_counter):
    """Bug summary is an HTML table to give a better overview of the bugs."""

    name = os.path.join(output_dir, "summary.html.fragment")
    with open(name, "w") as handle:
        indent = 4
        handle.write(
            reindent(
                """
        |<h2>Bug Summary</h2>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Bug Type</td>
        |      <td>Quantity</td>
        |      <td class="sorttable_nosort">Display?</td>
        |    </tr>
        |  </thead>
        |  <tbody>""",
                indent,
            )
        )
        handle.write(
            reindent(
                """
        |    <tr style="font-weight:bold">
        |      <td class="SUMM_DESC">All Bugs</td>
        |      <td class="Q">{0}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox" id="AllBugsCheck"
        |                 onClick="CopyCheckedStateToCheckButtons(this);"/>
        |        </center>
        |      </td>
        |    </tr>""",
                indent,
            ).format(bug_counter.total)
        )
        for category, types in bug_counter.categories.items():
            handle.write(
                reindent(
                    """
        |    <tr>
        |      <th>{0}</th><th colspan=2></th>
        |    </tr>""",
                    indent,
                ).format(category)
            )
            for bug_type in types.values():
                handle.write(
                    reindent(
                        """
        |    <tr>
        |      <td class="SUMM_DESC">{bug_type}</td>
        |      <td class="Q">{bug_count}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox"
        |                 onClick="ToggleDisplay(this,'{bug_type_class}');"/>
        |        </center>
        |      </td>
        |    </tr>""",
                        indent,
                    ).format(**bug_type)
                )
        handle.write(
            reindent(
                """
        |  </tbody>
        |</table>""",
                indent,
            )
        )
        handle.write(comment("SUMMARYBUGEND"))
    return name


def bug_report(output_dir, prefix):
    """Creates a fragment from the analyzer reports."""

    pretty = prettify_bug(prefix, output_dir)
    bugs = (pretty(bug) for bug in read_bugs(output_dir, True))

    name = os.path.join(output_dir, "bugs.html.fragment")
    with open(name, "w") as handle:
        indent = 4
        handle.write(
            reindent(
                """
        |<h2>Reports</h2>
        |<table class="sortable" style="table-layout:automatic">
        |  <thead>
        |    <tr>
        |      <td>Bug Group</td>
        |      <td class="sorttable_sorted">
        |        Bug Type
        |        <span id="sorttable_sortfwdind">&nbsp;&#x25BE;</span>
        |      </td>
        |      <td>File</td>
        |      <td>Function/Method</td>
        |      <td class="Q">Line</td>
        |      <td class="Q">Path Length</td>
        |      <td class="sorttable_nosort"></td>
        |    </tr>
        |  </thead>
        |  <tbody>""",
                indent,
            )
        )
        handle.write(comment("REPORTBUGCOL"))
        for current in bugs:
            handle.write(
                reindent(
                    """
        |    <tr class="{bug_type_class}">
        |      <td class="DESC">{bug_category}</td>
        |      <td class="DESC">{bug_type}</td>
        |      <td>{bug_file}</td>
        |      <td class="DESC">{bug_function}</td>
        |      <td class="Q">{bug_line}</td>
        |      <td class="Q">{bug_path_length}</td>
        |      <td><a href="{report_file}#EndPath">View Report</a></td>
        |    </tr>""",
                    indent,
                ).format(**current)
            )
            handle.write(comment("REPORTBUG", {"id": current["report_file"]}))
        handle.write(
            reindent(
                """
        |  </tbody>
        |</table>""",
                indent,
            )
        )
        handle.write(comment("REPORTBUGEND"))
    return name


def crash_report(output_dir, prefix):
    """Creates a fragment from the compiler crashes."""

    pretty = prettify_crash(prefix, output_dir)
    crashes = (pretty(crash) for crash in read_crashes(output_dir))

    name = os.path.join(output_dir, "crashes.html.fragment")
    with open(name, "w") as handle:
        indent = 4
        handle.write(
            reindent(
                """
        |<h2>Analyzer Failures</h2>
        |<p>The analyzer had problems processing the following files:</p>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Problem</td>
        |      <td>Source File</td>
        |      <td>Preprocessed File</td>
        |      <td>STDERR Output</td>
        |    </tr>
        |  </thead>
        |  <tbody>""",
                indent,
            )
        )
        for current in crashes:
            handle.write(
                reindent(
                    """
        |    <tr>
        |      <td>{problem}</td>
        |      <td>{source}</td>
        |      <td><a href="{file}">preprocessor output</a></td>
        |      <td><a href="{stderr}">analyzer std err</a></td>
        |    </tr>""",
                    indent,
                ).format(**current)
            )
            handle.write(comment("REPORTPROBLEM", current))
        handle.write(
            reindent(
                """
        |  </tbody>
        |</table>""",
                indent,
            )
        )
        handle.write(comment("REPORTCRASHES"))
    return name


def read_crashes(output_dir):
    """Generate a sequence of crashes from the given output directory."""

    return (
        parse_crash(filename)
        for filename in glob.iglob(os.path.join(output_dir, "failures", "*.info.txt"))
    )


def read_bugs(output_dir, html):
    # type: (str, bool) -> Generator[Dict[str, Any], None, None]
    """Generate a unique sequence of bugs from the given output directory.

    Duplicates can occur in a project if the same module was compiled multiple
    times with different compiler options. Such bugs are shown in the final
    report (cover) only once."""

    def empty(file_name):
        return os.stat(file_name).st_size == 0

    duplicate = duplicate_check(
        lambda bug: "{bug_line}.{bug_path_length}:{bug_file}".format(**bug)
    )
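    # Illustrative example (hypothetical values): a bug on line 42 with path
    # length 3 in "a.c" maps to the key "42.3:a.c"; any later bug producing
    # the same key is treated as a duplicate and skipped.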

    # get the right parser for the job.
    parser = parse_bug_html if html else parse_bug_plist
    # get the input files, which are not empty.
    pattern = os.path.join(output_dir, "*.html" if html else "*.plist")
    bug_files = (file for file in glob.iglob(pattern) if not empty(file))

    for bug_file in bug_files:
        for bug in parser(bug_file):
            if not duplicate(bug):
                yield bug


def merge_sarif_files(output_dir, sort_files=False):
    """Reads and merges all .sarif files in the given output directory.

    Each sarif file in the output directory is understood as a single run
    and thus appears as a separate entry in the top-level runs array. This
    requires updating the run index of any embedded links in messages.
    """

    def empty(file_name):
        return os.stat(file_name).st_size == 0

    def update_sarif_object(sarif_object, runs_count_offset):
        """
        Given a SARIF object, checks its dictionary entries for a 'message' property.
        If present, updates the run index of any embedded links in the message.

        Recursively looks through entries in the dictionary.
        """
        if not isinstance(sarif_object, dict):
            return sarif_object

        if "message" in sarif_object:
            sarif_object["message"] = match_and_update_run(
                sarif_object["message"], runs_count_offset
            )

        for key in sarif_object:
            if isinstance(sarif_object[key], list):
                # iterate through the sub-objects and update each of them.
                arr = [
                    update_sarif_object(entry, runs_count_offset)
                    for entry in sarif_object[key]
                ]
                sarif_object[key] = arr
            elif isinstance(sarif_object[key], dict):
                sarif_object[key] = update_sarif_object(
                    sarif_object[key], runs_count_offset
                )
            else:
                # do nothing
                pass

        return sarif_object

    def match_and_update_run(message, runs_count_offset):
        """
        Given a SARIF message object, checks if the text property contains an embedded link and
        updates the run index if necessary.
        """
        if "text" not in message:
            return message

        # we only merge runs, so we only need to update the run index
        pattern = re.compile(r"sarif:/runs/(\d+)")
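        # Illustrative example (hypothetical link): with runs_count_offset == 2,
        # an embedded link such as "sarif:/runs/0/results/1" in the message text
        # is rewritten to "sarif:/runs/2/results/1" by the loop below.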

        text = message["text"]
        matches = re.finditer(pattern, text)
        matches_list = list(matches)

        # update matches from right to left, so that earlier match positions
        # stay valid when a run index grows in character length (e.g. 9 -> 10)
        for idx in range(len(matches_list) - 1, -1, -1):
            match = matches_list[idx]
            new_run_count = str(runs_count_offset + int(match.group(1)))
            text = text[0 : match.start(1)] + new_run_count + text[match.end(1) :]

        message["text"] = text
        return message

    sarif_files = (
        file
        for file in glob.iglob(os.path.join(output_dir, "*.sarif"))
        if not empty(file)
    )
    # exposed for testing, since the order of files returned by glob is not
    # guaranteed to be sorted
    if sort_files:
        sarif_files = list(sarif_files)
        sarif_files.sort()

    runs_count = 0
    merged = {}
    for sarif_file in sarif_files:
        with open(sarif_file) as fp:
            sarif = json.load(fp)
            if "runs" not in sarif:
                continue

            # start with the first file
            if not merged:
                merged = sarif
            else:
                # extract the runs and append them to the merged output
                for run in sarif["runs"]:
                    new_run = update_sarif_object(run, runs_count)
                    merged["runs"].append(new_run)

            runs_count += len(sarif["runs"])

    with open(os.path.join(output_dir, "results-merged.sarif"), "w") as out:
        json.dump(merged, out, indent=4, sort_keys=True)


def parse_bug_plist(filename):
    """Returns the generator of bugs from a single .plist file."""

    with open(filename, "rb") as fp:
        content = plistlib.load(fp)
        files = content.get("files")
        for bug in content.get("diagnostics", []):
            if len(files) <= int(bug["location"]["file"]):
                logging.warning('Parsing bug from "%s" failed', filename)
                continue

            yield {
                "result": filename,
                "bug_type": bug["type"],
                "bug_category": bug["category"],
                "bug_line": int(bug["location"]["line"]),
                "bug_path_length": int(bug["location"]["col"]),
                "bug_file": files[int(bug["location"]["file"])],
            }


def parse_bug_html(filename):
    """Parse out the bug information from HTML output."""

    patterns = [
        re.compile(r"<!-- BUGTYPE (?P<bug_type>.*) -->$"),
        re.compile(r"<!-- BUGFILE (?P<bug_file>.*) -->$"),
        re.compile(r"<!-- BUGPATHLENGTH (?P<bug_path_length>.*) -->$"),
        re.compile(r"<!-- BUGLINE (?P<bug_line>.*) -->$"),
        re.compile(r"<!-- BUGCATEGORY (?P<bug_category>.*) -->$"),
        re.compile(r"<!-- BUGDESC (?P<bug_description>.*) -->$"),
        re.compile(r"<!-- FUNCTIONNAME (?P<bug_function>.*) -->$"),
    ]
    endsign = re.compile(r"<!-- BUGMETAEND -->")
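    # The bug metadata appears in the generated HTML as one comment per line,
    # e.g. "<!-- BUGLINE 42 -->" (the value is illustrative); everything after
    # the "<!-- BUGMETAEND -->" marker is ignored.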

    bug = {
        "report_file": filename,
        "bug_function": "n/a",  # compatibility with < clang-3.5
        "bug_category": "Other",
        "bug_line": 0,
        "bug_path_length": 1,
    }

    with open(filename, encoding="utf-8") as handler:
        for line in handler.readlines():
            # do not read the file further
            if endsign.match(line):
                break
            # search for the right lines
            for regex in patterns:
                match = regex.match(line.strip())
                if match:
                    bug.update(match.groupdict())
                    break

    encode_value(bug, "bug_line", int)
    encode_value(bug, "bug_path_length", int)

    yield bug


def parse_crash(filename):
    """Parse out the crash information from the report file."""

    match = re.match(r"(.*)\.info\.txt", filename)
    name = match.group(1) if match else None
    with open(filename, mode="rb") as handler:
        # workaround: read as bytes and strip manually, so that Windows
        # '\r\n' line endings are handled correctly.
        lines = [line.decode().rstrip() for line in handler.readlines()]
        return {
            "source": lines[0],
            "problem": lines[1],
            "file": name,
            "info": name + ".info.txt",
            "stderr": name + ".stderr.txt",
        }


def category_type_name(bug):
    """Create a new bug attribute from the bug's category and type.

    The result will be used as a CSS class selector in the final report."""

    def smash(key):
        """Make the value ready to be used as an HTML attribute value."""

        return bug.get(key, "").lower().replace(" ", "_").replace("'", "")

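    # Illustrative example (hypothetical values): a bug with category
    # "Logic error" and type "Division by zero" yields the class name
    # "bt_logic_error_division_by_zero".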
    return escape("bt_" + smash("bug_category") + "_" + smash("bug_type"))


def create_counters():
    """Create counters for bug statistics.

    Two entries are maintained: 'total' is an integer that represents the
    number of bugs. 'categories' is a two-level categorisation of bug
    counters: the first level is the 'bug category', the second is the
    'bug type'. Each entry in this classification is a dictionary of
    'bug_type', 'bug_type_class' and 'bug_count'."""
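    # Note: the returned 'predicate' callable keeps its state as attributes on
    # the function object itself ('predicate.total' and 'predicate.categories'),
    # so callers read the counts directly from the counter they were given.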

    def predicate(bug):
        bug_category = bug["bug_category"]
        bug_type = bug["bug_type"]
        current_category = predicate.categories.get(bug_category, dict())
        current_type = current_category.get(
            bug_type,
            {
                "bug_type": bug_type,
                "bug_type_class": category_type_name(bug),
                "bug_count": 0,
            },
        )
        current_type.update({"bug_count": current_type["bug_count"] + 1})
        current_category.update({bug_type: current_type})
        predicate.categories.update({bug_category: current_category})
        predicate.total += 1

    predicate.total = 0
    predicate.categories = dict()
    return predicate


def prettify_bug(prefix, output_dir):
    def predicate(bug):
        """Make these values safe to embed into HTML."""

        bug["bug_type_class"] = category_type_name(bug)

        encode_value(bug, "bug_file", lambda x: escape(chop(prefix, x)))
        encode_value(bug, "bug_category", escape)
        encode_value(bug, "bug_type", escape)
        encode_value(bug, "report_file", lambda x: escape(chop(output_dir, x)))
        return bug

    return predicate


def prettify_crash(prefix, output_dir):
    def predicate(crash):
        """Make these values safe to embed into HTML."""

        encode_value(crash, "source", lambda x: escape(chop(prefix, x)))
        encode_value(crash, "problem", escape)
        encode_value(crash, "file", lambda x: escape(chop(output_dir, x)))
        encode_value(crash, "info", lambda x: escape(chop(output_dir, x)))
        encode_value(crash, "stderr", lambda x: escape(chop(output_dir, x)))
        return crash

    return predicate


def copy_resource_files(output_dir):
    """Copy the javascript and css files to the report directory."""

    this_dir = os.path.dirname(os.path.realpath(__file__))
    for resource in os.listdir(os.path.join(this_dir, "resources")):
        shutil.copy(os.path.join(this_dir, "resources", resource), output_dir)


def encode_value(container, key, encode):
    """Run 'encode' on the 'container[key]' value and update it."""

    if key in container:
        value = encode(container[key])
        container.update({key: value})


def chop(prefix, filename):
    """Create 'filename' from '/prefix/filename'"""

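    # Illustrative example (hypothetical paths): chop("/src", "/src/lib/a.c")
    # returns "lib/a.c", while an empty prefix leaves the path untouched.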
    return filename if not len(prefix) else os.path.relpath(filename, prefix)


def escape(text):
    """Paranoid HTML escape method. (Python version independent)"""

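    # Illustrative example: escape('<a href="x">') returns
    # '&lt;a href=&quot;x&quot;&gt;'.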
    escape_table = {
        "&": "&amp;",
        '"': "&quot;",
        "'": "&apos;",
        ">": "&gt;",
        "<": "&lt;",
    }
    return "".join(escape_table.get(c, c) for c in text)


def reindent(text, indent):
    """Utility function to format html output and keep indentation."""

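    # Each non-blank line keeps only the text after its first "|" and is
    # re-indented by 'indent' spaces; e.g. (illustrative)
    # reindent("\n        |<html>", 4) yields "    <html>" plus os.linesep.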
    result = ""
    for line in text.splitlines():
        if len(line.strip()):
            result += " " * indent + line.split("|")[1] + os.linesep
    return result


def comment(name, opts=dict()):
    """Utility function to format meta information as comment."""

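    # Illustrative example (hypothetical values): comment("REPORTBUG",
    # {"id": "report.html"}) returns '<!-- REPORTBUG id="report.html" -->'
    # followed by os.linesep.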
    attributes = ""
    for key, value in opts.items():
        attributes += ' {0}="{1}"'.format(key, value)

    return "<!-- {0}{1} -->{2}".format(name, attributes, os.linesep)


def commonprefix_from(filename):
    """Create the file prefix from the compilation database entries."""

    with open(filename, "r") as handle:
        return commonprefix(item["file"] for item in json.load(handle))


def commonprefix(files):
    """Fixed version of os.path.commonprefix.

    :param files: list of file names.
    :return: the longest path prefix that is a prefix of all files."""
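    # Illustrative example (hypothetical paths): for ["/src/lib/a.c",
    # "/src/lib/b.c"] the character-wise prefix is "/src/lib/", and the
    # function returns "/src/lib".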
    result = None
    for current in files:
        if result is not None:
            result = os.path.commonprefix([result, current])
        else:
            result = current

    if result is None:
        return ""
    elif not os.path.isdir(result):
        return os.path.dirname(result)
    else:
        return os.path.abspath(result)
