# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible to generate 'index.html' for the report.

The input for this step is the output directory, where individual reports
could be found. It parses those reports and generates 'index.html'. """

import re
import os
import os.path
import sys
import shutil
import plistlib
import glob
import json
import logging
import datetime
from libscanbuild import duplicate_check
from libscanbuild.clang import get_version

__all__ = ["document"]


def document(args):
    """Generates cover report and returns the number of bugs/crashes."""

    html_reports_available = args.output_format in {"html", "plist-html", "sarif-html"}
    sarif_reports_available = args.output_format in {"sarif", "sarif-html"}

    logging.debug("count crashes and bugs")
    crash_count = sum(1 for _ in read_crashes(args.output))
    bug_counter = create_counters()
    for bug in read_bugs(args.output, html_reports_available):
        bug_counter(bug)
    result = crash_count + bug_counter.total

    if html_reports_available and result:
        use_cdb = os.path.exists(args.cdb)

        logging.debug("generate index.html file")
        # common prefix for source files to have shorter path
        prefix = commonprefix_from(args.cdb) if use_cdb else os.getcwd()
        # assemble the cover from multiple fragments
        fragments = []
        try:
            if bug_counter.total:
                fragments.append(bug_summary(args.output, bug_counter))
                fragments.append(bug_report(args.output, prefix))
            if crash_count:
                fragments.append(crash_report(args.output, prefix))
            assemble_cover(args, prefix, fragments)
            # copy additional files to the report
            copy_resource_files(args.output)
            if use_cdb:
                shutil.copy(args.cdb, args.output)
        finally:
            # fragments are temporary; remove them even if assembly failed
            for fragment in fragments:
                os.remove(fragment)

    if sarif_reports_available:
        logging.debug("merging sarif files")
        merge_sarif_files(args.output)

    return result


def assemble_cover(args, prefix, fragments):
    """Put together the fragments into a final report."""

    import getpass
    import socket

    if args.html_title is None:
        args.html_title = os.path.basename(prefix) + " - analyzer results"

    with open(os.path.join(args.output, "index.html"), "w") as handle:
        indent = 0
        handle.write(
            reindent(
                """
        |<!DOCTYPE html>
        |<html>
        |  <head>
        |    <title>{html_title}</title>
        |    <link type="text/css" rel="stylesheet" href="scanview.css"/>
        |    <script type='text/javascript' src="sorttable.js"></script>
        |    <script type='text/javascript' src='selectable.js'></script>
        |  </head>""",
                indent,
            ).format(html_title=args.html_title)
        )
        handle.write(comment("SUMMARYENDHEAD"))
        handle.write(
            reindent(
                """
        |  <body>
        |    <h1>{html_title}</h1>
        |    <table>
        |      <tr><th>User:</th><td>{user_name}@{host_name}</td></tr>
        |      <tr><th>Working Directory:</th><td>{current_dir}</td></tr>
        |      <tr><th>Command Line:</th><td>{cmd_args}</td></tr>
        |      <tr><th>Clang Version:</th><td>{clang_version}</td></tr>
        |      <tr><th>Date:</th><td>{date}</td></tr>
        |    </table>""",
                indent,
            ).format(
                html_title=args.html_title,
                user_name=getpass.getuser(),
                host_name=socket.gethostname(),
                current_dir=prefix,
                cmd_args=" ".join(sys.argv),
                clang_version=get_version(args.clang),
                date=datetime.datetime.today().strftime("%c"),
            )
        )
        for fragment in fragments:
            # copy the content of fragments
            with open(fragment, "r") as input_handle:
                shutil.copyfileobj(input_handle, handle)
        handle.write(
            reindent(
                """
        |  </body>
        |</html>""",
                indent,
            )
        )


def bug_summary(output_dir, bug_counter):
    """Bug summary is a HTML table to give a better overview of the bugs."""

    name = os.path.join(output_dir, "summary.html.fragment")
    with open(name, "w") as handle:
        indent = 4
        handle.write(
            reindent(
                """
        |<h2>Bug Summary</h2>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Bug Type</td>
        |      <td>Quantity</td>
        |      <td class="sorttable_nosort">Display?</td>
        |    </tr>
        |  </thead>
        |  <tbody>""",
                indent,
            )
        )
        handle.write(
            reindent(
                """
        |    <tr style="font-weight:bold">
        |      <td class="SUMM_DESC">All Bugs</td>
        |      <td class="Q">{0}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox" id="AllBugsCheck"
        |                 onClick="CopyCheckedStateToCheckButtons(this);"/>
        |        </center>
        |      </td>
        |    </tr>""",
                indent,
            ).format(bug_counter.total)
        )
        for category, types in bug_counter.categories.items():
            handle.write(
                reindent(
                    """
        |    <tr>
        |      <th>{0}</th><th colspan=2></th>
        |    </tr>""",
                    indent,
                ).format(category)
            )
            for bug_type in types.values():
                handle.write(
                    reindent(
                        """
        |    <tr>
        |      <td class="SUMM_DESC">{bug_type}</td>
        |      <td class="Q">{bug_count}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox"
        |                 onClick="ToggleDisplay(this,'{bug_type_class}');"/>
        |        </center>
        |      </td>
        |    </tr>""",
                        indent,
                    ).format(**bug_type)
                )
        handle.write(
            reindent(
                """
        |  </tbody>
        |</table>""",
                indent,
            )
        )
        handle.write(comment("SUMMARYBUGEND"))
    return name


def bug_report(output_dir, prefix):
    """Creates a fragment from the analyzer reports."""

    pretty = prettify_bug(prefix, output_dir)
    bugs = (pretty(bug) for bug in read_bugs(output_dir, True))

    name = os.path.join(output_dir, "bugs.html.fragment")
    with open(name, "w") as handle:
        indent = 4
        handle.write(
            reindent(
                """
        |<h2>Reports</h2>
        |<table class="sortable" style="table-layout:automatic">
        |  <thead>
        |    <tr>
        |      <td>Bug Group</td>
        |      <td class="sorttable_sorted">
        |        Bug Type
        |        <span id="sorttable_sortfwdind">&nbsp;&#x25BE;</span>
        |      </td>
        |      <td>File</td>
        |      <td>Function/Method</td>
        |      <td class="Q">Line</td>
        |      <td class="Q">Path Length</td>
        |      <td class="sorttable_nosort"></td>
        |    </tr>
        |  </thead>
        |  <tbody>""",
                indent,
            )
        )
        handle.write(comment("REPORTBUGCOL"))
        for current in bugs:
            handle.write(
                reindent(
                    """
        |    <tr class="{bug_type_class}">
        |      <td class="DESC">{bug_category}</td>
        |      <td class="DESC">{bug_type}</td>
        |      <td>{bug_file}</td>
        |      <td class="DESC">{bug_function}</td>
        |      <td class="Q">{bug_line}</td>
        |      <td class="Q">{bug_path_length}</td>
        |      <td><a href="{report_file}#EndPath">View Report</a></td>
        |    </tr>""",
                    indent,
                ).format(**current)
            )
            handle.write(comment("REPORTBUG", {"id": current["report_file"]}))
        handle.write(
            reindent(
                """
        |  </tbody>
        |</table>""",
                indent,
            )
        )
        handle.write(comment("REPORTBUGEND"))
    return name


def crash_report(output_dir, prefix):
    """Creates a fragment from the compiler crashes."""

    pretty = prettify_crash(prefix, output_dir)
    crashes = (pretty(crash) for crash in read_crashes(output_dir))

    name = os.path.join(output_dir, "crashes.html.fragment")
    with open(name, "w") as handle:
        indent = 4
        handle.write(
            reindent(
                """
        |<h2>Analyzer Failures</h2>
        |<p>The analyzer had problems processing the following files:</p>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Problem</td>
        |      <td>Source File</td>
        |      <td>Preprocessed File</td>
        |      <td>STDERR Output</td>
        |    </tr>
        |  </thead>
        |  <tbody>""",
                indent,
            )
        )
        for current in crashes:
            handle.write(
                reindent(
                    """
        |    <tr>
        |      <td>{problem}</td>
        |      <td>{source}</td>
        |      <td><a href="{file}">preprocessor output</a></td>
        |      <td><a href="{stderr}">analyzer std err</a></td>
        |    </tr>""",
                    indent,
                ).format(**current)
            )
            handle.write(comment("REPORTPROBLEM", current))
        handle.write(
            reindent(
                """
        |  </tbody>
        |</table>""",
                indent,
            )
        )
        handle.write(comment("REPORTCRASHES"))
    return name


def read_crashes(output_dir):
    """Generate a unique sequence of crashes from given output directory."""

    return (
        parse_crash(filename)
        for filename in glob.iglob(os.path.join(output_dir, "failures", "*.info.txt"))
    )


def read_bugs(output_dir, html):
    # type: (str, bool) -> Generator[Dict[str, Any], None, None]
    """Generate a unique sequence of bugs from given output directory.

    Duplicates can be in a project if the same module was compiled multiple
    times with different compiler options. These would be better to show in
    the final report (cover) only once."""

    def empty(file_name):
        return os.stat(file_name).st_size == 0

    duplicate = duplicate_check(
        lambda bug: "{bug_line}.{bug_path_length}:{bug_file}".format(**bug)
    )

    # get the right parser for the job.
    parser = parse_bug_html if html else parse_bug_plist
    # get the input files, which are not empty.
    pattern = os.path.join(output_dir, "*.html" if html else "*.plist")
    bug_files = (file for file in glob.iglob(pattern) if not empty(file))

    for bug_file in bug_files:
        for bug in parser(bug_file):
            if not duplicate(bug):
                yield bug


def merge_sarif_files(output_dir, sort_files=False):
    """Reads and merges all .sarif files in the given output directory.

    Each sarif file in the output directory is understood as a single run
    and thus appear separate in the top level runs array. This requires
    modifying the run index of any embedded links in messages.
    """

    def empty(file_name):
        return os.stat(file_name).st_size == 0

    def update_sarif_object(sarif_object, runs_count_offset):
        """
        Given a SARIF object, checks its dictionary entries for a 'message' property.
        If it exists, updates the message index of embedded links in the run index.

        Recursively looks through entries in the dictionary.
        """
        if not isinstance(sarif_object, dict):
            return sarif_object

        if "message" in sarif_object:
            sarif_object["message"] = match_and_update_run(
                sarif_object["message"], runs_count_offset
            )

        for key in sarif_object:
            if isinstance(sarif_object[key], list):
                # iterate through subobjects and update it.
                arr = [
                    update_sarif_object(entry, runs_count_offset)
                    for entry in sarif_object[key]
                ]
                sarif_object[key] = arr
            elif isinstance(sarif_object[key], dict):
                sarif_object[key] = update_sarif_object(
                    sarif_object[key], runs_count_offset
                )
            else:
                # do nothing
                pass

        return sarif_object

    def match_and_update_run(message, runs_count_offset):
        """
        Given a SARIF message object, checks if the text property contains an embedded link and
        updates the run index if necessary.
        """
        if "text" not in message:
            return message

        # we only merge runs, so we only need to update the run index
        pattern = re.compile(r"sarif:/runs/(\d+)")

        text = message["text"]
        matches = re.finditer(pattern, text)
        matches_list = list(matches)

        # update matches from right to left to make increasing character length (9->10) smoother
        for idx in range(len(matches_list) - 1, -1, -1):
            match = matches_list[idx]
            new_run_count = str(runs_count_offset + int(match.group(1)))
            text = text[0 : match.start(1)] + new_run_count + text[match.end(1) :]

        message["text"] = text
        return message

    sarif_files = (
        file
        for file in glob.iglob(os.path.join(output_dir, "*.sarif"))
        if not empty(file)
    )
    # exposed for testing since the order of files returned by glob is not guaranteed to be sorted
    if sort_files:
        sarif_files = list(sarif_files)
        sarif_files.sort()

    runs_count = 0
    merged = {}
    for sarif_file in sarif_files:
        with open(sarif_file) as fp:
            sarif = json.load(fp)
        if "runs" not in sarif:
            continue

        # start with the first file
        if not merged:
            merged = sarif
        else:
            # extract the run and append it to the merged output
            for run in sarif["runs"]:
                new_run = update_sarif_object(run, runs_count)
                merged["runs"].append(new_run)

        runs_count += len(sarif["runs"])

    with open(os.path.join(output_dir, "results-merged.sarif"), "w") as out:
        json.dump(merged, out, indent=4, sort_keys=True)


def parse_bug_plist(filename):
    """Returns the generator of bugs from a single .plist file."""

    with open(filename, "rb") as fp:
        content = plistlib.load(fp)
        files = content.get("files")
        for bug in content.get("diagnostics", []):
            if len(files) <= int(bug["location"]["file"]):
                logging.warning('Parsing bug from "%s" failed', filename)
                continue

            yield {
                "result": filename,
                "bug_type": bug["type"],
                "bug_category": bug["category"],
                "bug_line": int(bug["location"]["line"]),
                "bug_path_length": int(bug["location"]["col"]),
                "bug_file": files[int(bug["location"]["file"])],
            }


def parse_bug_html(filename):
    """Parse out the bug information from HTML output."""

    patterns = [
        re.compile(r"<!-- BUGTYPE (?P<bug_type>.*) -->$"),
        re.compile(r"<!-- BUGFILE (?P<bug_file>.*) -->$"),
        re.compile(r"<!-- BUGPATHLENGTH (?P<bug_path_length>.*) -->$"),
        re.compile(r"<!-- BUGLINE (?P<bug_line>.*) -->$"),
        re.compile(r"<!-- BUGCATEGORY (?P<bug_category>.*) -->$"),
        re.compile(r"<!-- BUGDESC (?P<bug_description>.*) -->$"),
        re.compile(r"<!-- FUNCTIONNAME (?P<bug_function>.*) -->$"),
    ]
    endsign = re.compile(r"<!-- BUGMETAEND -->")

    bug = {
        "report_file": filename,
        "bug_function": "n/a",  # compatibility with < clang-3.5
        "bug_category": "Other",
        "bug_line": 0,
        "bug_path_length": 1,
    }

    with open(filename, encoding="utf-8") as handler:
        for line in handler.readlines():
            # do not read the file further
            if endsign.match(line):
                break
            # search for the right lines
            for regex in patterns:
                match = regex.match(line.strip())
                if match:
                    bug.update(match.groupdict())
                    break

    encode_value(bug, "bug_line", int)
    encode_value(bug, "bug_path_length", int)

    yield bug


def parse_crash(filename):
    """Parse out the crash information from the report file."""

    match = re.match(r"(.*)\.info\.txt", filename)
    name = match.group(1) if match else None
    with open(filename, mode="rb") as handler:
        # this is a workaround to fix windows read '\r\n' as new lines.
        lines = [line.decode().rstrip() for line in handler.readlines()]
        return {
            "source": lines[0],
            "problem": lines[1],
            "file": name,
            "info": name + ".info.txt",
            "stderr": name + ".stderr.txt",
        }


def category_type_name(bug):
    """Create a new bug attribute from bug by category and type.

    The result will be used as CSS class selector in the final report."""

    def smash(key):
        """Make value ready to be HTML attribute value."""

        return bug.get(key, "").lower().replace(" ", "_").replace("'", "")

    return escape("bt_" + smash("bug_category") + "_" + smash("bug_type"))


def create_counters():
    """Create counters for bug statistics.

    Two entries are maintained: 'total' is an integer, represents the
    number of bugs. The 'categories' is a two level categorisation of bug
    counters. The first level is 'bug category' the second is 'bug type'.
    Each entry in this classification is a dictionary of 'count', 'type'
    and 'label'."""

    def predicate(bug):
        bug_category = bug["bug_category"]
        bug_type = bug["bug_type"]
        current_category = predicate.categories.get(bug_category, dict())
        current_type = current_category.get(
            bug_type,
            {
                "bug_type": bug_type,
                "bug_type_class": category_type_name(bug),
                "bug_count": 0,
            },
        )
        current_type.update({"bug_count": current_type["bug_count"] + 1})
        current_category.update({bug_type: current_type})
        predicate.categories.update({bug_category: current_category})
        predicate.total += 1

    predicate.total = 0
    predicate.categories = dict()
    return predicate


def prettify_bug(prefix, output_dir):
    def predicate(bug):
        """Make safe this values to embed into HTML."""

        bug["bug_type_class"] = category_type_name(bug)

        encode_value(bug, "bug_file", lambda x: escape(chop(prefix, x)))
        encode_value(bug, "bug_category", escape)
        encode_value(bug, "bug_type", escape)
        encode_value(bug, "report_file", lambda x: escape(chop(output_dir, x)))
        return bug

    return predicate


def prettify_crash(prefix, output_dir):
    def predicate(crash):
        """Make safe this values to embed into HTML."""

        encode_value(crash, "source", lambda x: escape(chop(prefix, x)))
        encode_value(crash, "problem", escape)
        encode_value(crash, "file", lambda x: escape(chop(output_dir, x)))
        encode_value(crash, "info", lambda x: escape(chop(output_dir, x)))
        encode_value(crash, "stderr", lambda x: escape(chop(output_dir, x)))
        return crash

    return predicate


def copy_resource_files(output_dir):
    """Copy the javascript and css files to the report directory."""

    this_dir = os.path.dirname(os.path.realpath(__file__))
    for resource in os.listdir(os.path.join(this_dir, "resources")):
        shutil.copy(os.path.join(this_dir, "resources", resource), output_dir)


def encode_value(container, key, encode):
    """Run 'encode' on 'container[key]' value and update it."""

    if key in container:
        value = encode(container[key])
        container.update({key: value})


def chop(prefix, filename):
    """Create 'filename' from '/prefix/filename'"""

    return filename if not len(prefix) else os.path.relpath(filename, prefix)


def escape(text):
    """Paranoid HTML escape method. (Python version independent)"""

    # NOTE: the table must map to HTML entities; an identity mapping would
    # make this a no-op and let bug/crash text corrupt the generated HTML.
    escape_table = {
        "&": "&amp;",
        '"': "&quot;",
        "'": "&apos;",
        ">": "&gt;",
        "<": "&lt;",
    }
    return "".join(escape_table.get(c, c) for c in text)


def reindent(text, indent):
    """Utility function to format html output and keep indentation."""

    result = ""
    for line in text.splitlines():
        if len(line.strip()):
            result += " " * indent + line.split("|")[1] + os.linesep
    return result


def comment(name, opts=None):
    """Utility function to format meta information as comment."""

    # use a None sentinel instead of a mutable default argument
    attributes = ""
    for key, value in (opts or {}).items():
        attributes += ' {0}="{1}"'.format(key, value)

    return "<!-- {0}{1} -->{2}".format(name, attributes, os.linesep)


def commonprefix_from(filename):
    """Create file prefix from a compilation database entries."""

    with open(filename, "r") as handle:
        return commonprefix(item["file"] for item in json.load(handle))


def commonprefix(files):
    """Fixed version of os.path.commonprefix.

    :param files: list of file names.
    :return: the longest path prefix that is a prefix of all files."""
    result = None
    for current in files:
        if result is not None:
            result = os.path.commonprefix([result, current])
        else:
            result = current

    if result is None:
        return ""
    elif not os.path.isdir(result):
        return os.path.dirname(result)
    else:
        return os.path.abspath(result)