#!/usr/bin/env python3

# Copyright 2021 Google LLC
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


# This script is written to process the output from bloaty, read via stdin
# The easiest way to use the script:
#
#   bloaty <path_to_binary> -d compileunits,symbols -n 0 --tsv | bloaty_treemap.py > bloaty.html
#
# Open the resulting .html file in your browser.

# TODO: Deal with symbols vs. fullsymbols, even both?
# TODO: Support aggregation by scope, rather than file (split C++ identifiers on '::')
# TODO: Deal with duplicate symbols better. These are actually good targets for optimization.
#       They are sometimes static functions in headers (so they appear in multiple .o files),
#       There are also symbols that appear multiple times due to inlining (eg, kNoCropRect).
# TODO: Figure out why some symbols are misattributed. Eg, Swizzle::Convert and ::Make are tied
#       to the header by nm, and then to one caller (at random) by bloaty. They're not inlined,
#       though. Unless LTO is doing something wacky here? Scope-aggregation may be the answer?
#       Ultimately, this seems like an issue with bloaty and/or debug information itself.

import os
import sys

# Maps every path already emitted as a tree node to its parent path ("ROOT" for top-level
# entries). Doubles as the "already printed" set for `add_path`.
parent_map = {}

# HTML/script header, plus the first two (fixed) rows of the data table.
_HTML_HEADER = """
<html>
  <head>
    <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
    <script type="text/javascript">
      google.charts.load('current', {'packages':['treemap']});
      google.charts.setOnLoadCallback(drawChart);
      function drawChart() {
        const data = google.visualization.arrayToDataTable([
          ['Name', 'Parent', 'Size'],
          ['ROOT', null, 0],"""

# HTML/script footer. The tooltip label is escaped by round-tripping it through a detached
# DOM node (textContent -> innerHTML), so every '&', '<' and '>' in a symbol name (think C++
# templates) is rendered literally instead of being parsed as markup.
_HTML_FOOTER = """        ]);
        tree = new google.visualization.TreeMap(document.getElementById('chart_div'));
        tree.draw(data, {
          generateTooltip: showTooltip
        });

        function showTooltip(row, size, value) {
          const scratch = document.createElement('span');
          scratch.textContent = data.getValue(row, 0);
          const escapedLabel = scratch.innerHTML;
          return `<div style="background:#fd9; padding:10px; border-style:solid">
                  <span style="font-family:Courier"> ${escapedLabel} <br>
                  Size: ${size} </div>`;
        }
      }
    </script>
  </head>
  <body>
    <div id="chart_div" style="width: 100%; height: 100%;"></div>
  </body>
</html>"""


def _js_escape(text):
    """Escape `text` for use inside a single-quoted JavaScript string literal."""
    # Backslashes must be doubled first, otherwise the ones added for quotes get doubled too.
    return text.replace("\\", "\\\\").replace("'", "\\'")


def _fail(message):
    """Report `message` on stderr and exit with status 1.

    stdout is normally redirected into the generated .html file, so an error printed there
    would never be seen by the user.
    """
    print(message, file=sys.stderr)
    sys.exit(1)


def _normalize_path(filepath):
    """Return `filepath` with leading "../" components and absolute prefixes removed.

    Absolute paths are only accepted when they contain "third_party"; everything before that
    component is dropped. Raises ValueError for any other absolute path.
    """
    # Strip the leading ../../ from paths
    while filepath.startswith("../"):
        filepath = filepath[3:]

    # Files in third_party sometimes have absolute paths. Strip those:
    if filepath.startswith("/"):
        rel_path_start = filepath.find("third_party")
        if rel_path_start < 0:
            raise ValueError(filepath)
        filepath = filepath[rel_path_start:]
    return filepath


# For a given filepath "foo/bar/baz.cpp", `add_path` outputs rows to the data table
# establishing the node hierarchy, and ensures that each line is emitted exactly once:
#
#     ['foo/bar/baz.cpp', 'foo/bar', 0],
#     ['foo/bar', 'foo', 0],
#     ['foo', 'ROOT', 0],
def add_path(path):
    """Print the tree rows for `path` and all of its not-yet-emitted ancestors.

    Ancestors are printed before their children. Every path that gets printed is recorded
    in `parent_map`, so calling this again with the same path prints nothing.
    """
    if path in parent_map:
        return

    head = os.path.split(path)[0]
    # `head == path` happens for a filesystem root such as "/"; treat it as top-level rather
    # than recursing forever.
    if not head or head == path:
        parent_map[path] = "ROOT"
    else:
        add_path(head)
        parent_map[path] = head

    # We add a suffix to paths to eliminate the chances of a path name colliding with a symbol
    # name. This is important because google.visualization.TreeMap requires node names to be
    # unique, and a file such as test/foo/bar.cpp would create a node named "test", which could
    # collide with a symbol named "test" defined in a C++ file.
    #
    # Assumptions made:
    #  - No C++ symbol ends with " (Path)".
    #  - No C++ symbol is named "ROOT".
    parent = parent_map[path]
    if parent != "ROOT":
        parent = "%s (Path)" % _js_escape(parent)
    print("['%s (Path)', '%s', 0]," % (_js_escape(path), parent))


def main():
    """Read bloaty TSV output from stdin and write a treemap HTML page to stdout.

    Each input row is expected to have four tab-separated columns (file path, symbol, VM
    size, file size); the first line is treated as a header and skipped. Rows whose path or
    symbol starts with '[' are ignored. On a malformed row or an unexpected absolute path an
    error is written to stderr and the process exits with status 1.
    """
    print(_HTML_HEADER)

    # Number of times each symbol name has been seen so far.
    symbol_frequencies = {}

    # Skip header row (tolerating completely empty input)
    # TODO: In the future, we could use this to automatically detect the source columns
    next(sys.stdin, None)

    for line in sys.stdin:
        # A stray blank line (e.g. at the end of the input) carries no data.
        if not line.strip():
            continue

        vals = line.rstrip().split("\t")
        if len(vals) != 4:
            _fail("ERROR: Failed to match line\n" + line)
        filepath, symbol, _vmsize, filesize = vals

        # Skip any entry where the filepath or symbol starts with '['
        # These tend to be section meta-data and debug information
        if filepath.startswith("[") or symbol.startswith("["):
            continue

        try:
            filepath = _normalize_path(filepath)
        except ValueError:
            _fail("ERROR: Unexpected absolute path:\n" + filepath)

        # Ensure that we've added intermediate nodes for all portions of this file path
        add_path(filepath)

        # Ensure that our final symbol name is unique (a repeated "foo" symbol becomes "foo_1",
        # "foo_2", etc.)
        seen = symbol_frequencies.get(symbol, 0)
        symbol_frequencies[symbol] = seen + 1
        if seen:
            symbol += "_" + str(seen)

        # Append another row for our sanitized data. Symbols involving C++ lambdas can contain
        # single quotes, so both strings are escaped for the JavaScript literal.
        print("['%s', '%s (Path)', %d]," % (_js_escape(symbol), _js_escape(filepath),
                                            int(filesize)))

    print(_HTML_FOOTER)


if __name__ == "__main__":
    main()