#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2020 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script to remove cold functions in a textual AFDO profile.

The script will look through the AFDO profile to find all the function
records. Then it'll start with the functions with the lowest sample count
and remove them from the profile, until the number of remaining functions
in the profile meets the given number. Functions having the same sample
count are ordered by name, so the result is deterministic and the result
profile will always have less than or equal to the given number of
functions.

The script is intended to be used on production ChromeOS profiles, after
other redaction/trimming scripts. It can be used with given textual CWP
and benchmark profiles, in order to analyze how many removed functions are
from which profile (or both), which can be used as an indicator of fairness
during the removal.

This is part of the effort to stabilize the impact of AFDO profile on
Chrome binary size. See crbug.com/1062014 for more context.
"""


import argparse
import collections
import re
import sys


# Matches a top-level function line of the form `name:count`, optionally
# followed by `:number`. Only the name and the first number are captured.
_function_line_re = re.compile(r"^([\w\$\.@]+):(\d+)(?::\d+)?$")

# function_count: the integer parsed from the function line.
# function_body: the function line followed by its indented sample lines.
# function_name: the name parsed from the function line.
ProfileRecord = collections.namedtuple(
    "ProfileRecord", ["function_count", "function_body", "function_name"]
)


def _read_sample_count(line):
    """Returns the (name, count) pair parsed from a function line.

    Raises:
        AssertionError: if `line` does not look like a function line.
    """
    m = _function_line_re.match(line)
    assert m, "Failed to interpret function line %s" % line
    return m.group(1), int(m.group(2))


def _make_record(function_line, samples):
    """Builds the ProfileRecord for a function line and its sample lines."""
    name, count = _read_sample_count(function_line)
    return ProfileRecord(
        function_count=count,
        function_body=[function_line] + samples,
        function_name=name,
    )


def _read_textual_afdo_profile(stream):
    """Parses an AFDO profile from a line stream into ProfileRecords.

    Args:
        stream: an iterable of text lines.

    Returns:
        A list of ProfileRecord, in the order they appear in `stream`.

    Raises:
        AssertionError: if an indented (sample) line appears before any
            function line, or a function line cannot be parsed.
    """
    # ProfileRecords are actually nested, due to inlining. For the purpose of
    # this script, that doesn't matter.
    records = []
    function_line = None
    samples = []
    for raw_line in stream:
        line = raw_line.rstrip()
        if not line:
            continue

        # Indented lines belong to the body of the current function.
        if line[0].isspace():
            assert (
                function_line is not None
            ), "sample exists outside of a function?"
            samples.append(line)
            continue

        # A new top-level line closes the previous function, if any.
        if function_line is not None:
            records.append(_make_record(function_line, samples))
        function_line = line
        samples = []

    # Flush the last function, which has no following line to close it.
    if function_line is not None:
        records.append(_make_record(function_line, samples))
    return records


def write_textual_afdo_profile(stream, records):
    """Writes the body of each record in `records` to `stream`."""
    for r in records:
        print("\n".join(r.function_body), file=stream)


def analyze_functions(records, cwp, benchmark):
    """Counts where the functions in `records` come from.

    Args:
        records: ProfileRecords of the profile being analyzed.
        cwp: ProfileRecords of the CWP profile.
        benchmark: ProfileRecords of the benchmark profile.

    Returns:
        A tuple (cwp_only, benchmark_only, common) with the number of
        function names in `records` that appear only in `cwp`, only in
        `benchmark`, or in both.

    Raises:
        AssertionError: if a function in `records` is in neither profile.
    """
    cwp_functions = {x.function_name for x in cwp}
    benchmark_functions = {x.function_name for x in benchmark}
    all_functions = {x.function_name for x in records}
    cwp_only_functions = len(
        (all_functions & cwp_functions) - benchmark_functions
    )
    benchmark_only_functions = len(
        (all_functions & benchmark_functions) - cwp_functions
    )
    common_functions = len(all_functions & benchmark_functions & cwp_functions)
    none_functions = len(all_functions - benchmark_functions - cwp_functions)

    assert not none_functions
    return cwp_only_functions, benchmark_only_functions, common_functions


def _percent(part, whole):
    """Returns `part` as a percentage of `whole`, or 0.0 if `whole` is 0."""
    return 100.0 * part / whole if whole else 0.0


def run(input_stream, output_stream, goal, cwp=None, benchmark=None):
    """Keeps the `goal` hottest functions of a textual AFDO profile.

    Records are ordered by descending sample count, ties broken by function
    name, and the first `goal` of them are written to `output_stream`. A
    summary is printed to stderr. Nothing is written if the input holds no
    functions.

    Args:
        input_stream: iterable of lines of the profile to trim.
        output_stream: text stream receiving the trimmed profile.
        goal: positive number of functions to retain.
        cwp: optional iterable of lines of the textual CWP profile.
        benchmark: optional iterable of lines of the textual benchmark
            profile. The origin analysis runs only if both `cwp` and
            `benchmark` are given.

    Raises:
        AssertionError: if `goal` is not positive, or the profiles are
            malformed.
    """
    records = _read_textual_afdo_profile(input_stream)
    num_functions = len(records)
    if not num_functions:
        return
    # Raised explicitly (rather than `assert`) so that the check survives
    # `python -O`. A negative goal must be rejected too: `records[:goal]`
    # would otherwise silently drop only the last |goal| records.
    if not goal or goal < 0:
        raise AssertionError(
            "It's invalid to remove all functions in the profile"
        )

    do_analysis = bool(cwp and benchmark)
    if do_analysis:
        cwp_records = _read_textual_afdo_profile(cwp)
        benchmark_records = _read_textual_afdo_profile(benchmark)
        cwp_num, benchmark_num, common_num = analyze_functions(
            records, cwp_records, benchmark_records
        )

    records.sort(key=lambda x: (-x.function_count, x.function_name))
    records = records[:goal]

    print(
        "Retained %d/%d (%.1f%%) functions in the profile"
        % (len(records), num_functions, _percent(len(records), num_functions)),
        file=sys.stderr,
    )
    write_textual_afdo_profile(output_stream, records)

    if do_analysis:
        (
            cwp_num_after,
            benchmark_num_after,
            common_num_after,
        ) = analyze_functions(records, cwp_records, benchmark_records)
        # Any of the "before" counts may legitimately be zero (e.g. every
        # function also appears in the benchmark profile), so the
        # percentages go through _percent() to avoid dividing by zero.
        print(
            "Retained %d/%d (%.1f%%) functions only appear in the CWP profile"
            % (cwp_num_after, cwp_num, _percent(cwp_num_after, cwp_num)),
            file=sys.stderr,
        )
        print(
            "Retained %d/%d (%.1f%%) functions only appear in the benchmark profile"
            % (
                benchmark_num_after,
                benchmark_num,
                _percent(benchmark_num_after, benchmark_num),
            ),
            file=sys.stderr,
        )
        print(
            "Retained %d/%d (%.1f%%) functions appear in both CWP and benchmark"
            " profiles"
            % (
                common_num_after,
                common_num,
                _percent(common_num_after, common_num),
            ),
            file=sys.stderr,
        )


def main():
    """Parses the command line and trims the given profile."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--input",
        default="/dev/stdin",
        help="File to read from. Defaults to stdin.",
    )
    parser.add_argument(
        "--output",
        default="/dev/stdout",
        help="File to write to. Defaults to stdout.",
    )
    parser.add_argument(
        "--number",
        type=int,
        required=True,
        help="Number of functions to retain in the profile.",
    )
    parser.add_argument(
        "--cwp", help="Textualized CWP profiles, used for further analysis"
    )
    parser.add_argument(
        "--benchmark",
        help="Textualized benchmark profile, used for further analysis",
    )
    args = parser.parse_args()

    # Reject negative values as well as 0: a negative number would be used
    # as a slice bound and keep the wrong set of functions.
    if args.number <= 0:
        parser.error(
            "It's invalid to reduce the number of functions to 0 or below."
        )

    if bool(args.cwp) != bool(args.benchmark):
        parser.error("Please specify both --cwp and --benchmark")

    with open(args.input, encoding="utf-8") as stdin:
        with open(args.output, "w", encoding="utf-8") as stdout:
            # When user specify textualized cwp and benchmark profiles,
            # perform the analysis. Otherwise, just trim the cold functions
            # from profile.
            if args.cwp and args.benchmark:
                with open(args.cwp, encoding="utf-8") as cwp:
                    with open(args.benchmark, encoding="utf-8") as benchmark:
                        run(stdin, stdout, args.number, cwp, benchmark)
            else:
                run(stdin, stdout, args.number)


if __name__ == "__main__":
    main()