xref: /aosp_15_r20/external/toolchain-utils/afdo_redaction/remove_cold_functions.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3# Copyright 2020 The ChromiumOS Authors
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
"""Script to remove cold functions in a textual AFDO profile.
8
The script will look through the AFDO profile to find all the function
records. It then removes functions, starting with those that have the
lowest sample count, until the number of functions remaining in the
profile meets the given number. Functions with the same sample count are
ordered by name when deciding which ones to drop, so the resulting profile
never has more than the given number of functions.
16
The script is intended to be used on production ChromeOS profiles, after
other redaction/trimming scripts. It can be used with given textual CWP
and benchmark profiles, in order to analyze how many removed functions are
from which profile (or both), which can be used as an indicator of fairness
during the removal.
22
This is part of the effort to stabilize the impact of AFDO profile on
Chrome binary size. See crbug.com/1062014 for more context.
25"""
26
27
28import argparse
29import collections
30import re
31import sys
32
33
# Header line of a function record: "<name>:<count>" with an optional
# trailing ":<digits>" field. Only the name and the first number are captured.
_function_line_re = re.compile(r"^([\w\$\.@]+):(\d+)(?::\d+)?$")

# One function record of the profile:
#   function_count: integer parsed from the header line.
#   function_body:  list of text lines, header first, then its sample lines.
#   function_name:  name parsed from the header line.
ProfileRecord = collections.namedtuple(
    "ProfileRecord", "function_count function_body function_name"
)
38
39
def _read_sample_count(line):
    """Splits a function header line into its name and sample count.

    Args:
        line: A function header line, already stripped of trailing whitespace.

    Returns:
        A (function_name, count) tuple, with the count converted to int.

    Raises:
        AssertionError: If the line does not look like a function header.
    """
    match = _function_line_re.match(line)
    assert match, "Failed to interpret function line %s" % line
    # The pattern has exactly two capturing groups: the name and the count.
    name, count_text = match.groups()
    return name, int(count_text)
44
45
def _read_textual_afdo_profile(stream):
    """Parses a textual AFDO profile from a line stream into ProfileRecords.

    Blank lines are skipped. A line that starts with whitespace is treated as
    a sample line of the most recent function header; any other line starts a
    new function record.

    Args:
        stream: An iterable of text lines.

    Returns:
        A list of ProfileRecord, in the order the functions appear.

    Raises:
        AssertionError: If a sample line appears before any function header,
            or a function header cannot be parsed.
    """
    # Inlining means real records are nested, but a flat view of the
    # top-level functions is all this script needs.
    parsed = []
    header = None
    body_lines = []

    def emit_pending():
        # The header is only parsed once its whole record has been collected.
        name, count = _read_sample_count(header)
        parsed.append(
            ProfileRecord(
                function_count=count,
                function_body=[header] + body_lines,
                function_name=name,
            )
        )

    for raw_line in stream:
        line = raw_line.rstrip()
        if not line:
            continue

        if not line[0].isspace():
            # A new header closes whatever record was being collected.
            if header is not None:
                emit_pending()
            header, body_lines = line, []
            continue

        assert header is not None, "sample exists outside of a function?"
        body_lines.append(line)

    # The final record has no following header to close it.
    if header is not None:
        emit_pending()
    return parsed
85
86
def write_textual_afdo_profile(stream, records):
    """Writes every record's body lines to `stream`, one line per entry.

    Args:
        stream: Writable text stream to print to.
        records: Iterable of objects with a `function_body` list of strings.
    """
    rendered_bodies = ("\n".join(rec.function_body) for rec in records)
    for text in rendered_bodies:
        print(text, file=stream)
90
91
def analyze_functions(records, cwp, benchmark):
    """Counts how the functions in `records` split between two profiles.

    Args:
        records: Records of the profile being analyzed.
        cwp: Records of the CWP profile.
        benchmark: Records of the benchmark profile.

    Returns:
        A tuple (cwp_only, benchmark_only, common) of counts of function
        names from `records` that appear only in `cwp`, only in `benchmark`,
        or in both.

    Raises:
        AssertionError: If some function in `records` is in neither profile.
    """
    cwp_names = {rec.function_name for rec in cwp}
    benchmark_names = {rec.function_name for rec in benchmark}
    names = {rec.function_name for rec in records}

    in_cwp = names & cwp_names
    in_benchmark = names & benchmark_names
    in_both = in_cwp & in_benchmark

    # Every analyzed function must come from at least one source profile.
    unaccounted = names - in_cwp - in_benchmark
    assert not len(unaccounted)

    return len(in_cwp - in_both), len(in_benchmark - in_both), len(in_both)
107
108
def _percent(part, whole):
    """Returns `part` as a percentage of `whole`, or 0.0 when `whole` is 0."""
    return 100.0 * part / whole if whole else 0.0


def run(input_stream, output_stream, goal, cwp=None, benchmark=None):
    """Keeps the `goal` hottest functions of a profile and writes them out.

    Records are ordered by descending sample count, with ties broken by
    function name, and the first `goal` of them are written to
    `output_stream`. A summary is printed to stderr. When both `cwp` and
    `benchmark` are given, stderr also reports how many retained functions
    appear only in the CWP profile, only in the benchmark profile, or in both.

    Args:
        input_stream: Line stream of the textual profile to trim.
        output_stream: Text stream the trimmed profile is written to.
        goal: Number of functions to retain; must be positive.
        cwp: Optional line stream of the textual CWP profile.
        benchmark: Optional line stream of the textual benchmark profile.

    Raises:
        AssertionError: If `goal` is zero or negative and the input profile
            is not empty.
    """
    records = _read_textual_afdo_profile(input_stream)
    num_functions = len(records)
    if not num_functions:
        return
    # Raised explicitly rather than through `assert` so the check survives
    # `python -O`, while keeping the exception type and message unchanged.
    # Negative goals are rejected too: `records[:goal]` would otherwise drop
    # the |goal| coldest functions instead of retaining `goal` of them.
    if not goal or goal < 0:
        raise AssertionError(
            "It's invalid to remove all functions in the profile"
        )

    analyze = bool(cwp and benchmark)
    if analyze:
        cwp_records = _read_textual_afdo_profile(cwp)
        benchmark_records = _read_textual_afdo_profile(benchmark)
        counts_before = analyze_functions(
            records, cwp_records, benchmark_records
        )

    # Hottest first; the name is a tie-breaker that keeps output deterministic.
    records.sort(key=lambda x: (-x.function_count, x.function_name))
    records = records[:goal]

    print(
        "Retained %d/%d (%.1f%%) functions in the profile"
        % (len(records), num_functions, _percent(len(records), num_functions)),
        file=sys.stderr,
    )
    write_textual_afdo_profile(output_stream, records)

    if analyze:
        counts_after = analyze_functions(
            records, cwp_records, benchmark_records
        )
        categories = (
            "functions only appear in the CWP profile",
            "functions only appear in the benchmark profile",
            "functions appear in both CWP and benchmark profiles",
        )
        # Any category may have been empty before trimming (for example, no
        # CWP-only functions), so the percentage must tolerate a zero total.
        for kept, total, what in zip(counts_after, counts_before, categories):
            print(
                "Retained %d/%d (%.1f%%) %s"
                % (kept, total, _percent(kept, total), what),
                file=sys.stderr,
            )
163
164
def main():
    """Parses command-line flags and trims the requested profile.

    Reads the profile from --input, writes the trimmed profile to --output,
    and, when both --cwp and --benchmark are given, also runs the
    per-profile retention analysis. Invalid flag combinations are reported
    through `parser.error`.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--input",
        default="/dev/stdin",
        help="File to read from. Defaults to stdin.",
    )
    parser.add_argument(
        "--output",
        default="/dev/stdout",
        help="File to write to. Defaults to stdout.",
    )
    parser.add_argument(
        "--number",
        type=int,
        required=True,
        help="Number of functions to retain in the profile.",
    )
    parser.add_argument(
        "--cwp", help="Textualized CWP profiles, used for further analysis"
    )
    parser.add_argument(
        "--benchmark",
        help="Textualized benchmark profile, used for further analysis",
    )
    args = parser.parse_args()

    # A negative count would be used as a slice bound and silently drop that
    # many of the coldest functions, so reject it before doing any work.
    if args.number < 0:
        parser.error("--number must be positive, got %d." % args.number)
    if not args.number:
        parser.error("It's invalid to remove the number of functions to 0.")

    # The analysis needs both reference profiles or neither.
    if bool(args.cwp) != bool(args.benchmark):
        parser.error("Please specify both --cwp and --benchmark")

    with open(args.input) as profile_in, open(args.output, "w") as profile_out:
        # When the user specifies textualized cwp and benchmark profiles,
        # perform the analysis. Otherwise, just trim the cold functions.
        if args.cwp and args.benchmark:
            with open(args.cwp) as cwp, open(args.benchmark) as benchmark:
                run(profile_in, profile_out, args.number, cwp, benchmark)
        else:
            run(profile_in, profile_out, args.number)


if __name__ == "__main__":
    main()
215