xref: /aosp_15_r20/external/pigweed/pw_bloat/py/pw_bloat/bloaty_config.py (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1# Copyright 2022 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""Generates a useful bloaty config file containing new data sources."""
15
16import argparse
17import logging
18import re
19import sys
20from typing import BinaryIO, NamedTuple, TextIO
21
22import pw_cli.argument_types
23from elftools.elf import elffile  # type: ignore
24
_LOG = logging.getLogger('bloaty_config')

# Memory region symbols are spelled
# 'pw_bloat_config_memory_region_NAME_{start,end}{_N,}'. The trailing _N range
# index is optional; code using this pattern treats a missing index as 0.
_MEMORY_REGION_SYMBOL_RE = re.compile(
    r'pw_bloat_config_memory_region_'
    r'(?P<name>\w+)_(?P<limit>(start|end))(_(?P<index>\d+))?'
)
32
33
def _parse_args() -> argparse.Namespace:
    """Parses this module's command line arguments.

    Returns:
        The argparse.Namespace produced by parsing the process arguments. It
        carries the attributes elf_file (opened 'rb'), output (opened 'w',
        defaults to sys.stdout), utilization, memoryregions (both default to
        True) and loglevel (defaults to logging.INFO).
    """
    parser = argparse.ArgumentParser(
        description='Generates useful bloaty configurations entries',
        epilog='Hint: try this:\n'
        '   python -m pw_bloat.bloaty_config my_app.elf -o my_app.bloat',
        # The epilog relies on its line break and indentation; the default
        # formatter would re-wrap it into a single paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('elf_file', type=argparse.FileType('rb'))
    parser.add_argument(
        '--output',
        '-o',
        type=argparse.FileType('w'),
        help='The generated bloaty configuration',
        default=sys.stdout,
    )

    # --utilization / --no-utilization toggle the same destination.
    parser.add_argument(
        '--utilization',
        action='store_true',
        dest='utilization',
        default=True,
        help=(
            'Generate the utilization custom_data_source based on sections '
            'with "unused_space" in anywhere in their name'
        ),
    )
    parser.add_argument(
        '--no-utilization',
        action='store_false',
        dest='utilization',
    )

    # --memoryregions / --no-memoryregions toggle the same destination.
    parser.add_argument(
        '--memoryregions',
        action='store_true',
        dest='memoryregions',
        default=True,
        help=(
            'Generate the memoryregions custom_data_source based on '
            'symbols defined in the linker script matching the following '
            'pattern: '
            '"pw_bloat_config_memory_region_NAME_{start,end}{_N,}"'
        ),
    )
    parser.add_argument(
        '--no-memoryregions',
        action='store_false',
        dest='memoryregions',
    )

    parser.add_argument(
        '-l',
        '--loglevel',
        type=pw_cli.argument_types.log_level,
        default=logging.INFO,
        help='Set the log level (debug, info, warning, error, critical)',
    )
    return parser.parse_args()
90
91
def _parse_memory_regions(parsed_elf_file: elffile.ELFFile) -> dict | None:
    """
    Search for the special pw_bloat memory region symbols in the ELF binary.

    Symbols in the '.symtab' section whose names match
    _MEMORY_REGION_SYMBOL_RE are collected; each symbol's st_value supplies
    the start or end address of one range of a named memory region.

    On success this produces a dictionary which looks like:
      {
        MEMORY_REGION_NAME_0:{
          0:(VM_START_ADDRESS, VM_END_ADDRESS)
          ...
          N:(VM_START_ADDRESS, VM_END_ADDRESS)
        }
        ...
        MEMORY_REGION_NAME_M:{
          0:(VM_START_ADDRESS, VM_END_ADDRESS)
          ...
          K:(VM_START_ADDRESS, VM_END_ADDRESS)
        }
      }

    Returns:
        The dictionary described above, or None (after logging the reason)
        when the ELF has no '.symtab' section, when no matching symbols
        exist, when any range lacks its start or end symbol, or when ranges
        overlap.
    """
    symtab_section = parsed_elf_file.get_section_by_name('.symtab')
    # An explicit check rather than an assert: asserts are removed under -O,
    # which would turn a missing symbol table into an AttributeError below.
    if not symtab_section:
        _LOG.error(
            'No .symtab section found in the ELF file; '
            'memory region symbols cannot be looked up'
        )
        return None

    # Produces an initial dictionary which looks like:
    #  {
    #    MEMORY_REGION_NAME_0:{
    #      0:{ 'start':vm_start_address, 'end':vm_end_address }
    #      ...
    #      N:{ 'start':vm_start_address, 'end':vm_end_address }
    #    }
    #    ...
    #    MEMORY_REGION_NAME_M:{
    #      0:{ 'start':vm_start_address, 'end':vm_end_address }
    #      ...
    #      K:{ 'start':vm_start_address, 'end':vm_end_address }
    #    }
    #  }
    memory_regions: dict = {}
    for symbol in symtab_section.iter_symbols():
        match = _MEMORY_REGION_SYMBOL_RE.match(symbol.name)
        if not match:
            continue

        # A symbol without the optional _N suffix describes range 0.
        index = int(match.group('index') or 0)
        limits = memory_regions.setdefault(match.group('name'), {}).setdefault(
            index, {}
        )
        limits[match.group('limit')] = symbol.entry.st_value

    # If the user did not provide a single pw::bloat::config symbol in the ELF
    # binary then bail out and do nothing.
    if not memory_regions:
        _LOG.info('No valid pw::bloat::config::memory_region::* symbols found')
        return None

    # Ensure all memory regions' ranges have an end and start. Every problem
    # is reported before bailing out so the user sees them all at once.
    missing_range_limits = False
    for region_name, ranges in memory_regions.items():
        for index, limits in ranges.items():
            if 'start' not in limits:
                missing_range_limits = True
                _LOG.error(
                    '%s[%d] is missing the start address', region_name, index
                )
            if 'end' not in limits:
                missing_range_limits = True
                _LOG.error(
                    '%s[%d] is missing the end address', region_name, index
                )
    if missing_range_limits:
        _LOG.error('Invalid memory regions detected: missing ranges')
        return None

    # Translate the initial memory_regions dictionary to the tupled return
    # format, i.e. (start, end) values in the nested dictionary.
    tupled_memory_regions: dict = {
        region_name: {
            index: (limits['start'], limits['end'])
            for index, limits in ranges.items()
        }
        for region_name, ranges in memory_regions.items()
    }

    # Ensure the memory regions do not overlap.
    if _memory_regions_overlap(tupled_memory_regions):
        _LOG.error('Invalid memory regions detected: overlaps detected')
        return None

    return tupled_memory_regions
190
191
def _parse_segments(parsed_elf_file: elffile.ELFFile) -> dict:
    """
    Collect the virtual memory range of every segment in the ELF binary.

    Walks the program header entries by index and records, for each segment
    with a non-zero p_memsz, the half-open address range it occupies. The
    result is keyed by the segment's index in the program header:
      {
        0:(start_vmaddr,end_vmaddr),
        ...
        N:(start_vmaddr,end_vmaddr),
      }
    Segments whose p_memsz is zero are left out, so indices may have gaps.
    """
    vm_ranges = {}
    segment_count = parsed_elf_file.num_segments()
    for index in range(segment_count):
        header = parsed_elf_file.get_segment(index)
        base, size = header['p_vaddr'], header['p_memsz']
        if size != 0:
            # Only segments that occupy virtual memory are reported.
            vm_ranges[index] = (base, base + size)
    return vm_ranges
215
216
def _memory_regions_overlap(memory_regions: dict) -> bool:
    """Returns whether any memory regions overlap each other.

    Args:
        memory_regions: Nested dictionary of the form
            {region_name: {index: (start, end)}}.

    Returns:
        True if the start or end of any range falls inside another range.
        Ranges that merely touch (one ends exactly where the other starts)
        do not count as overlapping. Every overlap found is logged as an
        error; the scan does not stop at the first one.
    """
    # Flatten the nested dictionary so every range can be compared against
    # every other range, regardless of which region it belongs to.
    all_ranges = [
        (name, index, start, end)
        for name, ranges in memory_regions.items()
        for index, (start, end) in ranges.items()
    ]

    overlaps_detected = False
    for current_name, current_index, current_start, current_end in all_ranges:
        for other_name, other_index, other_start, other_end in all_ranges:
            if (current_name, current_index) == (other_name, other_index):
                continue  # Skip yourself.
            # Check if either limit of the other range is within this range.
            # Full containment of this range by the other one is caught when
            # the two swap roles in a later iteration.
            other_end_overlaps = current_start < other_end <= current_end
            other_start_overlaps = current_start <= other_start < current_end
            if other_end_overlaps or other_start_overlaps:
                overlaps_detected = True
                _LOG.error(
                    'error: %s[%s] [%s,%s] overlaps with %s[%s] [%s,%s]',
                    current_name,
                    current_index,
                    hex(current_start),
                    hex(current_end),
                    other_name,
                    other_index,
                    hex(other_start),
                    hex(other_end),
                )
    return overlaps_detected
253
254
def _get_segments_to_memory_region_map(elf_file: BinaryIO) -> dict | None:
    """
    Looks up which memory region each segment of an ELF file resides in.

    The file is parsed once; its memory regions and its segments are both
    read from that parsed ELF.

    Returns:
        The result of map_segments_to_memory_regions, or None when
        _parse_memory_regions yielded no memory regions.
    """
    elf = elffile.ELFFile(elf_file)

    regions = _parse_memory_regions(elf)
    if regions:
        return map_segments_to_memory_regions(
            segments=_parse_segments(elf), memory_regions=regions
        )

    # Without memory regions there is nothing to map the segments onto.
    return None
273
274
def map_segments_to_memory_regions(
    segments: dict, memory_regions: dict
) -> dict:
    """
    Maps segments to the virtual memory regions they reside in.

    Args:
        segments: {segment_index: (start, end)}, as produced by
            _parse_segments.
        memory_regions: {region_name: {index: (start, end)}}, as produced by
            _parse_memory_regions.

    Returns:
        A dictionary which looks like:
        {
          SEGMENT_INDEX_0:'MEMORY_REGION_NAME_0',
          SEGMENT_INDEX_1:'MEMORY_REGION_NAME_0',
          ...
          SEGMENT_INDEX_N:'MEMORY_REGION_NAME_M',
        }
        A segment that does not fit entirely inside any subregion is logged
        as an error and omitted from the result.
    """
    placement = {}
    for seg_index, (seg_start, seg_end) in segments.items():
        # Names of every region owning a subregion that fully contains the
        # segment, in iteration order.
        containing = [
            region_name
            for region_name, subregions in memory_regions.items()
            for lower, upper in subregions.values()
            if lower <= seg_start and seg_end <= upper
        ]
        if containing:
            # If several subregions qualify, the last one found wins.
            placement[seg_index] = containing[-1]
            continue
        _LOG.error(
            f'Error: Failed to find memory region for LOAD #{seg_index} '
            f'[{hex(seg_start)},{hex(seg_end)}]'
        )
    return placement
313
314
def generate_memoryregions_data_source(segment_to_memory_region: dict) -> str:
    """Renders the "memoryregions" bloaty custom_data_source.

    Args:
        segment_to_memory_region: {segment_index: memory_region_name}.

    Returns:
        The config text, terminated by a newline. It is based on the
        "segments" data source and holds one rewrite rule per entry that
        renames 'LOAD #<segment_index> [...]' to the memory region name,
        followed by a catch-all rule that labels everything else
        "Not resident in memory".
    """
    lines = [
        'custom_data_source: {',
        '  name: "memoryregions"',
        '  base_data_source: "segments"',
    ]
    for segment_index, memory_region in segment_to_memory_region.items():
        lines += [
            '  rewrite: {',
            # The doubled backslashes are emitted as-is into the config text.
            rf'    pattern:"^LOAD #{segment_index} \\[.*\\]$"',
            f'    replacement:"{memory_region}"',
            '  }',
        ]
    lines += [
        '  rewrite: {',
        '    pattern:".*"',
        '    replacement:"Not resident in memory"',
        '  }',
        '}',
    ]
    return '\n'.join(lines) + '\n'
332
333
def generate_utilization_data_source() -> str:
    """Renders the "utilization" bloaty custom_data_source.

    Returns:
        The config text, terminated by a newline. It is based on the
        "sections" data source and holds three rewrite rules, in this order:
        names containing "unused_space" become "Free space", names starting
        with "[LOAD" become "Padding", and everything else becomes
        "Used space".
    """
    # (pattern, replacement) pairs, emitted in this order.
    rewrites = (
        ('unused_space', 'Free space'),
        # The doubled backslash is emitted as-is into the config text.
        (r'^\\[LOAD', 'Padding'),
        ('.*', 'Used space'),
    )

    lines = [
        'custom_data_source: {',
        '  name:"utilization"',
        '  base_data_source:"sections"',
    ]
    for pattern, replacement in rewrites:
        lines.extend(
            (
                '  rewrite: {',
                f'    pattern:"{pattern}"',
                f'    replacement:"{replacement}"',
                '  }',
            )
        )
    lines.append('}')
    return '\n'.join(lines) + '\n'
353
354
class BloatyConfigResult(NamedTuple):
    """Reports which data sources generate_bloaty_config wrote."""

    # True when the "memoryregions" custom_data_source was written.
    has_memoryregions: bool
    # True when the "utilization" custom_data_source was written.
    has_utilization: bool
358
359
def generate_bloaty_config(
    elf_file: BinaryIO,
    enable_memoryregions: bool,
    enable_utilization: bool,
    out_file: TextIO,
) -> BloatyConfigResult:
    """Generates a Bloaty config file from symbols within an ELF.

    Args:
        elf_file: The ELF binary to inspect; only read when
            enable_memoryregions is set.
        enable_memoryregions: Whether to attempt to write the memoryregions
            data source.
        enable_utilization: Whether to write the utilization data source.
        out_file: Destination the generated config text is written to.

    Returns:
        Tuple indicating whether a memoryregions data source, a utilization data
        source, or both were written.
    """
    wrote_memoryregions = False
    wrote_utilization = False

    if enable_memoryregions:
        # The "memoryregions" data_source is only emitted if the user
        # provided the required pw_bloat specific symbols in their linker
        # script and at least one segment could be mapped to a region.
        mapping = _get_segments_to_memory_region_map(elf_file)
        if mapping:
            _LOG.info('memoryregions data_source is provided')
            out_file.write(generate_memoryregions_data_source(mapping))
            wrote_memoryregions = True
        else:
            _LOG.info('memoryregions data_source is not provided')

    if enable_utilization:
        _LOG.info('utilization data_source is provided')
        out_file.write(generate_utilization_data_source())
        wrote_utilization = True

    return BloatyConfigResult(
        has_memoryregions=wrote_memoryregions,
        has_utilization=wrote_utilization,
    )
394
395
def main() -> int:
    """Command line entry point: writes a bloaty config for an ELF file.

    Parses the command line, configures logging to print bare messages at the
    requested level, and generates the config.

    Returns:
        Always 0.
    """
    cli_args = _parse_args()

    logging.basicConfig(level=cli_args.loglevel, format='%(message)s')

    generate_bloaty_config(
        elf_file=cli_args.elf_file,
        out_file=cli_args.output,
        enable_memoryregions=cli_args.memoryregions,
        enable_utilization=cli_args.utilization,
    )
    return 0
409
410
# Run main() when executed as a script and use its result as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
413