# Copyright 2022 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Generates a useful bloaty config file containing new data sources."""

import argparse
import logging
import re
import sys
from typing import BinaryIO, NamedTuple, TextIO

import pw_cli.argument_types
from elftools.elf import elffile  # type: ignore

_LOG = logging.getLogger('bloaty_config')

# 'pw_bloat_config_memory_region_NAME_{start,end}{_N,}' where _N defaults to 0.
_MEMORY_REGION_SYMBOL_RE = re.compile(
    r'pw_bloat_config_memory_region_'
    + r'(?P<name>\w+)_(?P<limit>(start|end))(_(?P<index>\d+))?'
)


def _parse_args() -> argparse.Namespace:
    """Parses the command line and returns the resulting arguments."""
    parser = argparse.ArgumentParser(
        description='Generates useful bloaty configurations entries',
        epilog='Hint: try this:\n'
        ' python -m pw_bloat.bloaty_config my_app.elf -o my_app.bloat',
        # Keep the line break in the epilog instead of re-wrapping it.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('elf_file', type=argparse.FileType('rb'))
    parser.add_argument(
        '--output',
        '-o',
        type=argparse.FileType('w'),
        help='The generated bloaty configuration',
        default=sys.stdout,
    )
    parser.add_argument(
        '--utilization',
        action='store_true',
        dest='utilization',
        default=True,
        help=(
            'Generate the utilization custom_data_source based on sections '
            'with "unused_space" anywhere in their name'
        ),
    )
    parser.add_argument(
        '--no-utilization',
        action='store_false',
        dest='utilization',
    )

    parser.add_argument(
        '--memoryregions',
        action='store_true',
        default=True,
        help=(
            'Generate the memoryregions custom_data_source based on '
            'symbols defined in the linker script matching the following '
            'pattern: '
            '"pw_bloat_config_memory_region_NAME_{start,end}{_N,}"'
        ),
    )
    parser.add_argument(
        '--no-memoryregions',
        action='store_false',
        dest='memoryregions',
    )

    parser.add_argument(
        '-l',
        '--loglevel',
        type=pw_cli.argument_types.log_level,
        default=logging.INFO,
        help='Set the log level (debug, info, warning, error, critical)',
    )
    return parser.parse_args()


def _parse_memory_regions(parsed_elf_file: elffile.ELFFile) -> dict | None:
    """Searches the ELF symbol table for the pw_bloat memory region symbols.

    Symbols are recognized through _MEMORY_REGION_SYMBOL_RE; a symbol without
    an explicit index suffix is treated as index 0.

    Args:
      parsed_elf_file: The parsed ELF binary to inspect.

    Returns:
      None if the ELF has no '.symtab' section, if no matching symbols exist,
      if any range lacks its start or end symbol, or if any ranges overlap.
      Otherwise a dictionary which looks like:
        {
          MEMORY_REGION_NAME_0:{
            0:(VM_START_ADDRESS, VM_END_ADDRESS)
            ...
            N:(VM_START_ADDRESS, VM_END_ADDRESS)
          }
          ...
          MEMORY_REGION_NAME_M:{
            0:(VM_START_ADDRESS, VM_END_ADDRESS)
            ...
            K:(VM_START_ADDRESS, VM_END_ADDRESS)
          }
        }
    """
    symtab_section = parsed_elf_file.get_section_by_name('.symtab')
    if symtab_section is None:
        # Without a symbol table there is nothing to search; report it the
        # same way as every other "no usable memory regions" outcome rather
        # than relying on an assert, which is stripped under `python -O`.
        _LOG.warning('No .symtab section found in the ELF binary')
        return None

    # Produces an initial dictionary which looks like:
    # {
    #   MEMORY_REGION_NAME_0:{
    #     0:{ 'start':vm_start_address, 'end':vm_end_address }
    #     ...
    #     N:{ 'start':vm_start_address, 'end':vm_end_address }
    #   }
    #   ...
    #   MEMORY_REGION_NAME_M:{
    #     0:{ 'start':vm_start_address, 'end':vm_end_address }
    #     ...
    #     K:{ 'start':vm_start_address, 'end':vm_end_address }
    #   }
    # }
    memory_regions: dict = {}
    for symbol in symtab_section.iter_symbols():
        match = _MEMORY_REGION_SYMBOL_RE.match(symbol.name)
        if not match:
            continue

        name = match.group('name')
        limit = match.group('limit')
        index = int(match.group('index') or 0)
        segment_limits = memory_regions.setdefault(name, {}).setdefault(
            index, {}
        )
        segment_limits[limit] = symbol.entry.st_value

    # If the user did not provide a single pw::bloat::config symbol in the ELF
    # binary then bail out and do nothing.
    if not memory_regions:
        _LOG.info('No valid pw::bloat::config::memory_region::* symbols found')
        return None

    # Ensure all memory regions' ranges have an end and start. Every problem
    # is logged before bailing out so the user sees the full list at once.
    missing_range_limits = False
    for region_name, ranges in memory_regions.items():
        for index, limits in ranges.items():
            if 'start' not in limits:
                missing_range_limits = True
                _LOG.error(
                    '%s[%d] is missing the start address', region_name, index
                )
            if 'end' not in limits:
                missing_range_limits = True
                _LOG.error(
                    '%s[%d] is missing the end address', region_name, index
                )
    if missing_range_limits:
        _LOG.error('Invalid memory regions detected: missing ranges')
        return None

    # Translate the initial memory_regions dictionary to the tupled return
    # format, i.e. (start, end) values in the nested dictionary.
    tupled_memory_regions: dict = {
        region_name: {
            index: (limits['start'], limits['end'])
            for index, limits in ranges.items()
        }
        for region_name, ranges in memory_regions.items()
    }

    # Ensure the memory regions do not overlap.
    if _memory_regions_overlap(tupled_memory_regions):
        _LOG.error('Invalid memory regions detected: overlaps detected')
        return None

    return tupled_memory_regions


def _parse_segments(parsed_elf_file: elffile.ELFFile) -> dict:
    """Reports all of the segment information from the ELF binary.

    Iterates over all of the segments in the ELF file's program header and
    reports where they reside in virtual memory. Segments with a memory size
    of zero are left out.

    Args:
      parsed_elf_file: The parsed ELF binary to inspect.

    Returns:
      A dictionary keyed by program header index which looks like:
        {
          0:(start_vmaddr,end_vmaddr),
          ...
          N:(start_vmaddr,end_vmaddr),
        }
    """
    segments = {}
    for i in range(parsed_elf_file.num_segments()):
        segment = parsed_elf_file.get_segment(i)
        start_vmaddr = segment['p_vaddr']
        memory_size = segment['p_memsz']
        if memory_size == 0:
            continue  # Not a loaded segment which resides in virtual memory.
        segments[i] = (start_vmaddr, start_vmaddr + memory_size)
    return segments


def _memory_regions_overlap(memory_regions: dict) -> bool:
    """Returns whether any memory regions overlap each other.

    Every detected overlap is logged as an error.

    Args:
      memory_regions: Nested dictionary of
        {region_name: {index: (start, end)}} as produced by
        _parse_memory_regions.
    """
    # Flatten the nested dictionary so every range can be compared against
    # every other range regardless of which region it belongs to.
    all_ranges = [
        (name, index, start, end)
        for name, ranges in memory_regions.items()
        for index, (start, end) in ranges.items()
    ]

    overlaps_detected = False
    for current in all_ranges:
        current_name, current_index, current_start, current_end = current
        for other in all_ranges:
            if other is current:
                continue  # Skip yourself.
            other_name, other_index, other_start, other_end = other
            # Check if either limit of the other range is within this range.
            # A range which fully contains this one is caught when the roles
            # are swapped in a later iteration.
            other_end_overlaps = current_start < other_end <= current_end
            other_start_overlaps = current_start <= other_start < current_end
            if other_end_overlaps or other_start_overlaps:
                overlaps_detected = True
                _LOG.error(
                    'error: %s[%s] [%s,%s] overlaps with %s[%s] [%s,%s]',
                    current_name,
                    current_index,
                    hex(current_start),
                    hex(current_end),
                    other_name,
                    other_index,
                    hex(other_start),
                    hex(other_end),
                )
    return overlaps_detected


def _get_segments_to_memory_region_map(elf_file: BinaryIO) -> dict | None:
    """Processes an ELF file to look up what memory regions segments are in.

    Args:
      elf_file: Binary file object of the ELF to parse.

    Returns:
      The result from map_segments_to_memory_regions if valid memory regions
      were parsed out of the ELF file, None otherwise.
    """
    parsed_elf_file = elffile.ELFFile(elf_file)

    memory_regions = _parse_memory_regions(parsed_elf_file)
    if not memory_regions:
        return None

    segments = _parse_segments(parsed_elf_file)

    return map_segments_to_memory_regions(
        segments=segments, memory_regions=memory_regions
    )


def map_segments_to_memory_regions(
    segments: dict, memory_regions: dict
) -> dict:
    """Maps segments to the virtual memory regions they reside in.

    A segment belongs to a memory region when it lies entirely within one of
    the region's (start, end) ranges. Segments which fit in no region are
    logged as errors and left out of the result.

    Args:
      segments: {segment_index: (start, end)} as produced by _parse_segments.
      memory_regions: {region_name: {index: (start, end)}} as produced by
        _parse_memory_regions.

    Returns:
      A dictionary which looks like:
        {
          SEGMENT_INDEX_0:'MEMORY_REGION_NAME_0',
          SEGMENT_INDEX_1:'MEMORY_REGION_NAME_0',
          ...
          SEGMENT_INDEX_N:'MEMORY_REGION_NAME_M',
        }
    """
    segment_to_memory_region = {}
    for segment, (segment_start, segment_end) in segments.items():
        for memory_region_name, memory_region_info in memory_regions.items():
            for subregion_start, subregion_end in memory_region_info.values():
                if (
                    segment_start >= subregion_start
                    and segment_end <= subregion_end
                ):
                    # We found the subregion the segment resides in.
                    segment_to_memory_region[segment] = memory_region_name
        if segment not in segment_to_memory_region:
            _LOG.error(
                'Error: Failed to find memory region for LOAD #%s [%s,%s]',
                segment,
                hex(segment_start),
                hex(segment_end),
            )
    return segment_to_memory_region


def generate_memoryregions_data_source(segment_to_memory_region: dict) -> str:
    """Returns the text of the "memoryregions" bloaty custom_data_source.

    One rewrite rule is emitted per segment, replacing its 'LOAD #N [...]'
    name with the memory region name. A final catch-all rule labels
    everything else as "Not resident in memory".

    Args:
      segment_to_memory_region: {segment_index: memory_region_name} as
        produced by map_segments_to_memory_regions.
    """
    output: list[str] = []
    output.append('custom_data_source: {')
    output.append(' name: "memoryregions"')
    output.append(' base_data_source: "segments"')
    for segment_index, memory_region in segment_to_memory_region.items():
        output.append(' rewrite: {')
        # The doubled backslashes are written to the config file verbatim.
        segment_filter = r'^LOAD ' + f'#{segment_index}' + r' \\[.*\\]$'
        output.append(f' pattern:"{segment_filter}"')
        output.append(f' replacement:"{memory_region}"')
        output.append(' }')
    output.append(' rewrite: {')
    output.append(' pattern:".*"')
    output.append(' replacement:"Not resident in memory"')
    output.append(' }')
    output.append('}')
    return '\n'.join(output) + '\n'


def generate_utilization_data_source() -> str:
    """Returns the text of the "utilization" bloaty custom_data_source.

    The rules rewrite section names containing "unused_space" to
    "Free space", names starting with "[LOAD" to "Padding", and everything
    else to "Used space".
    """
    output: list[str] = []
    output.append('custom_data_source: {')
    output.append(' name:"utilization"')
    output.append(' base_data_source:"sections"')
    output.append(' rewrite: {')
    output.append(' pattern:"unused_space"')
    output.append(' replacement:"Free space"')
    output.append(' }')
    output.append(' rewrite: {')
    output.append(' pattern:"^\\\\[LOAD"')
    output.append(' replacement:"Padding"')
    output.append(' }')
    output.append(' rewrite: {')
    output.append(' pattern:".*"')
    output.append(' replacement:"Used space"')
    output.append(' }')
    output.append('}')
    return '\n'.join(output) + '\n'


class BloatyConfigResult(NamedTuple):
    """Records which data sources generate_bloaty_config wrote."""

    has_memoryregions: bool
    has_utilization: bool


def generate_bloaty_config(
    elf_file: BinaryIO,
    enable_memoryregions: bool,
    enable_utilization: bool,
    out_file: TextIO,
) -> BloatyConfigResult:
    """Generates a Bloaty config file from symbols within an ELF.

    Args:
      elf_file: Binary file object of the ELF to inspect.
      enable_memoryregions: Whether to attempt to write the memoryregions
        data source.
      enable_utilization: Whether to write the utilization data source.
      out_file: Text file object the configuration is written to.

    Returns:
      Tuple indicating whether a memoryregions data source, a utilization data
      source, or both were written.
    """
    has_memoryregions = False
    has_utilization = False

    if enable_memoryregions:
        # Enable the "memoryregions" data_source if the user provided the
        # required pw_bloat specific symbols in their linker script.
        segment_to_memory_region = _get_segments_to_memory_region_map(elf_file)
        if not segment_to_memory_region:
            _LOG.info('memoryregions data_source is not provided')
        else:
            _LOG.info('memoryregions data_source is provided')
            out_file.write(
                generate_memoryregions_data_source(segment_to_memory_region)
            )
            has_memoryregions = True

    if enable_utilization:
        _LOG.info('utilization data_source is provided')
        out_file.write(generate_utilization_data_source())
        has_utilization = True

    return BloatyConfigResult(has_memoryregions, has_utilization)


def main() -> int:
    """Generates a useful bloaty config file containing new data sources.

    Returns:
      0, for use as the process exit code.
    """
    args = _parse_args()

    logging.basicConfig(format='%(message)s', level=args.loglevel)

    generate_bloaty_config(
        elf_file=args.elf_file,
        enable_memoryregions=args.memoryregions,
        enable_utilization=args.utilization,
        out_file=args.output,
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())