xref: /aosp_15_r20/external/cronet/build/android/gyp/extract_unwind_tables.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1#!/usr/bin/env python3
2# Copyright 2018 The Chromium Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Extracts the unwind tables in from breakpad symbol files
7
8Runs dump_syms on the given binary file and extracts the CFI data into the
9given output file.
10The output file is a binary file containing CFI rows ordered based on function
11address. The output file only contains rows that match the most popular rule
12type in CFI table, to reduce the output size and specify data in compact format.
13See doc https://github.com/google/breakpad/blob/main/docs/symbol_files.md.
141. The CFA rules should be of postfix form "SP <val> +".
152. The RA rules should be of postfix form "CFA <val> + ^".
16Note: breakpad represents dereferencing address with '^' operator.
17
18The output file has 2 tables UNW_INDEX and UNW_DATA, inspired from ARM EHABI
19format. The first table contains function addresses and an index into the
20UNW_DATA table. The second table contains one or more rows for the function
21unwind information.
22
23The output file starts with 4 bytes counting the number of entries in UNW_INDEX.
24Then UNW_INDEX table and UNW_DATA table.
25
26UNW_INDEX contains two columns of N rows each, where N is the number of
27functions.
28  1. First column 4 byte rows of all the function start address as offset from
29     start of the binary, in sorted order.
30  2. For each function addr, the second column contains 2 byte indices in order.
31     The indices are offsets (in count of 2 bytes) of the CFI data from start of
32     UNW_DATA.
33The last entry in the table always contains CANT_UNWIND index to specify the
34end address of the last function.
35
36UNW_DATA contains data of all the functions. Each function data contains N rows.
37The data found at the address pointed from UNW_INDEX will be:
38  2 bytes: N - number of rows that belong to current function.
39  N * 4 bytes: N rows of data. 16 bits : Address offset from function start.
40                               14 bits : CFA offset / 4.
41                                2 bits : RA offset / 4.
42
43The function is not added to the unwind table in following conditions:
44C1. If length of the function code (number of instructions) is greater than
45    0xFFFF (2 byte address span). This is because we use 16 bits to refer to
46    offset of instruction from start of the address.
47C2. If the function moves the SP by more than 0xFFFF bytes. This is because we
48    use 14 bits to denote CFA offset (last 2 bits are 0).
49C3. If the Return Address is stored at an offset >= 16 from the CFA. Some
50    functions which have variable arguments can have offset upto 16.
51    TODO(ssid): We can actually store offset 16 by subtracting 1 from RA/4 since
52    we never have 0.
53C4: Some functions do not have unwind information defined in dwarf info. These
54    functions have index value CANT_UNWIND(0xFFFF) in UNW_INDEX table.
55
56
57Usage:
58  extract_unwind_tables.py --input_path [root path to unstripped chrome.so]
59      --output_path [output path] --dump_syms_path [path to dump_syms binary]
60"""
61
62import argparse
63import re
64import struct
65import subprocess
66import sys
67import tempfile
68
69
70_CFA_REG = '.cfa'
71_RA_REG = '.ra'
72
73_ADDR_ENTRY = 0
74_LENGTH_ENTRY = 1
75
76_CANT_UNWIND = 0xFFFF
77
78
79def _Write4Bytes(output_file, val):
80  """Writes a 32 bit unsigned integer to the given output file."""
81  output_file.write(struct.pack('<L', val));
82
83
84def _Write2Bytes(output_file, val):
85  """Writes a 16 bit unsigned integer to the given output file."""
86  output_file.write(struct.pack('<H', val));
87
88
89def _FindRuleForRegister(cfi_row, reg):
90  """Returns the postfix expression as string for a given register.
91
92  Breakpad CFI row format specifies rules for unwinding each register in postfix
93  expression form separated by space. Each rule starts with register name and a
94  colon. Eg: "CFI R1: <rule> R2: <rule>".
95  """
96  out = []
97  found_register = False
98  for part in cfi_row:
99    if found_register:
100      if part[-1] == ':':
101        break
102      out.append(part)
103    elif part == reg + ':':
104      found_register = True
105  return ' '.join(out)
106
107
108def _GetCfaAndRaOffset(cfi_row):
109  """Returns a tuple with 2 numbers (cfa_offset, ra_offset).
110
111  Returns right values if rule matches the predefined criteria. Returns (0, 0)
112  otherwise. The criteria for CFA rule is postfix form "SP <val> +" and RA rule
113  is postfix form "CFA -<val> + ^".
114  """
115  cfa_offset = 0
116  ra_offset = 0
117  cfa_rule = _FindRuleForRegister(cfi_row, _CFA_REG)
118  ra_rule = _FindRuleForRegister(cfi_row, _RA_REG)
119  if cfa_rule and re.match(r'sp [0-9]+ \+', cfa_rule):
120    cfa_offset = int(cfa_rule.split()[1], 10)
121  if ra_rule:
122    if not re.match(r'.cfa -[0-9]+ \+ \^', ra_rule):
123      return (0, 0)
124    ra_offset = -1 * int(ra_rule.split()[1], 10)
125  return (cfa_offset, ra_offset)
126
127
128def _GetAllCfiRows(symbol_file):
129  """Returns parsed CFI data from given symbol_file.
130
131  Each entry in the cfi data dictionary returned is a map from function start
132  address to array of function rows, starting with FUNCTION type, followed by
133  one or more CFI rows.
134  """
135  cfi_data = {}
136  current_func = []
137  for line in symbol_file:
138    line = line.decode('utf8')
139    if 'STACK CFI' not in line:
140      continue
141
142    parts = line.split()
143    data = {}
144    if parts[2] == 'INIT':
145      # Add the previous function to the output
146      if len(current_func) > 1:
147        cfi_data[current_func[0][_ADDR_ENTRY]] = current_func
148      current_func = []
149
150      # The function line is of format "STACK CFI INIT <addr> <length> ..."
151      data[_ADDR_ENTRY] = int(parts[3], 16)
152      data[_LENGTH_ENTRY] = int(parts[4], 16)
153
154      # Condition C1: Skip if length is large.
155      if data[_LENGTH_ENTRY] == 0 or data[_LENGTH_ENTRY] > 0xffff:
156        continue  # Skip the current function.
157    else:
158      # The current function is skipped.
159      if len(current_func) == 0:
160        continue
161
162      # The CFI row is of format "STACK CFI <addr> .cfa: <expr> .ra: <expr> ..."
163      data[_ADDR_ENTRY] = int(parts[2], 16)
164      (data[_CFA_REG], data[_RA_REG]) = _GetCfaAndRaOffset(parts)
165
166      # Condition C2 and C3: Skip based on limits on offsets.
167      if data[_CFA_REG] == 0 or data[_RA_REG] >= 16 or data[_CFA_REG] > 0xffff:
168        current_func = []
169        continue
170      assert data[_CFA_REG] % 4 == 0
171      # Since we skipped functions with code size larger than 0xffff, we should
172      # have no function offset larger than the same value.
173      assert data[_ADDR_ENTRY] - current_func[0][_ADDR_ENTRY] < 0xffff
174
175    if data[_ADDR_ENTRY] == 0:
176      # Skip current function, delete all previous entries.
177      current_func = []
178      continue
179    assert data[_ADDR_ENTRY] % 2 == 0
180    current_func.append(data)
181
182  # Condition C4: Skip function without CFI rows.
183  if len(current_func) > 1:
184    cfi_data[current_func[0][_ADDR_ENTRY]] = current_func
185  return cfi_data
186
187
188def _WriteCfiData(cfi_data, out_file):
189  """Writes the CFI data in defined format to out_file."""
190  # Stores the final data that will be written to UNW_DATA table, in order
191  # with 2 byte items.
192  unw_data = []
193
194  # Represent all the CFI data of functions as set of numbers and map them to an
195  # index in the |unw_data|. This index is later written to the UNW_INDEX table
196  # for each function. This map is used to find index of the data for functions.
197  data_to_index = {}
198  # Store mapping between the functions to the index.
199  func_addr_to_index = {}
200  previous_func_end = 0
201  for addr, function in sorted(cfi_data.items()):
202    # Add an empty function entry when functions CFIs are missing between 2
203    # functions.
204    if previous_func_end != 0 and addr - previous_func_end  > 4:
205      func_addr_to_index[previous_func_end + 2] = _CANT_UNWIND
206    previous_func_end = addr + cfi_data[addr][0][_LENGTH_ENTRY]
207
208    assert len(function) > 1
209    func_data_arr = []
210    func_data = 0
211    # The first row contains the function address and length. The rest of the
212    # rows have CFI data. Create function data array as given in the format.
213    for row in function[1:]:
214      addr_offset = row[_ADDR_ENTRY] - addr
215      cfa_offset = (row[_CFA_REG]) | (row[_RA_REG] // 4)
216
217      func_data_arr.append(addr_offset)
218      func_data_arr.append(cfa_offset)
219
220    # Consider all the rows in the data as one large integer and add it as a key
221    # to the |data_to_index|.
222    for data in func_data_arr:
223      func_data = (func_data << 16) | data
224
225    row_count = len(func_data_arr) // 2
226    if func_data not in data_to_index:
227      # When data is not found, create a new index = len(unw_data), and write
228      # the data to |unw_data|.
229      index = len(unw_data)
230      data_to_index[func_data] = index
231      unw_data.append(row_count)
232      for row in func_data_arr:
233        unw_data.append(row)
234    else:
235      # If the data was found, then use the same index for the function.
236      index = data_to_index[func_data]
237      assert row_count == unw_data[index]
238    func_addr_to_index[addr] = data_to_index[func_data]
239
240  # Mark the end end of last function entry.
241  func_addr_to_index[previous_func_end + 2] = _CANT_UNWIND
242
243  # Write the size of UNW_INDEX file in bytes.
244  _Write4Bytes(out_file, len(func_addr_to_index))
245
246  # Write the UNW_INDEX table. First list of addresses and then indices.
247  sorted_unw_index = sorted(func_addr_to_index.items())
248  for addr, index in sorted_unw_index:
249    _Write4Bytes(out_file, addr)
250  for addr, index in sorted_unw_index:
251    _Write2Bytes(out_file, index)
252
253  # Write the UNW_DATA table.
254  for data in unw_data:
255    _Write2Bytes(out_file, data)
256
257
258def main():
259  parser = argparse.ArgumentParser()
260  parser.add_argument(
261      '--input_path', required=True,
262      help='The input path of the unstripped binary')
263  parser.add_argument(
264      '--output_path', required=True,
265      help='The path of the output file')
266  parser.add_argument(
267      '--dump_syms_path', required=True,
268      help='The path of the dump_syms binary')
269
270  args = parser.parse_args()
271  cmd = ['./' + args.dump_syms_path, args.input_path, '-v']
272  proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
273  cfi_data = _GetAllCfiRows(proc.stdout)
274  if proc.wait():
275    sys.stderr.write('dump_syms exited with code {} after {} symbols\n'.format(
276        proc.returncode, len(cfi_data)))
277    sys.exit(proc.returncode)
278  with open(args.output_path, 'wb') as out_file:
279    _WriteCfiData(cfi_data, out_file)
280
281
282if __name__ == '__main__':
283  main()
284