1 /* Read DWARF package file index sections.
2 Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
3 This file is part of elfutils.
4
5 This file is free software; you can redistribute it and/or modify
6 it under the terms of either
7
8 * the GNU Lesser General Public License as published by the Free
9 Software Foundation; either version 3 of the License, or (at
10 your option) any later version
11
12 or
13
14 * the GNU General Public License as published by the Free
15 Software Foundation; either version 2 of the License, or (at
16 your option) any later version
17
18 or both in parallel, as here.
19
20 elfutils is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received copies of the GNU General Public License and
26 the GNU Lesser General Public License along with this program. If
27 not, see <http://www.gnu.org/licenses/>. */
28
29 #ifdef HAVE_CONFIG_H
30 # include <config.h>
31 #endif
32
33 #include <assert.h>
34
35 #include "libdwP.h"
36
37 static Dwarf_Package_Index *
__libdw_read_package_index(Dwarf * dbg,bool tu)38 __libdw_read_package_index (Dwarf *dbg, bool tu)
39 {
40 Elf_Data *data;
41 if (tu)
42 data = dbg->sectiondata[IDX_debug_tu_index];
43 else
44 data = dbg->sectiondata[IDX_debug_cu_index];
45
46 /* We need at least 16 bytes for the header. */
47 if (data == NULL || data->d_size < 16)
48 {
49 invalid:
50 __libdw_seterrno (DWARF_E_INVALID_DWARF);
51 return NULL;
52 }
53
54 const unsigned char *datap = data->d_buf;
55 const unsigned char *endp = datap + data->d_size;
56 uint16_t version;
57 /* In GNU DebugFission for DWARF 4, the version is 2 as a uword. In the
58 standardized DWARF 5 format, it is a uhalf followed by a padding uhalf.
59 Check for both. */
60 if (read_4ubyte_unaligned (dbg, datap) == 2)
61 version = 2;
62 else
63 {
64 version = read_2ubyte_unaligned (dbg, datap);
65 if (version != 5)
66 {
67 __libdw_seterrno (DWARF_E_VERSION);
68 return NULL;
69 }
70 }
71 datap += 4;
72 uint32_t section_count = read_4ubyte_unaligned_inc (dbg, datap);
73 uint32_t unit_count = read_4ubyte_unaligned_inc (dbg, datap);
74 uint32_t slot_count = read_4ubyte_unaligned_inc (dbg, datap);
75
76 /* The specification has a stricter requirement that
77 slot_count > 3 * unit_count / 2, but this is enough for us. */
78 if (slot_count < unit_count)
79 goto invalid;
80
81 /* After the header, the section must contain:
82
83 8 byte signature per hash table slot
84 + 4 byte index per hash table slot
85 + Section offset table with 1 header row, 1 row per unit, 1 column per
86 section, 4 bytes per field
87 + Section size table with 1 row per unit, 1 column per section, 4 bytes
88 per field
89
90 We have to be careful about overflow when checking this. */
91 const unsigned char *hash_table = datap;
92 if ((size_t) (endp - hash_table) < (uint64_t) slot_count * 12)
93 goto invalid;
94 const unsigned char *indices = hash_table + (size_t) slot_count * 8;
95 const unsigned char *sections = indices + (size_t) slot_count * 4;
96 if ((size_t) (endp - sections) < (uint64_t) section_count * 4)
97 goto invalid;
98 const unsigned char *section_offsets = sections + (size_t) section_count * 4;
99 if ((uint64_t) unit_count * section_count > UINT64_MAX / 8
100 || ((size_t) (endp - section_offsets)
101 < (uint64_t) unit_count * section_count * 8))
102 goto invalid;
103 const unsigned char *section_sizes
104 = section_offsets + (uint64_t) unit_count * section_count * 4;
105
106 Dwarf_Package_Index *index = malloc (sizeof (*index));
107 if (index == NULL)
108 {
109 __libdw_seterrno (DWARF_E_NOMEM);
110 return NULL;
111 }
112
113 index->dbg = dbg;
114 /* Set absent sections to UINT32_MAX. */
115 for (size_t i = 0;
116 i < sizeof (index->sections) / sizeof (index->sections[0]); i++)
117 index->sections[i] = UINT32_MAX;
118 for (size_t i = 0; i < section_count; i++)
119 {
120 uint32_t section = read_4ubyte_unaligned (dbg, sections + i * 4);
121 /* 2 is DW_SECT_TYPES in version 2 and reserved in version 5. We ignore
122 it for version 5.
123 5 is DW_SECT_LOC in version 2 and DW_SECT_LOCLISTS in version 5. We
124 use the same index for both.
125 7 is DW_SECT_MACINFO in version 2 and DW_SECT_MACRO in version 5. We
126 use the same index for both.
127 8 is DW_SECT_MACRO in version 2 and DW_SECT_RNGLISTS in version 5. We
128 use the same index for version 2's DW_SECT_MACRO as version 2's
129 DW_SECT_MACINFO/version 5's DW_SECT_MACRO.
130 We ignore unknown sections. */
131 if (section == 0)
132 continue;
133 if (version == 2)
134 {
135 if (section > 8)
136 continue;
137 else if (section == 8)
138 section = DW_SECT_MACRO;
139 }
140 else if (section == 2
141 || (section
142 > sizeof (index->sections) / sizeof (index->sections[0])))
143 continue;
144 index->sections[section - 1] = i;
145 }
146
147 /* DW_SECT_INFO (or DW_SECT_TYPES for DWARF 4 type units) and DW_SECT_ABBREV
148 are required. */
149 if (((!tu || dbg->sectiondata[IDX_debug_types] == NULL)
150 && index->sections[DW_SECT_INFO - 1] == UINT32_MAX)
151 || (tu && dbg->sectiondata[IDX_debug_types] != NULL
152 && index->sections[DW_SECT_TYPES - 1] == UINT32_MAX)
153 || index->sections[DW_SECT_ABBREV - 1] == UINT32_MAX)
154 {
155 free (index);
156 __libdw_seterrno (DWARF_E_INVALID_DWARF);
157 return NULL;
158 }
159
160 index->section_count = section_count;
161 index->unit_count = unit_count;
162 index->slot_count = slot_count;
163 index->last_unit_found = 0;
164 index->hash_table = hash_table;
165 index->indices = indices;
166 index->section_offsets = section_offsets;
167 index->section_sizes = section_sizes;
168 index->debug_info_offsets = NULL;
169
170 return index;
171 }
172
173 static Dwarf_Package_Index *
__libdw_package_index(Dwarf * dbg,bool tu)174 __libdw_package_index (Dwarf *dbg, bool tu)
175 {
176 if (tu && dbg->tu_index != NULL)
177 return dbg->tu_index;
178 else if (!tu && dbg->cu_index != NULL)
179 return dbg->cu_index;
180
181 Dwarf_Package_Index *index = __libdw_read_package_index (dbg, tu);
182 if (index == NULL)
183 return NULL;
184
185 /* Offsets in the section offset table are 32-bit unsigned integers. In
186 practice, the .debug_info.dwo section for very large executables can be
187 larger than 4GB. GNU dwp as of binutils 2.41 and llvm-dwp before LLVM 15
188 both accidentally truncate offsets larger than 4GB.
189
190 LLVM 15 detects the overflow and errors out instead; see LLVM commit
191 f8df8114715b ("[DWP][DWARF] Detect and error on debug info offset
192 overflow"). However, lldb in LLVM 16 supports using dwp files with
193 truncated offsets by recovering them directly from the unit headers in the
194 .debug_info.dwo section; see LLVM commit c0db06227721 ("[DWARFLibrary] Add
195 support to re-construct cu-index"). Since LLVM 17, the overflow error can
196 be turned into a warning instead; see LLVM commit 53a483cee801 ("[DWP] add
197 overflow check for llvm-dwp tools if offset overflow").
198
199 LLVM's support for > 4GB offsets is effectively an extension to the DWARF
200 package file format, which we implement here. The strategy is to walk the
201 unit headers in .debug_info.dwo in lockstep with the DW_SECT_INFO columns
202 in the section offset tables. As long as they are in the same order
203 (which they are in practice for both GNU dwp and llvm-dwp), we can
204 correlate the truncated offset and produce a corrected array of offsets.
205
206 Note that this will be fixed properly in DWARF 6:
207 https://dwarfstd.org/issues/220708.2.html. */
208 if (index->sections[DW_SECT_INFO - 1] != UINT32_MAX
209 && dbg->sectiondata[IDX_debug_info]->d_size > UINT32_MAX)
210 {
211 Dwarf_Package_Index *cu_index, *tu_index = NULL;
212 if (tu)
213 {
214 tu_index = index;
215 assert (dbg->cu_index == NULL);
216 cu_index = __libdw_read_package_index (dbg, false);
217 if (cu_index == NULL)
218 {
219 free(index);
220 return NULL;
221 }
222 }
223 else
224 {
225 cu_index = index;
226 if (dbg->sectiondata[IDX_debug_tu_index] != NULL
227 && dbg->sectiondata[IDX_debug_types] == NULL)
228 {
229 assert (dbg->tu_index == NULL);
230 tu_index = __libdw_read_package_index (dbg, true);
231 if (tu_index == NULL)
232 {
233 free(index);
234 return NULL;
235 }
236 }
237 }
238
239 cu_index->debug_info_offsets = malloc (cu_index->unit_count
240 * sizeof (Dwarf_Off));
241 if (cu_index->debug_info_offsets == NULL)
242 {
243 free (tu_index);
244 free (cu_index);
245 __libdw_seterrno (DWARF_E_NOMEM);
246 return NULL;
247 }
248 if (tu_index != NULL)
249 {
250 tu_index->debug_info_offsets = malloc (tu_index->unit_count
251 * sizeof (Dwarf_Off));
252 if (tu_index->debug_info_offsets == NULL)
253 {
254 free (tu_index);
255 free (cu_index->debug_info_offsets);
256 free (cu_index);
257 __libdw_seterrno (DWARF_E_NOMEM);
258 return NULL;
259 }
260 }
261
262 Dwarf_Off off = 0;
263 uint32_t cui = 0, tui = 0;
264 uint32_t cu_count = cu_index->unit_count;
265 const unsigned char *cu_offset
266 = cu_index->section_offsets + cu_index->sections[DW_SECT_INFO - 1] * 4;
267 uint32_t tu_count = 0;
268 const unsigned char *tu_offset = NULL;
269 if (tu_index != NULL)
270 {
271 tu_count = tu_index->unit_count;
272 tu_offset = tu_index->section_offsets
273 + tu_index->sections[DW_SECT_INFO - 1] * 4;
274 }
275 while (cui < cu_count || tui < tu_count)
276 {
277 Dwarf_Off next_off;
278 uint8_t unit_type;
279 if (__libdw_next_unit (dbg, false, off, &next_off, NULL, NULL,
280 &unit_type, NULL, NULL, NULL, NULL, NULL)
281 != 0)
282 {
283 not_sorted:
284 free (cu_index->debug_info_offsets);
285 cu_index->debug_info_offsets = NULL;
286 if (tu_index != NULL)
287 {
288 free (tu_index->debug_info_offsets);
289 tu_index->debug_info_offsets = NULL;
290 }
291 break;
292 }
293 if (unit_type != DW_UT_split_type && cui < cu_count)
294 {
295 if ((off & UINT32_MAX) != read_4ubyte_unaligned (dbg, cu_offset))
296 goto not_sorted;
297 cu_index->debug_info_offsets[cui++] = off;
298 cu_offset += cu_index->section_count * 4;
299 }
300 else if (unit_type == DW_UT_split_type && tu_index != NULL
301 && tui < tu_count)
302 {
303 if ((off & UINT32_MAX) != read_4ubyte_unaligned (dbg, tu_offset))
304 goto not_sorted;
305 tu_index->debug_info_offsets[tui++] = off;
306 tu_offset += tu_index->section_count * 4;
307 }
308 off = next_off;
309 }
310
311 if (tu)
312 dbg->cu_index = cu_index;
313 else if (tu_index != NULL)
314 dbg->tu_index = tu_index;
315 }
316
317 if (tu)
318 dbg->tu_index = index;
319 else
320 dbg->cu_index = index;
321 return index;
322 }
323
324 static int
__libdw_dwp_unit_row(Dwarf_Package_Index * index,uint64_t unit_id,uint32_t * unit_rowp)325 __libdw_dwp_unit_row (Dwarf_Package_Index *index, uint64_t unit_id,
326 uint32_t *unit_rowp)
327 {
328 if (index == NULL)
329 return -1;
330
331 uint32_t hash = unit_id;
332 uint32_t hash2 = (unit_id >> 32) | 1;
333 /* Only check each slot once. */
334 for (uint32_t n = index->slot_count; n-- > 0; )
335 {
336 size_t slot = hash & (index->slot_count - 1);
337 uint64_t sig = read_8ubyte_unaligned (index->dbg,
338 index->hash_table + slot * 8);
339 if (sig == unit_id)
340 {
341 uint32_t row = read_4ubyte_unaligned (index->dbg,
342 index->indices + slot * 4);
343 if (row > index->unit_count)
344 {
345 __libdw_seterrno (DWARF_E_INVALID_DWARF);
346 return -1;
347 }
348 *unit_rowp = row;
349 return 0;
350 }
351 else if (sig == 0
352 && read_4ubyte_unaligned (index->dbg,
353 index->indices + slot * 4) == 0)
354 break;
355 hash += hash2;
356 }
357 *unit_rowp = 0;
358 return 0;
359 }
360
361 static int
__libdw_dwp_section_info(Dwarf_Package_Index * index,uint32_t unit_row,unsigned int section,Dwarf_Off * offsetp,Dwarf_Off * sizep)362 __libdw_dwp_section_info (Dwarf_Package_Index *index, uint32_t unit_row,
363 unsigned int section, Dwarf_Off *offsetp,
364 Dwarf_Off *sizep)
365 {
366 if (index == NULL)
367 return -1;
368 if (unit_row == 0)
369 {
370 __libdw_seterrno (DWARF_E_INVALID_DWARF);
371 return -1;
372 }
373 if (index->sections[section - 1] == UINT32_MAX)
374 {
375 if (offsetp != NULL)
376 *offsetp = 0;
377 if (sizep != NULL)
378 *sizep = 0;
379 return 0;
380 }
381 size_t i = (size_t)(unit_row - 1) * index->section_count
382 + index->sections[section - 1];
383 if (offsetp != NULL)
384 {
385 if (section == DW_SECT_INFO && index->debug_info_offsets != NULL)
386 *offsetp = index->debug_info_offsets[unit_row - 1];
387 else
388 *offsetp = read_4ubyte_unaligned (index->dbg,
389 index->section_offsets + i * 4);
390 }
391 if (sizep != NULL)
392 *sizep = read_4ubyte_unaligned (index->dbg,
393 index->section_sizes + i * 4);
394 return 0;
395 }
396
397 int
398 internal_function
__libdw_dwp_find_unit(Dwarf * dbg,bool debug_types,Dwarf_Off off,uint16_t version,uint8_t unit_type,uint64_t unit_id8,uint32_t * unit_rowp,Dwarf_Off * abbrev_offsetp)399 __libdw_dwp_find_unit (Dwarf *dbg, bool debug_types, Dwarf_Off off,
400 uint16_t version, uint8_t unit_type, uint64_t unit_id8,
401 uint32_t *unit_rowp, Dwarf_Off *abbrev_offsetp)
402 {
403 if (version >= 5
404 && unit_type != DW_UT_split_compile && unit_type != DW_UT_split_type)
405 {
406 not_dwp:
407 *unit_rowp = 0;
408 *abbrev_offsetp = 0;
409 return 0;
410 }
411 bool tu = unit_type == DW_UT_split_type || debug_types;
412 if (dbg->sectiondata[tu ? IDX_debug_tu_index : IDX_debug_cu_index] == NULL)
413 goto not_dwp;
414 Dwarf_Package_Index *index = __libdw_package_index (dbg, tu);
415 if (index == NULL)
416 return -1;
417
418 /* This is always called for ascending offsets. The most obvious way for a
419 producer to generate the section offset table is sorted by offset; both
420 GNU dwp and llvm-dwp do this. In this common case, we can avoid the full
421 lookup. */
422 if (index->last_unit_found < index->unit_count)
423 {
424 Dwarf_Off offset, size;
425 if (__libdw_dwp_section_info (index, index->last_unit_found + 1,
426 debug_types ? DW_SECT_TYPES : DW_SECT_INFO,
427 &offset, &size) != 0)
428 return -1;
429 if (offset <= off && off - offset < size)
430 {
431 *unit_rowp = ++index->last_unit_found;
432 goto done;
433 }
434 else
435 /* The units are not sorted. Don't try again. */
436 index->last_unit_found = index->unit_count;
437 }
438
439 if (version >= 5 || debug_types)
440 {
441 /* In DWARF 5 and in type units, the unit signature is available in the
442 unit header. */
443 if (__libdw_dwp_unit_row (index, unit_id8, unit_rowp) != 0)
444 return -1;
445 }
446 else
447 {
448 /* In DWARF 4 compilation units, the unit signature is an attribute. We
449 can't parse attributes in the split unit until we get the abbreviation
450 table offset from the package index, which is a chicken-and-egg
451 problem. We could get the signature from the skeleton unit, but that
452 may not be available.
453
454 Instead, we resort to a linear scan through the section offset table.
455 Finding all units is therefore quadratic in the number of units.
456 However, this will likely never be needed in practice because of the
457 sorted fast path above. If this ceases to be the case, we can try to
458 plumb through the skeleton unit's signature when it is available, or
459 build a sorted lookup table for binary search. */
460 if (index->sections[DW_SECT_INFO - 1] == UINT32_MAX)
461 {
462 __libdw_seterrno (DWARF_E_INVALID_DWARF);
463 return -1;
464 }
465 for (uint32_t i = 0; i < index->unit_count; i++)
466 {
467 Dwarf_Off offset, size;
468 __libdw_dwp_section_info (index, i + 1, DW_SECT_INFO, &offset,
469 &size);
470 if (offset <= off && off - offset < size)
471 {
472 *unit_rowp = i + 1;
473 goto done;
474 }
475 }
476 __libdw_seterrno (DWARF_E_INVALID_DWARF);
477 return -1;
478 }
479
480 done:
481 return __libdw_dwp_section_info (index, *unit_rowp, DW_SECT_ABBREV,
482 abbrev_offsetp, NULL);
483 }
484
485 Dwarf_CU *
486 internal_function
__libdw_dwp_findcu_id(Dwarf * dbg,uint64_t unit_id8)487 __libdw_dwp_findcu_id (Dwarf *dbg, uint64_t unit_id8)
488 {
489 Dwarf_Package_Index *index = __libdw_package_index (dbg, false);
490 uint32_t unit_row;
491 Dwarf_Off offset;
492 Dwarf_CU *cu;
493 if (__libdw_dwp_unit_row (index, unit_id8, &unit_row) == 0
494 && __libdw_dwp_section_info (index, unit_row, DW_SECT_INFO, &offset,
495 NULL) == 0
496 && (cu = __libdw_findcu (dbg, offset, false)) != NULL
497 && cu->unit_type == DW_UT_split_compile
498 && cu->unit_id8 == unit_id8)
499 return cu;
500 else
501 return NULL;
502 }
503
504 int
dwarf_cu_dwp_section_info(Dwarf_CU * cu,unsigned int section,Dwarf_Off * offsetp,Dwarf_Off * sizep)505 dwarf_cu_dwp_section_info (Dwarf_CU *cu, unsigned int section,
506 Dwarf_Off *offsetp, Dwarf_Off *sizep)
507 {
508 if (cu == NULL)
509 return -1;
510 if (section < DW_SECT_INFO || section > DW_SECT_RNGLISTS)
511 {
512 __libdw_seterrno (DWARF_E_UNKNOWN_SECTION);
513 return -1;
514 }
515 if (cu->dwp_row == 0)
516 {
517 if (offsetp != NULL)
518 *offsetp = 0;
519 if (sizep != NULL)
520 *sizep = 0;
521 return 0;
522 }
523 else
524 {
525 Dwarf_Package_Index *index
526 = cu->unit_type == DW_UT_split_compile
527 ? cu->dbg->cu_index : cu->dbg->tu_index;
528 return __libdw_dwp_section_info (index, cu->dwp_row, section, offsetp,
529 sizep);
530 }
531 }
532 INTDEF(dwarf_cu_dwp_section_info)
533