xref: /aosp_15_r20/external/elfutils/libdw/dwarf_cu_dwp_section_info.c (revision 7304104da70ce23c86437a01be71edd1a2d7f37e)
1 /* Read DWARF package file index sections.
2    Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
3    This file is part of elfutils.
4 
5    This file is free software; you can redistribute it and/or modify
6    it under the terms of either
7 
8      * the GNU Lesser General Public License as published by the Free
9        Software Foundation; either version 3 of the License, or (at
10        your option) any later version
11 
12    or
13 
14      * the GNU General Public License as published by the Free
15        Software Foundation; either version 2 of the License, or (at
16        your option) any later version
17 
18    or both in parallel, as here.
19 
20    elfutils is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received copies of the GNU General Public License and
26    the GNU Lesser General Public License along with this program.  If
27    not, see <http://www.gnu.org/licenses/>.  */
28 
29 #ifdef HAVE_CONFIG_H
30 # include <config.h>
31 #endif
32 
33 #include <assert.h>
34 
35 #include "libdwP.h"
36 
37 static Dwarf_Package_Index *
__libdw_read_package_index(Dwarf * dbg,bool tu)38 __libdw_read_package_index (Dwarf *dbg, bool tu)
39 {
40   Elf_Data *data;
41   if (tu)
42     data = dbg->sectiondata[IDX_debug_tu_index];
43   else
44     data = dbg->sectiondata[IDX_debug_cu_index];
45 
46   /* We need at least 16 bytes for the header.  */
47   if (data == NULL || data->d_size < 16)
48     {
49     invalid:
50       __libdw_seterrno (DWARF_E_INVALID_DWARF);
51       return NULL;
52     }
53 
54   const unsigned char *datap = data->d_buf;
55   const unsigned char *endp = datap + data->d_size;
56   uint16_t version;
57   /* In GNU DebugFission for DWARF 4, the version is 2 as a uword.  In the
58      standardized DWARF 5 format, it is a uhalf followed by a padding uhalf.
59      Check for both.  */
60   if (read_4ubyte_unaligned (dbg, datap) == 2)
61     version = 2;
62   else
63     {
64       version = read_2ubyte_unaligned (dbg, datap);
65       if (version != 5)
66 	{
67 	  __libdw_seterrno (DWARF_E_VERSION);
68 	  return NULL;
69 	}
70     }
71   datap += 4;
72   uint32_t section_count = read_4ubyte_unaligned_inc (dbg, datap);
73   uint32_t unit_count = read_4ubyte_unaligned_inc (dbg, datap);
74   uint32_t slot_count = read_4ubyte_unaligned_inc (dbg, datap);
75 
76   /* The specification has a stricter requirement that
77      slot_count > 3 * unit_count / 2, but this is enough for us.  */
78   if (slot_count < unit_count)
79     goto invalid;
80 
81   /* After the header, the section must contain:
82 
83        8 byte signature per hash table slot
84      + 4 byte index per hash table slot
85      + Section offset table with 1 header row, 1 row per unit, 1 column per
86        section, 4 bytes per field
87      + Section size table with 1 row per unit, 1 column per section, 4 bytes
88        per field
89 
90      We have to be careful about overflow when checking this.  */
91   const unsigned char *hash_table = datap;
92   if ((size_t) (endp - hash_table) < (uint64_t) slot_count * 12)
93     goto invalid;
94   const unsigned char *indices = hash_table + (size_t) slot_count * 8;
95   const unsigned char *sections = indices + (size_t) slot_count * 4;
96   if ((size_t) (endp - sections) < (uint64_t) section_count * 4)
97     goto invalid;
98   const unsigned char *section_offsets = sections + (size_t) section_count * 4;
99   if ((uint64_t) unit_count * section_count > UINT64_MAX / 8
100       || ((size_t) (endp - section_offsets)
101 	  < (uint64_t) unit_count * section_count * 8))
102     goto invalid;
103   const unsigned char *section_sizes
104     = section_offsets + (uint64_t) unit_count * section_count * 4;
105 
106   Dwarf_Package_Index *index = malloc (sizeof (*index));
107   if (index == NULL)
108     {
109       __libdw_seterrno (DWARF_E_NOMEM);
110       return NULL;
111     }
112 
113   index->dbg = dbg;
114   /* Set absent sections to UINT32_MAX.  */
115   for (size_t i = 0;
116        i < sizeof (index->sections) / sizeof (index->sections[0]); i++)
117     index->sections[i] = UINT32_MAX;
118   for (size_t i = 0; i < section_count; i++)
119     {
120       uint32_t section = read_4ubyte_unaligned (dbg, sections + i * 4);
121       /* 2 is DW_SECT_TYPES in version 2 and reserved in version 5.  We ignore
122          it for version 5.
123 	 5 is DW_SECT_LOC in version 2 and DW_SECT_LOCLISTS in version 5.  We
124 	 use the same index for both.
125 	 7 is DW_SECT_MACINFO in version 2 and DW_SECT_MACRO in version 5.  We
126 	 use the same index for both.
127 	 8 is DW_SECT_MACRO in version 2 and DW_SECT_RNGLISTS in version 5.  We
128 	 use the same index for version 2's DW_SECT_MACRO as version 2's
129 	 DW_SECT_MACINFO/version 5's DW_SECT_MACRO.
130 	 We ignore unknown sections.  */
131       if (section == 0)
132 	continue;
133       if (version == 2)
134 	{
135 	  if (section > 8)
136 	    continue;
137 	  else if (section == 8)
138 	    section = DW_SECT_MACRO;
139 	}
140       else if (section == 2
141 	       || (section
142 		   > sizeof (index->sections) / sizeof (index->sections[0])))
143 	continue;
144       index->sections[section - 1] = i;
145     }
146 
147   /* DW_SECT_INFO (or DW_SECT_TYPES for DWARF 4 type units) and DW_SECT_ABBREV
148      are required.  */
149   if (((!tu || dbg->sectiondata[IDX_debug_types] == NULL)
150        && index->sections[DW_SECT_INFO - 1] == UINT32_MAX)
151       || (tu && dbg->sectiondata[IDX_debug_types] != NULL
152 	  && index->sections[DW_SECT_TYPES - 1] == UINT32_MAX)
153       || index->sections[DW_SECT_ABBREV - 1] == UINT32_MAX)
154     {
155       free (index);
156       __libdw_seterrno (DWARF_E_INVALID_DWARF);
157       return NULL;
158     }
159 
160   index->section_count = section_count;
161   index->unit_count = unit_count;
162   index->slot_count = slot_count;
163   index->last_unit_found = 0;
164   index->hash_table = hash_table;
165   index->indices = indices;
166   index->section_offsets = section_offsets;
167   index->section_sizes = section_sizes;
168   index->debug_info_offsets = NULL;
169 
170   return index;
171 }
172 
173 static Dwarf_Package_Index *
__libdw_package_index(Dwarf * dbg,bool tu)174 __libdw_package_index (Dwarf *dbg, bool tu)
175 {
176   if (tu && dbg->tu_index != NULL)
177     return dbg->tu_index;
178   else if (!tu && dbg->cu_index != NULL)
179     return dbg->cu_index;
180 
181   Dwarf_Package_Index *index = __libdw_read_package_index (dbg, tu);
182   if (index == NULL)
183     return NULL;
184 
185   /* Offsets in the section offset table are 32-bit unsigned integers.  In
186      practice, the .debug_info.dwo section for very large executables can be
187      larger than 4GB.  GNU dwp as of binutils 2.41 and llvm-dwp before LLVM 15
188      both accidentally truncate offsets larger than 4GB.
189 
190      LLVM 15 detects the overflow and errors out instead; see LLVM commit
191      f8df8114715b ("[DWP][DWARF] Detect and error on debug info offset
192      overflow").  However, lldb in LLVM 16 supports using dwp files with
193      truncated offsets by recovering them directly from the unit headers in the
194      .debug_info.dwo section; see LLVM commit c0db06227721 ("[DWARFLibrary] Add
195      support to re-construct cu-index").  Since LLVM 17, the overflow error can
196      be turned into a warning instead; see LLVM commit 53a483cee801 ("[DWP] add
197      overflow check for llvm-dwp tools if offset overflow").
198 
199      LLVM's support for > 4GB offsets is effectively an extension to the DWARF
200      package file format, which we implement here.  The strategy is to walk the
201      unit headers in .debug_info.dwo in lockstep with the DW_SECT_INFO columns
202      in the section offset tables.  As long as they are in the same order
203      (which they are in practice for both GNU dwp and llvm-dwp), we can
204      correlate the truncated offset and produce a corrected array of offsets.
205 
206      Note that this will be fixed properly in DWARF 6:
207      https://dwarfstd.org/issues/220708.2.html.  */
208   if (index->sections[DW_SECT_INFO - 1] != UINT32_MAX
209       && dbg->sectiondata[IDX_debug_info]->d_size > UINT32_MAX)
210     {
211       Dwarf_Package_Index *cu_index, *tu_index = NULL;
212       if (tu)
213 	{
214 	  tu_index = index;
215 	  assert (dbg->cu_index == NULL);
216 	  cu_index = __libdw_read_package_index (dbg, false);
217 	  if (cu_index == NULL)
218 	    {
219 	      free(index);
220 	      return NULL;
221 	    }
222 	}
223       else
224 	{
225 	  cu_index = index;
226 	  if (dbg->sectiondata[IDX_debug_tu_index] != NULL
227 	      && dbg->sectiondata[IDX_debug_types] == NULL)
228 	    {
229 	      assert (dbg->tu_index == NULL);
230 	      tu_index = __libdw_read_package_index (dbg, true);
231 	      if (tu_index == NULL)
232 		{
233 		  free(index);
234 		  return NULL;
235 		}
236 	    }
237 	}
238 
239       cu_index->debug_info_offsets = malloc (cu_index->unit_count
240 					     * sizeof (Dwarf_Off));
241       if (cu_index->debug_info_offsets == NULL)
242 	{
243 	  free (tu_index);
244 	  free (cu_index);
245 	  __libdw_seterrno (DWARF_E_NOMEM);
246 	  return NULL;
247 	}
248       if (tu_index != NULL)
249 	{
250 	  tu_index->debug_info_offsets = malloc (tu_index->unit_count
251 						 * sizeof (Dwarf_Off));
252 	  if (tu_index->debug_info_offsets == NULL)
253 	    {
254 	      free (tu_index);
255 	      free (cu_index->debug_info_offsets);
256 	      free (cu_index);
257 	      __libdw_seterrno (DWARF_E_NOMEM);
258 	      return NULL;
259 	    }
260 	}
261 
262       Dwarf_Off off = 0;
263       uint32_t cui = 0, tui = 0;
264       uint32_t cu_count = cu_index->unit_count;
265       const unsigned char *cu_offset
266 	= cu_index->section_offsets + cu_index->sections[DW_SECT_INFO - 1] * 4;
267       uint32_t tu_count = 0;
268       const unsigned char *tu_offset = NULL;
269       if (tu_index != NULL)
270 	{
271 	  tu_count = tu_index->unit_count;
272 	  tu_offset = tu_index->section_offsets
273 		      + tu_index->sections[DW_SECT_INFO - 1] * 4;
274 	}
275       while (cui < cu_count || tui < tu_count)
276 	{
277 	  Dwarf_Off next_off;
278 	  uint8_t unit_type;
279 	  if (__libdw_next_unit (dbg, false, off, &next_off, NULL, NULL,
280 				 &unit_type, NULL, NULL, NULL, NULL, NULL)
281 	      != 0)
282 	    {
283 	    not_sorted:
284 	      free (cu_index->debug_info_offsets);
285 	      cu_index->debug_info_offsets = NULL;
286 	      if (tu_index != NULL)
287 		{
288 		  free (tu_index->debug_info_offsets);
289 		  tu_index->debug_info_offsets = NULL;
290 		}
291 	      break;
292 	    }
293 	  if (unit_type != DW_UT_split_type && cui < cu_count)
294 	    {
295 	      if ((off & UINT32_MAX) != read_4ubyte_unaligned (dbg, cu_offset))
296 		goto not_sorted;
297 	      cu_index->debug_info_offsets[cui++] = off;
298 	      cu_offset += cu_index->section_count * 4;
299 	    }
300 	  else if (unit_type == DW_UT_split_type && tu_index != NULL
301 		   && tui < tu_count)
302 	    {
303 	      if ((off & UINT32_MAX) != read_4ubyte_unaligned (dbg, tu_offset))
304 		goto not_sorted;
305 	      tu_index->debug_info_offsets[tui++] = off;
306 	      tu_offset += tu_index->section_count * 4;
307 	    }
308 	  off = next_off;
309 	}
310 
311       if (tu)
312 	dbg->cu_index = cu_index;
313       else if (tu_index != NULL)
314 	dbg->tu_index = tu_index;
315     }
316 
317   if (tu)
318     dbg->tu_index = index;
319   else
320     dbg->cu_index = index;
321   return index;
322 }
323 
324 static int
__libdw_dwp_unit_row(Dwarf_Package_Index * index,uint64_t unit_id,uint32_t * unit_rowp)325 __libdw_dwp_unit_row (Dwarf_Package_Index *index, uint64_t unit_id,
326 		      uint32_t *unit_rowp)
327 {
328   if (index == NULL)
329     return -1;
330 
331   uint32_t hash = unit_id;
332   uint32_t hash2 = (unit_id >> 32) | 1;
333   /* Only check each slot once.  */
334   for (uint32_t n = index->slot_count; n-- > 0; )
335     {
336       size_t slot = hash & (index->slot_count - 1);
337       uint64_t sig = read_8ubyte_unaligned (index->dbg,
338 					    index->hash_table + slot * 8);
339       if (sig == unit_id)
340 	{
341 	  uint32_t row = read_4ubyte_unaligned (index->dbg,
342 						index->indices + slot * 4);
343 	  if (row > index->unit_count)
344 	    {
345 	      __libdw_seterrno (DWARF_E_INVALID_DWARF);
346 	      return -1;
347 	    }
348 	  *unit_rowp = row;
349 	  return 0;
350 	}
351       else if (sig == 0
352 	       && read_4ubyte_unaligned (index->dbg,
353 					 index->indices + slot * 4) == 0)
354 	break;
355       hash += hash2;
356     }
357   *unit_rowp = 0;
358   return 0;
359 }
360 
361 static int
__libdw_dwp_section_info(Dwarf_Package_Index * index,uint32_t unit_row,unsigned int section,Dwarf_Off * offsetp,Dwarf_Off * sizep)362 __libdw_dwp_section_info (Dwarf_Package_Index *index, uint32_t unit_row,
363 			  unsigned int section, Dwarf_Off *offsetp,
364 			  Dwarf_Off *sizep)
365 {
366   if (index == NULL)
367     return -1;
368   if (unit_row == 0)
369     {
370       __libdw_seterrno (DWARF_E_INVALID_DWARF);
371       return -1;
372     }
373   if (index->sections[section - 1] == UINT32_MAX)
374     {
375       if (offsetp != NULL)
376 	*offsetp = 0;
377       if (sizep != NULL)
378 	*sizep = 0;
379       return 0;
380     }
381   size_t i = (size_t)(unit_row - 1) * index->section_count
382 	     + index->sections[section - 1];
383   if (offsetp != NULL)
384     {
385       if (section == DW_SECT_INFO && index->debug_info_offsets != NULL)
386 	*offsetp = index->debug_info_offsets[unit_row - 1];
387       else
388 	*offsetp = read_4ubyte_unaligned (index->dbg,
389 					  index->section_offsets + i * 4);
390     }
391   if (sizep != NULL)
392     *sizep = read_4ubyte_unaligned (index->dbg,
393 				    index->section_sizes + i * 4);
394   return 0;
395 }
396 
397 int
398 internal_function
__libdw_dwp_find_unit(Dwarf * dbg,bool debug_types,Dwarf_Off off,uint16_t version,uint8_t unit_type,uint64_t unit_id8,uint32_t * unit_rowp,Dwarf_Off * abbrev_offsetp)399 __libdw_dwp_find_unit (Dwarf *dbg, bool debug_types, Dwarf_Off off,
400 		       uint16_t version, uint8_t unit_type, uint64_t unit_id8,
401 		       uint32_t *unit_rowp, Dwarf_Off *abbrev_offsetp)
402 {
403   if (version >= 5
404       && unit_type != DW_UT_split_compile && unit_type != DW_UT_split_type)
405     {
406     not_dwp:
407       *unit_rowp = 0;
408       *abbrev_offsetp = 0;
409       return 0;
410     }
411   bool tu = unit_type == DW_UT_split_type || debug_types;
412   if (dbg->sectiondata[tu ? IDX_debug_tu_index : IDX_debug_cu_index] == NULL)
413     goto not_dwp;
414   Dwarf_Package_Index *index = __libdw_package_index (dbg, tu);
415   if (index == NULL)
416     return -1;
417 
418   /* This is always called for ascending offsets.  The most obvious way for a
419      producer to generate the section offset table is sorted by offset; both
420      GNU dwp and llvm-dwp do this.  In this common case, we can avoid the full
421      lookup.  */
422   if (index->last_unit_found < index->unit_count)
423     {
424       Dwarf_Off offset, size;
425       if (__libdw_dwp_section_info (index, index->last_unit_found + 1,
426 				    debug_types ? DW_SECT_TYPES : DW_SECT_INFO,
427 				    &offset, &size) != 0)
428 	return -1;
429       if (offset <= off && off - offset < size)
430 	{
431 	  *unit_rowp = ++index->last_unit_found;
432 	  goto done;
433 	}
434       else
435 	/* The units are not sorted. Don't try again.  */
436 	index->last_unit_found = index->unit_count;
437     }
438 
439   if (version >= 5 || debug_types)
440     {
441       /* In DWARF 5 and in type units, the unit signature is available in the
442          unit header.  */
443       if (__libdw_dwp_unit_row (index, unit_id8, unit_rowp) != 0)
444 	return -1;
445     }
446   else
447     {
448       /* In DWARF 4 compilation units, the unit signature is an attribute.  We
449 	 can't parse attributes in the split unit until we get the abbreviation
450 	 table offset from the package index, which is a chicken-and-egg
451 	 problem.  We could get the signature from the skeleton unit, but that
452 	 may not be available.
453 
454 	 Instead, we resort to a linear scan through the section offset table.
455 	 Finding all units is therefore quadratic in the number of units.
456 	 However, this will likely never be needed in practice because of the
457 	 sorted fast path above.  If this ceases to be the case, we can try to
458 	 plumb through the skeleton unit's signature when it is available, or
459 	 build a sorted lookup table for binary search.  */
460       if (index->sections[DW_SECT_INFO - 1] == UINT32_MAX)
461 	{
462 	  __libdw_seterrno (DWARF_E_INVALID_DWARF);
463 	  return -1;
464 	}
465       for (uint32_t i = 0; i < index->unit_count; i++)
466 	{
467 	  Dwarf_Off offset, size;
468 	  __libdw_dwp_section_info (index, i + 1, DW_SECT_INFO, &offset,
469 				    &size);
470 	  if (offset <= off && off - offset < size)
471 	    {
472 	      *unit_rowp = i + 1;
473 	      goto done;
474 	    }
475 	}
476       __libdw_seterrno (DWARF_E_INVALID_DWARF);
477       return -1;
478     }
479 
480  done:
481   return __libdw_dwp_section_info (index, *unit_rowp, DW_SECT_ABBREV,
482 				   abbrev_offsetp, NULL);
483 }
484 
485 Dwarf_CU *
486 internal_function
__libdw_dwp_findcu_id(Dwarf * dbg,uint64_t unit_id8)487 __libdw_dwp_findcu_id (Dwarf *dbg, uint64_t unit_id8)
488 {
489   Dwarf_Package_Index *index = __libdw_package_index (dbg, false);
490   uint32_t unit_row;
491   Dwarf_Off offset;
492   Dwarf_CU *cu;
493   if (__libdw_dwp_unit_row (index, unit_id8, &unit_row) == 0
494       && __libdw_dwp_section_info (index, unit_row, DW_SECT_INFO, &offset,
495 				   NULL) == 0
496       && (cu = __libdw_findcu (dbg, offset, false)) != NULL
497       && cu->unit_type == DW_UT_split_compile
498       && cu->unit_id8 == unit_id8)
499     return cu;
500   else
501     return NULL;
502 }
503 
504 int
dwarf_cu_dwp_section_info(Dwarf_CU * cu,unsigned int section,Dwarf_Off * offsetp,Dwarf_Off * sizep)505 dwarf_cu_dwp_section_info (Dwarf_CU *cu, unsigned int section,
506 			   Dwarf_Off *offsetp, Dwarf_Off *sizep)
507 {
508   if (cu == NULL)
509     return -1;
510   if (section < DW_SECT_INFO || section > DW_SECT_RNGLISTS)
511     {
512       __libdw_seterrno (DWARF_E_UNKNOWN_SECTION);
513       return -1;
514     }
515   if (cu->dwp_row == 0)
516     {
517       if (offsetp != NULL)
518 	*offsetp = 0;
519       if (sizep != NULL)
520 	*sizep = 0;
521       return 0;
522     }
523   else
524     {
525       Dwarf_Package_Index *index
526 	= cu->unit_type == DW_UT_split_compile
527 	? cu->dbg->cu_index : cu->dbg->tu_index;
528       return __libdw_dwp_section_info (index, cu->dwp_row, section, offsetp,
529 				       sizep);
530     }
531 }
532 INTDEF(dwarf_cu_dwp_section_info)
533