1 /* Print the source files of a given ELF file.
2 Copyright (C) 2023 Red Hat, Inc.
3 This file is part of elfutils.
4 Written by Housam Alamour <[email protected]>.
5
6 This file is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 elfutils is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18
19
20 /* In case we have a bad fts we include this before config.h because it
21 can't handle _FILE_OFFSET_BITS.
22 Everything we need here is fine if its declarations just come first.
23 Also, include sys/types.h before fts. On some systems fts.h is not self
24 contained. */
25 #ifdef BAD_FTS
26 #include <sys/types.h>
27 #include <fts.h>
28 #endif
29
30 #ifdef HAVE_CONFIG_H
31 # include <config.h>
32 #endif
33
34 #include "printversion.h"
35 #include <dwarf.h>
36 #include <argp.h>
37 #include <cstring>
38 #include <set>
39 #include <string>
40 #include <cassert>
41 #include <gelf.h>
42 #include <memory>
43
44 #ifdef ENABLE_LIBDEBUGINFOD
45 #include "debuginfod.h"
46 #endif
47
48 #include <libdwfl.h>
49 #include <fcntl.h>
50 #include <iostream>
51 #include <libdw.h>
52 #include <sstream>
53 #include <vector>
54
55 /* Libraries for use by the --zip option */
56 #ifdef HAVE_LIBARCHIVE
57 #include <archive.h>
58 #include <archive_entry.h>
59 #endif
60
61 /* If fts.h is included before config.h, its indirect inclusions may not
62 give us the right LFS aliases of these functions, so map them manually. */
63 #ifdef BAD_FTS
64 #ifdef _FILE_OFFSET_BITS
65 #define open open64
66 #define fopen fopen64
67 #endif
68 #else
69 #include <sys/types.h>
70 #include <fts.h>
71 #endif
72
73 using namespace std;
74
/* Name and version of program: the version hook is pointed at the
   print_version callback.  */
ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;

/* Bug report address. */
ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;

/* Size, in bytes, of the buffer zip_files uses to copy the contents of a
   source file into the archive.  */
constexpr size_t BUFFER_SIZE = 8192;
82
83 /* Definitions of arguments for argp functions. */
84 static const struct argp_option options[] =
85 {
86 { NULL, 0, NULL, OPTION_DOC, N_("Output options:"), 1 },
87 { "null", '0', NULL, 0,
88 N_ ("Separate items by a null instead of a newline."), 0 },
89 { "verbose", 'v', NULL, 0,
90 N_ ("Increase verbosity of logging messages."), 0 },
91 { "cu-only", 'c', NULL, 0, N_("Only list the CU names."), 0 },
92 #ifdef HAVE_LIBARCHIVE
93 { "zip", 'z', NULL, 0, N_("Zip all the source files and send to stdout. "
94 "Cannot be used with the null option"), 0 },
95 #ifdef ENABLE_LIBDEBUGINFOD
96 { "no-backup", 'b', NULL, 0, N_("Disables local source file search when "
97 "debuginfod fails to fetch files. This option is only applicable"
98 "when fetching and zipping files."), 0 },
99 #endif
100 #endif
101 { NULL, 0, NULL, 0, NULL, 0 }
102 };
103
/* Short description of program. */
static const char doc[] = N_("Lists the source files of a DWARF/ELF file. The default input is the file 'a.out'.");

/* Strings for arguments in help texts. */
static const char args_doc[] = N_("INPUT");

/* Prototype for option handler. */
static error_t parse_opt (int key, char *arg, struct argp_state *state);

/* Child parsers handed to argp.  Slot [0] is filled in by main; slot [1]
   is never written and, having static storage, stays zero-initialized.  */
static struct argp_child argp_children[2]; /* [0] is set in main. */

/* Data structure to communicate with argp functions. */
static const struct argp argp =
{
  options, parse_opt, args_doc, doc, argp_children, NULL, NULL
};

/* The flags below are set by parse_opt and start out false (static
   storage).  */

/* Verbose message printing. */
static bool verbose;
/* Delimit the output with nulls. */
static bool null_arg;
/* Only print compilation unit names. */
static bool CU_only;
#ifdef HAVE_LIBARCHIVE
/* Zip all the source files and send to stdout. */
static bool zip;

#ifdef ENABLE_LIBDEBUGINFOD
/* Disables local source file search when debuginfod fails to fetch them.
   This option is only applicable when fetching and zipping files.
   Note that this flag only exists when both HAVE_LIBARCHIVE and
   ENABLE_LIBDEBUGINFOD are defined.  */
static bool no_backup;
#endif
#endif
137
138 /* Handle program arguments. Note null arg and zip
139 cannot be combined due to warnings raised when unzipping. */
140 static error_t
parse_opt(int key,char * arg,struct argp_state * state)141 parse_opt (int key, char *arg, struct argp_state *state)
142 {
143 /* Suppress "unused parameter" warning. */
144 (void)arg;
145 switch (key)
146 {
147 case ARGP_KEY_INIT:
148 state->child_inputs[0] = state->input;
149 break;
150
151 case '0':
152 null_arg = true;
153 break;
154
155 case 'v':
156 verbose = true;
157 break;
158
159 case 'c':
160 CU_only = true;
161 break;
162
163 #ifdef HAVE_LIBARCHIVE
164 case 'z':
165 zip = true;
166 break;
167
168 #ifdef ENABLE_LIBDEBUGINFOD
169 case 'b':
170 no_backup = true;
171 break;
172 #endif
173 #endif
174
175 default:
176 return ARGP_ERR_UNKNOWN;
177 }
178 return 0;
179 }
180
/* Normalize PATH lexically so that it can be stored in a zip archive
   without triggering unzip errors.

   The path is split on '/'.  Empty components (from "//" or a leading or
   trailing slash) and "." components are dropped; a ".." component removes
   the component before it, if there is one.  The surviving components are
   joined again, each preceded by '/', so the result always starts with a
   slash.  If nothing survives, "/" is returned.  The file system is never
   consulted.  */
string canonicalize_path(string path)
{
  vector<string> parts;
  const size_t length = path.size();
  size_t start = 0;

  /* Walk the path one '/'-delimited component at a time.  */
  while (start < length)
    {
      size_t stop = path.find('/', start);
      if (stop == string::npos)
        stop = length;

      const string piece = path.substr(start, stop - start);
      start = stop + 1;

      /* Skip the no-op components produced by "//" and "/./".  */
      if (piece.empty() || piece == ".")
        continue;

      if (piece == "..")
        {
          /* Step back up one directory; at the top there is nothing to drop.  */
          if (!parts.empty())
            parts.pop_back();
          continue;
        }

      parts.push_back(piece);
    }

  if (parts.empty())
    return "/";

  /* Glue the remaining components back together.  */
  string joined;
  for (const string &piece : parts)
    {
      joined += '/';
      joined += piece;
    }
  return joined;
}
209
/* Global list of collected source files and their respective module.
   Normally, it'll contain the sources of just one named binary, but
   the '-K' option can cause multiple dwfl modules to be loaded, thus
   listed.  Being a std::set, it holds each (path, module) pair once and
   iterates in sorted order.  collect_sourcefiles fills it; zip_files and
   main read it.  */
set<pair<string, Dwfl_Module*>> debug_sourcefiles;
215
216 static int
collect_sourcefiles(Dwfl_Module * dwflmod,void ** userdata,const char * name,Dwarf_Addr base,void * arg)217 collect_sourcefiles (Dwfl_Module *dwflmod,
218 void **userdata __attribute__ ((unused)),
219 const char *name __attribute__ ((unused)),
220 Dwarf_Addr base __attribute__ ((unused)),
221 void *arg __attribute__ ((unused)))
222 {
223 Dwarf *dbg;
224 Dwarf_Addr bias; /* ignored - for addressing purposes only. */
225
226 dbg = dwfl_module_getdwarf (dwflmod, &bias);
227
228 Dwarf_Off offset = 0;
229 Dwarf_Off old_offset;
230 size_t hsize;
231 /* Traverse all CUs of this module. */
232 while (dwarf_nextcu (dbg, old_offset = offset, &offset, &hsize, NULL, NULL, NULL) == 0)
233 {
234 Dwarf_Die cudie_mem;
235 Dwarf_Die *cudie = dwarf_offdie (dbg, old_offset + hsize, &cudie_mem);
236
237 if (cudie == NULL)
238 continue;
239
240 const char *cuname = dwarf_diename (cudie) ?: "<unknown>";
241 Dwarf_Files *files;
242 size_t nfiles;
243 if (dwarf_getsrcfiles (cudie, &files, &nfiles) != 0)
244 continue;
245
246 /* extract DW_AT_comp_dir to resolve relative file names. */
247 const char *comp_dir = "";
248 const char *const *dirs;
249 size_t ndirs;
250
251 if (dwarf_getsrcdirs (files, &dirs, &ndirs) == 0 && dirs[0] != NULL)
252 comp_dir = dirs[0];
253 if (comp_dir == NULL)
254 comp_dir = "";
255
256 if (verbose)
257 clog << "searching for sources for cu=" << cuname
258 << " comp_dir=" << comp_dir << " #files=" << nfiles
259 << " #dirs=" << ndirs << endl;
260
261 if (comp_dir[0] == '\0' && cuname[0] != '/')
262 {
263 /* This is a common symptom for dwz-compressed debug files,
264 where the altdebug file cannot be resolved. */
265 if (verbose)
266 clog << "skipping cu=" << cuname << " due to empty comp_dir" << endl;
267 continue;
268 }
269 for (size_t f = 1; f < nfiles; ++f)
270 {
271 const char *hat;
272 if (CU_only)
273 {
274 if (strcmp(cuname, "<unknown>") == 0 || strcmp(cuname, "<artificial>") == 0 )
275 continue;
276 hat = cuname;
277 }
278 else
279 hat = dwarf_filesrc (files, f, NULL, NULL);
280
281 if (hat == NULL)
282 continue;
283
284 if (string(hat).find("<built-in>")
285 != string::npos) /* gcc intrinsics, don't bother recording */
286 continue;
287
288 string waldo;
289 if (hat[0] == '/') /* absolute */
290 waldo = (string (hat));
291 else if (comp_dir[0] != '\0') /* comp_dir relative */
292 waldo = (string (comp_dir) + string ("/") + string (hat));
293 else
294 {
295 if (verbose)
296 clog << "skipping file=" << hat << " due to empty comp_dir" << endl;
297 continue;
298 }
299 waldo = canonicalize_path (waldo);
300 debug_sourcefiles.insert (make_pair(waldo, dwflmod));
301 }
302 }
303 return DWARF_CB_OK;
304 }
305
#ifdef HAVE_LIBARCHIVE
/* Append the contents of the open descriptor FD to archive A as one entry.

   The entry is named after FILE_PATH with everything up to and including
   its first '/' removed, which makes the stored name relative (absolute
   names raise warnings when unzipping).  File metadata is taken from
   fstat on FD.  FD is not closed here; that is the caller's job.

   Returns true when the header and all data were written.  On failure
   returns false and stores a description of the problem in ERROR.  */
static bool
zip_add_file (struct archive *a, int fd, const string &file_path,
              string &error)
{
  struct stat st;
  if (fstat (fd, &st) == -1)
    {
      /* Capture errno before anything else gets a chance to change it.  */
      const int saved_errno = errno;
      error = "Error: Failed to get file status for " + file_path + ": "
              + strerror (saved_errno);
      return false;
    }

  struct archive_entry *entry = archive_entry_new ();
  if (entry == NULL)
    {
      error = "Error: Failed to allocate archive entry for " + file_path;
      return false;
    }

  /* Removing first "/" to make the path "relative" before zipping,
     otherwise warnings are raised when unzipping.  */
  const string entry_name
    = file_path.substr (file_path.find_first_of ('/') + 1);
  archive_entry_set_pathname (entry, entry_name.c_str ());
  archive_entry_copy_stat (entry, &st);

  bool ok = true;
  if (archive_write_header (a, entry) != ARCHIVE_OK)
    {
      const char *msg = archive_error_string (a);
      error = "Error: failed to write header for " + file_path + ": "
              + (msg != NULL ? msg : "unknown error");
      ok = false;
    }

  /* Copy the file into the archive one buffer at a time.  */
  char buff[BUFFER_SIZE];
  while (ok)
    {
      const ssize_t len = read (fd, buff, sizeof (buff));
      if (len == 0)
        break; /* End of file.  */
      if (len < 0)
        {
          const int saved_errno = errno;
          error = "Error: Failed to read from the file: " + file_path + ": "
                  + strerror (saved_errno);
          ok = false;
        }
      else if (archive_write_data (a, buff, len) < 0)
        {
          const char *msg = archive_error_string (a);
          error = "Error: Failed to write " + file_path + " to the archive: "
                  + (msg != NULL ? msg : "unknown error");
          ok = false;
        }
    }

  /* The entry is released on every path, success or failure.  */
  archive_entry_free (entry);
  return ok;
}

/* Write every file recorded in debug_sourcefiles into a zip archive that
   is streamed to standard output.

   With debuginfod support each file is first requested from debuginfod
   using the build ID of its module; unless --no-backup was given, a file
   that could not be fetched is then opened from the local file system.
   Without debuginfod support the local file system is the only source.
   Files that cannot be obtained or archived are counted, and in verbose
   mode reported on standard error.  */
void zip_files()
{
  struct archive *a = archive_write_new ();
  if (a == NULL)
    {
      cerr << "Error: Failed to allocate the zip archive writer." << endl;
      return;
    }

  if (archive_write_set_format_zip (a) != ARCHIVE_OK
      || archive_write_open_fd (a, STDOUT_FILENO) != ARCHIVE_OK)
    {
      const char *msg = archive_error_string (a);
      cerr << "Error: Failed to open the zip archive on stdout: "
           << (msg != NULL ? msg : "unknown error") << endl;
      archive_write_free (a);
      return;
    }

#ifdef ENABLE_LIBDEBUGINFOD
  /* Initialize a debuginfod client. */
  static unique_ptr <debuginfod_client, void (*)(debuginfod_client*)>
    client (debuginfod_begin (), &debuginfod_end);
#endif

  int missing_files = 0;
  for (const auto &pair : debug_sourcefiles)
    {
      int fd = -1;
      const std::string &file_path = pair.first;

#ifdef ENABLE_LIBDEBUGINFOD
      /* Attempt to query debuginfod client to fetch source files. */
      Dwfl_Module *dwflmod = pair.second;
      /* Obtain source file's build ID. */
      const unsigned char *bits;
      GElf_Addr vaddr;
      int bits_length = dwfl_module_build_id (dwflmod, &bits, &vaddr);
      /* Ensure successful client and build ID acquisition. */
      if (client.get () != NULL && bits_length > 0)
        fd = debuginfod_find_source (client.get (),
                                     bits, bits_length,
                                     file_path.c_str (), NULL);
      else if (client.get () == NULL)
        cerr << "Error: Failed to initialize debuginfod client." << endl;
      else
        cerr << "Error: Invalid build ID length (" << bits_length << ")." << endl;

      /* The no_backup flag only exists in debuginfod-enabled builds.  */
      const bool search_locally = !no_backup;
#else
      /* Without debuginfod the local file system is the only source.  */
      const bool search_locally = true;
#endif

      /* Files could not be located using debuginfod, search locally */
      if (fd < 0 && search_locally)
        fd = open (file_path.c_str (), O_RDONLY);

      string error;
      const bool added = fd >= 0 && zip_add_file (a, fd, file_path, error);

      /* Release the descriptor whether or not archiving succeeded.  */
      if (fd >= 0)
        close (fd);

      if (!added)
        {
          missing_files++;
          if (verbose)
            {
              cerr << file_path << endl;
              if (!error.empty ())
                cerr << error << endl;
            }
        }
    }
  if (verbose && missing_files > 0 )
    cerr << missing_files << " file(s) listed above could not be found. " << endl;

  if (archive_write_close (a) != ARCHIVE_OK)
    {
      const char *msg = archive_error_string (a);
      cerr << "Error: Failed to finish the zip archive: "
           << (msg != NULL ? msg : "unknown error") << endl;
    }
  archive_write_free (a);
}
#endif
420
421 int
main(int argc,char * argv[])422 main (int argc, char *argv[])
423 {
424 int remaining;
425
426 /* Parse and process arguments. This includes opening the modules. */
427 argp_children[0].argp = dwfl_standard_argp ();
428 argp_children[0].group = 1;
429
430 Dwfl *dwfl = NULL;
431 (void) argp_parse (&argp, argc, argv, 0, &remaining, &dwfl);
432 assert (dwfl != NULL);
433 /* Process all loaded modules - probably just one, except if -K or -p is used. */
434 (void) dwfl_getmodules (dwfl, &collect_sourcefiles, NULL, 0);
435
436 if (!debug_sourcefiles.empty ())
437 {
438 #ifdef HAVE_LIBARCHIVE
439 if (zip)
440 zip_files();
441 else
442 #endif
443 {
444 for (const auto &pair : debug_sourcefiles)
445 {
446 cout << pair.first;
447 if (null_arg)
448 cout << '\0';
449 else
450 cout << '\n';
451 }
452 }
453 }
454
455 dwfl_end (dwfl);
456 return 0;
457 }
458