xref: /aosp_15_r20/external/elfutils/src/srcfiles.cxx (revision 7304104da70ce23c86437a01be71edd1a2d7f37e)
1 /* Print the source files of a given ELF file.
2    Copyright (C) 2023 Red Hat, Inc.
3    This file is part of elfutils.
4    Written by Housam Alamour <[email protected]>.
5 
6    This file is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    elfutils is distributed in the hope that it will be useful, but
12    WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
18 
19 
20 /* In case we have a bad fts we include this before config.h because it
21    can't handle _FILE_OFFSET_BITS.
22    Everything we need here is fine if its declarations just come first.
23    Also, include sys/types.h before fts.  On some systems fts.h is not self
24    contained.  */
25 #ifdef BAD_FTS
26 #include <sys/types.h>
27 #include <fts.h>
28 #endif
29 
30 #ifdef HAVE_CONFIG_H
31 # include <config.h>
32 #endif
33 
34 #include "printversion.h"
35 #include <dwarf.h>
36 #include <argp.h>
37 #include <cstring>
38 #include <set>
39 #include <string>
40 #include <cassert>
41 #include <gelf.h>
42 #include <memory>
43 
44 #ifdef ENABLE_LIBDEBUGINFOD
45 #include "debuginfod.h"
46 #endif
47 
48 #include <libdwfl.h>
49 #include <fcntl.h>
50 #include <iostream>
51 #include <libdw.h>
52 #include <sstream>
53 #include <vector>
54 
55 /* Libraries for use by the --zip option */
56 #ifdef HAVE_LIBARCHIVE
57 #include <archive.h>
58 #include <archive_entry.h>
59 #endif
60 
61 /* If fts.h is included before config.h, its indirect inclusions may not
62    give us the right LFS aliases of these functions, so map them manually.  */
63 #ifdef BAD_FTS
64 #ifdef _FILE_OFFSET_BITS
65 #define open open64
66 #define fopen fopen64
67 #endif
68 #else
69   #include <sys/types.h>
70   #include <fts.h>
71 #endif
72 
73 using namespace std;
74 
75 /* Name and version of program.  */
76 ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
77 
78 /* Bug report address.  */
79 ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
80 
81 constexpr size_t BUFFER_SIZE = 8192;
82 
83 /* Definitions of arguments for argp functions.  */
84 static const struct argp_option options[] =
85 {
86   { NULL, 0, NULL, OPTION_DOC, N_("Output options:"), 1 },
87   { "null", '0', NULL, 0,
88     N_ ("Separate items by a null instead of a newline."), 0 },
89   { "verbose", 'v', NULL, 0,
90     N_ ("Increase verbosity of logging messages."), 0 },
91   { "cu-only", 'c', NULL, 0, N_("Only list the CU names."), 0 },
92   #ifdef HAVE_LIBARCHIVE
93   { "zip", 'z', NULL, 0, N_("Zip all the source files and send to stdout. "
94     "Cannot be used with the null option"), 0 },
95     #ifdef ENABLE_LIBDEBUGINFOD
96     { "no-backup", 'b', NULL, 0, N_("Disables local source file search when "
97       "debuginfod fails to fetch files. This option is only applicable"
98       "when fetching and zipping files."), 0 },
99     #endif
100   #endif
101   { NULL, 0, NULL, 0, NULL, 0 }
102 };
103 
104 /* Short description of program.  */
105 static const char doc[] = N_("Lists the source files of a DWARF/ELF file.  The default input is the file 'a.out'.");
106 
107 /* Strings for arguments in help texts.  */
108 static const char args_doc[] = N_("INPUT");
109 
110 /* Prototype for option handler.  */
111 static error_t parse_opt (int key, char *arg, struct argp_state *state);
112 
113 static struct argp_child argp_children[2]; /* [0] is set in main.  */
114 
115 /* Data structure to communicate with argp functions.  */
116 static const struct argp argp =
117 {
118   options, parse_opt, args_doc, doc, argp_children, NULL, NULL
119 };
120 
121 /* Verbose message printing.  */
122 static bool verbose;
123 /* Delimit the output with nulls.  */
124 static bool null_arg;
125 /* Only print compilation unit names.  */
126 static bool CU_only;
127 #ifdef HAVE_LIBARCHIVE
128   /* Zip all the source files and send to stdout. */
129   static bool zip;
130 
131   #ifdef ENABLE_LIBDEBUGINFOD
132     /* Disables local source file search when debuginfod fails to fetch them.
133        This option is only applicable when fetching and zipping files.*/
134     static bool no_backup;
135   #endif
136 #endif
137 
138 /* Handle program arguments.  Note null arg and zip
139     cannot be combined due to warnings raised when unzipping.  */
140 static error_t
parse_opt(int key,char * arg,struct argp_state * state)141 parse_opt (int key, char *arg, struct argp_state *state)
142 {
143   /* Suppress "unused parameter" warning.  */
144   (void)arg;
145   switch (key)
146     {
147     case ARGP_KEY_INIT:
148       state->child_inputs[0] = state->input;
149       break;
150 
151     case '0':
152       null_arg = true;
153       break;
154 
155     case 'v':
156       verbose = true;
157       break;
158 
159     case 'c':
160       CU_only = true;
161       break;
162 
163     #ifdef HAVE_LIBARCHIVE
164       case 'z':
165       zip = true;
166       break;
167 
168       #ifdef ENABLE_LIBDEBUGINFOD
169         case 'b':
170         no_backup = true;
171         break;
172       #endif
173     #endif
174 
175     default:
176       return ARGP_ERR_UNKNOWN;
177     }
178   return 0;
179 }
180 
/* Normalise PATH lexically so that it is safe to use as a zip member name.

   Empty components ("//") and "." components are dropped, and each ".."
   component removes the component that precedes it (a ".." with nothing
   before it is simply discarded).  The result always starts with '/';
   if no component survives, "/" itself is returned.  No file system
   access is performed, so symbolic links are not resolved.  */
string canonicalize_path(string path)
{
    vector<string> parts;
    const size_t length = path.size();

    /* Walk the path one '/'-separated component at a time.  */
    for (size_t begin = 0; begin < length; ) {
        size_t end = path.find('/', begin);
        if (end == string::npos)
            end = length;
        const string component = path.substr(begin, end - begin);
        begin = end + 1;

        if (component.empty() || component == ".")
            continue;               /* "//" or "/./": nothing to record.  */

        if (component == "..") {
            /* Step back one level, if there is one to step back from.  */
            if (!parts.empty())
                parts.pop_back();
            continue;
        }

        parts.push_back(component);
    }

    if (parts.empty())
        return "/";

    /* Glue the surviving components back together.  */
    string joined;
    for (const string &part : parts) {
        joined += '/';
        joined += part;
    }
    return joined;
}
209 
210 /* Global list of collected source files and their respective module.
211    Normally, it'll contain the sources of just one named binary, but
212    the '-K' option can cause multiple dwfl modules to be loaded, thus
213    listed.  */
214 set<pair<string, Dwfl_Module*>> debug_sourcefiles;
215 
216 static int
collect_sourcefiles(Dwfl_Module * dwflmod,void ** userdata,const char * name,Dwarf_Addr base,void * arg)217 collect_sourcefiles (Dwfl_Module *dwflmod,
218                      void **userdata __attribute__ ((unused)),
219                      const char *name __attribute__ ((unused)),
220                      Dwarf_Addr base __attribute__ ((unused)),
221                      void *arg __attribute__ ((unused)))
222 {
223   Dwarf *dbg;
224   Dwarf_Addr bias; /* ignored - for addressing purposes only.  */
225 
226   dbg = dwfl_module_getdwarf (dwflmod, &bias);
227 
228   Dwarf_Off offset = 0;
229   Dwarf_Off old_offset;
230   size_t hsize;
231   /* Traverse all CUs of this module.  */
232   while (dwarf_nextcu (dbg, old_offset = offset, &offset, &hsize, NULL, NULL, NULL) == 0)
233     {
234       Dwarf_Die cudie_mem;
235       Dwarf_Die *cudie = dwarf_offdie (dbg, old_offset + hsize, &cudie_mem);
236 
237       if (cudie == NULL)
238         continue;
239 
240       const char *cuname = dwarf_diename (cudie) ?: "<unknown>";
241       Dwarf_Files *files;
242       size_t nfiles;
243       if (dwarf_getsrcfiles (cudie, &files, &nfiles) != 0)
244         continue;
245 
246       /* extract DW_AT_comp_dir to resolve relative file names.  */
247       const char *comp_dir = "";
248       const char *const *dirs;
249       size_t ndirs;
250 
251       if (dwarf_getsrcdirs (files, &dirs, &ndirs) == 0 && dirs[0] != NULL)
252         comp_dir = dirs[0];
253       if (comp_dir == NULL)
254         comp_dir = "";
255 
256       if (verbose)
257         clog << "searching for sources for cu=" << cuname
258                   << " comp_dir=" << comp_dir << " #files=" << nfiles
259                   << " #dirs=" << ndirs << endl;
260 
261       if (comp_dir[0] == '\0' && cuname[0] != '/')
262         {
263           /* This is a common symptom for dwz-compressed debug files,
264              where the altdebug file cannot be resolved.  */
265           if (verbose)
266             clog << "skipping cu=" << cuname << " due to empty comp_dir" << endl;
267           continue;
268         }
269       for (size_t f = 1; f < nfiles; ++f)
270         {
271           const char *hat;
272           if (CU_only)
273           {
274             if (strcmp(cuname, "<unknown>") == 0 || strcmp(cuname, "<artificial>") == 0 )
275               continue;
276             hat = cuname;
277           }
278           else
279             hat = dwarf_filesrc (files, f, NULL, NULL);
280 
281           if (hat == NULL)
282             continue;
283 
284           if (string(hat).find("<built-in>")
285               != string::npos) /* gcc intrinsics, don't bother recording */
286             continue;
287 
288           string waldo;
289           if (hat[0] == '/') /* absolute */
290             waldo = (string (hat));
291           else if (comp_dir[0] != '\0') /* comp_dir relative */
292             waldo = (string (comp_dir) + string ("/") + string (hat));
293           else
294            {
295              if (verbose)
296               clog << "skipping file=" << hat << " due to empty comp_dir" << endl;
297              continue;
298            }
299           waldo = canonicalize_path (waldo);
300           debug_sourcefiles.insert (make_pair(waldo, dwflmod));
301         }
302     }
303   return DWARF_CB_OK;
304 }
305 
306 #ifdef HAVE_LIBARCHIVE
zip_files()307 void zip_files()
308 {
309   struct archive *a = archive_write_new();
310   struct stat st;
311   char buff[BUFFER_SIZE];
312   int len;
313   int fd;
314   #ifdef ENABLE_LIBDEBUGINFOD
315   /* Initialize a debuginfod client.  */
316   static unique_ptr <debuginfod_client, void (*)(debuginfod_client*)>
317     client (debuginfod_begin(), &debuginfod_end);
318   #endif
319 
320   archive_write_set_format_zip(a);
321   archive_write_open_fd(a, STDOUT_FILENO);
322 
323   int missing_files = 0;
324   for (const auto &pair : debug_sourcefiles)
325   {
326     fd = -1;
327     const std::string &file_path = pair.first;
328 
329     /* Attempt to query debuginfod client to fetch source files.  */
330     #ifdef ENABLE_LIBDEBUGINFOD
331     Dwfl_Module* dwflmod = pair.second;
332     /* Obtain source file's build ID.  */
333     const unsigned char *bits;
334     GElf_Addr vaddr;
335     int bits_length = dwfl_module_build_id(dwflmod, &bits, &vaddr);
336     /* Ensure successful client and build ID acquisition.  */
337     if (client.get() != NULL && bits_length > 0)
338     {
339       fd = debuginfod_find_source(client.get(),
340                                     bits, bits_length,
341                                     file_path.c_str(), NULL);
342     }
343     else
344     {
345         if (client.get() == NULL)
346             cerr << "Error: Failed to initialize debuginfod client." << endl;
347         else
348             cerr << "Error: Invalid build ID length (" << bits_length << ")." << endl;
349     }
350     #endif
351 
352     if (!no_backup)
353       /* Files could not be located using debuginfod, search locally */
354       if (fd < 0)
355         fd = open(file_path.c_str(), O_RDONLY);
356     if (fd < 0)
357     {
358       if (verbose)
359         cerr << file_path << endl;
360       missing_files++;
361       continue;
362     }
363 
364     /* Create an entry for each file including file information to be placed in the zip.  */
365     if (fstat(fd, &st) == -1)
366     {
367       if (verbose)
368         cerr << file_path << endl;
369       missing_files++;
370       if (verbose)
371         cerr << "Error: Failed to get file status for " << file_path << ": " << strerror(errno) << endl;
372       continue;
373     }
374     struct archive_entry *entry = archive_entry_new();
375     /* Removing first "/"" to make the path "relative" before zipping, otherwise warnings are raised when unzipping.  */
376     string entry_name = file_path.substr(file_path.find_first_of('/') + 1);
377     archive_entry_set_pathname(entry, entry_name.c_str());
378     archive_entry_copy_stat(entry, &st);
379     if (archive_write_header(a, entry) != ARCHIVE_OK)
380     {
381       if (verbose)
382         cerr << file_path << endl;
383       missing_files++;
384       if (verbose)
385         cerr << "Error: failed to write header for " << file_path << ": " << archive_error_string(a) << endl;
386       continue;
387     }
388 
389     /* Write the file to the zip.  */
390     len = read(fd, buff, sizeof(buff));
391     if (len == -1)
392     {
393       if (verbose)
394         cerr << file_path << endl;
395       missing_files++;
396       if (verbose)
397         cerr << "Error: Failed to open file: " << file_path << ": " << strerror(errno) <<endl;
398       continue;
399     }
400     while (len > 0)
401     {
402       if (archive_write_data(a, buff, len) < ARCHIVE_OK)
403       {
404         if (verbose)
405           cerr << "Error: Failed to read from the file: " << file_path << ": " << strerror(errno) << endl;
406         break;
407       }
408       len = read(fd, buff, sizeof(buff));
409     }
410     close(fd);
411     archive_entry_free(entry);
412   }
413   if (verbose && missing_files > 0 )
414     cerr << missing_files << " file(s) listed above could not be found.  " << endl;
415 
416   archive_write_close(a);
417   archive_write_free(a);
418 }
419 #endif
420 
421 int
main(int argc,char * argv[])422 main (int argc, char *argv[])
423 {
424   int remaining;
425 
426   /* Parse and process arguments.  This includes opening the modules.  */
427   argp_children[0].argp = dwfl_standard_argp ();
428   argp_children[0].group = 1;
429 
430   Dwfl *dwfl = NULL;
431   (void) argp_parse (&argp, argc, argv, 0, &remaining, &dwfl);
432   assert (dwfl != NULL);
433   /* Process all loaded modules - probably just one, except if -K or -p is used.  */
434   (void) dwfl_getmodules (dwfl, &collect_sourcefiles, NULL, 0);
435 
436   if (!debug_sourcefiles.empty ())
437   {
438     #ifdef HAVE_LIBARCHIVE
439       if (zip)
440         zip_files();
441       else
442     #endif
443       {
444         for (const auto &pair : debug_sourcefiles)
445           {
446             cout << pair.first;
447             if (null_arg)
448               cout << '\0';
449             else
450               cout << '\n';
451           }
452       }
453   }
454 
455   dwfl_end (dwfl);
456   return 0;
457 }
458