// Source: AOSP aosp_15_r20, external/musl/android/relinterp.c (revision c9945492fdd68bbe62686c5b452b4dc1be3f8453)
1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *  * Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  *  * Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in
12  *    the documentation and/or other materials provided with the
13  *    distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #define SYSCALL_NO_TLS 1
30 #include <elf.h>
31 #include <errno.h>
32 #include <fcntl.h>
33 #include <link.h>
34 #include <stdalign.h>
35 #include <stdarg.h>
36 #include <stdbool.h>
37 #include <stdint.h>
38 #include <sys/mman.h>
39 #include <sys/param.h>
40 #include <sys/syscall.h>
41 #include <sys/user.h>
42 #include <unistd.h>
43 
44 #include "reloc.h"
45 #include "syscall.h"
46 
// Type of the loader entry point that control is finally transferred to.
typedef void EntryFunc(void);

// arm64 doesn't have a constant page size and has to use the value from AT_PAGESZ.
#ifndef PAGE_SIZE
#define PAGE_SIZE g_page_size
#endif

// Round an address down / up to a page boundary (assumes PAGE_SIZE is a power of two).
#define PAGE_START(x) ((x) & (~(PAGE_SIZE-1)))
#define PAGE_END(x) PAGE_START((x) + (PAGE_SIZE - 1))

// NOTE(review): START is presumably the symbol name crt_arch.h gives the
// assembly entry stub that calls _start_c -- confirm against crt_arch.h.
#define START "_start"
#include "crt_arch.h"

// Symbols used when the executable turns out to be already relocated and the
// normal libc startup path is taken (see _start_c).
int main();
weak void _init();
weak void _fini();
int __libc_start_main(int (*)(), int, char **,
  void (*)(), void(*)(), void(*)());

// Storage for the rewritten program header table and interpreter path that
// insert_pt_interp_into_phdr_table() publishes through the aux vector.
static ElfW(Phdr) replacement_phdr_table[64];
static char replacement_interp[PATH_MAX];

static bool g_debug = false;              // set when RELINTERP_DEBUG=1 is in the environment
static const char* g_prog_name = NULL;    // argv[0], used as a prefix in fatal() messages
static uintptr_t g_page_size = 0;         // AT_PAGESZ value, backs PAGE_SIZE when it is not a constant
static int g_errno = 0;                   // errno substitute written by ri_set_errno()

// Start of this executable's dynamic section; its runtime address is used to
// compute the load bias and to read DT_* entries.
__attribute__((visibility("hidden"))) extern ElfW(Dyn) _DYNAMIC[];
75 
76 __attribute__((used))
ri_set_errno(unsigned long val)77 static long ri_set_errno(unsigned long val) {
78   if (val > -4096UL) {
79     g_errno = -val;
80     return -1;
81   }
82   return val;
83 }
84 
85 #define ri_syscall(...) ri_set_errno(__syscall(__VA_ARGS__))
86 
ri_write(int fd,const void * buf,size_t amt)87 static ssize_t ri_write(int fd, const void* buf, size_t amt) {
88   return ri_syscall(SYS_write, fd, buf, amt);
89 }
90 
91 __attribute__((noreturn))
ri_exit(int status)92 static void ri_exit(int status) {
93   ri_syscall(SYS_exit, status);
94   __builtin_unreachable();
95 }
96 
ri_open(const char * path,int flags,mode_t mode)97 static int ri_open(const char* path, int flags, mode_t mode) {
98   return ri_syscall(SYS_openat, AT_FDCWD, path, flags, mode);
99 }
100 
ri_close(int fd)101 static int ri_close(int fd) {
102   return ri_syscall(SYS_close, fd);
103 }
104 
ri_lseek(int fd,off_t offset,int whence)105 static off_t ri_lseek(int fd, off_t offset, int whence) {
106   return ri_syscall(SYS_lseek, fd, offset, whence);
107 }
108 
ri_readlink(const char * path,char * buf,size_t size)109 static ssize_t ri_readlink(const char* path, char* buf, size_t size) {
110   return ri_syscall(SYS_readlinkat, AT_FDCWD, path, buf, size);
111 }
112 
ri_mmap(void * addr,size_t length,int prot,int flags,int fd,off_t offset)113 static void* ri_mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) {
114 #ifdef SYS_mmap2
115   return (void*)ri_syscall(SYS_mmap2, addr, length, prot, flags, fd, offset/SYSCALL_MMAP2_UNIT);
116 #else
117   return (void*)ri_syscall(SYS_mmap, addr, length, prot, flags, fd, offset);
118 #endif
119 }
120 
ri_munmap(void * addr,size_t length)121 static void* ri_munmap(void* addr, size_t length) {
122   return (void*)ri_syscall(SYS_munmap, addr, length);
123 }
124 
ri_mprotect(void * addr,size_t len,int prot)125 static int ri_mprotect(void* addr, size_t len, int prot) {
126   return ri_syscall(SYS_mprotect, addr, len, prot);
127 }
128 
ri_pread(int fd,void * buf,size_t size,off_t ofs)129 static ssize_t ri_pread(int fd, void* buf, size_t size, off_t ofs) {
130   return ri_syscall(SYS_pread, fd, buf, size, __SYSCALL_LL_PRW(ofs));
131 }
132 
// Returns the number of bytes in src before its terminating NUL.
static size_t ri_strlen(const char* src) {
  const char* end = src;
  while (*end != '\0') {
    ++end;
  }
  return (size_t)(end - src);
}
138 
// Copies src, including its terminating NUL, into dst and returns dst.
// No bounds checking: dst must be large enough.
static char* ri_strcpy(char* dst, const char* src) {
  size_t i = 0;
  for (;;) {
    dst[i] = src[i];
    if (src[i] == '\0') {
      break;
    }
    ++i;
  }
  return dst;
}
147 
// Appends src to the NUL-terminated string in dst and returns dst.
// No bounds checking: dst must have room for the combined string.
static char* ri_strcat(char* dst, const char* src) {
  char* const tail = dst + ri_strlen(dst);
  ri_strcpy(tail, src);
  return dst;
}
152 
// Stores val (converted to char) into each of the first len bytes of dst and
// returns dst.
static void* ri_memset(void* dst, int val, size_t len) {
  char* out = (char*)dst;
  char* const stop = out + len;
  while (out != stop) {
    *out = val;
    ++out;
  }
  return dst;
}
159 
// Copies len bytes from src to dst in ascending address order and returns dst.
__attribute__ ((unused))
static void* ri_memcpy(void* dst, const void* src, size_t len) {
  char* out = (char*)dst;
  const char* in = (const char*)src;
  for (size_t remaining = len; remaining > 0; --remaining) {
    *out = *in;
    ++out;
    ++in;
  }
  return dst;
}
167 
// Compares at most maxlen bytes of x and y as unsigned chars. Returns the
// difference of the first mismatching pair, or 0 if the strings agree up to
// maxlen bytes or up to a shared terminating NUL.
static int ri_strncmp(const char* x, const char *y, size_t maxlen) {
  size_t i = 0;
  while (i != maxlen) {
    const int diff = (unsigned char)x[i] - (unsigned char)y[i];
    if (diff != 0) {
      return diff;
    }
    if (x[i] == '\0') {
      break;
    }
    ++i;
  }
  return 0;
}
176 
// Full-string comparison: ri_strncmp() with the length limit set to SIZE_MAX.
static int ri_strcmp(const char* x, const char *y) {
  const size_t unbounded = SIZE_MAX;
  return ri_strncmp(x, y, unbounded);
}
180 
// Returns a pointer to the last byte in str equal to ch, or NULL if there is
// none. The terminating NUL takes part in the search.
static char* ri_strrchr(const char* str, int ch) {
  const char* last = NULL;
  for (const char* p = str;; ++p) {
    if (*p == ch) {
      last = p;
    }
    if (*p == '\0') {
      return (char*)last;
    }
  }
}
190 
// Returns a pointer to the first byte in str equal to ch, or NULL if there is
// none. Like strchr(3) and like ri_strrchr(), the terminating NUL takes part
// in the search, so ri_strchr(s, '\0') returns a pointer to the terminator.
// (The previous version stopped before the terminator and returned NULL.)
static char* ri_strchr(const char* str, int ch) {
  for (;; ++str) {
    // Test for a match first so that the terminator itself can match.
    if (*str == ch) return (char*)str;
    if (*str == '\0') return NULL;
  }
}
198 
// Truncates path in place to its directory part:
//   no '/' at all        -> "."
//   only a leading '/'   -> "/"
//   otherwise            -> everything before the last '/'
// The buffer must hold at least two bytes, since the "." case writes path[1].
static void ri_dirname(char* path) {
  char* const slash = ri_strrchr(path, '/');
  if (slash == NULL) {
    path[0] = '.';
    path[1] = '\0';
    return;
  }
  if (slash == path) {
    path[1] = '\0';
    return;
  }
  *slash = '\0';
}
210 
// Writes exactly n bytes of str to stderr. The write result is ignored.
static void out_str_n(const char* str, size_t n) {
  (void)ri_write(STDERR_FILENO, str, n);
}
214 
// Writes a NUL-terminated string to stderr.
static void out_str(const char* str) {
  const size_t len = ri_strlen(str);
  out_str_n(str, len);
}
218 
// Formats i in the given base into out (NUL-terminated) and returns out.
// Digits come from a 16-character table, so base must be in 2..16. The
// scratch buffer of 65 bytes fits 64 digits plus the terminator.
static char* ul_to_str(unsigned long i, char* out, unsigned char base) {
  const char* const digits = "0123456789abcdef";
  char scratch[65];
  size_t pos = sizeof(scratch) - 1;
  scratch[pos] = '\0';
  // Emit digits least-significant first, filling the buffer from the back.
  do {
    --pos;
    scratch[pos] = digits[i % base];
    i /= base;
  } while (i > 0);
  return ri_strcpy(out, &scratch[pos]);
}
229 
// Signed counterpart of ul_to_str(): writes a leading '-' for negative values
// followed by the magnitude. Returns out.
static char* l_to_str(long i, char* out, unsigned char base) {
  if (i >= 0) {
    return ul_to_str(i, out, base);
  }
  out[0] = '-';
  // Negate in unsigned arithmetic so the most negative long is handled too.
  ul_to_str(-(unsigned long)i, &out[1], base);
  return out;
}
239 
// Returns a human-readable message for a small set of errno values. Any other
// value is rendered as "Unknown error <n>" into a static buffer, so that
// result is overwritten by the next such call.
static const char* ri_strerror(int err) {
  static char buf[64];

  switch (err) {
    case EPERM: return "Operation not permitted";
    case ENOENT: return "No such file or directory";
    case EIO: return "I/O error";
    case ENXIO: return "No such device or address";
    case EAGAIN: return "Try again";
    case ENOMEM: return "Out of memory";
    case EACCES: return "Permission denied";
    case ENODEV: return "No such device";
    case ENOTDIR: return "Not a directory";
    case EINVAL: return "Invalid argument";
    case ENFILE: return "File table overflow";
    case EMFILE: return "Too many open files";
    case ESPIPE: return "Illegal seek";
    case ENAMETOOLONG: return "File name too long";
    case ELOOP: return "Too many symbolic links encountered";
    default: break;
  }

  ri_strcpy(buf, "Unknown error ");
  char* const number_at = buf + ri_strlen(buf);
  l_to_str(err, number_at, 10);
  return buf;
}
263 
// Minimal vprintf onto stderr. Supported conversions: %d %ld %u %lu %zu %x
// %lx %zx %p %s %%. A NULL %s argument prints "(null)". Any other '%'
// sequence prints an error and exits with status 1.
static void outv(const char *fmt, va_list ap) {
  char buf[65];
  while (fmt[0] != '\0') {
    // Literal text: emit everything up to the next '%' (or the end) at once.
    if (fmt[0] != '%') {
      size_t run = 0;
      while (fmt[run] != '\0' && fmt[run] != '%') {
        ++run;
      }
      out_str_n(fmt, run);
      fmt += run;
      continue;
    }

    // Numeric conversions: fetch the argument with the matching type, format
    // it into buf, and skip past the specifier.
#define EMIT_NUM(spec, type, conv, base)                    \
    if (ri_strncmp(fmt, spec, sizeof(spec) - 1) == 0) {     \
      out_str(conv(va_arg(ap, type), buf, base));           \
      fmt += sizeof(spec) - 1;                              \
      continue;                                             \
    }
    EMIT_NUM("%d",  int,           l_to_str,  10);
    EMIT_NUM("%ld", long,          l_to_str,  10);
    EMIT_NUM("%u",  unsigned int,  ul_to_str, 10);
    EMIT_NUM("%lu", unsigned long, ul_to_str, 10);
    EMIT_NUM("%zu", size_t,        ul_to_str, 10);
    EMIT_NUM("%x",  unsigned int,  ul_to_str, 16);
    EMIT_NUM("%lx", unsigned long, ul_to_str, 16);
    EMIT_NUM("%zx", size_t,        ul_to_str, 16);
#undef EMIT_NUM

    if (ri_strncmp(fmt, "%p", 2) == 0) {
      out_str(ul_to_str((unsigned long)va_arg(ap, void*), buf, 16));
      fmt += 2;
      continue;
    }
    if (ri_strncmp(fmt, "%s", 2) == 0) {
      const char* text = va_arg(ap, const char*);
      out_str(text ? text : "(null)");
      fmt += 2;
      continue;
    }
    if (ri_strncmp(fmt, "%%", 2) == 0) {
      out_str("%");
      fmt += 2;
      continue;
    }

    // Unknown conversion: report the offending character and give up.
    buf[0] = fmt[1];
    buf[1] = '\0';
    out_str("relinterp error: unrecognized output specifier: '%");
    out_str(buf);
    out_str("'\n");
    ri_exit(1);
  }
}
310 
311 __attribute__((format(printf, 1, 2)))
debug(const char * fmt,...)312 static void debug(const char* fmt, ...) {
313   if (!g_debug) return;
314   out_str("relinterp: ");
315 
316   va_list ap;
317   va_start(ap, fmt);
318   outv(fmt, ap);
319   va_end(ap);
320   out_str("\n");
321 }
322 
323 __attribute__((format(printf, 1, 2), noreturn))
fatal(const char * fmt,...)324 static void fatal(const char* fmt, ...) {
325   out_str("relinterp: ");
326   if (g_prog_name) {
327     out_str(g_prog_name);
328     out_str(": ");
329   }
330   out_str("fatal error: ");
331 
332   va_list ap;
333   va_start(ap, fmt);
334   outv(fmt, ap);
335   va_end(ap);
336   out_str("\n");
337   ri_exit(1);
338 }
339 
// Returns val unchanged, routed through an asm statement that takes the
// address of the local copy and declares a memory clobber. The intent, as
// used by is_exe_relocated(), is that the compiler cannot reason about the
// returned value at compile time.
static void* optimizer_barrier(void* val) {
  __asm__ volatile ("nop" :: "r"(&val) : "memory");
  return val;
}
344 
// One key/value pair of the auxiliary vector. A key of 0 ends the vector
// (see read_args()).
typedef struct {
  unsigned long key;    // AT_* tag
  unsigned long value;  // value associated with the tag
} AuxEntry;
349 
// Decoded view of the initial argument block (see read_args()). The pointers
// refer into that block; nothing is copied.
typedef struct {
  int argc;           // argument count read from the first word of the block
  char **argv;        // argument vector
  char **envp;        // environment vector
  size_t envp_count;  // number of entries in envp, excluding the NULL terminator
  AuxEntry* auxv;     // auxiliary vector, located after envp's NULL terminator
  size_t auxv_count;  // number of entries in auxv, excluding the terminating 0 key
} KernelArguments;
358 
read_args(void * raw_args)359 static KernelArguments read_args(void* raw_args) {
360   KernelArguments result;
361   result.argc = *(long*)raw_args;
362   result.argv = (char**)((void**)raw_args + 1);
363   result.envp = result.argv + result.argc + 1;
364 
365   char** envp = result.envp;
366   while (*envp != NULL) ++envp;
367   result.envp_count = envp - result.envp;
368   ++envp;
369 
370   result.auxv = (AuxEntry*)envp;
371   size_t count = 0;
372   while (result.auxv[count].key != 0) {
373     ++count;
374   }
375   result.auxv_count = count;
376   return result;
377 }
378 
dump_auxv(const KernelArguments * args)379 static void dump_auxv(const KernelArguments* args) {
380   for (size_t i = 0; i < args->auxv_count; ++i) {
381     const char* name = "";
382     switch (args->auxv[i].key) {
383       case AT_BASE: name = " [AT_BASE]"; break;
384       case AT_EGID: name = " [AT_EGID]"; break;
385       case AT_ENTRY: name = " [AT_ENTRY]"; break;
386       case AT_EUID: name = " [AT_EUID]"; break;
387       case AT_GID: name = " [AT_GID]"; break;
388       case AT_PAGESZ: name = " [AT_PAGESZ]"; break;
389       case AT_PHDR: name = " [AT_PHDR]"; break;
390       case AT_PHENT: name = " [AT_PHENT]"; break;
391       case AT_PHNUM: name = " [AT_PHNUM]"; break;
392       case AT_SECURE: name = " [AT_SECURE]"; break;
393       case AT_SYSINFO: name = " [AT_SYSINFO]"; break;
394       case AT_SYSINFO_EHDR: name = " [AT_SYSINFO_EHDR]"; break;
395       case AT_UID: name = " [AT_UID]"; break;
396     }
397     debug("  %lu => 0x%lx%s", args->auxv[i].key, args->auxv[i].value, name);
398   }
399 }
400 
ri_getauxval(const KernelArguments * args,unsigned long kind,bool allow_missing)401 static unsigned long ri_getauxval(const KernelArguments* args, unsigned long kind,
402                                   bool allow_missing) {
403   for (size_t i = 0; i < args->auxv_count; ++i) {
404     if (args->auxv[i].key == kind) return args->auxv[i].value;
405   }
406   if (!allow_missing) fatal("could not find aux vector entry %lu", kind);
407   return 0;
408 }
409 
// Translates ELF segment permission flags (PF_R/PF_W/PF_X) into the
// corresponding mmap/mprotect PROT_* bits.
static int elf_flags_to_prot(int flags) {
  const int readable = (flags & PF_R) ? PROT_READ : 0;
  const int writable = (flags & PF_W) ? PROT_WRITE : 0;
  const int executable = (flags & PF_X) ? PROT_EXEC : 0;
  return readable | writable | executable;
}
417 
// A loader file that open_loader() accepted.
typedef struct {
  int fd;               // open, read-only descriptor for the loader file
  char path[PATH_MAX];  // path of the file as resolved by realpath_fd()
} OpenedLoader;
422 
// Result of load_interp(): where the loader was mapped and where to jump.
typedef struct {
  void* base_addr;   // start of the region the loader image was mapped into
  EntryFunc* entry;  // base_addr + e_entry of the loader's ELF header
} LoadedInterp;
427 
load_interp(const OpenedLoader * loader,ElfW (Ehdr)* hdr)428 static LoadedInterp load_interp(const OpenedLoader *loader, ElfW(Ehdr)* hdr) {
429   ElfW(Phdr)* phdr = (ElfW(Phdr)*)((char*)hdr + hdr->e_phoff);
430   size_t phdr_count = hdr->e_phnum;
431 
432   size_t max_vaddr = 0;
433 
434   // Find the virtual address extent.
435   for (size_t i = 0; i < phdr_count; ++i) {
436     if (phdr[i].p_type == PT_LOAD) {
437       max_vaddr = PAGE_END(MAX(max_vaddr, phdr[i].p_vaddr + phdr[i].p_memsz));
438     }
439   }
440 
441   // Map an area to fit the loader.
442   void* loader_vaddr = ri_mmap(NULL, max_vaddr, PROT_READ | PROT_WRITE,
443                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
444   if (loader_vaddr == (void*)MAP_FAILED) {
445     fatal("reservation mmap of 0x%zx bytes for %s failed: %s", max_vaddr, loader->path,
446           ri_strerror(g_errno));
447   }
448 
449   // Map each PT_LOAD.
450   for (size_t i = 0; i < phdr_count; ++i) {
451     if (phdr[i].p_type == PT_LOAD) {
452       size_t start = PAGE_START(phdr[i].p_vaddr);
453       const size_t end = PAGE_END(phdr[i].p_vaddr + phdr[i].p_memsz);
454       if (phdr[i].p_filesz > 0) {
455         const size_t file_end = phdr[i].p_vaddr + phdr[i].p_filesz;
456         void* tmp = ri_mmap((char*)loader_vaddr + start,
457                             file_end - start,
458                             elf_flags_to_prot(phdr[i].p_flags),
459                             MAP_PRIVATE | MAP_FIXED, loader->fd, PAGE_START(phdr[i].p_offset));
460         if (tmp == (void*)MAP_FAILED) {
461           fatal("PT_LOAD mmap failed (%s segment #%zu): %s", loader->path, i,
462                 ri_strerror(g_errno));
463         }
464         start = file_end;
465         if (phdr[i].p_flags & PF_W) {
466           // The bytes between p_filesz and PAGE_END(p_filesz) currently come from the file mapping,
467           // but they need to be zeroed. (Apparently this zeroing isn't necessary if the segment isn't
468           // writable, and zeroing a non-writable page would be inconvenient.)
469           ri_memset((char*)loader_vaddr + start, '\0', PAGE_END(start) - start);
470         }
471         start = PAGE_END(start);
472       }
473       if (start < end) {
474         // The memory is already zeroed, because it comes from an anonymous file mapping. Just set
475         // the protections correctly.
476         int result = ri_mprotect((char*)loader_vaddr + start, end - start,
477                                  elf_flags_to_prot(phdr[i].p_flags));
478         if (result != 0) {
479           fatal("mprotect of PT_LOAD failed (%s segment #%zu): %s", loader->path, i,
480                 ri_strerror(g_errno));
481         }
482       }
483     }
484   }
485 
486   return (LoadedInterp) {
487     .base_addr = loader_vaddr,
488     .entry = (EntryFunc*)((uintptr_t)loader_vaddr + hdr->e_entry),
489   };
490 }
491 
// Facts about the running executable gathered by get_exe_info().
typedef struct {
  ElfW(Phdr)* phdr;        // program header table (AT_PHDR)
  size_t phdr_count;       // number of program headers (AT_PHNUM)
  uintptr_t load_bias;     // runtime address minus link-time vaddr, derived from PT_DYNAMIC
  uintptr_t page_size;     // AT_PAGESZ
  char* search_paths;      // DT_RUNPATH (or DT_RPATH) string, NULL if absent
  ElfW(Ehdr)* ehdr;        // runtime address of the ELF header (start of the first PT_LOAD)
  ElfW(Phdr)* first_load;  // first PT_LOAD entry in phdr
  bool secure;             // uid/euid or gid/egid differ, or AT_SECURE is non-zero
} ExeInfo;
502 
get_exe_info(const KernelArguments * args)503 static ExeInfo get_exe_info(const KernelArguments* args) {
504   ExeInfo result = { 0 };
505   result.phdr = (ElfW(Phdr)*)ri_getauxval(args, AT_PHDR, false);
506   result.phdr_count = ri_getauxval(args, AT_PHNUM, false);
507   result.page_size = ri_getauxval(args, AT_PAGESZ, false);
508 
509   unsigned long uid = ri_getauxval(args, AT_UID, false);
510   unsigned long euid = ri_getauxval(args, AT_EUID, false);
511   unsigned long gid = ri_getauxval(args, AT_GID, false);
512   unsigned long egid = ri_getauxval(args, AT_EGID, false);
513   unsigned long secure = ri_getauxval(args, AT_SECURE, true);
514   result.secure = uid != euid || gid != egid || secure;
515 
516   debug("orig phdr     = %p", (void*)result.phdr);
517   debug("orig phnum    = %zu", result.phdr_count);
518 
519   for (size_t i = 0; i < result.phdr_count; ++i) {
520     if (result.phdr[i].p_type == PT_DYNAMIC) {
521       result.load_bias = (uintptr_t)&_DYNAMIC - result.phdr[i].p_vaddr;
522     }
523   }
524   debug("load_bias     = 0x%lx", (unsigned long)result.load_bias);
525 
526   for (size_t i = 0; i < result.phdr_count; ++i) {
527     ElfW(Phdr)* phdr = &result.phdr[i];
528     if (phdr->p_type != PT_LOAD) continue;
529     result.first_load = phdr;
530     if (phdr->p_offset != 0) {
531       fatal("expected zero p_offset for first PT_LOAD, found 0x%zx instead",
532             (size_t)phdr->p_offset);
533     }
534     result.ehdr = (ElfW(Ehdr)*)(phdr->p_vaddr + result.load_bias);
535     break;
536   }
537   debug("ehdr          = %p", (void*)result.ehdr);
538 
539   ElfW(Word) runpath_offset = -1;
540   char* strtab = NULL;
541   for (ElfW(Dyn)* dyn = _DYNAMIC; dyn->d_tag != DT_NULL; dyn++) {
542     switch (dyn->d_tag) {
543     case DT_RUNPATH:
544       runpath_offset = dyn->d_un.d_val;
545       break;
546     case DT_RPATH:
547       if (runpath_offset == -1) runpath_offset = dyn->d_un.d_val;
548       break;
549     case DT_STRTAB:
550       strtab = (char*)(dyn->d_un.d_ptr + result.load_bias);
551       break;
552     }
553   }
554 
555   if (strtab && runpath_offset != -1) {
556     result.search_paths = strtab + runpath_offset;
557     debug("dt_runpath    = %s", result.search_paths);
558   }
559   return result;
560 }
561 
562 // Loaders typically read the PT_INTERP of the executable, e.g. to set a pathname on the loader.
563 // glibc insists on the executable having PT_INTERP, and aborts if it's missing.  Musl passes it
564 // to debuggers to find symbols for the loader, which includes all the libc symbols.
565 //
566 // Make a copy of the phdr table and insert PT_INTERP into the copy.
567 //
insert_pt_interp_into_phdr_table(const KernelArguments * args,const ExeInfo * exe,const char * loader_realpath)568 static void insert_pt_interp_into_phdr_table(const KernelArguments* args, const ExeInfo* exe,
569                                              const char* loader_realpath) {
570   // Reserve extra space for the inserted PT_PHDR and PT_INTERP segments and a null terminator.
571   if (exe->phdr_count + 3 > sizeof(replacement_phdr_table) / sizeof(replacement_phdr_table[0])) {
572     fatal("too many phdr table entries in executable");
573   }
574 
575   ElfW(Phdr) newPhdr = {
576     .p_type = PT_PHDR,
577     // The replacement phdr is in the BSS section, which has no file location.
578     // Use 0 for the offset.  If this causes a problem the replacement phdr could
579     // be moved to the data section and the correct p_offset calculated.
580     .p_offset = 0,
581     .p_vaddr = (uintptr_t)&replacement_phdr_table - exe->load_bias,
582     .p_paddr = (uintptr_t)&replacement_phdr_table - exe->load_bias,
583     .p_memsz = (exe->phdr_count + 1) * sizeof(ElfW(Phdr)),
584     .p_filesz = (exe->phdr_count + 1) * sizeof(ElfW(Phdr)),
585     .p_flags = PF_R,
586     .p_align = alignof(ElfW(Phdr)),
587   };
588 
589   ElfW(Phdr*) cur = replacement_phdr_table;
590   if (exe->phdr[0].p_type != PT_PHDR) {
591     // ld.bfd does not insert a PT_PHDR if there is no PT_INTERP, fake one.
592     // It has to be first.  We're adding an entry so increase memsz and filesz.
593     newPhdr.p_memsz += sizeof(ElfW(Phdr));
594     newPhdr.p_filesz += sizeof(ElfW(Phdr));
595     *cur = newPhdr;
596     ++cur;
597   }
598 
599   for (size_t i = 0; i < exe->phdr_count; ++i) {
600     switch (exe->phdr[i].p_type) {
601     case 0:
602       fatal("unexpected null phdr entry at index %zu", i);
603       break;
604     case PT_PHDR:
605       *cur = newPhdr;
606       break;
607     default:
608       *cur = exe->phdr[i];
609     }
610     ++cur;
611   }
612 
613   // Insert PT_INTERP at the end.
614   cur->p_type = PT_INTERP;
615   cur->p_offset = 0;
616   cur->p_vaddr = (uintptr_t)&replacement_interp - exe->load_bias;
617   cur->p_paddr = cur->p_vaddr;
618   cur->p_filesz = ri_strlen(replacement_interp) + 1;
619   cur->p_memsz = ri_strlen(replacement_interp) + 1;
620   cur->p_flags = PF_R;
621   cur->p_align = 1;
622   ++cur;
623 
624   ri_strcpy(replacement_interp, loader_realpath);
625 
626   debug("new phdr      = %p", (void*)&replacement_phdr_table);
627   debug("new phnum     = %zu", cur - replacement_phdr_table);
628 
629   // Update the aux vector with the new phdr+phnum.
630   for (size_t i = 0; i < args->auxv_count; ++i) {
631     if (args->auxv[i].key == AT_PHDR) {
632       args->auxv[i].value = (unsigned long)&replacement_phdr_table;
633     } else if (args->auxv[i].key == AT_PHNUM) {
634       args->auxv[i].value = cur - replacement_phdr_table;
635     }
636   }
637 
638   // AT_PHDR and AT_PHNUM are now updated to point to the replacement program
639   // headers, but the e_phoff and e_phnum in the ELF headers still point to the
640   // original program headers.  dynlink.c doesn't use e_phoff value from the
641   // main application's program headers.  The e_phoff and e_phnum values could
642   // be updated, but that would require using mprotect to allow modifications
643   // to the read-only first page.
644 }
645 
realpath_fd(int fd,const char * orig_path,char * out,size_t len)646 static void realpath_fd(int fd, const char* orig_path, char* out, size_t len) {
647   char path[64];
648   ri_strcpy(path, "/proc/self/fd/");
649   ul_to_str(fd, path + ri_strlen(path), 10);
650   ssize_t result = ri_readlink(path, out, len);
651   if (result == -1) fatal("could not get realpath of %s: %s", orig_path, ri_strerror(g_errno));
652   if ((size_t)result >= len) fatal("realpath of %s too long", orig_path);
653 }
654 
// Opens `path` and checks that it is an ELF file whose machine and class
// match the running executable. On success stores the descriptor and its
// resolved path in *loader and returns 0. On any rejection returns -1, and
// the descriptor is closed and loader->fd is set to -1, so probing many
// candidates does not leak descriptors.
static int open_loader(const ExeInfo* exe, const char* path, OpenedLoader* loader) {
  debug("trying to open '%s'", path);
  loader->fd = ri_open(path, O_RDONLY, 0);
  if (loader->fd < 0) {
    debug("could not open loader %s: %s", path, ri_strerror(g_errno));
    return -1;
  }

  ElfW(Ehdr) hdr;
  ssize_t l = ri_pread(loader->fd, &hdr, sizeof(hdr), 0);
  if (l < 0) {
    debug("reading elf header from %s failed: %s", path, ri_strerror(g_errno));
    goto reject;
  }
  if (l != sizeof(hdr)) {
    debug("file %s too short to contain elf header", path);
    goto reject;
  }

  if (hdr.e_ident[0] != ELFMAG0 ||
      hdr.e_ident[1] != ELFMAG1 ||
      hdr.e_ident[2] != ELFMAG2 ||
      hdr.e_ident[3] != ELFMAG3) {
    debug("file %s is not an elf file", path);
    goto reject;
  }

  if (hdr.e_machine != exe->ehdr->e_machine) {
    debug("incorrect elf machine for loader %s, expected %d got %d",
          path, exe->ehdr->e_machine, hdr.e_machine);
    goto reject;
  }

  if (hdr.e_ident[EI_CLASS] != exe->ehdr->e_ident[EI_CLASS]) {
    debug("incorrect elf class for loader %s, expected %d got %d",
          path, exe->ehdr->e_ident[EI_CLASS], hdr.e_ident[EI_CLASS]);
    goto reject;
  }

  realpath_fd(loader->fd, path, loader->path, sizeof(loader->path));

  return 0;

reject:
  // The diagnostics above have already consumed g_errno, so a failing close
  // cannot disturb them.
  ri_close(loader->fd);
  loader->fd = -1;
  return -1;
}
698 
// Builds "<dir>/<rel>" and tries to open it as the loader. An empty dir means
// the current directory and produces "./<rel>". Returns 0 on success and
// non-zero on failure: 1 if the combined path does not fit in PATH_MAX,
// otherwise whatever open_loader() returns.
static int open_rel_loader(const ExeInfo* exe, const char* dir, const char* rel, OpenedLoader* loader) {
  char buf[PATH_MAX];

  size_t dir_len = ri_strlen(dir);

  // Budget: dir (or "." when empty) + '/' + rel + NUL.
  if (dir_len + (dir_len == 0 ? 1 : 0) + ri_strlen(rel) + 2 > sizeof(buf)) {
    debug("path to loader exceeds PATH_MAX: %s/%s", dir, rel);
    return 1;
  }

  if (dir_len == 0) {
    // The separator is required here as well. Without it the result was
    // ".<rel>", a different file name in the current directory.
    ri_strcpy(buf, "./");
  } else {
    ri_strcpy(buf, dir);
    if (dir[dir_len-1] != '/') {
      ri_strcat(buf, "/");
    }
  }
  ri_strcat(buf, rel);

  return open_loader(exe, buf, loader);
}
721 
get_origin(char * buf,size_t buf_len)722 static void get_origin(char* buf, size_t buf_len) {
723   ssize_t len = ri_readlink("/proc/self/exe", buf, buf_len);
724   if (len <= 0 || (size_t)len >= buf_len) {
725     fatal("could not readlink /proc/self/exe: %s", ri_strerror(g_errno));
726   }
727   buf[len] = '\0';
728 
729   ri_dirname(buf);
730 }
731 
// Walks a ':'-separated list of directories and tries to open
// loader_rel_path in each one, in order. When expand_origin is true, the first
// '$' of an entry is expanded if it starts "$ORIGIN" or "${ORIGIN}", using the
// executable's directory. search_path may be NULL. search_path_name is used
// only in debug output. Returns 0 as soon as a loader was opened, -1 if none
// was.
static int search_path_list_for_loader(const ExeInfo* exe, const char* loader_rel_path, const char* search_path,
                                       const char* search_path_name, bool expand_origin, OpenedLoader *loader) {
  char origin_buf[PATH_MAX];
  char* origin = NULL;  // computed lazily, at most once

  const char* cursor = search_path;
  while (cursor != NULL && cursor[0] != '\0') {
    // Cut out the next entry and advance the cursor past its separator.
    const char* const entry_start = cursor;
    const char* entry_end = ri_strchr(cursor, ':');
    if (entry_end != NULL) {
      cursor = entry_end + 1;
    } else {
      entry_end = entry_start + ri_strlen(entry_start);
      cursor = NULL;
    }

    const size_t entry_len = entry_end - entry_start;
    char search_path_entry[PATH_MAX];
    if (entry_len >= sizeof(search_path_entry)) {
      // Too long, skip.
      debug("%s entry too long: %s", search_path_name, entry_start);
      continue;
    }
    ri_memcpy(search_path_entry, entry_start, entry_len);
    search_path_entry[entry_len] = '\0';

    char buf[PATH_MAX];
    char* const dollar = expand_origin ? ri_strchr(search_path_entry, '$') : NULL;
    const bool has_origin =
        dollar != NULL &&
        (ri_strncmp(dollar, "$ORIGIN", 7) == 0 || ri_strncmp(dollar, "${ORIGIN}", 9) == 0);

    if (!has_origin) {
      ri_strcpy(buf, search_path_entry);
    } else {
      if (origin == NULL) {
        get_origin(origin_buf, sizeof(origin_buf));
        origin = origin_buf;
      }

      // Length of the token being replaced: "$ORIGIN" or "${ORIGIN}".
      const size_t token_len = (dollar[1] == '{') ? 9 : 7;
      const char* const suffix = dollar + token_len;
      const size_t prefix_len = dollar - search_path_entry;

      ri_memcpy(buf, search_path_entry, prefix_len);
      buf[prefix_len] = '\0';
      if (ri_strlen(buf) + ri_strlen(origin) + ri_strlen(suffix) >= sizeof(buf)) {
        debug("path to loader %s%s%s too long", buf, origin, suffix);
        continue;
      }

      ri_strcat(buf, origin);
      ri_strcat(buf, suffix);
    }

    debug("trying loader %s at %s", loader_rel_path, buf);
    if (open_rel_loader(exe, buf, loader_rel_path, loader) == 0) {
      debug("opened loader %s at %s", loader_rel_path, buf);
      return 0;
    }
  }

  return -1;
}
794 
// Locates and opens the loader named by LOADER_PATH.
//  - An absolute LOADER_PATH is opened directly.
//  - A relative one is refused (fatal) for secure executables. Otherwise it
//    is looked up in LD_LIBRARY_PATH, then in the executable's
//    DT_RUNPATH/DT_RPATH, or next to the executable if it has no such entry.
// Returns 0 on success and non-zero if a direct open failed. An exhausted
// rpath search is fatal.
static int find_and_open_loader(const ExeInfo* exe, const char* ld_library_path, OpenedLoader* loader) {
  const char* loader_rel_path = LOADER_PATH;

  if (loader_rel_path[0] == '/') {
    return open_loader(exe, loader_rel_path, loader);
  }

  if (exe->secure) {
    fatal("relinterp not supported for secure executables");
  }

  if (search_path_list_for_loader(exe, loader_rel_path, ld_library_path, "LD_LIBRARY_PATH", false, loader) == 0) {
    return 0;
  }

  const bool have_runpath = exe->search_paths != NULL && ri_strlen(exe->search_paths) != 0;
  if (!have_runpath) {
    // If no DT_RUNPATH search relative to the exe.
    char origin[PATH_MAX];
    get_origin(origin, sizeof(origin));
    return open_rel_loader(exe, origin, loader_rel_path, loader);
  }

  if (search_path_list_for_loader(exe, loader_rel_path, exe->search_paths, "rpath", true, loader) == 0) {
    return 0;
  }

  fatal("unable to find loader %s in rpath %s", loader_rel_path, exe->search_paths);
}
823 
// Use a trick to determine whether the executable has been relocated yet. This variable points to
// a variable in libc. It will be NULL if and only if the program hasn't been linked yet. This
// should accommodate these situations:
//  - The program was actually statically-linked instead.
//  - Either a PIE or non-PIE dynamic executable.
//  - Any situation where the loader calls the executable's _start:
//     - In normal operation, the kernel calls the executable's _start, _start jumps to the loader's
//       entry point, which jumps to _start again after linking it.
//     - The executable actually has its PT_INTERP set after all.
//     - The user runs the loader, passing it the path of the executable.
// This C file must always be compiled as PIC, or else the linker will use a COPY relocation and
// duplicate "environ" into the executable.
//
// Returns true when the address obtained for `environ` is non-NULL.
static bool is_exe_relocated(void) {
  // Use the GOT to get the address of environ.
  extern char** environ;
  // Go through optimizer_barrier() so the comparison below is done on the
  // value actually read at run time.
  void* read_environ = optimizer_barrier(&environ);
  debug("read_environ = %p", read_environ);
  return read_environ != NULL;
}
843 
_start_c(long * raw_args)844 void _start_c(long* raw_args) {
845   const KernelArguments args = read_args(raw_args);
846   const char* ld_library_path = NULL;
847 
848   for (size_t i = 0; i < args.envp_count; ++i) {
849     if (!ri_strcmp(args.envp[i], "RELINTERP_DEBUG=1")) {
850       g_debug = true;
851     }
852     if (!ri_strncmp(args.envp[i], "LD_LIBRARY_PATH=", 16)) {
853       ld_library_path = args.envp[i] + 16;
854     }
855   }
856   if (args.argc >= 1) {
857     g_prog_name = args.argv[0];
858   }
859 
860   if (is_exe_relocated()) {
861     debug("exe is already relocated, starting main executable");
862     int argc = raw_args[0];
863     char **argv = (void *)(raw_args+1);
864     __libc_start_main(main, argc, argv, _init, _fini, 0);
865   }
866 
867   debug("entering relinterp");
868 
869   const ExeInfo exe = get_exe_info(&args);
870   g_page_size = exe.page_size;
871 
872   OpenedLoader loader;
873   if (find_and_open_loader(&exe, ld_library_path, &loader)) {
874     fatal("failed to open loader");
875   }
876   off_t len = ri_lseek(loader.fd, 0, SEEK_END);
877   if (len == (off_t)-1) fatal("lseek on %s failed: %s", loader.path, ri_strerror(g_errno));
878 
879   void* loader_data = ri_mmap(NULL, len, PROT_READ, MAP_PRIVATE, loader.fd, 0);
880   if (loader_data == (void*)MAP_FAILED) {
881     fatal("could not mmap %s: %s", loader.path, ri_strerror(g_errno));
882   }
883 
884   LoadedInterp interp = load_interp(&loader, (ElfW(Ehdr)*)loader_data);
885   if (ri_munmap(loader_data, len) != 0) fatal("munmap failed: %s", ri_strerror(g_errno));
886 
887   debug("original auxv:");
888   dump_auxv(&args);
889 
890   // Create a virtual phdr table that includes PT_INTERP, for the benefit of loaders that read the
891   // executable PT_INTERP.
892   insert_pt_interp_into_phdr_table(&args, &exe, loader.path);
893   ri_close(loader.fd);
894 
895   // TODO: /proc/pid/auxv isn't updated with the new auxv vector. Is it possible to update it?
896   // XXX: If we try to update it, we'd use prctl(PR_SET_MM, PR_SET_MM_AUXV, &vec, size, 0)
897   // Maybe updating it would be useful as a way to communicate the loader's base to a debugger.
898   // e.g. lldb uses AT_BASE in the aux vector, but it caches the values at process startup, so
899   // it wouldn't currently notice a changed value.
900 
901   // The loader uses AT_BASE to locate itself, so search for the entry and update it. Even though
902   // its value is always zero, the kernel still includes the entry[0]. If this changes (or we want
903   // to make weaker assumptions about the kernel's behavior), then we can copy the kernel arguments
904   // onto the stack (e.g. using alloca) before jumping to the loader's entry point.
905   // [0] https://github.com/torvalds/linux/blob/v5.13/fs/binfmt_elf.c#L263
906   for (size_t i = 0; i < args.auxv_count; ++i) {
907     if (args.auxv[i].key == AT_BASE) {
908       args.auxv[i].value = (unsigned long)interp.base_addr;
909       debug("new auxv:");
910       dump_auxv(&args);
911       debug("transferring to real loader");
912       CRTJMP(interp.entry, raw_args);
913     }
914   }
915   fatal("AT_BASE not found in aux vector");
916 }
917 
918 
// Normally gdb and lldb look for a symbol named "_dl_debug_state" in the
// interpreter to get notified when the dynamic loader has modified the
// list of shared libraries.  When using relinterp, the debugger is not
// aware of the interpreter (PT_INTERP is unset and auxv AT_BASE is 0) so it
// doesn't know where to look for the symbol.  It falls back to looking in the
// executable, so provide a symbol for it to find.  The dynamic loader will
// need to forward its calls to its own _dl_debug_state symbol to this one.
//
// This has to be defined in a .c file because lldb looks for a symbol with
// DWARF language type DW_LANG_C.
//
// The body is intentionally empty: the function exists only as a named
// location for the debugger.
extern void _dl_debug_state() {
}
931