xref: /aosp_15_r20/external/google-breakpad/src/common/linux/file_id.cc (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // Copyright 2006 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 //
29 // file_id.cc: Return a unique identifier for a file
30 //
31 // See file_id.h for documentation
32 //
33 
34 #ifdef HAVE_CONFIG_H
35 #include <config.h>  // Must come first
36 #endif
37 
38 #include "common/linux/file_id.h"
39 
40 #include <arpa/inet.h>
41 #include <assert.h>
42 #include <string.h>
43 
44 #include <algorithm>
45 #include <string>
46 
47 #include "common/linux/elf_gnu_compat.h"
48 #include "common/linux/elfutils.h"
49 #include "common/linux/linux_libc_support.h"
50 #include "common/linux/memory_mapped_file.h"
51 #include "common/using_std_string.h"
52 #include "third_party/lss/linux_syscall_support.h"
53 
54 namespace google_breakpad {
55 namespace elf {
56 
57 // Used in a few places for backwards-compatibility.
58 const size_t kMDGUIDSize = sizeof(MDGUID);
59 
FileID(const char * path)60 FileID::FileID(const char* path) : path_(path) {}
61 
// ELF note name and desc are 32-bits word padded: rounds |a| up to the next
// multiple of 4. The argument is parenthesized so that expressions built from
// lower-precedence operators (e.g. |x | y|) are padded as a whole.
#define NOTE_PADDING(a) (((a) + 3) & ~3)
64 
65 // These functions are also used inside the crashed process, so be safe
66 // and use the syscall/libc wrappers instead of direct syscalls or libc.
67 
ElfClassBuildIDNoteIdentifier(const void * section,size_t length,wasteful_vector<uint8_t> & identifier)68 static bool ElfClassBuildIDNoteIdentifier(const void* section, size_t length,
69                                           wasteful_vector<uint8_t>& identifier) {
70   static_assert(sizeof(ElfClass32::Nhdr) == sizeof(ElfClass64::Nhdr),
71                 "Elf32_Nhdr and Elf64_Nhdr should be the same");
72   typedef typename ElfClass32::Nhdr Nhdr;
73 
74   const void* section_end = reinterpret_cast<const char*>(section) + length;
75   const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section);
76   while (reinterpret_cast<const void*>(note_header) < section_end) {
77     if (note_header->n_type == NT_GNU_BUILD_ID)
78       break;
79     note_header = reinterpret_cast<const Nhdr*>(
80                   reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) +
81                   NOTE_PADDING(note_header->n_namesz) +
82                   NOTE_PADDING(note_header->n_descsz));
83   }
84   if (reinterpret_cast<const void*>(note_header) >= section_end ||
85       note_header->n_descsz == 0) {
86     return false;
87   }
88 
89   const uint8_t* build_id = reinterpret_cast<const uint8_t*>(note_header) +
90     sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz);
91   identifier.insert(identifier.end(),
92                     build_id,
93                     build_id + note_header->n_descsz);
94 
95   return true;
96 }
97 
98 // Attempt to locate a .note.gnu.build-id section in an ELF binary
99 // and copy it into |identifier|.
FindElfBuildIDNote(const void * elf_mapped_base,wasteful_vector<uint8_t> & identifier)100 static bool FindElfBuildIDNote(const void* elf_mapped_base,
101                                wasteful_vector<uint8_t>& identifier) {
102   PageAllocator allocator;
103   // lld normally creates 2 PT_NOTEs, gold normally creates 1.
104   auto_wasteful_vector<ElfSegment, 2> segs(&allocator);
105   if (FindElfSegments(elf_mapped_base, PT_NOTE, &segs)) {
106     for (ElfSegment& seg : segs) {
107       if (ElfClassBuildIDNoteIdentifier(seg.start, seg.size, identifier)) {
108         return true;
109       }
110     }
111   }
112 
113   void* note_section;
114   size_t note_size;
115   if (FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
116                      (const void**)&note_section, &note_size)) {
117     return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier);
118   }
119 
120   return false;
121 }
122 
123 // Attempt to locate the .text section of an ELF binary and generate
124 // a simple hash by XORing the first page worth of bytes into |identifier|.
HashElfTextSection(const void * elf_mapped_base,wasteful_vector<uint8_t> & identifier)125 static bool HashElfTextSection(const void* elf_mapped_base,
126                                wasteful_vector<uint8_t>& identifier) {
127   identifier.resize(kMDGUIDSize);
128 
129   void* text_section;
130   size_t text_size;
131   if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
132                       (const void**)&text_section, &text_size) ||
133       text_size == 0) {
134     return false;
135   }
136 
137   // Only provide |kMDGUIDSize| bytes to keep identifiers produced by this
138   // function backwards-compatible.
139   my_memset(&identifier[0], 0, kMDGUIDSize);
140   const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
141   const uint8_t* ptr_end = ptr + std::min(text_size, static_cast<size_t>(4096));
142   while (ptr < ptr_end) {
143     for (unsigned i = 0; i < kMDGUIDSize; i++)
144       identifier[i] ^= ptr[i];
145     ptr += kMDGUIDSize;
146   }
147   return true;
148 }
149 
150 // static
ElfFileIdentifierFromMappedFile(const void * base,wasteful_vector<uint8_t> & identifier)151 bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
152                                              wasteful_vector<uint8_t>& identifier) {
153   // Look for a build id note first.
154   if (FindElfBuildIDNote(base, identifier))
155     return true;
156 
157   // Fall back on hashing the first page of the text section.
158   return HashElfTextSection(base, identifier);
159 }
160 
ElfFileIdentifier(wasteful_vector<uint8_t> & identifier)161 bool FileID::ElfFileIdentifier(wasteful_vector<uint8_t>& identifier) {
162   MemoryMappedFile mapped_file(path_.c_str(), 0);
163   if (!mapped_file.data())  // Should probably check if size >= ElfW(Ehdr)?
164     return false;
165 
166   return ElfFileIdentifierFromMappedFile(mapped_file.data(), identifier);
167 }
168 
169 // These three functions are not ever called in an unsafe context, so it's OK
170 // to allocate memory and use libc.
// Returns the |count| bytes starting at |bytes| rendered as uppercase
// hexadecimal, two digits per byte and with no separators. An empty string
// is returned when |count| is 0, in which case |bytes| is never read.
static string bytes_to_hex_string(const uint8_t* bytes, size_t count) {
  static const char kHexDigits[] = "0123456789ABCDEF";

  string result;
  result.reserve(count * 2);
  // Index with size_t so the loop cannot wrap for counts above UINT_MAX.
  for (size_t idx = 0; idx < count; ++idx) {
    const uint8_t byte = bytes[idx];
    result.push_back(kHexDigits[byte >> 4]);
    result.push_back(kHexDigits[byte & 0x0F]);
  }
  return result;
}
180 
181 // static
ConvertIdentifierToUUIDString(const wasteful_vector<uint8_t> & identifier)182 string FileID::ConvertIdentifierToUUIDString(
183     const wasteful_vector<uint8_t>& identifier) {
184   uint8_t identifier_swapped[kMDGUIDSize] = { 0 };
185 
186   // Endian-ness swap to match dump processor expectation.
187   memcpy(identifier_swapped, &identifier[0],
188          std::min(kMDGUIDSize, identifier.size()));
189   uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped);
190   *data1 = htonl(*data1);
191   uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4);
192   *data2 = htons(*data2);
193   uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6);
194   *data3 = htons(*data3);
195 
196   return bytes_to_hex_string(identifier_swapped, kMDGUIDSize);
197 }
198 
199 // static
ConvertIdentifierToString(const wasteful_vector<uint8_t> & identifier)200 string FileID::ConvertIdentifierToString(
201     const wasteful_vector<uint8_t>& identifier) {
202   return bytes_to_hex_string(&identifier[0], identifier.size());
203 }
204 
205 }  // elf
206 }  // namespace google_breakpad
207