1*9712c20fSFrederick Mayle // Copyright 2006 Google LLC
2*9712c20fSFrederick Mayle //
3*9712c20fSFrederick Mayle // Redistribution and use in source and binary forms, with or without
4*9712c20fSFrederick Mayle // modification, are permitted provided that the following conditions are
5*9712c20fSFrederick Mayle // met:
6*9712c20fSFrederick Mayle //
7*9712c20fSFrederick Mayle // * Redistributions of source code must retain the above copyright
8*9712c20fSFrederick Mayle // notice, this list of conditions and the following disclaimer.
9*9712c20fSFrederick Mayle // * Redistributions in binary form must reproduce the above
10*9712c20fSFrederick Mayle // copyright notice, this list of conditions and the following disclaimer
11*9712c20fSFrederick Mayle // in the documentation and/or other materials provided with the
12*9712c20fSFrederick Mayle // distribution.
13*9712c20fSFrederick Mayle // * Neither the name of Google LLC nor the names of its
14*9712c20fSFrederick Mayle // contributors may be used to endorse or promote products derived from
15*9712c20fSFrederick Mayle // this software without specific prior written permission.
16*9712c20fSFrederick Mayle //
17*9712c20fSFrederick Mayle // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18*9712c20fSFrederick Mayle // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19*9712c20fSFrederick Mayle // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20*9712c20fSFrederick Mayle // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21*9712c20fSFrederick Mayle // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22*9712c20fSFrederick Mayle // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23*9712c20fSFrederick Mayle // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24*9712c20fSFrederick Mayle // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25*9712c20fSFrederick Mayle // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26*9712c20fSFrederick Mayle // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27*9712c20fSFrederick Mayle // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*9712c20fSFrederick Mayle //
29*9712c20fSFrederick Mayle // file_id.cc: Return a unique identifier for a file
30*9712c20fSFrederick Mayle //
31*9712c20fSFrederick Mayle // See file_id.h for documentation
32*9712c20fSFrederick Mayle //
33*9712c20fSFrederick Mayle
34*9712c20fSFrederick Mayle #ifdef HAVE_CONFIG_H
35*9712c20fSFrederick Mayle #include <config.h> // Must come first
36*9712c20fSFrederick Mayle #endif
37*9712c20fSFrederick Mayle
38*9712c20fSFrederick Mayle #include "common/linux/file_id.h"
39*9712c20fSFrederick Mayle
40*9712c20fSFrederick Mayle #include <arpa/inet.h>
41*9712c20fSFrederick Mayle #include <assert.h>
42*9712c20fSFrederick Mayle #include <string.h>
43*9712c20fSFrederick Mayle
44*9712c20fSFrederick Mayle #include <algorithm>
45*9712c20fSFrederick Mayle #include <string>
46*9712c20fSFrederick Mayle
47*9712c20fSFrederick Mayle #include "common/linux/elf_gnu_compat.h"
48*9712c20fSFrederick Mayle #include "common/linux/elfutils.h"
49*9712c20fSFrederick Mayle #include "common/linux/linux_libc_support.h"
50*9712c20fSFrederick Mayle #include "common/linux/memory_mapped_file.h"
51*9712c20fSFrederick Mayle #include "common/using_std_string.h"
52*9712c20fSFrederick Mayle #include "third_party/lss/linux_syscall_support.h"
53*9712c20fSFrederick Mayle
54*9712c20fSFrederick Mayle namespace google_breakpad {
55*9712c20fSFrederick Mayle namespace elf {
56*9712c20fSFrederick Mayle
57*9712c20fSFrederick Mayle // Used in a few places for backwards-compatibility.
58*9712c20fSFrederick Mayle const size_t kMDGUIDSize = sizeof(MDGUID);
59*9712c20fSFrederick Mayle
FileID(const char * path)60*9712c20fSFrederick Mayle FileID::FileID(const char* path) : path_(path) {}
61*9712c20fSFrederick Mayle
// ELF note name and desc are 32-bits word padded.
// Rounds |a| up to the next multiple of 4. The argument is parenthesized so
// that compound expressions (e.g. NOTE_PADDING(x | y)) expand as intended.
#define NOTE_PADDING(a) (((a) + 3) & ~3)
64*9712c20fSFrederick Mayle
65*9712c20fSFrederick Mayle // These functions are also used inside the crashed process, so be safe
66*9712c20fSFrederick Mayle // and use the syscall/libc wrappers instead of direct syscalls or libc.
67*9712c20fSFrederick Mayle
ElfClassBuildIDNoteIdentifier(const void * section,size_t length,wasteful_vector<uint8_t> & identifier)68*9712c20fSFrederick Mayle static bool ElfClassBuildIDNoteIdentifier(const void* section, size_t length,
69*9712c20fSFrederick Mayle wasteful_vector<uint8_t>& identifier) {
70*9712c20fSFrederick Mayle static_assert(sizeof(ElfClass32::Nhdr) == sizeof(ElfClass64::Nhdr),
71*9712c20fSFrederick Mayle "Elf32_Nhdr and Elf64_Nhdr should be the same");
72*9712c20fSFrederick Mayle typedef typename ElfClass32::Nhdr Nhdr;
73*9712c20fSFrederick Mayle
74*9712c20fSFrederick Mayle const void* section_end = reinterpret_cast<const char*>(section) + length;
75*9712c20fSFrederick Mayle const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section);
76*9712c20fSFrederick Mayle while (reinterpret_cast<const void*>(note_header) < section_end) {
77*9712c20fSFrederick Mayle if (note_header->n_type == NT_GNU_BUILD_ID)
78*9712c20fSFrederick Mayle break;
79*9712c20fSFrederick Mayle note_header = reinterpret_cast<const Nhdr*>(
80*9712c20fSFrederick Mayle reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) +
81*9712c20fSFrederick Mayle NOTE_PADDING(note_header->n_namesz) +
82*9712c20fSFrederick Mayle NOTE_PADDING(note_header->n_descsz));
83*9712c20fSFrederick Mayle }
84*9712c20fSFrederick Mayle if (reinterpret_cast<const void*>(note_header) >= section_end ||
85*9712c20fSFrederick Mayle note_header->n_descsz == 0) {
86*9712c20fSFrederick Mayle return false;
87*9712c20fSFrederick Mayle }
88*9712c20fSFrederick Mayle
89*9712c20fSFrederick Mayle const uint8_t* build_id = reinterpret_cast<const uint8_t*>(note_header) +
90*9712c20fSFrederick Mayle sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz);
91*9712c20fSFrederick Mayle identifier.insert(identifier.end(),
92*9712c20fSFrederick Mayle build_id,
93*9712c20fSFrederick Mayle build_id + note_header->n_descsz);
94*9712c20fSFrederick Mayle
95*9712c20fSFrederick Mayle return true;
96*9712c20fSFrederick Mayle }
97*9712c20fSFrederick Mayle
98*9712c20fSFrederick Mayle // Attempt to locate a .note.gnu.build-id section in an ELF binary
99*9712c20fSFrederick Mayle // and copy it into |identifier|.
FindElfBuildIDNote(const void * elf_mapped_base,wasteful_vector<uint8_t> & identifier)100*9712c20fSFrederick Mayle static bool FindElfBuildIDNote(const void* elf_mapped_base,
101*9712c20fSFrederick Mayle wasteful_vector<uint8_t>& identifier) {
102*9712c20fSFrederick Mayle PageAllocator allocator;
103*9712c20fSFrederick Mayle // lld normally creates 2 PT_NOTEs, gold normally creates 1.
104*9712c20fSFrederick Mayle auto_wasteful_vector<ElfSegment, 2> segs(&allocator);
105*9712c20fSFrederick Mayle if (FindElfSegments(elf_mapped_base, PT_NOTE, &segs)) {
106*9712c20fSFrederick Mayle for (ElfSegment& seg : segs) {
107*9712c20fSFrederick Mayle if (ElfClassBuildIDNoteIdentifier(seg.start, seg.size, identifier)) {
108*9712c20fSFrederick Mayle return true;
109*9712c20fSFrederick Mayle }
110*9712c20fSFrederick Mayle }
111*9712c20fSFrederick Mayle }
112*9712c20fSFrederick Mayle
113*9712c20fSFrederick Mayle void* note_section;
114*9712c20fSFrederick Mayle size_t note_size;
115*9712c20fSFrederick Mayle if (FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
116*9712c20fSFrederick Mayle (const void**)¬e_section, ¬e_size)) {
117*9712c20fSFrederick Mayle return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier);
118*9712c20fSFrederick Mayle }
119*9712c20fSFrederick Mayle
120*9712c20fSFrederick Mayle return false;
121*9712c20fSFrederick Mayle }
122*9712c20fSFrederick Mayle
123*9712c20fSFrederick Mayle // Attempt to locate the .text section of an ELF binary and generate
124*9712c20fSFrederick Mayle // a simple hash by XORing the first page worth of bytes into |identifier|.
HashElfTextSection(const void * elf_mapped_base,wasteful_vector<uint8_t> & identifier)125*9712c20fSFrederick Mayle static bool HashElfTextSection(const void* elf_mapped_base,
126*9712c20fSFrederick Mayle wasteful_vector<uint8_t>& identifier) {
127*9712c20fSFrederick Mayle identifier.resize(kMDGUIDSize);
128*9712c20fSFrederick Mayle
129*9712c20fSFrederick Mayle void* text_section;
130*9712c20fSFrederick Mayle size_t text_size;
131*9712c20fSFrederick Mayle if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
132*9712c20fSFrederick Mayle (const void**)&text_section, &text_size) ||
133*9712c20fSFrederick Mayle text_size == 0) {
134*9712c20fSFrederick Mayle return false;
135*9712c20fSFrederick Mayle }
136*9712c20fSFrederick Mayle
137*9712c20fSFrederick Mayle // Only provide |kMDGUIDSize| bytes to keep identifiers produced by this
138*9712c20fSFrederick Mayle // function backwards-compatible.
139*9712c20fSFrederick Mayle my_memset(&identifier[0], 0, kMDGUIDSize);
140*9712c20fSFrederick Mayle const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
141*9712c20fSFrederick Mayle const uint8_t* ptr_end = ptr + std::min(text_size, static_cast<size_t>(4096));
142*9712c20fSFrederick Mayle while (ptr < ptr_end) {
143*9712c20fSFrederick Mayle for (unsigned i = 0; i < kMDGUIDSize; i++)
144*9712c20fSFrederick Mayle identifier[i] ^= ptr[i];
145*9712c20fSFrederick Mayle ptr += kMDGUIDSize;
146*9712c20fSFrederick Mayle }
147*9712c20fSFrederick Mayle return true;
148*9712c20fSFrederick Mayle }
149*9712c20fSFrederick Mayle
150*9712c20fSFrederick Mayle // static
ElfFileIdentifierFromMappedFile(const void * base,wasteful_vector<uint8_t> & identifier)151*9712c20fSFrederick Mayle bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
152*9712c20fSFrederick Mayle wasteful_vector<uint8_t>& identifier) {
153*9712c20fSFrederick Mayle // Look for a build id note first.
154*9712c20fSFrederick Mayle if (FindElfBuildIDNote(base, identifier))
155*9712c20fSFrederick Mayle return true;
156*9712c20fSFrederick Mayle
157*9712c20fSFrederick Mayle // Fall back on hashing the first page of the text section.
158*9712c20fSFrederick Mayle return HashElfTextSection(base, identifier);
159*9712c20fSFrederick Mayle }
160*9712c20fSFrederick Mayle
ElfFileIdentifier(wasteful_vector<uint8_t> & identifier)161*9712c20fSFrederick Mayle bool FileID::ElfFileIdentifier(wasteful_vector<uint8_t>& identifier) {
162*9712c20fSFrederick Mayle MemoryMappedFile mapped_file(path_.c_str(), 0);
163*9712c20fSFrederick Mayle if (!mapped_file.data()) // Should probably check if size >= ElfW(Ehdr)?
164*9712c20fSFrederick Mayle return false;
165*9712c20fSFrederick Mayle
166*9712c20fSFrederick Mayle return ElfFileIdentifierFromMappedFile(mapped_file.data(), identifier);
167*9712c20fSFrederick Mayle }
168*9712c20fSFrederick Mayle
// These three functions are not ever called in an unsafe context, so it's OK
// to allocate memory and use libc.

// Returns the |count| bytes starting at |bytes| rendered as uppercase
// hexadecimal, two characters per byte with no separators. Returns an empty
// string when |count| is zero; |bytes| is not dereferenced in that case.
static string bytes_to_hex_string(const uint8_t* bytes, size_t count) {
  static const char kHexDigits[] = "0123456789ABCDEF";

  string result;
  // Two output characters per input byte; reserve once up front.
  result.reserve(count * 2);
  for (size_t idx = 0; idx < count; ++idx) {
    result.push_back(kHexDigits[bytes[idx] >> 4]);
    result.push_back(kHexDigits[bytes[idx] & 0x0F]);
  }
  return result;
}
180*9712c20fSFrederick Mayle
181*9712c20fSFrederick Mayle // static
ConvertIdentifierToUUIDString(const wasteful_vector<uint8_t> & identifier)182*9712c20fSFrederick Mayle string FileID::ConvertIdentifierToUUIDString(
183*9712c20fSFrederick Mayle const wasteful_vector<uint8_t>& identifier) {
184*9712c20fSFrederick Mayle uint8_t identifier_swapped[kMDGUIDSize] = { 0 };
185*9712c20fSFrederick Mayle
186*9712c20fSFrederick Mayle // Endian-ness swap to match dump processor expectation.
187*9712c20fSFrederick Mayle memcpy(identifier_swapped, &identifier[0],
188*9712c20fSFrederick Mayle std::min(kMDGUIDSize, identifier.size()));
189*9712c20fSFrederick Mayle uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped);
190*9712c20fSFrederick Mayle *data1 = htonl(*data1);
191*9712c20fSFrederick Mayle uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4);
192*9712c20fSFrederick Mayle *data2 = htons(*data2);
193*9712c20fSFrederick Mayle uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6);
194*9712c20fSFrederick Mayle *data3 = htons(*data3);
195*9712c20fSFrederick Mayle
196*9712c20fSFrederick Mayle return bytes_to_hex_string(identifier_swapped, kMDGUIDSize);
197*9712c20fSFrederick Mayle }
198*9712c20fSFrederick Mayle
199*9712c20fSFrederick Mayle // static
ConvertIdentifierToString(const wasteful_vector<uint8_t> & identifier)200*9712c20fSFrederick Mayle string FileID::ConvertIdentifierToString(
201*9712c20fSFrederick Mayle const wasteful_vector<uint8_t>& identifier) {
202*9712c20fSFrederick Mayle return bytes_to_hex_string(&identifier[0], identifier.size());
203*9712c20fSFrederick Mayle }
204*9712c20fSFrederick Mayle
205*9712c20fSFrederick Mayle } // elf
206*9712c20fSFrederick Mayle } // namespace google_breakpad
207