xref: /aosp_15_r20/system/unwinding/libunwindstack/Symbols.cpp (revision eb293b8f56ee8303637c5595cfcdeef8039e85c6)
1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <elf.h>
18 #include <stdint.h>
19 #include <string.h>
20 
21 #include <algorithm>
22 #include <string>
23 #include <vector>
24 
25 #include <unwindstack/Memory.h>
26 
27 #include "Check.h"
28 #include "Symbols.h"
29 
30 namespace unwindstack {
31 
Symbols(uint64_t offset,uint64_t size,uint64_t entry_size,uint64_t str_offset,uint64_t str_size)32 Symbols::Symbols(uint64_t offset, uint64_t size, uint64_t entry_size, uint64_t str_offset,
33                  uint64_t str_size)
34     : offset_(offset),
35       count_(entry_size != 0 ? ((size / entry_size > kMaxSymbols) ? kMaxSymbols : size / entry_size)
36                              : 0),
37       entry_size_(entry_size),
38       str_offset_(str_offset) {
39   if (__builtin_add_overflow(str_offset_, str_size, &str_end_)) {
40     // Set to the max so that the code will still try to get symbol names.
41     // Any reads that might be invalid will simply return no data, so
42     // this will not result in crashes.
43     // The assumption is that this value might have been corrupted, but
44     // enough of the elf data is valid such that the code can still
45     // get symbol information.
46     str_end_ = UINT64_MAX;
47   }
48 }
49 
50 template <typename SymType>
IsFunc(const SymType * entry)51 static bool IsFunc(const SymType* entry) {
52   return entry->st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry->st_info) == STT_FUNC;
53 }
54 
55 // Binary search the symbol table to find function containing the given address.
56 // Without remap, the symbol table is assumed to be sorted and accessed directly.
57 // If the symbol table is not sorted this method might fail but should not crash.
58 // When the indices are remapped, they are guaranteed to be sorted by address.
59 template <typename SymType, bool RemapIndices>
BinarySearch(uint64_t addr,Memory * elf_memory,uint64_t * func_offset)60 Symbols::Info* Symbols::BinarySearch(uint64_t addr, Memory* elf_memory, uint64_t* func_offset) {
61   // Fast-path: Check if the symbol has been already read from memory.
62   // Otherwise use the cache iterator to constrain the binary search range.
63   // (the symbol must be in the gap between this and the previous iterator)
64   auto it = symbols_.upper_bound(addr);
65   if (it != symbols_.end()) {
66     uint64_t sym_value = (it->first - it->second.size);  // Function address.
67     if (sym_value <= addr) {
68       *func_offset = addr - sym_value;
69       return &it->second;
70     }
71   }
72   uint32_t count = RemapIndices ? remap_->size() : count_;
73   uint32_t last = (it != symbols_.end()) ? it->second.index : count;
74   uint32_t first = (it != symbols_.begin()) ? std::prev(it)->second.index + 1 : 0;
75 
76   while (first < last) {
77     uint32_t current = first + (last - first) / 2;
78     uint32_t symbol_index = RemapIndices ? remap_.value()[current] : current;
79     uint64_t offset = symbol_index * entry_size_;
80     if (__builtin_add_overflow(offset, offset_, &offset)) {
81       // The elf data might be malformed.
82       return nullptr;
83     }
84     SymType sym;
85     if (!elf_memory->ReadFully(offset, &sym, sizeof(sym))) {
86       return nullptr;
87     }
88     // There shouldn't be multiple symbols with same end address, but in case there are,
89     // overwrite the cache with the last entry, so that 'sym' and 'info' are consistent.
90     Info& info = symbols_[sym.st_value + sym.st_size];
91     info = {.size = static_cast<uint32_t>(sym.st_size), .index = current};
92     if (addr < sym.st_value) {
93       last = current;
94     } else if (addr < sym.st_value + sym.st_size) {
95       *func_offset = addr - sym.st_value;
96       return &info;
97     } else {
98       first = current + 1;
99     }
100   }
101   return nullptr;
102 }
103 
104 // Create remapping table which allows us to access symbols as if they were sorted by address.
105 template <typename SymType>
BuildRemapTable(Memory * elf_memory)106 void Symbols::BuildRemapTable(Memory* elf_memory) {
107   std::vector<uint64_t> addrs;  // Addresses of all symbols (addrs[i] == symbols[i].st_value).
108   addrs.reserve(count_);
109   remap_.emplace();  // Construct the optional remap table.
110   remap_->reserve(count_);
111   for (size_t symbol_idx = 0; symbol_idx < count_;) {
112     // Read symbols from memory.  We intentionally bypass the cache to save memory.
113     // Do the reads in batches so that we minimize the number of memory read calls.
114     uint64_t read_bytes = (count_ - symbol_idx) * entry_size_;
115     uint8_t buffer[1024];
116     read_bytes = std::min<size_t>(sizeof(buffer), read_bytes);
117     uint64_t offset = symbol_idx * entry_size_;
118     if (__builtin_add_overflow(offset, offset_, &offset)) {
119       // The elf data might be malformed.
120       break;
121     }
122     read_bytes = elf_memory->Read(offset, buffer, read_bytes);
123     if (read_bytes < sizeof(SymType)) {
124       // The elf data might be malformed.
125       break;
126     }
127     for (uint64_t offset = 0; offset <= read_bytes - sizeof(SymType);
128          offset += entry_size_, symbol_idx++) {
129       SymType sym;
130       memcpy(&sym, &buffer[offset], sizeof(SymType));  // Copy to ensure alignment.
131       addrs.push_back(sym.st_value);  // Always insert so it is indexable by symbol index.
132       // NB: It is important to filter our zero-sized symbols since otherwise we can get
133       // duplicate end addresses in the table (e.g. if there is custom "end" symbol marker).
134       if (IsFunc(&sym) && sym.st_size != 0) {
135         remap_->push_back(symbol_idx);  // Indices of function symbols only.
136       }
137     }
138   }
139   // Sort by address to make the remap list binary searchable (stable due to the a<b tie break).
140   auto comp = [&addrs](auto a, auto b) { return std::tie(addrs[a], a) < std::tie(addrs[b], b); };
141   std::sort(remap_->begin(), remap_->end(), comp);
142   // Remove duplicate entries (methods de-duplicated by the linker).
143   auto pred = [&addrs](auto a, auto b) { return addrs[a] == addrs[b]; };
144   remap_->erase(std::unique(remap_->begin(), remap_->end(), pred), remap_->end());
145   remap_->shrink_to_fit();
146 }
147 
148 template <typename SymType>
GetName(uint64_t addr,Memory * elf_memory,SharedString * name,uint64_t * func_offset)149 bool Symbols::GetName(uint64_t addr, Memory* elf_memory, SharedString* name,
150                       uint64_t* func_offset) {
151   Info* info;
152   if (!remap_.has_value()) {
153     // Assume the symbol table is sorted. If it is not, this will gracefully fail.
154     info = BinarySearch<SymType, false>(addr, elf_memory, func_offset);
155     if (info == nullptr) {
156       // Create the remapping table and retry the search.
157       BuildRemapTable<SymType>(elf_memory);
158       symbols_.clear();  // Remove cached symbols since the access pattern will be different.
159       info = BinarySearch<SymType, true>(addr, elf_memory, func_offset);
160     }
161   } else {
162     // Fast search using the previously created remap table.
163     info = BinarySearch<SymType, true>(addr, elf_memory, func_offset);
164   }
165   if (info == nullptr) {
166     return false;
167   }
168   // Read and cache the symbol name.
169   if (info->name.is_null()) {
170     SymType sym;
171     uint32_t symbol_index = remap_.has_value() ? remap_.value()[info->index] : info->index;
172     uint64_t offset = symbol_index * entry_size_;
173     if (__builtin_add_overflow(offset, offset_, &offset)) {
174       // The elf data might be malformed.
175       return false;
176     }
177     if (!elf_memory->ReadFully(offset, &sym, sizeof(sym))) {
178       return false;
179     }
180     std::string symbol_name;
181     uint64_t str;
182     if (__builtin_add_overflow(str_offset_, sym.st_name, &str) || str >= str_end_) {
183       return false;
184     }
185     if (!IsFunc(&sym) || !elf_memory->ReadString(str, &symbol_name, str_end_ - str)) {
186       return false;
187     }
188     info->name = SharedString(std::move(symbol_name));
189   }
190   *name = info->name;
191   return true;
192 }
193 
194 template <typename SymType>
GetGlobal(Memory * elf_memory,const std::string & name,uint64_t * memory_address)195 bool Symbols::GetGlobal(Memory* elf_memory, const std::string& name, uint64_t* memory_address) {
196   // Lookup from cache.
197   auto it = global_variables_.find(name);
198   if (it != global_variables_.end()) {
199     if (it->second.has_value()) {
200       *memory_address = it->second.value();
201       return true;
202     }
203     return false;
204   }
205 
206   // Linear scan of all symbols.
207   for (uint32_t i = 0; i < count_; i++) {
208     uint64_t offset = i * entry_size_;
209     if (__builtin_add_overflow(offset_, offset, &offset)) {
210       // The elf data might be malformed.
211       return false;
212     }
213     SymType entry;
214     if (!elf_memory->ReadFully(offset, &entry, sizeof(entry))) {
215       return false;
216     }
217 
218     if (entry.st_shndx != SHN_UNDEF && ELF32_ST_TYPE(entry.st_info) == STT_OBJECT &&
219         ELF32_ST_BIND(entry.st_info) == STB_GLOBAL) {
220       uint64_t str_offset = str_offset_ + entry.st_name;
221       if (__builtin_add_overflow(str_offset_, entry.st_name, &str_offset)) {
222         // The elf data might be malformed.
223         return false;
224       }
225       if (str_offset < str_end_) {
226         std::string symbol;
227         if (elf_memory->ReadString(str_offset, &symbol, str_end_ - str_offset) && symbol == name) {
228           global_variables_.emplace(name, entry.st_value);
229           *memory_address = entry.st_value;
230           return true;
231         }
232       }
233     }
234   }
235   global_variables_.emplace(name, std::optional<uint64_t>());  // Remember "not found" outcome.
236   return false;
237 }
238 
239 // Instantiate all of the needed template functions.
240 template bool Symbols::GetName<Elf32_Sym>(uint64_t, Memory*, SharedString*, uint64_t*);
241 template bool Symbols::GetName<Elf64_Sym>(uint64_t, Memory*, SharedString*, uint64_t*);
242 
243 template bool Symbols::GetGlobal<Elf32_Sym>(Memory*, const std::string&, uint64_t*);
244 template bool Symbols::GetGlobal<Elf64_Sym>(Memory*, const std::string&, uint64_t*);
245 }  // namespace unwindstack
246