1 /*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <algorithm>
18 #include <deque>
19 #include <map>
20 #include <memory>
21 #include <string>
22 #include <string_view>
23 #include <unordered_map>
24 #include <vector>
25
26 #include "android-base/logging.h"
27 #include "base/os.h"
28 #include "base/unix_file/fd_file.h"
29 #include "elf/elf_builder.h"
30 #include "elf/elf_debug_reader.h"
31 #include "elf/xz_utils.h"
32 #include "stream/file_output_stream.h"
33 #include "stream/vector_output_stream.h"
34
35 namespace art {
36
37 static constexpr size_t kBlockSize = 32 * KB;
38
39 constexpr const char kSortedSymbolName[] = "$android.symtab.sorted";
40
41 template<typename ElfTypes>
WriteMinidebugInfo(const std::vector<uint8_t> & input,std::vector<uint8_t> * output)42 static void WriteMinidebugInfo(const std::vector<uint8_t>& input, std::vector<uint8_t>* output) {
43 using Elf_Addr = typename ElfTypes::Addr;
44 using Elf_Shdr = typename ElfTypes::Shdr;
45 using Elf_Sym = typename ElfTypes::Sym;
46 using Elf_Word = typename ElfTypes::Word;
47 using CIE = typename ElfDebugReader<ElfTypes>::CIE;
48 using FDE = typename ElfDebugReader<ElfTypes>::FDE;
49
50 ElfDebugReader<ElfTypes> reader(input);
51
52 std::vector<uint8_t> output_elf_data;
53 VectorOutputStream output_stream("Output ELF", &output_elf_data);
54 InstructionSet isa = ElfBuilder<ElfTypes>::GetIsaFromHeader(*reader.GetHeader());
55 std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &output_stream));
56 builder->Start(/*write_program_headers=*/ false);
57
58 auto* text = builder->GetText();
59 const Elf_Shdr* original_text = reader.GetSection(".text");
60 CHECK(original_text != nullptr);
61 text->AllocateVirtualMemory(original_text->sh_addr, original_text->sh_size);
62
63 auto* strtab = builder->GetStrTab();
64 auto* symtab = builder->GetSymTab();
65 strtab->Start();
66 {
67 std::unordered_map<uint64_t, uint64_t> dyn_funcs_by_offset;
68 reader.VisitDynamicSymbols([&](Elf_Sym sym, const char*) {
69 // Keep track of all of the dynamic function symbols.
70 if (ELF32_ST_TYPE(sym.st_info) == STT_FUNC && sym.st_size != 0) {
71 auto it = dyn_funcs_by_offset.find(sym.st_value);
72 if (it == dyn_funcs_by_offset.end() || it->second < sym.st_size) {
73 dyn_funcs_by_offset[sym.st_value] = sym.st_size;
74 }
75 }
76 });
77 std::unordered_map<uint64_t, std::string_view> funcs_by_offset;
78 std::multimap<std::string_view, Elf_Sym> syms;
79 reader.VisitFunctionSymbols([&](Elf_Sym sym, const char* name) {
80 // Exclude non-function or empty symbols.
81 if (ELF32_ST_TYPE(sym.st_info) != STT_FUNC || sym.st_size == 0) {
82 return;
83 }
84
85 // Exclude symbols at the same offset as a symbol in the set of
86 // dynamic symbols.
87 auto dyn_it = dyn_funcs_by_offset.find(sym.st_value);
88 if (dyn_it != dyn_funcs_by_offset.end()) {
89 CHECK(dyn_it->second >= sym.st_size);
90 return;
91 }
92
93 // Exclude symbols with the same offset as a previous symbol.
94 if (funcs_by_offset.contains(sym.st_value)) {
95 const std::string_view& previous_name = funcs_by_offset[sym.st_value];
96
97 // Find the previous symbol entry.
98 auto it = syms.find(previous_name);
99 while (it != syms.end() && it->second.st_value != sym.st_value) {
100 ++it;
101 }
102 CHECK(it != syms.end());
103
104 // When there is a duplicate, always choose the symbol with the
105 // largest size.
106 // In order to produce the same symbol table every time, if the
107 // symbol has the same size choose the symbol with the shortest
108 // name, or the symbol first according to ascii comparison.
109 if (sym.st_size < it->second.st_size ||
110 (sym.st_size == it->second.st_size &&
111 (previous_name.size() < strlen(name) || previous_name.compare(name) <= 0))) {
112 return;
113 }
114 syms.erase(it);
115 }
116 funcs_by_offset[sym.st_value] = name;
117 syms.emplace(name, sym);
118 });
119 if (!syms.empty()) {
120 symtab->Add(strtab->Write(kSortedSymbolName), nullptr, 0, 0, STB_GLOBAL, STT_NOTYPE);
121 }
122 for (auto& entry : syms) {
123 std::string_view name = entry.first;
124 const Elf_Sym& sym = entry.second;
125 Elf_Word name_idx = strtab->Write(name);
126 symtab->Add(name_idx, text, sym.st_value, sym.st_size, STB_GLOBAL, STT_FUNC);
127 }
128 }
129 strtab->End();
130 symtab->WriteCachedSection();
131
132 auto* debug_frame = builder->GetDebugFrame();
133 debug_frame->Start();
134 {
135 std::map<std::string_view, Elf_Addr> cie_dedup;
136 std::unordered_map<const CIE*, Elf_Addr> new_cie_offset;
137 std::deque<std::pair<const FDE*, const CIE*>> entries;
138 // Read, de-duplicate and write CIE entries. Read FDE entries.
139 reader.VisitDebugFrame(
140 [&](const CIE* cie) {
141 std::string_view key(reinterpret_cast<const char*>(cie->data()), cie->size());
142 auto it = cie_dedup.emplace(key, debug_frame->GetPosition());
143 if (/* inserted */ it.second) {
144 debug_frame->WriteFully(cie->data(), cie->size());
145 }
146 new_cie_offset[cie] = it.first->second;
147 },
148 [&](const FDE* fde, const CIE* cie) { entries.emplace_back(std::make_pair(fde, cie)); });
149 // Sort FDE entries by opcodes to improve locality for compression (saves ~25%).
150 std::stable_sort(entries.begin(), entries.end(), [](const auto& lhs, const auto& rhs) {
151 constexpr size_t opcode_offset = sizeof(FDE);
152 return std::lexicographical_compare(
153 lhs.first->data() + opcode_offset, lhs.first->data() + lhs.first->size(),
154 rhs.first->data() + opcode_offset, rhs.first->data() + rhs.first->size());
155 });
156 // Write all FDE entries while adjusting the CIE offsets to the new locations.
157 for (const auto& entry : entries) {
158 const FDE* fde = entry.first;
159 const CIE* cie = entry.second;
160 FDE new_header = *fde;
161 new_header.cie_pointer = new_cie_offset[cie];
162 debug_frame->WriteFully(&new_header, sizeof(FDE));
163 debug_frame->WriteFully(fde->data() + sizeof(FDE), fde->size() - sizeof(FDE));
164 }
165 }
166 debug_frame->End();
167
168 builder->End();
169 CHECK(builder->Good());
170
171 XzCompress(ArrayRef<const uint8_t>(output_elf_data), output, 9 /*size*/, kBlockSize);
172 }
173
Main(int argc,char ** argv)174 static int Main(int argc, char** argv) {
175 // Check command like arguments.
176 if (argc != 3) {
177 printf("Usage: create_minidebuginfo ELF_FILE OUT_FILE\n");
178 printf(" ELF_FILE: The path to an ELF file with full symbols (before being stripped).\n");
179 printf(" OUT_FILE: The path for the generated mini-debug-info data (not an elf file).\n");
180 return 1;
181 }
182 const char* input_filename = argv[1];
183 const char* output_filename = argv[2];
184
185 // Read input file.
186 std::unique_ptr<File> input_file(OS::OpenFileForReading(input_filename));
187 CHECK(input_file.get() != nullptr) << "Failed to open input file";
188 std::vector<uint8_t> elf(input_file->GetLength());
189 CHECK(input_file->ReadFully(elf.data(), elf.size())) << "Failed to read input file";
190
191 // Write output file.
192 std::vector<uint8_t> output;
193 if (ElfDebugReader<ElfTypes32>::IsValidElfHeader(elf)) {
194 WriteMinidebugInfo<ElfTypes32>(elf, &output);
195 } else if (ElfDebugReader<ElfTypes64>::IsValidElfHeader(elf)) {
196 WriteMinidebugInfo<ElfTypes64>(elf, &output);
197 } else {
198 LOG(FATAL) << "Invalid ELF file header " << input_filename;
199 }
200 std::unique_ptr<File> output_file(OS::CreateEmptyFile(output_filename));
201 if (!output_file->WriteFully(output.data(), output.size()) || output_file->FlushClose() != 0) {
202 LOG(FATAL) << "Failed to write " << output_filename;
203 }
204 return 0;
205 }
206
207 } // namespace art
208
main(int argc,char ** argv)209 int main(int argc, char** argv) {
210 return art::Main(argc, argv);
211 }
212