xref: /aosp_15_r20/art/tools/create_minidebuginfo/create_minidebuginfo.cc (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <algorithm>
18 #include <deque>
19 #include <map>
20 #include <memory>
21 #include <string>
22 #include <string_view>
23 #include <unordered_map>
24 #include <vector>
25 
26 #include "android-base/logging.h"
27 #include "base/os.h"
28 #include "base/unix_file/fd_file.h"
29 #include "elf/elf_builder.h"
30 #include "elf/elf_debug_reader.h"
31 #include "elf/xz_utils.h"
32 #include "stream/file_output_stream.h"
33 #include "stream/vector_output_stream.h"
34 
35 namespace art {
36 
37 static constexpr size_t kBlockSize = 32 * KB;
38 
39 constexpr const char kSortedSymbolName[] = "$android.symtab.sorted";
40 
41 template<typename ElfTypes>
WriteMinidebugInfo(const std::vector<uint8_t> & input,std::vector<uint8_t> * output)42 static void WriteMinidebugInfo(const std::vector<uint8_t>& input, std::vector<uint8_t>* output) {
43   using Elf_Addr = typename ElfTypes::Addr;
44   using Elf_Shdr = typename ElfTypes::Shdr;
45   using Elf_Sym = typename ElfTypes::Sym;
46   using Elf_Word = typename ElfTypes::Word;
47   using CIE = typename ElfDebugReader<ElfTypes>::CIE;
48   using FDE = typename ElfDebugReader<ElfTypes>::FDE;
49 
50   ElfDebugReader<ElfTypes> reader(input);
51 
52   std::vector<uint8_t> output_elf_data;
53   VectorOutputStream output_stream("Output ELF", &output_elf_data);
54   InstructionSet isa = ElfBuilder<ElfTypes>::GetIsaFromHeader(*reader.GetHeader());
55   std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &output_stream));
56   builder->Start(/*write_program_headers=*/ false);
57 
58   auto* text = builder->GetText();
59   const Elf_Shdr* original_text = reader.GetSection(".text");
60   CHECK(original_text != nullptr);
61   text->AllocateVirtualMemory(original_text->sh_addr, original_text->sh_size);
62 
63   auto* strtab = builder->GetStrTab();
64   auto* symtab = builder->GetSymTab();
65   strtab->Start();
66   {
67     std::unordered_map<uint64_t, uint64_t> dyn_funcs_by_offset;
68     reader.VisitDynamicSymbols([&](Elf_Sym sym, const char*) {
69       // Keep track of all of the dynamic function symbols.
70       if (ELF32_ST_TYPE(sym.st_info) == STT_FUNC && sym.st_size != 0) {
71         auto it = dyn_funcs_by_offset.find(sym.st_value);
72         if (it == dyn_funcs_by_offset.end() || it->second < sym.st_size) {
73           dyn_funcs_by_offset[sym.st_value] = sym.st_size;
74         }
75       }
76     });
77     std::unordered_map<uint64_t, std::string_view> funcs_by_offset;
78     std::multimap<std::string_view, Elf_Sym> syms;
79     reader.VisitFunctionSymbols([&](Elf_Sym sym, const char* name) {
80       // Exclude non-function or empty symbols.
81       if (ELF32_ST_TYPE(sym.st_info) != STT_FUNC || sym.st_size == 0) {
82         return;
83       }
84 
85       // Exclude symbols at the same offset as a symbol in the set of
86       // dynamic symbols.
87       auto dyn_it = dyn_funcs_by_offset.find(sym.st_value);
88       if (dyn_it != dyn_funcs_by_offset.end()) {
89         CHECK(dyn_it->second >= sym.st_size);
90         return;
91       }
92 
93       // Exclude symbols with the same offset as a previous symbol.
94       if (funcs_by_offset.contains(sym.st_value)) {
95         const std::string_view& previous_name = funcs_by_offset[sym.st_value];
96 
97         // Find the previous symbol entry.
98         auto it = syms.find(previous_name);
99         while (it != syms.end() && it->second.st_value != sym.st_value) {
100           ++it;
101         }
102         CHECK(it != syms.end());
103 
104         // When there is a duplicate, always choose the symbol with the
105         // largest size.
106         // In order to produce the same symbol table every time, if the
107         // symbol has the same size choose the symbol with the shortest
108         // name, or the symbol first according to ascii comparison.
109         if (sym.st_size < it->second.st_size ||
110             (sym.st_size == it->second.st_size &&
111              (previous_name.size() < strlen(name) || previous_name.compare(name) <= 0))) {
112           return;
113         }
114         syms.erase(it);
115       }
116       funcs_by_offset[sym.st_value] = name;
117       syms.emplace(name, sym);
118     });
119     if (!syms.empty()) {
120       symtab->Add(strtab->Write(kSortedSymbolName), nullptr, 0, 0, STB_GLOBAL, STT_NOTYPE);
121     }
122     for (auto& entry : syms) {
123       std::string_view name = entry.first;
124       const Elf_Sym& sym = entry.second;
125       Elf_Word name_idx = strtab->Write(name);
126       symtab->Add(name_idx, text, sym.st_value, sym.st_size, STB_GLOBAL, STT_FUNC);
127     }
128   }
129   strtab->End();
130   symtab->WriteCachedSection();
131 
132   auto* debug_frame = builder->GetDebugFrame();
133   debug_frame->Start();
134   {
135     std::map<std::string_view, Elf_Addr> cie_dedup;
136     std::unordered_map<const CIE*, Elf_Addr> new_cie_offset;
137     std::deque<std::pair<const FDE*, const CIE*>> entries;
138     // Read, de-duplicate and write CIE entries.  Read FDE entries.
139     reader.VisitDebugFrame(
140         [&](const CIE* cie) {
141           std::string_view key(reinterpret_cast<const char*>(cie->data()), cie->size());
142           auto it = cie_dedup.emplace(key, debug_frame->GetPosition());
143           if (/* inserted */ it.second) {
144             debug_frame->WriteFully(cie->data(), cie->size());
145           }
146           new_cie_offset[cie] = it.first->second;
147         },
148         [&](const FDE* fde, const CIE* cie) { entries.emplace_back(std::make_pair(fde, cie)); });
149     // Sort FDE entries by opcodes to improve locality for compression (saves ~25%).
150     std::stable_sort(entries.begin(), entries.end(), [](const auto& lhs, const auto& rhs) {
151       constexpr size_t opcode_offset = sizeof(FDE);
152       return std::lexicographical_compare(
153           lhs.first->data() + opcode_offset, lhs.first->data() + lhs.first->size(),
154           rhs.first->data() + opcode_offset, rhs.first->data() + rhs.first->size());
155     });
156     // Write all FDE entries while adjusting the CIE offsets to the new locations.
157     for (const auto& entry : entries) {
158       const FDE* fde = entry.first;
159       const CIE* cie = entry.second;
160       FDE new_header = *fde;
161       new_header.cie_pointer = new_cie_offset[cie];
162       debug_frame->WriteFully(&new_header, sizeof(FDE));
163       debug_frame->WriteFully(fde->data() + sizeof(FDE), fde->size() - sizeof(FDE));
164     }
165   }
166   debug_frame->End();
167 
168   builder->End();
169   CHECK(builder->Good());
170 
171   XzCompress(ArrayRef<const uint8_t>(output_elf_data), output, 9 /*size*/, kBlockSize);
172 }
173 
Main(int argc,char ** argv)174 static int Main(int argc, char** argv) {
175   // Check command like arguments.
176   if (argc != 3) {
177     printf("Usage: create_minidebuginfo ELF_FILE OUT_FILE\n");
178     printf("  ELF_FILE: The path to an ELF file with full symbols (before being stripped).\n");
179     printf("  OUT_FILE: The path for the generated mini-debug-info data (not an elf file).\n");
180     return 1;
181   }
182   const char* input_filename = argv[1];
183   const char* output_filename = argv[2];
184 
185   // Read input file.
186   std::unique_ptr<File> input_file(OS::OpenFileForReading(input_filename));
187   CHECK(input_file.get() != nullptr) << "Failed to open input file";
188   std::vector<uint8_t> elf(input_file->GetLength());
189   CHECK(input_file->ReadFully(elf.data(), elf.size())) << "Failed to read input file";
190 
191   // Write output file.
192   std::vector<uint8_t> output;
193   if (ElfDebugReader<ElfTypes32>::IsValidElfHeader(elf)) {
194     WriteMinidebugInfo<ElfTypes32>(elf, &output);
195   } else if (ElfDebugReader<ElfTypes64>::IsValidElfHeader(elf)) {
196     WriteMinidebugInfo<ElfTypes64>(elf, &output);
197   } else {
198     LOG(FATAL) << "Invalid ELF file header " << input_filename;
199   }
200   std::unique_ptr<File> output_file(OS::CreateEmptyFile(output_filename));
201   if (!output_file->WriteFully(output.data(), output.size()) || output_file->FlushClose() != 0) {
202     LOG(FATAL) << "Failed to write " << output_filename;
203   }
204   return 0;
205 }
206 
207 }  // namespace art
208 
main(int argc,char ** argv)209 int main(int argc, char** argv) {
210   return art::Main(argc, argv);
211 }
212