1 //===------------ MachOBuilder.h -- Build MachO Objects ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Build MachO object files for interaction with the ObjC runtime and debugger.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_EXECUTIONENGINE_ORC_MACHOBUILDER_H
14 #define LLVM_EXECUTIONENGINE_ORC_MACHOBUILDER_H
15 
16 #include "llvm/BinaryFormat/MachO.h"
17 #include "llvm/Support/Endian.h"
18 #include "llvm/Support/MathExtras.h"
19 
20 #include <list>
21 #include <map>
22 #include <vector>
23 
24 namespace llvm {
25 namespace orc {
26 
27 template <typename MachOStruct>
writeMachOStruct(MutableArrayRef<char> Buf,size_t Offset,MachOStruct S,bool SwapStruct)28 size_t writeMachOStruct(MutableArrayRef<char> Buf, size_t Offset, MachOStruct S,
29                         bool SwapStruct) {
30   if (SwapStruct)
31     MachO::swapStruct(S);
32   assert(Offset + sizeof(MachOStruct) <= Buf.size() && "Buffer overflow");
33   memcpy(&Buf[Offset], reinterpret_cast<const char *>(&S), sizeof(MachOStruct));
34   return Offset + sizeof(MachOStruct);
35 }
36 
37 /// Base type for MachOBuilder load command wrappers.
38 struct MachOBuilderLoadCommandBase {
~MachOBuilderLoadCommandBaseMachOBuilderLoadCommandBase39   virtual ~MachOBuilderLoadCommandBase() {}
40   virtual size_t size() const = 0;
41   virtual size_t write(MutableArrayRef<char> Buf, size_t Offset,
42                        bool SwapStruct) = 0;
43 };
44 
/// MachOBuilder load command wrapper type.
///
/// Only declared here. The explicit specializations are produced by the
/// HANDLE_LOAD_COMMAND macro defined below.
template <MachO::LoadCommandType LCType> struct MachOBuilderLoadCommand;

// Defines the MachOBuilderLoadCommand specialization for load command `Name`.
// The specialization derives from both the raw MachO::LCStruct and
// MachOBuilderLoadCommandBase, so the raw fields (cmd, cmdsize, ...) are
// members of the wrapper itself.
//   - The default constructor zero-fills the raw struct, then sets `cmd` to
//     Value and `cmdsize` to sizeof(CmdStruct).
//   - The variadic constructor brace-initializes the raw struct with Value,
//     sizeof(CmdStruct), and then the forwarded arguments, in that order.
//   - size() reports the current `cmdsize`; write() emits only the raw struct
//     (via writeMachOStruct) and returns the offset that follows it.
// NOTE(review): the variadic constructor is an unconstrained forwarding
// constructor, so it can be selected for a single non-const lvalue argument of
// the wrapper's own type instead of the copy constructor -- confirm no caller
// copies wrappers that way.
#define HANDLE_LOAD_COMMAND(Name, Value, LCStruct)                             \
  template <>                                                                  \
  struct MachOBuilderLoadCommand<MachO::Name>                                  \
      : public MachO::LCStruct, public MachOBuilderLoadCommandBase {           \
    using CmdStruct = LCStruct;                                                \
    MachOBuilderLoadCommand() {                                                \
      memset(&rawStruct(), 0, sizeof(CmdStruct));                              \
      cmd = Value;                                                             \
      cmdsize = sizeof(CmdStruct);                                             \
    }                                                                          \
    template <typename... ArgTs>                                               \
    MachOBuilderLoadCommand(ArgTs &&...Args)                                   \
        : CmdStruct{Value, sizeof(CmdStruct), std::forward<ArgTs>(Args)...} {} \
    CmdStruct &rawStruct() { return static_cast<CmdStruct &>(*this); }         \
    size_t size() const override { return cmdsize; }                           \
    size_t write(MutableArrayRef<char> Buf, size_t Offset,                     \
                 bool SwapStruct) override {                                   \
      return writeMachOStruct(Buf, Offset, rawStruct(), SwapStruct);           \
    }                                                                          \
  };

// Presumably expands HANDLE_LOAD_COMMAND once per load command listed in
// MachO.def (the .def contents are not visible from this header).
#include "llvm/BinaryFormat/MachO.def"

#undef HANDLE_LOAD_COMMAND
72 
73 // Builds MachO objects.
74 template <typename MachOTraits> class MachOBuilder {
75 private:
  // A group of nlist entries. SymbolIndexBase is the index of this group's
  // first entry within the combined symbol table: layout() assigns it for
  // each section's container, while the builder's own container keeps 0 and
  // is written first by writeSymbols().
  struct SymbolContainer {
    size_t SymbolIndexBase = 0;
    std::vector<typename MachOTraits::NList> Symbols;
  };
80 
  // One string table slot: the string itself and, once makeStringTable() has
  // run, its byte offset from the start of the string table.
  struct StringTableEntry {
    StringRef S;
    size_t Offset;
  };

  // String table slots, indexed by StringId.
  using StringTable = std::vector<StringTableEntry>;
87 
swapStruct()88   static bool swapStruct() {
89     return MachOTraits::Endianness != llvm::endianness::native;
90   }
91 
92 public:
  // Identifier handed out by addString; ids are assigned in addition order.
  using StringId = size_t;

  // Defined further down; declared here so RelocTarget can refer to it.
  struct Section;
96 
97   // Points to either an nlist entry (as a (symbol-container, index) pair), or
98   // a section.
99   class RelocTarget {
100   public:
RelocTarget(const Section & S)101     RelocTarget(const Section &S) : S(&S), Idx(~0U) {}
RelocTarget(SymbolContainer & SC,size_t Idx)102     RelocTarget(SymbolContainer &SC, size_t Idx) : SC(&SC), Idx(Idx) {}
103 
isSymbol()104     bool isSymbol() { return Idx != ~0U; }
105 
getSymbolNum()106     uint32_t getSymbolNum() {
107       assert(isSymbol() && "Target is not a symbol");
108       return SC->SymbolIndexBase + Idx;
109     }
110 
getSectionId()111     uint32_t getSectionId() {
112       assert(!isSymbol() && "Target is not a section");
113       return S->SectionNumber;
114     }
115 
nlist()116     typename MachOTraits::NList &nlist() {
117       assert(isSymbol() && "Target is not a symbol");
118       return SC->Symbols[Idx];
119     }
120 
121   private:
122     union {
123       const Section *S;
124       SymbolContainer *SC;
125     };
126     size_t Idx;
127   };
128 
129   struct Reloc : public MachO::relocation_info {
130     RelocTarget Target;
131 
RelocReloc132     Reloc(int32_t Offset, RelocTarget Target, bool PCRel, unsigned Length,
133           unsigned Type)
134         : Target(Target) {
135       assert(Type < 16 && "Relocation type out of range");
136       r_address = Offset; // Will slide to account for sec addr during layout
137       r_symbolnum = 0;
138       r_pcrel = PCRel;
139       r_length = Length;
140       r_extern = Target.isSymbol();
141       r_type = Type;
142     }
143 
rawStructReloc144     MachO::relocation_info &rawStruct() {
145       return static_cast<MachO::relocation_info &>(*this);
146     }
147   };
148 
  // Pointer/length description of a section's bytes; nothing in this struct
  // manages the pointed-to memory. writeSectionContent() copies Size bytes
  // from Data, and skips the copy when Data is null.
  struct SectionContent {
    const char *Data = nullptr;
    size_t Size = 0;
  };
153 
154   struct Section : public MachOTraits::Section, public RelocTarget {
155     MachOBuilder &Builder;
156     SectionContent Content;
157     size_t SectionNumber = 0;
158     SymbolContainer SC;
159     std::vector<Reloc> Relocs;
160 
SectionSection161     Section(MachOBuilder &Builder, StringRef SecName, StringRef SegName)
162         : RelocTarget(*this), Builder(Builder) {
163       memset(&rawStruct(), 0, sizeof(typename MachOTraits::Section));
164       assert(SecName.size() <= 16 && "SecName too long");
165       assert(SegName.size() <= 16 && "SegName too long");
166       memcpy(this->sectname, SecName.data(), SecName.size());
167       memcpy(this->segname, SegName.data(), SegName.size());
168     }
169 
addSymbolSection170     RelocTarget addSymbol(int32_t Offset, StringRef Name, uint8_t Type,
171                           uint16_t Desc) {
172       StringId SI = Builder.addString(Name);
173       typename MachOTraits::NList Sym;
174       Sym.n_strx = SI;
175       Sym.n_type = Type | MachO::N_SECT;
176       Sym.n_sect = MachO::NO_SECT; // Will be filled in later.
177       Sym.n_desc = Desc;
178       Sym.n_value = Offset;
179       SC.Symbols.push_back(Sym);
180       return {SC, SC.Symbols.size() - 1};
181     }
182 
addRelocSection183     void addReloc(int32_t Offset, RelocTarget Target, bool PCRel,
184                   unsigned Length, unsigned Type) {
185       Relocs.push_back({Offset, Target, PCRel, Length, Type});
186     }
187 
rawStructSection188     auto &rawStruct() {
189       return static_cast<typename MachOTraits::Section &>(*this);
190     }
191   };
192 
193   struct Segment : public MachOBuilderLoadCommand<MachOTraits::SegmentCmd> {
194     MachOBuilder &Builder;
195     std::vector<std::unique_ptr<Section>> Sections;
196 
SegmentSegment197     Segment(MachOBuilder &Builder, StringRef SegName)
198         : MachOBuilderLoadCommand<MachOTraits::SegmentCmd>(), Builder(Builder) {
199       assert(SegName.size() <= 16 && "SegName too long");
200       memcpy(this->segname, SegName.data(), SegName.size());
201       this->maxprot =
202           MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
203       this->initprot = this->maxprot;
204     }
205 
addSectionSegment206     Section &addSection(StringRef SecName, StringRef SegName) {
207       Sections.push_back(std::make_unique<Section>(Builder, SecName, SegName));
208       return *Sections.back();
209     }
210 
writeSegment211     size_t write(MutableArrayRef<char> Buf, size_t Offset,
212                  bool SwapStruct) override {
213       Offset = MachOBuilderLoadCommand<MachOTraits::SegmentCmd>::write(
214           Buf, Offset, SwapStruct);
215       for (auto &Sec : Sections)
216         Offset = writeMachOStruct(Buf, Offset, Sec->rawStruct(), SwapStruct);
217       return Offset;
218     }
219   };
220 
MachOBuilder(size_t PageSize)221   MachOBuilder(size_t PageSize) : PageSize(PageSize) {
222     memset((char *)&Header, 0, sizeof(Header));
223     Header.magic = MachOTraits::Magic;
224   }
225 
226   template <MachO::LoadCommandType LCType, typename... ArgTs>
addLoadCommand(ArgTs &&...Args)227   MachOBuilderLoadCommand<LCType> &addLoadCommand(ArgTs &&...Args) {
228     static_assert(LCType != MachOTraits::SegmentCmd,
229                   "Use addSegment to add segment load command");
230     auto LC = std::make_unique<MachOBuilderLoadCommand<LCType>>(
231         std::forward<ArgTs>(Args)...);
232     auto &Tmp = *LC;
233     LoadCommands.push_back(std::move(LC));
234     return Tmp;
235   }
236 
addString(StringRef Str)237   StringId addString(StringRef Str) {
238     if (Strings.empty() && !Str.empty())
239       addString("");
240     return Strings.insert(std::make_pair(Str, Strings.size())).first->second;
241   }
242 
addSegment(StringRef SegName)243   Segment &addSegment(StringRef SegName) {
244     Segments.push_back(Segment(*this, SegName));
245     return Segments.back();
246   }
247 
addSymbol(StringRef Name,uint8_t Type,uint8_t Sect,uint16_t Desc,typename MachOTraits::UIntPtr Value)248   RelocTarget addSymbol(StringRef Name, uint8_t Type, uint8_t Sect,
249                         uint16_t Desc, typename MachOTraits::UIntPtr Value) {
250     StringId SI = addString(Name);
251     typename MachOTraits::NList Sym;
252     Sym.n_strx = SI;
253     Sym.n_type = Type;
254     Sym.n_sect = Sect;
255     Sym.n_desc = Desc;
256     Sym.n_value = Value;
257     SC.Symbols.push_back(Sym);
258     return {SC, SC.Symbols.size() - 1};
259   }
260 
261   // Call to perform layout on the MachO. Returns the total size of the
262   // resulting file.
263   // This method will automatically insert some load commands (e.g.
264   // LC_SYMTAB) and fill in load command fields.
layout()265   size_t layout() {
266 
267     // Build symbol table and add LC_SYMTAB command.
268     makeStringTable();
269     MachOBuilderLoadCommand<MachOTraits::SymTabCmd> *SymTabLC = nullptr;
270     if (!StrTab.empty())
271       SymTabLC = &addLoadCommand<MachOTraits::SymTabCmd>();
272 
273     // Lay out header, segment load command, and other load commands.
274     size_t Offset = sizeof(Header);
275     for (auto &Seg : Segments) {
276       Seg.cmdsize +=
277           Seg.Sections.size() * sizeof(typename MachOTraits::Section);
278       Seg.nsects = Seg.Sections.size();
279       Offset += Seg.cmdsize;
280     }
281     for (auto &LC : LoadCommands)
282       Offset += LC->size();
283 
284     Header.sizeofcmds = Offset - sizeof(Header);
285 
286     // Lay out content, set segment / section addrs and offsets.
287     size_t SegVMAddr = 0;
288     for (auto &Seg : Segments) {
289       Seg.vmaddr = SegVMAddr;
290       Seg.fileoff = Offset;
291       for (auto &Sec : Seg.Sections) {
292         Offset = alignTo(Offset, size_t{1} << Sec->align);
293         if (Sec->Content.Size)
294           Sec->offset = Offset;
295         Sec->size = Sec->Content.Size;
296         Sec->addr = SegVMAddr + Sec->offset - Seg.fileoff;
297         Offset += Sec->Content.Size;
298       }
299       size_t SegContentSize = Offset - Seg.fileoff;
300       Seg.filesize = SegContentSize;
301       Seg.vmsize = Header.filetype == MachO::MH_OBJECT
302                        ? SegContentSize
303                        : alignTo(SegContentSize, PageSize);
304       SegVMAddr += Seg.vmsize;
305     }
306 
307     // Set string table offsets for non-section symbols.
308     for (auto &Sym : SC.Symbols)
309       Sym.n_strx = StrTab[Sym.n_strx].Offset;
310 
311     // Number sections, set symbol section numbers and string table offsets,
312     // count relocations.
313     size_t NumSymbols = SC.Symbols.size();
314     size_t SectionNumber = 0;
315     for (auto &Seg : Segments) {
316       for (auto &Sec : Seg.Sections) {
317         ++SectionNumber;
318         Sec->SectionNumber = SectionNumber;
319         Sec->SC.SymbolIndexBase = NumSymbols;
320         NumSymbols += Sec->SC.Symbols.size();
321         for (auto &Sym : Sec->SC.Symbols) {
322           Sym.n_sect = SectionNumber;
323           Sym.n_strx = StrTab[Sym.n_strx].Offset;
324           Sym.n_value += Sec->addr;
325         }
326       }
327     }
328 
329     // Handle relocations
330     bool OffsetAlignedForRelocs = false;
331     for (auto &Seg : Segments) {
332       for (auto &Sec : Seg.Sections) {
333         if (!Sec->Relocs.empty()) {
334           if (!OffsetAlignedForRelocs) {
335             Offset = alignTo(Offset, sizeof(MachO::relocation_info));
336             OffsetAlignedForRelocs = true;
337           }
338           Sec->reloff = Offset;
339           Sec->nreloc = Sec->Relocs.size();
340           Offset += Sec->Relocs.size() * sizeof(MachO::relocation_info);
341           for (auto &R : Sec->Relocs)
342             R.r_symbolnum = R.Target.isSymbol() ? R.Target.getSymbolNum()
343                                                 : R.Target.getSectionId();
344         }
345       }
346     }
347 
348     // Calculate offset to start of nlist and update symtab command.
349     if (NumSymbols > 0) {
350       Offset = alignTo(Offset, sizeof(typename MachOTraits::NList));
351       SymTabLC->symoff = Offset;
352       SymTabLC->nsyms = NumSymbols;
353 
354       // Calculate string table bounds and update symtab command.
355       if (!StrTab.empty()) {
356         Offset += NumSymbols * sizeof(typename MachOTraits::NList);
357         size_t StringTableSize =
358             StrTab.back().Offset + StrTab.back().S.size() + 1;
359 
360         SymTabLC->stroff = Offset;
361         SymTabLC->strsize = StringTableSize;
362         Offset += StringTableSize;
363       }
364     }
365 
366     return Offset;
367   }
368 
write(MutableArrayRef<char> Buffer)369   void write(MutableArrayRef<char> Buffer) {
370     size_t Offset = 0;
371     Offset = writeHeader(Buffer, Offset);
372     Offset = writeSegments(Buffer, Offset);
373     Offset = writeLoadCommands(Buffer, Offset);
374     Offset = writeSectionContent(Buffer, Offset);
375     Offset = writeRelocations(Buffer, Offset);
376     Offset = writeSymbols(Buffer, Offset);
377     Offset = writeStrings(Buffer, Offset);
378   }
379 
  // The file header. The constructor zero-fills it and sets `magic`;
  // layout() fills in sizeofcmds and writeHeader() fills in ncmds. layout()
  // also reads `filetype`, which nothing in this class assigns -- presumably
  // clients set it (and any other remaining field) directly.
  typename MachOTraits::Header Header;
381 
382 private:
makeStringTable()383   void makeStringTable() {
384     if (Strings.empty())
385       return;
386 
387     StrTab.resize(Strings.size());
388     for (auto &KV : Strings)
389       StrTab[KV.second] = {KV.first, 0};
390     size_t Offset = 0;
391     for (auto &Elem : StrTab) {
392       Elem.Offset = Offset;
393       Offset += Elem.S.size() + 1;
394     }
395   }
396 
writeHeader(MutableArrayRef<char> Buf,size_t Offset)397   size_t writeHeader(MutableArrayRef<char> Buf, size_t Offset) {
398     Header.ncmds = Segments.size() + LoadCommands.size();
399     return writeMachOStruct(Buf, Offset, Header, swapStruct());
400   }
401 
writeSegments(MutableArrayRef<char> Buf,size_t Offset)402   size_t writeSegments(MutableArrayRef<char> Buf, size_t Offset) {
403     for (auto &Seg : Segments)
404       Offset = Seg.write(Buf, Offset, swapStruct());
405     return Offset;
406   }
407 
writeLoadCommands(MutableArrayRef<char> Buf,size_t Offset)408   size_t writeLoadCommands(MutableArrayRef<char> Buf, size_t Offset) {
409     for (auto &LC : LoadCommands)
410       Offset = LC->write(Buf, Offset, swapStruct());
411     return Offset;
412   }
413 
writeSectionContent(MutableArrayRef<char> Buf,size_t Offset)414   size_t writeSectionContent(MutableArrayRef<char> Buf, size_t Offset) {
415     for (auto &Seg : Segments) {
416       for (auto &Sec : Seg.Sections) {
417         if (!Sec->Content.Data) {
418           assert(Sec->Relocs.empty() &&
419                  "Cant' have relocs for zero-fill segment");
420           continue;
421         }
422         while (Offset != Sec->offset)
423           Buf[Offset++] = '\0';
424 
425         assert(Offset + Sec->Content.Size <= Buf.size() && "Buffer overflow");
426         memcpy(&Buf[Offset], Sec->Content.Data, Sec->Content.Size);
427         Offset += Sec->Content.Size;
428       }
429     }
430     return Offset;
431   }
432 
writeRelocations(MutableArrayRef<char> Buf,size_t Offset)433   size_t writeRelocations(MutableArrayRef<char> Buf, size_t Offset) {
434     for (auto &Seg : Segments) {
435       for (auto &Sec : Seg.Sections) {
436         if (!Sec->Relocs.empty()) {
437           while (Offset % sizeof(MachO::relocation_info))
438             Buf[Offset++] = '\0';
439         }
440         for (auto &R : Sec->Relocs) {
441           assert(Offset + sizeof(MachO::relocation_info) <= Buf.size() &&
442                  "Buffer overflow");
443           memcpy(&Buf[Offset], reinterpret_cast<const char *>(&R.rawStruct()),
444                  sizeof(MachO::relocation_info));
445           Offset += sizeof(MachO::relocation_info);
446         }
447       }
448     }
449     return Offset;
450   }
451 
writeSymbols(MutableArrayRef<char> Buf,size_t Offset)452   size_t writeSymbols(MutableArrayRef<char> Buf, size_t Offset) {
453 
454     // Count symbols.
455     size_t NumSymbols = SC.Symbols.size();
456     for (auto &Seg : Segments)
457       for (auto &Sec : Seg.Sections)
458         NumSymbols += Sec->SC.Symbols.size();
459 
460     // If none then return.
461     if (NumSymbols == 0)
462       return Offset;
463 
464     // Align to nlist entry size.
465     while (Offset % sizeof(typename MachOTraits::NList))
466       Buf[Offset++] = '\0';
467 
468     // Write non-section symbols.
469     for (auto &Sym : SC.Symbols)
470       Offset = writeMachOStruct(Buf, Offset, Sym, swapStruct());
471 
472     // Write section symbols.
473     for (auto &Seg : Segments) {
474       for (auto &Sec : Seg.Sections) {
475         for (auto &Sym : Sec->SC.Symbols) {
476           Offset = writeMachOStruct(Buf, Offset, Sym, swapStruct());
477         }
478       }
479     }
480     return Offset;
481   }
482 
writeStrings(MutableArrayRef<char> Buf,size_t Offset)483   size_t writeStrings(MutableArrayRef<char> Buf, size_t Offset) {
484     for (auto &Elem : StrTab) {
485       assert(Offset + Elem.S.size() + 1 <= Buf.size() && "Buffer overflow");
486       memcpy(&Buf[Offset], Elem.S.data(), Elem.S.size());
487       Offset += Elem.S.size();
488       Buf[Offset++] = '\0';
489     }
490     return Offset;
491   }
492 
  // Page size given at construction. layout() rounds each segment's vmsize
  // up to it unless the header's filetype is MH_OBJECT.
  size_t PageSize;
  // Segments in creation order.
  // NOTE(review): std::list is presumably used so the Segment& returned by
  // addSegment stays valid as more segments are added -- confirm.
  std::list<Segment> Segments;
  // Non-segment load commands, in the order they were added.
  std::vector<std::unique_ptr<MachOBuilderLoadCommandBase>> LoadCommands;
  // Symbols added through MachOBuilder::addSymbol (not owned by a Section).
  SymbolContainer SC;

  // Maps strings to their "id" (addition order).
  std::map<StringRef, size_t> Strings;
  // Id-indexed string table, built from Strings by makeStringTable().
  StringTable StrTab;
501 };
502 
503 struct MachO64LE {
504   using UIntPtr = uint64_t;
505   using Header = MachO::mach_header_64;
506   using Section = MachO::section_64;
507   using NList = MachO::nlist_64;
508   using Relocation = MachO::relocation_info;
509 
510   static constexpr llvm::endianness Endianness = llvm::endianness::little;
511   static constexpr uint32_t Magic = MachO::MH_MAGIC_64;
512   static constexpr MachO::LoadCommandType SegmentCmd = MachO::LC_SEGMENT_64;
513   static constexpr MachO::LoadCommandType SymTabCmd = MachO::LC_SYMTAB;
514 };
515 
516 } // namespace orc
517 } // namespace llvm
518 
519 #endif // LLVM_EXECUTIONENGINE_ORC_MACHOBUILDER_H
520