1 //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of SanitizerBinaryMetadata.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/Triple.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/IR/Constant.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/GlobalValue.h"
24 #include "llvm/IR/GlobalVariable.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/Instruction.h"
27 #include "llvm/IR/Instructions.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/IR/Metadata.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/IR/Value.h"
34 #include "llvm/InitializePasses.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Transforms/Instrumentation.h"
39 #include "llvm/Transforms/Utils/ModuleUtils.h"
40
41 #include <array>
42 #include <cstdint>
43
44 using namespace llvm;
45
46 #define DEBUG_TYPE "sanmd"
47
48 namespace {
49
50 //===--- Constants --------------------------------------------------------===//
51
// Base metadata format version; occupies the lower 16 bits of the version
// word.
constexpr uint32_t kVersionBase = 1;
// Version flag (bit 16): offsets are pointer-sized.
constexpr uint32_t kVersionPtrSizeRel = uint32_t{1} << 16;
// Priority given to the generated module constructors and destructors.
constexpr int kCtorDtorPriority = 2;
55
56 // Pairs of names of initialization callback functions and which section
57 // contains the relevant metadata.
58 class MetadataInfo {
59 public:
60 const StringRef FunctionPrefix;
61 const StringRef SectionSuffix;
62 const uint32_t FeatureMask;
63
64 static const MetadataInfo Covered;
65 static const MetadataInfo Atomics;
66
67 private:
68 // Forbid construction elsewhere.
MetadataInfo(StringRef FunctionPrefix,StringRef SectionSuffix,uint32_t Feature)69 explicit constexpr MetadataInfo(StringRef FunctionPrefix,
70 StringRef SectionSuffix, uint32_t Feature)
71 : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix),
72 FeatureMask(Feature) {}
73 };
74 const MetadataInfo MetadataInfo::Covered{"__sanitizer_metadata_covered",
75 kSanitizerBinaryMetadataCoveredSection,
76 kSanitizerBinaryMetadataNone};
77 const MetadataInfo MetadataInfo::Atomics{"__sanitizer_metadata_atomics",
78 kSanitizerBinaryMetadataAtomicsSection,
79 kSanitizerBinaryMetadataAtomics};
80
81 // The only instances of MetadataInfo are the constants above, so a set of
82 // them may simply store pointers to them. To deterministically generate code,
83 // we need to use a set with stable iteration order, such as SetVector.
84 using MetadataInfoSet = SetVector<const MetadataInfo *>;
85
86 //===--- Command-line options ---------------------------------------------===//
87
88 cl::opt<bool> ClWeakCallbacks(
89 "sanitizer-metadata-weak-callbacks",
90 cl::desc("Declare callbacks extern weak, and only call if non-null."),
91 cl::Hidden, cl::init(true));
92
93 cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
94 cl::desc("Emit PCs for covered functions."),
95 cl::Hidden, cl::init(false));
96 cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",
97 cl::desc("Emit PCs for atomic operations."),
98 cl::Hidden, cl::init(false));
99 cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar",
100 cl::desc("Emit PCs for start of functions that are "
101 "subject for use-after-return checking"),
102 cl::Hidden, cl::init(false));
103
104 //===--- Statistics -------------------------------------------------------===//
105
106 STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");
107 STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");
108 STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions");
109
110 //===----------------------------------------------------------------------===//
111
112 // Apply opt overrides.
113 SanitizerBinaryMetadataOptions &&
transformOptionsFromCl(SanitizerBinaryMetadataOptions && Opts)114 transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {
115 Opts.Covered |= ClEmitCovered;
116 Opts.Atomics |= ClEmitAtomics;
117 Opts.UAR |= ClEmitUAR;
118 return std::move(Opts);
119 }
120
121 class SanitizerBinaryMetadata {
122 public:
SanitizerBinaryMetadata(Module & M,SanitizerBinaryMetadataOptions Opts)123 SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts)
124 : Mod(M), Options(transformOptionsFromCl(std::move(Opts))),
125 TargetTriple(M.getTargetTriple()), IRB(M.getContext()) {
126 // FIXME: Make it work with other formats.
127 assert(TargetTriple.isOSBinFormatELF() && "ELF only");
128 }
129
130 bool run();
131
132 private:
133 // Return enabled feature mask of per-instruction metadata.
getEnabledPerInstructionFeature() const134 uint32_t getEnabledPerInstructionFeature() const {
135 uint32_t FeatureMask = 0;
136 if (Options.Atomics)
137 FeatureMask |= MetadataInfo::Atomics.FeatureMask;
138 return FeatureMask;
139 }
140
getVersion() const141 uint32_t getVersion() const {
142 uint32_t Version = kVersionBase;
143 const auto CM = Mod.getCodeModel();
144 if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large))
145 Version |= kVersionPtrSizeRel;
146 return Version;
147 }
148
149 void runOn(Function &F, MetadataInfoSet &MIS);
150
151 // Determines which set of metadata to collect for this instruction.
152 //
153 // Returns true if covered metadata is required to unambiguously interpret
154 // other metadata. For example, if we are interested in atomics metadata, any
155 // function with memory operations (atomic or not) requires covered metadata
156 // to determine if a memory operation is atomic or not in modules compiled
157 // with SanitizerBinaryMetadata.
158 bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
159 uint32_t &FeatureMask);
160
161 // Get start/end section marker pointer.
162 GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
163
164 // Returns the target-dependent section name.
165 StringRef getSectionName(StringRef SectionSuffix);
166
167 // Returns the section start marker name.
168 Twine getSectionStart(StringRef SectionSuffix);
169
170 // Returns the section end marker name.
171 Twine getSectionEnd(StringRef SectionSuffix);
172
173 Module &Mod;
174 const SanitizerBinaryMetadataOptions Options;
175 const Triple TargetTriple;
176 IRBuilder<> IRB;
177 };
178
run()179 bool SanitizerBinaryMetadata::run() {
180 MetadataInfoSet MIS;
181
182 for (Function &F : Mod)
183 runOn(F, MIS);
184
185 if (MIS.empty())
186 return false;
187
188 //
189 // Setup constructors and call all initialization functions for requested
190 // metadata features.
191 //
192
193 auto *Int8PtrTy = IRB.getInt8PtrTy();
194 auto *Int8PtrPtrTy = PointerType::getUnqual(Int8PtrTy);
195 auto *Int32Ty = IRB.getInt32Ty();
196 const std::array<Type *, 3> InitTypes = {Int32Ty, Int8PtrPtrTy, Int8PtrPtrTy};
197 auto *Version = ConstantInt::get(Int32Ty, getVersion());
198
199 for (const MetadataInfo *MI : MIS) {
200 const std::array<Value *, InitTypes.size()> InitArgs = {
201 Version,
202 getSectionMarker(getSectionStart(MI->SectionSuffix), Int8PtrTy),
203 getSectionMarker(getSectionEnd(MI->SectionSuffix), Int8PtrTy),
204 };
205 // We declare the _add and _del functions as weak, and only call them if
206 // there is a valid symbol linked. This allows building binaries with
207 // semantic metadata, but without having callbacks. When a tool that wants
208 // the metadata is linked which provides the callbacks, they will be called.
209 Function *Ctor =
210 createSanitizerCtorAndInitFunctions(
211 Mod, (MI->FunctionPrefix + ".module_ctor").str(),
212 (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs,
213 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
214 .first;
215 Function *Dtor =
216 createSanitizerCtorAndInitFunctions(
217 Mod, (MI->FunctionPrefix + ".module_dtor").str(),
218 (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs,
219 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
220 .first;
221 Constant *CtorData = nullptr;
222 Constant *DtorData = nullptr;
223 if (TargetTriple.supportsCOMDAT()) {
224 // Use COMDAT to deduplicate constructor/destructor function.
225 Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName()));
226 Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName()));
227 CtorData = Ctor;
228 DtorData = Dtor;
229 }
230 appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorData);
231 appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorData);
232 }
233
234 return true;
235 }
236
runOn(Function & F,MetadataInfoSet & MIS)237 void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
238 if (F.empty())
239 return;
240 if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
241 return;
242 // Don't touch available_externally functions, their actual body is elsewhere.
243 if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
244 return;
245
246 MDBuilder MDB(F.getContext());
247
248 // The metadata features enabled for this function, stored along covered
249 // metadata (if enabled).
250 uint32_t FeatureMask = getEnabledPerInstructionFeature();
251 // Don't emit unnecessary covered metadata for all functions to save space.
252 bool RequiresCovered = false;
253 // We can only understand if we need to set UAR feature after looking
254 // at the instructions. So we need to check instructions even if FeatureMask
255 // is empty.
256 if (FeatureMask || Options.UAR) {
257 for (BasicBlock &BB : F)
258 for (Instruction &I : BB)
259 RequiresCovered |= runOn(I, MIS, MDB, FeatureMask);
260 }
261
262 if (F.isVarArg())
263 FeatureMask &= ~kSanitizerBinaryMetadataUAR;
264 if (FeatureMask & kSanitizerBinaryMetadataUAR) {
265 RequiresCovered = true;
266 NumMetadataUAR++;
267 }
268
269 // Covered metadata is always emitted if explicitly requested, otherwise only
270 // if some other metadata requires it to unambiguously interpret it for
271 // modules compiled with SanitizerBinaryMetadata.
272 if (Options.Covered || (FeatureMask && RequiresCovered)) {
273 NumMetadataCovered++;
274 const auto *MI = &MetadataInfo::Covered;
275 MIS.insert(MI);
276 const StringRef Section = getSectionName(MI->SectionSuffix);
277 // The feature mask will be placed after the size (32 bit) of the function,
278 // so in total one covered entry will use `sizeof(void*) + 4 + 4`.
279 Constant *CFM = IRB.getInt32(FeatureMask);
280 F.setMetadata(LLVMContext::MD_pcsections,
281 MDB.createPCSections({{Section, {CFM}}}));
282 }
283 }
284
isUARSafeCall(CallInst * CI)285 bool isUARSafeCall(CallInst *CI) {
286 auto *F = CI->getCalledFunction();
287 // There are no intrinsic functions that leak arguments.
288 // If the called function does not return, the current function
289 // does not return as well, so no possibility of use-after-return.
290 // Sanitizer function also don't leak or don't return.
291 // It's safe to both pass pointers to local variables to them
292 // and to tail-call them.
293 return F && (F->isIntrinsic() || F->doesNotReturn() ||
294 F->getName().startswith("__asan_") ||
295 F->getName().startswith("__hwsan_") ||
296 F->getName().startswith("__ubsan_") ||
297 F->getName().startswith("__msan_") ||
298 F->getName().startswith("__tsan_"));
299 }
300
hasUseAfterReturnUnsafeUses(Value & V)301 bool hasUseAfterReturnUnsafeUses(Value &V) {
302 for (User *U : V.users()) {
303 if (auto *I = dyn_cast<Instruction>(U)) {
304 if (I->isLifetimeStartOrEnd() || I->isDroppable())
305 continue;
306 if (auto *CI = dyn_cast<CallInst>(U)) {
307 if (isUARSafeCall(CI))
308 continue;
309 }
310 if (isa<LoadInst>(U))
311 continue;
312 if (auto *SI = dyn_cast<StoreInst>(U)) {
313 // If storing TO the alloca, then the address isn't taken.
314 if (SI->getOperand(1) == &V)
315 continue;
316 }
317 if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
318 if (!hasUseAfterReturnUnsafeUses(*GEPI))
319 continue;
320 } else if (auto *BCI = dyn_cast<BitCastInst>(U)) {
321 if (!hasUseAfterReturnUnsafeUses(*BCI))
322 continue;
323 }
324 }
325 return true;
326 }
327 return false;
328 }
329
useAfterReturnUnsafe(Instruction & I)330 bool useAfterReturnUnsafe(Instruction &I) {
331 if (isa<AllocaInst>(I))
332 return hasUseAfterReturnUnsafeUses(I);
333 // Tail-called functions are not necessary intercepted
334 // at runtime because there is no call instruction.
335 // So conservatively mark the caller as requiring checking.
336 else if (auto *CI = dyn_cast<CallInst>(&I))
337 return CI->isTailCall() && !isUARSafeCall(CI);
338 return false;
339 }
340
runOn(Instruction & I,MetadataInfoSet & MIS,MDBuilder & MDB,uint32_t & FeatureMask)341 bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
342 MDBuilder &MDB, uint32_t &FeatureMask) {
343 SmallVector<const MetadataInfo *, 1> InstMetadata;
344 bool RequiresCovered = false;
345
346 if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) {
347 if (useAfterReturnUnsafe(I))
348 FeatureMask |= kSanitizerBinaryMetadataUAR;
349 }
350
351 if (Options.Atomics && I.mayReadOrWriteMemory()) {
352 auto SSID = getAtomicSyncScopeID(&I);
353 if (SSID.has_value() && *SSID != SyncScope::SingleThread) {
354 NumMetadataAtomics++;
355 InstMetadata.push_back(&MetadataInfo::Atomics);
356 }
357 RequiresCovered = true;
358 }
359
360 // Attach MD_pcsections to instruction.
361 if (!InstMetadata.empty()) {
362 MIS.insert(InstMetadata.begin(), InstMetadata.end());
363 SmallVector<MDBuilder::PCSection, 1> Sections;
364 for (const auto &MI : InstMetadata)
365 Sections.push_back({getSectionName(MI->SectionSuffix), {}});
366 I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections));
367 }
368
369 return RequiresCovered;
370 }
371
372 GlobalVariable *
getSectionMarker(const Twine & MarkerName,Type * Ty)373 SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
374 // Use ExternalWeak so that if all sections are discarded due to section
375 // garbage collection, the linker will not report undefined symbol errors.
376 auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false,
377 GlobalVariable::ExternalWeakLinkage,
378 /*Initializer=*/nullptr, MarkerName);
379 Marker->setVisibility(GlobalValue::HiddenVisibility);
380 return Marker;
381 }
382
getSectionName(StringRef SectionSuffix)383 StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
384 // FIXME: Other TargetTriple (req. string pool)
385 return SectionSuffix;
386 }
387
getSectionStart(StringRef SectionSuffix)388 Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
389 return "__start_" + SectionSuffix;
390 }
391
getSectionEnd(StringRef SectionSuffix)392 Twine SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
393 return "__stop_" + SectionSuffix;
394 }
395
396 } // namespace
397
SanitizerBinaryMetadataPass(SanitizerBinaryMetadataOptions Opts)398 SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
399 SanitizerBinaryMetadataOptions Opts)
400 : Options(std::move(Opts)) {}
401
402 PreservedAnalyses
run(Module & M,AnalysisManager<Module> & AM)403 SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
404 SanitizerBinaryMetadata Pass(M, Options);
405 if (Pass.run())
406 return PreservedAnalyses::none();
407 return PreservedAnalyses::all();
408 }
409