1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
11 //
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
14 //
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
17 //
18 // All three encodings can be used interchangeably as an input sample profile.
19 //
20 //===----------------------------------------------------------------------===//
21
22 #include "llvm/ProfileData/SampleProfReader.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/IR/ProfileSummary.h"
28 #include "llvm/ProfileData/ProfileCommon.h"
29 #include "llvm/ProfileData/SampleProf.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Compression.h"
32 #include "llvm/Support/ErrorOr.h"
33 #include "llvm/Support/JSON.h"
34 #include "llvm/Support/LEB128.h"
35 #include "llvm/Support/LineIterator.h"
36 #include "llvm/Support/MD5.h"
37 #include "llvm/Support/MemoryBuffer.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <algorithm>
40 #include <cstddef>
41 #include <cstdint>
42 #include <limits>
43 #include <memory>
44 #include <system_error>
45 #include <vector>
46
47 using namespace llvm;
48 using namespace sampleprof;
49
50 #define DEBUG_TYPE "samplepgo-reader"
51
// This internal option specifies if the profile uses FS discriminators.
// It only applies to text, binary and compact binary format profiles.
// For ext-binary format profiles, the flag is set in the summary.
//
// The text and binary readers in this file copy the option's value into
// ProfileIsFS (and FunctionSamples::ProfileIsFS) at the start of readImpl().
static cl::opt<bool> ProfileIsFSDisciminator(
    "profile-isfs", cl::Hidden, cl::init(false),
    cl::desc("Profile uses flow sensitive discriminators"));
58
59 /// Dump the function profile for \p FName.
60 ///
61 /// \param FContext Name + context of the function to print.
62 /// \param OS Stream to emit the output to.
dumpFunctionProfile(SampleContext FContext,raw_ostream & OS)63 void SampleProfileReader::dumpFunctionProfile(SampleContext FContext,
64 raw_ostream &OS) {
65 OS << "Function: " << FContext.toString() << ": " << Profiles[FContext];
66 }
67
68 /// Dump all the function profiles found on stream \p OS.
dump(raw_ostream & OS)69 void SampleProfileReader::dump(raw_ostream &OS) {
70 std::vector<NameFunctionSamples> V;
71 sortFuncProfiles(Profiles, V);
72 for (const auto &I : V)
73 dumpFunctionProfile(I.first, OS);
74 }
75
dumpFunctionProfileJson(const FunctionSamples & S,json::OStream & JOS,bool TopLevel=false)76 static void dumpFunctionProfileJson(const FunctionSamples &S,
77 json::OStream &JOS, bool TopLevel = false) {
78 auto DumpBody = [&](const BodySampleMap &BodySamples) {
79 for (const auto &I : BodySamples) {
80 const LineLocation &Loc = I.first;
81 const SampleRecord &Sample = I.second;
82 JOS.object([&] {
83 JOS.attribute("line", Loc.LineOffset);
84 if (Loc.Discriminator)
85 JOS.attribute("discriminator", Loc.Discriminator);
86 JOS.attribute("samples", Sample.getSamples());
87
88 auto CallTargets = Sample.getSortedCallTargets();
89 if (!CallTargets.empty()) {
90 JOS.attributeArray("calls", [&] {
91 for (const auto &J : CallTargets) {
92 JOS.object([&] {
93 JOS.attribute("function", J.first);
94 JOS.attribute("samples", J.second);
95 });
96 }
97 });
98 }
99 });
100 }
101 };
102
103 auto DumpCallsiteSamples = [&](const CallsiteSampleMap &CallsiteSamples) {
104 for (const auto &I : CallsiteSamples)
105 for (const auto &FS : I.second) {
106 const LineLocation &Loc = I.first;
107 const FunctionSamples &CalleeSamples = FS.second;
108 JOS.object([&] {
109 JOS.attribute("line", Loc.LineOffset);
110 if (Loc.Discriminator)
111 JOS.attribute("discriminator", Loc.Discriminator);
112 JOS.attributeArray(
113 "samples", [&] { dumpFunctionProfileJson(CalleeSamples, JOS); });
114 });
115 }
116 };
117
118 JOS.object([&] {
119 JOS.attribute("name", S.getName());
120 JOS.attribute("total", S.getTotalSamples());
121 if (TopLevel)
122 JOS.attribute("head", S.getHeadSamples());
123
124 const auto &BodySamples = S.getBodySamples();
125 if (!BodySamples.empty())
126 JOS.attributeArray("body", [&] { DumpBody(BodySamples); });
127
128 const auto &CallsiteSamples = S.getCallsiteSamples();
129 if (!CallsiteSamples.empty())
130 JOS.attributeArray("callsites",
131 [&] { DumpCallsiteSamples(CallsiteSamples); });
132 });
133 }
134
135 /// Dump all the function profiles found on stream \p OS in the JSON format.
dumpJson(raw_ostream & OS)136 void SampleProfileReader::dumpJson(raw_ostream &OS) {
137 std::vector<NameFunctionSamples> V;
138 sortFuncProfiles(Profiles, V);
139 json::OStream JOS(OS, 2);
140 JOS.arrayBegin();
141 for (const auto &F : V)
142 dumpFunctionProfileJson(*F.second, JOS, true);
143 JOS.arrayEnd();
144
145 // Emit a newline character at the end as json::OStream doesn't emit one.
146 OS << "\n";
147 }
148
149 /// Parse \p Input as function head.
150 ///
151 /// Parse one line of \p Input, and update function name in \p FName,
152 /// function's total sample count in \p NumSamples, function's entry
153 /// count in \p NumHeadSamples.
154 ///
155 /// \returns true if parsing is successful.
ParseHead(const StringRef & Input,StringRef & FName,uint64_t & NumSamples,uint64_t & NumHeadSamples)156 static bool ParseHead(const StringRef &Input, StringRef &FName,
157 uint64_t &NumSamples, uint64_t &NumHeadSamples) {
158 if (Input[0] == ' ')
159 return false;
160 size_t n2 = Input.rfind(':');
161 size_t n1 = Input.rfind(':', n2 - 1);
162 FName = Input.substr(0, n1);
163 if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
164 return false;
165 if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
166 return false;
167 return true;
168 }
169
/// Returns true if line offset \p L is legal, i.e. it fits in 16 bits.
static bool isOffsetLegal(unsigned L) {
  constexpr unsigned MaxLegalOffset = 0xffff;
  return L <= MaxLegalOffset;
}
172
173 /// Parse \p Input that contains metadata.
174 /// Possible metadata:
175 /// - CFG Checksum information:
176 /// !CFGChecksum: 12345
177 /// - CFG Checksum information:
178 /// !Attributes: 1
179 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
parseMetadata(const StringRef & Input,uint64_t & FunctionHash,uint32_t & Attributes)180 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash,
181 uint32_t &Attributes) {
182 if (Input.startswith("!CFGChecksum:")) {
183 StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
184 return !CFGInfo.getAsInteger(10, FunctionHash);
185 }
186
187 if (Input.startswith("!Attributes:")) {
188 StringRef Attrib = Input.substr(strlen("!Attributes:")).trim();
189 return !Attrib.getAsInteger(10, Attributes);
190 }
191
192 return false;
193 }
194
/// Kind of an indented (non-header) line in a text profile, as classified
/// by ParseLine().
enum class LineType {
  // Inlined callsite line: a location followed by 'callee:NUM'.
  CallSiteProfile,
  // Body sample line: a location followed by a sample count and optional
  // call targets.
  BodyProfile,
  // Line whose first non-blank character is '!'.
  Metadata,
};
200
201 /// Parse \p Input as line sample.
202 ///
203 /// \param Input input line.
204 /// \param LineTy Type of this line.
205 /// \param Depth the depth of the inline stack.
206 /// \param NumSamples total samples of the line/inlined callsite.
207 /// \param LineOffset line offset to the start of the function.
208 /// \param Discriminator discriminator of the line.
209 /// \param TargetCountMap map from indirect call target to count.
210 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
211 ///
212 /// returns true if parsing is successful.
ParseLine(const StringRef & Input,LineType & LineTy,uint32_t & Depth,uint64_t & NumSamples,uint32_t & LineOffset,uint32_t & Discriminator,StringRef & CalleeName,DenseMap<StringRef,uint64_t> & TargetCountMap,uint64_t & FunctionHash,uint32_t & Attributes)213 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
214 uint64_t &NumSamples, uint32_t &LineOffset,
215 uint32_t &Discriminator, StringRef &CalleeName,
216 DenseMap<StringRef, uint64_t> &TargetCountMap,
217 uint64_t &FunctionHash, uint32_t &Attributes) {
218 for (Depth = 0; Input[Depth] == ' '; Depth++)
219 ;
220 if (Depth == 0)
221 return false;
222
223 if (Input[Depth] == '!') {
224 LineTy = LineType::Metadata;
225 return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
226 }
227
228 size_t n1 = Input.find(':');
229 StringRef Loc = Input.substr(Depth, n1 - Depth);
230 size_t n2 = Loc.find('.');
231 if (n2 == StringRef::npos) {
232 if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
233 return false;
234 Discriminator = 0;
235 } else {
236 if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
237 return false;
238 if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
239 return false;
240 }
241
242 StringRef Rest = Input.substr(n1 + 2);
243 if (isDigit(Rest[0])) {
244 LineTy = LineType::BodyProfile;
245 size_t n3 = Rest.find(' ');
246 if (n3 == StringRef::npos) {
247 if (Rest.getAsInteger(10, NumSamples))
248 return false;
249 } else {
250 if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
251 return false;
252 }
253 // Find call targets and their sample counts.
254 // Note: In some cases, there are symbols in the profile which are not
255 // mangled. To accommodate such cases, use colon + integer pairs as the
256 // anchor points.
257 // An example:
258 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
259 // ":1000" and ":437" are used as anchor points so the string above will
260 // be interpreted as
261 // target: _M_construct<char *>
262 // count: 1000
263 // target: string_view<std::allocator<char> >
264 // count: 437
265 while (n3 != StringRef::npos) {
266 n3 += Rest.substr(n3).find_first_not_of(' ');
267 Rest = Rest.substr(n3);
268 n3 = Rest.find_first_of(':');
269 if (n3 == StringRef::npos || n3 == 0)
270 return false;
271
272 StringRef Target;
273 uint64_t count, n4;
274 while (true) {
275 // Get the segment after the current colon.
276 StringRef AfterColon = Rest.substr(n3 + 1);
277 // Get the target symbol before the current colon.
278 Target = Rest.substr(0, n3);
279 // Check if the word after the current colon is an integer.
280 n4 = AfterColon.find_first_of(' ');
281 n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
282 StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
283 if (!WordAfterColon.getAsInteger(10, count))
284 break;
285
286 // Try to find the next colon.
287 uint64_t n5 = AfterColon.find_first_of(':');
288 if (n5 == StringRef::npos)
289 return false;
290 n3 += n5 + 1;
291 }
292
293 // An anchor point is found. Save the {target, count} pair
294 TargetCountMap[Target] = count;
295 if (n4 == Rest.size())
296 break;
297 // Change n3 to the next blank space after colon + integer pair.
298 n3 = n4;
299 }
300 } else {
301 LineTy = LineType::CallSiteProfile;
302 size_t n3 = Rest.find_last_of(':');
303 CalleeName = Rest.substr(0, n3);
304 if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
305 return false;
306 }
307 return true;
308 }
309
310 /// Load samples from a text file.
311 ///
312 /// See the documentation at the top of the file for an explanation of
313 /// the expected format.
314 ///
315 /// \returns true if the file was loaded successfully, false otherwise.
readImpl()316 std::error_code SampleProfileReaderText::readImpl() {
317 line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
318 sampleprof_error Result = sampleprof_error::success;
319
320 InlineCallStack InlineStack;
321 uint32_t TopLevelProbeProfileCount = 0;
322
323 // DepthMetadata tracks whether we have processed metadata for the current
324 // top-level or nested function profile.
325 uint32_t DepthMetadata = 0;
326
327 ProfileIsFS = ProfileIsFSDisciminator;
328 FunctionSamples::ProfileIsFS = ProfileIsFS;
329 for (; !LineIt.is_at_eof(); ++LineIt) {
330 if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
331 continue;
332 // Read the header of each function.
333 //
334 // Note that for function identifiers we are actually expecting
335 // mangled names, but we may not always get them. This happens when
336 // the compiler decides not to emit the function (e.g., it was inlined
337 // and removed). In this case, the binary will not have the linkage
338 // name for the function, so the profiler will emit the function's
339 // unmangled name, which may contain characters like ':' and '>' in its
340 // name (member functions, templates, etc).
341 //
342 // The only requirement we place on the identifier, then, is that it
343 // should not begin with a number.
344 if ((*LineIt)[0] != ' ') {
345 uint64_t NumSamples, NumHeadSamples;
346 StringRef FName;
347 if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
348 reportError(LineIt.line_number(),
349 "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
350 return sampleprof_error::malformed;
351 }
352 DepthMetadata = 0;
353 SampleContext FContext(FName, CSNameTable);
354 if (FContext.hasContext())
355 ++CSProfileCount;
356 Profiles[FContext] = FunctionSamples();
357 FunctionSamples &FProfile = Profiles[FContext];
358 FProfile.setContext(FContext);
359 MergeResult(Result, FProfile.addTotalSamples(NumSamples));
360 MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
361 InlineStack.clear();
362 InlineStack.push_back(&FProfile);
363 } else {
364 uint64_t NumSamples;
365 StringRef FName;
366 DenseMap<StringRef, uint64_t> TargetCountMap;
367 uint32_t Depth, LineOffset, Discriminator;
368 LineType LineTy;
369 uint64_t FunctionHash = 0;
370 uint32_t Attributes = 0;
371 if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
372 Discriminator, FName, TargetCountMap, FunctionHash,
373 Attributes)) {
374 reportError(LineIt.line_number(),
375 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
376 *LineIt);
377 return sampleprof_error::malformed;
378 }
379 if (LineTy != LineType::Metadata && Depth == DepthMetadata) {
380 // Metadata must be put at the end of a function profile.
381 reportError(LineIt.line_number(),
382 "Found non-metadata after metadata: " + *LineIt);
383 return sampleprof_error::malformed;
384 }
385
386 // Here we handle FS discriminators.
387 Discriminator &= getDiscriminatorMask();
388
389 while (InlineStack.size() > Depth) {
390 InlineStack.pop_back();
391 }
392 switch (LineTy) {
393 case LineType::CallSiteProfile: {
394 FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
395 LineLocation(LineOffset, Discriminator))[std::string(FName)];
396 FSamples.setName(FName);
397 MergeResult(Result, FSamples.addTotalSamples(NumSamples));
398 InlineStack.push_back(&FSamples);
399 DepthMetadata = 0;
400 break;
401 }
402 case LineType::BodyProfile: {
403 while (InlineStack.size() > Depth) {
404 InlineStack.pop_back();
405 }
406 FunctionSamples &FProfile = *InlineStack.back();
407 for (const auto &name_count : TargetCountMap) {
408 MergeResult(Result, FProfile.addCalledTargetSamples(
409 LineOffset, Discriminator, name_count.first,
410 name_count.second));
411 }
412 MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
413 NumSamples));
414 break;
415 }
416 case LineType::Metadata: {
417 FunctionSamples &FProfile = *InlineStack.back();
418 if (FunctionHash) {
419 FProfile.setFunctionHash(FunctionHash);
420 if (Depth == 1)
421 ++TopLevelProbeProfileCount;
422 }
423 FProfile.getContext().setAllAttributes(Attributes);
424 if (Attributes & (uint32_t)ContextShouldBeInlined)
425 ProfileIsPreInlined = true;
426 DepthMetadata = Depth;
427 break;
428 }
429 }
430 }
431 }
432
433 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
434 "Cannot have both context-sensitive and regular profile");
435 ProfileIsCS = (CSProfileCount > 0);
436 assert((TopLevelProbeProfileCount == 0 ||
437 TopLevelProbeProfileCount == Profiles.size()) &&
438 "Cannot have both probe-based profiles and regular profiles");
439 ProfileIsProbeBased = (TopLevelProbeProfileCount > 0);
440 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
441 FunctionSamples::ProfileIsCS = ProfileIsCS;
442 FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined;
443
444 if (Result == sampleprof_error::success)
445 computeSummary();
446
447 return Result;
448 }
449
hasFormat(const MemoryBuffer & Buffer)450 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
451 bool result = false;
452
453 // Check that the first non-comment line is a valid function header.
454 line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
455 if (!LineIt.is_at_eof()) {
456 if ((*LineIt)[0] != ' ') {
457 uint64_t NumSamples, NumHeadSamples;
458 StringRef FName;
459 result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
460 }
461 }
462
463 return result;
464 }
465
readNumber()466 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
467 unsigned NumBytesRead = 0;
468 std::error_code EC;
469 uint64_t Val = decodeULEB128(Data, &NumBytesRead);
470
471 if (Val > std::numeric_limits<T>::max())
472 EC = sampleprof_error::malformed;
473 else if (Data + NumBytesRead > End)
474 EC = sampleprof_error::truncated;
475 else
476 EC = sampleprof_error::success;
477
478 if (EC) {
479 reportError(0, EC.message());
480 return EC;
481 }
482
483 Data += NumBytesRead;
484 return static_cast<T>(Val);
485 }
486
readString()487 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
488 std::error_code EC;
489 StringRef Str(reinterpret_cast<const char *>(Data));
490 if (Data + Str.size() + 1 > End) {
491 EC = sampleprof_error::truncated;
492 reportError(0, EC.message());
493 return EC;
494 }
495
496 Data += Str.size() + 1;
497 return Str;
498 }
499
500 template <typename T>
readUnencodedNumber()501 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
502 std::error_code EC;
503
504 if (Data + sizeof(T) > End) {
505 EC = sampleprof_error::truncated;
506 reportError(0, EC.message());
507 return EC;
508 }
509
510 using namespace support;
511 T Val = endian::readNext<T, little, unaligned>(Data);
512 return Val;
513 }
514
515 template <typename T>
readStringIndex(T & Table)516 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
517 std::error_code EC;
518 auto Idx = readNumber<uint32_t>();
519 if (std::error_code EC = Idx.getError())
520 return EC;
521 if (*Idx >= Table.size())
522 return sampleprof_error::truncated_name_table;
523 return *Idx;
524 }
525
readStringFromTable()526 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
527 auto Idx = readStringIndex(NameTable);
528 if (std::error_code EC = Idx.getError())
529 return EC;
530
531 return NameTable[*Idx];
532 }
533
readSampleContextFromTable()534 ErrorOr<SampleContext> SampleProfileReaderBinary::readSampleContextFromTable() {
535 auto FName(readStringFromTable());
536 if (std::error_code EC = FName.getError())
537 return EC;
538 return SampleContext(*FName);
539 }
540
readStringFromTable()541 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
542 if (!FixedLengthMD5)
543 return SampleProfileReaderBinary::readStringFromTable();
544
545 // read NameTable index.
546 auto Idx = readStringIndex(NameTable);
547 if (std::error_code EC = Idx.getError())
548 return EC;
549
550 // Check whether the name to be accessed has been accessed before,
551 // if not, read it from memory directly.
552 StringRef &SR = NameTable[*Idx];
553 if (SR.empty()) {
554 const uint8_t *SavedData = Data;
555 Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
556 auto FID = readUnencodedNumber<uint64_t>();
557 if (std::error_code EC = FID.getError())
558 return EC;
559 // Save the string converted from uint64_t in MD5StringBuf. All the
560 // references to the name are all StringRefs refering to the string
561 // in MD5StringBuf.
562 MD5StringBuf->push_back(std::to_string(*FID));
563 SR = MD5StringBuf->back();
564 Data = SavedData;
565 }
566 return SR;
567 }
568
readStringFromTable()569 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
570 auto Idx = readStringIndex(NameTable);
571 if (std::error_code EC = Idx.getError())
572 return EC;
573
574 return StringRef(NameTable[*Idx]);
575 }
576
577 std::error_code
readProfile(FunctionSamples & FProfile)578 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
579 auto NumSamples = readNumber<uint64_t>();
580 if (std::error_code EC = NumSamples.getError())
581 return EC;
582 FProfile.addTotalSamples(*NumSamples);
583
584 // Read the samples in the body.
585 auto NumRecords = readNumber<uint32_t>();
586 if (std::error_code EC = NumRecords.getError())
587 return EC;
588
589 for (uint32_t I = 0; I < *NumRecords; ++I) {
590 auto LineOffset = readNumber<uint64_t>();
591 if (std::error_code EC = LineOffset.getError())
592 return EC;
593
594 if (!isOffsetLegal(*LineOffset)) {
595 return std::error_code();
596 }
597
598 auto Discriminator = readNumber<uint64_t>();
599 if (std::error_code EC = Discriminator.getError())
600 return EC;
601
602 auto NumSamples = readNumber<uint64_t>();
603 if (std::error_code EC = NumSamples.getError())
604 return EC;
605
606 auto NumCalls = readNumber<uint32_t>();
607 if (std::error_code EC = NumCalls.getError())
608 return EC;
609
610 // Here we handle FS discriminators:
611 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
612
613 for (uint32_t J = 0; J < *NumCalls; ++J) {
614 auto CalledFunction(readStringFromTable());
615 if (std::error_code EC = CalledFunction.getError())
616 return EC;
617
618 auto CalledFunctionSamples = readNumber<uint64_t>();
619 if (std::error_code EC = CalledFunctionSamples.getError())
620 return EC;
621
622 FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal,
623 *CalledFunction, *CalledFunctionSamples);
624 }
625
626 FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples);
627 }
628
629 // Read all the samples for inlined function calls.
630 auto NumCallsites = readNumber<uint32_t>();
631 if (std::error_code EC = NumCallsites.getError())
632 return EC;
633
634 for (uint32_t J = 0; J < *NumCallsites; ++J) {
635 auto LineOffset = readNumber<uint64_t>();
636 if (std::error_code EC = LineOffset.getError())
637 return EC;
638
639 auto Discriminator = readNumber<uint64_t>();
640 if (std::error_code EC = Discriminator.getError())
641 return EC;
642
643 auto FName(readStringFromTable());
644 if (std::error_code EC = FName.getError())
645 return EC;
646
647 // Here we handle FS discriminators:
648 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
649
650 FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
651 LineLocation(*LineOffset, DiscriminatorVal))[std::string(*FName)];
652 CalleeProfile.setName(*FName);
653 if (std::error_code EC = readProfile(CalleeProfile))
654 return EC;
655 }
656
657 return sampleprof_error::success;
658 }
659
660 std::error_code
readFuncProfile(const uint8_t * Start)661 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
662 Data = Start;
663 auto NumHeadSamples = readNumber<uint64_t>();
664 if (std::error_code EC = NumHeadSamples.getError())
665 return EC;
666
667 ErrorOr<SampleContext> FContext(readSampleContextFromTable());
668 if (std::error_code EC = FContext.getError())
669 return EC;
670
671 Profiles[*FContext] = FunctionSamples();
672 FunctionSamples &FProfile = Profiles[*FContext];
673 FProfile.setContext(*FContext);
674 FProfile.addHeadSamples(*NumHeadSamples);
675
676 if (FContext->hasContext())
677 CSProfileCount++;
678
679 if (std::error_code EC = readProfile(FProfile))
680 return EC;
681 return sampleprof_error::success;
682 }
683
readImpl()684 std::error_code SampleProfileReaderBinary::readImpl() {
685 ProfileIsFS = ProfileIsFSDisciminator;
686 FunctionSamples::ProfileIsFS = ProfileIsFS;
687 while (!at_eof()) {
688 if (std::error_code EC = readFuncProfile(Data))
689 return EC;
690 }
691
692 return sampleprof_error::success;
693 }
694
695 ErrorOr<SampleContextFrames>
readContextFromTable()696 SampleProfileReaderExtBinaryBase::readContextFromTable() {
697 auto ContextIdx = readNumber<uint32_t>();
698 if (std::error_code EC = ContextIdx.getError())
699 return EC;
700 if (*ContextIdx >= CSNameTable->size())
701 return sampleprof_error::truncated_name_table;
702 return (*CSNameTable)[*ContextIdx];
703 }
704
705 ErrorOr<SampleContext>
readSampleContextFromTable()706 SampleProfileReaderExtBinaryBase::readSampleContextFromTable() {
707 if (ProfileIsCS) {
708 auto FContext(readContextFromTable());
709 if (std::error_code EC = FContext.getError())
710 return EC;
711 return SampleContext(*FContext);
712 } else {
713 auto FName(readStringFromTable());
714 if (std::error_code EC = FName.getError())
715 return EC;
716 return SampleContext(*FName);
717 }
718 }
719
readOneSection(const uint8_t * Start,uint64_t Size,const SecHdrTableEntry & Entry)720 std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
721 const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
722 Data = Start;
723 End = Start + Size;
724 switch (Entry.Type) {
725 case SecProfSummary:
726 if (std::error_code EC = readSummary())
727 return EC;
728 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
729 Summary->setPartialProfile(true);
730 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
731 FunctionSamples::ProfileIsCS = ProfileIsCS = true;
732 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
733 FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
734 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
735 FunctionSamples::ProfileIsFS = ProfileIsFS = true;
736 break;
737 case SecNameTable: {
738 FixedLengthMD5 =
739 hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
740 bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
741 assert((!FixedLengthMD5 || UseMD5) &&
742 "If FixedLengthMD5 is true, UseMD5 has to be true");
743 FunctionSamples::HasUniqSuffix =
744 hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix);
745 if (std::error_code EC = readNameTableSec(UseMD5))
746 return EC;
747 break;
748 }
749 case SecCSNameTable: {
750 if (std::error_code EC = readCSNameTableSec())
751 return EC;
752 break;
753 }
754 case SecLBRProfile:
755 if (std::error_code EC = readFuncProfiles())
756 return EC;
757 break;
758 case SecFuncOffsetTable:
759 FuncOffsetsOrdered = hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered);
760 if (std::error_code EC = readFuncOffsetTable())
761 return EC;
762 break;
763 case SecFuncMetadata: {
764 ProfileIsProbeBased =
765 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
766 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
767 bool HasAttribute =
768 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
769 if (std::error_code EC = readFuncMetadata(HasAttribute))
770 return EC;
771 break;
772 }
773 case SecProfileSymbolList:
774 if (std::error_code EC = readProfileSymbolList())
775 return EC;
776 break;
777 default:
778 if (std::error_code EC = readCustomSection(Entry))
779 return EC;
780 break;
781 }
782 return sampleprof_error::success;
783 }
784
collectFuncsFromModule()785 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
786 if (!M)
787 return false;
788 FuncsToUse.clear();
789 for (auto &F : *M)
790 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
791 return true;
792 }
793
readFuncOffsetTable()794 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
795 // If there are more than one FuncOffsetTable, the profile read associated
796 // with previous FuncOffsetTable has to be done before next FuncOffsetTable
797 // is read.
798 FuncOffsetTable.clear();
799
800 auto Size = readNumber<uint64_t>();
801 if (std::error_code EC = Size.getError())
802 return EC;
803
804 FuncOffsetTable.reserve(*Size);
805
806 if (FuncOffsetsOrdered) {
807 OrderedFuncOffsets =
808 std::make_unique<std::vector<std::pair<SampleContext, uint64_t>>>();
809 OrderedFuncOffsets->reserve(*Size);
810 }
811
812 for (uint64_t I = 0; I < *Size; ++I) {
813 auto FContext(readSampleContextFromTable());
814 if (std::error_code EC = FContext.getError())
815 return EC;
816
817 auto Offset = readNumber<uint64_t>();
818 if (std::error_code EC = Offset.getError())
819 return EC;
820
821 FuncOffsetTable[*FContext] = *Offset;
822 if (FuncOffsetsOrdered)
823 OrderedFuncOffsets->emplace_back(*FContext, *Offset);
824 }
825
826 return sampleprof_error::success;
827 }
828
readFuncProfiles()829 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
830 // Collect functions used by current module if the Reader has been
831 // given a module.
832 // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
833 // which will query FunctionSamples::HasUniqSuffix, so it has to be
834 // called after FunctionSamples::HasUniqSuffix is set, i.e. after
835 // NameTable section is read.
836 bool LoadFuncsToBeUsed = collectFuncsFromModule();
837
838 // When LoadFuncsToBeUsed is false, load all the function profiles.
839 const uint8_t *Start = Data;
840 if (!LoadFuncsToBeUsed) {
841 while (Data < End) {
842 if (std::error_code EC = readFuncProfile(Data))
843 return EC;
844 }
845 assert(Data == End && "More data is read than expected");
846 } else {
847 // Load function profiles on demand.
848 if (Remapper) {
849 for (auto Name : FuncsToUse) {
850 Remapper->insert(Name);
851 }
852 }
853
854 if (ProfileIsCS) {
855 DenseSet<uint64_t> FuncGuidsToUse;
856 if (useMD5()) {
857 for (auto Name : FuncsToUse)
858 FuncGuidsToUse.insert(Function::getGUID(Name));
859 }
860
861 // For each function in current module, load all context profiles for
862 // the function as well as their callee contexts which can help profile
863 // guided importing for ThinLTO. This can be achieved by walking
864 // through an ordered context container, where contexts are laid out
865 // as if they were walked in preorder of a context trie. While
866 // traversing the trie, a link to the highest common ancestor node is
867 // kept so that all of its decendants will be loaded.
868 assert(OrderedFuncOffsets.get() &&
869 "func offset table should always be sorted in CS profile");
870 const SampleContext *CommonContext = nullptr;
871 for (const auto &NameOffset : *OrderedFuncOffsets) {
872 const auto &FContext = NameOffset.first;
873 auto FName = FContext.getName();
874 // For function in the current module, keep its farthest ancestor
875 // context. This can be used to load itself and its child and
876 // sibling contexts.
877 if ((useMD5() && FuncGuidsToUse.count(std::stoull(FName.data()))) ||
878 (!useMD5() && (FuncsToUse.count(FName) ||
879 (Remapper && Remapper->exist(FName))))) {
880 if (!CommonContext || !CommonContext->IsPrefixOf(FContext))
881 CommonContext = &FContext;
882 }
883
884 if (CommonContext == &FContext ||
885 (CommonContext && CommonContext->IsPrefixOf(FContext))) {
886 // Load profile for the current context which originated from
887 // the common ancestor.
888 const uint8_t *FuncProfileAddr = Start + NameOffset.second;
889 assert(FuncProfileAddr < End && "out of LBRProfile section");
890 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
891 return EC;
892 }
893 }
894 } else {
895 if (useMD5()) {
896 for (auto Name : FuncsToUse) {
897 auto GUID = std::to_string(MD5Hash(Name));
898 auto iter = FuncOffsetTable.find(StringRef(GUID));
899 if (iter == FuncOffsetTable.end())
900 continue;
901 const uint8_t *FuncProfileAddr = Start + iter->second;
902 assert(FuncProfileAddr < End && "out of LBRProfile section");
903 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
904 return EC;
905 }
906 } else {
907 for (auto NameOffset : FuncOffsetTable) {
908 SampleContext FContext(NameOffset.first);
909 auto FuncName = FContext.getName();
910 if (!FuncsToUse.count(FuncName) &&
911 (!Remapper || !Remapper->exist(FuncName)))
912 continue;
913 const uint8_t *FuncProfileAddr = Start + NameOffset.second;
914 assert(FuncProfileAddr < End && "out of LBRProfile section");
915 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
916 return EC;
917 }
918 }
919 }
920 Data = End;
921 }
922 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
923 "Cannot have both context-sensitive and regular profile");
924 assert((!CSProfileCount || ProfileIsCS) &&
925 "Section flag should be consistent with actual profile");
926 return sampleprof_error::success;
927 }
928
readProfileSymbolList()929 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
930 if (!ProfSymList)
931 ProfSymList = std::make_unique<ProfileSymbolList>();
932
933 if (std::error_code EC = ProfSymList->read(Data, End - Data))
934 return EC;
935
936 Data = End;
937 return sampleprof_error::success;
938 }
939
decompressSection(const uint8_t * SecStart,const uint64_t SecSize,const uint8_t * & DecompressBuf,uint64_t & DecompressBufSize)940 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
941 const uint8_t *SecStart, const uint64_t SecSize,
942 const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
943 Data = SecStart;
944 End = SecStart + SecSize;
945 auto DecompressSize = readNumber<uint64_t>();
946 if (std::error_code EC = DecompressSize.getError())
947 return EC;
948 DecompressBufSize = *DecompressSize;
949
950 auto CompressSize = readNumber<uint64_t>();
951 if (std::error_code EC = CompressSize.getError())
952 return EC;
953
954 if (!llvm::compression::zlib::isAvailable())
955 return sampleprof_error::zlib_unavailable;
956
957 uint8_t *Buffer = Allocator.Allocate<uint8_t>(DecompressBufSize);
958 size_t UCSize = DecompressBufSize;
959 llvm::Error E = compression::zlib::decompress(ArrayRef(Data, *CompressSize),
960 Buffer, UCSize);
961 if (E)
962 return sampleprof_error::uncompress_failed;
963 DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
964 return sampleprof_error::success;
965 }
966
readImpl()967 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
968 const uint8_t *BufStart =
969 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
970
971 for (auto &Entry : SecHdrTable) {
972 // Skip empty section.
973 if (!Entry.Size)
974 continue;
975
976 // Skip sections without context when SkipFlatProf is true.
977 if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
978 continue;
979
980 const uint8_t *SecStart = BufStart + Entry.Offset;
981 uint64_t SecSize = Entry.Size;
982
983 // If the section is compressed, decompress it into a buffer
984 // DecompressBuf before reading the actual data. The pointee of
985 // 'Data' will be changed to buffer hold by DecompressBuf
986 // temporarily when reading the actual data.
987 bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
988 if (isCompressed) {
989 const uint8_t *DecompressBuf;
990 uint64_t DecompressBufSize;
991 if (std::error_code EC = decompressSection(
992 SecStart, SecSize, DecompressBuf, DecompressBufSize))
993 return EC;
994 SecStart = DecompressBuf;
995 SecSize = DecompressBufSize;
996 }
997
998 if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
999 return EC;
1000 if (Data != SecStart + SecSize)
1001 return sampleprof_error::malformed;
1002
1003 // Change the pointee of 'Data' from DecompressBuf to original Buffer.
1004 if (isCompressed) {
1005 Data = BufStart + Entry.Offset;
1006 End = BufStart + Buffer->getBufferSize();
1007 }
1008 }
1009
1010 return sampleprof_error::success;
1011 }
1012
readImpl()1013 std::error_code SampleProfileReaderCompactBinary::readImpl() {
1014 // Collect functions used by current module if the Reader has been
1015 // given a module.
1016 bool LoadFuncsToBeUsed = collectFuncsFromModule();
1017 ProfileIsFS = ProfileIsFSDisciminator;
1018 FunctionSamples::ProfileIsFS = ProfileIsFS;
1019 std::vector<uint64_t> OffsetsToUse;
1020 if (!LoadFuncsToBeUsed) {
1021 // load all the function profiles.
1022 for (auto FuncEntry : FuncOffsetTable) {
1023 OffsetsToUse.push_back(FuncEntry.second);
1024 }
1025 } else {
1026 // load function profiles on demand.
1027 for (auto Name : FuncsToUse) {
1028 auto GUID = std::to_string(MD5Hash(Name));
1029 auto iter = FuncOffsetTable.find(StringRef(GUID));
1030 if (iter == FuncOffsetTable.end())
1031 continue;
1032 OffsetsToUse.push_back(iter->second);
1033 }
1034 }
1035
1036 for (auto Offset : OffsetsToUse) {
1037 const uint8_t *SavedData = Data;
1038 if (std::error_code EC = readFuncProfile(
1039 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
1040 Offset))
1041 return EC;
1042 Data = SavedData;
1043 }
1044 return sampleprof_error::success;
1045 }
1046
verifySPMagic(uint64_t Magic)1047 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
1048 if (Magic == SPMagic())
1049 return sampleprof_error::success;
1050 return sampleprof_error::bad_magic;
1051 }
1052
verifySPMagic(uint64_t Magic)1053 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
1054 if (Magic == SPMagic(SPF_Ext_Binary))
1055 return sampleprof_error::success;
1056 return sampleprof_error::bad_magic;
1057 }
1058
1059 std::error_code
verifySPMagic(uint64_t Magic)1060 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
1061 if (Magic == SPMagic(SPF_Compact_Binary))
1062 return sampleprof_error::success;
1063 return sampleprof_error::bad_magic;
1064 }
1065
readNameTable()1066 std::error_code SampleProfileReaderBinary::readNameTable() {
1067 auto Size = readNumber<uint32_t>();
1068 if (std::error_code EC = Size.getError())
1069 return EC;
1070 NameTable.reserve(*Size + NameTable.size());
1071 for (uint32_t I = 0; I < *Size; ++I) {
1072 auto Name(readString());
1073 if (std::error_code EC = Name.getError())
1074 return EC;
1075 NameTable.push_back(*Name);
1076 }
1077
1078 return sampleprof_error::success;
1079 }
1080
readMD5NameTable()1081 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
1082 auto Size = readNumber<uint64_t>();
1083 if (std::error_code EC = Size.getError())
1084 return EC;
1085 MD5StringBuf = std::make_unique<std::vector<std::string>>();
1086 MD5StringBuf->reserve(*Size);
1087 if (FixedLengthMD5) {
1088 // Preallocate and initialize NameTable so we can check whether a name
1089 // index has been read before by checking whether the element in the
1090 // NameTable is empty, meanwhile readStringIndex can do the boundary
1091 // check using the size of NameTable.
1092 NameTable.resize(*Size + NameTable.size());
1093
1094 MD5NameMemStart = Data;
1095 Data = Data + (*Size) * sizeof(uint64_t);
1096 return sampleprof_error::success;
1097 }
1098 NameTable.reserve(*Size);
1099 for (uint64_t I = 0; I < *Size; ++I) {
1100 auto FID = readNumber<uint64_t>();
1101 if (std::error_code EC = FID.getError())
1102 return EC;
1103 MD5StringBuf->push_back(std::to_string(*FID));
1104 // NameTable is a vector of StringRef. Here it is pushing back a
1105 // StringRef initialized with the last string in MD5stringBuf.
1106 NameTable.push_back(MD5StringBuf->back());
1107 }
1108 return sampleprof_error::success;
1109 }
1110
readNameTableSec(bool IsMD5)1111 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
1112 if (IsMD5)
1113 return readMD5NameTable();
1114 return SampleProfileReaderBinary::readNameTable();
1115 }
1116
1117 // Read in the CS name table section, which basically contains a list of context
1118 // vectors. Each element of a context vector, aka a frame, refers to the
1119 // underlying raw function names that are stored in the name table, as well as
1120 // a callsite identifier that only makes sense for non-leaf frames.
readCSNameTableSec()1121 std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
1122 auto Size = readNumber<uint32_t>();
1123 if (std::error_code EC = Size.getError())
1124 return EC;
1125
1126 std::vector<SampleContextFrameVector> *PNameVec =
1127 new std::vector<SampleContextFrameVector>();
1128 PNameVec->reserve(*Size);
1129 for (uint32_t I = 0; I < *Size; ++I) {
1130 PNameVec->emplace_back(SampleContextFrameVector());
1131 auto ContextSize = readNumber<uint32_t>();
1132 if (std::error_code EC = ContextSize.getError())
1133 return EC;
1134 for (uint32_t J = 0; J < *ContextSize; ++J) {
1135 auto FName(readStringFromTable());
1136 if (std::error_code EC = FName.getError())
1137 return EC;
1138 auto LineOffset = readNumber<uint64_t>();
1139 if (std::error_code EC = LineOffset.getError())
1140 return EC;
1141
1142 if (!isOffsetLegal(*LineOffset))
1143 return std::error_code();
1144
1145 auto Discriminator = readNumber<uint64_t>();
1146 if (std::error_code EC = Discriminator.getError())
1147 return EC;
1148
1149 PNameVec->back().emplace_back(
1150 FName.get(), LineLocation(LineOffset.get(), Discriminator.get()));
1151 }
1152 }
1153
1154 // From this point the underlying object of CSNameTable should be immutable.
1155 CSNameTable.reset(PNameVec);
1156 return sampleprof_error::success;
1157 }
1158
1159 std::error_code
1160
readFuncMetadata(bool ProfileHasAttribute,FunctionSamples * FProfile)1161 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
1162 FunctionSamples *FProfile) {
1163 if (Data < End) {
1164 if (ProfileIsProbeBased) {
1165 auto Checksum = readNumber<uint64_t>();
1166 if (std::error_code EC = Checksum.getError())
1167 return EC;
1168 if (FProfile)
1169 FProfile->setFunctionHash(*Checksum);
1170 }
1171
1172 if (ProfileHasAttribute) {
1173 auto Attributes = readNumber<uint32_t>();
1174 if (std::error_code EC = Attributes.getError())
1175 return EC;
1176 if (FProfile)
1177 FProfile->getContext().setAllAttributes(*Attributes);
1178 }
1179
1180 if (!ProfileIsCS) {
1181 // Read all the attributes for inlined function calls.
1182 auto NumCallsites = readNumber<uint32_t>();
1183 if (std::error_code EC = NumCallsites.getError())
1184 return EC;
1185
1186 for (uint32_t J = 0; J < *NumCallsites; ++J) {
1187 auto LineOffset = readNumber<uint64_t>();
1188 if (std::error_code EC = LineOffset.getError())
1189 return EC;
1190
1191 auto Discriminator = readNumber<uint64_t>();
1192 if (std::error_code EC = Discriminator.getError())
1193 return EC;
1194
1195 auto FContext(readSampleContextFromTable());
1196 if (std::error_code EC = FContext.getError())
1197 return EC;
1198
1199 FunctionSamples *CalleeProfile = nullptr;
1200 if (FProfile) {
1201 CalleeProfile = const_cast<FunctionSamples *>(
1202 &FProfile->functionSamplesAt(LineLocation(
1203 *LineOffset,
1204 *Discriminator))[std::string(FContext.get().getName())]);
1205 }
1206 if (std::error_code EC =
1207 readFuncMetadata(ProfileHasAttribute, CalleeProfile))
1208 return EC;
1209 }
1210 }
1211 }
1212
1213 return sampleprof_error::success;
1214 }
1215
1216 std::error_code
readFuncMetadata(bool ProfileHasAttribute)1217 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
1218 while (Data < End) {
1219 auto FContext(readSampleContextFromTable());
1220 if (std::error_code EC = FContext.getError())
1221 return EC;
1222 FunctionSamples *FProfile = nullptr;
1223 auto It = Profiles.find(*FContext);
1224 if (It != Profiles.end())
1225 FProfile = &It->second;
1226
1227 if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
1228 return EC;
1229 }
1230
1231 assert(Data == End && "More data is read than expected");
1232 return sampleprof_error::success;
1233 }
1234
readNameTable()1235 std::error_code SampleProfileReaderCompactBinary::readNameTable() {
1236 auto Size = readNumber<uint64_t>();
1237 if (std::error_code EC = Size.getError())
1238 return EC;
1239 NameTable.reserve(*Size);
1240 for (uint64_t I = 0; I < *Size; ++I) {
1241 auto FID = readNumber<uint64_t>();
1242 if (std::error_code EC = FID.getError())
1243 return EC;
1244 NameTable.push_back(std::to_string(*FID));
1245 }
1246 return sampleprof_error::success;
1247 }
1248
1249 std::error_code
readSecHdrTableEntry(uint32_t Idx)1250 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
1251 SecHdrTableEntry Entry;
1252 auto Type = readUnencodedNumber<uint64_t>();
1253 if (std::error_code EC = Type.getError())
1254 return EC;
1255 Entry.Type = static_cast<SecType>(*Type);
1256
1257 auto Flags = readUnencodedNumber<uint64_t>();
1258 if (std::error_code EC = Flags.getError())
1259 return EC;
1260 Entry.Flags = *Flags;
1261
1262 auto Offset = readUnencodedNumber<uint64_t>();
1263 if (std::error_code EC = Offset.getError())
1264 return EC;
1265 Entry.Offset = *Offset;
1266
1267 auto Size = readUnencodedNumber<uint64_t>();
1268 if (std::error_code EC = Size.getError())
1269 return EC;
1270 Entry.Size = *Size;
1271
1272 Entry.LayoutIndex = Idx;
1273 SecHdrTable.push_back(std::move(Entry));
1274 return sampleprof_error::success;
1275 }
1276
readSecHdrTable()1277 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
1278 auto EntryNum = readUnencodedNumber<uint64_t>();
1279 if (std::error_code EC = EntryNum.getError())
1280 return EC;
1281
1282 for (uint64_t i = 0; i < (*EntryNum); i++)
1283 if (std::error_code EC = readSecHdrTableEntry(i))
1284 return EC;
1285
1286 return sampleprof_error::success;
1287 }
1288
readHeader()1289 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
1290 const uint8_t *BufStart =
1291 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1292 Data = BufStart;
1293 End = BufStart + Buffer->getBufferSize();
1294
1295 if (std::error_code EC = readMagicIdent())
1296 return EC;
1297
1298 if (std::error_code EC = readSecHdrTable())
1299 return EC;
1300
1301 return sampleprof_error::success;
1302 }
1303
getSectionSize(SecType Type)1304 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
1305 uint64_t Size = 0;
1306 for (auto &Entry : SecHdrTable) {
1307 if (Entry.Type == Type)
1308 Size += Entry.Size;
1309 }
1310 return Size;
1311 }
1312
getFileSize()1313 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
1314 // Sections in SecHdrTable is not necessarily in the same order as
1315 // sections in the profile because section like FuncOffsetTable needs
1316 // to be written after section LBRProfile but needs to be read before
1317 // section LBRProfile, so we cannot simply use the last entry in
1318 // SecHdrTable to calculate the file size.
1319 uint64_t FileSize = 0;
1320 for (auto &Entry : SecHdrTable) {
1321 FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
1322 }
1323 return FileSize;
1324 }
1325
getSecFlagsStr(const SecHdrTableEntry & Entry)1326 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
1327 std::string Flags;
1328 if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
1329 Flags.append("{compressed,");
1330 else
1331 Flags.append("{");
1332
1333 if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1334 Flags.append("flat,");
1335
1336 switch (Entry.Type) {
1337 case SecNameTable:
1338 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
1339 Flags.append("fixlenmd5,");
1340 else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
1341 Flags.append("md5,");
1342 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix))
1343 Flags.append("uniq,");
1344 break;
1345 case SecProfSummary:
1346 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
1347 Flags.append("partial,");
1348 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
1349 Flags.append("context,");
1350 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
1351 Flags.append("preInlined,");
1352 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
1353 Flags.append("fs-discriminator,");
1354 break;
1355 case SecFuncOffsetTable:
1356 if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
1357 Flags.append("ordered,");
1358 break;
1359 case SecFuncMetadata:
1360 if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased))
1361 Flags.append("probe,");
1362 if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute))
1363 Flags.append("attr,");
1364 break;
1365 default:
1366 break;
1367 }
1368 char &last = Flags.back();
1369 if (last == ',')
1370 last = '}';
1371 else
1372 Flags.append("}");
1373 return Flags;
1374 }
1375
dumpSectionInfo(raw_ostream & OS)1376 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1377 uint64_t TotalSecsSize = 0;
1378 for (auto &Entry : SecHdrTable) {
1379 OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
1380 << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1381 << "\n";
1382 ;
1383 TotalSecsSize += Entry.Size;
1384 }
1385 uint64_t HeaderSize = SecHdrTable.front().Offset;
1386 assert(HeaderSize + TotalSecsSize == getFileSize() &&
1387 "Size of 'header + sections' doesn't match the total size of profile");
1388
1389 OS << "Header Size: " << HeaderSize << "\n";
1390 OS << "Total Sections Size: " << TotalSecsSize << "\n";
1391 OS << "File Size: " << getFileSize() << "\n";
1392 return true;
1393 }
1394
readMagicIdent()1395 std::error_code SampleProfileReaderBinary::readMagicIdent() {
1396 // Read and check the magic identifier.
1397 auto Magic = readNumber<uint64_t>();
1398 if (std::error_code EC = Magic.getError())
1399 return EC;
1400 else if (std::error_code EC = verifySPMagic(*Magic))
1401 return EC;
1402
1403 // Read the version number.
1404 auto Version = readNumber<uint64_t>();
1405 if (std::error_code EC = Version.getError())
1406 return EC;
1407 else if (*Version != SPVersion())
1408 return sampleprof_error::unsupported_version;
1409
1410 return sampleprof_error::success;
1411 }
1412
readHeader()1413 std::error_code SampleProfileReaderBinary::readHeader() {
1414 Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1415 End = Data + Buffer->getBufferSize();
1416
1417 if (std::error_code EC = readMagicIdent())
1418 return EC;
1419
1420 if (std::error_code EC = readSummary())
1421 return EC;
1422
1423 if (std::error_code EC = readNameTable())
1424 return EC;
1425 return sampleprof_error::success;
1426 }
1427
readHeader()1428 std::error_code SampleProfileReaderCompactBinary::readHeader() {
1429 SampleProfileReaderBinary::readHeader();
1430 if (std::error_code EC = readFuncOffsetTable())
1431 return EC;
1432 return sampleprof_error::success;
1433 }
1434
readFuncOffsetTable()1435 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
1436 auto TableOffset = readUnencodedNumber<uint64_t>();
1437 if (std::error_code EC = TableOffset.getError())
1438 return EC;
1439
1440 const uint8_t *SavedData = Data;
1441 const uint8_t *TableStart =
1442 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
1443 *TableOffset;
1444 Data = TableStart;
1445
1446 auto Size = readNumber<uint64_t>();
1447 if (std::error_code EC = Size.getError())
1448 return EC;
1449
1450 FuncOffsetTable.reserve(*Size);
1451 for (uint64_t I = 0; I < *Size; ++I) {
1452 auto FName(readStringFromTable());
1453 if (std::error_code EC = FName.getError())
1454 return EC;
1455
1456 auto Offset = readNumber<uint64_t>();
1457 if (std::error_code EC = Offset.getError())
1458 return EC;
1459
1460 FuncOffsetTable[*FName] = *Offset;
1461 }
1462 End = TableStart;
1463 Data = SavedData;
1464 return sampleprof_error::success;
1465 }
1466
collectFuncsFromModule()1467 bool SampleProfileReaderCompactBinary::collectFuncsFromModule() {
1468 if (!M)
1469 return false;
1470 FuncsToUse.clear();
1471 for (auto &F : *M)
1472 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
1473 return true;
1474 }
1475
readSummaryEntry(std::vector<ProfileSummaryEntry> & Entries)1476 std::error_code SampleProfileReaderBinary::readSummaryEntry(
1477 std::vector<ProfileSummaryEntry> &Entries) {
1478 auto Cutoff = readNumber<uint64_t>();
1479 if (std::error_code EC = Cutoff.getError())
1480 return EC;
1481
1482 auto MinBlockCount = readNumber<uint64_t>();
1483 if (std::error_code EC = MinBlockCount.getError())
1484 return EC;
1485
1486 auto NumBlocks = readNumber<uint64_t>();
1487 if (std::error_code EC = NumBlocks.getError())
1488 return EC;
1489
1490 Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
1491 return sampleprof_error::success;
1492 }
1493
readSummary()1494 std::error_code SampleProfileReaderBinary::readSummary() {
1495 auto TotalCount = readNumber<uint64_t>();
1496 if (std::error_code EC = TotalCount.getError())
1497 return EC;
1498
1499 auto MaxBlockCount = readNumber<uint64_t>();
1500 if (std::error_code EC = MaxBlockCount.getError())
1501 return EC;
1502
1503 auto MaxFunctionCount = readNumber<uint64_t>();
1504 if (std::error_code EC = MaxFunctionCount.getError())
1505 return EC;
1506
1507 auto NumBlocks = readNumber<uint64_t>();
1508 if (std::error_code EC = NumBlocks.getError())
1509 return EC;
1510
1511 auto NumFunctions = readNumber<uint64_t>();
1512 if (std::error_code EC = NumFunctions.getError())
1513 return EC;
1514
1515 auto NumSummaryEntries = readNumber<uint64_t>();
1516 if (std::error_code EC = NumSummaryEntries.getError())
1517 return EC;
1518
1519 std::vector<ProfileSummaryEntry> Entries;
1520 for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1521 std::error_code EC = readSummaryEntry(Entries);
1522 if (EC != sampleprof_error::success)
1523 return EC;
1524 }
1525 Summary = std::make_unique<ProfileSummary>(
1526 ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
1527 *MaxFunctionCount, *NumBlocks, *NumFunctions);
1528
1529 return sampleprof_error::success;
1530 }
1531
hasFormat(const MemoryBuffer & Buffer)1532 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1533 const uint8_t *Data =
1534 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1535 uint64_t Magic = decodeULEB128(Data);
1536 return Magic == SPMagic();
1537 }
1538
hasFormat(const MemoryBuffer & Buffer)1539 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1540 const uint8_t *Data =
1541 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1542 uint64_t Magic = decodeULEB128(Data);
1543 return Magic == SPMagic(SPF_Ext_Binary);
1544 }
1545
hasFormat(const MemoryBuffer & Buffer)1546 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
1547 const uint8_t *Data =
1548 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1549 uint64_t Magic = decodeULEB128(Data);
1550 return Magic == SPMagic(SPF_Compact_Binary);
1551 }
1552
skipNextWord()1553 std::error_code SampleProfileReaderGCC::skipNextWord() {
1554 uint32_t dummy;
1555 if (!GcovBuffer.readInt(dummy))
1556 return sampleprof_error::truncated;
1557 return sampleprof_error::success;
1558 }
1559
readNumber()1560 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1561 if (sizeof(T) <= sizeof(uint32_t)) {
1562 uint32_t Val;
1563 if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1564 return static_cast<T>(Val);
1565 } else if (sizeof(T) <= sizeof(uint64_t)) {
1566 uint64_t Val;
1567 if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1568 return static_cast<T>(Val);
1569 }
1570
1571 std::error_code EC = sampleprof_error::malformed;
1572 reportError(0, EC.message());
1573 return EC;
1574 }
1575
readString()1576 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1577 StringRef Str;
1578 if (!GcovBuffer.readString(Str))
1579 return sampleprof_error::truncated;
1580 return Str;
1581 }
1582
readHeader()1583 std::error_code SampleProfileReaderGCC::readHeader() {
1584 // Read the magic identifier.
1585 if (!GcovBuffer.readGCDAFormat())
1586 return sampleprof_error::unrecognized_format;
1587
1588 // Read the version number. Note - the GCC reader does not validate this
1589 // version, but the profile creator generates v704.
1590 GCOV::GCOVVersion version;
1591 if (!GcovBuffer.readGCOVVersion(version))
1592 return sampleprof_error::unrecognized_format;
1593
1594 if (version != GCOV::V407)
1595 return sampleprof_error::unsupported_version;
1596
1597 // Skip the empty integer.
1598 if (std::error_code EC = skipNextWord())
1599 return EC;
1600
1601 return sampleprof_error::success;
1602 }
1603
readSectionTag(uint32_t Expected)1604 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1605 uint32_t Tag;
1606 if (!GcovBuffer.readInt(Tag))
1607 return sampleprof_error::truncated;
1608
1609 if (Tag != Expected)
1610 return sampleprof_error::malformed;
1611
1612 if (std::error_code EC = skipNextWord())
1613 return EC;
1614
1615 return sampleprof_error::success;
1616 }
1617
readNameTable()1618 std::error_code SampleProfileReaderGCC::readNameTable() {
1619 if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
1620 return EC;
1621
1622 uint32_t Size;
1623 if (!GcovBuffer.readInt(Size))
1624 return sampleprof_error::truncated;
1625
1626 for (uint32_t I = 0; I < Size; ++I) {
1627 StringRef Str;
1628 if (!GcovBuffer.readString(Str))
1629 return sampleprof_error::truncated;
1630 Names.push_back(std::string(Str));
1631 }
1632
1633 return sampleprof_error::success;
1634 }
1635
readFunctionProfiles()1636 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1637 if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
1638 return EC;
1639
1640 uint32_t NumFunctions;
1641 if (!GcovBuffer.readInt(NumFunctions))
1642 return sampleprof_error::truncated;
1643
1644 InlineCallStack Stack;
1645 for (uint32_t I = 0; I < NumFunctions; ++I)
1646 if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
1647 return EC;
1648
1649 computeSummary();
1650 return sampleprof_error::success;
1651 }
1652
readOneFunctionProfile(const InlineCallStack & InlineStack,bool Update,uint32_t Offset)1653 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1654 const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1655 uint64_t HeadCount = 0;
1656 if (InlineStack.size() == 0)
1657 if (!GcovBuffer.readInt64(HeadCount))
1658 return sampleprof_error::truncated;
1659
1660 uint32_t NameIdx;
1661 if (!GcovBuffer.readInt(NameIdx))
1662 return sampleprof_error::truncated;
1663
1664 StringRef Name(Names[NameIdx]);
1665
1666 uint32_t NumPosCounts;
1667 if (!GcovBuffer.readInt(NumPosCounts))
1668 return sampleprof_error::truncated;
1669
1670 uint32_t NumCallsites;
1671 if (!GcovBuffer.readInt(NumCallsites))
1672 return sampleprof_error::truncated;
1673
1674 FunctionSamples *FProfile = nullptr;
1675 if (InlineStack.size() == 0) {
1676 // If this is a top function that we have already processed, do not
1677 // update its profile again. This happens in the presence of
1678 // function aliases. Since these aliases share the same function
1679 // body, there will be identical replicated profiles for the
1680 // original function. In this case, we simply not bother updating
1681 // the profile of the original function.
1682 FProfile = &Profiles[Name];
1683 FProfile->addHeadSamples(HeadCount);
1684 if (FProfile->getTotalSamples() > 0)
1685 Update = false;
1686 } else {
1687 // Otherwise, we are reading an inlined instance. The top of the
1688 // inline stack contains the profile of the caller. Insert this
1689 // callee in the caller's CallsiteMap.
1690 FunctionSamples *CallerProfile = InlineStack.front();
1691 uint32_t LineOffset = Offset >> 16;
1692 uint32_t Discriminator = Offset & 0xffff;
1693 FProfile = &CallerProfile->functionSamplesAt(
1694 LineLocation(LineOffset, Discriminator))[std::string(Name)];
1695 }
1696 FProfile->setName(Name);
1697
1698 for (uint32_t I = 0; I < NumPosCounts; ++I) {
1699 uint32_t Offset;
1700 if (!GcovBuffer.readInt(Offset))
1701 return sampleprof_error::truncated;
1702
1703 uint32_t NumTargets;
1704 if (!GcovBuffer.readInt(NumTargets))
1705 return sampleprof_error::truncated;
1706
1707 uint64_t Count;
1708 if (!GcovBuffer.readInt64(Count))
1709 return sampleprof_error::truncated;
1710
1711 // The line location is encoded in the offset as:
1712 // high 16 bits: line offset to the start of the function.
1713 // low 16 bits: discriminator.
1714 uint32_t LineOffset = Offset >> 16;
1715 uint32_t Discriminator = Offset & 0xffff;
1716
1717 InlineCallStack NewStack;
1718 NewStack.push_back(FProfile);
1719 llvm::append_range(NewStack, InlineStack);
1720 if (Update) {
1721 // Walk up the inline stack, adding the samples on this line to
1722 // the total sample count of the callers in the chain.
1723 for (auto *CallerProfile : NewStack)
1724 CallerProfile->addTotalSamples(Count);
1725
1726 // Update the body samples for the current profile.
1727 FProfile->addBodySamples(LineOffset, Discriminator, Count);
1728 }
1729
1730 // Process the list of functions called at an indirect call site.
1731 // These are all the targets that a function pointer (or virtual
1732 // function) resolved at runtime.
1733 for (uint32_t J = 0; J < NumTargets; J++) {
1734 uint32_t HistVal;
1735 if (!GcovBuffer.readInt(HistVal))
1736 return sampleprof_error::truncated;
1737
1738 if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1739 return sampleprof_error::malformed;
1740
1741 uint64_t TargetIdx;
1742 if (!GcovBuffer.readInt64(TargetIdx))
1743 return sampleprof_error::truncated;
1744 StringRef TargetName(Names[TargetIdx]);
1745
1746 uint64_t TargetCount;
1747 if (!GcovBuffer.readInt64(TargetCount))
1748 return sampleprof_error::truncated;
1749
1750 if (Update)
1751 FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1752 TargetName, TargetCount);
1753 }
1754 }
1755
1756 // Process all the inlined callers into the current function. These
1757 // are all the callsites that were inlined into this function.
1758 for (uint32_t I = 0; I < NumCallsites; I++) {
1759 // The offset is encoded as:
1760 // high 16 bits: line offset to the start of the function.
1761 // low 16 bits: discriminator.
1762 uint32_t Offset;
1763 if (!GcovBuffer.readInt(Offset))
1764 return sampleprof_error::truncated;
1765 InlineCallStack NewStack;
1766 NewStack.push_back(FProfile);
1767 llvm::append_range(NewStack, InlineStack);
1768 if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
1769 return EC;
1770 }
1771
1772 return sampleprof_error::success;
1773 }
1774
1775 /// Read a GCC AutoFDO profile.
1776 ///
1777 /// This format is generated by the Linux Perf conversion tool at
1778 /// https://github.com/google/autofdo.
readImpl()1779 std::error_code SampleProfileReaderGCC::readImpl() {
1780 assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator");
1781 // Read the string table.
1782 if (std::error_code EC = readNameTable())
1783 return EC;
1784
1785 // Read the source profile.
1786 if (std::error_code EC = readFunctionProfiles())
1787 return EC;
1788
1789 return sampleprof_error::success;
1790 }
1791
hasFormat(const MemoryBuffer & Buffer)1792 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1793 StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1794 return Magic == "adcg*704";
1795 }
1796
applyRemapping(LLVMContext & Ctx)1797 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1798 // If the reader uses MD5 to represent string, we can't remap it because
1799 // we don't know what the original function names were.
1800 if (Reader.useMD5()) {
1801 Ctx.diagnose(DiagnosticInfoSampleProfile(
1802 Reader.getBuffer()->getBufferIdentifier(),
1803 "Profile data remapping cannot be applied to profile data "
1804 "in compact format (original mangled names are not available).",
1805 DS_Warning));
1806 return;
1807 }
1808
1809 // CSSPGO-TODO: Remapper is not yet supported.
1810 // We will need to remap the entire context string.
1811 assert(Remappings && "should be initialized while creating remapper");
1812 for (auto &Sample : Reader.getProfiles()) {
1813 DenseSet<StringRef> NamesInSample;
1814 Sample.second.findAllNames(NamesInSample);
1815 for (auto &Name : NamesInSample)
1816 if (auto Key = Remappings->insert(Name))
1817 NameMap.insert({Key, Name});
1818 }
1819
1820 RemappingApplied = true;
1821 }
1822
1823 std::optional<StringRef>
lookUpNameInProfile(StringRef Fname)1824 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1825 if (auto Key = Remappings->lookup(Fname))
1826 return NameMap.lookup(Key);
1827 return std::nullopt;
1828 }
1829
1830 /// Prepare a memory buffer for the contents of \p Filename.
1831 ///
1832 /// \returns an error code indicating the status of the buffer.
1833 static ErrorOr<std::unique_ptr<MemoryBuffer>>
setupMemoryBuffer(const Twine & Filename)1834 setupMemoryBuffer(const Twine &Filename) {
1835 auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true);
1836 if (std::error_code EC = BufferOrErr.getError())
1837 return EC;
1838 auto Buffer = std::move(BufferOrErr.get());
1839
1840 return std::move(Buffer);
1841 }
1842
1843 /// Create a sample profile reader based on the format of the input file.
1844 ///
1845 /// \param Filename The file to open.
1846 ///
1847 /// \param C The LLVM context to use to emit diagnostics.
1848 ///
1849 /// \param P The FSDiscriminatorPass.
1850 ///
1851 /// \param RemapFilename The file used for profile remapping.
1852 ///
1853 /// \returns an error code indicating the status of the created reader.
1854 ErrorOr<std::unique_ptr<SampleProfileReader>>
create(const std::string Filename,LLVMContext & C,FSDiscriminatorPass P,const std::string RemapFilename)1855 SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1856 FSDiscriminatorPass P,
1857 const std::string RemapFilename) {
1858 auto BufferOrError = setupMemoryBuffer(Filename);
1859 if (std::error_code EC = BufferOrError.getError())
1860 return EC;
1861 return create(BufferOrError.get(), C, P, RemapFilename);
1862 }
1863
1864 /// Create a sample profile remapper from the given input, to remap the
1865 /// function names in the given profile data.
1866 ///
1867 /// \param Filename The file to open.
1868 ///
1869 /// \param Reader The profile reader the remapper is going to be applied to.
1870 ///
1871 /// \param C The LLVM context to use to emit diagnostics.
1872 ///
1873 /// \returns an error code indicating the status of the created reader.
1874 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
create(const std::string Filename,SampleProfileReader & Reader,LLVMContext & C)1875 SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1876 SampleProfileReader &Reader,
1877 LLVMContext &C) {
1878 auto BufferOrError = setupMemoryBuffer(Filename);
1879 if (std::error_code EC = BufferOrError.getError())
1880 return EC;
1881 return create(BufferOrError.get(), Reader, C);
1882 }
1883
1884 /// Create a sample profile remapper from the given input, to remap the
1885 /// function names in the given profile data.
1886 ///
1887 /// \param B The memory buffer to create the reader from (assumes ownership).
1888 ///
1889 /// \param C The LLVM context to use to emit diagnostics.
1890 ///
1891 /// \param Reader The profile reader the remapper is going to be applied to.
1892 ///
1893 /// \returns an error code indicating the status of the created reader.
1894 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
create(std::unique_ptr<MemoryBuffer> & B,SampleProfileReader & Reader,LLVMContext & C)1895 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1896 SampleProfileReader &Reader,
1897 LLVMContext &C) {
1898 auto Remappings = std::make_unique<SymbolRemappingReader>();
1899 if (Error E = Remappings->read(*B)) {
1900 handleAllErrors(
1901 std::move(E), [&](const SymbolRemappingParseError &ParseError) {
1902 C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1903 ParseError.getLineNum(),
1904 ParseError.getMessage()));
1905 });
1906 return sampleprof_error::malformed;
1907 }
1908
1909 return std::make_unique<SampleProfileReaderItaniumRemapper>(
1910 std::move(B), std::move(Remappings), Reader);
1911 }
1912
1913 /// Create a sample profile reader based on the format of the input data.
1914 ///
1915 /// \param B The memory buffer to create the reader from (assumes ownership).
1916 ///
1917 /// \param C The LLVM context to use to emit diagnostics.
1918 ///
1919 /// \param P The FSDiscriminatorPass.
1920 ///
1921 /// \param RemapFilename The file used for profile remapping.
1922 ///
1923 /// \returns an error code indicating the status of the created reader.
1924 ErrorOr<std::unique_ptr<SampleProfileReader>>
create(std::unique_ptr<MemoryBuffer> & B,LLVMContext & C,FSDiscriminatorPass P,const std::string RemapFilename)1925 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1926 FSDiscriminatorPass P,
1927 const std::string RemapFilename) {
1928 std::unique_ptr<SampleProfileReader> Reader;
1929 if (SampleProfileReaderRawBinary::hasFormat(*B))
1930 Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1931 else if (SampleProfileReaderExtBinary::hasFormat(*B))
1932 Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
1933 else if (SampleProfileReaderCompactBinary::hasFormat(*B))
1934 Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
1935 else if (SampleProfileReaderGCC::hasFormat(*B))
1936 Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1937 else if (SampleProfileReaderText::hasFormat(*B))
1938 Reader.reset(new SampleProfileReaderText(std::move(B), C));
1939 else
1940 return sampleprof_error::unrecognized_format;
1941
1942 if (!RemapFilename.empty()) {
1943 auto ReaderOrErr =
1944 SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
1945 if (std::error_code EC = ReaderOrErr.getError()) {
1946 std::string Msg = "Could not create remapper: " + EC.message();
1947 C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
1948 return EC;
1949 }
1950 Reader->Remapper = std::move(ReaderOrErr.get());
1951 }
1952
1953 if (std::error_code EC = Reader->readHeader()) {
1954 return EC;
1955 }
1956
1957 Reader->setDiscriminatorMaskedBitFrom(P);
1958
1959 return std::move(Reader);
1960 }
1961
1962 // For text and GCC file formats, we compute the summary after reading the
1963 // profile. Binary format has the profile summary in its header.
computeSummary()1964 void SampleProfileReader::computeSummary() {
1965 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1966 Summary = Builder.computeSummaryForProfiles(Profiles);
1967 }
1968