//===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading profiling data for instrumentation
// based PGO and coverage.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
#define LLVM_PROFILEDATA_INSTRPROFREADER_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/OnDiskHashTable.h"
#include "llvm/Support/SwapByteOrder.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
#include <utility>
#include <vector>

namespace llvm {

class InstrProfReader;

/// A file format agnostic iterator over profiling data.
41 class InstrProfIterator : public std::iterator<std::input_iterator_tag, 42 NamedInstrProfRecord> { 43 InstrProfReader *Reader = nullptr; 44 value_type Record; 45 46 void Increment(); 47 48 public: 49 InstrProfIterator() = default; InstrProfIterator(InstrProfReader * Reader)50 InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); } 51 52 InstrProfIterator &operator++() { Increment(); return *this; } 53 bool operator==(const InstrProfIterator &RHS) const { 54 return Reader == RHS.Reader; 55 } 56 bool operator!=(const InstrProfIterator &RHS) const { 57 return Reader != RHS.Reader; 58 } 59 value_type &operator*() { return Record; } 60 value_type *operator->() { return &Record; } 61 }; 62 63 /// Base class and interface for reading profiling data of any known instrprof 64 /// format. Provides an iterator over NamedInstrProfRecords. 65 class InstrProfReader { 66 instrprof_error LastError = instrprof_error::success; 67 68 public: 69 InstrProfReader() = default; 70 virtual ~InstrProfReader() = default; 71 72 /// Read the header. Required before reading first record. 73 virtual Error readHeader() = 0; 74 75 /// Read a single record. 76 virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0; 77 78 /// Iterator over profile data. begin()79 InstrProfIterator begin() { return InstrProfIterator(this); } end()80 InstrProfIterator end() { return InstrProfIterator(); } 81 82 virtual bool isIRLevelProfile() const = 0; 83 84 virtual bool hasCSIRLevelProfile() const = 0; 85 86 /// Return the PGO symtab. There are three different readers: 87 /// Raw, Text, and Indexed profile readers. The first two types 88 /// of readers are used only by llvm-profdata tool, while the indexed 89 /// profile reader is also used by llvm-cov tool and the compiler ( 90 /// backend or frontend). 
Since creating PGO symtab can create 91 /// significant runtime and memory overhead (as it touches data 92 /// for the whole program), InstrProfSymtab for the indexed profile 93 /// reader should be created on demand and it is recommended to be 94 /// only used for dumping purpose with llvm-proftool, not with the 95 /// compiler. 96 virtual InstrProfSymtab &getSymtab() = 0; 97 98 /// Compute the sum of counts and return in Sum. 99 void accumulateCounts(CountSumOrPercent &Sum, bool IsCS); 100 101 protected: 102 std::unique_ptr<InstrProfSymtab> Symtab; 103 104 /// Set the current error and return same. error(instrprof_error Err)105 Error error(instrprof_error Err) { 106 LastError = Err; 107 if (Err == instrprof_error::success) 108 return Error::success(); 109 return make_error<InstrProfError>(Err); 110 } 111 error(Error && E)112 Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); } 113 114 /// Clear the current error and return a successful one. success()115 Error success() { return error(instrprof_error::success); } 116 117 public: 118 /// Return true if the reader has finished reading the profile data. isEOF()119 bool isEOF() { return LastError == instrprof_error::eof; } 120 121 /// Return true if the reader encountered an error reading profiling data. hasError()122 bool hasError() { return LastError != instrprof_error::success && !isEOF(); } 123 124 /// Get the current error. getError()125 Error getError() { 126 if (hasError()) 127 return make_error<InstrProfError>(LastError); 128 return Error::success(); 129 } 130 131 /// Factory method to create an appropriately typed reader for the given 132 /// instrprof file. 133 static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path); 134 135 static Expected<std::unique_ptr<InstrProfReader>> 136 create(std::unique_ptr<MemoryBuffer> Buffer); 137 }; 138 139 /// Reader for the simple text based instrprof format. 
140 /// 141 /// This format is a simple text format that's suitable for test data. Records 142 /// are separated by one or more blank lines, and record fields are separated by 143 /// new lines. 144 /// 145 /// Each record consists of a function name, a function hash, a number of 146 /// counters, and then each counter value, in that order. 147 class TextInstrProfReader : public InstrProfReader { 148 private: 149 /// The profile data file contents. 150 std::unique_ptr<MemoryBuffer> DataBuffer; 151 /// Iterator over the profile data. 152 line_iterator Line; 153 bool IsIRLevelProfile = false; 154 bool HasCSIRLevelProfile = false; 155 156 Error readValueProfileData(InstrProfRecord &Record); 157 158 public: TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)159 TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_) 160 : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {} 161 TextInstrProfReader(const TextInstrProfReader &) = delete; 162 TextInstrProfReader &operator=(const TextInstrProfReader &) = delete; 163 164 /// Return true if the given buffer is in text instrprof format. 165 static bool hasFormat(const MemoryBuffer &Buffer); 166 isIRLevelProfile()167 bool isIRLevelProfile() const override { return IsIRLevelProfile; } 168 hasCSIRLevelProfile()169 bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; } 170 171 /// Read the header. 172 Error readHeader() override; 173 174 /// Read a single record. 175 Error readNextRecord(NamedInstrProfRecord &Record) override; 176 getSymtab()177 InstrProfSymtab &getSymtab() override { 178 assert(Symtab.get()); 179 return *Symtab.get(); 180 } 181 }; 182 183 /// Reader for the raw instrprof binary format from runtime. 184 /// 185 /// This format is a raw memory dump of the instrumentation-baed profiling data 186 /// from the runtime. It has no index. 187 /// 188 /// Templated on the unsigned type whose size matches pointers on the platform 189 /// that wrote the profile. 
190 template <class IntPtrT> 191 class RawInstrProfReader : public InstrProfReader { 192 private: 193 /// The profile data file contents. 194 std::unique_ptr<MemoryBuffer> DataBuffer; 195 bool ShouldSwapBytes; 196 // The value of the version field of the raw profile data header. The lower 56 197 // bits specifies the format version and the most significant 8 bits specify 198 // the variant types of the profile. 199 uint64_t Version; 200 uint64_t CountersDelta; 201 uint64_t NamesDelta; 202 const RawInstrProf::ProfileData<IntPtrT> *Data; 203 const RawInstrProf::ProfileData<IntPtrT> *DataEnd; 204 const uint64_t *CountersStart; 205 const char *NamesStart; 206 uint64_t NamesSize; 207 // After value profile is all read, this pointer points to 208 // the header of next profile data (if exists) 209 const uint8_t *ValueDataStart; 210 uint32_t ValueKindLast; 211 uint32_t CurValueDataSize; 212 213 public: RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)214 RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer) 215 : DataBuffer(std::move(DataBuffer)) {} 216 RawInstrProfReader(const RawInstrProfReader &) = delete; 217 RawInstrProfReader &operator=(const RawInstrProfReader &) = delete; 218 219 static bool hasFormat(const MemoryBuffer &DataBuffer); 220 Error readHeader() override; 221 Error readNextRecord(NamedInstrProfRecord &Record) override; 222 isIRLevelProfile()223 bool isIRLevelProfile() const override { 224 return (Version & VARIANT_MASK_IR_PROF) != 0; 225 } 226 hasCSIRLevelProfile()227 bool hasCSIRLevelProfile() const override { 228 return (Version & VARIANT_MASK_CSIR_PROF) != 0; 229 } 230 getSymtab()231 InstrProfSymtab &getSymtab() override { 232 assert(Symtab.get()); 233 return *Symtab.get(); 234 } 235 236 private: 237 Error createSymtab(InstrProfSymtab &Symtab); 238 Error readNextHeader(const char *CurrentPos); 239 Error readHeader(const RawInstrProf::Header &Header); 240 swap(IntT Int)241 template <class IntT> IntT swap(IntT Int) const { 242 return 
ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int; 243 } 244 getDataEndianness()245 support::endianness getDataEndianness() const { 246 support::endianness HostEndian = getHostEndianness(); 247 if (!ShouldSwapBytes) 248 return HostEndian; 249 if (HostEndian == support::little) 250 return support::big; 251 else 252 return support::little; 253 } 254 getNumPaddingBytes(uint64_t SizeInBytes)255 inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) { 256 return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t)); 257 } 258 259 Error readName(NamedInstrProfRecord &Record); 260 Error readFuncHash(NamedInstrProfRecord &Record); 261 Error readRawCounts(InstrProfRecord &Record); 262 Error readValueProfilingData(InstrProfRecord &Record); atEnd()263 bool atEnd() const { return Data == DataEnd; } 264 advanceData()265 void advanceData() { 266 Data++; 267 ValueDataStart += CurValueDataSize; 268 } 269 getNextHeaderPos()270 const char *getNextHeaderPos() const { 271 assert(atEnd()); 272 return (const char *)ValueDataStart; 273 } 274 275 /// Get the offset of \p CounterPtr from the start of the counters section of 276 /// the profile. The offset has units of "number of counters", i.e. increasing 277 /// the offset by 1 corresponds to an increase in the *byte offset* by 8. 
getCounterOffset(IntPtrT CounterPtr)278 ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const { 279 return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t); 280 } 281 getCounter(ptrdiff_t Offset)282 const uint64_t *getCounter(ptrdiff_t Offset) const { 283 return CountersStart + Offset; 284 } 285 getName(uint64_t NameRef)286 StringRef getName(uint64_t NameRef) const { 287 return Symtab->getFuncName(swap(NameRef)); 288 } 289 }; 290 291 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>; 292 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>; 293 294 namespace IndexedInstrProf { 295 296 enum class HashT : uint32_t; 297 298 } // end namespace IndexedInstrProf 299 300 /// Trait for lookups into the on-disk hash table for the binary instrprof 301 /// format. 302 class InstrProfLookupTrait { 303 std::vector<NamedInstrProfRecord> DataBuffer; 304 IndexedInstrProf::HashT HashType; 305 unsigned FormatVersion; 306 // Endianness of the input value profile data. 307 // It should be LE by default, but can be changed 308 // for testing purpose. 
309 support::endianness ValueProfDataEndianness = support::little; 310 311 public: InstrProfLookupTrait(IndexedInstrProf::HashT HashType,unsigned FormatVersion)312 InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion) 313 : HashType(HashType), FormatVersion(FormatVersion) {} 314 315 using data_type = ArrayRef<NamedInstrProfRecord>; 316 317 using internal_key_type = StringRef; 318 using external_key_type = StringRef; 319 using hash_value_type = uint64_t; 320 using offset_type = uint64_t; 321 EqualKey(StringRef A,StringRef B)322 static bool EqualKey(StringRef A, StringRef B) { return A == B; } GetInternalKey(StringRef K)323 static StringRef GetInternalKey(StringRef K) { return K; } GetExternalKey(StringRef K)324 static StringRef GetExternalKey(StringRef K) { return K; } 325 326 hash_value_type ComputeHash(StringRef K); 327 328 static std::pair<offset_type, offset_type> ReadKeyDataLength(const unsigned char * & D)329 ReadKeyDataLength(const unsigned char *&D) { 330 using namespace support; 331 332 offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D); 333 offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D); 334 return std::make_pair(KeyLen, DataLen); 335 } 336 ReadKey(const unsigned char * D,offset_type N)337 StringRef ReadKey(const unsigned char *D, offset_type N) { 338 return StringRef((const char *)D, N); 339 } 340 341 bool readValueProfilingData(const unsigned char *&D, 342 const unsigned char *const End); 343 data_type ReadData(StringRef K, const unsigned char *D, offset_type N); 344 345 // Used for testing purpose only. setValueProfDataEndianness(support::endianness Endianness)346 void setValueProfDataEndianness(support::endianness Endianness) { 347 ValueProfDataEndianness = Endianness; 348 } 349 }; 350 351 struct InstrProfReaderIndexBase { 352 virtual ~InstrProfReaderIndexBase() = default; 353 354 // Read all the profile records with the same key pointed to the current 355 // iterator. 
356 virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0; 357 358 // Read all the profile records with the key equal to FuncName 359 virtual Error getRecords(StringRef FuncName, 360 ArrayRef<NamedInstrProfRecord> &Data) = 0; 361 virtual void advanceToNextKey() = 0; 362 virtual bool atEnd() const = 0; 363 virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; 364 virtual uint64_t getVersion() const = 0; 365 virtual bool isIRLevelProfile() const = 0; 366 virtual bool hasCSIRLevelProfile() const = 0; 367 virtual Error populateSymtab(InstrProfSymtab &) = 0; 368 }; 369 370 using OnDiskHashTableImplV3 = 371 OnDiskIterableChainedHashTable<InstrProfLookupTrait>; 372 373 template <typename HashTableImpl> 374 class InstrProfReaderItaniumRemapper; 375 376 template <typename HashTableImpl> 377 class InstrProfReaderIndex : public InstrProfReaderIndexBase { 378 private: 379 std::unique_ptr<HashTableImpl> HashTable; 380 typename HashTableImpl::data_iterator RecordIterator; 381 uint64_t FormatVersion; 382 383 friend class InstrProfReaderItaniumRemapper<HashTableImpl>; 384 385 public: 386 InstrProfReaderIndex(const unsigned char *Buckets, 387 const unsigned char *const Payload, 388 const unsigned char *const Base, 389 IndexedInstrProf::HashT HashType, uint64_t Version); 390 ~InstrProfReaderIndex() override = default; 391 392 Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override; 393 Error getRecords(StringRef FuncName, 394 ArrayRef<NamedInstrProfRecord> &Data) override; advanceToNextKey()395 void advanceToNextKey() override { RecordIterator++; } 396 atEnd()397 bool atEnd() const override { 398 return RecordIterator == HashTable->data_end(); 399 } 400 setValueProfDataEndianness(support::endianness Endianness)401 void setValueProfDataEndianness(support::endianness Endianness) override { 402 HashTable->getInfoObj().setValueProfDataEndianness(Endianness); 403 } 404 getVersion()405 uint64_t getVersion() const override { return 
GET_VERSION(FormatVersion); } 406 isIRLevelProfile()407 bool isIRLevelProfile() const override { 408 return (FormatVersion & VARIANT_MASK_IR_PROF) != 0; 409 } 410 hasCSIRLevelProfile()411 bool hasCSIRLevelProfile() const override { 412 return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0; 413 } 414 populateSymtab(InstrProfSymtab & Symtab)415 Error populateSymtab(InstrProfSymtab &Symtab) override { 416 return Symtab.create(HashTable->keys()); 417 } 418 }; 419 420 /// Name matcher supporting fuzzy matching of symbol names to names in profiles. 421 class InstrProfReaderRemapper { 422 public: ~InstrProfReaderRemapper()423 virtual ~InstrProfReaderRemapper() {} populateRemappings()424 virtual Error populateRemappings() { return Error::success(); } 425 virtual Error getRecords(StringRef FuncName, 426 ArrayRef<NamedInstrProfRecord> &Data) = 0; 427 }; 428 429 /// Reader for the indexed binary instrprof format. 430 class IndexedInstrProfReader : public InstrProfReader { 431 private: 432 /// The profile data file contents. 433 std::unique_ptr<MemoryBuffer> DataBuffer; 434 /// The profile remapping file contents. 435 std::unique_ptr<MemoryBuffer> RemappingBuffer; 436 /// The index into the profile data. 437 std::unique_ptr<InstrProfReaderIndexBase> Index; 438 /// The profile remapping file contents. 439 std::unique_ptr<InstrProfReaderRemapper> Remapper; 440 /// Profile summary data. 441 std::unique_ptr<ProfileSummary> Summary; 442 /// Context sensitive profile summary data. 443 std::unique_ptr<ProfileSummary> CS_Summary; 444 // Index to the current record in the record array. 445 unsigned RecordIndex; 446 447 // Read the profile summary. Return a pointer pointing to one byte past the 448 // end of the summary data if it exists or the input \c Cur. 449 // \c UseCS indicates whether to use the context-sensitive profile summary. 
450 const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, 451 const unsigned char *Cur, bool UseCS); 452 453 public: 454 IndexedInstrProfReader( 455 std::unique_ptr<MemoryBuffer> DataBuffer, 456 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr) DataBuffer(std::move (DataBuffer))457 : DataBuffer(std::move(DataBuffer)), 458 RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {} 459 IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; 460 IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; 461 462 /// Return the profile version. getVersion()463 uint64_t getVersion() const { return Index->getVersion(); } isIRLevelProfile()464 bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); } hasCSIRLevelProfile()465 bool hasCSIRLevelProfile() const override { 466 return Index->hasCSIRLevelProfile(); 467 } 468 469 /// Return true if the given buffer is in an indexed instrprof format. 470 static bool hasFormat(const MemoryBuffer &DataBuffer); 471 472 /// Read the file header. 473 Error readHeader() override; 474 /// Read a single record. 475 Error readNextRecord(NamedInstrProfRecord &Record) override; 476 477 /// Return the NamedInstrProfRecord associated with FuncName and FuncHash 478 Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName, 479 uint64_t FuncHash); 480 481 /// Fill Counts with the profile data for the given function name. 482 Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash, 483 std::vector<uint64_t> &Counts); 484 485 /// Return the maximum of all known function counts. 486 /// \c UseCS indicates whether to use the context-sensitive count. 
getMaximumFunctionCount(bool UseCS)487 uint64_t getMaximumFunctionCount(bool UseCS) { 488 if (UseCS) { 489 assert(CS_Summary && "No context sensitive profile summary"); 490 return CS_Summary->getMaxFunctionCount(); 491 } else { 492 assert(Summary && "No profile summary"); 493 return Summary->getMaxFunctionCount(); 494 } 495 } 496 497 /// Factory method to create an indexed reader. 498 static Expected<std::unique_ptr<IndexedInstrProfReader>> 499 create(const Twine &Path, const Twine &RemappingPath = ""); 500 501 static Expected<std::unique_ptr<IndexedInstrProfReader>> 502 create(std::unique_ptr<MemoryBuffer> Buffer, 503 std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr); 504 505 // Used for testing purpose only. setValueProfDataEndianness(support::endianness Endianness)506 void setValueProfDataEndianness(support::endianness Endianness) { 507 Index->setValueProfDataEndianness(Endianness); 508 } 509 510 // See description in the base class. This interface is designed 511 // to be used by llvm-profdata (for dumping). Avoid using this when 512 // the client is the compiler. 513 InstrProfSymtab &getSymtab() override; 514 515 /// Return the profile summary. 516 /// \c UseCS indicates whether to use the context-sensitive summary. getSummary(bool UseCS)517 ProfileSummary &getSummary(bool UseCS) { 518 if (UseCS) { 519 assert(CS_Summary && "No context sensitive summary"); 520 return *(CS_Summary.get()); 521 } else { 522 assert(Summary && "No profile summary"); 523 return *(Summary.get()); 524 } 525 } 526 }; 527 528 } // end namespace llvm 529 530 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H 531