xref: /aosp_15_r20/external/swiftshader/third_party/llvm-10.0/llvm/include/llvm/ProfileData/InstrProfReader.h (revision 03ce13f70fcc45d86ee91b7ee4cab1936a95046e)
//===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading profiling data for instrumentation
// based PGO and coverage.
//
//===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/IR/ProfileSummary.h"
20 #include "llvm/ProfileData/InstrProf.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/LineIterator.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/OnDiskHashTable.h"
26 #include "llvm/Support/SwapByteOrder.h"
27 #include <algorithm>
28 #include <cassert>
29 #include <cstddef>
30 #include <cstdint>
31 #include <iterator>
32 #include <memory>
33 #include <utility>
34 #include <vector>
35 
36 namespace llvm {
37 
38 class InstrProfReader;
39 
40 /// A file format agnostic iterator over profiling data.
41 class InstrProfIterator : public std::iterator<std::input_iterator_tag,
42                                                NamedInstrProfRecord> {
43   InstrProfReader *Reader = nullptr;
44   value_type Record;
45 
46   void Increment();
47 
48 public:
49   InstrProfIterator() = default;
InstrProfIterator(InstrProfReader * Reader)50   InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
51 
52   InstrProfIterator &operator++() { Increment(); return *this; }
53   bool operator==(const InstrProfIterator &RHS) const {
54     return Reader == RHS.Reader;
55   }
56   bool operator!=(const InstrProfIterator &RHS) const {
57     return Reader != RHS.Reader;
58   }
59   value_type &operator*() { return Record; }
60   value_type *operator->() { return &Record; }
61 };
62 
63 /// Base class and interface for reading profiling data of any known instrprof
64 /// format. Provides an iterator over NamedInstrProfRecords.
65 class InstrProfReader {
66   instrprof_error LastError = instrprof_error::success;
67 
68 public:
69   InstrProfReader() = default;
70   virtual ~InstrProfReader() = default;
71 
72   /// Read the header.  Required before reading first record.
73   virtual Error readHeader() = 0;
74 
75   /// Read a single record.
76   virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
77 
78   /// Iterator over profile data.
begin()79   InstrProfIterator begin() { return InstrProfIterator(this); }
end()80   InstrProfIterator end() { return InstrProfIterator(); }
81 
82   virtual bool isIRLevelProfile() const = 0;
83 
84   virtual bool hasCSIRLevelProfile() const = 0;
85 
86   /// Return the PGO symtab. There are three different readers:
87   /// Raw, Text, and Indexed profile readers. The first two types
88   /// of readers are used only by llvm-profdata tool, while the indexed
89   /// profile reader is also used by llvm-cov tool and the compiler (
90   /// backend or frontend). Since creating PGO symtab can create
91   /// significant runtime and memory overhead (as it touches data
92   /// for the whole program), InstrProfSymtab for the indexed profile
93   /// reader should be created on demand and it is recommended to be
94   /// only used for dumping purpose with llvm-proftool, not with the
95   /// compiler.
96   virtual InstrProfSymtab &getSymtab() = 0;
97 
98   /// Compute the sum of counts and return in Sum.
99   void accumulateCounts(CountSumOrPercent &Sum, bool IsCS);
100 
101 protected:
102   std::unique_ptr<InstrProfSymtab> Symtab;
103 
104   /// Set the current error and return same.
error(instrprof_error Err)105   Error error(instrprof_error Err) {
106     LastError = Err;
107     if (Err == instrprof_error::success)
108       return Error::success();
109     return make_error<InstrProfError>(Err);
110   }
111 
error(Error && E)112   Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); }
113 
114   /// Clear the current error and return a successful one.
success()115   Error success() { return error(instrprof_error::success); }
116 
117 public:
118   /// Return true if the reader has finished reading the profile data.
isEOF()119   bool isEOF() { return LastError == instrprof_error::eof; }
120 
121   /// Return true if the reader encountered an error reading profiling data.
hasError()122   bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
123 
124   /// Get the current error.
getError()125   Error getError() {
126     if (hasError())
127       return make_error<InstrProfError>(LastError);
128     return Error::success();
129   }
130 
131   /// Factory method to create an appropriately typed reader for the given
132   /// instrprof file.
133   static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
134 
135   static Expected<std::unique_ptr<InstrProfReader>>
136   create(std::unique_ptr<MemoryBuffer> Buffer);
137 };
138 
139 /// Reader for the simple text based instrprof format.
140 ///
141 /// This format is a simple text format that's suitable for test data. Records
142 /// are separated by one or more blank lines, and record fields are separated by
143 /// new lines.
144 ///
145 /// Each record consists of a function name, a function hash, a number of
146 /// counters, and then each counter value, in that order.
147 class TextInstrProfReader : public InstrProfReader {
148 private:
149   /// The profile data file contents.
150   std::unique_ptr<MemoryBuffer> DataBuffer;
151   /// Iterator over the profile data.
152   line_iterator Line;
153   bool IsIRLevelProfile = false;
154   bool HasCSIRLevelProfile = false;
155 
156   Error readValueProfileData(InstrProfRecord &Record);
157 
158 public:
TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)159   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
160       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
161   TextInstrProfReader(const TextInstrProfReader &) = delete;
162   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
163 
164   /// Return true if the given buffer is in text instrprof format.
165   static bool hasFormat(const MemoryBuffer &Buffer);
166 
isIRLevelProfile()167   bool isIRLevelProfile() const override { return IsIRLevelProfile; }
168 
hasCSIRLevelProfile()169   bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
170 
171   /// Read the header.
172   Error readHeader() override;
173 
174   /// Read a single record.
175   Error readNextRecord(NamedInstrProfRecord &Record) override;
176 
getSymtab()177   InstrProfSymtab &getSymtab() override {
178     assert(Symtab.get());
179     return *Symtab.get();
180   }
181 };
182 
183 /// Reader for the raw instrprof binary format from runtime.
184 ///
185 /// This format is a raw memory dump of the instrumentation-baed profiling data
186 /// from the runtime.  It has no index.
187 ///
188 /// Templated on the unsigned type whose size matches pointers on the platform
189 /// that wrote the profile.
190 template <class IntPtrT>
191 class RawInstrProfReader : public InstrProfReader {
192 private:
193   /// The profile data file contents.
194   std::unique_ptr<MemoryBuffer> DataBuffer;
195   bool ShouldSwapBytes;
196   // The value of the version field of the raw profile data header. The lower 56
197   // bits specifies the format version and the most significant 8 bits specify
198   // the variant types of the profile.
199   uint64_t Version;
200   uint64_t CountersDelta;
201   uint64_t NamesDelta;
202   const RawInstrProf::ProfileData<IntPtrT> *Data;
203   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
204   const uint64_t *CountersStart;
205   const char *NamesStart;
206   uint64_t NamesSize;
207   // After value profile is all read, this pointer points to
208   // the header of next profile data (if exists)
209   const uint8_t *ValueDataStart;
210   uint32_t ValueKindLast;
211   uint32_t CurValueDataSize;
212 
213 public:
RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)214   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
215       : DataBuffer(std::move(DataBuffer)) {}
216   RawInstrProfReader(const RawInstrProfReader &) = delete;
217   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
218 
219   static bool hasFormat(const MemoryBuffer &DataBuffer);
220   Error readHeader() override;
221   Error readNextRecord(NamedInstrProfRecord &Record) override;
222 
isIRLevelProfile()223   bool isIRLevelProfile() const override {
224     return (Version & VARIANT_MASK_IR_PROF) != 0;
225   }
226 
hasCSIRLevelProfile()227   bool hasCSIRLevelProfile() const override {
228     return (Version & VARIANT_MASK_CSIR_PROF) != 0;
229   }
230 
getSymtab()231   InstrProfSymtab &getSymtab() override {
232     assert(Symtab.get());
233     return *Symtab.get();
234   }
235 
236 private:
237   Error createSymtab(InstrProfSymtab &Symtab);
238   Error readNextHeader(const char *CurrentPos);
239   Error readHeader(const RawInstrProf::Header &Header);
240 
swap(IntT Int)241   template <class IntT> IntT swap(IntT Int) const {
242     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
243   }
244 
getDataEndianness()245   support::endianness getDataEndianness() const {
246     support::endianness HostEndian = getHostEndianness();
247     if (!ShouldSwapBytes)
248       return HostEndian;
249     if (HostEndian == support::little)
250       return support::big;
251     else
252       return support::little;
253   }
254 
getNumPaddingBytes(uint64_t SizeInBytes)255   inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
256     return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
257   }
258 
259   Error readName(NamedInstrProfRecord &Record);
260   Error readFuncHash(NamedInstrProfRecord &Record);
261   Error readRawCounts(InstrProfRecord &Record);
262   Error readValueProfilingData(InstrProfRecord &Record);
atEnd()263   bool atEnd() const { return Data == DataEnd; }
264 
advanceData()265   void advanceData() {
266     Data++;
267     ValueDataStart += CurValueDataSize;
268   }
269 
getNextHeaderPos()270   const char *getNextHeaderPos() const {
271       assert(atEnd());
272       return (const char *)ValueDataStart;
273   }
274 
275   /// Get the offset of \p CounterPtr from the start of the counters section of
276   /// the profile. The offset has units of "number of counters", i.e. increasing
277   /// the offset by 1 corresponds to an increase in the *byte offset* by 8.
getCounterOffset(IntPtrT CounterPtr)278   ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const {
279     return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
280   }
281 
getCounter(ptrdiff_t Offset)282   const uint64_t *getCounter(ptrdiff_t Offset) const {
283     return CountersStart + Offset;
284   }
285 
getName(uint64_t NameRef)286   StringRef getName(uint64_t NameRef) const {
287     return Symtab->getFuncName(swap(NameRef));
288   }
289 };
290 
291 using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
292 using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
293 
294 namespace IndexedInstrProf {
295 
296 enum class HashT : uint32_t;
297 
298 } // end namespace IndexedInstrProf
299 
300 /// Trait for lookups into the on-disk hash table for the binary instrprof
301 /// format.
302 class InstrProfLookupTrait {
303   std::vector<NamedInstrProfRecord> DataBuffer;
304   IndexedInstrProf::HashT HashType;
305   unsigned FormatVersion;
306   // Endianness of the input value profile data.
307   // It should be LE by default, but can be changed
308   // for testing purpose.
309   support::endianness ValueProfDataEndianness = support::little;
310 
311 public:
InstrProfLookupTrait(IndexedInstrProf::HashT HashType,unsigned FormatVersion)312   InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
313       : HashType(HashType), FormatVersion(FormatVersion) {}
314 
315   using data_type = ArrayRef<NamedInstrProfRecord>;
316 
317   using internal_key_type = StringRef;
318   using external_key_type = StringRef;
319   using hash_value_type = uint64_t;
320   using offset_type = uint64_t;
321 
EqualKey(StringRef A,StringRef B)322   static bool EqualKey(StringRef A, StringRef B) { return A == B; }
GetInternalKey(StringRef K)323   static StringRef GetInternalKey(StringRef K) { return K; }
GetExternalKey(StringRef K)324   static StringRef GetExternalKey(StringRef K) { return K; }
325 
326   hash_value_type ComputeHash(StringRef K);
327 
328   static std::pair<offset_type, offset_type>
ReadKeyDataLength(const unsigned char * & D)329   ReadKeyDataLength(const unsigned char *&D) {
330     using namespace support;
331 
332     offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
333     offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
334     return std::make_pair(KeyLen, DataLen);
335   }
336 
ReadKey(const unsigned char * D,offset_type N)337   StringRef ReadKey(const unsigned char *D, offset_type N) {
338     return StringRef((const char *)D, N);
339   }
340 
341   bool readValueProfilingData(const unsigned char *&D,
342                               const unsigned char *const End);
343   data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
344 
345   // Used for testing purpose only.
setValueProfDataEndianness(support::endianness Endianness)346   void setValueProfDataEndianness(support::endianness Endianness) {
347     ValueProfDataEndianness = Endianness;
348   }
349 };
350 
351 struct InstrProfReaderIndexBase {
352   virtual ~InstrProfReaderIndexBase() = default;
353 
354   // Read all the profile records with the same key pointed to the current
355   // iterator.
356   virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
357 
358   // Read all the profile records with the key equal to FuncName
359   virtual Error getRecords(StringRef FuncName,
360                                      ArrayRef<NamedInstrProfRecord> &Data) = 0;
361   virtual void advanceToNextKey() = 0;
362   virtual bool atEnd() const = 0;
363   virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
364   virtual uint64_t getVersion() const = 0;
365   virtual bool isIRLevelProfile() const = 0;
366   virtual bool hasCSIRLevelProfile() const = 0;
367   virtual Error populateSymtab(InstrProfSymtab &) = 0;
368 };
369 
370 using OnDiskHashTableImplV3 =
371     OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
372 
373 template <typename HashTableImpl>
374 class InstrProfReaderItaniumRemapper;
375 
376 template <typename HashTableImpl>
377 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
378 private:
379   std::unique_ptr<HashTableImpl> HashTable;
380   typename HashTableImpl::data_iterator RecordIterator;
381   uint64_t FormatVersion;
382 
383   friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
384 
385 public:
386   InstrProfReaderIndex(const unsigned char *Buckets,
387                        const unsigned char *const Payload,
388                        const unsigned char *const Base,
389                        IndexedInstrProf::HashT HashType, uint64_t Version);
390   ~InstrProfReaderIndex() override = default;
391 
392   Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
393   Error getRecords(StringRef FuncName,
394                    ArrayRef<NamedInstrProfRecord> &Data) override;
advanceToNextKey()395   void advanceToNextKey() override { RecordIterator++; }
396 
atEnd()397   bool atEnd() const override {
398     return RecordIterator == HashTable->data_end();
399   }
400 
setValueProfDataEndianness(support::endianness Endianness)401   void setValueProfDataEndianness(support::endianness Endianness) override {
402     HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
403   }
404 
getVersion()405   uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
406 
isIRLevelProfile()407   bool isIRLevelProfile() const override {
408     return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
409   }
410 
hasCSIRLevelProfile()411   bool hasCSIRLevelProfile() const override {
412     return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
413   }
414 
populateSymtab(InstrProfSymtab & Symtab)415   Error populateSymtab(InstrProfSymtab &Symtab) override {
416     return Symtab.create(HashTable->keys());
417   }
418 };
419 
420 /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
421 class InstrProfReaderRemapper {
422 public:
~InstrProfReaderRemapper()423   virtual ~InstrProfReaderRemapper() {}
populateRemappings()424   virtual Error populateRemappings() { return Error::success(); }
425   virtual Error getRecords(StringRef FuncName,
426                            ArrayRef<NamedInstrProfRecord> &Data) = 0;
427 };
428 
429 /// Reader for the indexed binary instrprof format.
430 class IndexedInstrProfReader : public InstrProfReader {
431 private:
432   /// The profile data file contents.
433   std::unique_ptr<MemoryBuffer> DataBuffer;
434   /// The profile remapping file contents.
435   std::unique_ptr<MemoryBuffer> RemappingBuffer;
436   /// The index into the profile data.
437   std::unique_ptr<InstrProfReaderIndexBase> Index;
438   /// The profile remapping file contents.
439   std::unique_ptr<InstrProfReaderRemapper> Remapper;
440   /// Profile summary data.
441   std::unique_ptr<ProfileSummary> Summary;
442   /// Context sensitive profile summary data.
443   std::unique_ptr<ProfileSummary> CS_Summary;
444   // Index to the current record in the record array.
445   unsigned RecordIndex;
446 
447   // Read the profile summary. Return a pointer pointing to one byte past the
448   // end of the summary data if it exists or the input \c Cur.
449   // \c UseCS indicates whether to use the context-sensitive profile summary.
450   const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
451                                    const unsigned char *Cur, bool UseCS);
452 
453 public:
454   IndexedInstrProfReader(
455       std::unique_ptr<MemoryBuffer> DataBuffer,
456       std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
DataBuffer(std::move (DataBuffer))457       : DataBuffer(std::move(DataBuffer)),
458         RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
459   IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
460   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
461 
462   /// Return the profile version.
getVersion()463   uint64_t getVersion() const { return Index->getVersion(); }
isIRLevelProfile()464   bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
hasCSIRLevelProfile()465   bool hasCSIRLevelProfile() const override {
466     return Index->hasCSIRLevelProfile();
467   }
468 
469   /// Return true if the given buffer is in an indexed instrprof format.
470   static bool hasFormat(const MemoryBuffer &DataBuffer);
471 
472   /// Read the file header.
473   Error readHeader() override;
474   /// Read a single record.
475   Error readNextRecord(NamedInstrProfRecord &Record) override;
476 
477   /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
478   Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
479                                                uint64_t FuncHash);
480 
481   /// Fill Counts with the profile data for the given function name.
482   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
483                           std::vector<uint64_t> &Counts);
484 
485   /// Return the maximum of all known function counts.
486   /// \c UseCS indicates whether to use the context-sensitive count.
getMaximumFunctionCount(bool UseCS)487   uint64_t getMaximumFunctionCount(bool UseCS) {
488     if (UseCS) {
489       assert(CS_Summary && "No context sensitive profile summary");
490       return CS_Summary->getMaxFunctionCount();
491     } else {
492       assert(Summary && "No profile summary");
493       return Summary->getMaxFunctionCount();
494     }
495   }
496 
497   /// Factory method to create an indexed reader.
498   static Expected<std::unique_ptr<IndexedInstrProfReader>>
499   create(const Twine &Path, const Twine &RemappingPath = "");
500 
501   static Expected<std::unique_ptr<IndexedInstrProfReader>>
502   create(std::unique_ptr<MemoryBuffer> Buffer,
503          std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
504 
505   // Used for testing purpose only.
setValueProfDataEndianness(support::endianness Endianness)506   void setValueProfDataEndianness(support::endianness Endianness) {
507     Index->setValueProfDataEndianness(Endianness);
508   }
509 
510   // See description in the base class. This interface is designed
511   // to be used by llvm-profdata (for dumping). Avoid using this when
512   // the client is the compiler.
513   InstrProfSymtab &getSymtab() override;
514 
515   /// Return the profile summary.
516   /// \c UseCS indicates whether to use the context-sensitive summary.
getSummary(bool UseCS)517   ProfileSummary &getSummary(bool UseCS) {
518     if (UseCS) {
519       assert(CS_Summary && "No context sensitive summary");
520       return *(CS_Summary.get());
521     } else {
522       assert(Summary && "No profile summary");
523       return *(Summary.get());
524     }
525   }
526 };
527 
528 } // end namespace llvm
529 
530 #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
531