#pragma once

#include <array>
#include <cstdint>
#include <cstring>
#include <vector>

namespace torch::profiler {

/* A vector type to hold a list of performance counters */
using perf_counters_t = std::vector<uint64_t>;

/*
 * Standard list of performance events independent of hardware or backend.
 *
 * Declared `inline` so that every translation unit including this header
 * refers to the same array object rather than a private internal-linkage
 * copy.
 */
inline constexpr std::array<const char*, 2> ProfilerPerfEvents = {
    /*
     * Number of Processing Element (PE) cycles between two points of interest
     * in time. This should correlate positively with wall-time. Measured in
     * uint64_t. PE can be non cpu. TBD reporting behavior for multiple PEs
     * participating (i.e. threadpool).
     */
    "cycles",

    /*
     * Number of PE instructions between two points of interest in time. This
     * should correlate positively with wall time and the amount of computation
     * (i.e. work). Across repeat executions, the number of instructions should
     * be more or less invariant. Measured in uint64_t. PE can be non cpu.
     */
    "instructions"};

} // namespace torch::profiler