1 // Copyright 2023-present Facebook. All Rights Reserved.
2
3 #pragma once
4
5 #include <c10/macros/Export.h>
6 #include <array>
7 #include <chrono>
8 #include <cstddef>
9 #include <cstdint>
10 #include <ctime>
11 #include <functional>
12 #include <type_traits>
13
14 #if defined(C10_IOS) && defined(C10_MOBILE)
15 #include <sys/time.h> // for gettimeofday()
16 #endif
17
18 #if defined(__i386__) || defined(__x86_64__) || defined(__amd64__)
19 #define C10_RDTSC
20 #if defined(_MSC_VER)
21 #include <intrin.h>
22 #elif defined(__CUDACC__) || defined(__HIPCC__)
23 #undef C10_RDTSC
24 #elif defined(__clang__)
25 // `__rdtsc` is available by default.
26 // NB: This has to be first, because Clang will also define `__GNUC__`
27 #elif defined(__GNUC__)
28 #include <x86intrin.h>
29 #else
30 #undef C10_RDTSC
31 #endif
32 #endif
33
34 namespace c10 {
35
// Signed 64-bit integer type used for the time values produced by the helpers
// in this header.
using time_t = std::int64_t;

// Clock used where a steady source is wanted: `high_resolution_clock` when the
// implementation reports it as steady, `steady_clock` otherwise.
using steady_clock_t = std::conditional<
    std::chrono::high_resolution_clock::is_steady,
    std::chrono::high_resolution_clock,
    std::chrono::steady_clock>::type;
41
// Reads `std::chrono::system_clock` and returns the elapsed time since that
// clock's epoch as a count of nanoseconds.
inline time_t getTimeSinceEpoch() {
  using std::chrono::duration_cast;
  using std::chrono::nanoseconds;
  using std::chrono::system_clock;

  const auto since_epoch = system_clock::now().time_since_epoch();
  return duration_cast<nanoseconds>(since_epoch).count();
}
46
// Returns a timestamp expressed in nanoseconds. The backing clock is chosen at
// compile time:
//   * C10_IOS && C10_MOBILE: `gettimeofday()` (seconds + microseconds).
//   * _WIN32 or __MACH__:    `steady_clock_t`, measured from that clock's epoch.
//   * otherwise:             `clock_gettime()`, reading CLOCK_MONOTONIC when
//                            `allow_monotonic` is true, else CLOCK_REALTIME.
// `allow_monotonic` is only consulted on the `clock_gettime()` path.
inline time_t getTime(bool allow_monotonic = false) {
#if defined(C10_IOS) && defined(C10_MOBILE)
  // clock_gettime is only available on iOS 10.0 or newer. Unlike OS X, iOS
  // can't rely on CLOCK_REALTIME, as it is defined no matter if clock_gettime
  // is implemented or not
  struct timeval tv;
  gettimeofday(&tv, nullptr);
  const auto whole_seconds = static_cast<time_t>(tv.tv_sec);
  const auto leftover_micros = static_cast<time_t>(tv.tv_usec);
  return whole_seconds * 1'000'000'000 + leftover_micros * 1000;
#elif defined(_WIN32) || defined(__MACH__)
  const auto elapsed = steady_clock_t::now().time_since_epoch();
  return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
#else
  // clock_gettime is *much* faster than std::chrono implementation on Linux
  const auto clock_id = allow_monotonic ? CLOCK_MONOTONIC : CLOCK_REALTIME;
  struct timespec ts {};
  clock_gettime(clock_id, &ts);
  const auto whole_seconds = static_cast<time_t>(ts.tv_sec);
  const auto leftover_nanos = static_cast<time_t>(ts.tv_nsec);
  return whole_seconds * 1'000'000'000 + leftover_nanos;
#endif
}
72
73 // We often do not need to capture true wall times. If a fast mechanism such
74 // as TSC is available we can use that instead and convert back to epoch time
75 // during post processing. This greatly reduce the clock's contribution to
76 // profiling.
77 // http://btorpey.github.io/blog/2014/02/18/clock-sources-in-linux/
78 // https://quick-bench.com/q/r8opkkGZSJMu9wM_XTbDouq-0Io
79 // TODO: We should use
80 // `https://github.com/google/benchmark/blob/main/src/cycleclock.h`
getApproximateTime()81 inline auto getApproximateTime() {
82 #if defined(C10_RDTSC)
83 return static_cast<uint64_t>(__rdtsc());
84 #else
85 return getTime();
86 #endif
87 }
88
89 using approx_time_t = decltype(getApproximateTime());
90 static_assert(
91 std::is_same_v<approx_time_t, int64_t> ||
92 std::is_same_v<approx_time_t, uint64_t>,
93 "Expected either int64_t (`getTime`) or uint64_t (some TSC reads).");
94
95 // Convert `getCount` results to Nanoseconds since unix epoch.
96 class C10_API ApproximateClockToUnixTimeConverter final {
97 public:
98 ApproximateClockToUnixTimeConverter();
99 std::function<time_t(approx_time_t)> makeConverter();
100
101 struct UnixAndApproximateTimePair {
102 time_t t_;
103 approx_time_t approx_t_;
104 };
105 static UnixAndApproximateTimePair measurePair();
106
107 private:
108 static constexpr size_t replicates = 1001;
109 using time_pairs = std::array<UnixAndApproximateTimePair, replicates>;
110 time_pairs measurePairs();
111
112 time_pairs start_times_;
113 };
114
115 } // namespace c10
116