1 #include <c10/util/ApproximateClock.h> 2 #include <c10/util/ArrayRef.h> 3 #include <c10/util/irange.h> 4 #include <fmt/format.h> 5 6 namespace c10 { 7 ApproximateClockToUnixTimeConverter()8ApproximateClockToUnixTimeConverter::ApproximateClockToUnixTimeConverter() 9 : start_times_(measurePairs()) {} 10 11 ApproximateClockToUnixTimeConverter::UnixAndApproximateTimePair measurePair()12ApproximateClockToUnixTimeConverter::measurePair() { 13 // Take a measurement on either side to avoid an ordering bias. 14 auto fast_0 = getApproximateTime(); 15 auto wall = std::chrono::system_clock::now(); 16 auto fast_1 = getApproximateTime(); 17 18 TORCH_INTERNAL_ASSERT(fast_1 >= fast_0, "getCount is non-monotonic."); 19 auto t = std::chrono::duration_cast<std::chrono::nanoseconds>( 20 wall.time_since_epoch()); 21 22 // `x + (y - x) / 2` is a more numerically stable average than `(x + y) / 2`. 23 return {t.count(), fast_0 + (fast_1 - fast_0) / 2}; 24 } 25 26 ApproximateClockToUnixTimeConverter::time_pairs measurePairs()27ApproximateClockToUnixTimeConverter::measurePairs() { 28 static constexpr auto n_warmup = 5; 29 for (C10_UNUSED const auto _ : c10::irange(n_warmup)) { 30 getApproximateTime(); 31 static_cast<void>(steady_clock_t::now()); 32 } 33 34 time_pairs out; 35 for (const auto i : c10::irange(out.size())) { 36 out[i] = measurePair(); 37 } 38 return out; 39 } 40 41 std::function<time_t(approx_time_t)> ApproximateClockToUnixTimeConverter:: makeConverter()42 makeConverter() { 43 auto end_times = measurePairs(); 44 45 // Compute the real time that passes for each tick of the approximate clock. 46 std::array<long double, replicates> scale_factors{}; 47 for (const auto i : c10::irange(replicates)) { 48 auto delta_ns = end_times[i].t_ - start_times_[i].t_; 49 auto delta_approx = end_times[i].approx_t_ - start_times_[i].approx_t_; 50 scale_factors[i] = (double)delta_ns / (double)delta_approx; 51 } 52 std::sort(scale_factors.begin(), scale_factors.end()); 53 long double scale_factor = scale_factors[replicates / 2 + 1]; 54 55 // We shift all times by `t0` for better numerics. Double precision only has 56 // 16 decimal digits of accuracy, so if we blindly multiply times by 57 // `scale_factor` we may suffer from precision loss. The choice of `t0` is 58 // mostly arbitrary; we just need a factor that is the correct order of 59 // magnitude to bring the intermediate values closer to zero. We are not, 60 // however, guaranteed that `t0_approx` is *exactly* the getApproximateTime 61 // equivalent of `t0`; it is only an estimate that we have to fine tune. 62 auto t0 = start_times_[0].t_; 63 auto t0_approx = start_times_[0].approx_t_; 64 std::array<double, replicates> t0_correction{}; 65 for (const auto i : c10::irange(replicates)) { 66 auto dt = start_times_[i].t_ - t0; 67 auto dt_approx = 68 (double)(start_times_[i].approx_t_ - t0_approx) * scale_factor; 69 t0_correction[i] = dt - (time_t)dt_approx; // NOLINT 70 } 71 t0 += t0_correction[t0_correction.size() / 2 + 1]; // NOLINT 72 73 return [=](approx_time_t t_approx) { 74 // See above for why this is more stable than `A * t_approx + B`. 75 return (time_t)((double)(t_approx - t0_approx) * scale_factor) + t0; 76 }; 77 } 78 79 } // namespace c10 80