1 #ifdef USE_KINETO
2 #include <ATen/Context.h>
3 #include <libkineto.h>
4 #include <torch/csrc/autograd/profiler_kineto.h>
5 #include <chrono>
6 #include <thread>
7
// On-demand tracing is not supported on Apple or edge platforms.
9 #if defined(__APPLE__) || defined(EDGE_PROFILER_USE_KINETO)
10 #define ENABLE_GLOBAL_OBSERVER (0)
11 #else
12 #define ENABLE_GLOBAL_OBSERVER (1)
13 #endif
14
15 namespace torch {
16
17 namespace profiler::impl {
18
19 namespace {
20
21 using namespace torch::autograd::profiler;
22
23 class LibKinetoClient : public libkineto::ClientInterface {
24 public:
init()25 void init() override {}
26
prepare(bool report_input_shapes=false,bool profile_memory=false,bool with_stack=false,bool with_flops=false,bool with_modules=false)27 void prepare(
28 bool report_input_shapes = false,
29 bool profile_memory = false,
30 bool with_stack = false,
31 bool with_flops = false,
32 bool with_modules = false) override {
33 reportInputShapes_ = report_input_shapes;
34 profileMemory_ = profile_memory;
35 withStack_ = with_stack;
36 withFlops_ = with_flops;
37 withModules_ = with_modules;
38 }
39
start()40 void start() override {
41 ProfilerConfig cfg{
42 ProfilerState::KINETO_ONDEMAND,
43 /*report_input_shapes=*/reportInputShapes_,
44 /*profile_memory=*/profileMemory_,
45 /*with_stack=*/withStack_,
46 /*with_flops=*/withFlops_,
47 /*with_modules=*/withModules_};
48 std::set<ActivityType> activities{ActivityType::CPU};
49 std::unordered_set<at::RecordScope> scopes;
50 scopes.insert(at::RecordScope::FUNCTION);
51 scopes.insert(at::RecordScope::USER_SCOPE);
52 scopes.insert(at::RecordScope::BACKWARD_FUNCTION);
53 enableProfiler(cfg, activities, scopes);
54 }
55
stop()56 void stop() override {
57 (void)disableProfiler();
58 }
59
60 private:
61 // Temporarily disable shape collection until
62 // we re-roll out the feature for on-demand cases
63 bool reportInputShapes_{false};
64 bool profileMemory_{false};
65 bool withStack_{false};
66 bool withFlops_{false};
67 bool withModules_{false};
68 };
69
70 } // namespace
71
72 } // namespace profiler::impl
73
74 #if ENABLE_GLOBAL_OBSERVER
75 namespace {
76
/// Reads the Kineto daemon init delay, in seconds, from the
/// KINETO_DAEMON_INIT_DELAY_S environment variable.
///
/// @return The integer std::stoi parses from the variable, or -1 when the
///         variable is unset, does not start with a number, or holds a value
///         that does not fit in an int.
int get_init_delay() {
  const char* delay_c = std::getenv("KINETO_DAEMON_INIT_DELAY_S");
  if (delay_c == nullptr) {
    return -1;
  }
  try {
    return std::stoi(std::string{delay_c});
  } catch (const std::invalid_argument&) {
    // No leading digits at all (e.g. "abc" or an empty string).
    return -1;
  } catch (const std::out_of_range&) {
    // The number is too large or too small for an int. This function runs
    // from the constructor of a namespace-scope object, so letting the
    // exception escape would terminate the process during static
    // initialization.
    return -1;
  }
}
89
90 struct RegisterLibKinetoClient {
RegisterLibKinetoClienttorch::__anon62437ba30211::RegisterLibKinetoClient91 RegisterLibKinetoClient() {
92 static profiler::impl::LibKinetoClient client;
93 libkineto::api().registerClient(&client);
94
95 auto kineto_init = []() {
96 libkineto_init(
97 /*cpuOnly=*/!(at::hasCUDA() || at::hasXPU() || at::hasMTIA()),
98 /*logOnError=*/true);
99 libkineto::api().suppressLogMessages();
100 };
101
102 if (std::getenv("KINETO_USE_DAEMON") != nullptr) {
103 int init_delay_s = get_init_delay();
104 if (init_delay_s > 0) {
105 std::thread t([init_delay_s, kineto_init]() {
106 std::this_thread::sleep_for(std::chrono::seconds(init_delay_s));
107 kineto_init();
108 });
109 t.detach();
110 } else {
111 kineto_init();
112 }
113 }
114 }
115 } register_libkineto_client;
116
117 } // namespace
118 #endif
119
120 } // namespace torch
121 #endif // USE_KINETO
122