xref: /aosp_15_r20/external/pytorch/torch/csrc/profiler/python/combined_traceback.cpp (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #include <torch/csrc/profiler/python/combined_traceback.h>
2 #include <torch/csrc/python_headers.h>
3 #include <torch/csrc/utils/pybind.h>
4 #include <torch/csrc/utils/pythoncapi_compat.h>
5 namespace py = pybind11;
6 
7 namespace torch {
8 // Locking:
9 // We need to free PyCodeObjects when ~StackContext runs, but
10 // CUDACachingAllocator may hold its device lock when ~StackContext runs.
11 
12 // Because the thread calling the allocator _may_ hold the GIL,
13 // attempting to lock the GIL in ~StackContext can deadlock:
14 // T0: GIL Lock -> Call Allocator    ->| Waiting Device Lock
15 // T1: Call Allocator -> Device Lock ->| Waiting GIL Lock
16 // Instead the destructor defers freeing stack frames by putting them in
17 // to_free_frames. We still need a lock to manage this vector, but
18 // we can ensure an overall lock ordering of GIL -> device_lock ->
19 // to_free_frames_mutex because ::gather is called outside of the device lock.
20 
21 namespace {
// Guards every access to to_free_frames below.
22 static std::mutex to_free_frames_mutex;
// Queue of frames whose code-object references still need to be dropped.
// PythonTraceback::release() appends to it; PythonTraceback::gather() and
// freeDeadCapturedTracebackFrames() drain it and Py_XDECREF each entry.
23 static std::vector<CapturedTraceback::PyFrame> to_free_frames;
24 struct PythonTraceback : public CapturedTraceback::Python {
gathertorch::__anon02d3f9bc0111::PythonTraceback25   std::vector<CapturedTraceback::PyFrame> gather() override {
26     if (!Py_IsInitialized()) {
27       return {};
28     }
29     std::vector<CapturedTraceback::PyFrame> frames;
30     py::gil_scoped_acquire acquire;
31     {
32       std::lock_guard<std::mutex> lock(to_free_frames_mutex);
33       for (CapturedTraceback::PyFrame f : to_free_frames) {
34         Py_XDECREF(f.code);
35       }
36       to_free_frames.clear();
37     }
38     PyFrameObject* f = PyEval_GetFrame();
39     Py_XINCREF(f);
40     while (f) {
41       frames.emplace_back(
42           CapturedTraceback::PyFrame{PyFrame_GetCode(f), PyFrame_GetLasti(f)});
43       auto f_back = PyFrame_GetBack(f);
44       Py_XDECREF(f);
45       f = f_back;
46     }
47     return frames;
48   }
releasetorch::__anon02d3f9bc0111::PythonTraceback49   void release(std::vector<CapturedTraceback::PyFrame>& frames) override {
50     std::lock_guard<std::mutex> lock(to_free_frames_mutex);
51     to_free_frames.insert(to_free_frames.end(), frames.begin(), frames.end());
52   }
53   using void_visitproc = int (*)(void* self, void* arg);
traversetorch::__anon02d3f9bc0111::PythonTraceback54   int traverse(
55       std::vector<CapturedTraceback::PyFrame>& frames,
56       void_visitproc visit,
57       void* arg) override {
58     for (auto& f : frames) {
59       Py_VISIT(f.code);
60     }
61     return 0;
62   }
cleartorch::__anon02d3f9bc0111::PythonTraceback63   int clear(std::vector<CapturedTraceback::PyFrame>& frames) override {
64     for (auto& f : frames) {
65       Py_CLEAR(f.code);
66     }
67     return 0;
68   }
appendSymbolizedtorch::__anon02d3f9bc0111::PythonTraceback69   void appendSymbolized(
70       const std::vector<CapturedTraceback::PyFrame>& to_symbolize,
71       SymbolizedTracebacks& result) override {
72     py::gil_scoped_acquire acquire;
73     py::str line_s = "line";
74     py::str name_s = "name";
75     py::str filename_s = "filename";
76 
77     auto torch = py::module::import("torch");
78     py::object stack_frames_for_code;
79     if (py::hasattr(torch, "_inductor")) {
80       py::object inductor = torch.attr("_inductor");
81       if (py::hasattr(inductor, "codecache")) {
82         stack_frames_for_code = inductor.attr("codecache")
83                                     .attr("PyCodeCache")
84                                     .attr("stack_frames_for_code");
85       }
86     }
87     for (const auto& f : to_symbolize) {
88       auto f_code = (PyCodeObject*)f.code;
89       py::handle filename = f_code->co_filename;
90       py::handle funcname = f_code->co_name;
91       auto lineno = PyCode_Addr2Line(f_code, f.lasti);
92       result.tracebacks.emplace_back();
93       result.tracebacks.back().push_back(result.all_frames.size());
94       result.all_frames.emplace_back(unwind::Frame{
95           py::cast<std::string>(filename),
96           py::cast<std::string>(funcname),
97           (uint64_t)lineno});
98       // find all the additional frames associated with inductor generated
99       // code
100       if (stack_frames_for_code.ptr()) {
101         py::object extra = stack_frames_for_code(filename, lineno);
102         if (!extra.is_none()) {
103           for (py::handle h : extra) {
104             result.tracebacks.back().push_back(result.all_frames.size());
105             result.all_frames.emplace_back(unwind::Frame{
106                 py::cast<std::string>(h[filename_s]),
107                 py::cast<std::string>(h[name_s]),
108                 py::cast<uint64_t>(h[line_s])});
109           }
110         }
111       }
112     }
113   }
114 };
115 
116 } // namespace
117 
py_symbolize(std::vector<CapturedTraceback * > & to_symbolize)118 std::vector<py::object> py_symbolize(
119     std::vector<CapturedTraceback*>& to_symbolize) {
120   // we dedup repeated to_symbolize objects to prevent
121   // creating a bunch of duplicated frame objects
122   std::unordered_map<CapturedTraceback*, uint64_t> cached_frames;
123   std::vector<CapturedTraceback*> unique_frames;
124   for (const auto& sc : to_symbolize) {
125     auto it = cached_frames.find(sc);
126     if (it == cached_frames.end()) {
127       cached_frames.insert({sc, unique_frames.size()});
128       unique_frames.push_back(sc);
129     }
130   }
131   auto s = symbolize(unique_frames);
132 
133   py::str line_s = "line";
134   py::str name_s = "name";
135   py::str filename_s = "filename";
136   std::vector<py::dict> all_frames;
137   for (const auto& f : s.all_frames) {
138     py::dict d;
139     d[name_s] = f.funcname;
140     d[filename_s] = f.filename;
141     d[line_s] = f.lineno;
142     all_frames.emplace_back(std::move(d));
143   }
144 
145   std::vector<py::object> py_unique_frames;
146   for (const auto& t : s.tracebacks) {
147     py::list l;
148     for (const auto& e : t) {
149       l.append(all_frames.at(e));
150     }
151     py_unique_frames.push_back(std::move(l));
152   }
153 
154   std::vector<py::object> result;
155   result.reserve(to_symbolize.size());
156   for (const auto& sc : to_symbolize) {
157     result.push_back(py_unique_frames.at(cached_frames.at(sc)));
158   }
159   return result;
160 }
161 
freeDeadCapturedTracebackFrames()162 void freeDeadCapturedTracebackFrames() {
163   std::lock_guard<std::mutex> lock(to_free_frames_mutex);
164   for (CapturedTraceback::PyFrame f : to_free_frames) {
165     Py_XDECREF(f.code);
166   }
167   to_free_frames.clear();
168 }
169 
installCapturedTracebackPython()170 void installCapturedTracebackPython() {
171   CapturedTraceback::addPythonUnwinder(new PythonTraceback());
172 }
173 
174 } // namespace torch
175