1 #include <torch/csrc/profiler/python/combined_traceback.h>
2 #include <torch/csrc/python_headers.h>
3 #include <torch/csrc/utils/pybind.h>
4 #include <torch/csrc/utils/pythoncapi_compat.h>
5 namespace py = pybind11;
6
7 namespace torch {
8 // Locking:
9 // We need to free PyCodeObjects when ~StackContext runs, but
10 // CUDACachingAllocator may hold its device lock when ~StackContext runs.
11
12 // Because the thread calling the allocator _may_ hold the GIL,
13 // attempting to lock the GIL in ~StackContext can deadlock:
14 // T0: GIL Lock -> Call Allocator ->| Waiting Device Lock
15 // T1: Call Allocator -> Device Lock ->| Waiting GIL Lock
16 // Instead the destructor defers freeing stack frames by putting them in
17 // to_free_frames. We still need a lock to manage this vector, but
18 // we can ensure an overall lock ordering of GIL -> device_lock ->
19 // to_free_frames_mutex because ::gather is called outside of the device lock.
20
21 namespace {
22 static std::mutex to_free_frames_mutex;
23 static std::vector<CapturedTraceback::PyFrame> to_free_frames;
24 struct PythonTraceback : public CapturedTraceback::Python {
gathertorch::__anon02d3f9bc0111::PythonTraceback25 std::vector<CapturedTraceback::PyFrame> gather() override {
26 if (!Py_IsInitialized()) {
27 return {};
28 }
29 std::vector<CapturedTraceback::PyFrame> frames;
30 py::gil_scoped_acquire acquire;
31 {
32 std::lock_guard<std::mutex> lock(to_free_frames_mutex);
33 for (CapturedTraceback::PyFrame f : to_free_frames) {
34 Py_XDECREF(f.code);
35 }
36 to_free_frames.clear();
37 }
38 PyFrameObject* f = PyEval_GetFrame();
39 Py_XINCREF(f);
40 while (f) {
41 frames.emplace_back(
42 CapturedTraceback::PyFrame{PyFrame_GetCode(f), PyFrame_GetLasti(f)});
43 auto f_back = PyFrame_GetBack(f);
44 Py_XDECREF(f);
45 f = f_back;
46 }
47 return frames;
48 }
releasetorch::__anon02d3f9bc0111::PythonTraceback49 void release(std::vector<CapturedTraceback::PyFrame>& frames) override {
50 std::lock_guard<std::mutex> lock(to_free_frames_mutex);
51 to_free_frames.insert(to_free_frames.end(), frames.begin(), frames.end());
52 }
53 using void_visitproc = int (*)(void* self, void* arg);
traversetorch::__anon02d3f9bc0111::PythonTraceback54 int traverse(
55 std::vector<CapturedTraceback::PyFrame>& frames,
56 void_visitproc visit,
57 void* arg) override {
58 for (auto& f : frames) {
59 Py_VISIT(f.code);
60 }
61 return 0;
62 }
cleartorch::__anon02d3f9bc0111::PythonTraceback63 int clear(std::vector<CapturedTraceback::PyFrame>& frames) override {
64 for (auto& f : frames) {
65 Py_CLEAR(f.code);
66 }
67 return 0;
68 }
appendSymbolizedtorch::__anon02d3f9bc0111::PythonTraceback69 void appendSymbolized(
70 const std::vector<CapturedTraceback::PyFrame>& to_symbolize,
71 SymbolizedTracebacks& result) override {
72 py::gil_scoped_acquire acquire;
73 py::str line_s = "line";
74 py::str name_s = "name";
75 py::str filename_s = "filename";
76
77 auto torch = py::module::import("torch");
78 py::object stack_frames_for_code;
79 if (py::hasattr(torch, "_inductor")) {
80 py::object inductor = torch.attr("_inductor");
81 if (py::hasattr(inductor, "codecache")) {
82 stack_frames_for_code = inductor.attr("codecache")
83 .attr("PyCodeCache")
84 .attr("stack_frames_for_code");
85 }
86 }
87 for (const auto& f : to_symbolize) {
88 auto f_code = (PyCodeObject*)f.code;
89 py::handle filename = f_code->co_filename;
90 py::handle funcname = f_code->co_name;
91 auto lineno = PyCode_Addr2Line(f_code, f.lasti);
92 result.tracebacks.emplace_back();
93 result.tracebacks.back().push_back(result.all_frames.size());
94 result.all_frames.emplace_back(unwind::Frame{
95 py::cast<std::string>(filename),
96 py::cast<std::string>(funcname),
97 (uint64_t)lineno});
98 // find all the additional frames associated with inductor generated
99 // code
100 if (stack_frames_for_code.ptr()) {
101 py::object extra = stack_frames_for_code(filename, lineno);
102 if (!extra.is_none()) {
103 for (py::handle h : extra) {
104 result.tracebacks.back().push_back(result.all_frames.size());
105 result.all_frames.emplace_back(unwind::Frame{
106 py::cast<std::string>(h[filename_s]),
107 py::cast<std::string>(h[name_s]),
108 py::cast<uint64_t>(h[line_s])});
109 }
110 }
111 }
112 }
113 }
114 };
115
116 } // namespace
117
py_symbolize(std::vector<CapturedTraceback * > & to_symbolize)118 std::vector<py::object> py_symbolize(
119 std::vector<CapturedTraceback*>& to_symbolize) {
120 // we dedup repeated to_symbolize objects to prevent
121 // creating a bunch of duplicated frame objects
122 std::unordered_map<CapturedTraceback*, uint64_t> cached_frames;
123 std::vector<CapturedTraceback*> unique_frames;
124 for (const auto& sc : to_symbolize) {
125 auto it = cached_frames.find(sc);
126 if (it == cached_frames.end()) {
127 cached_frames.insert({sc, unique_frames.size()});
128 unique_frames.push_back(sc);
129 }
130 }
131 auto s = symbolize(unique_frames);
132
133 py::str line_s = "line";
134 py::str name_s = "name";
135 py::str filename_s = "filename";
136 std::vector<py::dict> all_frames;
137 for (const auto& f : s.all_frames) {
138 py::dict d;
139 d[name_s] = f.funcname;
140 d[filename_s] = f.filename;
141 d[line_s] = f.lineno;
142 all_frames.emplace_back(std::move(d));
143 }
144
145 std::vector<py::object> py_unique_frames;
146 for (const auto& t : s.tracebacks) {
147 py::list l;
148 for (const auto& e : t) {
149 l.append(all_frames.at(e));
150 }
151 py_unique_frames.push_back(std::move(l));
152 }
153
154 std::vector<py::object> result;
155 result.reserve(to_symbolize.size());
156 for (const auto& sc : to_symbolize) {
157 result.push_back(py_unique_frames.at(cached_frames.at(sc)));
158 }
159 return result;
160 }
161
freeDeadCapturedTracebackFrames()162 void freeDeadCapturedTracebackFrames() {
163 std::lock_guard<std::mutex> lock(to_free_frames_mutex);
164 for (CapturedTraceback::PyFrame f : to_free_frames) {
165 Py_XDECREF(f.code);
166 }
167 to_free_frames.clear();
168 }
169
installCapturedTracebackPython()170 void installCapturedTracebackPython() {
171 CapturedTraceback::addPythonUnwinder(new PythonTraceback());
172 }
173
174 } // namespace torch
175