xref: /aosp_15_r20/external/tensorflow/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 You may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7    http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h"
17 
18 #include <algorithm>
19 #include <stack>
20 #include <string>
21 #include <utility>
22 #include <vector>
23 
24 #include "absl/algorithm/container.h"
25 #include "absl/strings/str_cat.h"
26 #include "absl/strings/string_view.h"
27 #include "absl/types/optional.h"
28 #include "tensorflow/core/lib/gtl/map_util.h"
29 #include "tensorflow/core/platform/logging.h"
30 #include "tensorflow/core/platform/protobuf.h"
31 #include "tensorflow/core/platform/types.h"
32 #include "tensorflow/core/profiler/protobuf/xplane.pb.h"
33 #include "tensorflow/core/profiler/utils/math_utils.h"
34 #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
35 #include "tensorflow/core/profiler/utils/timespan.h"
36 #include "tensorflow/core/profiler/utils/xplane_schema.h"
37 #include "tensorflow/core/profiler/utils/xplane_visitor.h"
38 
39 namespace tensorflow {
40 namespace profiler {
41 
42 namespace {
43 
Decode(absl::string_view function_name,absl::string_view mode)44 std::pair<TfFunctionExecutionMode, TfFunctionCompiler> Decode(
45     absl::string_view function_name, absl::string_view mode) {
46   // mode is one of ["eager", "concrete", "traced-xla", "traced-nonXla",
47   // "notTraced-xla", "notTraced-nonXla"]
48   if (mode == "eager") return {EAGER_MODE, INVALID_COMPILER};
49   if (mode == "concrete") return {CONCRETE_MODE, INVALID_COMPILER};
50   if (mode == "traced-xla") return {TRACED_MODE, XLA_COMPILER};
51   if (mode == "traced-nonXla") return {TRACED_MODE, OTHER_COMPILER};
52   if (mode == "notTraced-xla") return {NOT_TRACED_MODE, XLA_COMPILER};
53   if (mode == "notTraced-nonXla") return {NOT_TRACED_MODE, OTHER_COMPILER};
54   // Shouldn't reach here.
55   LOG(ERROR) << absl::StrCat("tf-function '", function_name,
56                              "' has an unexpected execution mode '", mode, "'")
57              << std::endl;
58   return {INVALID_MODE, INVALID_COMPILER};
59   DCHECK(false);
60 }
61 
ComputeExpensiveCallPercent(const TfFunction & tf_function)62 double ComputeExpensiveCallPercent(const TfFunction& tf_function) {
63   // Computes the expensiveness in terms of time (rather than count).
64   uint64 total_call_time_ps = 0;
65   uint64 expensive_call_time_ps = 0;
66   for (const auto& mode_metrics : tf_function.metrics()) {
67     const auto mode = mode_metrics.first;
68     const auto& metrics = mode_metrics.second;
69     total_call_time_ps += metrics.self_time_ps();
70     if (mode == TRACED_MODE || mode == EAGER_MODE) {
71       expensive_call_time_ps += metrics.self_time_ps();
72     }
73   }
74   return SafeDivide(100.0 * expensive_call_time_ps, total_call_time_ps);
75 }
76 
77 // Each invocation of a tf-function creates an ActivationRecord.
78 struct ActivationRecord {
79   std::string function_name;               // name of the tf-function.
80   Timespan timespan;                       // timespan of this invocation.
81   TfFunctionExecutionMode execution_mode;  // execution mode.
82   TfFunctionCompiler compiler;             // compiler used.
83   int64_t tracing_count;  // the total tracing count of this function when this
84                           // invocation happened.
85   uint64 children_duration_ps;  // Sum of the duration of all (immediate)
86                                 // children tf-functions of this function.
ActivationRecordtensorflow::profiler::__anon25e3d4340111::ActivationRecord87   ActivationRecord()
88       : function_name(""),
89         execution_mode(INVALID_MODE),
90         compiler(INVALID_COMPILER),
91         tracing_count(0),
92         children_duration_ps(0) {}
ActivationRecordtensorflow::profiler::__anon25e3d4340111::ActivationRecord93   ActivationRecord(absl::string_view name, const Timespan& timespan,
94                    TfFunctionExecutionMode exe_mode,
95                    TfFunctionCompiler compiler, int64_t tracing_cnt)
96       : function_name(std::string(name)),
97         timespan(timespan),
98         execution_mode(exe_mode),
99         compiler(compiler),
100         tracing_count(tracing_cnt),
101         children_duration_ps(0) {}
DebugStringtensorflow::profiler::__anon25e3d4340111::ActivationRecord102   std::string DebugString() const {
103     return absl::StrCat("{", function_name, ", ",
104                         TfFunctionExecutionMode_Name(execution_mode), ", ",
105                         TfFunctionCompiler_Name(compiler),
106                         ", tracing_count:", tracing_count,
107                         ", children_duration:", children_duration_ps,
108                         " ps, timespan:", timespan.DebugString(), "}");
109   }
110 };
111 
112 // Entry or exit point of a tf-function.
113 struct EntryOrExit {
114   bool is_entry;        // true for entry, false for exit.
115   int64_t index;        // index to the ActivationRecord.
116   uint64 timestamp_ps;  // the time when this entry/exit happens.
EntryOrExittensorflow::profiler::__anon25e3d4340111::EntryOrExit117   EntryOrExit() : is_entry(false), index(-1), timestamp_ps(0) {}
EntryOrExittensorflow::profiler::__anon25e3d4340111::EntryOrExit118   EntryOrExit(bool is_entry, int64_t index, uint64 timestamp_ps)
119       : is_entry(is_entry), index(index), timestamp_ps(timestamp_ps) {}
DebugStringtensorflow::profiler::__anon25e3d4340111::EntryOrExit120   std::string DebugString() const {
121     std::string entry_or_exit = is_entry ? "entry, " : "exit,  ";
122     return absl::StrCat("{", entry_or_exit, "idx:", index,
123                         ", timestamp:", timestamp_ps, "}");
124   }
125 };
126 
CombineCompilers(TfFunctionCompiler a,TfFunctionCompiler b)127 TfFunctionCompiler CombineCompilers(TfFunctionCompiler a,
128                                     TfFunctionCompiler b) {
129   if (a == INVALID_COMPILER) return b;
130   if (b == INVALID_COMPILER) return a;
131   if (a == b) return a;
132   return MIXED_COMPILER;
133 }
134 
CombineTfFunctionMetrics(const TfFunctionMetrics & src,TfFunctionMetrics * dst)135 void CombineTfFunctionMetrics(const TfFunctionMetrics& src,
136                               TfFunctionMetrics* dst) {
137   dst->set_count(src.count() + dst->count());
138   dst->set_self_time_ps(src.self_time_ps() + dst->self_time_ps());
139 }
140 
CombineTfFunction(const TfFunction & src,TfFunction * dst)141 void CombineTfFunction(const TfFunction& src, TfFunction* dst) {
142   dst->set_total_tracing_count(
143       std::max(src.total_tracing_count(), dst->total_tracing_count()));
144   dst->set_compiler(CombineCompilers(src.compiler(), dst->compiler()));
145   for (const auto& mode_metrics : src.metrics()) {
146     int32_t execution_mode = mode_metrics.first;
147     const TfFunctionMetrics& src_metrics = mode_metrics.second;
148     TfFunctionMetrics* dst_metrics =
149         gtl::FindOrNull(*dst->mutable_metrics(), execution_mode);
150     if (dst_metrics == nullptr) {
151       (*dst->mutable_metrics())[execution_mode] = src_metrics;
152     } else {
153       CombineTfFunctionMetrics(src_metrics, dst_metrics);
154     }
155   }
156   dst->set_expensive_call_percent(ComputeExpensiveCallPercent(*dst));
157 }
158 
159 // Execution history of all tf-functions invoked.
160 class TfFunctionExecutions {
161  public:
TfFunctionExecutions(const XLineVisitor & line)162   explicit TfFunctionExecutions(const XLineVisitor& line) {
163     // Creates points_ and activations_ from line.
164     line.ForEachEvent([&](const XEventVisitor& event) {
165       absl::string_view mode;
166       int64_t tracing_count = 0;
167       event.ForEachStat([&mode, &tracing_count](const XStatVisitor& stat) {
168         if (!stat.Type().has_value()) return;
169         switch (stat.Type().value()) {
170           case StatType::kTfFunctionCall:
171             mode = stat.StrOrRefValue();
172             break;
173           case StatType::kTfFunctionTracingCount:
174             tracing_count = stat.IntValue();
175             break;
176         }
177       });
178       if (mode.empty()) return;
179 
180       // event is a tf-function.
181       int64_t index = activations_.size();
182       auto timespan = event.GetTimespan();
183       auto mode_compiler = Decode(event.Name(), mode);
184       ActivationRecord activation_record =
185           ActivationRecord(event.Name(), timespan, mode_compiler.first,
186                            mode_compiler.second, tracing_count);
187       activations_.push_back(activation_record);
188       EntryOrExit entry_point =
189           EntryOrExit(/*is_entry=*/true, index, timespan.begin_ps());
190       EntryOrExit exit_point =
191           EntryOrExit(/*is_entry=*/false, index, timespan.end_ps());
192       points_.push_back(entry_point);
193       points_.push_back(exit_point);
194     });
195 
196     // Sorts points_ in ascending order of timestamps.
197     auto ascending_in_timestamp = [](const EntryOrExit& a,
198                                      const EntryOrExit& b) {
199       return a.timestamp_ps < b.timestamp_ps;
200     };
201     absl::c_sort(points_, ascending_in_timestamp);
202 
203     // Calculates the children duration for each activation record.
204     CalculateChildrenDurations();
205   }
206 
DebugString() const207   std::string DebugString() const {
208     std::string result = "\nActivations:\n";
209     for (int i = 0, end = activations_.size(); i < end; i++) {
210       absl::StrAppend(&result, "[", i, "] ", activations_[i].DebugString(),
211                       "\n");
212     }
213     absl::StrAppend(&result, "tf-function Entry/Exit Points:\n");
214     for (const auto& pt : points_) {
215       absl::StrAppend(&result, pt.DebugString(), "\n");
216     }
217     return result;
218   }
219 
220   // Converts this execution history to a TfFunctionDb.
ConvertToTfFunctionDb()221   TfFunctionDb ConvertToTfFunctionDb() {
222     TfFunctionDb result;
223     for (const auto& record : activations_) {
224       TfFunction* fun = &(*result.mutable_tf_functions())[record.function_name];
225       fun->set_total_tracing_count(
226           std::max(static_cast<int64_t>(fun->total_tracing_count()),
227                    record.tracing_count));
228       fun->set_compiler(CombineCompilers(fun->compiler(), record.compiler));
229       // The self-time of this function is the difference between the duration
230       // of this function and the duration of its children.
231       uint64 self_time_ps =
232           record.timespan.duration_ps() - record.children_duration_ps;
233       // Updates the metrics for this execution mode with this invocation.
234       TfFunctionMetrics* metrics =
235           &(*fun->mutable_metrics())[record.execution_mode];
236       metrics->set_count(metrics->count() + 1);
237       metrics->set_self_time_ps(metrics->self_time_ps() + self_time_ps);
238     }
239     for (auto& name_fun : *result.mutable_tf_functions()) {
240       TfFunction& fun = name_fun.second;
241       fun.set_expensive_call_percent(ComputeExpensiveCallPercent(fun));
242     }
243     return result;
244   }
245 
246   // Calculates the children duration of every tf-function.
CalculateChildrenDurations()247   void CalculateChildrenDurations() {
248     std::stack<int64_t> call_stack;
249     for (const auto& pt : points_) {
250       if (pt.is_entry) {
251         // Function entry.
252         call_stack.push(pt.index);
253       } else {
254         // Function exit.
255         DCHECK(call_stack.top() == pt.index);  // must be well nested.
256         uint64 call_duration = activations_[pt.index].timespan.duration_ps();
257         call_stack.pop();
258         if (!call_stack.empty()) {
259           // call_stack.top() is the parent tf-function; adds call_duration to
260           // its children_duration.
261           activations_[call_stack.top()].children_duration_ps += call_duration;
262         }
263       }
264     }
265   }
266 
267  private:
268   // ActivationRecords for all tf-function invocations.
269   std::vector<ActivationRecord> activations_;
270   // Entry and exit points of all invocations.
271   std::vector<EntryOrExit> points_;
272 };
273 
274 }  // namespace
275 
DebugString(const TfFunctionDb & tf_function_db)276 std::string DebugString(const TfFunctionDb& tf_function_db) {
277   std::string str;
278   protobuf::TextFormat::PrintToString(tf_function_db, &str);
279   return str;
280 }
281 
CombineTfFunctionDb(const TfFunctionDb & src,TfFunctionDb * dst)282 void CombineTfFunctionDb(const TfFunctionDb& src, TfFunctionDb* dst) {
283   for (const auto& name_function : src.tf_functions()) {
284     const auto& name = name_function.first;
285     const auto& src_fun = name_function.second;
286     TfFunction* dst_fun = gtl::FindOrNull(*dst->mutable_tf_functions(), name);
287     if (dst_fun == nullptr) {
288       (*dst->mutable_tf_functions())[name] = src_fun;
289     } else {
290       CombineTfFunction(src_fun, dst_fun);
291     }
292   }
293 }
294 
ConvertHostThreadsXLineToTfFunctionDb(const XLineVisitor & line)295 TfFunctionDb ConvertHostThreadsXLineToTfFunctionDb(const XLineVisitor& line) {
296   TfFunctionExecutions tf_function_executions = TfFunctionExecutions(line);
297   return tf_function_executions.ConvertToTfFunctionDb();
298 }
299 
300 }  // namespace profiler
301 }  // namespace tensorflow
302