/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/profiler/utils/event_span.h"

#include <string>
#include <utility>
#include <vector>

#include "absl/algorithm/container.h"
#include "absl/container/flat_hash_map.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
#include "tensorflow/core/profiler/utils/timespan.h"

namespace tensorflow {
namespace profiler {

namespace {

// Represents a boundary of an event.
struct EventBoundary {
  // Time at this boundary.
  uint64 time_ps;
  // Type of the event.
  EventType type;
  // True if this is the start of the event; False if this is the end.
  bool is_start;
  EventBoundary(uint64 time_ps, EventType type, bool is_start)
      : time_ps(time_ps), type(type), is_start(is_start) {}
};

// Returns true if EventBoundary a should appear before EventBoundary b.
bool CmpEventBoundaries(const EventBoundary& a, const EventBoundary& b) {
  if (a.time_ps == b.time_ps) {
    if (a.is_start == b.is_start) {
      // Puts the higher-priority type before the lower-priority type if they
      // have the same time and same boundary type.
      return a.type > b.type;
    } else {
      // Puts the "end" boundary before the "start" boundary if they have the
      // same time.
      return !a.is_start;
    }
  }
  // In ascending order of time.
  return a.time_ps < b.time_ps;
}
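// For example, with spans [0ps, 10ps] and [10ps, 20ps], the boundaries sort
// as start(0ps), end(10ps), start(10ps), end(20ps): the first event is closed
// before the second one opens.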

// Generates a vector of event boundaries from the given overlapped_events.
std::vector<EventBoundary> GenerateEventBoundaries(
    const std::vector<EventTypeSpan>& overlapped_events) {
  std::vector<EventBoundary> boundaries;
  boundaries.reserve(2 * overlapped_events.size());
  for (const auto& event : overlapped_events) {
    boundaries.push_back(
        {event.span.begin_ps(), event.type, /*is_start=*/true});
    boundaries.push_back({event.span.end_ps(), event.type, /*is_start=*/false});
  }
  absl::c_sort(boundaries, CmpEventBoundaries);
  return boundaries;
}

// A class to track the highest priority that an event should be assigned.
class PriorityTracker {
 private:
  // The current maximum priority.
  EventType current_max_priority_;
  // A count for each possible priority.
  std::vector<int64_t> priority_count_;

 public:
  PriorityTracker() {
    current_max_priority_ = UNKNOWN_TIME;
    priority_count_.resize(LAST_EVENT_TYPE + 1, 0);
  }
  // Updates current_max_priority_ and priority_count_[] given the boundary.
  // Returns the new current_max_priority_.
  EventType Update(const EventBoundary& boundary) {
    EventType event_type = boundary.type;
    bool is_start = boundary.is_start;
    if (is_start) {
      priority_count_[event_type]++;
      if (event_type > current_max_priority_) {
        current_max_priority_ = event_type;
      }
    } else {
      priority_count_[event_type]--;
      if (event_type == current_max_priority_ &&
          priority_count_[event_type] == 0) {
        // Reduces current_max_priority_ to the first event type (starting from
        // the highest priority) that has a non-zero count.
        bool found = false;
        for (int i = event_type - 1; i >= 0; i--) {
          if (priority_count_[i] > 0) {
            current_max_priority_ = static_cast<EventType>(i);
            found = true;
            break;
          }
        }
        if (!found) current_max_priority_ = UNKNOWN_TIME;
      }
    }
    return current_max_priority_;
  }
};
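// Usage sketch (illustrative; assumes DEVICE_COMPUTE_32 has a higher priority
// value than HOST_COMPUTE, which is not defined in this file):
//   PriorityTracker tracker;
//   tracker.Update({/*time_ps=*/0, HOST_COMPUTE, /*is_start=*/true});
//   tracker.Update({/*time_ps=*/5, DEVICE_COMPUTE_32, /*is_start=*/true});
//   // Closing the higher-priority event falls back to the next active type.
//   EventType t = tracker.Update(
//       {/*time_ps=*/8, DEVICE_COMPUTE_32, /*is_start=*/false});
//   // t == HOST_COMPUTE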

constexpr int kNumGenericEventTypes = GenericEventType::kLastGenericEventType -
                                      GenericEventType::kFirstGenericEventType +
                                      1;

using GenericEventTypeStrMap =
    absl::flat_hash_map<GenericEventType, absl::string_view>;

const GenericEventTypeStrMap& GetGenericEventTypeStrMap() {
  static const auto* generic_event_type_str_map = new GenericEventTypeStrMap({
      {kDeviceCompute, "Device compute"},
      {kDeviceToDevice, "Device to device"},
      {kDeviceCollectives, "Device collective communication"},
      {kHostCompute, "Host compute"},
      {kHostPrepare, "Kernel launch"},
      {kInput, "Input"},
      {kOutput, "Output"},
      {kCompile, "Compilation"},
      {kAllOthers, "All others"},
  });
  DCHECK_EQ(generic_event_type_str_map->size(), kNumGenericEventTypes);
  return *generic_event_type_str_map;
}

}  // namespace

absl::string_view GetGenericEventTypeStr(GenericEventType event_type) {
  return GetGenericEventTypeStrMap().at(event_type);
}

std::string PrintEventType(EventType event_type) {
  switch (event_type) {
    case UNKNOWN_TIME:
      return "unknown_time";
    case HOST_COMPUTE:
      return "host_compute";
    case HOST_COMPILE:
      return "host_compile";
    case HOST_TO_HOST:
      return "host_to_host";
    case HOST_TO_DEVICE:
      return "host_to_device";
    case HOST_PREPARE:
      return "host_prepare";
    case DEVICE_COLLECTIVES:
      return "device_collectives";
    case HOST_WAIT_INPUT:
      return "host_wait_input";
    case DEVICE_TO_DEVICE:
      return "device_to_device";
    case DEVICE_TO_HOST:
      return "device_to_host";
    case DEVICE_COMPUTE_32:
      return "device_compute_32";
    case DEVICE_COMPUTE_16:
      return "device_compute_16";
    case DEVICE_WAIT_DEVICE:
      return "device_wait_device";
    case DEVICE_WAIT_HOST:
      return "device_wait_host";
    default:
      return "unexpected";
  }
}

std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span) {
  return absl::StrCat("(", PrintEventType(event_type_span.type), ", ",
                      event_type_span.span.DebugString(), ")");
}

absl::string_view PrintStepMarkerType(StepMarkerType type) {
  switch (type) {
    case StepMarkerType::kExplicitHostStepMarker:
      return "ExplicitHostStepMarker";
    case StepMarkerType::kImplicitHostStepMarker:
      return "ImplicitHostStepMarker";
    case StepMarkerType::kDeviceStepMarker:
      return "DeviceStepMarker";
  }
}

std::string PrintStepMarker(const StepMarker& step_marker) {
  return absl::StrCat("(", PrintStepMarkerType(step_marker.type), ", ",
                      step_marker.event_name, ", ",
                      step_marker.span.DebugString(), ")");
}

std::string PrintStepEvents(const StepEvents& step_events) {
  std::vector<int64_t> step_ids;
  step_ids.reserve(step_events.size());
  for (const auto& id_details : step_events) {
    step_ids.push_back(id_details.first);
  }
  absl::c_sort(step_ids);
  std::string result = "{";
  for (auto id : step_ids) {
    absl::StrAppend(&result, "\n");
    auto* details = gtl::FindOrNull(step_events, id);
    std::string details_str = details ? details->DebugString() : "()";
    absl::StrAppend(&result, id, ":", details_str);
  }
  return absl::StrCat(result, "\n}");
}

void CombineStepEvents(const StepEvents& src, StepEvents* dst) {
  for (const auto& step_details : src) {
    int64_t step_id = step_details.first;
    const StepDetails& src_details = step_details.second;
    StepDetails* dst_details = &(*dst)[step_id];
    dst_details->Combine(src_details);
  }
}
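// Note: the operator[] access above default-constructs a StepDetails entry in
// *dst for any step_id that appears in src but not yet in dst, so every step
// in src is represented in dst after combining.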

std::vector<EventTypeSpan> ToNonOverlappedEvents(
    const std::vector<EventTypeSpan>& overlapped_events) {
  std::vector<EventBoundary> event_boundaries =
      GenerateEventBoundaries(overlapped_events);
  std::vector<EventTypeSpan> result;
  if (event_boundaries.empty()) return result;
  result.reserve(event_boundaries.size());
  PriorityTracker priority_tracker;
  for (int64_t i = 0, end = (event_boundaries.size() - 1); i < end; i++) {
    EventType highest_priority = priority_tracker.Update(event_boundaries[i]);
    result.push_back({highest_priority, Timespan::FromEndPoints(
                                            event_boundaries[i].time_ps,
                                            event_boundaries[i + 1].time_ps)});
  }
  return result;
}
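// Worked example (illustrative; assumes DEVICE_COMPUTE_32 has a higher
// priority value than HOST_COMPUTE, which is not defined in this file). Given
// the overlapped events
//   {HOST_COMPUTE,      [0ps, 100ps]}
//   {DEVICE_COMPUTE_32, [30ps, 60ps]}
// the sorted boundaries are 0ps(start), 30ps(start), 60ps(end), 100ps(end),
// and the non-overlapped result is
//   {HOST_COMPUTE,      [0ps, 30ps]}
//   {DEVICE_COMPUTE_32, [30ps, 60ps]}
//   {HOST_COMPUTE,      [60ps, 100ps]}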

// Converts from overlapped step-events to non-overlapped step-events.
StepEvents ToNonOverlappedStepEvents(const StepEvents& overlapped_step_events) {
  StepEvents non_overlapped_step_events;
  for (const auto& step_events : overlapped_step_events) {
    const auto& step_id = step_events.first;
    const auto& step_details = step_events.second;
    non_overlapped_step_events.try_emplace(step_id,
                                           step_details.ToNonOverlapped());
  }
  return non_overlapped_step_events;
}
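// Sketch of how these helpers compose (variable names are hypothetical):
//   StepEvents combined;
//   CombineStepEvents(per_host_step_events, &combined);
//   StepEvents non_overlapped = ToNonOverlappedStepEvents(combined);
//   PrecisionStats stats = ComputePrecisionStats(non_overlapped);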

void StepDetails::AddMarker(const StepMarker& m) { markers_.push_back(m); }

void StepDetails::AddEvent(const EventTypeSpan& e) { events_.push_back(e); }

void StepDetails::AggregateDeviceMemoryTransfers(
    const std::vector<DeviceMemoryTransfer> device_memory_transfers) {
  if (device_memory_transfers.size() != device_memory_transfers_.size()) {
    return;  // Sanity check.
  }
  for (size_t i = 0; i < device_memory_transfers.size(); ++i) {
    device_memory_transfers_[i].set_occurrence(
        device_memory_transfers_[i].occurrence() +
        device_memory_transfers[i].occurrence());
    device_memory_transfers_[i].set_bytes_transferred(
        device_memory_transfers_[i].bytes_transferred() +
        device_memory_transfers[i].bytes_transferred());
    device_memory_transfers_[i].set_time_us(
        device_memory_transfers_[i].time_us() +
        device_memory_transfers[i].time_us());
  }
}

void StepDetails::AddCollectiveOpEvent(uint64 core_id, const AllReduceInfo& e) {
  *collectives_[core_id].add_all_reduce_info() = e;
}

void StepDetails::AddDeviceMemoryTransferEvent(EventType event_type,
                                               const Timespan& time_span,
                                               uint64 bytes) {
  int index = 0;
  switch (event_type) {
    case HOST_TO_DEVICE:
      index = 0;
      break;
    case DEVICE_TO_HOST:
      index = 1;
      break;
    case DEVICE_TO_DEVICE:
      index = 2;
      break;
    default:
      return;
  }
  device_memory_transfers_[index].set_occurrence(
      device_memory_transfers_[index].occurrence() + 1);
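  // Accumulates the transfer time, converting the span duration from
  // picoseconds to microseconds (1e6 ps == 1 us).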
  device_memory_transfers_[index].set_time_us(
      device_memory_transfers_[index].time_us() +
      time_span.duration_ps() / 1000000.0);
  device_memory_transfers_[index].set_bytes_transferred(
      device_memory_transfers_[index].bytes_transferred() + bytes);
}

Timespan StepDetails::StepTime() const {
  Timespan max_host_step_time;
  Timespan max_device_step_time;
  for (const auto& marker : markers_) {
    Timespan& cur_max_step_time =
        marker.type == StepMarkerType::kDeviceStepMarker ? max_device_step_time
                                                         : max_host_step_time;
    const Timespan& new_step_time = marker.span;
    if (new_step_time.duration_ps() > cur_max_step_time.duration_ps())
      cur_max_step_time = new_step_time;
  }
  // CPU-only profile.
  if (max_device_step_time.Empty()) {
    return max_host_step_time;
  }

  // If the host step time includes the device step time, use the host step
  // time. This covers the case where the device is synchronized at the end of
  // each step.
  if (max_host_step_time.Includes(max_device_step_time)) {
    return max_host_step_time;
  }
  return max_device_step_time;
}
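// For example, if the longest host step marker spans [0ps, 1000ps] and the
// longest device step marker spans [100ps, 900ps], the host span is returned
// because it includes the device span; otherwise the device span is used.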

StepDetails StepDetails::ToNonOverlapped() const {
  StepDetails non_overlapped_step_details;
  non_overlapped_step_details.markers_ = markers_;
  non_overlapped_step_details.events_ = ToNonOverlappedEvents(events_);
  non_overlapped_step_details.collectives_ = collectives_;
  non_overlapped_step_details.device_memory_transfers_ =
      device_memory_transfers_;
  non_overlapped_step_details.step_name_ = step_name_;
  return non_overlapped_step_details;
}

void StepDetails::Combine(const StepDetails& other) {
  markers_.insert(markers_.end(), other.markers_.begin(), other.markers_.end());
  events_.insert(events_.end(), other.events_.begin(), other.events_.end());
  collectives_.insert(other.collectives_.begin(), other.collectives_.end());
  AggregateDeviceMemoryTransfers(other.device_memory_transfers_);
  if (step_name_.empty()) step_name_ = other.step_name_;
}

std::string StepDetails::DebugString() const {
  std::string result = "([";
  for (int i = 0, end = markers_.size(); i < end; i++) {
    if (i > 0) absl::StrAppend(&result, ", ");
    absl::StrAppend(&result, PrintStepMarker(markers_[i]));
  }
  absl::StrAppend(&result, "], [");
  for (int i = 0, end = events_.size(); i < end; i++) {
    if (i > 0) absl::StrAppend(&result, ", ");
    absl::StrAppend(&result, PrintEventTypeSpan(events_[i]));
  }
  return absl::StrCat(result, "])");
}

bool StepDetails::operator==(const StepDetails& other) const {
  const auto& other_markers = other.Markers();
  if (markers_.size() != other_markers.size()) return false;
  for (uint64 i = 0; i < markers_.size(); i++) {
    if (markers_[i] != other_markers[i]) return false;
  }
  const auto& other_events = other.Events();
  if (events_.size() != other_events.size()) return false;
  for (uint64 i = 0; i < events_.size(); i++) {
    if (events_[i] != other_events[i]) return false;
  }
  return true;
}

bool operator==(const StepEvents& a, const StepEvents& b) {
  if (a.size() != b.size()) return false;
  for (const auto& id_details : a) {
    const auto a_id = id_details.first;
    const auto& a_details = id_details.second;
    const auto* b_details = gtl::FindOrNull(b, a_id);
    if (b_details == nullptr) return false;
    if (a_details != *b_details) return false;
  }
  return true;
}

PrecisionStats ComputePrecisionStats(
    const StepEvents& nonoverlapped_step_events) {
  int64_t compute_32bit_ps = 0;
  int64_t compute_16bit_ps = 0;
  for (const auto& id_details : nonoverlapped_step_events) {
    for (const auto& event : id_details.second.Events()) {
      switch (event.type) {
        case DEVICE_COMPUTE_32:
          compute_32bit_ps += event.span.duration_ps();
          break;
        case DEVICE_COMPUTE_16:
          compute_16bit_ps += event.span.duration_ps();
          break;
        default:
          break;
      }
    }
  }
  PrecisionStats precision_stats;
  precision_stats.set_compute_32bit_ps(compute_32bit_ps);
  precision_stats.set_compute_16bit_ps(compute_16bit_ps);
  return precision_stats;
}
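// A caller can derive the fraction of reduced-precision compute from these
// totals, e.g. (when the denominator is non-zero):
//   double ratio_16bit =
//       static_cast<double>(stats.compute_16bit_ps()) /
//       (stats.compute_16bit_ps() + stats.compute_32bit_ps());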

}  // namespace profiler
}  // namespace tensorflow