1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/core/profiler/utils/event_span.h"
16
17 #include <string>
18 #include <utility>
19 #include <vector>
20
21 #include "absl/algorithm/container.h"
22 #include "absl/container/flat_hash_map.h"
23 #include "absl/strings/str_cat.h"
24 #include "absl/strings/string_view.h"
25 #include "tensorflow/core/lib/gtl/map_util.h"
26 #include "tensorflow/core/platform/types.h"
27 #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
28 #include "tensorflow/core/profiler/utils/timespan.h"
29
30 namespace tensorflow {
31 namespace profiler {
32
33 namespace {
34
35 // Representing a boundary of an event.
36 struct EventBoundary {
37 // Time at this boundary.
38 uint64 time_ps;
39 // Type of the event.
40 EventType type;
41 // True if this is the start of the event; False if this is the end.
42 bool is_start;
EventBoundarytensorflow::profiler::__anonbd91bcbc0111::EventBoundary43 EventBoundary(uint64 time_ps, EventType type, bool is_start)
44 : time_ps(time_ps), type(type), is_start(is_start) {}
45 };
46
47 // Returns true if EventBoundary a should appear before EventBoundary b.
CmpEventBoundaries(const EventBoundary & a,const EventBoundary & b)48 bool CmpEventBoundaries(const EventBoundary& a, const EventBoundary& b) {
49 if (a.time_ps == b.time_ps) {
50 if (a.is_start == b.is_start) {
51 // Puts the higher-priority type before the lower-priority type if they
52 // have the same time and same boundary type.
53 return a.type > b.type;
54 } else {
55 // Puts the "end" bounary before the "start" boundary if they have the
56 // same time.
57 return !a.is_start;
58 }
59 }
60 // In ascending order of time.
61 return a.time_ps < b.time_ps;
62 }
63
64 // Generates vector of event boundaries from the given overlapped_events.
GenerateEventBoundaries(const std::vector<EventTypeSpan> & overlapped_events)65 std::vector<EventBoundary> GenerateEventBoundaries(
66 const std::vector<EventTypeSpan>& overlapped_events) {
67 std::vector<EventBoundary> boundaries;
68 boundaries.reserve(2 * overlapped_events.size());
69 for (const auto& event : overlapped_events) {
70 boundaries.push_back(
71 {event.span.begin_ps(), event.type, /*is_start=*/true});
72 boundaries.push_back({event.span.end_ps(), event.type, /*is_start=*/false});
73 }
74 absl::c_sort(boundaries, CmpEventBoundaries);
75 return boundaries;
76 }
77
78 // A class to track the highest priority that an event should be assigned.
79 class PriorityTracker {
80 private:
81 // The current maximum priority.
82 EventType current_max_priority_;
83 // A count for each possible priority.
84 std::vector<int64_t> priority_count_;
85
86 public:
PriorityTracker()87 PriorityTracker() {
88 current_max_priority_ = UNKNOWN_TIME;
89 priority_count_.resize(LAST_EVENT_TYPE + 1, 0);
90 }
91 // Updates current_max_priority_ and priority_count_[] given the boundary.
92 // Returns the new current_max_priority_.
Update(const EventBoundary & boundary)93 EventType Update(const EventBoundary& boundary) {
94 EventType event_type = boundary.type;
95 bool is_start = boundary.is_start;
96 if (is_start) {
97 priority_count_[event_type]++;
98 if (event_type > current_max_priority_) {
99 current_max_priority_ = event_type;
100 }
101 } else {
102 priority_count_[event_type]--;
103 if (event_type == current_max_priority_ &&
104 priority_count_[event_type] == 0) {
105 // Reduces current_max_priority_ to the first event type (starting from
106 // the highest priority) that has a non-zero count.
107 bool found = false;
108 for (int i = event_type - 1; i >= 0; i--) {
109 if (priority_count_[i] > 0) {
110 current_max_priority_ = static_cast<EventType>(i);
111 found = true;
112 break;
113 }
114 }
115 if (!found) current_max_priority_ = UNKNOWN_TIME;
116 }
117 }
118 return current_max_priority_;
119 }
120 };
121
// Number of generic event types, derived from the enum's first/last markers.
constexpr int kNumGenericEventTypes = GenericEventType::kLastGenericEventType -
                                      GenericEventType::kFirstGenericEventType +
                                      1;

// Maps each GenericEventType to its human-readable display name.
using GenericEventTypeStrMap =
    absl::flat_hash_map<GenericEventType, absl::string_view>;
128
GetGenericEventTypeStrMap()129 const GenericEventTypeStrMap& GetGenericEventTypeStrMap() {
130 static const auto* generic_event_type_str_map = new GenericEventTypeStrMap({
131 {kDeviceCompute, "Device compute"},
132 {kDeviceToDevice, "Device to device"},
133 {kDeviceCollectives, "Device collective communication"},
134 {kHostCompute, "Host compute"},
135 {kHostPrepare, "Kernel launch"},
136 {kInput, "Input"},
137 {kOutput, "Output"},
138 {kCompile, "Compilation"},
139 {kAllOthers, "All others"},
140 });
141 DCHECK_EQ(generic_event_type_str_map->size(), kNumGenericEventTypes);
142 return *generic_event_type_str_map;
143 }
144
145 } // namespace
146
GetGenericEventTypeStr(GenericEventType event_type)147 absl::string_view GetGenericEventTypeStr(GenericEventType event_type) {
148 return GetGenericEventTypeStrMap().at(event_type);
149 }
150
PrintEventType(EventType event_type)151 std::string PrintEventType(EventType event_type) {
152 switch (event_type) {
153 case UNKNOWN_TIME:
154 return "unknown_time";
155 case HOST_COMPUTE:
156 return "host_compute";
157 case HOST_COMPILE:
158 return "host_compile";
159 case HOST_TO_HOST:
160 return "host_to_host";
161 case HOST_TO_DEVICE:
162 return "host_to_device";
163 case HOST_PREPARE:
164 return "host_prepare";
165 case DEVICE_COLLECTIVES:
166 return "device_collectives";
167 case HOST_WAIT_INPUT:
168 return "host_wait_input";
169 case DEVICE_TO_DEVICE:
170 return "device_to_device";
171 case DEVICE_TO_HOST:
172 return "device_to_host";
173 case DEVICE_COMPUTE_32:
174 return "device_compute_32";
175 case DEVICE_COMPUTE_16:
176 return "device_compute_16";
177 case DEVICE_WAIT_DEVICE:
178 return "device_wait_device";
179 case DEVICE_WAIT_HOST:
180 return "device_wait_host";
181 default:
182 return "unexpected";
183 }
184 }
185
PrintEventTypeSpan(const EventTypeSpan & event_type_span)186 std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span) {
187 return absl::StrCat("(", PrintEventType(event_type_span.type), ", ",
188 event_type_span.span.DebugString(), ")");
189 }
190
PrintStepMarkerType(StepMarkerType type)191 absl::string_view PrintStepMarkerType(StepMarkerType type) {
192 switch (type) {
193 case StepMarkerType::kExplicitHostStepMarker:
194 return "ExplicitHostStepMarker";
195 case StepMarkerType::kImplicitHostStepMarker:
196 return "ImplicitHostStepMarker";
197 case StepMarkerType::kDeviceStepMarker:
198 return "DeviceStepMarker";
199 }
200 }
201
PrintStepMarker(const StepMarker & step_marker)202 std::string PrintStepMarker(const StepMarker& step_marker) {
203 return absl::StrCat("(", PrintStepMarkerType(step_marker.type), ", ",
204 step_marker.event_name, ", ",
205 step_marker.span.DebugString(), ")");
206 }
207
PrintStepEvents(const StepEvents & step_events)208 std::string PrintStepEvents(const StepEvents& step_events) {
209 std::vector<int64_t> step_ids;
210 step_ids.reserve(step_events.size());
211 for (const auto& id_details : step_events) {
212 step_ids.push_back(id_details.first);
213 }
214 absl::c_sort(step_ids);
215 std::string result = "{";
216 for (auto id : step_ids) {
217 absl::StrAppend(&result, "\n");
218 auto* details = gtl::FindOrNull(step_events, id);
219 std::string details_str = details ? details->DebugString() : "()";
220 absl::StrAppend(&result, id, ":", details_str);
221 }
222 return absl::StrCat(result, "\n}");
223 }
224
CombineStepEvents(const StepEvents & src,StepEvents * dst)225 void CombineStepEvents(const StepEvents& src, StepEvents* dst) {
226 for (const auto& step_details : src) {
227 int64_t step_id = step_details.first;
228 const StepDetails& src_details = step_details.second;
229 StepDetails* dst_details = &(*dst)[step_id];
230 dst_details->Combine(src_details);
231 }
232 }
233
ToNonOverlappedEvents(const std::vector<EventTypeSpan> & overlapped_events)234 std::vector<EventTypeSpan> ToNonOverlappedEvents(
235 const std::vector<EventTypeSpan>& overlapped_events) {
236 std::vector<EventBoundary> event_boundaries =
237 GenerateEventBoundaries(overlapped_events);
238 std::vector<EventTypeSpan> result;
239 if (event_boundaries.empty()) return result;
240 result.reserve(event_boundaries.size());
241 PriorityTracker priority_tracker;
242 for (int64_t i = 0, end = (event_boundaries.size() - 1); i < end; i++) {
243 EventType highest_priority = priority_tracker.Update(event_boundaries[i]);
244 result.push_back({highest_priority, Timespan::FromEndPoints(
245 event_boundaries[i].time_ps,
246 event_boundaries[i + 1].time_ps)});
247 }
248 return result;
249 }
250
251 // Converts from overlapped step-events to non-overlapped step-events.
StepEvents ToNonOverlappedStepEvents(const StepEvents& overlapped_step_events) {
  StepEvents non_overlapped_step_events;
  for (const auto& [step_id, step_details] : overlapped_step_events) {
    non_overlapped_step_events.try_emplace(step_id,
                                           step_details.ToNonOverlapped());
  }
  return non_overlapped_step_events;
}
262
// Appends a step marker to this step's marker list.
void StepDetails::AddMarker(const StepMarker& m) { markers_.push_back(m); }
264
// Appends an event span to this step's event list.
void StepDetails::AddEvent(const EventTypeSpan& e) { events_.push_back(e); }
266
AggregateDeviceMemoryTransfers(const std::vector<DeviceMemoryTransfer> device_memory_transfers)267 void StepDetails::AggregateDeviceMemoryTransfers(
268 const std::vector<DeviceMemoryTransfer> device_memory_transfers) {
269 if (device_memory_transfers.size() != device_memory_transfers_.size()) {
270 return; // Sanity check.
271 }
272 for (size_t i = 0; i < device_memory_transfers.size(); ++i) {
273 device_memory_transfers_[i].set_occurrence(
274 device_memory_transfers_[i].occurrence() +
275 device_memory_transfers[i].occurrence());
276 device_memory_transfers_[i].set_bytes_transferred(
277 device_memory_transfers_[i].bytes_transferred() +
278 device_memory_transfers[i].bytes_transferred());
279 device_memory_transfers_[i].set_time_us(
280 device_memory_transfers_[i].time_us() +
281 device_memory_transfers[i].time_us());
282 }
283 }
284
// Records an all-reduce event for the given core in this step's collectives.
void StepDetails::AddCollectiveOpEvent(uint64 core_id, const AllReduceInfo& e) {
  *collectives_[core_id].add_all_reduce_info() = e;
}
288
AddDeviceMemoryTransferEvent(EventType event_type,const Timespan & time_span,uint64 bytes)289 void StepDetails::AddDeviceMemoryTransferEvent(EventType event_type,
290 const Timespan& time_span,
291 uint64 bytes) {
292 int index = 0;
293 switch (event_type) {
294 case HOST_TO_DEVICE:
295 index = 0;
296 break;
297 case DEVICE_TO_HOST:
298 index = 1;
299 break;
300 case DEVICE_TO_DEVICE:
301 index = 2;
302 break;
303 default:
304 return;
305 }
306 device_memory_transfers_[index].set_occurrence(
307 device_memory_transfers_[index].occurrence() + 1);
308 device_memory_transfers_[index].set_time_us(
309 device_memory_transfers_[index].time_us() +
310 time_span.duration_ps() / 1000000.0);
311 device_memory_transfers_[index].set_bytes_transferred(
312 device_memory_transfers_[index].bytes_transferred() + bytes);
313 }
314
StepTime() const315 Timespan StepDetails::StepTime() const {
316 Timespan max_host_step_time;
317 Timespan max_device_step_time;
318 for (const auto& marker : markers_) {
319 Timespan& cur_max_step_time =
320 marker.type == StepMarkerType::kDeviceStepMarker ? max_device_step_time
321 : max_host_step_time;
322 const Timespan& new_step_time = marker.span;
323 if (new_step_time.duration_ps() > cur_max_step_time.duration_ps())
324 cur_max_step_time = new_step_time;
325 }
326 // CPU-only profile.
327 if (max_device_step_time.Empty()) {
328 return max_host_step_time;
329 }
330
331 // If the host step time includes the device step time, use the host step
332 // time. This covers the case where the device is synchronized at the end of
333 // each step.
334 if (max_host_step_time.Includes(max_device_step_time)) {
335 return max_host_step_time;
336 }
337 return max_device_step_time;
338 }
339
ToNonOverlapped() const340 StepDetails StepDetails::ToNonOverlapped() const {
341 StepDetails non_overlapped_step_details;
342 non_overlapped_step_details.markers_ = markers_;
343 non_overlapped_step_details.events_ = ToNonOverlappedEvents(events_);
344 non_overlapped_step_details.collectives_ = collectives_;
345 non_overlapped_step_details.device_memory_transfers_ =
346 device_memory_transfers_;
347 non_overlapped_step_details.step_name_ = step_name_;
348 return non_overlapped_step_details;
349 }
350
Combine(const StepDetails & other)351 void StepDetails::Combine(const StepDetails& other) {
352 markers_.insert(markers_.end(), other.markers_.begin(), other.markers_.end());
353 events_.insert(events_.end(), other.events_.begin(), other.events_.end());
354 collectives_.insert(other.collectives_.begin(), other.collectives_.end());
355 AggregateDeviceMemoryTransfers(other.device_memory_transfers_);
356 if (step_name_.empty()) step_name_ = other.step_name_;
357 }
358
DebugString() const359 std::string StepDetails::DebugString() const {
360 std::string result = "([";
361 for (int i = 0, end = markers_.size(); i < end; i++) {
362 if (i > 0) absl::StrAppend(&result, ", ");
363 absl::StrAppend(&result, PrintStepMarker(markers_[i]));
364 }
365 absl::StrAppend(&result, "], [");
366 for (int i = 0, end = events_.size(); i < end; i++) {
367 if (i > 0) absl::StrAppend(&result, ", ");
368 absl::StrAppend(&result, PrintEventTypeSpan(events_[i]));
369 }
370 return absl::StrCat(result, "])");
371 }
372
operator ==(const StepDetails & other) const373 bool StepDetails::operator==(const StepDetails& other) const {
374 const auto& other_markers = other.Markers();
375 if (markers_.size() != other_markers.size()) return false;
376 for (uint64 i = 0; i < markers_.size(); i++) {
377 if (markers_[i] != other_markers[i]) return false;
378 }
379 const auto& other_events = other.Events();
380 if (events_.size() != other_events.size()) return false;
381 for (uint64 i = 0; i < events_.size(); i++) {
382 if (events_[i] != other_events[i]) return false;
383 }
384 return true;
385 }
386
operator ==(const StepEvents & a,const StepEvents & b)387 bool operator==(const StepEvents& a, const StepEvents& b) {
388 if (a.size() != b.size()) return false;
389 for (const auto& id_details : a) {
390 const auto a_id = id_details.first;
391 const auto& a_details = id_details.second;
392 const auto* b_details = gtl::FindOrNull(b, a_id);
393 if (b_details == nullptr) return false;
394 if (a_details != *b_details) return false;
395 }
396 return true;
397 }
398
ComputePrecisionStats(const StepEvents & nonoverlapped_step_events)399 PrecisionStats ComputePrecisionStats(
400 const StepEvents& nonoverlapped_step_events) {
401 int64_t compute_32bit_ps = 0;
402 int64_t compute_16bit_ps = 0;
403 for (const auto& id_details : nonoverlapped_step_events) {
404 for (const auto& event : id_details.second.Events()) {
405 switch (event.type) {
406 case DEVICE_COMPUTE_32:
407 compute_32bit_ps += event.span.duration_ps();
408 break;
409 case DEVICE_COMPUTE_16:
410 compute_16bit_ps += event.span.duration_ps();
411 break;
412 default:
413 break;
414 }
415 }
416 }
417 PrecisionStats precision_stats;
418 precision_stats.set_compute_32bit_ps(compute_32bit_ps);
419 precision_stats.set_compute_16bit_ps(compute_16bit_ps);
420 return precision_stats;
421 }
422
423 } // namespace profiler
424 } // namespace tensorflow
425