1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_ 17 #define TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_ 18 19 #include <string> 20 #include <vector> 21 22 #include "absl/container/flat_hash_map.h" 23 #include "absl/strings/string_view.h" 24 #include "tensorflow/core/platform/types.h" 25 #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" 26 #include "tensorflow/core/profiler/protobuf/steps_db.pb.h" 27 #include "tensorflow/core/profiler/utils/timespan.h" 28 29 namespace tensorflow { 30 namespace profiler { 31 32 // The various event types. Enumerations are numbered such that a bigger number 33 // has a higher priority than a smaller number when used in execution-time 34 // breakdown. 35 enum EventType { 36 // No event associated with the time. It could be that the machine was idle or 37 // executing some events which were not traced. 38 UNKNOWN_TIME = 0, 39 // Host is computing. 40 HOST_COMPUTE = 10, 41 // Host is preprocessing the data before the execution on device. 42 HOST_PREPROCESS = 20, 43 // Host is postprocessing the data after the execution on device. 44 HOST_POSTPROCESS = 30, 45 // Host is batching data (for inference). 46 HOST_BATCH_FORMATION = 40, 47 // Host runtime, like memory allocation and etc. 48 HOST_RUNTIME = 50, 49 // Host is compiling. 50 HOST_COMPILE = 60, 51 // Host-to-host communication. 52 HOST_TO_HOST = 70, 53 // Host-to-device communication. 54 HOST_TO_DEVICE = 80, 55 // Host is preparing to launch a computation on device. 56 HOST_PREPARE = 90, 57 // Assigns a smaller priority to DEVICE_COLLECTIVES than HOST_WAIT_INPUT, 58 // because if an all-reduce event is overlapped with an host-wait-input event, 59 // we want to count it as waiting for input. 60 // Collective Ops such as All-Reduce. 61 DEVICE_COLLECTIVES = 100, 62 // Host is waiting for input. 63 HOST_WAIT_INPUT = 110, 64 // Device-to-device communication. 65 DEVICE_TO_DEVICE = 120, 66 // Device-to-host communication. 67 DEVICE_TO_HOST = 130, 68 // Device is computing with 32-bit precision. 69 DEVICE_COMPUTE_32 = 140, 70 // Device is computing with 16-bit precision. 71 DEVICE_COMPUTE_16 = 150, 72 // Device is waiting for another device. 73 DEVICE_WAIT_DEVICE = 160, 74 // Device is waiting for host. 75 DEVICE_WAIT_HOST = 170, 76 LAST_EVENT_TYPE = DEVICE_WAIT_HOST 77 }; 78 79 // Generic event types that shown to the user. 80 enum GenericEventType { 81 kFirstGenericEventType = 1, 82 // Device is computing. 83 kDeviceCompute = kFirstGenericEventType, 84 // Device-to-device communication. 85 kDeviceToDevice, 86 // Collective Ops such as All-Reduce and NCCL. 87 kDeviceCollectives, 88 // Host is computing. 89 kHostCompute, 90 // Host is preparing to launch a computation on device. 91 kHostPrepare, 92 // Device waiting for input from the host. 93 kInput, 94 // Device sending output to the host. 95 kOutput, 96 // Host is compling. 97 kCompile, 98 // No recognized event associated with the time. 99 kAllOthers, 100 kLastGenericEventType = kAllOthers, 101 }; 102 103 // Contains the type and timespan of an event. 104 struct EventTypeSpan { 105 EventType type; // type of this event. 106 Timespan span; // timespan of this event. EventTypeSpanEventTypeSpan107 EventTypeSpan(EventType t, Timespan s) : type(t), span(s) {} 108 // Equality test. 109 bool operator==(const EventTypeSpan& other) const { 110 return type == other.type && span == other.span; 111 } 112 // Inequality test. 113 bool operator!=(const EventTypeSpan& other) const { 114 return !(*this == other); 115 } 116 }; 117 118 enum class StepMarkerType { 119 // "TraceContext" TraceMe events. 120 kExplicitHostStepMarker, 121 // Identified by group_events (e.g., FunctionRun, SessionRun). 122 kImplicitHostStepMarker, 123 // Derived from the result of group_events. A device step marker starts with 124 // the first device event of the group and ends with the last event of the 125 // group. 126 kDeviceStepMarker, 127 }; 128 129 // Record of an event that is used as a step marker. 130 struct StepMarker { 131 StepMarkerType type; 132 std::string event_name; // name of this event. 133 std::string step_name; 134 Timespan span; // timespan of this event. StepMarkerStepMarker135 StepMarker(StepMarkerType step_marker_type, absl::string_view name, 136 Timespan s) 137 : type(step_marker_type), event_name(name), span(s) {} 138 // Equality test. 139 bool operator==(const StepMarker& other) const { 140 return type == other.type && event_name == other.event_name && 141 span == other.span; 142 } 143 // Inequality test. 144 bool operator!=(const StepMarker& other) const { return !(*this == other); } 145 }; 146 147 // Details of a step. Note that this could be the result of combining the 148 // StepDetails of the same step executed on different cores. 149 class StepDetails { 150 public: StepDetails()151 StepDetails() : device_memory_transfers_(3) {} 152 Markers()153 const std::vector<StepMarker>& Markers() const { return markers_; } Events()154 const std::vector<EventTypeSpan>& Events() const { return events_; } Collectives()155 const absl::flat_hash_map<uint32, AllReduceDbResult>& Collectives() const { 156 return collectives_; 157 } DeviceMemoryTransfers()158 const std::vector<DeviceMemoryTransfer>& DeviceMemoryTransfers() const { 159 return device_memory_transfers_; 160 } 161 // Returns the step time. 162 Timespan StepTime() const; 163 // Adds a step-marker to this step. 164 void AddMarker(const StepMarker& m); 165 // Adds an EventTypeSpan to this step. 166 void AddEvent(const EventTypeSpan& e); 167 // Adds a collective op to this step. 168 void AddCollectiveOpEvent(uint64 core_id, const AllReduceInfo& e); 169 // Appends device memory transfer events to this step. 170 // Only event type of HOST_TO_DEVICE/DEVICE_TO_DEVICE/DEVICE_TO_HOST are 171 // allowed. 172 void AddDeviceMemoryTransferEvent(EventType event_type, 173 const Timespan& time_span, uint64 bytes); 174 // Returns the step name. StepName()175 std::string StepName() const { return step_name_; } 176 // Sets the name of this step. SetStepName(std::string step_name)177 void SetStepName(std::string step_name) { step_name_ = step_name; } 178 179 // Converts from overlapped events to non-overlapped events. 180 StepDetails ToNonOverlapped() const; 181 182 // Combines other. 183 void Combine(const StepDetails& other); 184 185 // Equality test. 186 bool operator==(const StepDetails& other) const; 187 // Inequality test. 188 bool operator!=(const StepDetails& other) const { return !(*this == other); } 189 190 // Returns a string that prints the content of this object. 191 std::string DebugString() const; 192 193 private: 194 // Accumulates the device memory transfers from another step to this step. 195 void AggregateDeviceMemoryTransfers( 196 const std::vector<DeviceMemoryTransfer> device_memory_transfers); 197 198 // All step-markers found for marking this step in the traces. There could be 199 // multiple step-markers for a single step for different reasons. One such 200 // reason is that there may be one step-marker for the same step on each core; 201 // so after combining the StepDetails from multiple cores, there would be 202 // multiple step-markers for the same step. 203 std::vector<StepMarker> markers_; 204 // All events belonging to this step. 205 std::vector<EventTypeSpan> events_; 206 // Collective operation related events such as all-reduce etc. 207 absl::flat_hash_map<uint32, AllReduceDbResult> collectives_; 208 // Device memory transfers (including time and bytes involved). 209 // TODO(jiesun): Consider to use IntervalSet instead of just sum up the event 210 // durations. 211 std::vector<DeviceMemoryTransfer> device_memory_transfers_; 212 std::string step_name_; 213 }; 214 215 // Map from step_id to the events happened in that step. 216 using StepEvents = absl::flat_hash_map<int64_t /*step_id*/, StepDetails>; 217 218 // Equality test for StepEvents. 219 bool operator==(const StepEvents& a, const StepEvents& b); 220 221 // Returns the name of the given EventType. 222 std::string PrintEventType(EventType event_type); 223 224 // Returns the string of the given GenericEventType. 225 absl::string_view GetGenericEventTypeStr(GenericEventType event_type); 226 227 // Returns a string that prints the given EventTypeSpan. 228 std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span); 229 230 // Returns a string that prints the given StepMarker. 231 std::string PrintStepMarker(const StepMarker& step_marker); 232 233 // Returns a string that prints the given StepEvents. 234 std::string PrintStepEvents(const StepEvents& step_events); 235 236 // Combines the src StepEvents into dst. 237 void CombineStepEvents(const StepEvents& src, StepEvents* dst); 238 239 // Converts from overlapped events to non-overlapped events. 240 std::vector<EventTypeSpan> ToNonOverlappedEvents( 241 const std::vector<EventTypeSpan>& overlapped_events); 242 243 // Converts from overlapped step-events to non-overlapped step events. 244 StepEvents ToNonOverlappedStepEvents(const StepEvents& overlapped_step_events); 245 246 // Returns the precision stats of the given non-overlapped step events. 247 PrecisionStats ComputePrecisionStats( 248 const StepEvents& nonoverlapped_step_events); 249 250 } // namespace profiler 251 } // namespace tensorflow 252 253 #endif // TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_ 254