// xref: /aosp_15_r20/external/tensorflow/tensorflow/core/profiler/utils/event_span.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_
17 #define TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_
18 
#include <string>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/strings/string_view.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
#include "tensorflow/core/profiler/utils/timespan.h"
28 
29 namespace tensorflow {
30 namespace profiler {
31 
// The various event types. Enumerators are numbered such that a bigger number
// has a higher priority than a smaller number when used in the execution-time
// breakdown.
enum EventType {
  // No event associated with the time. It could be that the machine was idle or
  // executing some events which were not traced.
  UNKNOWN_TIME = 0,
  // Host is computing.
  HOST_COMPUTE = 10,
  // Host is preprocessing the data before the execution on device.
  HOST_PREPROCESS = 20,
  // Host is postprocessing the data after the execution on device.
  HOST_POSTPROCESS = 30,
  // Host is batching data (for inference).
  HOST_BATCH_FORMATION = 40,
  // Host runtime, such as memory allocation.
  HOST_RUNTIME = 50,
  // Host is compiling.
  HOST_COMPILE = 60,
  // Host-to-host communication.
  HOST_TO_HOST = 70,
  // Host-to-device communication.
  HOST_TO_DEVICE = 80,
  // Host is preparing to launch a computation on device.
  HOST_PREPARE = 90,
  // Collective Ops such as All-Reduce.
  // Deliberately assigned a smaller priority than HOST_WAIT_INPUT: if an
  // all-reduce event overlaps with a host-wait-input event, we want to count
  // the time as waiting for input.
  DEVICE_COLLECTIVES = 100,
  // Host is waiting for input.
  HOST_WAIT_INPUT = 110,
  // Device-to-device communication.
  DEVICE_TO_DEVICE = 120,
  // Device-to-host communication.
  DEVICE_TO_HOST = 130,
  // Device is computing with 32-bit precision.
  DEVICE_COMPUTE_32 = 140,
  // Device is computing with 16-bit precision.
  DEVICE_COMPUTE_16 = 150,
  // Device is waiting for another device.
  DEVICE_WAIT_DEVICE = 160,
  // Device is waiting for host.
  DEVICE_WAIT_HOST = 170,
  // Alias of the highest-valued (highest-priority) event type.
  LAST_EVENT_TYPE = DEVICE_WAIT_HOST
};
78 
// Generic event types that are shown to the user. Values are consecutive,
// running from kFirstGenericEventType to kLastGenericEventType inclusive.
enum GenericEventType {
  kFirstGenericEventType = 1,
  // Device is computing.
  kDeviceCompute = kFirstGenericEventType,
  // Device-to-device communication.
  kDeviceToDevice,
  // Collective Ops such as All-Reduce and NCCL.
  kDeviceCollectives,
  // Host is computing.
  kHostCompute,
  // Host is preparing to launch a computation on device.
  kHostPrepare,
  // Device waiting for input from the host.
  kInput,
  // Device sending output to the host.
  kOutput,
  // Host is compiling.
  kCompile,
  // No recognized event associated with the time.
  kAllOthers,
  kLastGenericEventType = kAllOthers,
};
102 
103 // Contains the type and timespan of an event.
104 struct EventTypeSpan {
105   EventType type;  // type of this event.
106   Timespan span;   // timespan of this event.
EventTypeSpanEventTypeSpan107   EventTypeSpan(EventType t, Timespan s) : type(t), span(s) {}
108   // Equality test.
109   bool operator==(const EventTypeSpan& other) const {
110     return type == other.type && span == other.span;
111   }
112   // Inequality test.
113   bool operator!=(const EventTypeSpan& other) const {
114     return !(*this == other);
115   }
116 };
117 
// The kinds of step markers recorded in a StepMarker.
enum class StepMarkerType {
  // "TraceContext" TraceMe events.
  kExplicitHostStepMarker,
  // Identified by group_events (e.g., FunctionRun, SessionRun).
  kImplicitHostStepMarker,
  // Derived from the result of group_events. A device step marker starts with
  // the first device event of the group and ends with the last event of the
  // group.
  kDeviceStepMarker,
};
128 
129 // Record of an event that is used as a step marker.
130 struct StepMarker {
131   StepMarkerType type;
132   std::string event_name;  // name of this event.
133   std::string step_name;
134   Timespan span;           // timespan of this event.
StepMarkerStepMarker135   StepMarker(StepMarkerType step_marker_type, absl::string_view name,
136              Timespan s)
137       : type(step_marker_type), event_name(name), span(s) {}
138   // Equality test.
139   bool operator==(const StepMarker& other) const {
140     return type == other.type && event_name == other.event_name &&
141            span == other.span;
142   }
143   // Inequality test.
144   bool operator!=(const StepMarker& other) const { return !(*this == other); }
145 };
146 
147 // Details of a step. Note that this could be the result of combining the
148 // StepDetails of the same step executed on different cores.
149 class StepDetails {
150  public:
StepDetails()151   StepDetails() : device_memory_transfers_(3) {}
152 
Markers()153   const std::vector<StepMarker>& Markers() const { return markers_; }
Events()154   const std::vector<EventTypeSpan>& Events() const { return events_; }
Collectives()155   const absl::flat_hash_map<uint32, AllReduceDbResult>& Collectives() const {
156     return collectives_;
157   }
DeviceMemoryTransfers()158   const std::vector<DeviceMemoryTransfer>& DeviceMemoryTransfers() const {
159     return device_memory_transfers_;
160   }
161   // Returns the step time.
162   Timespan StepTime() const;
163   // Adds a step-marker to this step.
164   void AddMarker(const StepMarker& m);
165   // Adds an EventTypeSpan to this step.
166   void AddEvent(const EventTypeSpan& e);
167   // Adds a collective op to this step.
168   void AddCollectiveOpEvent(uint64 core_id, const AllReduceInfo& e);
169   // Appends device memory transfer events to this step.
170   // Only event type of HOST_TO_DEVICE/DEVICE_TO_DEVICE/DEVICE_TO_HOST are
171   // allowed.
172   void AddDeviceMemoryTransferEvent(EventType event_type,
173                                     const Timespan& time_span, uint64 bytes);
174   // Returns the step name.
StepName()175   std::string StepName() const { return step_name_; }
176   // Sets the name of this step.
SetStepName(std::string step_name)177   void SetStepName(std::string step_name) { step_name_ = step_name; }
178 
179   // Converts from overlapped events to non-overlapped events.
180   StepDetails ToNonOverlapped() const;
181 
182   // Combines other.
183   void Combine(const StepDetails& other);
184 
185   // Equality test.
186   bool operator==(const StepDetails& other) const;
187   // Inequality test.
188   bool operator!=(const StepDetails& other) const { return !(*this == other); }
189 
190   // Returns a string that prints the content of this object.
191   std::string DebugString() const;
192 
193  private:
194   // Accumulates the device memory transfers from another step to this step.
195   void AggregateDeviceMemoryTransfers(
196       const std::vector<DeviceMemoryTransfer> device_memory_transfers);
197 
198   // All step-markers found for marking this step in the traces. There could be
199   // multiple step-markers for a single step for different reasons. One such
200   // reason is that there may be one step-marker for the same step on each core;
201   // so after combining the StepDetails from multiple cores, there would be
202   // multiple step-markers for the same step.
203   std::vector<StepMarker> markers_;
204   // All events belonging to this step.
205   std::vector<EventTypeSpan> events_;
206   // Collective operation related events such as all-reduce etc.
207   absl::flat_hash_map<uint32, AllReduceDbResult> collectives_;
208   // Device memory transfers (including time and bytes involved).
209   // TODO(jiesun): Consider to use IntervalSet instead of just sum up the event
210   // durations.
211   std::vector<DeviceMemoryTransfer> device_memory_transfers_;
212   std::string step_name_;
213 };
214 
215 // Map from step_id to the events happened in that step.
216 using StepEvents = absl::flat_hash_map<int64_t /*step_id*/, StepDetails>;
217 
218 // Equality test for StepEvents.
219 bool operator==(const StepEvents& a, const StepEvents& b);
220 
221 // Returns the name of the given EventType.
222 std::string PrintEventType(EventType event_type);
223 
224 // Returns the string of the given GenericEventType.
225 absl::string_view GetGenericEventTypeStr(GenericEventType event_type);
226 
227 // Returns a string that prints the given EventTypeSpan.
228 std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span);
229 
230 // Returns a string that prints the given StepMarker.
231 std::string PrintStepMarker(const StepMarker& step_marker);
232 
233 // Returns a string that prints the given StepEvents.
234 std::string PrintStepEvents(const StepEvents& step_events);
235 
236 // Combines the src StepEvents into dst.
237 void CombineStepEvents(const StepEvents& src, StepEvents* dst);
238 
239 // Converts from overlapped events to non-overlapped events.
240 std::vector<EventTypeSpan> ToNonOverlappedEvents(
241     const std::vector<EventTypeSpan>& overlapped_events);
242 
243 // Converts from overlapped step-events to non-overlapped step events.
244 StepEvents ToNonOverlappedStepEvents(const StepEvents& overlapped_step_events);
245 
246 // Returns the precision stats of the given non-overlapped step events.
247 PrecisionStats ComputePrecisionStats(
248     const StepEvents& nonoverlapped_step_events);
249 
250 }  // namespace profiler
251 }  // namespace tensorflow
252 
253 #endif  // TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_
254