// event_span.h (revision b6fb3261f9314811a0f4371741dbb8839866f948) - OpenGrok cross reference for /aosp_15_r20/external/tensorflow/tensorflow/core/profiler/utils/event_span.h

/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_
#define TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_

#include <string>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/strings/string_view.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
#include "tensorflow/core/profiler/utils/timespan.h"

namespace tensorflow {
namespace profiler {

// The various event types. Enumerations are numbered such that a bigger number
// has a higher priority than a smaller number when used in execution-time
// breakdown. Values are spaced 10 apart (0, 10, ..., 170).
// NOTE(review): the gaps presumably leave room to insert new types without
// renumbering -- confirm before relying on it.
enum EventType {
  // No event associated with the time. It could be that the machine was idle or
  // executing some events which were not traced.
  UNKNOWN_TIME = 0,
  // Host is computing.
  HOST_COMPUTE = 10,
  // Host is preprocessing the data before the execution on device.
  HOST_PREPROCESS = 20,
  // Host is postprocessing the data after the execution on device.
  HOST_POSTPROCESS = 30,
  // Host is batching data (for inference).
  HOST_BATCH_FORMATION = 40,
  // Host runtime work, such as memory allocation.
  HOST_RUNTIME = 50,
  // Host is compiling.
  HOST_COMPILE = 60,
  // Host-to-host communication.
  HOST_TO_HOST = 70,
  // Host-to-device communication.
  HOST_TO_DEVICE = 80,
  // Host is preparing to launch a computation on device.
  HOST_PREPARE = 90,
  // Collective ops such as All-Reduce.
  // DEVICE_COLLECTIVES is deliberately given a smaller priority than
  // HOST_WAIT_INPUT: if an all-reduce event overlaps with a host-wait-input
  // event, we want to count the time as waiting for input.
  DEVICE_COLLECTIVES = 100,
  // Host is waiting for input.
  HOST_WAIT_INPUT = 110,
  // Device-to-device communication.
  DEVICE_TO_DEVICE = 120,
  // Device-to-host communication.
  DEVICE_TO_HOST = 130,
  // Device is computing with 32-bit precision.
  DEVICE_COMPUTE_32 = 140,
  // Device is computing with 16-bit precision.
  DEVICE_COMPUTE_16 = 150,
  // Device is waiting for another device.
  DEVICE_WAIT_DEVICE = 160,
  // Device is waiting for host.
  DEVICE_WAIT_HOST = 170,
  // Alias of the highest-valued (i.e. highest-priority) event type. Must be
  // updated if a type with a larger value is added.
  LAST_EVENT_TYPE = DEVICE_WAIT_HOST
};

// Generic event types that are shown to the user. Enumerators are numbered
// consecutively starting at 1; kFirstGenericEventType and
// kLastGenericEventType alias the first and last real enumerators.
enum GenericEventType {
  // Alias marking the smallest valid value (shared with kDeviceCompute).
  kFirstGenericEventType = 1,
  // Device is computing.
  kDeviceCompute = kFirstGenericEventType,
  // Device-to-device communication.
  kDeviceToDevice,
  // Collective Ops such as All-Reduce and NCCL.
  kDeviceCollectives,
  // Host is computing.
  kHostCompute,
  // Host is preparing to launch a computation on device.
  kHostPrepare,
  // Device waiting for input from the host.
  kInput,
  // Device sending output to the host.
  kOutput,
  // Host is compiling.
  kCompile,
  // No recognized event associated with the time.
  kAllOthers,
  // Alias marking the largest valid value (shared with kAllOthers). Must be
  // updated if an enumerator is appended after kAllOthers.
  kLastGenericEventType = kAllOthers,
};

// Contains the type and timespan of an event.
struct EventTypeSpan {
  EventType type;  // type of this event.
  Timespan span;   // timespan of this event.
  EventTypeSpan(EventType t, Timespan s) : type(t), span(s) {}
  // Equality test.
  bool operator==(const EventTypeSpan& other) const {
    return type == other.type && span == other.span;
  }
  // Inequality test.
  bool operator!=(const EventTypeSpan& other) const {
    return !(*this == other);
  }
};

// Classifies where a step marker came from: explicitly traced on the host,
// inferred on the host by event grouping, or derived for the device.
enum class StepMarkerType {
  // "TraceContext" TraceMe events.
  kExplicitHostStepMarker,
  // Identified by group_events (e.g., FunctionRun, SessionRun).
  kImplicitHostStepMarker,
  // Derived from the result of group_events. A device step marker starts with
  // the first device event of the group and ends with the last event of the
  // group.
  kDeviceStepMarker,
};

// Describes a single event that serves as a step marker.
struct StepMarker {
  StepMarkerType type;     // kind of step marker.
  std::string event_name;  // name of the marker event.
  std::string step_name;   // not set by the constructor; starts out empty.
  Timespan span;           // interval covered by the marker event.

  // Builds a marker from its kind, the event name and the event interval.
  // step_name is left default-constructed.
  StepMarker(StepMarkerType step_marker_type, absl::string_view name,
             Timespan marker_span)
      : type(step_marker_type), event_name(name), span(marker_span) {}

  // Markers are equal when kind, event name and interval all match.
  // NOTE(review): step_name does not take part in the comparison -- confirm
  // that this is intentional.
  bool operator==(const StepMarker& rhs) const {
    if (type != rhs.type) return false;
    if (event_name != rhs.event_name) return false;
    return span == rhs.span;
  }

  // Exact negation of operator==.
  bool operator!=(const StepMarker& rhs) const { return !operator==(rhs); }
};

// Details of a step. Note that this could be the result of combining the
// StepDetails of the same step executed on different cores.
//
// A StepDetails holds the step markers, the typed event spans, the collective
// op records and the device memory transfer records attributed to one step,
// plus an optional step name.
class StepDetails {
 public:
  // Creates an empty step whose device-memory-transfer table already holds 3
  // default-constructed entries.
  // NOTE(review): presumably one entry per transfer direction accepted by
  // AddDeviceMemoryTransferEvent (HOST_TO_DEVICE, DEVICE_TO_DEVICE,
  // DEVICE_TO_HOST) -- confirm against the implementation.
  StepDetails() : device_memory_transfers_(3) {}

  // Read-only access to the step markers recorded so far.
  const std::vector<StepMarker>& Markers() const { return markers_; }
  // Read-only access to the event spans recorded so far.
  const std::vector<EventTypeSpan>& Events() const { return events_; }
  // Read-only access to the collective op records, keyed by a uint32.
  // NOTE(review): AddCollectiveOpEvent takes a uint64 core_id while this map
  // is keyed by uint32 -- verify that the narrowing (if the core id is the
  // key) is intended.
  const absl::flat_hash_map<uint32, AllReduceDbResult>& Collectives() const {
    return collectives_;
  }
  // Read-only access to the device memory transfer records.
  const std::vector<DeviceMemoryTransfer>& DeviceMemoryTransfers() const {
    return device_memory_transfers_;
  }
  // Returns the step time.
  Timespan StepTime() const;
  // Adds a step-marker to this step.
  void AddMarker(const StepMarker& m);
  // Adds an EventTypeSpan to this step.
  void AddEvent(const EventTypeSpan& e);
  // Adds a collective op to this step.
  void AddCollectiveOpEvent(uint64 core_id, const AllReduceInfo& e);
  // Appends device memory transfer events to this step.
  // Only event type of HOST_TO_DEVICE/DEVICE_TO_DEVICE/DEVICE_TO_HOST are
  // allowed.
  void AddDeviceMemoryTransferEvent(EventType event_type,
                                    const Timespan& time_span, uint64 bytes);
  // Returns a copy of the step name.
  std::string StepName() const { return step_name_; }
  // Sets the name of this step. The argument is taken by value and moved into
  // place, so callers passing a temporary or std::move'd string pay no extra
  // copy.
  void SetStepName(std::string step_name) {
    step_name_ = std::move(step_name);
  }

  // Converts from overlapped events to non-overlapped events.
  StepDetails ToNonOverlapped() const;

  // Combines other.
  void Combine(const StepDetails& other);

  // Equality test.
  bool operator==(const StepDetails& other) const;
  // Inequality test; exact negation of operator==.
  bool operator!=(const StepDetails& other) const { return !(*this == other); }

  // Returns a string that prints the content of this object.
  std::string DebugString() const;

 private:
  // Accumulates the device memory transfers from another step to this step.
  // NOTE(review): the vector is passed by (const) value, so each call copies
  // it; switching to a const reference would also require changing the
  // out-of-line definition.
  void AggregateDeviceMemoryTransfers(
      const std::vector<DeviceMemoryTransfer> device_memory_transfers);

  // All step-markers found for marking this step in the traces. There could be
  // multiple step-markers for a single step for different reasons. One such
  // reason is that there may be one step-marker for the same step on each core;
  // so after combining the StepDetails from multiple cores, there would be
  // multiple step-markers for the same step.
  std::vector<StepMarker> markers_;
  // All events belonging to this step.
  std::vector<EventTypeSpan> events_;
  // Collective operation related events such as all-reduce etc.
  absl::flat_hash_map<uint32, AllReduceDbResult> collectives_;
  // Device memory transfers (including time and bytes involved).
  // TODO(jiesun): Consider to use IntervalSet instead of just sum up the event
  // durations.
  std::vector<DeviceMemoryTransfer> device_memory_transfers_;
  // Name of this step; empty until SetStepName is called.
  std::string step_name_;
};

// Map from step_id to the StepDetails (markers, events, collectives, memory
// transfers) recorded for that step.
using StepEvents = absl::flat_hash_map<int64_t /*step_id*/, StepDetails>;

// Equality test for StepEvents. Declared here; defined out of line.
bool operator==(const StepEvents& a, const StepEvents& b);

// Returns the name of the given EventType as an owned string.
std::string PrintEventType(EventType event_type);

// Returns the string of the given GenericEventType. The result is a
// non-owning view.
// NOTE(review): presumably backed by static storage -- confirm the lifetime in
// the implementation before storing the view.
absl::string_view GetGenericEventTypeStr(GenericEventType event_type);

// Returns a string that prints the given EventTypeSpan.
std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span);

// Returns a string that prints the given StepMarker.
std::string PrintStepMarker(const StepMarker& step_marker);

// Returns a string that prints the given StepEvents.
std::string PrintStepEvents(const StepEvents& step_events);

// Combines the src StepEvents into dst. src is read-only; dst is modified in
// place.
// NOTE(review): dst is presumably required to be non-null -- confirm against
// the implementation.
void CombineStepEvents(const StepEvents& src, StepEvents* dst);

// Converts from overlapped events to non-overlapped events. The input is not
// modified; the result is returned as a new vector.
std::vector<EventTypeSpan> ToNonOverlappedEvents(
    const std::vector<EventTypeSpan>& overlapped_events);

// Converts from overlapped step-events to non-overlapped step events. The
// input is not modified; the result is returned as a new map.
StepEvents ToNonOverlappedStepEvents(const StepEvents& overlapped_step_events);

// Returns the precision stats of the given non-overlapped step events.
// Callers are expected to pass events that are already non-overlapped (see
// ToNonOverlappedStepEvents).
PrecisionStats ComputePrecisionStats(
    const StepEvents& nonoverlapped_step_events);

}  // namespace profiler
}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_PROFILER_UTILS_EVENT_SPAN_H_