// Source (xref): /aosp_15_r20/external/tensorflow/tensorflow/core/profiler/protobuf/steps_db.proto
// Revision: b6fb3261f9314811a0f4371741dbb8839866f948
syntax = "proto3";

package tensorflow.profiler;

import "google/protobuf/any.proto";
import "tensorflow/core/profiler/protobuf/op_metrics.proto";

// Breakdown of step-time on generic hardware. Note that these components are
// mutually exclusive so that adding them together is equal to the step time.
// If an execution time interval has multiple types of events happening, we
// need to pick one of the event types to attribute the time interval to.
message GenericStepBreakdown {
  // Map from event type to the accumulated duration, in picoseconds, of that
  // type.
  // NOTE(review): the int32 key is presumably the numeric value of an
  // event-type enum declared outside this file -- confirm against the producer.
  map<int32, uint64> type_ps = 1;
}

// Information about memory transfer to/from device memory.
message DeviceMemoryTransfer {
  // Occurrence count.
  // NOTE(review): presumably the number of transfers aggregated into this
  // entry -- confirm against the producer.
  uint64 occurrence = 1;
  // Transfer time; microseconds per the field-name suffix. Note that this
  // differs from the picosecond (*_ps) fields used elsewhere in this file.
  double time_us = 2;
  // Number of bytes transferred.
  // NOTE(review): presumably the total over all occurrences -- confirm.
  uint64 bytes_transferred = 3;
}

// Next ID: 6
// Result proto for StepInfo.
message StepInfoResult {
  // The step number.
  uint32 step_num = 1;
  // The step name.
  string step_name = 5;
  // The step duration, in picoseconds.
  uint64 duration_ps = 2;
  // The start time of this step, in picoseconds.
  uint64 begin_ps = 3;
  // Breakdown of the step-time. Can be unpacked into a GenericStepBreakdown.
  google.protobuf.Any step_breakdown = 4;
}

// Result proto for all-reduce ops.
message AllReduceInfo {
  // Unique id for all-reduce ops.
  uint64 id = 1;
  // The name of the hlo op. This field is no longer set by the profiler.
  string name = 2 [deprecated = true];
  // All-reduce nodes from different modules that share the same all_reduce_id
  // are all-reduced together. If empty (i.e. left at the proto3 default of 0),
  // AllReduce will not be applied across modules.
  uint64 all_reduce_id = 3;
  // The start time, in picoseconds, of the op event.
  uint64 start_time_ps = 4;
  // The end time, in picoseconds, of the op event.
  uint64 end_time_ps = 5;
  // The size of the op, in bytes.
  uint64 byte_size = 6;
}

// Result database for all-reduce ops.
message AllReduceDbResult {
  // The all-reduce op records held by this database, one AllReduceInfo each.
  repeated AllReduceInfo all_reduce_info = 1;
}

// Result proto for information in a step across all cores.
message PerCoreStepInfo {
  // The step number.
  uint32 step_num = 1;
  // A map from core_id to StepInfo.
  map<uint32, StepInfoResult> step_info_per_core = 2;
  // The result for the per-step HLO-metric database.
  OpMetricsDb hlo_metrics_db = 3;
  // A map from core ID to program replica id. Replica id map could change
  // during a profile session, but should stay stable within a step.
  map<uint32, uint32> core_id_to_replica_id_map = 5;
  // A map from core_id to all-reduce ops.
  map<uint32, AllReduceDbResult> all_reduce_db_per_core = 6;
  // Information about device memory transfers, categorized by source and
  // destination. Entries are ordered by the following categories:
  // 1. HostToDevice
  // 2. DeviceToHost
  // 3. DeviceToDevice
  repeated DeviceMemoryTransfer device_memory_transfers = 7;

  // Field number 4 is reserved and must not be assigned to a new field.
  reserved 4;
}

// Result proto for a StepDatabase.
message StepDatabaseResult {
  // A sequence of PerCoreStepInfo.
  repeated PerCoreStepInfo step_sequence = 1;
  // Whether the step db uses incomplete step information.
  // This flag is set to true when:
  // 1) no step marker or annotation is present.
  // 2) the profiling duration is too short to cover a full step.
  // If this flag is false, we will group and break down the profile by
  // complete steps only and ignore incomplete steps.
  // If this flag is true, we will simply aggregate and break down over the
  // total profile as a single step.
  bool use_incomplete_step = 2;
  // Number of steps dropped during post processing.
  uint32 num_steps_dropped = 3;
  // If the step_sequence is empty because:
  //   * there is no step profiled on any host, then empty_intersect is false.
  //   * there are steps profiled on some host, but the intersection of steps
  //     over all hosts is empty, then empty_intersect is true.
  bool empty_intersect = 4;
}
