// xref: /aosp_15_r20/external/tensorflow/tensorflow/core/profiler/protobuf/op_stats.proto (revision b6fb3261f9314811a0f4371741dbb8839866f948)
syntax = "proto3";

package tensorflow.profiler;

import "tensorflow/core/profiler/protobuf/diagnostics.proto";
import "tensorflow/core/profiler/protobuf/kernel_stats.proto";
import "tensorflow/core/profiler/protobuf/op_metrics.proto";
import "tensorflow/core/profiler/protobuf/steps_db.proto";
import "tensorflow/core/profiler/protobuf/tf_function.proto";

// Performance environment, e.g., the peak performance capabilities of the
// device.
message PerfEnv {
  // Peak compute performance of a TPU core or a GPU, in TFLOP/s.
  double peak_tera_flops_per_second = 1;
  // Peak memory bandwidth of a TPU core or a GPU, documented as GiB/s.
  // NOTE(review): the field name says "giga_bytes" (GB) while this comment
  // has historically said GiB; confirm which unit producers actually write.
  double peak_hbm_bw_giga_bytes_per_second = 2;
  // The ridge point of the roofline model in FLOP/Byte, i.e., the minimum
  // operational intensity required to achieve maximum performance.
  double ridge_point = 3;
}

// Job information that does not depend on the host, i.e., build metadata and
// the profiling duration.
message HostIndependentJobInfoResult {
  // The change-list number of this build.
  int64 change_list = 1;
  // The time of this build, in nanoseconds since the Unix epoch.
  int64 build_time = 2;
  // The target of this build.
  string build_target = 3;
  // Profiling duration, in milliseconds.
  uint32 profile_duration_ms = 4;
}

// Job information that is specific to one host. RunEnvironment holds one of
// these per entry in its repeated host_dependent_job_info field.
message HostDependentJobInfoResult {
  // The ID of the host on which the job was run.
  string host_id = 1;
  // The command line used to run the job.
  string command_line = 2;
  // The start time of this run, in nanoseconds since the Unix epoch.
  int64 start_time = 3;
  // BNS address specified by the client at the time of the profiling request.
  string bns_address = 4;
  // Wall time at which profiling started, in nanoseconds.
  uint64 profile_time_ns = 5;
}

// System topology, which describes the number of chips in a pod and the
// connectivity style.
message SystemTopology {
  // Size of the topology along the X dimension. 0 means that the dimension
  // does not exist.
  int64 x_dimension = 1;
  // Size of the topology along the Y dimension. 0 means that the dimension
  // does not exist.
  int64 y_dimension = 2;
  // Size of the topology along the Z dimension. 0 means that the dimension
  // does not exist.
  int64 z_dimension = 3;
  // The number of expected bad chips in this system.
  int64 num_expected_reduced_chips = 4;
}

// The run environment of a profiling session: host/task/device counts, job
// information, parallelism layout, and interconnect topology.
message RunEnvironment {
  // Number of hosts used.
  int32 host_count = 1;
  // Number of tasks used.
  int32 task_count = 2;
  // Distinct hostnames seen, keyed by hostname.
  // NOTE(review): the bool value looks like a set-membership marker only;
  // confirm against the code that populates this map.
  map<string, bool> hostnames = 3;
  // The type of device used.
  string device_type = 4;
  // The number of device cores used.
  //   For TPU, this is the number of TPU cores.
  //   For GPU, this is the number of GPUs (not the number of SMs).
  int32 device_core_count = 5;
  // Host-independent information about this job.
  HostIndependentJobInfoResult host_independent_job_info = 7;
  // Host-dependent information about this job, one entry per host record.
  repeated HostDependentJobInfoResult host_dependent_job_info = 8;
  // The number of replicas; corresponds to input parallelism.
  // If there is no model parallelism, replica_count == device_core_count.
  int32 replica_count = 9;
  // The number of cores used for a single replica, e.g., for model
  // parallelism. If there is no model parallelism, this is 1.
  int32 num_cores_per_replica = 10;
  // The chip interconnection topology.
  SystemTopology topology = 11;
  // Host trace level.
  uint32 host_trace_level = 12;
  // Field number 6 is reserved and must not be assigned to a new field.
  reserved 6;
}

// Identifying details of a single core. OpStats.core_id_to_details maps a core
// ID to one of these.
// Next ID: 7
message CoreDetails {
  // Name of the host.
  string hostname = 1;
  // Unique within host, TPU core only.
  uint32 device_ordinal = 2;
  // Unique within chip per core type.
  uint32 core_num = 3;
  // Unique within host.
  uint32 local_chip_id = 4;
  // Unique within mesh.
  uint32 global_chip_id = 5;
  // Unique within mesh, TPU core only.
  uint32 global_core_id = 6;
}

// Metrics based on hardware performance counters.
message PerformanceCounterResult {
  // Overall matrix unit utilization, as a percentage.
  double matrix_unit_utilization_percent = 1;
}

// Operator statistics: the top-level result that bundles op metrics, step
// data, run environment, kernel and tf-function statistics, and diagnostics.
// Next ID: 14
message OpStats {
  // The database for the op metrics collected from the host over the entire
  // profiling session, including incomplete steps.
  OpMetricsDb host_op_metrics_db = 1;
  // The database for the op metrics collected from the device over the entire
  // profiling session, including incomplete steps.
  OpMetricsDb device_op_metrics_db = 2;
  // The HLO-metric database, covering the complete steps only.
  OpMetricsDb hlo_metrics_db_complete_steps_only = 10;
  // Performance environment of the op metrics collected.
  PerfEnv perf_env = 3;
  // The database of step sequences.
  StepDatabaseResult step_db = 4;
  // The run environment of this profiling session.
  RunEnvironment run_environment = 5;
  // Kernel stats results from all GPUs.
  KernelStatsDb kernel_stats_db = 6;
  // Statistics for all tf-functions.
  TfFunctionDb tf_function_db = 8;
  // A map from core ID to the details of that core.
  map<uint32, CoreDetails> core_id_to_details = 11;
  // Error and warning messages for diagnosing profiling issues.
  Diagnostics diagnostics = 9;
  // A map from program ID to program name.
  map<uint64, string> program_id_to_name_map = 12;
  // Metrics derived from hardware performance counters.
  PerformanceCounterResult performance_counter_result = 13;
  // Field number 7 is reserved and must not be assigned to a new field.
  reserved 7;
}
