syntax = "proto3";

package tensorflow.profiler;

import "google/protobuf/any.proto";
import "tensorflow/core/profiler/protobuf/op_metrics.proto";

// Breakdown of step-time on generic hardware. Note that these components are
// mutually exclusive, so adding them together is equal to the step time. If
// an execution time interval has multiple types of events happening, one of
// the event types is picked and the time interval is attributed to it.
message GenericStepBreakdown {
  // Map from event type to the accumulated duration, in picoseconds, of that
  // type.
  map<int32, uint64> type_ps = 1;
}

// Information about memory transfer to/from device memory.
message DeviceMemoryTransfer {
  // NOTE(review): presumably the number of transfers observed -- confirm.
  uint64 occurrence = 1;
  // NOTE(review): presumably the time spent on the transfers, in microseconds
  // (going by the `_us` suffix) -- confirm.
  double time_us = 2;
  // NOTE(review): presumably the total number of bytes moved -- confirm.
  uint64 bytes_transferred = 3;
}

// Next ID: 6
// Result proto for StepInfo.
message StepInfoResult {
  // The step number.
  uint32 step_num = 1;
  // The step name.
  string step_name = 5;
  // The step duration in picoseconds.
  uint64 duration_ps = 2;
  // The start time of this step in picoseconds.
  uint64 begin_ps = 3;
  // Breakdown of the step-time. Can be unpacked into a GenericStepBreakdown.
  google.protobuf.Any step_breakdown = 4;
}

// Result proto for all-reduce ops.
message AllReduceInfo {
  // Unique id for all-reduce ops.
  uint64 id = 1;
  // The name of the hlo op. This field is no longer set by the profiler.
  string name = 2 [deprecated = true];
  // For all-reduce nodes from different modules, if they have the same
  // all_reduce_id, they will be 'Allreduce'd'. If empty, AllReduce will not be
  // applied across modules.
  uint64 all_reduce_id = 3;
  // The start time in picoseconds of the op event.
  uint64 start_time_ps = 4;
  // The end time in picoseconds of the op event.
  uint64 end_time_ps = 5;
  // The size of the op in bytes.
  uint64 byte_size = 6;
}

// Result database for all-reduce ops.
message AllReduceDbResult {
  // The all-reduce op entries held by this database.
  repeated AllReduceInfo all_reduce_info = 1;
}

// Result proto for information in a step across all cores.
message PerCoreStepInfo {
  // The step number.
  uint32 step_num = 1;
  // A map from core_id to StepInfo.
  map<uint32, StepInfoResult> step_info_per_core = 2;
  // The result for the per-step HLO-metric database.
  OpMetricsDb hlo_metrics_db = 3;
  // A map from core ID to program replica id. Replica id map could change
  // during a profile session, but should stay stable within a step.
  map<uint32, uint32> core_id_to_replica_id_map = 5;
  // A map from core_id to all-reduce ops.
  map<uint32, AllReduceDbResult> all_reduce_db_per_core = 6;
  // Information about device memory transfers, categorized by source and
  // destination. Ordered by the following categories:
  // 1. HostToDevice
  // 2. DeviceToHost
  // 3. DeviceToDevice
  repeated DeviceMemoryTransfer device_memory_transfers = 7;

  // Field number 4 is reserved and must not be reused by a new field.
  reserved 4;
}

// Result proto for a StepDatabase.
message StepDatabaseResult {
  // A sequence of PerCoreStepInfo.
  repeated PerCoreStepInfo step_sequence = 1;
  // Whether the step db uses incomplete step information.
  // This flag is set to true when:
  // 1) no step marker or annotation is present.
  // 2) the profiling duration is too short to cover a full step.
  // If this flag is false, we will group and break down the profile by
  // complete steps only and ignore incomplete steps.
  // If this flag is true, we will simply aggregate and break down over the
  // total profile as a single step.
  bool use_incomplete_step = 2;
  // Number of steps dropped during post processing.
  uint32 num_steps_dropped = 3;
  // If the step_sequence is empty because:
  // * there is no step profiled on any host, then empty_intersect is false.
  // * there are steps profiled on some host, but the intersection of steps
  //   over all hosts is empty, then empty_intersect is true.
  bool empty_intersect = 4;
}