syntax = "proto3";

package tensorflow.profiler.op_profile;

// Profile is the top-level data that summarizes a program.
message Profile {
  // Reserved field numbers and names; new fields must not reuse them.
  // NOTE(review): presumably left over from removed fields -- confirm.
  reserved 2, 3;
  reserved "by_program_structure", "per_program";

  // Root of a profile broken down by instruction category.
  Node by_category = 1;
  // Root of a profile broken down by program.
  Node by_program = 4;

  // Device type.
  string device_type = 5;

  // Root of the by-category breakdown, excluding idle ops.
  Node by_category_exclude_idle = 6;
  // Root of the by-program breakdown, excluding idle ops.
  Node by_program_exclude_idle = 7;
}

// An entry in the profile tree. (An instruction, or set of instructions).
//
// Node is self-recursive through `children`; consumers handling untrusted
// input should bound the tree depth they are willing to parse.
message Node {
  // Semantics depend on `contents`:
  //   - category: a descriptive string, like "data formatting".
  //   - xla: the unique instruction id, like "%multiply.5".
  string name = 1;
  // Measurements for this node. May be omitted e.g. for fused instructions.
  Metrics metrics = 2;
  // Child entries of this node. Subjected to pruning, so this list may hold
  // fewer entries than `num_children`.
  repeated Node children = 3;

  // Details about what this node represents.
  oneof contents {
    InstructionCategory category = 4;
    XLAInstruction xla = 5;
  }

  // Total number of children before pruning.
  int32 num_children = 6;

  // A category of XLA instructions.
  // name is a descriptive string, like "data formatting".
  message InstructionCategory {}

  // A single XLA instruction.
  // name is the unique instruction id, like "%multiply.5".
  message XLAInstruction {
    // Opcode like %multiply.
    string op = 1;
    // Full expression, e.g.
    // %multiply = [shape]multiply(operand1, operand2)
    string expression = 2;
    // Typically the TensorFlow operation name.
    string provenance = 3;
    // NOTE(review): undocumented in the original schema; presumably the name
    // of the instruction category this instruction belongs to -- confirm.
    string category = 4;
    // Describes the physical memory layout of the instruction's primary input.
    // e.g. for a convolution, this analyzes the image and ignores the kernel.
    LayoutAnalysis layout = 5;
    // NOTE(review): undocumented in the original schema; meaning and units
    // need to be confirmed against the producer of this message.
    uint32 computation_primitive_size = 6;

    // Physical layout description of a single operand.
    message LayoutAnalysis {
      // The physical data layout, from most-minor to most-major dimensions.
      repeated Dimension dimensions = 1;

      // One dimension of the physical layout.
      message Dimension {
        // Size of the data in this dimension.
        int32 size = 1;
        // Data must be padded to a multiple of alignment.
        int32 alignment = 2;
        // What the dimension represents, e.g. "spatial".
        string semantics = 3;
      }
    }
  }
}

// Measurements of an operation (or aggregated set of operations).
// Metrics are always "total" rather than "self".
message Metrics {
  // Core-time taken by this operation, as a fraction of all operations.
  double time = 1;
  // Floating point computations performed by this operation, as a fraction of
  // peak core FLOPS * program time. This representation has useful properties:
  //  - it is proportional to the number of floating point operations performed
  //  - utilization is flops/time
  //  - wasted potential flops is proportional to time - flops
  //  - it does not reveal the peak core FLOPS of the hardware
  double flops = 2;

  // The memory bandwidth used to load operands, as a fraction of
  // theoretical memory bandwidth on the specific hardware.
  double memory_bandwidth = 3;

  // Elapsed core-time in picoseconds.
  double raw_time = 11;
  // Total floating-point operations performed.
  double raw_flops = 12;
  // Total bytes accessed (includes both reads and writes).
  double raw_bytes_accessed = 13;
}