xref: /aosp_15_r20/external/tensorflow/tensorflow/core/profiler/protobuf/op_profile.proto (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1syntax = "proto3";
2
3package tensorflow.profiler.op_profile;
4
// Profile is the top-level data that summarizes a program. It carries the
// roots of the profile trees together with the device type.
message Profile {
  // Reserved field numbers and names; new fields must not reuse them.
  reserved 2, 3;
  reserved "by_program_structure", "per_program";

  // Root of a profile broken down by instruction category.
  Node by_category = 1;

  // Root of a profile broken down by program.
  Node by_program = 4;

  // Device type.
  string device_type = 5;

  // Root of the by-category breakdown, excluding idle ops.
  Node by_category_exclude_idle = 6;

  // Root of the by-program breakdown, excluding idle ops.
  Node by_program_exclude_idle = 7;
}
23
// An entry in the profile tree. (An instruction, or set of instructions).
// Nodes nest recursively through `children`.
message Node {
  // A category of XLA instructions.
  // Node.name is a descriptive string, like "data formatting".
  message InstructionCategory {}

  // A single XLA instruction.
  // Node.name is the unique instruction id, like "%multiply.5".
  message XLAInstruction {
    // Physical data layout, expressed as an ordered list of dimensions.
    message LayoutAnalysis {
      // A single dimension of the layout.
      message Dimension {
        // Size of the data in this dimension.
        int32 size = 1;
        // Data must be padded to a multiple of alignment.
        int32 alignment = 2;
        // What the dimension represents, e.g. "spatial".
        string semantics = 3;
      }

      // The physical data layout, from most-minor to most-major dimensions.
      repeated Dimension dimensions = 1;
    }

    // Opcode like %multiply
    string op = 1;
    // %multiply = [shape]multiply(operand1, operand2)
    string expression = 2;
    // Typically the TensorFlow operation name.
    string provenance = 3;
    // NOTE(review): undocumented; presumably the instruction's category
    // name -- confirm against the producer.
    string category = 4;
    // Describes the physical memory layout of the instruction's primary input.
    // e.g. for a convolution, this analyzes the image and ignores the kernel.
    LayoutAnalysis layout = 5;
    // NOTE(review): undocumented; meaning and units are not stated here --
    // confirm against the producer.
    uint32 computation_primitive_size = 6;
  }

  // Semantics depend on contents.
  string name = 1;
  // May be omitted e.g. for fused instructions.
  Metrics metrics = 2;
  // Subjected to pruning.
  repeated Node children = 3;

  // Details about what this node represents.
  oneof contents {
    InstructionCategory category = 4;
    XLAInstruction xla = 5;
  }

  // Total number of children before pruning.
  int32 num_children = 6;
}
62
// Measurements of an operation (or aggregated set of operations).
// Metrics are always "total" rather than "self".
message Metrics {
  // Core-time taken by this operation, as a fraction of all operations.
  double time = 1;

  // Floating point computations performed by this operation, as a fraction of
  // peak core FLOPS * program time. This representation has useful properties:
  //  - it is proportional to the number of floating point operations performed
  //  - utilization is flops/time
  //  - wasted potential flops is proportional to time - flops
  //  - it does not reveal the peak core FLOPS of the hardware
  double flops = 2;

  // The memory bandwidth used to load operands, as a fraction of
  // theoretical memory bandwidth on the specific hardware.
  double memory_bandwidth = 3;

  // Elapsed core-time in picoseconds.
  double raw_time = 11;

  // Total floating-point operations performed.
  double raw_flops = 12;

  // Total bytes accessed (include read/write).
  double raw_bytes_accessed = 13;
}
84