// xref: /aosp_15_r20/external/executorch/schema/program.fbs (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
// Copyright (c) Meta Platforms, Inc. and affiliates.

//
// See README.md before modifying this file.
//

include "scalar_type.fbs";

namespace executorch_flatbuffer;

// Identifier of a valid executor schema.
file_identifier "ET12";
// Extension of written files.
file_extension "pte";

// Table that contains the metadata about how
// to unflatten the flattened input/output from the compiler.
table ContainerMetadata {
  // Encoded description of the input container structure.
  encoded_inp_str: string;
  // Encoded description of the output container structure.
  encoded_out_str: string;
}

// Represents a null/none value; used as a member of the KernelTypes union.
table Null {}

// Contains information relevant to the allocation of non-constant
// buffer data (e.g. from tensors).
// This refers to where the buffer needs to be placed in an existing
// memory and at what offset from its base address.
table AllocationDetails {
  memory_id: uint;  // ID of the memory where this data needs to be placed.

  // Offset in bytes relative to the start of the memory area indicated by
  // memory_id.
  //
  // Originally this field was a single 32-bit uint, but we need 64 bits for
  // larger models. To preserve backwards compatibility, the high bits are
  // managed in a separate 32-bit field. Users should combine the two fields
  // to get the full 64-bit offset.
  memory_offset_low: uint;  // Least significant 32 bits
  memory_offset_high: uint;  // Most significant 32 bits. Defaults to zero.
}

// Indicates the types of shape a Tensor may have, from the point
// of view of their dynamism.
enum TensorShapeDynamism : byte {
  // Static shape. Memory is allocated by the compiler.
  STATIC = 0,
  // Dynamic shape but with an upper bound.
  // Memory is allocated by the compiler.
  DYNAMIC_BOUND = 1,
  // Dynamic shape without upper bound.
  // Memory allocation is handled by the runtime.
  DYNAMIC_UNBOUND = 2,
}


// Table to put additional information about tensors in that is not applicable
// to the vast majority of tensors in the vast majority of programs.
table ExtraTensorInfo {
  // [Optional] Index of the SubsegmentOffsets entry in
  //  program.mutable_data_segments that specifies where the data is located.
  //  If not present and the data is located in a segment, then the data is in
  //  the first index.
  mutable_data_segments_idx: uint64;

  // [Optional] The unique name of the tensor. e.g. 'mod.linear.weight'
  fully_qualified_name: string;
}

table Tensor {
  scalar_type: ScalarType;

  // Offset in scalar_type elements (e.g., multiples of 4 bytes for an int
  // scalar type) from the beginning of the tensor buffer to the beginning of
  // the actual data. Currently, the runtime only supports a value of zero.
  storage_offset: int;

  sizes: [int];

  // Specifies in what order the dimensions are laid out in memory (from outer
  // to inner).
  //
  // For example, given a rank 3 Tensor of size (3, 5, 2). If we name
  // dimensions: [row, column, batch], then a dim_order of:
  // - (2, 0, 1) represents a [batch, row, column] ordering where "column" is
  //   the innermost dimension, then comes "row", and the outermost dimension is
  //   "batch".
  // - (0, 2, 1) represents a [row, batch, column] ordering where "column" is
  //   the innermost dimension, then comes "batch", and the outermost dimension
  //   is "row".
  dim_order: [ubyte];

  // out of scope M1
  requires_grad: bool;

  // Overall, a Tensor is either constant or mutable. At method load time
  //  constant tensors receive a dataptr into the serialized program. Mutable
  //  tensors can either receive a pointer from the hierarchical allocator or a
  //  nullptr if they will receive a data pointer at execution time (inputs
  //  and control flow placeholders can be like this). Mutable tensors may or
  //  may not also have an initial value in the serialized program.
  //
  // In summary:
  //   data_buffer_idx > 0, allocation_info = Null: Tensor is a constant.
  //   data_buffer_idx = 0, allocation_info = Non Null: Tensor is mutable and
  //     will receive a dataptr at method load time.
  //   data_buffer_idx = 0, allocation_info = Null: Tensor is mutable and
  //     will receive a dataptr at input time or during execution.
  //   data_buffer_idx > 0, allocation_info = Non Null: Tensor is mutable and
  //     will receive a dataptr at method load time, and has an initial state.
  //
  // Tensor data is stored inline if program.constant_buffer is null. Otherwise
  //  it is in a segment. If this tensor's allocation_info is null then the
  //  tensor data location is specified by program.constant_segment. If the
  //  allocation_info is non-null then the data is somewhere in
  //  program.mutable_data_segments. If extra_tensor_info is null, then the
  //  data is in program.mutable_data_segments[0]; otherwise the
  //  mutable_data_segments index is specified by
  //  extra_tensor_info.mutable_data_segments_idx.
  data_buffer_idx: uint;

  // [Optional] preallocation details for non-constants (null otherwise).
  allocation_info: AllocationDetails;

  // May not be needed.
  layout: byte;

  // Determines the type of the tensor's shape, from the point of view of its
  // dynamic or not behavior, and consequently how the allocation of the
  // underlying memory is handled, and also how to interpret the sizes and
  // strides fields.
  // 1. dynamism == STATIC: sizes field represents the static shape of
  //    the tensor.
  // 2. dynamism == DYNAMIC_BOUND: sizes field represents the upper bound shape
  //    of the tensor. Each dimension of the tensor at runtime should never
  //    exceed the corresponding dimension of the upper bound shape.
  //
  // 3. dynamism == DYNAMIC_UNBOUND: the stored sizes field can be ignored since
  //    shape is fully dynamic.
  shape_dynamism: TensorShapeDynamism;

  // [Optional] Additional information about the Tensor that is not applicable
  // to most tensors.
  extra_tensor_info: ExtraTensorInfo;
}

// Boxed 64-bit integer; a KernelTypes union member.
table Int {
  int_val: long;
}

// Boxed boolean; a KernelTypes union member.
table Bool {
  bool_val: bool;
}

// Boxed double-precision float; a KernelTypes union member.
table Double {
  double_val: double;
}

// Boxed string; a KernelTypes union member.
table String {
  string_val: string;
}

// List of 64-bit integers; a KernelTypes union member.
table IntList {
  items: [long];
}

// List of doubles; a KernelTypes union member.
table DoubleList {
  items: [double];
}

// List of booleans; a KernelTypes union member.
table BoolList {
  items: [bool];
}

// Unlike primitive lists, tensor lists have mutable members and aliasing behavior when
// elements are added to them. To match this aliasing behavior, the runtime tensor list is
// serialized by serializing its elements into the ExecutionPlan.values array, and then
// serializing their corresponding indices into TensorList.items.
table TensorList {
  items: [int];  // EValue indices.
}

// Similar to TensorList except the indices can also point to None.
table OptionalTensorList {
  items: [int];  // EValue indices.
}

// Supported values in Executorch kernels. Enums are serialized as ints.
union KernelTypes {
  Null,
  Int,
  Bool,
  Double,
  Tensor,
  String,
  IntList,
  DoubleList,
  BoolList,
  TensorList,
  OptionalTensorList,
}

// Abstraction for program values. A subset of types supported in core pytorch kernels.
table EValue {
  val: KernelTypes;
}

table Operator {
  // An operator in the registry is uniquely identified by its name and overload name.
  // TODO(larryliu): is there a more efficient way to represent this
  name: string;
  overload: string;
}

table KernelCall {
  // Index into the operators table in the program.
  op_index: int;

  // Indices of the values required by the operation (in and out).
  args: [int];
}

table DelegateCall {
  // Index into the delegates table in the program.
  delegate_index: int;

  // Indices of the values required by the delegate (in and out).
  args: [int];
}

table MoveCall {
  // Index into the values table of the EValue we are moving from.
  move_from: int;

  // Index into the values table of the EValue we are moving into.
  move_to: int;
}

table JumpFalseCall {
  // Index into the values table of the boolean that specifies whether or not to jump.
  cond_value_index: int;

  // Value to set the executor program counter to if the jump occurs.
  destination_instruction: int;
}

table FreeCall {
  // Index into the values table of the tensor whose underlying data blob is being freed.
  value_index: int;
}

// Arguments to a single Instruction; the union tag selects the call type.
union InstructionArguments {
  KernelCall,
  DelegateCall,
  MoveCall,
  JumpFalseCall,
  FreeCall,
}

// Basic unit of execution.
table Instruction {
  instr_args: InstructionArguments;
}

table Frame {
  // For storing the frame to print stacktraces.
  filename: string;  // Name of the file in which the instruction exists
  lineno: int;       // Line number at which the instruction was called
  name: string;      // Name of the function the instruction was called from
  context: string;   // Source code of the instruction
}

table FrameList {
  // For storing the frames to print stacktraces.
  items: [Frame];
}

// Indicates where a piece of data is stored.
enum DataLocation : byte {
  // Stored directly in the flatbuffer.
  INLINE = 0,
  // Stored in a segment.
  SEGMENT = 1,
}

// Indicates where the delegate data is stored.
table BackendDelegateDataReference {
  // Indicates which list to index into:
  //     INLINE -> Program.backend_delegate_data
  //     SEGMENT -> Program.segments
  location: DataLocation;

  // The index into the list indicated by the location.
  index: uint;
}

table CompileSpec {
  // One compile spec. There can be multiple specs for one method.
  key: string; // like max_value
  value: [ubyte]; // like 4, or other types based on needs.
}

table BackendDelegate {
  // Used to resolve the delegate backend classes, for example, "TCE0", "TCE1", etc.
  // This string is also used in to_backend.
  id: string;

  // A binary blob (from a subgraph) as an output of preprocessing. Will be
  // provided to the backend code at init time. Can be very large, on the
  // order of 10-100MB.
  processed: BackendDelegateDataReference;

  // The compilation spec for the lowered module's forward function.
  // Example: [CompileSpec["max_value", 4]]
  compile_specs: [CompileSpec];
}

// A sequence of blocking instructions to be executed in order. The
// abstraction is not currently leveraged; all current programs are 1 chain.
// We are leaving chains as part of the program definition for future use cases
// around graph level async where different threads will be represented as
// separate chains.
table Chain {
  // Indices of the values that are (non-static) inputs into this Chain.
  inputs: [int];

  // Indices of the values that are outputs out of this Chain.
  outputs: [int];

  // List of instructions to be executed in order.
  instructions: [Instruction];

  // Optional list of frames for each instruction.
  // The backend config must have 'emit_stacktrace' set to true to emit these.
  stacktrace: [FrameList];
}

table ExecutionPlan {

  // Name of a method on the nn.Module that was traced to create this program.
  name: string;

  // Type metadata for input/output to the execution plan.
  container_meta_type: ContainerMetadata;

  // A list of all values used in this execution plan.
  values: [EValue];

  // Indices to the 'EValues' that are inputs to this execution plan.
  // This list contains only the non-constant tensors (i.e. not part of
  // the saved program).
  inputs: [int];

  // Indices to the 'EValues' that are outputs of this execution plan.
  // This signals a lifespan that goes beyond the execution.
  outputs: [int];

  // List of Chains of kernels.
  chains: [Chain];

  // Operators used in this execution plan.
  operators: [Operator];

  // A list of delegates; each is a special instance of execution, at the same level as chains.
  delegates: [BackendDelegate];

  // List of buffer sizes for non_constant memory allocations. (Think neural net activations)
  // A list instead of a single buffer to account for complex memory hierarchies.
  // TODO(jakeszwe, razy): How to reconcile this with the ability for the hierarchical memory allocator
  // to be id based instead of index based.
  // Runtime should use the len(constant_buffer) as the ground truth of the
  // constants memory buffer size, and ignore non_const_buffer_sizes[0].
  non_const_buffer_sizes: [int64];

}

// Constant tensor data stored directly in the flatbuffer.
table Buffer {
  // During serialization, this alignment may be rewritten to a larger value.
  // The magic "@executorch-tensor-alignment" comment tells EXIR which lines to
  // patch.
  storage: [ubyte] (force_align: 16);  // @executorch-tensor-alignment
}

// Delegate data stored directly in the flatbuffer. This is a different type
// than Buffer because tensors and delegates can have different alignment
// requirements.
table BackendDelegateInlineData {
  // During serialization, this alignment may be rewritten to a larger value.
  // The magic "@executorch-delegate-alignment" comment tells EXIR which lines
  // to patch.
  data: [ubyte] (force_align: 16);  // @executorch-delegate-alignment
}

// Describes a contiguous piece of data that lives outside of the flatbuffer data,
// typically appended afterwards in the file. The "extended header" in the file,
// when present, points to the segment base offset.
table DataSegment {
  // Segment offsets are relative to the segment base offset provided in
  // the extended file header. Segments will typically be aligned in a
  // way to make it possible to use mmap() to load them.
  offset: uint64;

  // The size in bytes of valid data starting at the offset. The segment
  // data may be followed by padding before the segment that follows it,
  // to make it easier to use mmap().
  size: uint64;
}

// Describes data offsets into a particular segment.
table SubsegmentOffsets {
  // Index of the segment in Program.segments.
  segment_index: uint;

  // Each element is an offset in bytes into the data of the segment pointed to
  // by segment_index. Offsets must be aligned to @executorch-tensor-alignment.
  offsets: [uint64];
}

table Program {
  // Schema version.
  version: uint;

  // List of ExecutionPlans that make up the program. Each ExecutionPlan corresponds with a
  // different entry point into the model.
  execution_plan: [ExecutionPlan];

  // Tables of constant data, used for constant Values (e.g. data field of weight tensors).
  // Each constant is assigned an index into the table which are each individually aligned.
  // 0 index is reserved to be pointed to by non-constant Tensors.
  // If this field is non-empty, constant_segment.offsets must be empty.
  // DEPRECATED: After D61996249 on 2024-09-05, no new PTE files will use this field.
  constant_buffer: [Buffer];

  // List of delegate data. Pointed to by BackendDelegateDataReference.
  backend_delegate_data: [BackendDelegateInlineData];

  // List of data segments that follow the Program data in this file, sorted by
  // offset. Elements in this schema can refer to these segments by index.
  segments: [DataSegment];

  // Describes the offsets of each constant tensor, relative to the segment
  // offset. If constant_segment.offsets field is non-empty, constant_buffer
  // must be empty. constant_segment.offsets[0] is reserved to be pointed to by
  // non-constant Tensors.
  constant_segment: SubsegmentOffsets;

  // [Optional] Describes the offsets into various segments for each mutable
  // tensor. Only mutable tensors with a meaningful initial state are
  // serialized here (for example weights that will be trained on-device as
  // opposed to just layer activations). Separate from the constant_segment to
  // reduce peak memory usage by letting us read directly from the PTE file
  // into the mutable tensor, as opposed to loading the .pte data into
  // constant memory, copying it over, and then being unable to release the
  // constant segment. No two elements should point to the same segment.
  mutable_data_segments: [SubsegmentOffsets];
}

root_type Program;
