// Copyright (c) Meta Platforms, Inc. and affiliates.

//
// See README.md before modifying this file.
//

include "scalar_type.fbs";

namespace executorch_flatbuffer;

// Identifier of a valid executor schema.
file_identifier "ET12";
// Extension of written files.
file_extension "pte";

// Table that contains the metadata about how
// to unflatten the flattened input/output from compiler
table ContainerMetadata {
  encoded_inp_str: string;
  encoded_out_str: string;
}

table Null {}

// Contains information relevant to the allocation of non-constant
// buffer data (e.g. from tensors).
// This refers to where the buffer needs to be placed in an existing
// memory and at what offset from its base address.
table AllocationDetails {
  memory_id: uint; // ID of the memory where this data needs to be placed.

  // Offset in bytes relative to the start of the memory area indicated by
  // memory_id.
  //
  // Originally this field was a single 32-bit uint, but we need 64 bits for
  // larger models. To preserve backwards compatibility, the high bits are
  // managed in a separate 32-bit field. Users should combine the two fields
  // to get the full 64-bit offset.
  memory_offset_low: uint; // Least significant 32 bits
  memory_offset_high: uint; // Most significant 32 bits. Defaults to zero.
}

// Indicates the types of shape a Tensor may have, from the point
// of view of their dynamism.
enum TensorShapeDynamism : byte {
  // Static shape. Memory is allocated by the compiler.
  STATIC = 0,
  // Dynamic shape but with an upper bound.
  // Memory is allocated by the compiler.
  DYNAMIC_BOUND = 1,
  // Dynamic shape without upper bound.
  // Memory allocation is handled by the runtime.
  DYNAMIC_UNBOUND = 2,
}


// Table to put additional information about tensors in that is not applicable
// to the vast majority of tensors in the vast majority of programs.
table ExtraTensorInfo {
  // [Optional] Specifies the SubsegmentOffsets in
  // program.mutable_data_segments that specifies where the data is located in.
  // If not present and the data is located in a segment, then the data is in
  // the first index.
  mutable_data_segments_idx: uint64;

  // [Optional] The unique name of the tensor. e.g. 'mod.linear.weight'
  fully_qualified_name: string;
}

table Tensor {
  scalar_type: ScalarType;

  // Offset in scalar_type elements (e.g., multiples of 4 bytes for an int
  // scalar type) from the beginning of the tensor buffer to the beginning of
  // the actual data. Currently, the runtime only supports a value of zero.
  storage_offset: int;

  sizes: [int];

  // Specifies in what order the dimensions are laid out in memory (from outer
  // to inner).
  //
  // For example, given a rank 3 Tensor of size (3, 5, 2). If we name
  // dimensions: [row, column, batch], then a dim_order of:
  // - (2, 0, 1) represents a [batch, row, column] ordering where "column" is
  //   the innermost dimension, then comes "row", and the outermost dimension is
  //   "batch".
  // - (0, 2, 1) represents a [row, batch, column] ordering where "column" is
  //   the innermost dimension, then comes "batch", and the outermost dimension
  //   is "row".
  dim_order: [ubyte];

  // out of scope M1
  requires_grad: bool;

  // Overall, a Tensor is either constant or mutable. At method load time
  // constant tensors receive a dataptr into the serialized program. Mutable
  // tensors can either receive a pointer from the hierarchical allocator or a
  // nullptr if they will receive a data pointer at execution time (inputs
  // and control flow placeholders can be like this). Mutable tensors may or
  // may not also have an initial value in the serialized program.
  //
  // In summary:
  // data_buffer_idx > 0, allocation_info = Null: Tensor is a constant.
  // data_buffer_idx = 0, allocation_info = Non Null: Tensor is mutable and
  //   will receive a dataptr at method load time.
  // data_buffer_idx = 0, allocation_info = Null: Tensor is mutable and
  //   will receive a dataptr at input time or during execution.
  // data_buffer_idx > 0, allocation_info = Non Null: Tensor is mutable and
  //   will receive a dataptr at method load time, and has an initial state.
  //
  // Tensor data is stored inline if program.constant_buffer is null. Otherwise
  // it is in a segment. If this tensor's allocation_info is null then the
  // tensor data location is specified by program.constant_segment. If the
  // allocation_info is non-null then the data is somewhere in
  // program.mutable_data_segments. If extra_tensor_info is null, then the data
  // is in program.mutable_data_segments[0]; otherwise the mutable data segment
  // index is specified by extra_tensor_info.mutable_data_segments_idx.
  data_buffer_idx: uint;

  // [Optional] preallocation details for non-constants (null otherwise).
  allocation_info: AllocationDetails;

  // May not be needed.
  layout: byte;

  // Determines the type of the tensor's shape, from the point of view of its
  // dynamic or not behavior, and consequently how the allocation of the
  // underlying memory is handled, and also how to interpret the sizes and
  // strides fields.
  // 1. dynamism == STATIC: sizes field represents the static shape of
  //    the tensor.
  // 2. dynamism == DYNAMIC_BOUND: sizes field represents the upper bound shape
  //    of the tensor. Each dimension of the tensor at runtime should never
  //    exceed the corresponding dimension of the upper bound shape.
  //
  // 3. dynamism == DYNAMIC_UNBOUND: the stored sizes field can be ignored since
  //    shape is fully dynamic.
  shape_dynamism: TensorShapeDynamism;

  // [Optional] Additional information about the Tensor that is not applicable
  // to most tensors.
  extra_tensor_info: ExtraTensorInfo;
}

table Int {
  int_val: long;
}

table Bool {
  bool_val: bool;
}

table Double {
  double_val: double;
}

table String {
  string_val: string;
}

table IntList {
  items: [long];
}

table DoubleList {
  items: [double];
}

table BoolList {
  items: [bool];
}

// Unlike primitive lists, tensor lists have mutable members and aliasing behavior when
// elements are added to them. To match this aliasing behavior, the runtime tensor list is
// serialized by serializing its elements into the ExecutionPlan.values array, and then
// serializing their corresponding indices into TensorList.items.
table TensorList {
  items: [int]; // EValue indices.
}

// Similar to TensorList except the indices can also point to None.
table OptionalTensorList {
  items: [int];
}

// Supported values in ExecuTorch kernels. Enums are serialized as ints.
union KernelTypes {
  Null,
  Int,
  Bool,
  Double,
  Tensor,
  String,
  IntList,
  DoubleList,
  BoolList,
  TensorList,
  OptionalTensorList,
}

// Abstraction for program values. A subset of types supported in core pytorch kernels.
table EValue {
  val: KernelTypes;
}

table Operator {
  // Operator registry and lookup is uniquely identified by its name, and overload name.
  // TODO(larryliu): is there a more efficient way to represent this
  name: string;
  overload: string;
}

table KernelCall {
  // Index to the operators table in the program.
  op_index: int;

  // Indexes to the (values) required by the operation (in and out).
  args: [int];
}

table DelegateCall {
  // Index to the delegates table in the program.
  delegate_index: int;

  // Indexes to the (values) required by the delegates (in and out).
  args: [int];
}

table MoveCall {
  // Index into the values table of the evalue we are moving from
  move_from: int;

  // Index into the values table of the evalue we are moving into
  move_to: int;
}

table JumpFalseCall {
  // Index into the values table of boolean that specifies whether or not to jump
  cond_value_index: int;

  // Value to set the executor program counter if the jump occurs
  destination_instruction: int;
}

table FreeCall {
  // Index into values table of the tensor whose underlying data blob is being freed
  value_index: int;
}

union InstructionArguments {
  KernelCall,
  DelegateCall,
  MoveCall,
  JumpFalseCall,
  FreeCall,
}

// Basic unit of execution
table Instruction {
  instr_args: InstructionArguments;
}

table Frame {
  // For storing the frame to print stacktraces
  filename: string; // Name of the file in which the instruction exists
  lineno: int; // Line number at which the instruction was called
  name: string; // Name of the function the instruction was called from
  context: string; // Source code of the instruction
}

table FrameList {
  // For storing the frames to print stacktraces
  items: [Frame];
}

// Indicates where a piece of data is stored.
enum DataLocation : byte {
  // Stored directly in the flatbuffer.
  INLINE = 0,
  // Stored in a segment.
  SEGMENT = 1,
}

// Indicates where the delegate data is stored
table BackendDelegateDataReference {
  // Indicates which list to index into:
  //     INLINE -> Program.backend_delegate_data
  //     SEGMENT -> Program.segments
  location: DataLocation;

  // The index into the list indicated by the location.
  index: uint;
}

table CompileSpec {
  // One compile spec. There can be multiple specs for one method
  key: string; // like max_value
  value: [ubyte]; // like 4, or other types based on needs.
}

table BackendDelegate {
  // Used to resolve the delegate backend classes, for example, "TCE0", "TCE1", etc.
  // This string is also used in to_backend.
  id: string;

  // A binary blob (from a subgraph) as an output of preprocessing. Will be
  // provided to the backend code at init time. Can be very large, on the
  // order of 10-100MB.
  processed: BackendDelegateDataReference;

  // The compilation spec for the lowered module's forward function
  // Example: [CompileSpec["max_value", 4]]
  compile_specs: [CompileSpec];
}

// A sequence of blocking instructions to be executed in order. The
// abstraction is not currently leveraged, all current programs are 1 chain.
// We are leaving chains as part of the program definition for future use cases
// around graph level async where different threads will be represented as
// separate chains.
table Chain {
  // Indices of the values that are (non-static) inputs into this Chain.
  inputs: [int];

  // Indices of the values that are outputs out of this Chain.
  outputs: [int];

  // List of instructions to be executed in order.
  instructions: [Instruction];

  // Optional list of frames for each instruction.
  // The backend config must have 'emit_stacktrace' set to true to emit them.
  stacktrace: [FrameList];
}

table ExecutionPlan {

  // Name of a method on the nn.Module that was traced to create this program.
  name: string;

  // Type meta data for input/output to the execution plan
  container_meta_type: ContainerMetadata;

  // A list of all values used in this execution plan.
  values: [EValue];

  // Indices to the 'Evalues' that are inputs to this execution plan.
  // This list contains only the non-constant tensors (i.e. not part of
  // the saved program).
  inputs: [int];

  // Indices to the 'Evalues' that are outputs of this execution plan.
  // This signals a lifespan that goes beyond the execution.
  outputs: [int];

  // List of Chains of kernels.
  chains: [Chain];

  // Operators used in this execution plan
  operators: [Operator];

  // A list of delegates and each is a special instance of execution, the same level of chains.
  delegates: [BackendDelegate];

  // List of buffer sizes for non_constant memory allocations. (Think neural net activations)
  // A list instead of a single buffer to account for complex memory hierarchies.
  // TODO(jakeszwe, razy): How to reconcile this with the ability for the hierarchical memory allocator
  // to be id based instead of index based.
  // Runtime should use the len(constant_buffer) as the ground truth of the
  // constants memory buffer size, and ignore non_const_buffer_sizes[0].
  non_const_buffer_sizes: [int64];

}

// Constant tensor data stored directly in the flatbuffer.
table Buffer {
  // During serialization, this alignment may be rewritten to a larger value.
  // The magic "@executorch-tensor-alignment" comment tells EXIR which lines to
  // patch.
  storage: [ubyte] (force_align: 16); // @executorch-tensor-alignment
}

// Delegate data stored directly in the flatbuffer. This is a different type
// than Buffer because tensors and delegates can have different alignment
// requirements.
table BackendDelegateInlineData {
  // During serialization, this alignment may be rewritten to a larger value.
  // The magic "@executorch-delegate-alignment" comment tells EXIR which lines
  // to patch.
  data: [ubyte] (force_align: 16); // @executorch-delegate-alignment
}

// Describes a contiguous piece of data that lives outside of the flatbuffer data,
// typically appended afterwards in the file. The "extended header" in the file,
// when present, points to the segment base offset.
table DataSegment {
  // Segment offsets are relative to the segment base offset provided in
  // the extended file header. Segments will typically be aligned in a
  // way to make it possible to use mmap() to load them.
  offset: uint64;

  // The size in bytes of valid data starting at the offset. The segment
  // data may be followed by padding before the segment that follows it,
  // to make it easier to use mmap().
  size: uint64;
}

// Describes data offsets into a particular segment
table SubsegmentOffsets {
  // Index of the segment in Program.segments
  segment_index: uint;

  // Each element is an offset in bytes into the data of the segment pointed to
  // by segment_index. Offsets must be aligned to @executorch-tensor-alignment.
  offsets: [uint64];
}

table Program {
  // Schema version.
  version: uint;

  // List of ExecutionPlans that make up the program. Each ExecutionPlan corresponds with a
  // different entry point into the model.
  execution_plan: [ExecutionPlan];

  // Tables of constant data, used for constant Values (e.g.data field of weight tensors).
  // Each constant is assigned an index into the table which are each individually aligned.
  // 0 index is reserved to be pointed to by non-constant Tensors.
  // If this field is non-empty, constant_segment.offsets must be empty.
  // DEPRECATED: After D61996249 on 2024-09-05, no new PTE files will use this field.
  constant_buffer: [Buffer];

  // List of delegate data. Pointed to by BackendDelegateDataReference.
  backend_delegate_data: [BackendDelegateInlineData];

  // List of data segments that follow the Program data in this file, sorted by
  // offset. Elements in this schema can refer to these segments by index.
  segments: [DataSegment];

  // Describes the offsets of each constant tensor, relative to the segment
  // offset. If constant_segment.offsets field is non-empty, constant_buffer
  // must be empty. constant_segment.offsets[0] is reserved to be pointed to by
  // non-constant Tensors.
  constant_segment: SubsegmentOffsets;

  // [Optional] Describes the offsets into various segments for each mutable
  // tensor. Only mutable tensors with a meaningful initial state are
  // serialized here (for example weights that will be trained on-device as
  // opposed to just layer activations). Separate from the constant_segment to
  // reduce peak memory usage by letting us read directly from the PTE file
  // into the mutable tensor, as opposed to loading the .pte data into
  // constant memory, copying it over, and then being unable to release the
  // constant segment. No two elements should point to the same segment.
  mutable_data_segments: [SubsegmentOffsets];
}

root_type Program;