// Copyright (c) Meta Platforms, Inc. and affiliates.
//
// See README.md before modifying this file.
//

include "scalar_type.fbs";

namespace executorch_flatbuffer;

// Identifier of a valid executor schema.
file_identifier "ET12";
// Extension of written files.
file_extension "pte";

// Table that contains the metadata about how to unflatten the flattened
// input/output from the compiler.
table ContainerMetadata {
  encoded_inp_str: string;
  encoded_out_str: string;
}

table Null {}

// Contains information relevant to the allocation of non-constant buffer data
// (e.g. from tensors).
// This refers to where the buffer needs to be placed in an existing memory and
// at what offset from its base address.
table AllocationDetails {
  memory_id: uint;  // ID of the memory where this data needs to be placed.

  // Offset in bytes relative to the start of the memory area indicated by
  // memory_id.
  //
  // Originally this field was a single 32-bit uint, but we need 64 bits for
  // larger models. To preserve backwards compatibility, the high bits are
  // managed in a separate 32-bit field. Users should combine the two fields
  // to get the full 64-bit offset.
  memory_offset_low: uint;   // Least significant 32 bits.
  memory_offset_high: uint;  // Most significant 32 bits. Defaults to zero.
}

// Indicates the types of shape a Tensor may have, from the point of view of
// their dynamism.
enum TensorShapeDynamism : byte {
  // Static shape. Memory is allocated by the compiler.
  STATIC = 0,
  // Dynamic shape but with an upper bound.
  // Memory is allocated by the compiler.
  DYNAMIC_BOUND = 1,
  // Dynamic shape without an upper bound.
  // Memory allocation is handled by the runtime.
  DYNAMIC_UNBOUND = 2,
}

// Table for additional information about tensors that is not applicable to the
// vast majority of tensors in the vast majority of programs.
table ExtraTensorInfo {
  // [Optional] Index of the SubsegmentOffsets entry in
  // program.mutable_data_segments that describes where this tensor's data is
  // located. If not present and the data is located in a segment, then the
  // data is at the first index.
  mutable_data_segments_idx: uint64;

  // [Optional] The unique name of the tensor, e.g. 'mod.linear.weight'.
  fully_qualified_name: string;
}

table Tensor {
  scalar_type: ScalarType;

  // Offset in scalar_type elements (e.g., multiples of 4 bytes for an int
  // scalar type) from the beginning of the tensor buffer to the beginning of
  // the actual data. Currently, the runtime only supports a value of zero.
  storage_offset: int;

  sizes: [int];

  // Specifies in what order the dimensions are laid out in memory (from outer
  // to inner).
  //
  // For example, given a rank 3 Tensor of size (3, 5, 2) with dimensions named
  // [row, column, batch], a dim_order of:
  // - (2, 0, 1) represents a [batch, row, column] ordering where "column" is
  //   the innermost dimension, then comes "row", and the outermost dimension
  //   is "batch".
  // - (0, 2, 1) represents a [row, batch, column] ordering where "column" is
  //   the innermost dimension, then comes "batch", and the outermost dimension
  //   is "row".
  dim_order: [ubyte];
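
  // Illustrative only: a minimal C++ sketch (not part of this schema or the
  // runtime API; the helper name is made up) of how contiguous strides could
  // be derived from sizes and dim_order as described above.
  //
  //   #include <cstdint>
  //   #include <vector>
  //
  //   std::vector<int64_t> strides_from_dim_order(
  //       const std::vector<int32_t>& sizes,
  //       const std::vector<uint8_t>& dim_order) {
  //     std::vector<int64_t> strides(sizes.size());
  //     int64_t running = 1;
  //     // Walk dim_order from innermost (last entry) to outermost (first).
  //     for (size_t i = dim_order.size(); i-- > 0;) {
  //       strides[dim_order[i]] = running;
  //       running *= sizes[dim_order[i]];
  //     }
  //     return strides;
  //   }
  //
  //   // e.g. sizes = (3, 5, 2), dim_order = (2, 0, 1) -> strides = (5, 1, 15)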
  // Out of scope for M1.
  requires_grad: bool;

  // Overall, a Tensor is either constant or mutable. At method load time,
  // constant tensors receive a data pointer into the serialized program.
  // Mutable tensors can either receive a pointer from the hierarchical
  // allocator or a nullptr if they will receive a data pointer at execution
  // time (inputs and control flow placeholders can be like this). Mutable
  // tensors may or may not also have an initial value in the serialized
  // program.
  //
  // In summary:
  // data_buffer_idx > 0, allocation_info = null:     Tensor is a constant.
  // data_buffer_idx = 0, allocation_info = non-null: Tensor is mutable and
  //   will receive a data pointer at method load time.
  // data_buffer_idx = 0, allocation_info = null:     Tensor is mutable and
  //   will receive a data pointer at input time or during execution.
  // data_buffer_idx > 0, allocation_info = non-null: Tensor is mutable, will
  //   receive a data pointer at method load time, and has an initial state.
  //
  // Tensor data is stored inline in program.constant_buffer when that list is
  // non-empty; otherwise it is in a segment. If this tensor's allocation_info
  // is null, then the tensor data location is specified by
  // program.constant_segment. If the allocation_info is non-null, then the
  // data is somewhere in program.mutable_data_segments: if extra_tensor_info
  // is null, the data is in program.mutable_data_segments[0]; otherwise the
  // mutable_data_segments index is specified by
  // extra_tensor_info.mutable_data_segments_idx.
  data_buffer_idx: uint;

  // [Optional] Preallocation details for non-constants (null otherwise).
  allocation_info: AllocationDetails;

  // May not be needed.
  layout: byte;

  // Determines the type of the tensor's shape, from the point of view of its
  // dynamic (or not) behavior, and consequently how the allocation of the
  // underlying memory is handled, and also how to interpret the sizes and
  // strides fields.
  // 1. dynamism == STATIC: the sizes field represents the static shape of
  //    the tensor.
  // 2. dynamism == DYNAMIC_BOUND: the sizes field represents the upper-bound
  //    shape of the tensor. Each dimension of the tensor at runtime should
  //    never exceed the corresponding dimension of the upper-bound shape.
  // 3. dynamism == DYNAMIC_UNBOUND: the stored sizes field can be ignored
  //    since the shape is fully dynamic.
  shape_dynamism: TensorShapeDynamism;

  // [Optional] Additional information about the Tensor that is not applicable
  // to most tensors.
  extra_tensor_info: ExtraTensorInfo;
}

table Int {
  int_val: long;
}

table Bool {
  bool_val: bool;
}

table Double {
  double_val: double;
}

table String {
  string_val: string;
}

table IntList {
  items: [long];
}

table DoubleList {
  items: [double];
}

table BoolList {
  items: [bool];
}

// Unlike primitive lists, tensor lists have mutable members and aliasing
// behavior when elements are added to them. To match this aliasing behavior,
// the runtime tensor list is serialized by serializing its elements into the
// ExecutionPlan.values array, and then serializing their corresponding indices
// into TensorList.items.
table TensorList {
  items: [int];  // EValue indices.
}

// Similar to TensorList except the indices can also point to None.
table OptionalTensorList {
  items: [int];
}

// Supported values in ExecuTorch kernels. Enums are serialized as ints.
union KernelTypes {
  Null,
  Int,
  Bool,
  Double,
  Tensor,
  String,
  IntList,
  DoubleList,
  BoolList,
  TensorList,
  OptionalTensorList,
}

// Abstraction for program values. A subset of the types supported in core
// PyTorch kernels.
table EValue {
  val: KernelTypes;
}

table Operator {
  // An operator is uniquely identified in the operator registry by its name
  // and overload name.
  // TODO(larryliu): is there a more efficient way to represent this
  name: string;
  overload: string;
}
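
// Illustrative only (not part of this schema or the runtime): one way a
// registry lookup key might be assembled from the (name, overload) pair above,
// assuming the common "name.overload" convention used for ATen-style operator
// names; the function name is made up.
//
//   #include <string>
//
//   std::string operator_key(const std::string& name,
//                            const std::string& overload) {
//     // An empty overload means the name alone identifies the operator.
//     return overload.empty() ? name : name + "." + overload;
//   }
//
//   // e.g. {"aten::add", "out"} -> "aten::add.out"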
table KernelCall {
  // Index into the operators table in the program.
  op_index: int;
  // Indices into the values list of the arguments required by the operation
  // (inputs and outputs).
  args: [int];
}

table DelegateCall {
  // Index into the delegates table in the program.
  delegate_index: int;
  // Indices into the values list of the arguments required by the delegate
  // (inputs and outputs).
  args: [int];
}

table MoveCall {
  // Index into the values table of the EValue we are moving from.
  move_from: int;
  // Index into the values table of the EValue we are moving into.
  move_to: int;
}

table JumpFalseCall {
  // Index into the values table of the boolean that specifies whether or not
  // to jump.
  cond_value_index: int;
  // Value to set the executor program counter to if the jump occurs.
  destination_instruction: int;
}

table FreeCall {
  // Index into the values table of the tensor whose underlying data blob is
  // being freed.
  value_index: int;
}

union InstructionArguments {
  KernelCall,
  DelegateCall,
  MoveCall,
  JumpFalseCall,
  FreeCall,
}

// Basic unit of execution.
table Instruction {
  instr_args: InstructionArguments;
}

// A frame stored so that stacktraces can be printed.
table Frame {
  filename: string;  // Name of the file in which the instruction exists.
  lineno: int;       // Line number at which the instruction was called.
  name: string;      // Name of the function the instruction was called from.
  context: string;   // Source code of the instruction.
}

// The frames stored so that stacktraces can be printed.
table FrameList {
  items: [Frame];
}

// Indicates where a piece of data is stored.
enum DataLocation : byte {
  // Stored directly in the flatbuffer.
  INLINE = 0,
  // Stored in a segment.
  SEGMENT = 1,
}

// Indicates where the delegate data is stored.
table BackendDelegateDataReference {
  // Indicates which list to index into:
  //     INLINE -> Program.backend_delegate_data
  //     SEGMENT -> Program.segments
  location: DataLocation;

  // The index into the list indicated by the location.
  index: uint;
}

table CompileSpec {
  // One compile spec. There can be multiple specs for one method.
  key: string;     // e.g. "max_value"
  value: [ubyte];  // e.g. 4, or other types based on needs.
}

table BackendDelegate {
  // Used to resolve the delegate backend classes, for example "TCE0", "TCE1",
  // etc. This string is also used in to_backend.
  id: string;

  // A binary blob (from a subgraph) produced as an output of preprocessing.
  // Will be provided to the backend code at init time. Can be very large, on
  // the order of 10-100MB.
  processed: BackendDelegateDataReference;

  // The compilation spec for the lowered module's forward function.
  // Example: [CompileSpec["max_value", 4]]
  compile_specs: [CompileSpec];
}

// A sequence of blocking instructions to be executed in order. The abstraction
// is not currently leveraged; all current programs are a single chain. We are
// leaving chains as part of the program definition for future use cases around
// graph-level async, where different threads would be represented as separate
// chains.
table Chain {
  // Indices of the values that are (non-static) inputs into this Chain.
  inputs: [int];
  // Indices of the values that are outputs out of this Chain.
  outputs: [int];
  // List of instructions to be executed in order.
  instructions: [Instruction];
  // Optional list of frames for each instruction.
  // The backend config must have 'emit_stacktrace' set to true to emit this.
  stacktrace: [FrameList];
}
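
// Illustrative only: a rough C++ sketch (not the actual ExecuTorch executor;
// the names and structure are made up) of how a Chain's instruction list maps
// onto an interpreter loop over the instruction union types above.
//
//   size_t pc = 0;
//   while (pc < chain.instructions.size()) {
//     const Instruction& instr = chain.instructions[pc];
//     switch (type_of(instr.instr_args)) {
//       case KernelCall:   /* run operators[op_index] on values[args] */  ++pc; break;
//       case DelegateCall: /* run delegates[delegate_index] on values[args] */ ++pc; break;
//       case MoveCall:     /* values[move_to] = values[move_from] */      ++pc; break;
//       case JumpFalseCall:
//         // Jump to destination_instruction when the condition is false.
//         pc = values[cond_value_index] ? pc + 1 : destination_instruction;
//         break;
//       case FreeCall:     /* release values[value_index]'s data blob */  ++pc; break;
//     }
//   }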
table ExecutionPlan {
  // Name of a method on the nn.Module that was traced to create this program.
  name: string;

  // Type metadata for the inputs/outputs of the execution plan.
  container_meta_type: ContainerMetadata;

  // A list of all values used in this execution plan.
  values: [EValue];

  // Indices of the EValues that are inputs to this execution plan.
  // This list contains only the non-constant tensors (i.e. not part of the
  // saved program).
  inputs: [int];

  // Indices of the EValues that are outputs of this execution plan.
  // This signals a lifespan that goes beyond the execution.
  outputs: [int];

  // List of Chains of kernels.
  chains: [Chain];

  // Operators used in this execution plan.
  operators: [Operator];

  // A list of delegates; each is a special instance of execution at the same
  // level as chains.
  delegates: [BackendDelegate];

  // List of buffer sizes for non-constant memory allocations (think neural
  // net activations). A list instead of a single buffer to account for
  // complex memory hierarchies.
  // TODO(jakeszwe, razy): How to reconcile this with the ability for the
  // hierarchical memory allocator to be id-based instead of index-based.
  // The runtime should use len(constant_buffer) as the ground truth of the
  // constant memory buffer size, and ignore non_const_buffer_sizes[0].
  non_const_buffer_sizes: [int64];
}

// Constant tensor data stored directly in the flatbuffer.
table Buffer {
  // During serialization, this alignment may be rewritten to a larger value.
  // The magic "@executorch-tensor-alignment" comment tells EXIR which lines to
  // patch.
  storage: [ubyte] (force_align: 16);  // @executorch-tensor-alignment
}

// Delegate data stored directly in the flatbuffer. This is a different type
// than Buffer because tensors and delegates can have different alignment
// requirements.
table BackendDelegateInlineData {
  // During serialization, this alignment may be rewritten to a larger value.
  // The magic "@executorch-delegate-alignment" comment tells EXIR which lines
  // to patch.
  data: [ubyte] (force_align: 16);  // @executorch-delegate-alignment
}

// Describes a contiguous piece of data that lives outside of the flatbuffer
// data, typically appended afterwards in the file. The "extended header" in
// the file, when present, points to the segment base offset.
table DataSegment {
  // Segment offsets are relative to the segment base offset provided in the
  // extended file header. Segments will typically be aligned in a way that
  // makes it possible to use mmap() to load them.
  offset: uint64;

  // The size in bytes of valid data starting at the offset. The segment data
  // may be followed by padding before the segment that follows it, to make it
  // easier to use mmap().
  size: uint64;
}

// Describes data offsets into a particular segment.
table SubsegmentOffsets {
  // Index of the segment in Program.segments.
  segment_index: uint;

  // Each element is an offset in bytes into the data of the segment pointed to
  // by segment_index. Offsets must be aligned to @executorch-tensor-alignment.
  offsets: [uint64];
}

table Program {
  // Schema version.
  version: uint;

  // List of ExecutionPlans that make up the program. Each ExecutionPlan
  // corresponds to a different entry point into the model.
  execution_plan: [ExecutionPlan];

  // Tables of constant data, used for constant Values (e.g. the data field of
  // weight tensors). Each constant is assigned an index into the table, and
  // each entry is individually aligned. Index 0 is reserved to be pointed to
  // by non-constant Tensors.
  // If this field is non-empty, constant_segment.offsets must be empty.
  // DEPRECATED: After D61996249 on 2024-09-05, no new PTE files will use this
  // field.
  constant_buffer: [Buffer];

  // List of delegate data. Pointed to by BackendDelegateDataReference.
  backend_delegate_data: [BackendDelegateInlineData];
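
  // Illustrative only: a simplified C++ sketch (not the actual program loader;
  // `program`, `tensor`, `file_data`, and `segment_base_offset` are made-up
  // names, with segment_base_offset coming from the extended file header) of
  // how a constant Tensor's bytes are located using constant_buffer above and
  // the segment fields below.
  //
  //   const void* data;
  //   if (!program.constant_buffer.empty()) {
  //     // Legacy inline path: bytes live directly in the flatbuffer.
  //     data = program.constant_buffer[tensor.data_buffer_idx].storage.data();
  //   } else {
  //     // Segment path: bytes live in the DataSegment referenced by
  //     // constant_segment, at the per-tensor offset.
  //     const auto& seg =
  //         program.segments[program.constant_segment.segment_index];
  //     data = file_data + segment_base_offset + seg.offset +
  //            program.constant_segment.offsets[tensor.data_buffer_idx];
  //   }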
  // List of data segments that follow the Program data in this file, sorted by
  // offset. Elements in this schema can refer to these segments by index.
  segments: [DataSegment];

  // Describes the offsets of each constant tensor, relative to the segment
  // offset. If the constant_segment.offsets field is non-empty,
  // constant_buffer must be empty. constant_segment.offsets[0] is reserved to
  // be pointed to by non-constant Tensors.
  constant_segment: SubsegmentOffsets;

  // [Optional] Describes the offsets into various segments for each mutable
  // tensor. Only mutable tensors with a meaningful initial state are
  // serialized here (for example, weights that will be trained on-device, as
  // opposed to just layer activations). Separate from the constant_segment to
  // reduce peak memory usage by letting us read directly from the PTE file
  // into the mutable tensor, as opposed to loading the .pte data into constant
  // memory, copying it over, and then being unable to release the constant
  // segment. No two elements should point to the same segment.
  mutable_data_segments: [SubsegmentOffsets];
}

root_type Program;
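
// Illustrative only: a companion sketch to the constant-data example above
// (again not the actual loader; it additionally assumes that a mutable
// SubsegmentOffsets.offsets list is indexed by Tensor.data_buffer_idx, like
// constant_segment.offsets) showing how the serialized initial state of a
// mutable tensor would be located via mutable_data_segments.
//
//   // Pick the SubsegmentOffsets entry, per the Tensor comments above.
//   const uint64_t seg_list_idx = tensor.extra_tensor_info
//       ? tensor.extra_tensor_info.mutable_data_segments_idx
//       : 0;
//   const auto& sub = program.mutable_data_segments[seg_list_idx];
//   const auto& seg = program.segments[sub.segment_index];
//   const void* initial_data = file_data + segment_base_offset + seg.offset +
//                              sub.offsets[tensor.data_buffer_idx];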