// Copyright (c) Meta Platforms, Inc. and affiliates.

namespace fb_xnnpack;

// Change this identifier after any backward-compatibility-breaking change.
file_identifier "XN01";

// Element datatypes for xnn-values.
enum XNNDatatype : short {
  /// Invalid data type. Valid Values never have this datatype.
  xnn_datatype_invalid = 0,
  /// IEEE754 single-precision floating-point.
  xnn_datatype_fp32 = 1,
  /// IEEE754 half-precision floating-point.
  xnn_datatype_fp16 = 2,
  /// Quantized 8-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint8 = 3,
  /// Quantized 8-bit unsigned integer with shared per-Value quantization parameters.
  xnn_datatype_quint8 = 4,
  /// Quantized 32-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint32 = 5,
  /// Quantized 8-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint8 = 6,
  /// Quantized 32-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint32 = 7,
  /// Quantized 4-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint4 = 8,
  /// Dynamically quantized 8-bit signed integer with per-batch quantization parameters.
  xnn_datatype_qdint8 = 9,
  /// Quantized 4-bit signed integer with shared blockwise quantization parameters.
  xnn_datatype_qbint4 = 10,
}

// The kinds of quantization a quantized value can carry.
// Member order is part of the serialized format; append only.
union XNNQuantParams {
  PerChannelQuant,
  PerTensorQuant,
  PerTokenDynamicQuant,
  PerChannelGroupQuant,
}

// Deprecated buffer abstraction: constant data buffers do not belong in the
// flatbuffer.
table Buffer {
  storage: [ubyte] (deprecated, force_align: 16);
}

// Parameters for the PerChannelGroupQuant member of XNNQuantParams.
table PerChannelGroupQuant {
  scale: [float];
  channel_dim: int;
  group_size: int;
  scale_bf16: [ushort];
}

// Parameters for the PerChannelQuant member of XNNQuantParams.
table PerChannelQuant {
  scale: [float];
  channel_dim: int;
}

// Parameters for the PerTokenDynamicQuant member of XNNQuantParams.
table PerTokenDynamicQuant {
  num_nonbatch_dims: int;
}

// Parameters for the PerTensorQuant member of XNNQuantParams.
table PerTensorQuant {
  scale: float;
  zero_point: int;
}

table XNNTensorValue {
  // Type of the tensor elements.
  datatype: XNNDatatype;

  // Number of dimensions in the shape.
  num_dims: uint;

  // Array of @a num_dims shape dimensions. If num_dims is 0, this can be NULL.
  // XNNPACK does not keep any pointers to this array after the function
  // returns.
  dims: [uint];

  // Index into the program's constant buffer table. The value 0 is reserved
  // and indicates a non-constant value.
  constant_buffer_idx: uint;

  // External ID for the Value. The ID must be within the range of reserved
  // Value IDs specified on the Subgraph creation. If the external ID is
  // XNN_INVALID_VALUE_ID, an internal ID will be created for the Value.
  external_id: uint;

  // Binary features of the Value. Supported values are any combination of
  // XNN_VALUE_FLAG_EXTERNAL_INPUT and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
  flags: uint;

  // The variable that will be initialized with the Value ID upon successful
  // return. If a valid @a external_id was provided, it will be initialized
  // with the @a external_id value.
  id_out: uint;
}

table XNNQuantizedTensorValue {
  // Base tensor value.
  tensor_value: XNNTensorValue;

  // Quantization parameters.
  quant_params: XNNQuantParams;
}

// All node kinds a graph node can hold. Members written as `Name: Table`
// use the named table as their payload type.
// Member order is part of the serialized format; append only.
union XNodeUnion {
  XNNAdd: _XNNNode2x1,
  XNNFullyConnected,
  XNNSoftmax: _XNNNode1x1,
  XNNSigmoid: _XNNNode1x1,
  XNNStaticTranspose,
  XNNClamp: _XNNNode1x1,
  XNNConv2d: _XNNNodeConv,
  XNNDiv: _XNNNode2x1,
  XNNStaticResizeBilinear2D,
  XNNStaticConstantPad,
  XNNAvgPooling2d: _XNNPooling2D,
  XNNMinimum: _XNNNode2x1,
  XNNDepthwiseConv2d: _XNNNodeConv,
  XNNMaxPooling2d: _XNNPooling2D,
  XNNMultiply: _XNNNode2x1,
  XNNSubtract: _XNNNode2x1,
  XNNFloor: _XNNNode1x1,
  XNNConvert: _XNNNode1x1,
  XNNGlobalAvgPooling2d: _XNNNode1x1,
  XNNStaticReshape,
  XNNArgMaxPooling2d,
  XNNSquareRoot: _XNNNode1x1,
  XNNCeiling: _XNNNode1x1,
  XNNHardswish: _XNNNode1x1,
  XNNLeakyReLU,
  XNNMaximum: _XNNNode2x1,
  XNNNegate: _XNNNode1x1,
  XNNSquare: _XNNNode1x1,
  XNNELU,
  XNNAbs: _XNNNode1x1,
  XNNPReLU: _XNNNode2x1,
  XNNConcatenate2: _XNNCat,
  XNNConcatenate3: _XNNCat,
  XNNConcatenate4: _XNNCat,
  XNNStaticSlice,
  XNNScaledDotProductAttention,
  XNNBatchMatrixMultiply: _XNNNode2x1,
}
// The kinds of value a graph value can hold.
// Member order is part of the serialized format; append only.
union XValueUnion {
  XNNTensorValue,
  XNNQuantizedTensorValue,
}

table OutputMinMax {
  output_min: float;
  output_max: float;
}

table XNode {
  xnode_union: XNodeUnion;

  // An int which can be linked back to the node in the origin graph.
  debug_handle: uint;

  output_min_max: OutputMinMax;
}

table XValue {
  xvalue_union: XValueUnion;
}

table XNNStaticTranspose {
  num_dims: uint;
  perm: [uint];
  input_id: uint;
  output_id: uint;
  flags: uint;
}

table XNNStaticResizeBilinear2D {
  new_height: uint;
  new_width: uint;
  input_id: uint;
  output_id: uint;
  flags: uint;
}

table XNNStaticConstantPad {
  pre_paddings: [uint];
  post_paddings: [uint];
  padding_value: float;
  input_id: uint;
  output_id: uint;
  flags: uint;
}

// A node with two inputs and one output.
// Not meant to be used directly.
table _XNNNode2x1 {
  input1_id: uint;
  input2_id: uint;
  output_id: uint;
  flags: uint;
}

// A node with one input and one output.
// Not meant to be used directly.
table _XNNNode1x1 {
  input_id: uint;
  output_id: uint;
  flags: uint;
}

// Payload type of the XNNConcatenate2/3/4 members of XNodeUnion.
table _XNNCat {
  axis: uint;
  input1_id: uint;
  input2_id: uint;
  input3_id: uint;
  input4_id: uint;
  output_id: uint;
  flags: uint;
}

table XNNELU {
  alpha: float;
  input_id: uint;
  output_id: uint;
  flags: uint;
}

table XNNFullyConnected {
  input1_id: uint;
  filter_id: uint;
  bias_id: uint;
  output_id: uint;
  flags: uint;
}

// Payload type of the XNNConv2d and XNNDepthwiseConv2d members of XNodeUnion.
table _XNNNodeConv {
  padding_top: uint;
  padding_right: uint;
  padding_bottom: uint;
  padding_left: uint;
  kernel_height: uint;
  kernel_width: uint;
  subsampling_height: uint;
  subsampling_width: uint;
  dilation_height: uint;
  dilation_width: uint;
  group_input_channels: uint;
  group_output_channels: uint;
  groups: uint;
  adjustment_height: uint;
  adjustment_width: uint;
  input1_id: uint;
  filter_id: uint;
  bias_id: uint;
  output_id: uint;
  flags: uint;
}

// Payload type of the XNNAvgPooling2d and XNNMaxPooling2d members of
// XNodeUnion.
table _XNNPooling2D {
  padding_top: uint;
  padding_right: uint;
  padding_bottom: uint;
  padding_left: uint;
  pooling_height: uint;
  pooling_width: uint;
  stride_height: uint;
  stride_width: uint;
  dilation_height: uint;
  dilation_width: uint;
  input_id: uint;
  output_id: uint;
  flags: uint;
}

table XNNStaticReshape {
  num_dims: uint;
  new_shape: [uint];
  input_id: uint;
  output_id: uint;
  flags: uint;
}

table XNNStaticSlice {
  num_dims: uint;
  offsets: [uint];
  sizes: [uint];
  input_id: uint;
  output_id: uint;
  flags: uint;
}

table XNNScaledDotProductAttention {
  query_id: uint;
  key_id: uint;
  value_id: uint;
  scale_id: uint;
  mask_id: uint;
  output_id: uint;
  flags: uint;
}

table XNNArgMaxPooling2d {
  padding_top: uint;
  padding_right: uint;
  padding_bottom: uint;
  padding_left: uint;
  pooling_height: uint;
  pooling_width: uint;
  input_id: uint;
  output_value_id: uint;
  output_index_id: uint;
  flags: uint;
}

table XNNLeakyReLU {
  negative_slope: float;
  input_id: uint;
  output_id: uint;
  flags: uint;
}

// Describes data offsets for constant data.
table ConstantDataOffset {
  // Constant data offsets are relative to the constant data base offset
  // provided in the XNNPACKHeader.
  offset: uint64;

  // The size in bytes of valid data starting at the offset. The constant data
  // may be followed by padding before the next piece of constant data.
  size: uint64;
}

table XNNGraph {
  // Schema version.
  version: string;

  xnodes: [XNode];
  xvalues: [XValue];

  // Number of external inputs/outputs.
  num_externs: uint;

  // Ids of external inputs.
  input_ids: [uint];

  // Ids of external outputs.
  output_ids: [uint];

  // Deprecated constant buffer storage in the flatbuffer.
  constant_buffer: [Buffer] (deprecated);

  // Deprecated memory_buffer size tracking in the flatbuffer.
  mem_buffer_sizes: [uint] (deprecated);

  // List of the constant data that follows the XNNGraph in this file. Each
  // constant data is assigned an index into the table. Index 0 is reserved to
  // be pointed to by non-constant Tensors.
  constant_data: [ConstantDataOffset];
}

root_type XNNGraph;