// Copyright (c) Meta Platforms, Inc. and affiliates.

// FlatBuffers schema for a serialized XNNPACK graph. The root type is XNNGraph.
//
// NOTE: no field in this schema carries an explicit `id` attribute, so field
// order inside each table and member order inside each union/enum define the
// binary layout. Only append new fields/members at the end; never reorder or
// remove existing ones.

namespace fb_xnnpack;

// Update after any BC breaking changes
file_identifier "XN00";

// Datatype for xnn-values.
enum XNNDatatype : short {
  /// Invalid data type. Valid Values never have this datatype.
  xnn_datatype_invalid = 0,
  /// IEEE754 single-precision floating-point.
  xnn_datatype_fp32 = 1,
  /// IEEE754 half-precision floating-point.
  xnn_datatype_fp16 = 2,
  /// Quantized 8-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint8 = 3,
  /// Quantized 8-bit unsigned integer with shared per-Value quantization parameters.
  xnn_datatype_quint8 = 4,
  /// Quantized 32-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint32 = 5,
  /// Quantized 8-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint8 = 6,
  /// Quantized 32-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint32 = 7,
  /// Quantized 4-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint4 = 8,
  /// Dynamically quantized 8-bit signed integer with per-batch quantization parameters.
  xnn_datatype_qdint8 = 9,
  /// Quantized 4-bit signed integer with shared blockwise quantization parameters.
  xnn_datatype_qbint4 = 10,
}

// Type of quantization. Each member is a table declared further down in this file.
union XNNQuantParams {
  PerChannelQuant,
  PerTensorQuant,
  PerTokenDynamicQuant,
  PerChannelGroupQuant,
}

// Taken from executorch.
// Data buffer abstraction.
table Buffer {
  storage:[ubyte] (force_align: 16);
}

// Parameters for the PerChannelQuant member of XNNQuantParams.
table PerChannelQuant {
  scale:[float];
  channel_dim:int;
}

// Parameters for the PerTokenDynamicQuant member of XNNQuantParams.
table PerTokenDynamicQuant {
  num_nonbatch_dims:int;
}

// Parameters for the PerTensorQuant member of XNNQuantParams.
table PerTensorQuant {
  scale:float;
  zero_point:int;
}

// Parameters for the PerChannelGroupQuant member of XNNQuantParams.
// NOTE(review): both `scale` ([float]) and `scale_bf16` ([ushort]) are declared;
// which one a reader should prefer is not stated in this schema. Confirm
// against the serializer/runtime.
table PerChannelGroupQuant {
  scale:[float];
  channel_dim:int;
  group_size:int;
  scale_bf16:[ushort];
}

// NOTE(review): several comments below use "pointer" / "function returns"
// wording although the fields are plain scalars/vectors here. The wording
// looks carried over from a C API description; confirm before relying on it.
table XNNTensorValue {
  // Type of the tensor elements.
  datatype:XNNDatatype;
  // Number of dimensions in the shape.
  num_dims:uint;
  // Pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
  // XNNPACK does not keep any pointers to this array after the function returns.
  dims:[uint];
  // Index into the program's constant buffer table; value 0 is reserved to indicate non-constant.
  constant_buffer_idx:uint;
  // External ID for the Value. The ID must be within the range of reserved Value IDs specified on
  // the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
  // created for the Value.
  external_id:uint;
  // Binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
  // and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
  flags:uint;
  // Pointer to the variable that will be initialized with the Value ID upon successful return. If a
  // valid @a external_id was provided, the variable will be initialized with the @a external_id value.
  id_out:uint;
  // Does this value need to be quantized dynamically at runtime?
  // If we are quantizing at runtime, this field points to a target dtype.
  dq_datatype:XNNDatatype = xnn_datatype_invalid;
}

table XNNQuantizedTensorValue {
  // Base Tensor Value
  tensor_value:XNNTensorValue;
  // Quantization parameters
  quant_params:XNNQuantParams;
}

// All node kinds that an XNode can hold. Members written as `Name: Table` are
// aliases that reuse a shared table layout; bare members use a table of the
// same name. Append new members at the end only.
union XNodeUnion {
  XNNAdd: _XNNNode2x1,
  XNNFullyConnected,
  XNNSoftmax: _XNNNode1x1,
  XNNSigmoid: _XNNNode1x1,
  XNNStaticTranspose,
  XNNClamp: _XNNNode1x1,
  XNNConv2d: _XNNNodeConv,
  XNNDiv: _XNNNode2x1,
  XNNStaticResizeBilinear2D,
  XNNStaticConstantPad,
  XNNAvgPooling2d: _XNNPooling2D,
  XNNMinimum: _XNNNode2x1,
  XNNDepthwiseConv2d: _XNNNodeConv,
  XNNMaxPooling2d: _XNNPooling2D,
  XNNMultiply: _XNNNode2x1,
  XNNSubtract: _XNNNode2x1,
  XNNFloor: _XNNNode1x1,
  XNNConvert: _XNNNode1x1,
  XNNGlobalAvgPooling2d: _XNNNode1x1,
  XNNStaticReshape,
  XNNArgMaxPooling2d,
  XNNSquareRoot: _XNNNode1x1,
  XNNCeiling: _XNNNode1x1,
  XNNHardswish: _XNNNode1x1,
  XNNLeakyReLU,
  XNNMaximum: _XNNNode2x1,
  XNNNegate: _XNNNode1x1,
  XNNSquare: _XNNNode1x1,
  XNNELU,
  XNNAbs: _XNNNode1x1,
  XNNPReLU: _XNNNode2x1,
  XNNConcatenate2: _XNNCat,
  XNNConcatenate3: _XNNCat,
  XNNConcatenate4: _XNNCat,
  XNNStaticSlice,
  XNNScaledDotProductAttention,
  XNNBatchMatrixMultiply: _XNNNode2x1,
}

// All value kinds that an XValue can hold.
union XValueUnion {
  XNNTensorValue,
  XNNQuantizedTensorValue,
}

// Pair of output bounds attached to an XNode (see XNode.output_min_max).
table OutputMinMax {
  output_min:float;
  output_max:float;
}

table XNode {
  xnode_union:XNodeUnion;
  // An int which can be linked back to the node in the origin graph
  debug_handle:uint;
  output_min_max:OutputMinMax;
}

table XValue {
  xvalue_union:XValueUnion;
}

table XNNStaticTranspose {
  num_dims:uint;
  perm:[uint];
  input_id:uint;
  output_id:uint;
  flags:uint;
}

table XNNStaticResizeBilinear2D {
  new_height:uint;
  new_width:uint;
  input_id:uint;
  output_id:uint;
  flags:uint;
}

table XNNStaticConstantPad {
  pre_paddings:[uint];
  post_paddings:[uint];
  padding_value:float;
  input_id:uint;
  output_id:uint;
  flags:uint;
}

// A node with two inputs and one output.
// Not meant to be used directly; referenced through aliases in XNodeUnion.
table _XNNNode2x1 {
  input1_id:uint;
  input2_id:uint;
  output_id:uint;
  flags:uint;
}

// A node with one input and one output.
// Not meant to be used directly; referenced through aliases in XNodeUnion.
table _XNNNode1x1 {
  input_id:uint;
  output_id:uint;
  flags:uint;
}

// Shared layout for the XNNConcatenate2/3/4 aliases in XNodeUnion: an axis,
// up to four input ids and one output id.
table _XNNCat {
  axis:uint;
  input1_id:uint;
  input2_id:uint;
  input3_id:uint;
  input4_id:uint;
  output_id:uint;
  flags:uint;
}

table XNNELU {
  alpha:float;
  input_id:uint;
  output_id:uint;
  flags:uint;
}

table XNNFullyConnected {
  input1_id:uint;
  filter_id:uint;
  bias_id:uint;
  output_id:uint;
  flags:uint;
}

// Shared layout for the XNNConv2d and XNNDepthwiseConv2d aliases in XNodeUnion.
table _XNNNodeConv {
  padding_top:uint;
  padding_right:uint;
  padding_bottom:uint;
  padding_left:uint;
  kernel_height:uint;
  kernel_width:uint;
  subsampling_height:uint;
  subsampling_width:uint;
  dilation_height:uint;
  dilation_width:uint;
  group_input_channels:uint;
  group_output_channels:uint;
  groups:uint;
  adjustment_height:uint;
  adjustment_width:uint;
  input1_id:uint;
  filter_id:uint;
  bias_id:uint;
  output_id:uint;
  flags:uint;
}

// Shared layout for the XNNAvgPooling2d and XNNMaxPooling2d aliases in XNodeUnion.
table _XNNPooling2D {
  padding_top:uint;
  padding_right:uint;
  padding_bottom:uint;
  padding_left:uint;
  pooling_height:uint;
  pooling_width:uint;
  stride_height:uint;
  stride_width:uint;
  dilation_height:uint;
  dilation_width:uint;
  input_id:uint;
  output_id:uint;
  flags:uint;
}

table XNNStaticReshape {
  num_dims:uint;
  new_shape:[uint];
  input_id:uint;
  output_id:uint;
  flags:uint;
}

table XNNStaticSlice {
  num_dims:uint;
  offsets:[uint];
  sizes:[uint];
  input_id:uint;
  output_id:uint;
  flags:uint;
}

table XNNScaledDotProductAttention {
  query_id:uint;
  key_id:uint;
  value_id:uint;
  scale_id:uint;
  mask_id:uint;
  output_id:uint;
  flags:uint;
}

// Unlike the _XNNPooling2D-based nodes, this node has two outputs
// (output_value_id and output_index_id) and no stride/dilation fields.
table XNNArgMaxPooling2d {
  padding_top:uint;
  padding_right:uint;
  padding_bottom:uint;
  padding_left:uint;
  pooling_height:uint;
  pooling_width:uint;
  input_id:uint;
  output_value_id:uint;
  output_index_id:uint;
  flags:uint;
}

table XNNLeakyReLU {
  negative_slope:float;
  input_id:uint;
  output_id:uint;
  flags:uint;
}

// Describes data offsets for constant data
table ConstantDataOffset {
  // Constant data offsets are relative to the constant data base offset provided
  // in the XNNPACKHeader.
  offset:uint64;

  // The size in bytes of valid data starting at the offset. The constant data
  // may be followed by padding before the next piece of constant data.
  size:uint64;
}

table XNNGraph {
  // Schema version.
  version:string;
  xnodes:[XNode];
  xvalues:[XValue];

  // Number of external inputs/outputs
  num_externs:uint;

  // Ids of external inputs
  input_ids:[uint];

  // Ids of external outputs
  output_ids:[uint];

  // Table of constant data, used for constant Values (e.g. the data field of
  // weight tensors). Each constant is assigned an index into the table, and
  // each entry is individually aligned. Index 0 is reserved to be pointed to
  // by non-constant Tensors. Exactly one of constant_buffer and constant_data
  // must be non-empty.
  constant_buffer:[Buffer];

  // The list index is the memory buffer id; the value is the memory buffer size.
  mem_buffer_sizes:[uint];

  // List of the constant data that follows the XNNGraph in this file. Each
  // constant data is assigned an index into the table. Index 0 is reserved to
  // be pointed to by non-constant Tensors. Exactly one of constant_buffer and
  // constant_data must be non-empty.
  constant_data:[ConstantDataOffset];
}

root_type XNNGraph;