xref: /aosp_15_r20/external/executorch/backends/xnnpack/serialization/runtime_schema.fbs (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1// Copyright (c) Meta Platforms, Inc. and affiliates.
2
3namespace fb_xnnpack;
4
5// Bump this identifier after any backward-compatibility (BC) breaking change to the schema.
6file_identifier "XN00";
7
8// Element datatype of an xnn-value. Declared with a `short` underlying type; every
// member has an explicit numeric value, so existing values must not be renumbered.
// NOTE(review): names presumably mirror XNNPACK's `enum xnn_datatype` — confirm they stay in sync.
9enum XNNDatatype : short {
10  /// Invalid data type. Valid Values never have this datatype.
11  xnn_datatype_invalid = 0,
12  /// IEEE754 single-precision floating-point.
13  xnn_datatype_fp32 = 1,
14  /// IEEE754 half-precision floating-point.
15  xnn_datatype_fp16 = 2,
16  /// Quantized 8-bit signed integer with shared per-Value quantization parameters.
17  xnn_datatype_qint8 = 3,
18  /// Quantized 8-bit unsigned integer with shared per-Value quantization parameters.
19  xnn_datatype_quint8 = 4,
20  /// Quantized 32-bit signed integer with shared per-Value quantization parameters.
21  xnn_datatype_qint32 = 5,
22  /// Quantized 8-bit signed integer with shared per-channel quantization parameters.
23  xnn_datatype_qcint8 = 6,
24  /// Quantized 32-bit signed integer with shared per-channel quantization parameters.
25  xnn_datatype_qcint32 = 7,
26  /// Quantized 4-bit signed integer with shared per-channel quantization parameters.
27  xnn_datatype_qcint4 = 8,
28  /// Dynamically quantized 8-bit signed integer with per-batch quantization parameters.
29  xnn_datatype_qdint8 = 9,
30  /// Quantized 4-bit signed integer with shared blockwise quantization parameters.
31  xnn_datatype_qbint4 = 10,
32}
33
34// Type of quantization: a union over the quantization-parameter tables defined in this schema.
// Used as the type of XNNQuantizedTensorValue.quant_params.
// Member order determines the union's type tag, so new members should only be appended.
35union XNNQuantParams {
36  PerChannelQuant,
37  PerTensorQuant,
38  PerTokenDynamicQuant,
39  PerChannelGroupQuant,
40}
41
42// Taken from executorch.
43// Data buffer abstraction: a single vector of raw bytes.
44table Buffer {
45  storage:[ubyte] (force_align: 16);  // byte payload; the attribute requests 16-byte alignment
46}
47
// Quantization parameters for the per-channel scheme (member of XNNQuantParams).
// NOTE(review): presumably one scale per channel along `channel_dim` — confirm against the serializer.
48table PerChannelQuant {
49  scale:[float];     // vector of float scales
50  channel_dim:int;   // presumably the index of the channel dimension — confirm
51}
52
// Quantization parameters for the per-token dynamic scheme (member of XNNQuantParams).
// No scales are stored in this table.
53table PerTokenDynamicQuant {
54  num_nonbatch_dims:int;  // NOTE(review): presumably the count of non-batch dimensions — confirm
55}
56
// Quantization parameters for the per-tensor scheme (member of XNNQuantParams):
// a single scale and a single zero point.
57table PerTensorQuant {
58  scale:float;
59  zero_point:int;
60}
61
// Quantization parameters for the per-channel-group scheme (member of XNNQuantParams).
// NOTE(review): `scale` and `scale_bf16` look like alternative encodings of the same scales
// (float vs. 16-bit patterns stored as ushort) — confirm which one the runtime reads.
62table PerChannelGroupQuant {
63  scale:[float];        // scales as 32-bit floats
64  channel_dim:int;      // presumably the index of the channel dimension — confirm
65  group_size:int;       // presumably the number of elements sharing one scale — confirm
66  scale_bf16:[ushort];  // scales as 16-bit values; name suggests bfloat16 bit patterns — confirm
67}
68
// Serialized description of a tensor Value. Several field comments are inherited from the
// XNNPACK C API, where the corresponding items are function arguments.
69table XNNTensorValue {
70  // Type of the tensor elements.
71  datatype:XNNDatatype;
72  // Number of dimensions in the shape.
73  num_dims:uint;
74  // Shape dimensions; expected to hold num_dims entries. If num_dims is 0, this vector can be absent.
75  // (C API note: XNNPACK does not keep any pointers to the shape array after the define call returns.)
76  dims:[uint];
77  // Index into the program's constant buffer table; value 0 is reserved to indicate a non-constant tensor.
78  constant_buffer_idx:uint;
79  // External ID for the Value. The ID must be within the range of reserved Value IDs specified on
80  // the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
81  // created for the Value.
82  external_id:uint;
83  // Binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
84  // and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
85  flags:uint;
86  // Value ID (an out-parameter in the C API, initialized upon successful return). If a
87  // valid external_id was provided, this is expected to equal the external_id value.
88  id_out:uint;
89  // Does this value need to be quantized dynamically at runtime?
90  // If we are quantizing at runtime, this field holds the target dtype; defaults to xnn_datatype_invalid.
91  dq_datatype:XNNDatatype = xnn_datatype_invalid;
92}
93
// A tensor Value together with its quantization parameters (member of XValueUnion).
94table XNNQuantizedTensorValue {
95  // Base tensor value (datatype, shape, IDs, flags).
96  tensor_value:XNNTensorValue;
97  // Quantization parameters; one of the tables listed in the XNNQuantParams union.
98  quant_params:XNNQuantParams;
99}
100
// Union of every node (operator) kind that can be stored in XNode.xnode_union.
// Entries written as `Name: Table` give an operator-specific name to a shared table
// (_XNNNode1x1, _XNNNode2x1, _XNNNodeConv, _XNNPooling2D, _XNNCat); bare entries use a
// dedicated table of the same name.
// Member order determines each member's type tag, so new members should only be appended.
101union XNodeUnion {
102  XNNAdd: _XNNNode2x1,
103  XNNFullyConnected,
104  XNNSoftmax: _XNNNode1x1,
105  XNNSigmoid: _XNNNode1x1,
106  XNNStaticTranspose,
107  XNNClamp: _XNNNode1x1,
108  XNNConv2d: _XNNNodeConv,
109  XNNDiv: _XNNNode2x1,
110  XNNStaticResizeBilinear2D,
111  XNNStaticConstantPad,
112  XNNAvgPooling2d: _XNNPooling2D,
113  XNNMinimum: _XNNNode2x1,
114  XNNDepthwiseConv2d: _XNNNodeConv,
115  XNNMaxPooling2d: _XNNPooling2D,
116  XNNMultiply: _XNNNode2x1,
117  XNNSubtract: _XNNNode2x1,
118  XNNFloor: _XNNNode1x1,
119  XNNConvert: _XNNNode1x1,
120  XNNGlobalAvgPooling2d: _XNNNode1x1,
121  XNNStaticReshape,
122  XNNArgMaxPooling2d,
123  XNNSquareRoot: _XNNNode1x1,
124  XNNCeiling: _XNNNode1x1,
125  XNNHardswish: _XNNNode1x1,
126  XNNLeakyReLU,
127  XNNMaximum: _XNNNode2x1,
128  XNNNegate: _XNNNode1x1,
129  XNNSquare: _XNNNode1x1,
130  XNNELU,
131  XNNAbs: _XNNNode1x1,
132  XNNPReLU: _XNNNode2x1,
133  XNNConcatenate2: _XNNCat,
134  XNNConcatenate3: _XNNCat,
135  XNNConcatenate4: _XNNCat,
136  XNNStaticSlice,
137  XNNScaledDotProductAttention,
138  XNNBatchMatrixMultiply: _XNNNode2x1,
139}
140
// Union of the value kinds that can be stored in XValue.xvalue_union:
// a plain tensor value or a tensor value with quantization parameters.
141union XValueUnion {
142  XNNTensorValue,
143  XNNQuantizedTensorValue,
144}
145
// Pair of float bounds attached to a node through XNode.output_min_max.
// NOTE(review): presumably the clamp range applied to the node's output — confirm in the runtime.
146table OutputMinMax {
147  output_min:float;
148  output_max:float;
149}
150
// One node of the graph: the operator payload plus per-node metadata.
// Stored in XNNGraph.xnodes.
151table XNode {
152  xnode_union:XNodeUnion;  // operator-specific table, selected from XNodeUnion
153  // An int which can be linked back to the node in the origin graph
154  debug_handle:uint;
155  output_min_max:OutputMinMax;  // table field; may be absent since table fields are optional by default
156}
157
// One value of the graph; wraps the XValueUnion so that values can be stored
// in the XNNGraph.xvalues vector.
158table XValue {
159  xvalue_union:XValueUnion;
160}
161
// Node payload for XNNStaticTranspose (member of XNodeUnion).
// NOTE(review): `perm` presumably holds `num_dims` entries giving the dimension permutation,
// and the *_id fields presumably refer to Value IDs — confirm against the runtime.
162table XNNStaticTranspose {
163  num_dims:uint;
164  perm:[uint];
165  input_id:uint;
166  output_id:uint;
167  flags:uint;
168}
169
// Node payload for XNNStaticResizeBilinear2D (member of XNodeUnion).
// NOTE(review): new_height/new_width are presumably the output spatial size — confirm.
170table XNNStaticResizeBilinear2D {
171  new_height:uint;
172  new_width:uint;
173  input_id:uint;
174  output_id:uint;
175  flags:uint;
176}
177
// Node payload for XNNStaticConstantPad (member of XNodeUnion).
// NOTE(review): pre_paddings/post_paddings presumably hold one entry per input dimension,
// with `padding_value` used as the fill value — confirm.
178table XNNStaticConstantPad {
179  pre_paddings:[uint];
180  post_paddings:[uint];
181  padding_value:float;
182  input_id:uint;
183  output_id:uint;
184  flags:uint;
185}
186
187// A node with two inputs and one output.
188// Not meant to be used directly; XNodeUnion exposes it under operator-specific names
// (e.g. XNNAdd, XNNDiv, XNNMultiply, XNNSubtract).
189table _XNNNode2x1 {
190  input1_id:uint;
191  input2_id:uint;
192  output_id:uint;
193  flags:uint;
194}
195
196// A node with one input and one output.
197// Not meant to be used directly; XNodeUnion exposes it under operator-specific names
// (e.g. XNNSoftmax, XNNSigmoid, XNNClamp, XNNFloor).
198table _XNNNode1x1 {
199  input_id:uint;
200  output_id:uint;
201  flags:uint;
202}
203
// Shared payload for the concatenate nodes; XNodeUnion exposes it as
// XNNConcatenate2, XNNConcatenate3 and XNNConcatenate4.
// NOTE(review): the table always has four input fields; presumably only the first 2/3/4
// are meaningful depending on the union member — confirm.
204table _XNNCat {
205  axis: uint;
206  input1_id: uint;
207  input2_id: uint;
208  input3_id: uint;
209  input4_id: uint;
210  output_id: uint;
211  flags: uint;
212}
213
// Node payload for XNNELU (member of XNodeUnion): one input, one output,
// plus a float `alpha` parameter.
214table XNNELU {
215  alpha:float;
216  input_id:uint;
217  output_id:uint;
218  flags:uint;
219}
220
// Node payload for XNNFullyConnected (member of XNodeUnion): an input, a filter,
// a bias and an output, each referenced by a uint ID.
221table XNNFullyConnected {
222  input1_id:uint;
223  filter_id:uint;
224  bias_id:uint;
225  output_id:uint;
226  flags:uint;
227}
228
// Shared payload for convolution nodes; XNodeUnion exposes it as XNNConv2d and
// XNNDepthwiseConv2d. Holds the geometry (padding, kernel, subsampling, dilation, grouping,
// adjustment) followed by the input/filter/bias/output IDs and flags.
229table _XNNNodeConv {
230  padding_top:uint;
231  padding_right:uint;
232  padding_bottom:uint;
233  padding_left:uint;
234  kernel_height:uint;
235  kernel_width:uint;
236  subsampling_height:uint;
237  subsampling_width:uint;
238  dilation_height:uint;
239  dilation_width:uint;
240  group_input_channels:uint;
241  group_output_channels:uint;
242  groups:uint;
243  adjustment_height:uint;
244  adjustment_width:uint;
245  input1_id:uint;
246  filter_id:uint;
247  bias_id:uint;
248  output_id:uint;
249  flags:uint;
250}
251
// Shared payload for 2D pooling nodes; XNodeUnion exposes it as XNNAvgPooling2d and
// XNNMaxPooling2d. Holds the padding, pooling window, stride and dilation, followed by the
// input/output IDs and flags.
252table _XNNPooling2D {
253  padding_top: uint;
254  padding_right: uint;
255  padding_bottom: uint;
256  padding_left: uint;
257  pooling_height: uint;
258  pooling_width: uint;
259  stride_height: uint;
260  stride_width: uint;
261  dilation_height: uint;
262  dilation_width: uint;
263  input_id: uint;
264  output_id: uint;
265  flags: uint;
266}
267
// Node payload for XNNStaticReshape (member of XNodeUnion).
// NOTE(review): `new_shape` presumably holds `num_dims` entries — confirm.
268table XNNStaticReshape {
269  num_dims:uint;
270  new_shape:[uint];
271  input_id: uint;
272  output_id: uint;
273  flags: uint;
274}
275
// Node payload for XNNStaticSlice (member of XNodeUnion).
// NOTE(review): `offsets` and `sizes` presumably each hold `num_dims` entries (per-dimension
// start offset and extent of the slice) — confirm.
276table XNNStaticSlice {
277  num_dims:uint;
278  offsets:[uint];
279  sizes:[uint];
280  input_id:uint;
281  output_id:uint;
282  flags:uint;
283}
284
// Node payload for XNNScaledDotProductAttention (member of XNodeUnion): five inputs
// (query, key, value, scale, mask) and one output, each referenced by a uint ID.
285table XNNScaledDotProductAttention {
286    query_id:uint;
287    key_id:uint;
288    value_id:uint;
289    scale_id:uint;
290    mask_id:uint;
291    output_id:uint;
292    flags:uint;
293}
294
// Node payload for XNNArgMaxPooling2d (member of XNodeUnion). Unlike _XNNPooling2D it has
// no stride/dilation fields and it has two outputs: one for values and one for indices.
295table XNNArgMaxPooling2d {
296  padding_top: uint;
297  padding_right: uint;
298  padding_bottom: uint;
299  padding_left: uint;
300  pooling_height: uint;
301  pooling_width: uint;
302  input_id: uint;
303  output_value_id: uint;
304  output_index_id: uint;
305  flags: uint;
306}
307
// Node payload for XNNLeakyReLU (member of XNodeUnion): one input, one output,
// plus a float `negative_slope` parameter.
308table XNNLeakyReLU {
309  negative_slope: float;
310  input_id: uint;
311  output_id: uint;
312  flags: uint;
313}
314
315// Describes where one piece of constant data lives: an (offset, size) pair of 64-bit unsigned
// integers. Entries are stored in XNNGraph.constant_data.
316table ConstantDataOffset {
317  // Constant data offsets are relative to the constant data base offset provided
318  // in the XNNPACKHeader (not defined in this schema).
319  offset: uint64;
320
321  // The size in bytes of valid data starting at the offset. The constant data
322  // may be followed by padding before the next piece of constant data.
323  size: uint64;
324}
325
// Root table of the schema: the serialized graph, consisting of nodes, values,
// external input/output IDs and the constant data referenced by constant values.
326table XNNGraph {
327  // Schema version.
328  version:string;
329  xnodes:[XNode];    // nodes (operators) of the graph
330  xvalues:[XValue];  // values (tensors) of the graph
331
332  // Number of external inputs/outputs
333  num_externs:uint;
334
335  // Ids of external inputs
336  input_ids:[uint];
337
338  // Ids of external outputs
339  output_ids:[uint];
340
341  // Table of constant data buffers, used for constant Values (e.g. the
342  // data of weight tensors). Each constant is assigned an index into this table, and the
343  // buffers are each individually aligned. Index 0 is reserved to be pointed to by non-constant
344  // Tensors. Exactly one of constant_buffer and constant_data must be non-empty.
345  constant_buffer:[Buffer];
346
347  // The list index is the memory buffer id; the value is the memory buffer size.
348  mem_buffer_sizes: [uint];
349
350  // List of the constant data that follows the XNNGraph in this file. Each constant is assigned an index into
351  // this table. Index 0 is reserved to be pointed to by non-constant Tensors. Exactly one of constant_buffer and
352  // constant_data must be non-empty.
353  constant_data:[ConstantDataOffset];
354}
355
// Declares XNNGraph as the root table of a serialized buffer.
356root_type XNNGraph;
357