// xref: /aosp_15_r20/external/executorch/backends/xnnpack/serialization/schema.fbs (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
// Copyright (c) Meta Platforms, Inc. and affiliates.

// Namespace applied to every type declared in this schema.
namespace fb_xnnpack;

// Four-character identifier associated with buffers of this schema.
// Update after any BC breaking changes
file_identifier "XN01";

// Datatype for xnn-values, stored as a `short`.
// Every enumerator carries an explicit number; those numbers are what gets
// serialized, so existing entries must keep their values.
enum XNNDatatype : short {
  /// Invalid data type. Valid Values never have this datatype.
  xnn_datatype_invalid = 0,
  /// IEEE754 single-precision floating-point.
  xnn_datatype_fp32 = 1,
  /// IEEE754 half-precision floating-point.
  xnn_datatype_fp16 = 2,
  /// Quantized 8-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint8 = 3,
  /// Quantized 8-bit unsigned integer with shared per-Value quantization parameters.
  xnn_datatype_quint8 = 4,
  /// Quantized 32-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint32 = 5,
  /// Quantized 8-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint8 = 6,
  /// Quantized 32-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint32 = 7,
  /// Quantized 4-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint4 = 8,
  /// Dynamically quantized 8-bit signed integer with per-batch quantization parameters.
  xnn_datatype_qdint8 = 9,
  /// Quantized 4-bit signed integer with shared blockwise quantization parameters.
  xnn_datatype_qbint4 = 10,
}

// Type of quantization attached to a quantized tensor value.
// The position of each member defines its union type tag, so new members
// must be appended rather than inserted.
union XNNQuantParams {
  PerChannelQuant,
  PerTensorQuant,
  PerTokenDynamicQuant,
  PerChannelGroupQuant,
}

// Deprecated buffer abstraction, const data buffers do not belong in flatbuffer.
// The table is still declared because XNNGraph.constant_buffer refers to it.
table Buffer {
  // Raw bytes, 16-byte aligned; marked deprecated.
  storage:[ubyte] (deprecated, force_align: 16);
}

// Quantization parameters for the PerChannelGroupQuant member of XNNQuantParams.
table PerChannelGroupQuant {
  // Scale values as 32-bit floats.
  scale:[float];
  // NOTE(review): presumably the tensor dimension that indexes channels — confirm.
  channel_dim:int;
  // NOTE(review): presumably the number of elements sharing one scale — confirm.
  group_size:int;
  // Scale values as 16-bit unsigned integers.
  // NOTE(review): the name suggests bfloat16 bit patterns — confirm with the serializer.
  scale_bf16:[ushort];
}

// Quantization parameters for the PerChannelQuant member of XNNQuantParams.
table PerChannelQuant {
  // Scale values as 32-bit floats.
  // NOTE(review): presumably one entry per channel — confirm.
  scale:[float];
  // NOTE(review): presumably the tensor dimension that indexes channels — confirm.
  channel_dim:int;
}

// Quantization parameters for the PerTokenDynamicQuant member of XNNQuantParams.
// No scale or zero point is stored in this table.
table PerTokenDynamicQuant {
  // NOTE(review): presumably the count of trailing non-batch dimensions — confirm.
  num_nonbatch_dims:int;
}

// Quantization parameters for the PerTensorQuant member of XNNQuantParams:
// a single scale and a single zero point.
table PerTensorQuant {
  scale:float;
  zero_point:int;
}

// Description of one tensor value in the graph.
table XNNTensorValue {
  // Type of the tensor elements.
  datatype:XNNDatatype;
  // Number of dimensions in the shape.
  num_dims:uint;
  // Pointer to an array of @a num_dims shape dimensions. If num_dims is 0, this pointer can be NULL.
  // XNNPACK does not keep any pointers to this array after the function returns.
  dims:[uint];
  // Index to the program's constant buffer table, value 0 is reserved to indicate non constant.
  constant_buffer_idx:uint;
  // External ID for the Value. The ID must be within the range of reserved Value IDs specified on
  // the Subgraph creation. If the external ID is XNN_INVALID_VALUE_ID, an internal ID will be
  // created for the Value.
  external_id:uint;
  // Binary features of the Value. Supported values are any combination of XNN_VALUE_FLAG_EXTERNAL_INPUT
  // and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
  flags:uint;
  // Pointer to the variable that will be initialized with the Value ID upon successful return. If a
  // valid @a external_id was provided, the variable will be initialized with the @a external_id value.
  id_out:uint;
}

// A tensor value together with its quantization parameters.
table XNNQuantizedTensorValue {
  // Base Tensor Value
  tensor_value:XNNTensorValue;
  // Quantization parameters; exactly one of the XNNQuantParams members.
  quant_params:XNNQuantParams;
}

// All node kinds an XNode can hold.
// Entries written `Name: Table` give an operator-specific name to a shared
// table layout; bare entries use a table of the same name.
// The position of each member defines its union type tag, so new members
// must be appended at the end.
union XNodeUnion {
  XNNAdd: _XNNNode2x1,
  XNNFullyConnected,
  XNNSoftmax: _XNNNode1x1,
  XNNSigmoid: _XNNNode1x1,
  XNNStaticTranspose,
  XNNClamp: _XNNNode1x1,
  XNNConv2d: _XNNNodeConv,
  XNNDiv: _XNNNode2x1,
  XNNStaticResizeBilinear2D,
  XNNStaticConstantPad,
  XNNAvgPooling2d: _XNNPooling2D,
  XNNMinimum: _XNNNode2x1,
  XNNDepthwiseConv2d: _XNNNodeConv,
  XNNMaxPooling2d: _XNNPooling2D,
  XNNMultiply: _XNNNode2x1,
  XNNSubtract: _XNNNode2x1,
  XNNFloor: _XNNNode1x1,
  XNNConvert: _XNNNode1x1,
  XNNGlobalAvgPooling2d: _XNNNode1x1,
  XNNStaticReshape,
  XNNArgMaxPooling2d,
  XNNSquareRoot: _XNNNode1x1,
  XNNCeiling: _XNNNode1x1,
  XNNHardswish: _XNNNode1x1,
  XNNLeakyReLU,
  XNNMaximum: _XNNNode2x1,
  XNNNegate: _XNNNode1x1,
  XNNSquare: _XNNNode1x1,
  XNNELU,
  XNNAbs: _XNNNode1x1,
  XNNPReLU: _XNNNode2x1,
  XNNConcatenate2: _XNNCat,
  XNNConcatenate3: _XNNCat,
  XNNConcatenate4: _XNNCat,
  XNNStaticSlice,
  XNNScaledDotProductAttention,
  XNNBatchMatrixMultiply: _XNNNode2x1,
}

// The value kinds an XValue can hold: a plain tensor or a quantized tensor.
union XValueUnion {
  XNNTensorValue,
  XNNQuantizedTensorValue,
}

// A pair of float bounds attached to a node through XNode.output_min_max.
// NOTE(review): presumably the clamping range applied to the node's output — confirm.
table OutputMinMax {
  output_min:float;
  output_max:float;
}

// One operator node of the graph.
table XNode {
  // The operator-specific payload; one of the XNodeUnion members.
  xnode_union:XNodeUnion;
  // An int which can be linked back to the node in the origin graph
  debug_handle:uint;
  // Optional output bounds for the node.
  output_min_max:OutputMinMax;
}

// One value of the graph; wraps XValueUnion so values can be stored in a vector.
table XValue {
  xvalue_union:XValueUnion;
}

// Parameters of a static transpose node.
table XNNStaticTranspose {
  // NOTE(review): presumably the number of entries in `perm` — confirm.
  num_dims:uint;
  // Permutation of the dimensions.
  perm:[uint];
  // NOTE(review): *_id fields presumably refer to values of the enclosing graph — confirm.
  input_id:uint;
  output_id:uint;
  flags:uint;
}

// Parameters of a static 2D bilinear resize node.
table XNNStaticResizeBilinear2D {
  // Target spatial size.
  new_height:uint;
  new_width:uint;
  // NOTE(review): *_id fields presumably refer to values of the enclosing graph — confirm.
  input_id:uint;
  output_id:uint;
  flags:uint;
}

// Parameters of a static constant-padding node.
table XNNStaticConstantPad {
  // NOTE(review): presumably one padding amount per dimension, before and
  // after the data respectively — confirm.
  pre_paddings:[uint];
  post_paddings:[uint];
  // Value written into the padded region.
  padding_value:float;
  // NOTE(review): *_id fields presumably refer to values of the enclosing graph — confirm.
  input_id:uint;
  output_id:uint;
  flags:uint;
}

// A node with two inputs and one output.
// Not meant to be used directly; XNodeUnion exposes it under operator-specific
// names (for example XNNAdd and XNNMultiply).
table _XNNNode2x1 {
  input1_id:uint;
  input2_id:uint;
  output_id:uint;
  flags:uint;
}

// A node with one input and one output.
// Not meant to be used directly; XNodeUnion exposes it under operator-specific
// names (for example XNNSoftmax and XNNSigmoid).
table _XNNNode1x1 {
  input_id:uint;
  output_id:uint;
  flags:uint;
}

// Shared layout for the concatenation nodes; XNodeUnion exposes it as
// XNNConcatenate2, XNNConcatenate3 and XNNConcatenate4.
// NOTE(review): presumably only the first N inputs are meaningful for
// XNNConcatenateN — confirm.
table _XNNCat {
  // Dimension along which the inputs are concatenated.
  axis:uint;
  input1_id:uint;
  input2_id:uint;
  input3_id:uint;
  input4_id:uint;
  output_id:uint;
  flags:uint;
}

// Parameters of an ELU node.
table XNNELU {
  // NOTE(review): presumably the ELU alpha coefficient — confirm.
  alpha:float;
  input_id:uint;
  output_id:uint;
  flags:uint;
}

// Parameters of a fully connected node: an input, a filter, a bias and an output.
// NOTE(review): *_id fields presumably refer to values of the enclosing graph — confirm.
table XNNFullyConnected {
  input1_id:uint;
  filter_id:uint;
  bias_id:uint;
  output_id:uint;
  flags:uint;
}

// Shared layout for convolution nodes; XNodeUnion exposes it as XNNConv2d and
// XNNDepthwiseConv2d.
table _XNNNodeConv {
  // Padding on each side of the input.
  padding_top:uint;
  padding_right:uint;
  padding_bottom:uint;
  padding_left:uint;
  // Kernel size.
  kernel_height:uint;
  kernel_width:uint;
  // NOTE(review): presumably the convolution stride — confirm.
  subsampling_height:uint;
  subsampling_width:uint;
  // Kernel dilation.
  dilation_height:uint;
  dilation_width:uint;
  // Channel grouping.
  group_input_channels:uint;
  group_output_channels:uint;
  groups:uint;
  // NOTE(review): presumably output-size adjustment, as used by transposed
  // convolution — confirm.
  adjustment_height:uint;
  adjustment_width:uint;
  // Operand ids.
  input1_id:uint;
  filter_id:uint;
  bias_id:uint;
  output_id:uint;
  flags:uint;
}

// Shared layout for 2D pooling nodes; XNodeUnion exposes it as XNNAvgPooling2d
// and XNNMaxPooling2d.
table _XNNPooling2D {
  // Padding on each side of the input.
  padding_top:uint;
  padding_right:uint;
  padding_bottom:uint;
  padding_left:uint;
  // Pooling window size.
  pooling_height:uint;
  pooling_width:uint;
  // Window stride.
  stride_height:uint;
  stride_width:uint;
  // Window dilation.
  dilation_height:uint;
  dilation_width:uint;
  // Operand ids.
  input_id:uint;
  output_id:uint;
  flags:uint;
}

// Parameters of a static reshape node.
table XNNStaticReshape {
  // NOTE(review): presumably the number of entries in `new_shape` — confirm.
  num_dims:uint;
  // Target shape.
  new_shape:[uint];
  input_id:uint;
  output_id:uint;
  flags:uint;
}

// Parameters of a static slice node.
table XNNStaticSlice {
  // NOTE(review): presumably the number of entries in `offsets` and `sizes` — confirm.
  num_dims:uint;
  // NOTE(review): presumably the per-dimension start offset and extent of the slice — confirm.
  offsets:[uint];
  sizes:[uint];
  input_id:uint;
  output_id:uint;
  flags:uint;
}

// Parameters of a scaled dot-product attention node: ids for the query, key,
// value, scale and mask operands plus the output.
// NOTE(review): *_id fields presumably refer to values of the enclosing graph — confirm.
table XNNScaledDotProductAttention {
  query_id:uint;
  key_id:uint;
  value_id:uint;
  scale_id:uint;
  mask_id:uint;
  output_id:uint;
  flags:uint;
}

// Parameters of a 2D argmax pooling node. Unlike _XNNPooling2D it has no
// stride or dilation fields and declares two outputs.
table XNNArgMaxPooling2d {
  // Padding on each side of the input.
  padding_top:uint;
  padding_right:uint;
  padding_bottom:uint;
  padding_left:uint;
  // Pooling window size.
  pooling_height:uint;
  pooling_width:uint;
  input_id:uint;
  // NOTE(review): presumably the pooled maxima and their indices, respectively — confirm.
  output_value_id:uint;
  output_index_id:uint;
  flags:uint;
}

// Parameters of a leaky ReLU node.
table XNNLeakyReLU {
  // NOTE(review): presumably the slope applied to negative inputs — confirm.
  negative_slope:float;
  input_id:uint;
  output_id:uint;
  flags:uint;
}

// Describes data offsets for constant data
table ConstantDataOffset {
  // Constant data offsets are relative to the constant data base offset provided
  // in the XNNPACKHeader.
  offset:uint64;

  // The size in bytes of valid data starting at the offset. The constant data
  // may be followed by padding before the next piece of constant data
  size:uint64;
}

// Root table: the serialized graph with its nodes, values and constant-data index.
table XNNGraph {
  // Schema version.
  version:string;
  // Operator nodes of the graph.
  xnodes:[XNode];
  // Values (tensors) of the graph.
  xvalues:[XValue];

  // Number of external inputs/outputs
  num_externs:uint;

  // Ids of external inputs
  input_ids:[uint];

  // Ids of external outputs
  output_ids:[uint];

  // Deprecated constant buffer storage in flatbuffer
  constant_buffer:[Buffer] (deprecated);

  // Deprecated memory_buffer size tracking in flatbuffer
  mem_buffer_sizes:[uint] (deprecated);

  // List of the constant data that follows the XNNGraph in this file. Each constant data is assigned an index into
  // the table. 0 index is reserved to be pointed to by non-constant Tensor.
  constant_data:[ConstantDataOffset];
}

// XNNGraph is the root table of a serialized buffer.
root_type XNNGraph;
