// Copyright (c) Meta Platforms, Inc. and affiliates.

namespace fb_xnnpack;

// File identifier for this schema. Update after any BC breaking changes.
file_identifier "XN01";

// Element datatype of an xnn Value.
enum XNNDatatype : short {
  /// Invalid data type. Valid Values never have this datatype.
  xnn_datatype_invalid = 0,
  /// IEEE754 single-precision floating-point.
  xnn_datatype_fp32 = 1,
  /// IEEE754 half-precision floating-point.
  xnn_datatype_fp16 = 2,
  /// Quantized 8-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint8 = 3,
  /// Quantized 8-bit unsigned integer with shared per-Value quantization parameters.
  xnn_datatype_quint8 = 4,
  /// Quantized 32-bit signed integer with shared per-Value quantization parameters.
  xnn_datatype_qint32 = 5,
  /// Quantized 8-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint8 = 6,
  /// Quantized 32-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint32 = 7,
  /// Quantized 4-bit signed integer with shared per-channel quantization parameters.
  xnn_datatype_qcint4 = 8,
  /// Dynamically quantized 8-bit signed integer with per-batch quantization parameters.
  xnn_datatype_qdint8 = 9,
  /// Quantized 4-bit signed integer with shared blockwise quantization parameters.
  xnn_datatype_qbint4 = 10,
}

// Kind of quantization attached to a quantized tensor value.
// NOTE: the position of each member determines the union type tag that is
// serialized, so new members must only be appended.
union XNNQuantParams {
  PerChannelQuant,
  PerTensorQuant,
  PerTokenDynamicQuant,
  PerChannelGroupQuant,
}

// Deprecated buffer abstraction: constant data buffers do not belong in the
// flatbuffer itself.
table Buffer {
  storage: [ubyte] (deprecated, force_align: 16);
}

// Quantization parameters for the PerChannelGroupQuant scheme.
table PerChannelGroupQuant {
  scale: [float];
  channel_dim: int;
  group_size: int;
  // NOTE(review): presumably the same scales encoded as bf16 in raw 16-bit
  // words - confirm against the serializer.
  scale_bf16: [ushort];
}

// Quantization parameters for the PerChannelQuant scheme: a list of scales
// plus the channel dimension they refer to.
table PerChannelQuant {
  scale: [float];
  channel_dim: int;
}

// Quantization parameters for the PerTokenDynamicQuant scheme.
table PerTokenDynamicQuant {
  num_nonbatch_dims: int;
}

// Quantization parameters for the PerTensorQuant scheme: one scale and one
// zero point.
table PerTensorQuant {
  scale: float;
  zero_point: int;
}

// Description of a tensor Value. The field comments follow the wording of the
// XNNPACK tensor-definition arguments they correspond to.
table XNNTensorValue {
  // Type of the tensor elements.
  datatype: XNNDatatype;
  // Number of dimensions in the shape.
  num_dims: uint;
  // Array of num_dims shape dimensions. May be absent (NULL in the XNNPACK
  // call) when num_dims is 0. XNNPACK does not keep any pointers to this
  // array after the function returns.
  dims: [uint];
  // Index into the program's constant buffer table; the value 0 is reserved
  // to indicate a non-constant tensor.
  constant_buffer_idx: uint;
  // External ID for the Value. The ID must be within the range of reserved
  // Value IDs specified on the Subgraph creation. If the external ID is
  // XNN_INVALID_VALUE_ID, an internal ID will be created for the Value.
  external_id: uint;
  // Binary features of the Value. Supported values are any combination of
  // XNN_VALUE_FLAG_EXTERNAL_INPUT and XNN_VALUE_FLAG_EXTERNAL_OUTPUT.
  flags: uint;
  // Corresponds to the variable that is initialized with the Value ID upon
  // successful return. If a valid external_id was provided, it is initialized
  // with the external_id value.
  id_out: uint;
}

// A tensor Value together with its quantization parameters.
table XNNQuantizedTensorValue {
  // Base tensor value.
  tensor_value: XNNTensorValue;
  // Quantization parameters.
  quant_params: XNNQuantParams;
}

// All node kinds that an XNode can hold. Entries written as `Name: Table`
// are aliases onto a shared table layout (_XNNNode1x1, _XNNNode2x1,
// _XNNNodeConv, _XNNPooling2D, _XNNCat).
// NOTE: the position of each member determines the union type tag that is
// serialized, so new members must only be appended.
union XNodeUnion {
  XNNAdd: _XNNNode2x1,
  XNNFullyConnected,
  XNNSoftmax: _XNNNode1x1,
  XNNSigmoid: _XNNNode1x1,
  XNNStaticTranspose,
  XNNClamp: _XNNNode1x1,
  XNNConv2d: _XNNNodeConv,
  XNNDiv: _XNNNode2x1,
  XNNStaticResizeBilinear2D,
  XNNStaticConstantPad,
  XNNAvgPooling2d: _XNNPooling2D,
  XNNMinimum: _XNNNode2x1,
  XNNDepthwiseConv2d: _XNNNodeConv,
  XNNMaxPooling2d: _XNNPooling2D,
  XNNMultiply: _XNNNode2x1,
  XNNSubtract: _XNNNode2x1,
  XNNFloor: _XNNNode1x1,
  XNNConvert: _XNNNode1x1,
  XNNGlobalAvgPooling2d: _XNNNode1x1,
  XNNStaticReshape,
  XNNArgMaxPooling2d,
  XNNSquareRoot: _XNNNode1x1,
  XNNCeiling: _XNNNode1x1,
  XNNHardswish: _XNNNode1x1,
  XNNLeakyReLU,
  XNNMaximum: _XNNNode2x1,
  XNNNegate: _XNNNode1x1,
  XNNSquare: _XNNNode1x1,
  XNNELU,
  XNNAbs: _XNNNode1x1,
  XNNPReLU: _XNNNode2x1,
  XNNConcatenate2: _XNNCat,
  XNNConcatenate3: _XNNCat,
  XNNConcatenate4: _XNNCat,
  XNNStaticSlice,
  XNNScaledDotProductAttention,
  XNNBatchMatrixMultiply: _XNNNode2x1,
}

// The two kinds of value an XValue can hold: plain or quantized tensor.
union XValueUnion {
  XNNTensorValue,
  XNNQuantizedTensorValue,
}

// Pair of min/max output bounds carried by an XNode.
table OutputMinMax {
  output_min: float;
  output_max: float;
}

// One node of the graph: the node payload plus per-node metadata.
table XNode {
  xnode_union: XNodeUnion;
  // An int which can be linked back to the node in the origin graph.
  debug_handle: uint;
  output_min_max: OutputMinMax;
}

// One value of the graph; wraps the XValueUnion payload.
table XValue {
  xvalue_union: XValueUnion;
}

// Transpose node: a permutation (perm) with its dimension count.
table XNNStaticTranspose {
  num_dims: uint;
  perm: [uint];
  input_id: uint;
  output_id: uint;
  flags: uint;
}

// Bilinear 2D resize node with the target height and width.
table XNNStaticResizeBilinear2D {
  new_height: uint;
  new_width: uint;
  input_id: uint;
  output_id: uint;
  flags: uint;
}

// Constant-pad node: pre/post padding lists and the fill value.
table XNNStaticConstantPad {
  pre_paddings: [uint];
  post_paddings: [uint];
  padding_value: float;
  input_id: uint;
  output_id: uint;
  flags: uint;
}

// A node with two inputs and one output.
// Not meant to be used directly; referenced through aliases in XNodeUnion.
table _XNNNode2x1 {
  input1_id: uint;
  input2_id: uint;
  output_id: uint;
  flags: uint;
}

// A node with one input and one output.
// Not meant to be used directly; referenced through aliases in XNodeUnion.
table _XNNNode1x1 {
  input_id: uint;
  output_id: uint;
  flags: uint;
}

// Shared layout behind XNNConcatenate2/3/4 in XNodeUnion: an axis, up to four
// input ids and one output id.
// NOTE(review): presumably only the first N inputs are meaningful for
// XNNConcatenateN - confirm against the serializer.
table _XNNCat {
  axis: uint;
  input1_id: uint;
  input2_id: uint;
  input3_id: uint;
  input4_id: uint;
  output_id: uint;
  flags: uint;
}

// ELU node with its alpha parameter.
table XNNELU {
  alpha: float;
  input_id: uint;
  output_id: uint;
  flags: uint;
}

// Fully-connected node: input, filter and bias value ids plus the output id.
table XNNFullyConnected {
  input1_id: uint;
  filter_id: uint;
  bias_id: uint;
  output_id: uint;
  flags: uint;
}

// Shared layout behind XNNConv2d and XNNDepthwiseConv2d in XNodeUnion.
table _XNNNodeConv {
  padding_top: uint;
  padding_right: uint;
  padding_bottom: uint;
  padding_left: uint;
  kernel_height: uint;
  kernel_width: uint;
  subsampling_height: uint;
  subsampling_width: uint;
  dilation_height: uint;
  dilation_width: uint;
  group_input_channels: uint;
  group_output_channels: uint;
  groups: uint;
  adjustment_height: uint;
  adjustment_width: uint;
  input1_id: uint;
  filter_id: uint;
  bias_id: uint;
  output_id: uint;
  flags: uint;
}

// Shared layout behind XNNAvgPooling2d and XNNMaxPooling2d in XNodeUnion.
table _XNNPooling2D {
  padding_top: uint;
  padding_right: uint;
  padding_bottom: uint;
  padding_left: uint;
  pooling_height: uint;
  pooling_width: uint;
  stride_height: uint;
  stride_width: uint;
  dilation_height: uint;
  dilation_width: uint;
  input_id: uint;
  output_id: uint;
  flags: uint;
}

// Reshape node: the new shape with its dimension count.
table XNNStaticReshape {
  num_dims: uint;
  new_shape: [uint];
  input_id: uint;
  output_id: uint;
  flags: uint;
}

// Slice node: per-dimension offsets and sizes with the dimension count.
table XNNStaticSlice {
  num_dims: uint;
  offsets: [uint];
  sizes: [uint];
  input_id: uint;
  output_id: uint;
  flags: uint;
}

// Scaled dot-product attention node: value ids for query, key, value, scale
// and mask, plus the output id.
table XNNScaledDotProductAttention {
  query_id: uint;
  key_id: uint;
  value_id: uint;
  scale_id: uint;
  mask_id: uint;
  output_id: uint;
  flags: uint;
}

// Arg-max pooling node. Unlike the other pooling nodes it has two outputs:
// one value id and one index id.
table XNNArgMaxPooling2d {
  padding_top: uint;
  padding_right: uint;
  padding_bottom: uint;
  padding_left: uint;
  pooling_height: uint;
  pooling_width: uint;
  input_id: uint;
  output_value_id: uint;
  output_index_id: uint;
  flags: uint;
}

// Leaky ReLU node with its negative slope parameter.
table XNNLeakyReLU {
  negative_slope: float;
  input_id: uint;
  output_id: uint;
  flags: uint;
}

// Describes where one piece of constant data lives.
table ConstantDataOffset {
  // Offset of the data, relative to the constant data base offset provided
  // in the XNNPACKHeader.
  offset: uint64;

  // Size in bytes of the valid data starting at the offset. The constant data
  // may be followed by padding before the next piece of constant data.
  size: uint64;
}

// Root table: the serialized graph.
table XNNGraph {
  // Schema version.
  version: string;
  xnodes: [XNode];
  xvalues: [XValue];

  // Number of external inputs/outputs.
  num_externs: uint;

  // Ids of external inputs.
  input_ids: [uint];

  // Ids of external outputs.
  output_ids: [uint];

  // Deprecated constant buffer storage in flatbuffer.
  constant_buffer: [Buffer] (deprecated);

  // Deprecated memory_buffer size tracking in flatbuffer.
  mem_buffer_sizes: [uint] (deprecated);

  // List of the constant data that follows the XNNGraph in this file. Each
  // constant data is assigned an index into the table. Index 0 is reserved to
  // be pointed to by non-constant tensors.
  constant_data: [ConstantDataOffset];
}

root_type XNNGraph;