//============================================================================
//
// Copyright (c) Qualcomm Innovation Center, Inc.
// All rights reserved
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
//============================================================================

namespace qnn_delegate;

/// Defines the HTP hardware architectures available for the HTP backend.
enum HtpArch: int {
  NONE = 0,
  V68 = 68,
  V69 = 69,
  V73 = 73,
  V75 = 75,
}

table HtpInfo {
  /// The HTP hardware architecture.
  htp_arch:HtpArch;

  /// The VTCM size to use for graphs, provided in MB.
  vtcm_size_in_mb:uint;
}

/// Refer to the Qualcomm AI Engine Direct SDK for the SoC model of
/// supported Snapdragon devices.
enum QcomChipset: int {
  UNKNOWN_SM = 0,
  SM8450 = 36,
  SA8295 = 39,
  SM8475 = 42,
  SM8550 = 43,
  SSG2115P = 46,
  SM8650 = 57,
}

/// Indicates the information of the specified SoC.
table SocInfo {
  /// Identifies the SoC model.
  soc_model:QcomChipset;

  /// Identifies the HTP information of the specified SoC.
  htp_info:HtpInfo;
}

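// A minimal sketch of a SocInfo instance in FlatBuffers JSON form. Field
// names follow this schema; the chipset/architecture/VTCM values below are
// illustrative assumptions, not recommendations:
//
//   {
//     "soc_model": "SM8550",
//     "htp_info": { "htp_arch": "V73", "vtcm_size_in_mb": 8 }
//   }
//
// flatc can check such JSON against this schema by overriding the root type
// (an illustrative invocation):
//   flatc --binary --root-type qnn_delegate.SocInfo qc_compiler_spec.fbs soc_info.json
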
/// Defines performance modes available for the HTP backend.
enum QnnExecuTorchHtpPerformanceMode: int {
  kHtpDefault = 0,
  kHtpSustainedHighPerformance,
  kHtpBurst,
  kHtpHighPerformance,
  kHtpPowerSaver,
  kHtpLowPowerSaver,
  kHtpHighPowerSaver,
  kHtpLowBalanced,
  kHtpBalanced,
}

/// Defines the optimization levels of the graph tensors that are neither
/// input nor output tensors. This enum controls the trade-off between
/// performance and accuracy.
enum QnnExecuTorchHtpPrecision: int {
  kHtpQuantized = 0,
  kHtpFp16,
}

/// The QNN backend used to delegate the model's nodes. Each backend has
/// its own set of supported ops and tensor types.
enum QnnExecuTorchBackendType: int {
  kUndefinedBackend = 0,
  kGpuBackend,
  kHtpBackend,
  kDspBackend,
}

/// Defines PD sessions available for the HTP backend.
enum QnnExecuTorchHtpPdSession: int {
  kHtpUnsignedPd = 0,
  kHtpSignedPd,
}

/// Specifies the backend options for the HTP backend.
table QnnExecuTorchHtpBackendOptions {
  /// Max spill-fill buffer across contexts.
  max_sf_buf_size:int;

  /// The default performance mode sets no configurations on the HTP.
  performance_mode:QnnExecuTorchHtpPerformanceMode;

  /// The default precision mode supports quantized networks. Other precision
  /// modes may only be supported on certain SoCs.
  precision:QnnExecuTorchHtpPrecision;

  /// Signed or unsigned HTP PD session. The default PD session is unsigned.
  pd_session:QnnExecuTorchHtpPdSession;

  /// Optional parameter specifying the directory of the QNN Skel library.
  /// Only useful for backends that have a Skel library.
  skel_library_dir:string;

  /// Using conv HMX with short depths may yield better performance, but
  /// convolutions with short depths and/or non-symmetric weights could
  /// exhibit inaccurate results.
  use_conv_hmx:bool;

  /// Deep Learning Bandwidth Compression allows inputs to be compressed,
  /// so that processing bandwidth can be lowered.
  use_dlbc:bool;

  /// Folding ReLU may yield better performance. This optimization is only
  /// correct when the quantization ranges of the convolution are equal to,
  /// or a subset of, those of the ReLU operation.
  use_fold_relu:bool;

  /// When multiple contexts are generated inside the same .pte file, it is
  /// possible to reserve a single spill-fill allocation that can be re-used
  /// across all the splits.
  use_multi_contexts:bool;

  /// When multiple graphs appear inside the same context, weights can be
  /// reused across all graphs.
  use_weight_sharing:bool;
}

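// A minimal sketch of an HTP options instance in FlatBuffers JSON form; the
// specific mode/precision choices below are illustrative assumptions:
//
//   {
//     "performance_mode": "kHtpBurst",
//     "precision": "kHtpFp16",
//     "pd_session": "kHtpUnsignedPd",
//     "use_fold_relu": true
//   }
//
// Omitted scalar fields take the FlatBuffers defaults (0 / false), i.e. the
// kHtpDefault performance mode and kHtpQuantized precision.
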
/// Logging level of the delegate and QNN backend.
enum QnnExecuTorchLogLevel: int {
  kLogOff = 0,
  kLogLevelError,
  kLogLevelWarn,
  kLogLevelInfo,
  kLogLevelVerbose,
  kLogLevelDebug,
}

/// Profiling level of the delegate and QNN backend.
enum QnnExecuTorchProfileLevel: int {
  kProfileOff = 0,
  kProfileBasic,
  kProfileDetailed,
  kProfileOptrace,
}

/// QNN backends currently supported.
table QnnExecuTorchBackendOptions {
  /// The backend QNN library to open and execute the graph with. This is a
  /// required argument; compilation errors out if kUndefinedBackend is
  /// supplied.
  backend_type:QnnExecuTorchBackendType;

  /// Backend-specific options for the HTP backend.
  htp_options:QnnExecuTorchHtpBackendOptions;
}

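// A minimal sketch of a backend selection in FlatBuffers JSON form, assuming
// the HTP backend is the target:
//
//   {
//     "backend_type": "kHtpBackend",
//     "htp_options": { "performance_mode": "kHtpBurst" }
//   }
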
table QnnExecuTorchOptions {
  /// Specifies the SoC to compile for or execute on.
  soc_info:SocInfo;

  /// Optional backend-specific options for the HTP backend.
  backend_options:QnnExecuTorchBackendOptions;

  /// Optional parameter used to create the QNN graph if a QNN context blob
  /// is not given.
  graph_name:string;

  /// Optional parameter to override the QNN backend library.
  library_path:string;

  /// Logging level of the delegate and the backend. Default is off.
  log_level:QnnExecuTorchLogLevel;

  /// If true, the graph is constructed on-device at runtime. Default is
  /// false.
  online_prepare:bool;

  /// If tensor dump is enabled, all intermediate tensor outputs will be
  /// dumped. This option exists for debugging accuracy issues. Default is
  /// off.
  dump_intermediate_outputs:bool;

  /// Profiling level of the delegate and the backend. Default is off.
  profile_level:QnnExecuTorchProfileLevel;

  /// Enables usage of a shared buffer between the application and the
  /// backend for graph I/O.
  shared_buffer:bool;

  /// True if the model comes from a QNN context binary.
  is_from_context_binary:bool;

  /// True if multiple graphs exist in one .pte file.
  multiple_graphs:bool;
}

root_type QnnExecuTorchOptions;
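
// A minimal sketch of a complete QnnExecuTorchOptions payload in FlatBuffers
// JSON form. All values below are illustrative assumptions, not
// recommendations:
//
//   {
//     "soc_info": {
//       "soc_model": "SM8550",
//       "htp_info": { "htp_arch": "V73", "vtcm_size_in_mb": 8 }
//     },
//     "backend_options": {
//       "backend_type": "kHtpBackend",
//       "htp_options": { "performance_mode": "kHtpBurst" }
//     },
//     "graph_name": "forward",
//     "log_level": "kLogLevelWarn"
//   }
//
// Since QnnExecuTorchOptions is the declared root_type, flatc can convert
// such JSON into the binary form directly:
//   flatc --binary qc_compiler_spec.fbs options.json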