//============================================================================
//
// Copyright (c) Qualcomm Innovation Center, Inc.
// All rights reserved
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
//============================================================================

namespace qnn_delegate;

/// Defines the HTP hardware architectures available to the HTP backend.
enum HtpArch: int {
  NONE = 0,
  V68 = 68,
  V69 = 69,
  V73 = 73,
  V75 = 75,
}

table HtpInfo {
  /// The HTP hardware architecture of the SoC.
  htp_arch:HtpArch;

  /// The VTCM size to use for graphs, in MB.
  vtcm_size_in_mb:uint;
}

/// Refer to the Qualcomm AI Engine Direct SDK for the SoC model of supported
/// Snapdragon devices.
enum QcomChipset: int {
  UNKNOWN_SM = 0,
  SM8450 = 36,
  SA8295 = 39,
  SM8475 = 42,
  SM8550 = 43,
  SSG2115P = 46,
  SM8650 = 57,
}

/// Information about the specified SoC.
table SocInfo {
  /// Identifies the SoC model.
  soc_model:QcomChipset;

  /// The HTP information of the specified SoC.
  htp_info:HtpInfo;
}

/// Defines the performance modes available for the HTP backend.
enum QnnExecuTorchHtpPerformanceMode: int {
  kHtpDefault = 0,
  kHtpSustainedHighPerformance,
  kHtpBurst,
  kHtpHighPerformance,
  kHtpPowerSaver,
  kHtpLowPowerSaver,
  kHtpHighPowerSaver,
  kHtpLowBalanced,
  kHtpBalanced,
}

/// Defines the precision used for graph tensors that are neither input nor
/// output tensors. This enum controls the trade-off between performance and
/// accuracy.
enum QnnExecuTorchHtpPrecision: int {
  kHtpQuantized = 0,
  kHtpFp16,
}

/// The QNN backend used to delegate the model's nodes. Each backend has
/// its own set of supported ops and tensor types.
enum QnnExecuTorchBackendType: int {
  kUndefinedBackend = 0,
  kGpuBackend,
  kHtpBackend,
  kDspBackend,
}

/// Defines the PD (protection domain) sessions available for the HTP backend.
enum QnnExecuTorchHtpPdSession: int {
  kHtpUnsignedPd = 0,
  kHtpSignedPd,
}

/// Specifies the backend options for the HTP backend.
table QnnExecuTorchHtpBackendOptions {
  /// Maximum spill-fill buffer size across contexts.
  max_sf_buf_size:int;

  /// The default performance mode sets no configurations on the HTP.
  performance_mode:QnnExecuTorchHtpPerformanceMode;

  /// The default precision mode supports quantized networks. Other precision
  /// modes may only be supported on certain SoCs.
  precision:QnnExecuTorchHtpPrecision;

  /// Signed or unsigned HTP PD session. The default PD session is unsigned.
  pd_session:QnnExecuTorchHtpPdSession;

  /// Optional parameter specifying the directory of the QNN Skel library.
  /// Only useful for backends that have a Skel library.
  skel_library_dir:string;

  /// Using conv HMX with short depths may improve performance, but
  /// convolutions with short depths and/or weights that are not symmetric
  /// could exhibit inaccurate results.
  use_conv_hmx:bool;

  /// Deep Learning Bandwidth Compression allows inputs to be compressed,
  /// so that the processing bandwidth can be lowered.
  use_dlbc:bool;

  /// Folding ReLU may improve performance. This optimization is only correct
  /// when the quantization ranges of the convolution are equal to, or a
  /// subset of, those of the ReLU operation.
  use_fold_relu:bool;

  /// When multiple contexts are generated inside the same .pte file, a
  /// single spill-fill allocation can be reserved and reused across all
  /// the splits.
  use_multi_contexts:bool;

  /// When multiple graphs appear inside the same context, weights can be
  /// shared across all graphs.
  use_weight_sharing:bool;
}

/// Logging level of the delegate and QNN backend.
enum QnnExecuTorchLogLevel: int {
  kLogOff = 0,
  kLogLevelError,
  kLogLevelWarn,
  kLogLevelInfo,
  kLogLevelVerbose,
  kLogLevelDebug,
}

/// Profiling level of the delegate and QNN backend.
enum QnnExecuTorchProfileLevel: int {
  kProfileOff = 0,
  kProfileBasic,
  kProfileDetailed,
  kProfileOptrace,
}

/// QNN backends currently supported.
table QnnExecuTorchBackendOptions {
  /// The QNN backend library to open and execute the graph with. This is a
  /// required argument; supplying kUndefinedBackend is an error.
  backend_type:QnnExecuTorchBackendType;

  /// Backend options for the HTP backend.
  htp_options:QnnExecuTorchHtpBackendOptions;
}

table QnnExecuTorchOptions {
  /// Specifies the SoC to compile for or execute on.
  soc_info:SocInfo;

  /// Optional backend-specific options, e.g. for the HTP backend.
  backend_options:QnnExecuTorchBackendOptions;

  /// Optional parameter naming the QNN graph to create when no QNN context
  /// blob is given.
  graph_name:string;

  /// Optional parameter to override the QNN backend library.
  library_path:string;

  /// Logging level of the delegate and the backend. Default is off.
  log_level:QnnExecuTorchLogLevel;

  /// If true, the graph is constructed on device (online prepare). Default
  /// is false.
  online_prepare:bool;

  /// If enabled, all intermediate tensor outputs will be dumped. This option
  /// exists for debugging accuracy issues. Default is off.
  dump_intermediate_outputs:bool;

  /// Profiling level of the delegate and the backend. Default is off.
  profile_level:QnnExecuTorchProfileLevel;

  /// Enables use of a shared buffer between the application and the backend
  /// for graph I/O.
  shared_buffer:bool;

  /// True if the model comes from a QNN context binary.
  is_from_context_binary:bool;

  /// True if multiple graphs exist in one .pte file.
  multiple_graphs:bool;
}

root_type QnnExecuTorchOptions;
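
// Example (an illustrative sketch, not part of the schema): a JSON payload
// matching this schema. flatc can convert such a file into a serialized
// QnnExecuTorchOptions blob, e.g. `flatc -b <this schema> options.json`.
// The concrete values below (SM8550 paired with a V73 HTP and 8 MB of VTCM,
// and the graph name "forward") are assumptions chosen for illustration.
//
// {
//   "soc_info": {
//     "soc_model": "SM8550",
//     "htp_info": { "htp_arch": "V73", "vtcm_size_in_mb": 8 }
//   },
//   "backend_options": {
//     "backend_type": "kHtpBackend",
//     "htp_options": {
//       "performance_mode": "kHtpBurst",
//       "precision": "kHtpQuantized"
//     }
//   },
//   "graph_name": "forward",
//   "log_level": "kLogLevelWarn"
// }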
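
// A minimal Python sketch of building the same options programmatically,
// assuming this schema has been compiled with `flatc --python` into a
// `qnn_delegate` package. The generated names below follow flatc's classic
// Python codegen conventions and are assumptions, not part of this schema.
//
//   import flatbuffers
//   from qnn_delegate import QnnExecuTorchOptions
//   from qnn_delegate.QnnExecuTorchLogLevel import QnnExecuTorchLogLevel
//
//   builder = flatbuffers.Builder(0)
//   graph_name = builder.CreateString("forward")
//
//   QnnExecuTorchOptions.QnnExecuTorchOptionsStart(builder)
//   QnnExecuTorchOptions.QnnExecuTorchOptionsAddGraphName(builder, graph_name)
//   QnnExecuTorchOptions.QnnExecuTorchOptionsAddLogLevel(
//       builder, QnnExecuTorchLogLevel.kLogLevelWarn)
//   options = QnnExecuTorchOptions.QnnExecuTorchOptionsEnd(builder)
//   builder.Finish(options)
//   blob = builder.Output()  # bytes of a serialized QnnExecuTorchOptions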