/*
 * Copyright (c) Qualcomm Innovation Center, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
8
9 #include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
10 #include <executorch/backends/qualcomm/aot/wrappers/TensorWrapper.h>
11
12 #include <unordered_map>
13
14 namespace executorch {
15 namespace backends {
16 namespace qnn {
17
ToTensorType(Qnn_TensorType_t type)18 qcir::TensorType ToTensorType(Qnn_TensorType_t type) {
19 static const std::unordered_map<Qnn_TensorType_t, qcir::TensorType> type_map{
20 {QNN_TENSOR_TYPE_APP_WRITE, qcir::TensorType::WRITE},
21 {QNN_TENSOR_TYPE_APP_READ, qcir::TensorType::READ},
22 {QNN_TENSOR_TYPE_APP_READWRITE, qcir::TensorType::READWRITE},
23 {QNN_TENSOR_TYPE_NATIVE, qcir::TensorType::NATIVE},
24 {QNN_TENSOR_TYPE_STATIC, qcir::TensorType::STATIC},
25 {QNN_TENSOR_TYPE_NULL, qcir::TensorType::OPTIONAL},
26 {QNN_TENSOR_TYPE_UNDEFINED, qcir::TensorType::UNDEFINED},
27 };
28 return type_map.at(type);
29 }
30
ToTensorType(qcir::TensorType type)31 Qnn_TensorType_t ToTensorType(qcir::TensorType type) {
32 static const std::unordered_map<qcir::TensorType, Qnn_TensorType_t> type_map{
33 {qcir::TensorType::WRITE, QNN_TENSOR_TYPE_APP_WRITE},
34 {qcir::TensorType::READ, QNN_TENSOR_TYPE_APP_READ},
35 {qcir::TensorType::READWRITE, QNN_TENSOR_TYPE_APP_READWRITE},
36 {qcir::TensorType::NATIVE, QNN_TENSOR_TYPE_NATIVE},
37 {qcir::TensorType::STATIC, QNN_TENSOR_TYPE_STATIC},
38 {qcir::TensorType::OPTIONAL, QNN_TENSOR_TYPE_NULL},
39 {qcir::TensorType::UNDEFINED, QNN_TENSOR_TYPE_UNDEFINED},
40 };
41 return type_map.at(type);
42 }
43
44 // TODO: enable commented type by QNN version control
ToDataType(Qnn_DataType_t type)45 qcir::DataType ToDataType(Qnn_DataType_t type) {
46 static const std::unordered_map<Qnn_DataType_t, qcir::DataType> type_map{
47 {QNN_DATATYPE_INT_8, qcir::DataType::INT8},
48 {QNN_DATATYPE_INT_16, qcir::DataType::INT16},
49 {QNN_DATATYPE_INT_32, qcir::DataType::INT32},
50 {QNN_DATATYPE_INT_64, qcir::DataType::INT64},
51 {QNN_DATATYPE_UINT_8, qcir::DataType::UINT8},
52 {QNN_DATATYPE_UINT_16, qcir::DataType::UINT16},
53 {QNN_DATATYPE_UINT_32, qcir::DataType::UINT32},
54 {QNN_DATATYPE_UINT_64, qcir::DataType::UINT64},
55 {QNN_DATATYPE_FLOAT_16, qcir::DataType::FLOAT16},
56 {QNN_DATATYPE_FLOAT_32, qcir::DataType::FLOAT32},
57 // {QNN_DATATYPE_FLOAT_64, qcir::DataType::FLOAT64},
58 {QNN_DATATYPE_SFIXED_POINT_4, qcir::DataType::SFIXED4},
59 {QNN_DATATYPE_SFIXED_POINT_8, qcir::DataType::SFIXED8},
60 {QNN_DATATYPE_SFIXED_POINT_16, qcir::DataType::SFIXED16},
61 {QNN_DATATYPE_SFIXED_POINT_32, qcir::DataType::SFIXED32},
62 {QNN_DATATYPE_UFIXED_POINT_4, qcir::DataType::UFIXED4},
63 {QNN_DATATYPE_UFIXED_POINT_8, qcir::DataType::UFIXED8},
64 {QNN_DATATYPE_UFIXED_POINT_16, qcir::DataType::UFIXED16},
65 {QNN_DATATYPE_UFIXED_POINT_32, qcir::DataType::UFIXED32},
66 {QNN_DATATYPE_BOOL_8, qcir::DataType::BOOL},
67 // {QNN_DATATYPE_STRING, qcir::DataType::STRING},
68 {QNN_DATATYPE_UNDEFINED, qcir::DataType::UNDEFINED},
69 };
70 return type_map.at(type);
71 }
72
73 // TODO: enable commented type by QNN version control
ToDataType(qcir::DataType type)74 Qnn_DataType_t ToDataType(qcir::DataType type) {
75 static const std::unordered_map<qcir::DataType, Qnn_DataType_t> type_map{
76 {qcir::DataType::INT8, QNN_DATATYPE_INT_8},
77 {qcir::DataType::INT16, QNN_DATATYPE_INT_16},
78 {qcir::DataType::INT32, QNN_DATATYPE_INT_32},
79 {qcir::DataType::INT64, QNN_DATATYPE_INT_64},
80 {qcir::DataType::UINT8, QNN_DATATYPE_UINT_8},
81 {qcir::DataType::UINT16, QNN_DATATYPE_UINT_16},
82 {qcir::DataType::UINT32, QNN_DATATYPE_UINT_32},
83 {qcir::DataType::UINT64, QNN_DATATYPE_UINT_64},
84 {qcir::DataType::FLOAT16, QNN_DATATYPE_FLOAT_16},
85 {qcir::DataType::FLOAT32, QNN_DATATYPE_FLOAT_32},
86 // {qcir::DataType::FLOAT64, QNN_DATATYPE_FLOAT_64},
87 {qcir::DataType::SFIXED4, QNN_DATATYPE_SFIXED_POINT_4},
88 {qcir::DataType::SFIXED8, QNN_DATATYPE_SFIXED_POINT_8},
89 {qcir::DataType::SFIXED16, QNN_DATATYPE_SFIXED_POINT_16},
90 {qcir::DataType::SFIXED32, QNN_DATATYPE_SFIXED_POINT_32},
91 {qcir::DataType::UFIXED4, QNN_DATATYPE_UFIXED_POINT_4},
92 {qcir::DataType::UFIXED8, QNN_DATATYPE_UFIXED_POINT_8},
93 {qcir::DataType::UFIXED16, QNN_DATATYPE_UFIXED_POINT_16},
94 {qcir::DataType::UFIXED32, QNN_DATATYPE_UFIXED_POINT_32},
95 {qcir::DataType::BOOL, QNN_DATATYPE_BOOL_8},
96 // {qcir::DataType::STRING, QNN_DATATYPE_STRING},
97 {qcir::DataType::UNDEFINED, QNN_DATATYPE_UNDEFINED},
98 };
99 return type_map.at(type);
100 }
101
ToQuantizeParam(const Qnn_Tensor_t & tensor,flatbuffers::FlatBufferBuilder * builder)102 flatbuffers::Offset<qcir::QuantizeParam> ToQuantizeParam(
103 const Qnn_Tensor_t& tensor,
104 flatbuffers::FlatBufferBuilder* builder) {
105 static const std::unordered_map<Qnn_Definition_t, qcir::QuantizeDef> def_map{
106 {QNN_DEFINITION_IMPL_GENERATED, qcir::QuantizeDef::IMPL_GENERATED},
107 {QNN_DEFINITION_DEFINED, qcir::QuantizeDef::DEFINED},
108 {QNN_DEFINITION_UNDEFINED, qcir::QuantizeDef::UNDEFINED},
109 };
110 static const std::
111 unordered_map<Qnn_QuantizationEncoding_t, qcir::QuantizeType>
112 type_map{
113 {QNN_QUANTIZATION_ENCODING_SCALE_OFFSET,
114 qcir::QuantizeType::SCALE_OFFSET},
115 {QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET,
116 qcir::QuantizeType::AXIS_SCALE_OFFSET},
117 {QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET,
118 qcir::QuantizeType::BW_SCALE_OFFSET},
119 {QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET,
120 qcir::QuantizeType::BW_AXIS_SCALE_OFFSET},
121 {QNN_QUANTIZATION_ENCODING_UNDEFINED,
122 qcir::QuantizeType::UNDEFINED},
123 };
124
125 int32_t axis = 0;
126 uint32_t bitwidth = 0;
127 auto param = QNN_VER_PTR(tensor)->quantizeParams;
128 auto quant_type = type_map.at(param.quantizationEncoding);
129 std::vector<qcir::ScaleOffset> data;
130 std::vector<float> scales;
131 std::vector<int32_t> offsets;
132 switch (quant_type) {
133 case qcir::QuantizeType::SCALE_OFFSET: {
134 data.emplace_back(qcir::ScaleOffset(
135 param.scaleOffsetEncoding.scale, param.scaleOffsetEncoding.offset));
136 } break;
137 case qcir::QuantizeType::AXIS_SCALE_OFFSET: {
138 size_t len = param.axisScaleOffsetEncoding.numScaleOffsets;
139 axis = param.axisScaleOffsetEncoding.axis;
140 data.reserve(len);
141 for (uint i = 0; i < len; ++i) {
142 data.emplace_back(qcir::ScaleOffset(
143 param.axisScaleOffsetEncoding.scaleOffset[i].scale,
144 param.axisScaleOffsetEncoding.scaleOffset[i].offset));
145 }
146 } break;
147 case qcir::QuantizeType::BW_SCALE_OFFSET: {
148 bitwidth = param.bwScaleOffsetEncoding.bitwidth;
149 scales.push_back(param.bwScaleOffsetEncoding.scale);
150 offsets.push_back(param.bwScaleOffsetEncoding.offset);
151 } break;
152 case qcir::QuantizeType::BW_AXIS_SCALE_OFFSET: {
153 bitwidth = param.bwAxisScaleOffsetEncoding.bitwidth;
154 axis = param.bwAxisScaleOffsetEncoding.axis;
155 size_t len = param.bwAxisScaleOffsetEncoding.numElements;
156 scales.reserve(len);
157 offsets.reserve(len);
158 for (size_t i = 0; i < len; ++i) {
159 scales.push_back(param.bwAxisScaleOffsetEncoding.scales[i]);
160 offsets.push_back(param.bwAxisScaleOffsetEncoding.offsets[i]);
161 }
162 } break;
163 default:
164 // encodings are not required if lowering with floating point precision
165 break;
166 }
167 return CreateQuantizeParamDirect(
168 *builder,
169 def_map.at(param.encodingDefinition),
170 quant_type,
171 bitwidth,
172 axis,
173 &scales,
174 &offsets,
175 &data);
176 }
177
ToQuantizeParam(const tensor_type & tensor)178 Qnn_QuantizeParams_t ToQuantizeParam(const tensor_type& tensor) {
179 static const std::unordered_map<qcir::QuantizeDef, Qnn_Definition_t> def_map{
180 {qcir::QuantizeDef::IMPL_GENERATED, QNN_DEFINITION_IMPL_GENERATED},
181 {qcir::QuantizeDef::DEFINED, QNN_DEFINITION_DEFINED},
182 {qcir::QuantizeDef::UNDEFINED, QNN_DEFINITION_UNDEFINED},
183 };
184 static const std::
185 unordered_map<qcir::QuantizeType, Qnn_QuantizationEncoding_t>
186 type_map{
187 {qcir::QuantizeType::SCALE_OFFSET,
188 QNN_QUANTIZATION_ENCODING_SCALE_OFFSET},
189 {qcir::QuantizeType::AXIS_SCALE_OFFSET,
190 QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET},
191 {qcir::QuantizeType::BW_SCALE_OFFSET,
192 QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET},
193 {qcir::QuantizeType::BW_AXIS_SCALE_OFFSET,
194 QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET},
195 {qcir::QuantizeType::UNDEFINED,
196 QNN_QUANTIZATION_ENCODING_UNDEFINED},
197 };
198
199 Qnn_QuantizeParams_t p = QNN_QUANTIZE_PARAMS_INIT;
200 auto param = tensor->qparam();
201 p.encodingDefinition = def_map.at(param->def());
202 p.quantizationEncoding = type_map.at(param->type());
203 switch (p.quantizationEncoding) {
204 case QNN_QUANTIZATION_ENCODING_SCALE_OFFSET: {
205 p.scaleOffsetEncoding.scale = param->data()->Get(0)->scale();
206 p.scaleOffsetEncoding.offset = param->data()->Get(0)->offset();
207 } break;
208 case QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET: {
209 p.axisScaleOffsetEncoding.axis = param->axis();
210 p.axisScaleOffsetEncoding.numScaleOffsets = param->data()->size();
211 p.axisScaleOffsetEncoding.scaleOffset =
212 reinterpret_cast<Qnn_ScaleOffset_t*>(
213 const_cast<uint8_t*>(param->data()->Data()));
214 } break;
215 case QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET: {
216 p.bwAxisScaleOffsetEncoding.bitwidth = param->bitwidth();
217 p.bwScaleOffsetEncoding.scale = param->scales()->Get(0);
218 p.bwScaleOffsetEncoding.offset = param->offsets()->Get(0);
219 } break;
220 case QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET: {
221 p.bwAxisScaleOffsetEncoding.bitwidth = param->bitwidth();
222 p.bwAxisScaleOffsetEncoding.axis = param->axis();
223 p.bwAxisScaleOffsetEncoding.numElements = param->scales()->size();
224 p.bwAxisScaleOffsetEncoding.scales =
225 const_cast<float*>(param->scales()->data());
226 p.bwAxisScaleOffsetEncoding.offsets =
227 const_cast<int32_t*>(param->offsets()->data());
228 } break;
229 default:
230 // encodings are not required if lowering with floating point precision
231 break;
232 }
233 return p;
234 }
235
ToTensor(const Qnn_Tensor_t & tensor,flatbuffers::FlatBufferBuilder * builder)236 flatbuffers::Offset<qcir::Tensor> ToTensor(
237 const Qnn_Tensor_t& tensor,
238 flatbuffers::FlatBufferBuilder* builder) {
239 std::vector<uint8_t> buffer(
240 static_cast<uint8_t*>(QNN_VER_PTR(tensor)->clientBuf.data),
241 static_cast<uint8_t*>(QNN_VER_PTR(tensor)->clientBuf.data) +
242 QNN_VER_PTR(tensor)->clientBuf.dataSize);
243 std::vector<uint32_t> shape(
244 QNN_VER_PTR(tensor)->dimensions,
245 QNN_VER_PTR(tensor)->dimensions + QNN_VER_PTR(tensor)->rank);
246
247 return qcir::CreateTensorDirect(
248 *builder,
249 QNN_VER_PTR(tensor)->name,
250 &shape,
251 ToTensorType(QNN_VER_PTR(tensor)->type),
252 ToDataType(QNN_VER_PTR(tensor)->dataType),
253 ToQuantizeParam(tensor, builder),
254 &buffer);
255 }
256
ToTensor(const tensor_type & tensor)257 Qnn_Tensor_t ToTensor(const tensor_type& tensor) {
258 auto is_io_tensor = [](Qnn_TensorType_t type) {
259 return type < QNN_TENSOR_TYPE_STATIC;
260 };
261
262 Qnn_Tensor_t t = QNN_TENSOR_INIT;
263 QNN_VER_PTR(t)->name = tensor->name()->c_str();
264 QNN_VER_PTR(t)->type = ToTensorType(tensor->type());
265 QNN_VER_PTR(t)->dataType = ToDataType(tensor->dtype());
266 QNN_VER_PTR(t)->quantizeParams = ToQuantizeParam(tensor);
267 QNN_VER_PTR(t)->rank = tensor->shape()->size();
268 QNN_VER_PTR(t)->dimensions = const_cast<uint32_t*>(tensor->shape()->data());
269 QNN_VER_PTR(t)->clientBuf.dataSize = tensor->data()->size();
270 QNN_VER_PTR(t)->clientBuf.data = is_io_tensor(QNN_VER_PTR(t)->type)
271 ? nullptr
272 : static_cast<void*>(const_cast<uint8_t*>(tensor->data()->Data()));
273 return t;
274 }
275
276 } // namespace qnn
277 } // namespace backends
278 } // namespace executorch
279