1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
// This transformation pass converts dense tensors to sparse format.
17
18 #include "absl/memory/memory.h"
19 #include "third_party/eigen3/Eigen/Core"
20 #include "mlir/Dialect/Func/IR/FuncOps.h" // from @llvm-project
21 #include "mlir/IR/Attributes.h" // from @llvm-project
22 #include "mlir/IR/Builders.h" // from @llvm-project
23 #include "mlir/IR/BuiltinTypes.h" // from @llvm-project
24 #include "mlir/Pass/Pass.h" // from @llvm-project
25 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
26 #include "tensorflow/compiler/mlir/lite/transforms/passes.h"
27 #include "tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h"
28
29 //===----------------------------------------------------------------------===//
30 // The DenseToSparse Pass.
31 //
32 namespace mlir {
33 namespace TFL {
34
35 namespace {
36
37 #define GEN_PASS_CLASSES
38 #include "tensorflow/compiler/mlir/lite/transforms/passes.h.inc"
39
// If sparsity level is below this threshold, keep the tensor in dense format.
constexpr float kMinSparsityLevel = 0.3;
// Heuristic to check if a block configuration is correct for float constants:
// the block-wise sparsity must retain at least this fraction of the tensor's
// random (element-wise) sparsity.
constexpr float kBlockOverRandomSparsityRatio = 0.9;
// After quantization, some non-zero values are set to 0.
// Lower the ratio for identifying block configuration for quantized constants.
constexpr float kBlockOverRandomSparsityRatioQuant = 0.8;
47
APFloatToEigenHalf(const APFloat & val)48 Eigen::half APFloatToEigenHalf(const APFloat& val) {
49 uint16_t raw_data = val.bitcastToAPInt().getZExtValue();
50 return Eigen::numext::bit_cast<Eigen::half>(raw_data);
51 }
52
EigenHalfToAPFloat(const Eigen::half & val)53 APFloat EigenHalfToAPFloat(const Eigen::half& val) {
54 uint16_t raw_data = Eigen::numext::bit_cast<uint16_t>(val);
55 return APFloat(APFloat::IEEEhalf(), APInt(16, raw_data));
56 }
57
PopulateEncodingParams(const std::vector<int> & block_size,std::vector<int> * traversal_order,std::vector<TfLiteDimensionType> * format,std::vector<int> * b_map,std::vector<int> * b_size)58 void PopulateEncodingParams(const std::vector<int>& block_size,
59 std::vector<int>* traversal_order,
60 std::vector<TfLiteDimensionType>* format,
61 std::vector<int>* b_map, std::vector<int>* b_size) {
62 const int dims_count = block_size.size();
63 traversal_order->resize(dims_count);
64 format->resize(dims_count);
65 for (int i = 0; i < dims_count; i++) {
66 (*traversal_order)[i] = i;
67 }
68 for (int i = 0; i < dims_count - 1; i++) {
69 (*format)[i] = kTfLiteDimDense;
70 }
71 (*format)[dims_count - 1] = kTfLiteDimSparseCSR;
72 *b_map = {};
73 *b_size = {};
74 int block_rank = 0;
75 for (int i = 0; i < dims_count; i++) {
76 if (block_size[i] != 1) {
77 traversal_order->push_back(block_rank + dims_count);
78 format->push_back(kTfLiteDimDense);
79 block_rank++;
80 b_map->push_back(i);
81 b_size->push_back(block_size[i]);
82 }
83 }
84 }
85
// Returns the fraction of zero elements, num_zeros / num_elements, in [0, 1].
inline float GetSparsity(const int num_zeros, const int num_elements) {
  // Divide in double precision (as the original 1.0 * ... expression did),
  // then narrow to float on return.
  const double zero_fraction = static_cast<double>(num_zeros) / num_elements;
  return static_cast<float>(zero_fraction);
}
89
CalculateRandomSparsity(const ElementsAttr & attr,const ShapedType & type)90 float CalculateRandomSparsity(const ElementsAttr& attr,
91 const ShapedType& type) {
92 int num_elements = type.getNumElements();
93 int num_zeros = 0;
94
95 if (type.getElementType().isa<FloatType>()) {
96 for (const auto val : attr.getValues<APFloat>()) {
97 if (val.isZero()) {
98 num_zeros++;
99 }
100 }
101 } else if (type.getElementType().isa<quant::QuantizedType>()) {
102 for (const auto val : attr.getValues<int8_t>()) {
103 if (val == 0) {
104 num_zeros++;
105 }
106 }
107 }
108
109 return GetSparsity(num_zeros, num_elements);
110 }
111
CalculateBlockSparsity(const ElementsAttr & attr,const ShapedType & type,const std::vector<int> & block_size)112 float CalculateBlockSparsity(const ElementsAttr& attr, const ShapedType& type,
113 const std::vector<int>& block_size) {
114 float sparsity = 0;
115 std::vector<int> shape(2);
116 shape[0] = type.getDimSize(0);
117 shape[1] = type.getDimSize(1);
118
119 std::vector<int> traversal_order = {};
120 std::vector<TfLiteDimensionType> format = {};
121 std::vector<int> b_size = {};
122 std::vector<int> b_map = {};
123 PopulateEncodingParams(block_size, &traversal_order, &format, &b_map,
124 &b_size);
125
126 if (type.getElementType().isF32()) {
127 tflite::internal::sparsity::FormatConverter<float> format_converter(
128 shape, traversal_order, format, b_size, b_map);
129 std::vector<float> data;
130 data.reserve(type.getNumElements());
131 for (const auto val : attr.getValues<float>()) data.push_back(val);
132 format_converter.DenseToSparse(data.data());
133 sparsity =
134 GetSparsity(type.getNumElements() - format_converter.GetData().size(),
135 type.getNumElements());
136 } else if (type.getElementType().isF16()) {
137 tflite::internal::sparsity::FormatConverter<Eigen::half> format_converter(
138 shape, traversal_order, format, b_size, b_map);
139 std::vector<Eigen::half> data;
140 data.reserve(type.getNumElements());
141 for (const auto& val : attr.getValues<APFloat>())
142 data.push_back(APFloatToEigenHalf(val));
143 format_converter.DenseToSparse(data.data());
144 sparsity =
145 GetSparsity(type.getNumElements() - format_converter.GetData().size(),
146 type.getNumElements());
147 } else if (type.getElementType().isa<quant::QuantizedType>()) {
148 tflite::internal::sparsity::FormatConverter<int8_t> format_converter(
149 shape, traversal_order, format, b_size, b_map);
150 std::vector<int8_t> data;
151 data.reserve(type.getNumElements());
152 for (const auto val : attr.getValues<int8_t>()) data.push_back(val);
153 format_converter.DenseToSparse(data.data());
154 sparsity =
155 GetSparsity(type.getNumElements() - format_converter.GetData().size(),
156 type.getNumElements());
157 }
158
159 return sparsity;
160 }
161
// Outcome of inspecting a weight constant for sparse encoding.
// Note: plain struct (the C-style `typedef struct` is redundant in C++), and
// the flags carry default member initializers so a default-constructed
// InspectResult does not hold indeterminate bools.
struct InspectResult {
  // Whether the weight tensor is sparse enough to be compressed.
  bool can_compress = false;
  // If the weight tensor cannot be encoded in a block configuration that the
  // op supports, a Densify() op will be inserted afterwards to fall back to
  // dense execution.
  bool needs_densify = false;
  // Among the supported block configs of an op, which got selected to encode
  // the sparse weight.
  std::vector<int> selected_block_size;
};
173
InspectWeight(Operation * inst,const std::vector<std::vector<int>> & supported_block_size,const float ratio_threshold)174 InspectResult InspectWeight(
175 Operation* inst, const std::vector<std::vector<int>>& supported_block_size,
176 const float ratio_threshold) {
177 ElementsAttr attr;
178 ShapedType type;
179 InspectResult result = {};
180 if (auto cst = dyn_cast<ConstOp>(inst)) {
181 attr = cst.value();
182 type = cst.getType().cast<ShapedType>();
183 } else if (auto cst = dyn_cast<QConstOp>(inst)) {
184 attr = cst.value();
185 type = cst.getType().cast<ShapedType>();
186 } else {
187 result.can_compress = false;
188 return result;
189 }
190
191 // Currently we only support compressing weights of ops:
192 // Conv, DepthwiseConv, TransposeConv, whose filter has rank 4, and
193 // FullyConnected, whose filter has rank 2.
194 if (type.getRank() != 2 && type.getRank() != 4) {
195 result.can_compress = false;
196 return result;
197 }
198
199 float random_sparsity = CalculateRandomSparsity(attr, type);
200 if (random_sparsity < kMinSparsityLevel) {
201 result.can_compress = false;
202 return result;
203 }
204
205 result.can_compress = true;
206
207 float curr_sparsity = 0;
208 std::vector<int> selected_block_size;
209 result.needs_densify = true;
210 for (const auto& block_size : supported_block_size) {
211 curr_sparsity = CalculateBlockSparsity(attr, type, block_size);
212 if (curr_sparsity / random_sparsity > ratio_threshold) {
213 selected_block_size = block_size;
214 result.can_compress = true;
215 result.needs_densify = false;
216 result.selected_block_size = selected_block_size;
217 break;
218 }
219 }
220
221 return result;
222 }
223
// Compresses `dense_buffer` (the payload of the (Q)Const `inst`) with the
// TFLite dense->sparse converter using `block_size`, fills `s_param` with the
// matching TFL SparsityParameterAttr, and returns the compressed values.
// T is the storage element type (float, Eigen::half, or int8_t).
template <typename T>
std::vector<T> BuildSparsityParameterAttribute(
    const std::vector<int>& block_size, const T* dense_buffer, Operation* inst,
    OpBuilder* builder, SparsityParameterAttr* s_param) {
  ElementsAttr attr;
  ShapedType type;
  if (auto cst = dyn_cast<ConstOp>(inst)) {
    attr = cst.value();
    type = cst.getType().cast<ShapedType>();
  } else if (auto cst = dyn_cast<QConstOp>(inst)) {
    attr = cst.value();
    type = cst.getType().cast<ShapedType>();
  } else {
    // Callers only pass ConstOp/QConstOp here; anything else is a programmer
    // error. NOTE(review): in release builds (NDEBUG) this assert vanishes and
    // `type` stays null below — worth confirming callers can never hit this.
    assert(false && "Expected a constant-like op");
  }
  const int dims_count = type.getRank();
  std::vector<int> shape(dims_count);
  for (int i = 0; i < dims_count; i++) {
    shape[i] = type.getDimSize(i);
  }

  std::vector<int> traversal_order = {};
  std::vector<TfLiteDimensionType> format = {};
  std::vector<int> b_size = {};
  std::vector<int> b_map = {};
  PopulateEncodingParams(block_size, &traversal_order, &format, &b_map,
                         &b_size);

  tflite::internal::sparsity::FormatConverter<T> format_converter(
      shape, traversal_order, format, b_size, b_map);
  format_converter.DenseToSparse(dense_buffer);
  // The converter exposes two metadata vectors per traversal dimension
  // (segments and indices), laid out pairwise: [2*i] and [2*i + 1].
  const auto& metadata = format_converter.GetDimMetadata();
  const auto& compressed_data = format_converter.GetData();
  const int dim_size = metadata.size() / 2;
  std::vector<DimensionMetadataAttr> dim_metadata(traversal_order.size());
  for (int i = 0; i < dim_size; i++) {
    if (format[i] == kTfLiteDimDense) {
      // Dense dims only carry their extent (metadata[2*i][0]); the
      // segment/index arrays stay empty.
      dim_metadata[i] = DimensionMetadataAttr::get(
          builder->getContext(),
          ::mlir::TFL::DimensionTypeAttr::get(
              builder->getContext(), ::mlir::TFL::DimensionType::DENSE),
          metadata[2 * i][0], {}, {});
    } else {
      // CSR dims carry segment and index arrays instead of an extent.
      dim_metadata[i] = DimensionMetadataAttr::get(
          builder->getContext(),
          ::mlir::TFL::DimensionTypeAttr::get(
              builder->getContext(), ::mlir::TFL::DimensionType::SPARSE_CSR),
          0, metadata[2 * i], metadata[2 * i + 1]);
    }
  }
  *s_param = SparsityParameterAttr::get(builder->getContext(), traversal_order,
                                        b_map, dim_metadata);

  return compressed_data;
}
279
// Function pass that rewrites eligible dense weight constants into TFL
// sparse constants (implementation in runOnOperation below).
struct DenseToSparsePass : public DenseToSparsePassBase<DenseToSparsePass> {
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(DenseToSparsePass)

  void runOnOperation() override;
};
285
// Walks every op implementing SparseOpInterface in the function and, for each
// sparse-capable weight operand backed by a (Q)Const:
//   1. uses InspectWeight to decide whether the weight is sparse enough and
//      which supported block configuration (if any) to encode with;
//   2. replaces the dense constant with a SparseConstOp / SparseQConstOp
//      holding the compressed payload plus a SparsityParameterAttr;
//   3. if no supported block config fit (needs_densify), inserts a DensifyOp
//      so the consuming op still sees a dense tensor.
void DenseToSparsePass::runOnOperation() {
  func::FuncOp func = getOperation();
  OpBuilder builder(func);

  func.walk([&](SparseOpInterface sparse_op) {
    const auto& sparse_operands = sparse_op.GetSparseOperands();
    std::vector<std::vector<int>> supported_block_size;
    for (int operand : sparse_operands) {
      auto* op = sparse_op.getOperation();
      auto value = op->getOperand(operand);

      // Skip operands that are block arguments (no defining op).
      auto* inst = value.getDefiningOp();
      if (!inst) {
        continue;
      }

      // There could be a Dequantize op after the weight tensor in cases like
      // fp16 post-training quantization. We need to get the weight from the
      // input of the Dequantize op.
      if (isa<DequantizeOp>(inst)) {
        op = inst;
        value = inst->getOperand(0);
        inst = value.getDefiningOp();
        if (!inst) {
          continue;
        }
        operand = 0;
      }

      ShapedType type;
      // Quantized constants use a looser ratio threshold: quantization zeroes
      // some values, so random sparsity overstates the block structure.
      float ratio_threshold = kBlockOverRandomSparsityRatio;
      if (isa<ConstOp>(inst)) {
        supported_block_size = sparse_op.GetFloatBlockSize();
        type = dyn_cast<ConstOp>(inst).getType().cast<ShapedType>();
      } else if (isa<QConstOp>(inst)) {
        supported_block_size = sparse_op.GetQuantizedBlockSize();
        type = dyn_cast<QConstOp>(inst).getType().cast<ShapedType>();
        ratio_threshold = kBlockOverRandomSparsityRatioQuant;
      } else {
        continue;
      }

      InspectResult result =
          InspectWeight(inst, supported_block_size, ratio_threshold);
      if (!result.can_compress) {
        continue;
      }

      // The weight is not block sparse. Encode with random sparsity.
      if (result.selected_block_size.empty()) {
        result.selected_block_size = std::vector<int>(type.getRank(), 1);
      }

      builder.setInsertionPoint(op);
      SparsityParameterAttr s_param;
      if (auto cst = dyn_cast<ConstOp>(inst)) {
        auto attr = cst.value();
        auto type = cst.getType().cast<ShapedType>();
        if (type.getElementType().isF32()) {
          // f32 path: compress the raw floats and emit a SparseConstOp.
          std::vector<float> dense_data;
          dense_data.reserve(type.getNumElements());
          for (const auto val : attr.getValues<float>())
            dense_data.push_back(val);
          std::vector<float> compressed_data =
              BuildSparsityParameterAttribute<float>(result.selected_block_size,
                                                     dense_data.data(), inst,
                                                     &builder, &s_param);
          auto compressed_data_type = RankedTensorType::get(
              {static_cast<int64_t>(compressed_data.size())},
              builder.getF32Type());
          auto new_value = DenseElementsAttr::get<float>(compressed_data_type,
                                                         compressed_data);
          auto s_const = builder.create<SparseConstOp>(
              op->getLoc(), cst.value(), s_param, new_value);
          value.replaceAllUsesWith(s_const.getResult());
          cst.erase();
        } else if (type.getElementType().isF16()) {
          // f16 path: the converter works on POD Eigen::half values, so the
          // data round-trips APFloat -> Eigen::half -> APFloat.
          std::vector<Eigen::half> dense_data;
          dense_data.reserve(type.getNumElements());
          for (const auto& val : attr.getValues<APFloat>())
            dense_data.push_back(APFloatToEigenHalf(val));
          std::vector<Eigen::half> compressed_data =
              BuildSparsityParameterAttribute<Eigen::half>(
                  result.selected_block_size, dense_data.data(), inst, &builder,
                  &s_param);
          std::vector<APFloat> apfloat_data;
          apfloat_data.reserve(type.getNumElements());
          for (const auto& val : compressed_data)
            apfloat_data.push_back(EigenHalfToAPFloat(val));
          auto compressed_data_type = RankedTensorType::get(
              {static_cast<int64_t>(compressed_data.size())},
              type.getElementType());
          auto new_value =
              DenseElementsAttr::get(compressed_data_type, apfloat_data);
          auto s_const = builder.create<SparseConstOp>(
              op->getLoc(), cst.value(), s_param, new_value);
          value.replaceAllUsesWith(s_const.getResult());
          cst.erase();
        }
      } else if (auto cst = dyn_cast<QConstOp>(inst)) {
        // Quantized path: payload is int8; the original qtype attr is kept
        // on the new sparse constant.
        auto attr = cst.value();
        auto type = cst.getType().cast<ShapedType>();
        std::vector<int8_t> dense_data;
        dense_data.reserve(type.getNumElements());
        for (const auto& val : attr.getValues<int8_t>())
          dense_data.push_back(val);
        std::vector<int8_t> compressed_data =
            BuildSparsityParameterAttribute<int8_t>(result.selected_block_size,
                                                    dense_data.data(), inst,
                                                    &builder, &s_param);
        auto compressed_data_type = RankedTensorType::get(
            {static_cast<int64_t>(compressed_data.size())},
            builder.getIntegerType(8, true));
        auto new_value = DenseElementsAttr::get<int8_t>(compressed_data_type,
                                                        compressed_data);
        auto s_qconst = builder.create<SparseQConstOp>(
            op->getLoc(), cst.qtypeAttr(), cst.value(), s_param, new_value);
        value.replaceAllUsesWith(s_qconst.getResult());
        cst.erase();
      }

      if (result.needs_densify) {
        // Insert a Densify between the new sparse constant and the op.
        // replaceAllUsesWith also rewires the Densify's own operand to
        // itself's result, so setOperand restores the original input after.
        const auto value = op->getOperand(operand);
        auto densify =
            builder.create<DensifyOp>(op->getLoc(), value.getType(), value);
        value.replaceAllUsesWith(densify);
        densify.setOperand(value);
      }
    }
  });
}
417
418 } // namespace
419
420 // Creates an instance of the TensorFlow Lite dialect DenseToSparse pass.
CreateDenseToSparsePass()421 std::unique_ptr<OperationPass<func::FuncOp>> CreateDenseToSparsePass() {
422 return std::make_unique<DenseToSparsePass>();
423 }
424
425 } // namespace TFL
426 } // namespace mlir
427