/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

17 
#include "absl/memory/memory.h"
#include "third_party/eigen3/Eigen/Core"
#include "mlir/Dialect/Func/IR/FuncOps.h"  // from @llvm-project
#include "mlir/IR/Attributes.h"  // from @llvm-project
#include "mlir/IR/Builders.h"  // from @llvm-project
#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
#include "mlir/Pass/Pass.h"  // from @llvm-project
#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
#include "tensorflow/compiler/mlir/lite/transforms/passes.h"
#include "tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h"

//===----------------------------------------------------------------------===//
// The DenseToSparse Pass.
//
namespace mlir {
namespace TFL {

namespace {

#define GEN_PASS_CLASSES
#include "tensorflow/compiler/mlir/lite/transforms/passes.h.inc"

// If sparsity level is below this threshold, keep the tensor in dense format.
constexpr float kMinSparsityLevel = 0.3;
// Heuristic to check if a block configuration is correct for float constants.
constexpr float kBlockOverRandomSparsityRatio = 0.9;
// After quantization, some non-zero values are set to 0.
// Lower the ratio for identifying block configuration for quantized constants.
constexpr float kBlockOverRandomSparsityRatioQuant = 0.8;

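// Conversions between MLIR's APFloat and Eigen::half; both sides are bit-cast
// through the same 16-bit IEEE half-precision encoding.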
Eigen::half APFloatToEigenHalf(const APFloat& val) {
  uint16_t raw_data = val.bitcastToAPInt().getZExtValue();
  return Eigen::numext::bit_cast<Eigen::half>(raw_data);
}

APFloat EigenHalfToAPFloat(const Eigen::half& val) {
  uint16_t raw_data = Eigen::numext::bit_cast<uint16_t>(val);
  return APFloat(APFloat::IEEEhalf(), APInt(16, raw_data));
}

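// Builds the TFLite sparse-encoding parameters for the given block size: the
// original dimensions are traversed in order with the innermost one stored as
// CSR, and every blocked dimension is appended as a trailing dense dimension.
// For example, block_size = {1, 4} yields traversal_order = {0, 1, 2},
// format = {DENSE, SPARSE_CSR, DENSE}, b_map = {1} and b_size = {4}.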
void PopulateEncodingParams(const std::vector<int>& block_size,
                            std::vector<int>* traversal_order,
                            std::vector<TfLiteDimensionType>* format,
                            std::vector<int>* b_map, std::vector<int>* b_size) {
  const int dims_count = block_size.size();
  traversal_order->resize(dims_count);
  format->resize(dims_count);
  for (int i = 0; i < dims_count; i++) {
    (*traversal_order)[i] = i;
  }
  for (int i = 0; i < dims_count - 1; i++) {
    (*format)[i] = kTfLiteDimDense;
  }
  (*format)[dims_count - 1] = kTfLiteDimSparseCSR;
  *b_map = {};
  *b_size = {};
  int block_rank = 0;
  for (int i = 0; i < dims_count; i++) {
    if (block_size[i] != 1) {
      traversal_order->push_back(block_rank + dims_count);
      format->push_back(kTfLiteDimDense);
      block_rank++;
      b_map->push_back(i);
      b_size->push_back(block_size[i]);
    }
  }
}

inline float GetSparsity(const int num_zeros, const int num_elements) {
  return (1.0 * num_zeros / num_elements);
}

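// Returns the fraction of zero elements in the constant, handling float and
// quantized (int8) element types.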
float CalculateRandomSparsity(const ElementsAttr& attr,
                              const ShapedType& type) {
  int num_elements = type.getNumElements();
  int num_zeros = 0;

  if (type.getElementType().isa<FloatType>()) {
    for (const auto val : attr.getValues<APFloat>()) {
      if (val.isZero()) {
        num_zeros++;
      }
    }
  } else if (type.getElementType().isa<quant::QuantizedType>()) {
    for (const auto val : attr.getValues<int8_t>()) {
      if (val == 0) {
        num_zeros++;
      }
    }
  }

  return GetSparsity(num_zeros, num_elements);
}

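// Returns the sparsity obtained when the constant is encoded with the given
// block size, i.e. the fraction of elements the block-wise format converter
// is able to drop.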
float CalculateBlockSparsity(const ElementsAttr& attr, const ShapedType& type,
                             const std::vector<int>& block_size) {
  float sparsity = 0;
  std::vector<int> shape(2);
  shape[0] = type.getDimSize(0);
  shape[1] = type.getDimSize(1);

  std::vector<int> traversal_order = {};
  std::vector<TfLiteDimensionType> format = {};
  std::vector<int> b_size = {};
  std::vector<int> b_map = {};
  PopulateEncodingParams(block_size, &traversal_order, &format, &b_map,
                         &b_size);

  if (type.getElementType().isF32()) {
    tflite::internal::sparsity::FormatConverter<float> format_converter(
        shape, traversal_order, format, b_size, b_map);
    std::vector<float> data;
    data.reserve(type.getNumElements());
    for (const auto val : attr.getValues<float>()) data.push_back(val);
    format_converter.DenseToSparse(data.data());
    sparsity =
        GetSparsity(type.getNumElements() - format_converter.GetData().size(),
                    type.getNumElements());
  } else if (type.getElementType().isF16()) {
    tflite::internal::sparsity::FormatConverter<Eigen::half> format_converter(
        shape, traversal_order, format, b_size, b_map);
    std::vector<Eigen::half> data;
    data.reserve(type.getNumElements());
    for (const auto& val : attr.getValues<APFloat>())
      data.push_back(APFloatToEigenHalf(val));
    format_converter.DenseToSparse(data.data());
    sparsity =
        GetSparsity(type.getNumElements() - format_converter.GetData().size(),
                    type.getNumElements());
  } else if (type.getElementType().isa<quant::QuantizedType>()) {
    tflite::internal::sparsity::FormatConverter<int8_t> format_converter(
        shape, traversal_order, format, b_size, b_map);
    std::vector<int8_t> data;
    data.reserve(type.getNumElements());
    for (const auto val : attr.getValues<int8_t>()) data.push_back(val);
    format_converter.DenseToSparse(data.data());
    sparsity =
        GetSparsity(type.getNumElements() - format_converter.GetData().size(),
                    type.getNumElements());
  }

  return sparsity;
}

typedef struct InspectResult {
  // Whether the weight tensor is sparse enough to be compressed.
  bool can_compress;
  // If the weight tensor cannot be encoded in a block configuration that the op
  // supports, a Densify() op will be inserted afterwards to fall back to dense
  // execution.
  bool needs_densify;
  // Among the supported block configs of an op, which got selected to encode
  // the sparse weight.
  std::vector<int> selected_block_size;
} InspectResult;

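// Decides whether the weight defined by `inst` is sparse enough to compress
// and, if so, which of the op's supported block configurations retains enough
// of the random sparsity; if none does, the weight is marked for a Densify
// fallback.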
InspectResult InspectWeight(
    Operation* inst, const std::vector<std::vector<int>>& supported_block_size,
    const float ratio_threshold) {
  ElementsAttr attr;
  ShapedType type;
  InspectResult result = {};
  if (auto cst = dyn_cast<ConstOp>(inst)) {
    attr = cst.value();
    type = cst.getType().cast<ShapedType>();
  } else if (auto cst = dyn_cast<QConstOp>(inst)) {
    attr = cst.value();
    type = cst.getType().cast<ShapedType>();
  } else {
    result.can_compress = false;
    return result;
  }

  // Currently we only support compressing weights of ops:
  //   Conv, DepthwiseConv, TransposeConv, whose filter has rank 4, and
  //   FullyConnected, whose filter has rank 2.
  if (type.getRank() != 2 && type.getRank() != 4) {
    result.can_compress = false;
    return result;
  }

  float random_sparsity = CalculateRandomSparsity(attr, type);
  if (random_sparsity < kMinSparsityLevel) {
    result.can_compress = false;
    return result;
  }

  result.can_compress = true;

  float curr_sparsity = 0;
  std::vector<int> selected_block_size;
  result.needs_densify = true;
  for (const auto& block_size : supported_block_size) {
    curr_sparsity = CalculateBlockSparsity(attr, type, block_size);
    if (curr_sparsity / random_sparsity > ratio_threshold) {
      selected_block_size = block_size;
      result.can_compress = true;
      result.needs_densify = false;
      result.selected_block_size = selected_block_size;
      break;
    }
  }

  return result;
}

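// Encodes `dense_buffer` with the chosen block configuration, fills `s_param`
// with the per-dimension sparsity metadata, and returns the compressed data.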
template <typename T>
std::vector<T> BuildSparsityParameterAttribute(
    const std::vector<int>& block_size, const T* dense_buffer, Operation* inst,
    OpBuilder* builder, SparsityParameterAttr* s_param) {
  ElementsAttr attr;
  ShapedType type;
  if (auto cst = dyn_cast<ConstOp>(inst)) {
    attr = cst.value();
    type = cst.getType().cast<ShapedType>();
  } else if (auto cst = dyn_cast<QConstOp>(inst)) {
    attr = cst.value();
    type = cst.getType().cast<ShapedType>();
  } else {
    assert(false && "Expected a constant-like op");
  }
  const int dims_count = type.getRank();
  std::vector<int> shape(dims_count);
  for (int i = 0; i < dims_count; i++) {
    shape[i] = type.getDimSize(i);
  }

  std::vector<int> traversal_order = {};
  std::vector<TfLiteDimensionType> format = {};
  std::vector<int> b_size = {};
  std::vector<int> b_map = {};
  PopulateEncodingParams(block_size, &traversal_order, &format, &b_map,
                         &b_size);

  tflite::internal::sparsity::FormatConverter<T> format_converter(
      shape, traversal_order, format, b_size, b_map);
  format_converter.DenseToSparse(dense_buffer);
  const auto& metadata = format_converter.GetDimMetadata();
  const auto& compressed_data = format_converter.GetData();
  const int dim_size = metadata.size() / 2;
  std::vector<DimensionMetadataAttr> dim_metadata(traversal_order.size());
  for (int i = 0; i < dim_size; i++) {
    if (format[i] == kTfLiteDimDense) {
      dim_metadata[i] = DimensionMetadataAttr::get(
          builder->getContext(),
          ::mlir::TFL::DimensionTypeAttr::get(
              builder->getContext(), ::mlir::TFL::DimensionType::DENSE),
          metadata[2 * i][0], {}, {});
    } else {
      dim_metadata[i] = DimensionMetadataAttr::get(
          builder->getContext(),
          ::mlir::TFL::DimensionTypeAttr::get(
              builder->getContext(), ::mlir::TFL::DimensionType::SPARSE_CSR),
          0, metadata[2 * i], metadata[2 * i + 1]);
    }
  }
  *s_param = SparsityParameterAttr::get(builder->getContext(), traversal_order,
                                        b_map, dim_metadata);

  return compressed_data;
}

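// Pass that walks ops implementing SparseOpInterface and rewrites qualifying
// dense constant weights into TFL sparse constants.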
struct DenseToSparsePass : public DenseToSparsePassBase<DenseToSparsePass> {
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(DenseToSparsePass)

  void runOnOperation() override;
};

void DenseToSparsePass::runOnOperation() {
  func::FuncOp func = getOperation();
  OpBuilder builder(func);

  func.walk([&](SparseOpInterface sparse_op) {
    const auto& sparse_operands = sparse_op.GetSparseOperands();
    std::vector<std::vector<int>> supported_block_size;
    for (int operand : sparse_operands) {
      auto* op = sparse_op.getOperation();
      auto value = op->getOperand(operand);

      auto* inst = value.getDefiningOp();
      if (!inst) {
        continue;
      }

      // There could be a Dequantize op after the weight tensor in cases like
      // fp16 post-training quantization. We need to get the weight from the
      // input of the Dequantize op.
      if (isa<DequantizeOp>(inst)) {
        op = inst;
        value = inst->getOperand(0);
        inst = value.getDefiningOp();
        if (!inst) {
          continue;
        }
        operand = 0;
      }

      ShapedType type;
      float ratio_threshold = kBlockOverRandomSparsityRatio;
      if (isa<ConstOp>(inst)) {
        supported_block_size = sparse_op.GetFloatBlockSize();
        type = dyn_cast<ConstOp>(inst).getType().cast<ShapedType>();
      } else if (isa<QConstOp>(inst)) {
        supported_block_size = sparse_op.GetQuantizedBlockSize();
        type = dyn_cast<QConstOp>(inst).getType().cast<ShapedType>();
        ratio_threshold = kBlockOverRandomSparsityRatioQuant;
      } else {
        continue;
      }

      InspectResult result =
          InspectWeight(inst, supported_block_size, ratio_threshold);
      if (!result.can_compress) {
        continue;
      }

      // The weight is not block sparse. Encode with random sparsity.
      if (result.selected_block_size.empty()) {
        result.selected_block_size = std::vector<int>(type.getRank(), 1);
      }

      builder.setInsertionPoint(op);
      SparsityParameterAttr s_param;
      if (auto cst = dyn_cast<ConstOp>(inst)) {
        auto attr = cst.value();
        auto type = cst.getType().cast<ShapedType>();
        if (type.getElementType().isF32()) {
          std::vector<float> dense_data;
          dense_data.reserve(type.getNumElements());
          for (const auto val : attr.getValues<float>())
            dense_data.push_back(val);
          std::vector<float> compressed_data =
              BuildSparsityParameterAttribute<float>(result.selected_block_size,
                                                     dense_data.data(), inst,
                                                     &builder, &s_param);
          auto compressed_data_type = RankedTensorType::get(
              {static_cast<int64_t>(compressed_data.size())},
              builder.getF32Type());
          auto new_value = DenseElementsAttr::get<float>(compressed_data_type,
                                                         compressed_data);
          auto s_const = builder.create<SparseConstOp>(
              op->getLoc(), cst.value(), s_param, new_value);
          value.replaceAllUsesWith(s_const.getResult());
          cst.erase();
        } else if (type.getElementType().isF16()) {
          std::vector<Eigen::half> dense_data;
          dense_data.reserve(type.getNumElements());
          for (const auto& val : attr.getValues<APFloat>())
            dense_data.push_back(APFloatToEigenHalf(val));
          std::vector<Eigen::half> compressed_data =
              BuildSparsityParameterAttribute<Eigen::half>(
                  result.selected_block_size, dense_data.data(), inst, &builder,
                  &s_param);
          std::vector<APFloat> apfloat_data;
          apfloat_data.reserve(type.getNumElements());
          for (const auto& val : compressed_data)
            apfloat_data.push_back(EigenHalfToAPFloat(val));
          auto compressed_data_type = RankedTensorType::get(
              {static_cast<int64_t>(compressed_data.size())},
              type.getElementType());
          auto new_value =
              DenseElementsAttr::get(compressed_data_type, apfloat_data);
          auto s_const = builder.create<SparseConstOp>(
              op->getLoc(), cst.value(), s_param, new_value);
          value.replaceAllUsesWith(s_const.getResult());
          cst.erase();
        }
      } else if (auto cst = dyn_cast<QConstOp>(inst)) {
        auto attr = cst.value();
        auto type = cst.getType().cast<ShapedType>();
        std::vector<int8_t> dense_data;
        dense_data.reserve(type.getNumElements());
        for (const auto& val : attr.getValues<int8_t>())
          dense_data.push_back(val);
        std::vector<int8_t> compressed_data =
            BuildSparsityParameterAttribute<int8_t>(result.selected_block_size,
                                                    dense_data.data(), inst,
                                                    &builder, &s_param);
        auto compressed_data_type = RankedTensorType::get(
            {static_cast<int64_t>(compressed_data.size())},
            builder.getIntegerType(8, true));
        auto new_value = DenseElementsAttr::get<int8_t>(compressed_data_type,
                                                        compressed_data);
        auto s_qconst = builder.create<SparseQConstOp>(
            op->getLoc(), cst.qtypeAttr(), cst.value(), s_param, new_value);
        value.replaceAllUsesWith(s_qconst.getResult());
        cst.erase();
      }

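      // No supported block configuration matched: keep the sparse encoding but
      // insert a Densify op so the consumer still receives a dense tensor at
      // runtime.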
      if (result.needs_densify) {
        const auto value = op->getOperand(operand);
        auto densify =
            builder.create<DensifyOp>(op->getLoc(), value.getType(), value);
        value.replaceAllUsesWith(densify);
        densify.setOperand(value);
      }
    }
  });
}

}  // namespace

// Creates an instance of the TensorFlow Lite dialect DenseToSparse pass.
std::unique_ptr<OperationPass<func::FuncOp>> CreateDenseToSparsePass() {
  return std::make_unique<DenseToSparsePass>();
}

}  // namespace TFL
}  // namespace mlir