xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/delegates/gpu/common/tasks/convolution_transposed.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONVOLUTION_TRANSPOSED_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONVOLUTION_TRANSPOSED_H_
18 
19 #include <cstdint>
20 #include <memory>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 
25 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
26 #include "tensorflow/lite/delegates/gpu/common/operations.h"
27 #include "tensorflow/lite/delegates/gpu/common/shape.h"
28 #include "tensorflow/lite/delegates/gpu/common/status.h"
29 #include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
30 #include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h"
31 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
32 #include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h"
33 #include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
34 #include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
35 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
36 #include "tensorflow/lite/delegates/gpu/common/types.h"
37 
38 namespace tflite {
39 namespace gpu {
40 
41 class ConvolutionTransposed : public GPUOperation {
42  public:
43   ConvolutionTransposed() = default;
44   void GetPossibleKernelWorkGroups(
45       TuningType tuning_type, const GpuInfo& gpu_info,
46       const KernelInfo& kernel_info,
47       std::vector<int3>* work_groups) const override;
48   absl::Status BindArguments(ArgumentsBinder* args) override;
49   int3 GetGridSize() const override;
50 
51   // Move only
52   ConvolutionTransposed(ConvolutionTransposed&& operation) = default;
53   ConvolutionTransposed& operator=(ConvolutionTransposed&& operation) = default;
54   ConvolutionTransposed(const ConvolutionTransposed&) = delete;
55   ConvolutionTransposed& operator=(const ConvolutionTransposed&) = delete;
56 
GetWeightsDescription()57   WeightsDescription GetWeightsDescription() const {
58     WeightsDescription desc;
59     desc.type = DeduceDataTypeFromPrecision(definition_.precision);
60     desc.layout = weights_layout_;
61     desc.output_group_size = block_size_.w;
62     return desc;
63   }
64 
65  private:
66   friend ConvolutionTransposed CreateConvolutionTransposed(
67       const GpuInfo& gpu_info, const OperationDef& definition,
68       const ConvolutionTransposedAttributes& attr);
69   friend ConvolutionTransposed CreateConvolutionTransposed3D(
70       const GpuInfo& gpu_info, const OperationDef& definition,
71       const ConvolutionTransposed3DAttributes& attr);
72   friend ConvolutionTransposed CreateConvolutionTransposedDynamicWeights(
73       const GpuInfo& gpu_info, const OperationDef& definition,
74       const ConvolutionTransposedAttributes& attr);
75 
76   ConvolutionTransposed(const OperationDef& definition,
77                         const ConvolutionTransposedAttributes& attr,
78                         const GpuInfo& gpu_info);
79   ConvolutionTransposed(const OperationDef& definition,
80                         const ConvolutionTransposed3DAttributes& attr,
81                         const GpuInfo& gpu_info);
82 
83   template <DataType T>
84   void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights,
85                      bool weights_are_buffer);
86 
87   template <DataType T>
88   void UploadWeights(const tflite::gpu::Tensor<OHWDI, T>& weights,
89                      bool weights_are_buffer);
90 
91   std::string GenerateConvolutionTransposedCode(const OperationDef& op_def,
92                                                 const GpuInfo& gpu_info,
93                                                 const int4& block_size);
94   int4 stride_;
95   int4 block_size_ = int4(1, 1, 1, 1);  // WHDS
96   WeightsLayout weights_layout_;
97 };
98 
99 template <DataType T>
UploadWeights(const tflite::gpu::Tensor<OHWI,T> & weights,bool weights_are_buffer)100 void ConvolutionTransposed::UploadWeights(
101     const tflite::gpu::Tensor<OHWI, T>& weights, bool weights_are_buffer) {
102   const auto weights_desc = GetWeightsDescription();
103   const int flt_count =
104       GetTotalElementsCountForLayout(weights_desc, weights.shape);
105 
106   std::vector<uint8_t> weights_data(flt_count * SizeOf(weights_desc.type));
107   RearrangeWeights(weights, weights_desc, absl::MakeSpan(weights_data));
108 
109   if (weights_are_buffer) {
110     BufferDescriptor desc;
111     desc.element_type = weights_desc.type;
112     desc.element_size = 16;
113     desc.size = weights_data.size();
114     desc.data = std::move(weights_data);
115     args_.AddObject("weights",
116                     std::make_unique<BufferDescriptor>(std::move(desc)));
117   } else {
118     uint2 tex_size = Get2dResourceSize(weights_desc, weights.shape);
119     int sub_size = SizeOf(weights_desc.type) * 4 * tex_size.x * tex_size.y;
120     for (int i = 0; i < 4; ++i) {
121       TensorDescriptor desc = CreateConstantHWVec4TensorDescriptor(
122           weights_desc.type, TensorStorageType::TEXTURE_2D, tex_size.x,
123           tex_size.y, weights_data.data() + sub_size * i);
124       args_.AddObject("weights" + std::to_string(i),
125                       std::make_unique<TensorDescriptor>(std::move(desc)));
126     }
127   }
128 }
129 
130 template <DataType T>
UploadWeights(const tflite::gpu::Tensor<OHWDI,T> & weights,bool weights_are_buffer)131 void ConvolutionTransposed::UploadWeights(
132     const tflite::gpu::Tensor<OHWDI, T>& weights, bool weights_are_buffer) {
133   const auto weights_desc = GetWeightsDescription();
134   const int flt_count =
135       GetTotalElementsCountForLayout(weights_desc, weights.shape);
136 
137   std::vector<uint8_t> weights_data(flt_count * SizeOf(weights_desc.type));
138   RearrangeWeights(weights, weights_desc, absl::MakeSpan(weights_data));
139 
140   if (weights_are_buffer) {
141     BufferDescriptor desc;
142     desc.element_type = weights_desc.type;
143     desc.element_size = 16;
144     desc.size = weights_data.size();
145     desc.data = std::move(weights_data);
146     args_.AddObject("weights",
147                     std::make_unique<BufferDescriptor>(std::move(desc)));
148   } else {
149     uint2 tex_size = Get2dResourceSize(weights_desc, weights.shape);
150     int sub_size = SizeOf(weights_desc.type) * 4 * tex_size.x * tex_size.y;
151     for (int i = 0; i < 4; ++i) {
152       TensorDescriptor desc = CreateConstantHWVec4TensorDescriptor(
153           weights_desc.type, TensorStorageType::TEXTURE_2D, tex_size.x,
154           tex_size.y, weights_data.data() + sub_size * i);
155       args_.AddObject("weights" + std::to_string(i),
156                       std::make_unique<TensorDescriptor>(std::move(desc)));
157     }
158   }
159 }
160 
161 ConvolutionTransposed CreateConvolutionTransposed(
162     const GpuInfo& gpu_info, const OperationDef& definition,
163     const ConvolutionTransposedAttributes& attr);
164 
165 ConvolutionTransposed CreateConvolutionTransposed3D(
166     const GpuInfo& gpu_info, const OperationDef& definition,
167     const ConvolutionTransposed3DAttributes& attr);
168 
169 ConvolutionTransposed CreateConvolutionTransposedDynamicWeights(
170     const GpuInfo& gpu_info, const OperationDef& definition,
171     const ConvolutionTransposedAttributes& attr);
172 
173 }  // namespace gpu
174 }  // namespace tflite
175 
176 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONVOLUTION_TRANSPOSED_H_
177