/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/gl/compiler.h"

#include <algorithm>
#include <any>
#include <memory>
#include <string>
#include <unordered_set>
#include <utility>
#include <variant>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/memory/memory.h"
#include "absl/types/any.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
#include "tensorflow/lite/delegates/gpu/common/operations.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
#include "tensorflow/lite/delegates/gpu/gl/compiler/compiled_node.h"
#include "tensorflow/lite/delegates/gpu/gl/compiler/fuse_auto_input.h"
#include "tensorflow/lite/delegates/gpu/gl/compiler/fuse_inline.h"
#include "tensorflow/lite/delegates/gpu/gl/compiler/fuse_inplace.h"
#include "tensorflow/lite/delegates/gpu/gl/compiler/shader_codegen.h"
#include "tensorflow/lite/delegates/gpu/gl/float16_conversions.h"

namespace tflite {
namespace gpu {
namespace gl {
namespace {

struct ExceedSizeChecker {
  bool operator()(uint32_t v) const { return v > max_size.x; }

  bool operator()(const uint2& v) const {
    return v.x > max_size.x || v.y > max_size.y;
  }

  bool operator()(const uint3& v) const {
    return v.x > max_size.x || v.y > max_size.y || v.z > max_z_size;
  }

  int2 max_size;
  int max_z_size;
};

// Returns true if any size variable exceeds the given limit.
bool ExceedsMaxSize(const Object& object, const GpuInfo& gpu_info) {
  ExceedSizeChecker size_checker;
  size_checker.max_size =
      int2(gpu_info.GetMaxImage2DWidth(), gpu_info.GetMaxImage2DHeight());
  size_checker.max_z_size = gpu_info.GetMaxImage2DArrayLayers();
  return std::visit(size_checker, object.size);
}
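
// Example (hypothetical numbers): for an Object whose size variant holds
// uint3(1024, 768, 8) on a GPU reporting 4096x4096 max 2D image dimensions
// and 256 array layers, std::visit dispatches to the uint3 overload of
// ExceedSizeChecker above, which returns false (the object fits).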

ObjectType ChooseFastestObjectType(const GpuInfo& gpu_info) {
  return gpu_info.IsAdreno() ? ObjectType::TEXTURE : ObjectType::BUFFER;
}

ObjectType ChooseFastestRefObjectType(const GpuInfo& gpu_info,
                                      const CompilationOptions& options) {
  if (!gpu_info.IsAdreno()) {
    return ObjectType::BUFFER;
  }
  if (gpu_info.adreno_info.adreno_gpu == AdrenoGpu::kAdreno630) {
    return ObjectType::TEXTURE;
  } else {
    return options.allow_precision_loss ? ObjectType::TEXTURE
                                        : ObjectType::BUFFER;
  }
}

// Compiler executes the following steps:
//   1. Runs NodeShader for every node in the input graph.
//   2. Creates a compiled graph that mirrors the input graph and keeps
//      GeneratedCode in operation's attributes.
//   3. Fuses nodes in the compiled graph.
//   4. Generates the full shader code using the nodes in the compiled graph.
class CompilerImpl : public Compiler {
 public:
  // GpuInfo is passed by const pointer rather than by reference so that a
  // temporary object cannot be bound to it.
  CompilerImpl(const NodeShader* node_shader, const GpuInfo* gpu_info,
               const CompilationOptions& options)
      : node_shader_(*node_shader), gpu_info_(*gpu_info), options_(options) {
    if (options_.preferred_obj_type == ObjectType::UNKNOWN) {
      options_.preferred_obj_type = ChooseFastestObjectType(*gpu_info);
    }
    if (options_.ref_obj_type == ObjectType::UNKNOWN) {
      options_.ref_obj_type = ChooseFastestRefObjectType(*gpu_info, options);
    }
  }

  absl::Status Compile(
      const GraphFloat32& graph,
      const std::unordered_set<int>& tflite_graph_io,  // NOLINT
      const ShaderCodeCallback& callback) final {
    // It is important to have ids in a compiled graph identical to the given
    // graph.
    RETURN_IF_ERROR(graph.MakeExactCopy(&compiled_graph_));

    // Clear out batch dimension for dynamic batch support.
    if (options_.dynamic_batch) {
      for (auto value : compiled_graph_.values()) {
        value->tensor.shape.b = 1;
      }
    }

    // Generate a shader for a node and all input/output objects.
    for (auto node : compiled_graph_.nodes()) {
      CompiledNodeAttributes attr;
      attr.node_indices.push_back(node->id);
      NodeShader::GenerationContext ctx = {&gpu_info_, options_,
                                           node->operation.type,
                                           node->operation.attributes};
      for (const auto& tensor : graph.FindInputs(node->id)) {
        const auto& shape = tensor->tensor.shape;
        ctx.input_shapes.push_back({shape.b, shape.h, shape.w, shape.c});
      }
      for (const auto& tensor : graph.FindOutputs(node->id)) {
        const auto& shape = tensor->tensor.shape;
        ctx.output_shapes.push_back({shape.b, shape.h, shape.w, shape.c});
      }
      RETURN_IF_ERROR(node_shader_.GenerateCode(ctx, &attr.code));
      node->operation.attributes = std::move(attr);
    }

    ModelTransformer transformer(&compiled_graph_);
    if (options_.fuse_operations) {
      FuseAutoOutputWithInline fuse_inline;
      if (!transformer.Apply("fuse_auto_with_inline", &fuse_inline)) {
        return absl::InternalError("fuse_auto_with_inline failed");
      }
      FuseInplaceUpdate fuse_inplace;
      if (!transformer.Apply("fuse_inplace_update", &fuse_inplace)) {
        return absl::InternalError("fuse_inplace failed");
      }
      if (options_.auto_input_fusion) {
        FuseAutoInput fuse_auto_input;
        if (!transformer.Apply("fuse_auto_input", &fuse_auto_input)) {
          return absl::InternalError("fuse_auto_input failed");
        }
      }
    }
    RemoveUnusedInplaceUpdates remove_inplace_updates;
    if (!transformer.Apply("remove_inplace_updates", &remove_inplace_updates)) {
      return absl::InternalError("remove_inplace_updates failed");
    }

    // Prepare internal objects.
    absl::flat_hash_map<ValueId, Object> objects;
    for (auto value : compiled_graph_.values()) {
      Object object = MakePHWC4Ref(value->id, value->tensor.shape);
      object.data_type = value->tensor.type;
      // External references may not be upgraded to f16 nor be represented as
      // textures.
      const bool is_external =
          graph.IsGraphInput(value->id) || graph.IsGraphOutput(value->id) ||
          tflite_graph_io.find(value->tensor.ref) != tflite_graph_io.end();
      if (is_external) {
        object.object_type = ObjectType::BUFFER;
      } else if (options_.allow_precision_loss) {
        MaybeConvertToFloat16(&object);
      }
      objects[value->id] = std::move(object);
    }

    // Prepare readonly objects and check whether object types are supported.
    for (auto node : compiled_graph_.nodes()) {
      auto& attr =
          std::any_cast<CompiledNodeAttributes&>(node->operation.attributes);

      // Set workload explicitly.
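      // Example: an output of shape 224x224x32 yields a workload of
      // uint3(224, 224, 8), since PHWC4 packs channels in groups of 4.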
      if (attr.code.workload == uint3()) {
        auto outputs = compiled_graph_.FindOutputs(node->id);
        auto shape = outputs[0]->tensor.shape;
        for (auto output : outputs) {
          if (shape != output->tensor.shape) {
            return absl::FailedPreconditionError(
                "Workload uint3() requires all output sizes to match");
          }
        }
        attr.code.workload = uint3(shape.w, shape.h, DivideRoundUp(shape.c, 4));
      }

      int num_textures = 0;
      // Counts number of used textures and chooses ObjectType for an object.
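      // For example (hypothetical device limit): with preferred_obj_type ==
      // TEXTURE and GetMaxImageArguments() == 8, the first eight eligible
      // objects of this node become textures and the rest fall back to
      // buffers.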
      auto set_object_type = [&](Object* object) {
        if (object->object_type == ObjectType::BUFFER) {
          // Don't change from buffer once it is set.
          return;
        }
        bool is_ref = IsRef(*object);
        if (num_textures < gpu_info_.GetMaxImageArguments() &&
            !ExceedsMaxSize(*object, gpu_info_) &&
            (object->object_type == ObjectType::TEXTURE ||
             (is_ref && options_.ref_obj_type == ObjectType::TEXTURE) ||
             (!is_ref && options_.preferred_obj_type == ObjectType::TEXTURE))) {
          object->object_type = ObjectType::TEXTURE;
          num_textures++;
        } else {
          object->object_type = ObjectType::BUFFER;
        }
      };

      for (auto& object : attr.code.objects) {
        // Downgrade readonly objects to F16 if requested.
        if (options_.allow_precision_loss) {
          MaybeConvertToFloat16(&object.second);
        }
        set_object_type(&object.second);
      }

      for (auto ref : compiled_graph_.FindInputs(node->id)) {
        set_object_type(&objects[ref->id]);
      }
      for (auto ref : compiled_graph_.FindOutputs(node->id)) {
        set_object_type(&objects[ref->id]);
      }
    }

    // Generate shaders from the transformed graph.
    ShaderCodegen codegen(options_, gpu_info_);
    for (auto node : compiled_graph_.nodes()) {
      auto& attr =
          std::any_cast<CompiledNodeAttributes&>(node->operation.attributes);
      if (attr.code.source_code.empty()) {
        // noop. Skip this node.
        continue;
      }

      // Declare inputs and outputs explicitly.
      for (auto ref : compiled_graph_.FindInputs(node->id)) {
        auto object = objects[ref->id];
        object.access = AccessType::READ;
        attr.inputs.push_back(object);
      }
      for (auto ref : compiled_graph_.FindOutputs(node->id)) {
        auto object = objects[ref->id];
        object.access = AccessType::WRITE;
        attr.outputs.push_back(object);
      }

      // Allocate bindings. Textures must be bound first.
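      // A single counter is shared across both object types, so with e.g.
      // two texture objects and one buffer object, the textures receive
      // bindings 0 and 1 and the buffer receives binding 2.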
      uint32_t binding = 0;
      auto set_binding = [&](ObjectType type, Object& object) {
        if (object.object_type == type) {
          object.binding = binding++;
        }
      };
      for (auto& object : attr.inputs) {
        set_binding(ObjectType::TEXTURE, object);
      }
      for (auto& object : attr.outputs) {
        set_binding(ObjectType::TEXTURE, object);
      }
      for (auto& object : attr.code.objects) {
        set_binding(ObjectType::TEXTURE, object.second);
      }
      for (auto& object : attr.inputs) {
        set_binding(ObjectType::BUFFER, object);
      }
      for (auto& object : attr.outputs) {
        set_binding(ObjectType::BUFFER, object);
      }
      for (auto& object : attr.code.objects) {
        set_binding(ObjectType::BUFFER, object.second);
      }

      // Generate source code.
      ShaderCode shader_code;
      RETURN_IF_ERROR(codegen.Build(std::move(attr), &shader_code));
      RETURN_IF_ERROR(callback(std::move(shader_code)));
    }
    return absl::OkStatus();
  }

 private:
  const NodeShader& node_shader_;
  const GpuInfo& gpu_info_;
  CompilationOptions options_;
  GraphFloat32 compiled_graph_;
};

}  // namespace

std::unique_ptr<Compiler> NewCompiler(const NodeShader* node_shader,
                                      const GpuInfo* gpu_info,
                                      const CompilationOptions& options) {
  return std::make_unique<CompilerImpl>(node_shader, gpu_info, options);
}
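
// Usage sketch (not part of this file; the helper NewNodeShaderRegistry and
// the exact setup are assumptions that depend on the caller's environment):
//
//   GpuInfo gpu_info = ...;  // queried from the active GL context
//   CompilationOptions options;
//   auto shaders = NewNodeShaderRegistry();
//   auto compiler = NewCompiler(shaders.get(), &gpu_info, options);
//   std::unordered_set<int> tflite_graph_io = ...;
//   absl::Status status = compiler->Compile(
//       graph, tflite_graph_io, [](ShaderCode code) -> absl::Status {
//         // Consume each generated shader, e.g. compile it to a GL program.
//         return absl::OkStatus();
//       });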

}  // namespace gl
}  // namespace gpu
}  // namespace tflite