1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/delegates/gpu/gl/workgroups/calculator.h"
17
18 #include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
19 #include "tensorflow/lite/delegates/gpu/common/types.h"
20 #include "tensorflow/lite/delegates/gpu/gl/compiler/shader_code.h"
21
22 namespace tflite {
23 namespace gpu {
24 namespace gl {
25 namespace {
26
CalculateProduct(const uint3 & value)27 uint64_t CalculateProduct(const uint3& value) {
28 return static_cast<uint64_t>(value.x) * value.y * value.z;
29 }
30
MaybeShrinkWorkgroup(const GpuInfo & gpu_info,uint3 * wg)31 void MaybeShrinkWorkgroup(const GpuInfo& gpu_info, uint3* wg) {
32 while (wg->x > gpu_info.GetMaxWorkGroupSizeForX()) {
33 wg->x /= 2;
34 }
35
36 while (wg->y > gpu_info.GetMaxWorkGroupSizeForY()) {
37 wg->y /= 2;
38 }
39
40 while (wg->z > gpu_info.GetMaxWorkGroupSizeForZ()) {
41 wg->z /= 2;
42 }
43
44 // Code below decreases amount of invocations per workgroup in a balanced way.
45 // As example, workgroup size is x=16, y=8, z=8 (16x8x8 = 1024), but
46 // max_work_group_total_size = 512. We need to fit this limit and we can
47 // reduce workgroup size in different ways, but we want to use the most
48 // balanced way. So code below will find the maximal of three dimensions and
49 // reduce it, so the whole workgroup is kept balanced by all dimensions. And
50 // the final reduced workgroup will be x=8, y=8, z=8 for the given example.
51 while (CalculateProduct(*wg) > gpu_info.GetMaxWorkGroupTotalSize()) {
52 unsigned int* max = &wg->x;
53 if (wg->y > *max) max = &wg->y;
54 if (wg->z > *max) max = &wg->z;
55 *max = *max /= 2;
56 }
57 }
58
59 } // namespace
60
WorkgroupsCalculator(const GpuInfo & gpu_info)61 WorkgroupsCalculator::WorkgroupsCalculator(const GpuInfo& gpu_info)
62 : gpu_info_{gpu_info} {}
63
Calculate(const ShaderCode & shader_code) const64 uint3 WorkgroupsCalculator::Calculate(const ShaderCode& shader_code) const {
65 uint3 workgroup_size = shader_code.recommended_workgroup;
66 if (workgroup_size == kEmptyWorkgroupSize) {
67 workgroup_size = CalculateInternal(shader_code);
68 }
69 MaybeShrinkWorkgroup(gpu_info_, &workgroup_size);
70 return workgroup_size;
71 }
72
73 } // namespace gl
74 } // namespace gpu
75 } // namespace tflite
76