/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/kernels/segment_reduction_ops_gpu.cu.h"
#include "tensorflow/core/util/env_var.h"

namespace tensorflow {

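// Returns true if the TF_USE_DETERMINISTIC_SEGMENT_REDUCTIONS environment
// variable is set to a true value. The variable is read once on first call
// and the result is cached; on Windows this always returns false (see the
// TODO comment below).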
bool UseDeterministicSegmentReductions() {
  // See comment below regarding CI build error on Windows.
#if !defined(PLATFORM_WINDOWS)
  static bool cached_result = [] {
    bool result = false;
    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar(
        "TF_USE_DETERMINISTIC_SEGMENT_REDUCTIONS",
        /*default_val=*/false, &result));
    return result;
  }();
  return cached_result;
#else
  return false;
#endif
}

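// Returns true if the TF_DISABLE_SEGMENT_REDUCTION_OP_DETERMINISM_EXCEPTIONS
// environment variable is set to a true value, in which case segment
// reduction ops do not raise determinism exceptions. The variable is read
// once on first call and the result is cached.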
bool DisableSegmentReductionOpDeterminismExceptions() {
  static bool cached_disable = [] {
    bool disable = false;
    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar(
        "TF_DISABLE_SEGMENT_REDUCTION_OP_DETERMINISM_EXCEPTIONS",
        /*default_val=*/false, &disable));
    return disable;
  }();
  return cached_disable;
}

namespace functor {

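// Explicitly instantiates SegmentReductionFunctor for each sorted segment
// reduction (Sum, Prod, Min, Max). Each reduction is paired with its identity
// as the initial value; empty segments are filled with zero for Sum, Min, and
// Max, and with one for Prod.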
#define DEFINE_SORTED_GPU_SPECS_INDEX(T, Index)                 \
  template struct SegmentReductionFunctor<                      \
      T, Index, /*InitialValueF=*/functor::Zero<T>,             \
      /*EmptySegmentValueF=*/functor::Zero<T>, functor::Sum>;   \
  template struct SegmentReductionFunctor<                      \
      T, Index, /*InitialValueF=*/functor::One<T>,              \
      /*EmptySegmentValueF=*/functor::One<T>, functor::Prod>;   \
  template struct SegmentReductionFunctor<                      \
      T, Index, /*InitialValueF=*/functor::Highest<T>,          \
      /*EmptySegmentValueF=*/functor::Zero<T>, functor::Min>;   \
  template struct SegmentReductionFunctor<                      \
      T, Index, /*InitialValueF=*/functor::Lowest<T>,           \
      /*EmptySegmentValueF=*/functor::Zero<T>, functor::Max>;

#define DEFINE_SORTED_GPU_SPECS(T) DEFINE_SORTED_GPU_SPECS_INDEX(T, int32);

TF_CALL_GPU_NUMBER_TYPES(DEFINE_SORTED_GPU_SPECS);
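// For illustration, the first entry of DEFINE_SORTED_GPU_SPECS_INDEX expands
// (for T = float, Index = int32) to
//   template struct SegmentReductionFunctor<
//       float, int32, /*InitialValueF=*/functor::Zero<float>,
//       /*EmptySegmentValueF=*/functor::Zero<float>, functor::Sum>;
// which forces the matching GPU kernels to be compiled into this translation
// unit.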

#define DEFINE_REAL_UNSORTED_GPU_SPECS_INDEX(T, Index)                         \
  template struct UnsortedSegmentFunctor<GPUDevice, T, Index,                  \
                                         functor::Lowest<T>, functor::Max>;    \
  template struct UnsortedSegmentFunctor<GPUDevice, T, Index,                  \
                                         functor::Highest<T>, functor::Min>;   \
  template struct UnsortedSegmentFunctor<GPUDevice, T, Index, functor::One<T>, \
                                         functor::Prod>;

// Sum is currently the only op that supports all input types.
#define DEFINE_SUM_UNSORTED_GPU_SPECS_INDEX(T, Index)         \
  template struct UnsortedSegmentFunctor<GPUDevice, T, Index, \
                                         functor::Zero<T>, functor::Sum>;

#define DEFINE_REAL_GPU_SPECS(T) DEFINE_REAL_UNSORTED_GPU_SPECS_INDEX(T, int32);

#define DEFINE_SUM_GPU_SPECS(T) DEFINE_SUM_UNSORTED_GPU_SPECS_INDEX(T, int32);

TF_CALL_GPU_NUMBER_TYPES(DEFINE_REAL_GPU_SPECS);
TF_CALL_GPU_NUMBER_TYPES(DEFINE_SUM_GPU_SPECS);
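// As above, these are purely explicit instantiations; for example,
// DEFINE_SUM_GPU_SPECS(float) expands to
//   template struct UnsortedSegmentFunctor<GPUDevice, float, int32,
//                                          functor::Zero<float>, functor::Sum>;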

#undef DEFINE_SORTED_GPU_SPECS_INDEX
#undef DEFINE_SORTED_GPU_SPECS
#undef DEFINE_REAL_UNSORTED_GPU_SPECS_INDEX
#undef DEFINE_SUM_UNSORTED_GPU_SPECS_INDEX
#undef DEFINE_REAL_GPU_SPECS
#undef DEFINE_SUM_GPU_SPECS

// TODO(benbarsdell): These kernels are disabled on Windows as a workaround for
// a CI build error: "formal parameter with requested alignment of 128 won't be
// aligned". The root cause is suspected to be an aligned type (AlignedVector)
// being passed to a function by value, possibly inside the CUB library
// somewhere, but I have not yet been able to reproduce it in isolation outside
// of the GitHub CI.
#if !defined(PLATFORM_WINDOWS)

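// Explicit instantiations for the sparse segment reduction and gradient
// functors. The two integer template arguments appear to select the index and
// segment-id types: int32 indices are instantiated with both int32 and
// int64_t segment ids.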
#define DEFINE_SPARSE_SEGMENT_REDUCTION_FUNCTOR(T)                 \
  template struct SparseSegmentReductionFunctor<T, int32, int32>;  \
  template struct SparseSegmentReductionFunctor<T, int32, int64_t>;
TF_CALL_GPU_NUMBER_TYPES(DEFINE_SPARSE_SEGMENT_REDUCTION_FUNCTOR);
#undef DEFINE_SPARSE_SEGMENT_REDUCTION_FUNCTOR

#define DEFINE_SPARSE_SEGMENT_GRAD_FUNCTOR(T)                            \
  template struct SparseSegmentGradFunctor<GPUDevice, T, int32, int32>;  \
  template struct SparseSegmentGradFunctor<GPUDevice, T, int32, int64_t>;
TF_CALL_GPU_NUMBER_TYPES(DEFINE_SPARSE_SEGMENT_GRAD_FUNCTOR);
#undef DEFINE_SPARSE_SEGMENT_GRAD_FUNCTOR

#endif  // !defined(PLATFORM_WINDOWS)

}  // namespace functor
}  // namespace tensorflow

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM