1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 // See docs in ../ops/nn_ops.cc. 17 18 #define EIGEN_USE_THREADS 19 20 #include "tensorflow/core/kernels/relu_op.h" 21 22 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 23 #include "tensorflow/core/framework/numeric_op.h" 24 #include "tensorflow/core/framework/op_kernel.h" 25 #include "tensorflow/core/framework/register_types.h" 26 #include "tensorflow/core/framework/tensor.h" 27 #include "tensorflow/core/lib/core/errors.h" 28 29 namespace tensorflow { 30 31 typedef Eigen::ThreadPoolDevice CPUDevice; 32 typedef Eigen::GpuDevice GPUDevice; 33 34 #define REGISTER_RELU_KERNELS(type) \ 35 REGISTER_KERNEL_BUILDER( \ 36 Name("Relu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 37 ReluOp<CPUDevice, type>); \ 38 REGISTER_KERNEL_BUILDER( \ 39 Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 40 ReluGradOp<CPUDevice, type>); \ 41 REGISTER_KERNEL_BUILDER( \ 42 Name("Relu6").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 43 Relu6Op<CPUDevice, type>); \ 44 REGISTER_KERNEL_BUILDER( \ 45 Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 46 Relu6GradOp<CPUDevice, type>) \ 47 REGISTER_KERNEL_BUILDER( \ 48 Name("LeakyReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ 49 LeakyReluGradOp<CPUDevice, type>); 50 51 
// Instantiate the Relu-family CPU kernels for every real number type.
TF_CALL_REAL_NUMBER_TYPES(REGISTER_RELU_KERNELS);
#undef REGISTER_RELU_KERNELS

// Register LeakyRelu here for all types except bfloat16
// bfloat16 is in cwise_op_leakyrelu_bf16.cc
#define REGISTER_LEAKYRELU_KERNELS(type)                              \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("LeakyRelu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      LeakyReluOp<CPUDevice, type>);

// LeakyRelu (forward only) covers the integral types in addition to the
// floating-point ones; the LeakyReluGrad kernel above is registered only
// for real number types.
TF_CALL_INTEGRAL_TYPES(REGISTER_LEAKYRELU_KERNELS)
TF_CALL_half(REGISTER_LEAKYRELU_KERNELS)
TF_CALL_float(REGISTER_LEAKYRELU_KERNELS)
TF_CALL_double(REGISTER_LEAKYRELU_KERNELS)
#undef REGISTER_LEAKYRELU_KERNELS

// Registers the CPU kernels for Elu, EluGrad, Selu and SeluGrad for one
// element type.  NOTE: the final registration deliberately omits the trailing
// ';' (the TF_CALL_* invocation below expands this macro back to back).
#define REGISTER_ELU_KERNELS(type)                                   \
  REGISTER_KERNEL_BUILDER(                                           \
      Name("Elu").Device(DEVICE_CPU).TypeConstraint<type>("T"),      \
      EluOp<CPUDevice, type>);                                       \
  REGISTER_KERNEL_BUILDER(                                           \
      Name("EluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"),  \
      EluGradOp<CPUDevice, type>);                                   \
  REGISTER_KERNEL_BUILDER(                                           \
      Name("Selu").Device(DEVICE_CPU).TypeConstraint<type>("T"),     \
      SeluOp<CPUDevice, type>);                                      \
  REGISTER_KERNEL_BUILDER(                                           \
      Name("SeluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      SeluGradOp<CPUDevice, type>)

// Elu and Selu only make sense with float or double.
82 TF_CALL_FLOAT_TYPES(REGISTER_ELU_KERNELS); 83 #undef REGISTER_ELU_KERNELS 84 85 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM 86 87 #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) 88 89 namespace functor { 90 #define DECLARE_GPU_NO_MLIR_SPEC(T) \ 91 template <> \ 92 void Relu<GPUDevice, T>::operator()( \ 93 const GPUDevice& d, typename TTypes<T>::ConstTensor features, \ 94 typename TTypes<T>::Tensor activations); \ 95 extern template struct Relu<GPUDevice, T>; \ 96 \ 97 template <> \ 98 void Elu<GPUDevice, T>::operator()(const GPUDevice& d, \ 99 typename TTypes<T>::ConstTensor features, \ 100 typename TTypes<T>::Tensor activations); \ 101 extern template struct Elu<GPUDevice, T>; \ 102 \ 103 template <> \ 104 void Selu<GPUDevice, T>::operator()( \ 105 const GPUDevice& d, typename TTypes<T>::ConstTensor features, \ 106 typename TTypes<T>::Tensor activations); \ 107 extern template struct Selu<GPUDevice, T>; 108 109 TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_NO_MLIR_SPEC); 110 } // namespace functor 111 112 #define REGISTER_GPU_NO_MLIR_KERNELS(type) \ 113 REGISTER_KERNEL_BUILDER( \ 114 Name("Relu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 115 ReluOp<GPUDevice, type>); \ 116 REGISTER_KERNEL_BUILDER( \ 117 Name("Elu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 118 EluOp<GPUDevice, type>); \ 119 REGISTER_KERNEL_BUILDER( \ 120 Name("Selu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 121 SeluOp<GPUDevice, type>); 122 123 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_NO_MLIR_KERNELS); 124 #undef REGISTER_RELU_KERNEL 125 #endif 126 127 // Forward declarations of the functor specializations for GPU. 
128 namespace functor { 129 #define DECLARE_GPU_SPEC(T) \ 130 template <> \ 131 void ReluGrad<GPUDevice, T>::operator()( \ 132 const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \ 133 typename TTypes<T>::ConstTensor features, \ 134 typename TTypes<T>::Tensor backprops); \ 135 extern template struct ReluGrad<GPUDevice, T>; \ 136 \ 137 template <> \ 138 void Relu6<GPUDevice, T>::operator()( \ 139 const GPUDevice& d, typename TTypes<T>::ConstTensor features, \ 140 typename TTypes<T>::Tensor activations); \ 141 extern template struct Relu6<GPUDevice, T>; \ 142 \ 143 template <> \ 144 void Relu6Grad<GPUDevice, T>::operator()( \ 145 const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \ 146 typename TTypes<T>::ConstTensor features, \ 147 typename TTypes<T>::Tensor backprops); \ 148 extern template struct Relu6Grad<GPUDevice, T>; \ 149 \ 150 template <> \ 151 void LeakyRelu<GPUDevice, T>::operator()(LeakyReluArgs args); \ 152 extern template struct LeakyRelu<GPUDevice, T>; \ 153 \ 154 template <> \ 155 void LeakyReluGrad<GPUDevice, T>::operator()( \ 156 const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \ 157 typename TTypes<T>::ConstTensor features, T alpha, \ 158 typename TTypes<T>::Tensor backprops); \ 159 extern template struct LeakyReluGrad<GPUDevice, T>; \ 160 \ 161 template <> \ 162 void EluGrad<GPUDevice, T>::operator()( \ 163 const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \ 164 typename TTypes<T>::ConstTensor activations, \ 165 typename TTypes<T>::Tensor backprops); \ 166 extern template struct EluGrad<GPUDevice, T>; \ 167 \ 168 template <> \ 169 void SeluGrad<GPUDevice, T>::operator()( \ 170 const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \ 171 typename TTypes<T>::ConstTensor activations, \ 172 typename TTypes<T>::Tensor backprops); \ 173 extern template struct SeluGrad<GPUDevice, T>; 174 175 template <> 176 void Relu<GPUDevice, qint8>::operator()( 177 const GPUDevice& d, typename 
TTypes<qint8>::ConstTensor features, 178 typename TTypes<qint8>::Tensor activations); 179 extern template struct Relu<GPUDevice, qint8>; 180 181 TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); 182 } // namespace functor 183 184 // Registration of the GPU implementations. 185 #define REGISTER_GPU_KERNELS(type) \ 186 REGISTER_KERNEL_BUILDER( \ 187 Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 188 ReluGradOp<GPUDevice, type>); \ 189 REGISTER_KERNEL_BUILDER( \ 190 Name("Relu6").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 191 Relu6Op<GPUDevice, type>); \ 192 REGISTER_KERNEL_BUILDER( \ 193 Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 194 Relu6GradOp<GPUDevice, type>); \ 195 REGISTER_KERNEL_BUILDER( \ 196 Name("LeakyRelu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 197 LeakyReluOp<GPUDevice, type>); \ 198 REGISTER_KERNEL_BUILDER( \ 199 Name("LeakyReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 200 LeakyReluGradOp<GPUDevice, type>); \ 201 REGISTER_KERNEL_BUILDER( \ 202 Name("EluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 203 EluGradOp<GPUDevice, type>); \ 204 REGISTER_KERNEL_BUILDER( \ 205 Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ 206 SeluGradOp<GPUDevice, type>) 207 208 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); 209 #undef REGISTER_GPU_KERNELS 210 211 template <typename Device> 212 class ReluOp<Device, qint8> 213 : public UnaryElementWiseOp<qint8, ReluOp<Device, qint8>> { 214 public: 215 using UnaryElementWiseOp<qint8, ReluOp<Device, qint8>>::UnaryElementWiseOp; 216 Operate(OpKernelContext * context,const Tensor & input,Tensor * output)217 void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) { 218 auto flat_input = input.flat<qint8>(); 219 OP_REQUIRES(context, (flat_input.size() % 4) == 0, 220 errors::InvalidArgument( 221 "Tensor size must be a multiple of 4 for Relu<qint8>. 
Got ", 222 flat_input.size())); 223 functor::Relu<Device, qint8> func; 224 func(context->eigen_device<Device>(), flat_input, output->flat<qint8>()); 225 } 226 }; 227 228 REGISTER_KERNEL_BUILDER( 229 Name("Relu").Device(DEVICE_GPU).TypeConstraint<qint8>("T"), 230 ReluOp<GPUDevice, qint8>); 231 232 #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM 233 234 } // namespace tensorflow 235