1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_ELEMENTAL_IR_EMITTER_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_ELEMENTAL_IR_EMITTER_H_ 18 19 #include <functional> 20 #include <string> 21 #include <utility> 22 23 #include "absl/types/span.h" 24 #include "llvm/IR/IRBuilder.h" 25 #include "llvm/IR/Value.h" 26 #include "tensorflow/compiler/xla/service/elemental_ir_emitter.h" 27 #include "tensorflow/compiler/xla/service/gpu/target_util.h" 28 #include "tensorflow/compiler/xla/service/hlo_computation.h" 29 #include "tensorflow/compiler/xla/service/hlo_instruction.h" 30 #include "tensorflow/compiler/xla/service/hlo_module_config.h" 31 #include "tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h" 32 #include "tensorflow/compiler/xla/statusor.h" 33 #include "tensorflow/compiler/xla/types.h" 34 #include "tensorflow/compiler/xla/xla_data.pb.h" 35 36 namespace xla { 37 namespace gpu { 38 39 class GpuElementalIrEmitter : public ElementalIrEmitter { 40 public: 41 // A NestedComputer computes an element of the output of the given computation 42 // given a Span of its input elements. 43 using NestedComputer = std::function<StatusOr<std::vector<llvm::Value*>>( 44 const HloComputation&, absl::Span<llvm::Value* const>)>; 45 46 GpuElementalIrEmitter(const HloModuleConfig& hlo_module_config, 47 llvm::Module* module, llvm::IRBuilder<>* b, 48 NestedComputer compute_nested); 49 50 protected: 51 llvm_ir::IrArray::Index GetSourceIndexOfBitcast( 52 const llvm_ir::IrArray::Index& index, const HloInstruction* hlo) override; 53 54 StatusOr<llvm::Value*> EmitFloatBinaryOp(const HloInstruction* op, 55 llvm::Value* lhs_value, 56 llvm::Value* rhs_value) override; 57 58 StatusOr<llvm::Value*> EmitLog(PrimitiveType prim_type, 59 llvm::Value* value) override; 60 61 StatusOr<llvm::Value*> EmitLog1p(PrimitiveType prim_type, 62 llvm::Value* value) override; 63 64 StatusOr<llvm::Value*> EmitSin(PrimitiveType prim_type, 65 llvm::Value* value) override; 66 67 StatusOr<llvm::Value*> EmitCos(PrimitiveType prim_type, 68 llvm::Value* value) override; 69 70 StatusOr<llvm::Value*> EmitExp(PrimitiveType prim_type, llvm::Value* value, 71 absl::string_view name) override; 72 73 StatusOr<llvm::Value*> EmitExpm1(PrimitiveType prim_type, 74 llvm::Value* value) override; 75 76 StatusOr<llvm::Value*> EmitSqrt(PrimitiveType prim_type, 77 llvm::Value* value) override; 78 79 StatusOr<llvm::Value*> EmitRsqrt(PrimitiveType prim_type, 80 llvm::Value* value) override; 81 82 StatusOr<llvm::Value*> EmitPow(PrimitiveType prim_type, llvm::Value* lhs, 83 llvm::Value* rhs, 84 absl::string_view name) override; 85 86 StatusOr<llvm::Value*> EmitAtan2(PrimitiveType prim_type, llvm::Value* lhs, 87 llvm::Value* rhs, 88 absl::string_view name) override; 89 90 StatusOr<llvm::Value*> EmitTanh(PrimitiveType prim_type, 91 llvm::Value* value) override; 92 93 StatusOr<llvm::Value*> EmitComplexAbs(PrimitiveType prim_type, 94 llvm::Value* value) override; 95 EmitThreadLocalCall(const HloComputation & callee,absl::Span<llvm::Value * const> parameters,absl::string_view,bool)96 StatusOr<std::vector<llvm::Value*>> EmitThreadLocalCall( 97 const HloComputation& callee, absl::Span<llvm::Value* const> parameters, 98 absl::string_view, bool /*is_reducer*/) override { 99 return compute_nested_(callee, parameters); 100 } 101 102 llvm::Value* EmitThreadId() override; 103 fast_min_max()104 bool fast_min_max() override { 105 return hlo_module_config_.debug_options().xla_gpu_enable_fast_min_max(); 106 } 107 108 private: 109 // Emits IR for op, which must have opcode kPower. 110 StatusOr<llvm::Value*> EmitPowerOp(const HloInstruction* op, 111 llvm::Value* lhs_value, 112 llvm::Value* rhs_value); 113 114 // Emits IR to call an LLVM intrinsic of type [T] -> T. Adjusts 115 // callee_name according to T. Returns the IR value that represents the 116 // return value of the function. 117 StatusOr<llvm::Value*> EmitLlvmIntrinsicMathCall( 118 const std::string& callee_name, absl::Span<llvm::Value* const> operands, 119 absl::Span<const PrimitiveType> input_types, PrimitiveType output_type); 120 121 // Emits IR to call a device function of type [T] -> T. Adjusts 122 // callee_name according to T. Returns the IR value that represents the 123 // return value of the function. 124 StatusOr<llvm::Value*> EmitDeviceMathCall( 125 TargetDeviceFunctionID funcid, absl::Span<llvm::Value* const> operands, 126 absl::Span<const PrimitiveType> input_types, PrimitiveType output_type, 127 absl::string_view name = ""); 128 129 // Emits IR to call a function of type [T] -> T. Does not munge callee_name. 130 // Returns the IR value that represents the return value of the function. 131 StatusOr<llvm::Value*> EmitMathCall( 132 const std::string& callee_name, absl::Span<llvm::Value* const> operands, 133 absl::Span<const PrimitiveType> input_types, PrimitiveType output_type, 134 absl::string_view name = ""); 135 136 const HloModuleConfig& hlo_module_config_; 137 138 NestedComputer compute_nested_; 139 }; 140 141 } // namespace gpu 142 } // namespace xla 143 144 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_ELEMENTAL_IR_EMITTER_H_ 145