xref: /aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_ELEMENTAL_IR_EMITTER_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_ELEMENTAL_IR_EMITTER_H_
18 
19 #include <functional>
20 #include <string>
21 #include <utility>
22 
23 #include "absl/types/span.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Value.h"
26 #include "tensorflow/compiler/xla/service/elemental_ir_emitter.h"
27 #include "tensorflow/compiler/xla/service/gpu/target_util.h"
28 #include "tensorflow/compiler/xla/service/hlo_computation.h"
29 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
30 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
31 #include "tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h"
32 #include "tensorflow/compiler/xla/statusor.h"
33 #include "tensorflow/compiler/xla/types.h"
34 #include "tensorflow/compiler/xla/xla_data.pb.h"
35 
36 namespace xla {
37 namespace gpu {
38 
// GPU-specific subclass of ElementalIrEmitter (namespace xla::gpu). It
// overrides the base class's hooks for float binary ops, the elementwise math
// functions declared below, thread-local calls, the thread id, and the
// fast-min-max switch. Only EmitThreadLocalCall and fast_min_max are defined
// inline here; every other member is declared only.
// NOTE(review): this block uses std::vector and absl::string_view, but neither
// <vector> nor an absl string_view header is among the includes visible above
// the class -- presumably they arrive transitively; confirm and consider
// including them directly.
39 class GpuElementalIrEmitter : public ElementalIrEmitter {
40  public:
41   // A NestedComputer computes an element of the output of the given computation
42   // given a Span of its input elements.
     // The callback returns a StatusOr wrapping a vector of llvm::Value*.
43   using NestedComputer = std::function<StatusOr<std::vector<llvm::Value*>>(
44       const HloComputation&, absl::Span<llvm::Value* const>)>;
45 
     // Constructs the emitter from the module configuration, the LLVM module,
     // the IR builder, and the callback used for nested computations.
     // NOTE(review): hlo_module_config_ below is a reference member, so the
     // config presumably has to outlive this object -- confirm in the .cc file.
46   GpuElementalIrEmitter(const HloModuleConfig& hlo_module_config,
47                         llvm::Module* module, llvm::IRBuilder<>* b,
48                         NestedComputer compute_nested);
49 
50  protected:
     // Override that maps an index of a bitcast `hlo` to a source index; the
     // mapping itself is implemented outside this header.
51   llvm_ir::IrArray::Index GetSourceIndexOfBitcast(
52       const llvm_ir::IrArray::Index& index, const HloInstruction* hlo) override;
53 
     // Override for binary floating-point instructions; takes the instruction
     // and the two already-emitted operand values.
54   StatusOr<llvm::Value*> EmitFloatBinaryOp(const HloInstruction* op,
55                                            llvm::Value* lhs_value,
56                                            llvm::Value* rhs_value) override;
57 
     // The overrides from EmitLog through EmitComplexAbs share one shape: each
     // receives the operand's PrimitiveType plus the operand value(s) (and, for
     // some, a name string) and returns StatusOr<llvm::Value*>. Their bodies are
     // not part of this header.
58   StatusOr<llvm::Value*> EmitLog(PrimitiveType prim_type,
59                                  llvm::Value* value) override;
60 
61   StatusOr<llvm::Value*> EmitLog1p(PrimitiveType prim_type,
62                                    llvm::Value* value) override;
63 
64   StatusOr<llvm::Value*> EmitSin(PrimitiveType prim_type,
65                                  llvm::Value* value) override;
66 
67   StatusOr<llvm::Value*> EmitCos(PrimitiveType prim_type,
68                                  llvm::Value* value) override;
69 
70   StatusOr<llvm::Value*> EmitExp(PrimitiveType prim_type, llvm::Value* value,
71                                  absl::string_view name) override;
72 
73   StatusOr<llvm::Value*> EmitExpm1(PrimitiveType prim_type,
74                                    llvm::Value* value) override;
75 
76   StatusOr<llvm::Value*> EmitSqrt(PrimitiveType prim_type,
77                                   llvm::Value* value) override;
78 
79   StatusOr<llvm::Value*> EmitRsqrt(PrimitiveType prim_type,
80                                    llvm::Value* value) override;
81 
82   StatusOr<llvm::Value*> EmitPow(PrimitiveType prim_type, llvm::Value* lhs,
83                                  llvm::Value* rhs,
84                                  absl::string_view name) override;
85 
86   StatusOr<llvm::Value*> EmitAtan2(PrimitiveType prim_type, llvm::Value* lhs,
87                                    llvm::Value* rhs,
88                                    absl::string_view name) override;
89 
90   StatusOr<llvm::Value*> EmitTanh(PrimitiveType prim_type,
91                                   llvm::Value* value) override;
92 
93   StatusOr<llvm::Value*> EmitComplexAbs(PrimitiveType prim_type,
94                                         llvm::Value* value) override;
95 
     // Forwards the call to the compute_nested_ callback with `callee` and
     // `parameters`, returning its result unchanged. The unnamed
     // absl::string_view argument and the is_reducer flag are not used.
     // NOTE(review): the signature text fused in front of the embedded line
     // number on the next line appears to be symbol-index residue from the code
     // browser this listing was captured from, not part of the declaration.
EmitThreadLocalCall(const HloComputation & callee,absl::Span<llvm::Value * const> parameters,absl::string_view,bool)96   StatusOr<std::vector<llvm::Value*>> EmitThreadLocalCall(
97       const HloComputation& callee, absl::Span<llvm::Value* const> parameters,
98       absl::string_view, bool /*is_reducer*/) override {
99     return compute_nested_(callee, parameters);
100   }
101 
      // Override returning an llvm::Value* for the thread id; defined outside
      // this header.
102   llvm::Value* EmitThreadId() override;
103 
      // Returns the xla_gpu_enable_fast_min_max debug option read from the
      // stored module configuration.
      // NOTE(review): the `fast_min_max()` text fused ahead of the line number on
      // the next line looks like the same code-browser residue as above.
fast_min_max()104   bool fast_min_max() override {
105     return hlo_module_config_.debug_options().xla_gpu_enable_fast_min_max();
106   }
107 
108  private:
109   // Emits IR for op, which must have opcode kPower.
110   StatusOr<llvm::Value*> EmitPowerOp(const HloInstruction* op,
111                                      llvm::Value* lhs_value,
112                                      llvm::Value* rhs_value);
113 
114   // Emits IR to call an LLVM intrinsic of type [T] -> T.  Adjusts
115   // callee_name according to T.  Returns the IR value that represents the
116   // return value of the function.
117   StatusOr<llvm::Value*> EmitLlvmIntrinsicMathCall(
118       const std::string& callee_name, absl::Span<llvm::Value* const> operands,
119       absl::Span<const PrimitiveType> input_types, PrimitiveType output_type);
120 
121   // Emits IR to call a device function of type [T] -> T.  Adjusts
122   // callee_name according to T.  Returns the IR value that represents the
123   // return value of the function.
      // Unlike the two neighbouring helpers, the callee is identified by a
      // TargetDeviceFunctionID rather than by a string name. `name` defaults to
      // the empty string.
124   StatusOr<llvm::Value*> EmitDeviceMathCall(
125       TargetDeviceFunctionID funcid, absl::Span<llvm::Value* const> operands,
126       absl::Span<const PrimitiveType> input_types, PrimitiveType output_type,
127       absl::string_view name = "");
128 
129   // Emits IR to call a function of type [T] -> T.  Does not munge callee_name.
130   // Returns the IR value that represents the return value of the function.
      // `name` defaults to the empty string.
131   StatusOr<llvm::Value*> EmitMathCall(
132       const std::string& callee_name, absl::Span<llvm::Value* const> operands,
133       absl::Span<const PrimitiveType> input_types, PrimitiveType output_type,
134       absl::string_view name = "");
135 
      // Module configuration held by reference (not owned); read by
      // fast_min_max().
136   const HloModuleConfig& hlo_module_config_;
137 
      // Callback invoked by EmitThreadLocalCall to evaluate a nested
      // computation.
138   NestedComputer compute_nested_;
139 };
140 
141 }  // namespace gpu
142 }  // namespace xla
143 
144 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_ELEMENTAL_IR_EMITTER_H_
145