1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "llvm/IR/Function.h"
17 #include "llvm/IR/LLVMContext.h"
18 #include "llvm/MC/TargetRegistry.h"
19 #include "llvm/Target/TargetMachine.h"
20 #include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h"
21 #include "tensorflow/compiler/xla/service/cpu/test_target_triple_helper.h"
22 #include "tensorflow/compiler/xla/test.h"
23 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
24 
25 namespace xla {
26 namespace {
27 class CodegenReduceOnArchWithNoVectorRegisters : public HloTestBase {};
28 
GetTargetVectorRegisterByteSize(std::string triple)29 StatusOr<unsigned> GetTargetVectorRegisterByteSize(std::string triple) {
30   // Unfortunately we need a lot of boilerplate to get to an
31   // llvm::TargetMachine.
32 
33   std::string error;
34   const llvm::Target* target =
35       llvm::TargetRegistry::lookupTarget(triple, error);
36   if (target == nullptr) {
37     return InternalError("TargetRegistry::lookupTarget failed: %s", error);
38   }
39 
40   llvm::LLVMContext context;
41   llvm::Module module("test", context);
42   llvm::Function* function = llvm::Function::Create(
43       llvm::FunctionType::get(llvm::Type::getVoidTy(context), {}),
44       llvm::GlobalValue::ExternalLinkage, "test", &module);
45 
46   std::unique_ptr<llvm::TargetMachine> target_machine =
47       absl::WrapUnique(target->createTargetMachine(
48           /*TT=*/triple, /*CPU=*/"", /*Features=*/"", llvm::TargetOptions{},
49           /*RM=*/llvm::None));
50   cpu::LLVMTargetMachineFeatures target_machine_features(target_machine.get());
51   return target_machine_features.vector_register_byte_size(*function);
52 }
53 
// AOT-compiles a reduction for a target whose reported vector register size is
// zero and expects exactly one compilation result.
TEST_F(CodegenReduceOnArchWithNoVectorRegisters, Test) {
  // Reduces a 1000x1000 f32 input over dimension 0 with scalar addition.
  absl::string_view hlo_text = R"(
HloModule Reduce

add {
  lhs = f32[] parameter(0)
  rhs = f32[] parameter(1)
  ROOT add = f32[] add(lhs, rhs)
}

ENTRY main {
  input = f32[1000,1000] parameter(0)
  constant = f32[] constant(0)
  ROOT reduce = f32[1000] reduce(input, constant), dimensions={0}, to_apply=add
}
)";

  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> parsed_module,
                          ParseAndReturnVerifiedModule(hlo_text));

  // NOTE(review): the compiler is constructed before the first target lookup
  // below, as in the original ordering.  Presumably its construction is what
  // registers the LLVM targets -- confirm before moving this line.
  cpu::CpuCompiler cpu_compiler;

  // Sanity check of the helper itself: the host triple is expected to report
  // 16-byte vector registers.
  TF_ASSERT_OK_AND_ASSIGN(
      unsigned host_vector_register_byte_size,
      GetTargetVectorRegisterByteSize(kTargetTripleForHost));
  ASSERT_EQ(host_vector_register_byte_size, 16);

  const std::string target_triple = "i686-none-android";

  // The point of this test is to exercise the XLA CPU vectorized reduction
  // codegen on an architecture without vector registers.  If `target_triple`
  // did report vector registers the compile below would not be testing
  // anything interesting, so require a size of zero up front.
  TF_ASSERT_OK_AND_ASSIGN(unsigned target_vector_register_byte_size,
                          GetTargetVectorRegisterByteSize(target_triple));
  ASSERT_EQ(target_vector_register_byte_size, 0);

  // Hand the parsed module to the compiler as a single-element group.
  std::unique_ptr<HloModuleGroup> modules =
      std::make_unique<HloModuleGroup>("group");
  modules->push_back(std::move(parsed_module));

  cpu::CpuAotCompilationOptions options(
      /*triple=*/target_triple, /*cpu_name=*/"", /*features=*/"",
      /*entry_point_name=*/"main",
      cpu::CpuAotCompilationOptions::RelocationModel::BigPic);

  TF_ASSERT_OK_AND_ASSIGN(
      std::vector<std::unique_ptr<AotCompilationResult>> results,
      cpu_compiler.CompileAheadOfTime(std::move(modules), options));
  EXPECT_EQ(results.size(), 1);
}
107 }  // namespace
108 }  // namespace xla
109