1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
#include "tensorflow/compiler/xla/stream_executor/kernel_spec.h"

#include <cstring>

#include "absl/strings/string_view.h"
19
20 namespace stream_executor {
21
KernelLoaderSpec(absl::string_view kernelname)22 KernelLoaderSpec::KernelLoaderSpec(absl::string_view kernelname)
23 : kernelname_(std::string(kernelname)) {}
24
OnDiskKernelLoaderSpec(absl::string_view filename,absl::string_view kernelname)25 OnDiskKernelLoaderSpec::OnDiskKernelLoaderSpec(absl::string_view filename,
26 absl::string_view kernelname)
27 : KernelLoaderSpec(kernelname), filename_(std::string(filename)) {}
28
CudaPtxOnDisk(absl::string_view filename,absl::string_view kernelname)29 CudaPtxOnDisk::CudaPtxOnDisk(absl::string_view filename,
30 absl::string_view kernelname)
31 : OnDiskKernelLoaderSpec(filename, kernelname) {}
32
CudaCubinOnDisk(absl::string_view filename,absl::string_view kernelname)33 CudaCubinOnDisk::CudaCubinOnDisk(absl::string_view filename,
34 absl::string_view kernelname)
35 : OnDiskKernelLoaderSpec(filename, kernelname) {}
36
CudaCubinInMemory(const char * bytes,absl::string_view kernelname)37 CudaCubinInMemory::CudaCubinInMemory(const char *bytes,
38 absl::string_view kernelname)
39 : KernelLoaderSpec(kernelname), bytes_(bytes) {}
40
// Strict-weak ordering for (major, minor) compute capabilities: orders by
// major version first and breaks ties on the minor version. This is exactly
// the lexicographic ordering that std::tuple's operator< provides.
bool CompareComputeCapability(const std::tuple<int, int> &lhs,
                              const std::tuple<int, int> &rhs) {
  return lhs < rhs;
}
47
48 const std::tuple<int, int> CudaPtxInMemory::kMinimumCapability{1, 0};
49
CudaPtxInMemory(absl::string_view ptx,absl::string_view kernel_name,bool ptx_compressed)50 CudaPtxInMemory::CudaPtxInMemory(absl::string_view ptx,
51 absl::string_view kernel_name,
52 bool ptx_compressed)
53 : KernelLoaderSpec(kernel_name),
54 ptx_by_compute_capability_(CompareComputeCapability) {
55 if (ptx_compressed) {
56 // Lazy decompression. Put an empty string in decompressed_ptx_ showing that
57 // the original ptx is compressed.
58 decompressed_ptx_[ptx.data()] = "";
59 }
60 ptx_by_compute_capability_[kMinimumCapability] = ptx.data();
61 }
62
CudaPtxInMemory(const std::initializer_list<CudaPtxInMemory::PtxSpec> & spec_list,absl::string_view kernel_name,bool ptx_compressed)63 CudaPtxInMemory::CudaPtxInMemory(
64 const std::initializer_list<CudaPtxInMemory::PtxSpec> &spec_list,
65 absl::string_view kernel_name, bool ptx_compressed)
66 : KernelLoaderSpec(kernel_name),
67 ptx_by_compute_capability_(CompareComputeCapability) {
68 for (const auto &spec : spec_list) {
69 int major, minor;
70 absl::string_view ptx;
71 std::tie(major, minor, ptx) = spec;
72 if (ptx_compressed) {
73 // Lazy decompression. Put an empty string in decompressed_ptx_ showing
74 // that the original ptx is compressed.
75 decompressed_ptx_[ptx.data()] = "";
76 }
77 ptx_by_compute_capability_[std::tuple<int, int>{major, minor}] = ptx.data();
78 }
79 }
80
DecompressPtx(const char * ptx)81 std::string CudaPtxInMemory::DecompressPtx(const char *ptx) {
82 // Get the length of the PTX string from the beginning of the buffer.
83 uint64_t ptx_length = *reinterpret_cast<const uint64 *>(ptx);
84 // Get the PTX string from the buffer with offset and length.
85 std::string compressed_ptx(ptx + sizeof(uint64_t),
86 ptx + sizeof(uint64_t) + ptx_length);
87 std::string decompressed_ptx;
88 // Decompress the PTX string with bzip2.
89 LOG(FATAL) << "bzip2 decompression is not supported yet.";
90 return decompressed_ptx;
91 }
92
default_text() const93 const char *CudaPtxInMemory::default_text() const {
94 if (ptx_by_compute_capability_.empty()) {
95 return nullptr;
96 }
97
98 absl::MutexLock lock(&mu_);
99
100 auto ptx = ptx_by_compute_capability_.begin()->second;
101 // Check if there is an entry in decompressed ptx table.
102 auto decompressed_ptx_iter = decompressed_ptx_.find(ptx);
103 if (decompressed_ptx_iter != decompressed_ptx_.end()) {
104 // If the decompressed string is empty, which means the ptx hasn't been
105 // decompressed, decompress it here.
106 if (decompressed_ptx_iter->second.empty()) {
107 decompressed_ptx_iter->second = DecompressPtx(ptx);
108 }
109 return decompressed_ptx_iter->second.c_str();
110 }
111 return ptx;
112 }
113
original_default_text() const114 const char *CudaPtxInMemory::original_default_text() const {
115 if (ptx_by_compute_capability_.empty()) {
116 return nullptr;
117 }
118
119 return ptx_by_compute_capability_.begin()->second;
120 }
121
text(int compute_capability_major,int compute_capability_minor) const122 const char *CudaPtxInMemory::text(int compute_capability_major,
123 int compute_capability_minor) const {
124 std::tuple<int, int> capability{compute_capability_major,
125 compute_capability_minor};
126
127 auto ptx_iter = ptx_by_compute_capability_.find(capability);
128 if (ptx_iter == ptx_by_compute_capability_.end()) {
129 return nullptr;
130 }
131
132 absl::MutexLock lock(&mu_);
133
134 // Check if there is an entry in decompressed ptx table.
135 auto decompressed_ptx_iter = decompressed_ptx_.find(ptx_iter->second);
136 if (decompressed_ptx_iter != decompressed_ptx_.end()) {
137 // If the decompressed string is empty, which means the ptx hasn't been
138 // decompressed, decompress it here.
139 if (decompressed_ptx_iter->second.empty()) {
140 decompressed_ptx_iter->second = DecompressPtx(ptx_iter->second);
141 }
142 return decompressed_ptx_iter->second.c_str();
143 }
144 return ptx_iter->second;
145 }
146
original_text(int compute_capability_major,int compute_capability_minor) const147 const char *CudaPtxInMemory::original_text(int compute_capability_major,
148 int compute_capability_minor) const {
149 std::tuple<int, int> capability{compute_capability_major,
150 compute_capability_minor};
151
152 auto ptx_iter = ptx_by_compute_capability_.find(capability);
153 if (ptx_iter == ptx_by_compute_capability_.end()) {
154 return nullptr;
155 }
156
157 return ptx_iter->second;
158 }
159
OpenCLTextOnDisk(absl::string_view filename,absl::string_view kernelname)160 OpenCLTextOnDisk::OpenCLTextOnDisk(absl::string_view filename,
161 absl::string_view kernelname)
162 : OnDiskKernelLoaderSpec(filename, kernelname) {}
163
OpenCLTextInMemory(absl::string_view text,absl::string_view kernelname)164 OpenCLTextInMemory::OpenCLTextInMemory(absl::string_view text,
165 absl::string_view kernelname)
166 : KernelLoaderSpec(kernelname), text_(text) {}
167
OpenCLBinaryOnDisk(absl::string_view filename,absl::string_view kernelname)168 OpenCLBinaryOnDisk::OpenCLBinaryOnDisk(absl::string_view filename,
169 absl::string_view kernelname)
170 : OnDiskKernelLoaderSpec(filename, kernelname) {}
171
AddOpenCLTextOnDisk(absl::string_view filename,absl::string_view kernelname)172 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextOnDisk(
173 absl::string_view filename, absl::string_view kernelname) {
174 CHECK(ocl_text_on_disk_ == nullptr);
175 ocl_text_on_disk_.reset(new OpenCLTextOnDisk{filename, kernelname});
176 return this;
177 }
178
AddOpenCLBinaryOnDisk(absl::string_view filename,absl::string_view kernelname)179 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLBinaryOnDisk(
180 absl::string_view filename, absl::string_view kernelname) {
181 CHECK(ocl_binary_on_disk_ == nullptr);
182 ocl_binary_on_disk_.reset(new OpenCLBinaryOnDisk{filename, kernelname});
183 return this;
184 }
185
AddOpenCLTextInMemory(absl::string_view filename,absl::string_view kernelname)186 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextInMemory(
187 absl::string_view filename, absl::string_view kernelname) {
188 CHECK(ocl_text_in_memory_ == nullptr);
189 ocl_text_in_memory_.reset(new OpenCLTextInMemory{filename, kernelname});
190 return this;
191 }
192
AddCudaPtxOnDisk(absl::string_view filename,absl::string_view kernelname)193 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxOnDisk(
194 absl::string_view filename, absl::string_view kernelname) {
195 CHECK(cuda_ptx_on_disk_ == nullptr);
196 cuda_ptx_on_disk_.reset(new CudaPtxOnDisk{filename, kernelname});
197 return this;
198 }
199
AddCudaCubinInMemory(const char * bytes,absl::string_view kernelname)200 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinInMemory(
201 const char *bytes, absl::string_view kernelname) {
202 CHECK(cuda_cubin_in_memory_ == nullptr);
203 cuda_cubin_in_memory_.reset(new CudaCubinInMemory{bytes, kernelname});
204 return this;
205 }
206
AddCudaCubinOnDisk(absl::string_view filename,absl::string_view kernelname)207 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinOnDisk(
208 absl::string_view filename, absl::string_view kernelname) {
209 CHECK(cuda_cubin_on_disk_ == nullptr);
210 cuda_cubin_on_disk_.reset(new CudaCubinOnDisk{filename, kernelname});
211 return this;
212 }
213
AddCudaPtxInMemory(absl::string_view ptx,absl::string_view kernelname)214 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory(
215 absl::string_view ptx, absl::string_view kernelname) {
216 CHECK(cuda_ptx_in_memory_ == nullptr);
217 cuda_ptx_in_memory_.reset(
218 new CudaPtxInMemory{ptx, kernelname, false /* ptx_compressed */});
219 return this;
220 }
221
AddCudaCompressedPtxInMemory(absl::string_view ptx,absl::string_view kernelname)222 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory(
223 absl::string_view ptx, absl::string_view kernelname) {
224 CHECK(cuda_ptx_in_memory_ == nullptr);
225 cuda_ptx_in_memory_.reset(
226 new CudaPtxInMemory{ptx, kernelname, true /* ptx_compressed */});
227 return this;
228 }
229
AddCudaPtxInMemory(std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,absl::string_view kernelname)230 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory(
231 std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,
232 absl::string_view kernelname) {
233 CHECK(cuda_ptx_in_memory_ == nullptr);
234 cuda_ptx_in_memory_.reset(
235 new CudaPtxInMemory{spec_list, kernelname, false /* ptx_compressed */});
236 return this;
237 }
238
AddCudaCompressedPtxInMemory(std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,absl::string_view kernelname)239 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory(
240 std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,
241 absl::string_view kernelname) {
242 CHECK(cuda_ptx_in_memory_ == nullptr);
243 cuda_ptx_in_memory_.reset(
244 new CudaPtxInMemory{spec_list, kernelname, true /* ptx_compressed */});
245 return this;
246 }
247
MultiKernelLoaderSpec(size_t arity)248 MultiKernelLoaderSpec::MultiKernelLoaderSpec(size_t arity) : arity_(arity) {}
249
250 } // namespace stream_executor
251