xref: /aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/stream_executor/kernel_spec.cc (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
#include "tensorflow/compiler/xla/stream_executor/kernel_spec.h"

#include <cstdint>
#include <cstring>

#include "absl/strings/string_view.h"
19 
20 namespace stream_executor {
21 
KernelLoaderSpec(absl::string_view kernelname)22 KernelLoaderSpec::KernelLoaderSpec(absl::string_view kernelname)
23     : kernelname_(std::string(kernelname)) {}
24 
OnDiskKernelLoaderSpec(absl::string_view filename,absl::string_view kernelname)25 OnDiskKernelLoaderSpec::OnDiskKernelLoaderSpec(absl::string_view filename,
26                                                absl::string_view kernelname)
27     : KernelLoaderSpec(kernelname), filename_(std::string(filename)) {}
28 
CudaPtxOnDisk(absl::string_view filename,absl::string_view kernelname)29 CudaPtxOnDisk::CudaPtxOnDisk(absl::string_view filename,
30                              absl::string_view kernelname)
31     : OnDiskKernelLoaderSpec(filename, kernelname) {}
32 
CudaCubinOnDisk(absl::string_view filename,absl::string_view kernelname)33 CudaCubinOnDisk::CudaCubinOnDisk(absl::string_view filename,
34                                  absl::string_view kernelname)
35     : OnDiskKernelLoaderSpec(filename, kernelname) {}
36 
CudaCubinInMemory(const char * bytes,absl::string_view kernelname)37 CudaCubinInMemory::CudaCubinInMemory(const char *bytes,
38                                      absl::string_view kernelname)
39     : KernelLoaderSpec(kernelname), bytes_(bytes) {}
40 
// Strict-weak-ordering predicate over (major, minor) compute capabilities.
//
// Returns true iff `lhs` sorts before `rhs`: the major versions are compared
// first and the minor versions break ties. This is exactly the lexicographic
// ordering provided by std::tuple's operator<, so it is used directly instead
// of spelling the comparison out by hand.
//
// Used in this file as the comparator handed to ptx_by_compute_capability_.
bool CompareComputeCapability(const std::tuple<int, int> &lhs,
                              const std::tuple<int, int> &rhs) {
  return lhs < rhs;
}
47 
48 const std::tuple<int, int> CudaPtxInMemory::kMinimumCapability{1, 0};
49 
CudaPtxInMemory(absl::string_view ptx,absl::string_view kernel_name,bool ptx_compressed)50 CudaPtxInMemory::CudaPtxInMemory(absl::string_view ptx,
51                                  absl::string_view kernel_name,
52                                  bool ptx_compressed)
53     : KernelLoaderSpec(kernel_name),
54       ptx_by_compute_capability_(CompareComputeCapability) {
55   if (ptx_compressed) {
56     // Lazy decompression. Put an empty string in decompressed_ptx_ showing that
57     // the original ptx is compressed.
58     decompressed_ptx_[ptx.data()] = "";
59   }
60   ptx_by_compute_capability_[kMinimumCapability] = ptx.data();
61 }
62 
CudaPtxInMemory(const std::initializer_list<CudaPtxInMemory::PtxSpec> & spec_list,absl::string_view kernel_name,bool ptx_compressed)63 CudaPtxInMemory::CudaPtxInMemory(
64     const std::initializer_list<CudaPtxInMemory::PtxSpec> &spec_list,
65     absl::string_view kernel_name, bool ptx_compressed)
66     : KernelLoaderSpec(kernel_name),
67       ptx_by_compute_capability_(CompareComputeCapability) {
68   for (const auto &spec : spec_list) {
69     int major, minor;
70     absl::string_view ptx;
71     std::tie(major, minor, ptx) = spec;
72     if (ptx_compressed) {
73       // Lazy decompression. Put an empty string in decompressed_ptx_ showing
74       // that the original ptx is compressed.
75       decompressed_ptx_[ptx.data()] = "";
76     }
77     ptx_by_compute_capability_[std::tuple<int, int>{major, minor}] = ptx.data();
78   }
79 }
80 
DecompressPtx(const char * ptx)81 std::string CudaPtxInMemory::DecompressPtx(const char *ptx) {
82   // Get the length of the PTX string from the beginning of the buffer.
83   uint64_t ptx_length = *reinterpret_cast<const uint64 *>(ptx);
84   // Get the PTX string from the buffer with offset and length.
85   std::string compressed_ptx(ptx + sizeof(uint64_t),
86                              ptx + sizeof(uint64_t) + ptx_length);
87   std::string decompressed_ptx;
88   // Decompress the PTX string with bzip2.
89   LOG(FATAL) << "bzip2 decompression is not supported yet.";
90   return decompressed_ptx;
91 }
92 
default_text() const93 const char *CudaPtxInMemory::default_text() const {
94   if (ptx_by_compute_capability_.empty()) {
95     return nullptr;
96   }
97 
98   absl::MutexLock lock(&mu_);
99 
100   auto ptx = ptx_by_compute_capability_.begin()->second;
101   // Check if there is an entry in decompressed ptx table.
102   auto decompressed_ptx_iter = decompressed_ptx_.find(ptx);
103   if (decompressed_ptx_iter != decompressed_ptx_.end()) {
104     // If the decompressed string is empty, which means the ptx hasn't been
105     // decompressed, decompress it here.
106     if (decompressed_ptx_iter->second.empty()) {
107       decompressed_ptx_iter->second = DecompressPtx(ptx);
108     }
109     return decompressed_ptx_iter->second.c_str();
110   }
111   return ptx;
112 }
113 
original_default_text() const114 const char *CudaPtxInMemory::original_default_text() const {
115   if (ptx_by_compute_capability_.empty()) {
116     return nullptr;
117   }
118 
119   return ptx_by_compute_capability_.begin()->second;
120 }
121 
text(int compute_capability_major,int compute_capability_minor) const122 const char *CudaPtxInMemory::text(int compute_capability_major,
123                                   int compute_capability_minor) const {
124   std::tuple<int, int> capability{compute_capability_major,
125                                   compute_capability_minor};
126 
127   auto ptx_iter = ptx_by_compute_capability_.find(capability);
128   if (ptx_iter == ptx_by_compute_capability_.end()) {
129     return nullptr;
130   }
131 
132   absl::MutexLock lock(&mu_);
133 
134   // Check if there is an entry in decompressed ptx table.
135   auto decompressed_ptx_iter = decompressed_ptx_.find(ptx_iter->second);
136   if (decompressed_ptx_iter != decompressed_ptx_.end()) {
137     // If the decompressed string is empty, which means the ptx hasn't been
138     // decompressed, decompress it here.
139     if (decompressed_ptx_iter->second.empty()) {
140       decompressed_ptx_iter->second = DecompressPtx(ptx_iter->second);
141     }
142     return decompressed_ptx_iter->second.c_str();
143   }
144   return ptx_iter->second;
145 }
146 
original_text(int compute_capability_major,int compute_capability_minor) const147 const char *CudaPtxInMemory::original_text(int compute_capability_major,
148                                            int compute_capability_minor) const {
149   std::tuple<int, int> capability{compute_capability_major,
150                                   compute_capability_minor};
151 
152   auto ptx_iter = ptx_by_compute_capability_.find(capability);
153   if (ptx_iter == ptx_by_compute_capability_.end()) {
154     return nullptr;
155   }
156 
157   return ptx_iter->second;
158 }
159 
OpenCLTextOnDisk(absl::string_view filename,absl::string_view kernelname)160 OpenCLTextOnDisk::OpenCLTextOnDisk(absl::string_view filename,
161                                    absl::string_view kernelname)
162     : OnDiskKernelLoaderSpec(filename, kernelname) {}
163 
OpenCLTextInMemory(absl::string_view text,absl::string_view kernelname)164 OpenCLTextInMemory::OpenCLTextInMemory(absl::string_view text,
165                                        absl::string_view kernelname)
166     : KernelLoaderSpec(kernelname), text_(text) {}
167 
OpenCLBinaryOnDisk(absl::string_view filename,absl::string_view kernelname)168 OpenCLBinaryOnDisk::OpenCLBinaryOnDisk(absl::string_view filename,
169                                        absl::string_view kernelname)
170     : OnDiskKernelLoaderSpec(filename, kernelname) {}
171 
AddOpenCLTextOnDisk(absl::string_view filename,absl::string_view kernelname)172 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextOnDisk(
173     absl::string_view filename, absl::string_view kernelname) {
174   CHECK(ocl_text_on_disk_ == nullptr);
175   ocl_text_on_disk_.reset(new OpenCLTextOnDisk{filename, kernelname});
176   return this;
177 }
178 
AddOpenCLBinaryOnDisk(absl::string_view filename,absl::string_view kernelname)179 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLBinaryOnDisk(
180     absl::string_view filename, absl::string_view kernelname) {
181   CHECK(ocl_binary_on_disk_ == nullptr);
182   ocl_binary_on_disk_.reset(new OpenCLBinaryOnDisk{filename, kernelname});
183   return this;
184 }
185 
AddOpenCLTextInMemory(absl::string_view filename,absl::string_view kernelname)186 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextInMemory(
187     absl::string_view filename, absl::string_view kernelname) {
188   CHECK(ocl_text_in_memory_ == nullptr);
189   ocl_text_in_memory_.reset(new OpenCLTextInMemory{filename, kernelname});
190   return this;
191 }
192 
AddCudaPtxOnDisk(absl::string_view filename,absl::string_view kernelname)193 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxOnDisk(
194     absl::string_view filename, absl::string_view kernelname) {
195   CHECK(cuda_ptx_on_disk_ == nullptr);
196   cuda_ptx_on_disk_.reset(new CudaPtxOnDisk{filename, kernelname});
197   return this;
198 }
199 
AddCudaCubinInMemory(const char * bytes,absl::string_view kernelname)200 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinInMemory(
201     const char *bytes, absl::string_view kernelname) {
202   CHECK(cuda_cubin_in_memory_ == nullptr);
203   cuda_cubin_in_memory_.reset(new CudaCubinInMemory{bytes, kernelname});
204   return this;
205 }
206 
AddCudaCubinOnDisk(absl::string_view filename,absl::string_view kernelname)207 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinOnDisk(
208     absl::string_view filename, absl::string_view kernelname) {
209   CHECK(cuda_cubin_on_disk_ == nullptr);
210   cuda_cubin_on_disk_.reset(new CudaCubinOnDisk{filename, kernelname});
211   return this;
212 }
213 
AddCudaPtxInMemory(absl::string_view ptx,absl::string_view kernelname)214 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory(
215     absl::string_view ptx, absl::string_view kernelname) {
216   CHECK(cuda_ptx_in_memory_ == nullptr);
217   cuda_ptx_in_memory_.reset(
218       new CudaPtxInMemory{ptx, kernelname, false /* ptx_compressed */});
219   return this;
220 }
221 
AddCudaCompressedPtxInMemory(absl::string_view ptx,absl::string_view kernelname)222 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory(
223     absl::string_view ptx, absl::string_view kernelname) {
224   CHECK(cuda_ptx_in_memory_ == nullptr);
225   cuda_ptx_in_memory_.reset(
226       new CudaPtxInMemory{ptx, kernelname, true /* ptx_compressed */});
227   return this;
228 }
229 
AddCudaPtxInMemory(std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,absl::string_view kernelname)230 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory(
231     std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,
232     absl::string_view kernelname) {
233   CHECK(cuda_ptx_in_memory_ == nullptr);
234   cuda_ptx_in_memory_.reset(
235       new CudaPtxInMemory{spec_list, kernelname, false /* ptx_compressed */});
236   return this;
237 }
238 
AddCudaCompressedPtxInMemory(std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,absl::string_view kernelname)239 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory(
240     std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,
241     absl::string_view kernelname) {
242   CHECK(cuda_ptx_in_memory_ == nullptr);
243   cuda_ptx_in_memory_.reset(
244       new CudaPtxInMemory{spec_list, kernelname, true /* ptx_compressed */});
245   return this;
246 }
247 
MultiKernelLoaderSpec(size_t arity)248 MultiKernelLoaderSpec::MultiKernelLoaderSpec(size_t arity) : arity_(arity) {}
249 
250 }  // namespace stream_executor
251