1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_BUFFER_ALLOCATIONS_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_BUFFER_ALLOCATIONS_H_ 18 19 #include <memory> 20 #include <set> 21 #include <vector> 22 23 #include "absl/container/flat_hash_map.h" 24 #include "absl/strings/str_format.h" 25 #include "absl/types/span.h" 26 #include "tensorflow/compiler/xla/service/buffer_assignment.h" 27 #include "tensorflow/compiler/xla/statusor.h" 28 #include "tensorflow/core/platform/stream_executor_no_cuda.h" 29 #include "tensorflow/stream_executor/device_memory_allocator.h" 30 31 namespace xla { 32 namespace gpu { 33 34 // A thread-compatible class that encapsulates the base addresses of the 35 // allocated device buffers. 36 class BufferAllocations { 37 public: BufferAllocations(absl::Span<se::DeviceMemoryBase const> buffers,int device_ordinal,se::DeviceMemoryAllocator * memory_allocator)38 BufferAllocations(absl::Span<se::DeviceMemoryBase const> buffers, 39 int device_ordinal, 40 se::DeviceMemoryAllocator* memory_allocator) 41 : buffers_(buffers.begin(), buffers.end()), 42 device_ordinal_(device_ordinal), 43 memory_allocator_(memory_allocator) {} 44 45 BufferAllocations(BufferAllocations&& other) = default; 46 BufferAllocations& operator=(BufferAllocations&& other) = default; 47 BufferAllocations(const BufferAllocations&) = delete; 48 BufferAllocations& operator=(const BufferAllocations&) = delete; 49 memory_allocator()50 se::DeviceMemoryAllocator* memory_allocator() const { 51 return memory_allocator_; 52 } device_ordinal()53 int device_ordinal() const { return device_ordinal_; } 54 55 // Returns the device address of buffer `buffer_index`. `buffer_index` must be 56 // a valid index, i.e., in [0, buffer_count). This function returns null if 57 // `buffer_index` is not assigned to a buffer address. 58 se::DeviceMemoryBase GetDeviceAddress( 59 BufferAllocation::Index buffer_index) const; 60 61 // Returns a mutable value for the allocation at a given `buffer_index`. 62 se::DeviceMemoryBase& GetMutableDeviceAddress( 63 BufferAllocation::Index buffer_index); 64 65 // Same as above, but also adjusts the returned address for the offset and 66 // size contained in the given slice. 67 se::DeviceMemoryBase GetDeviceAddress( 68 const BufferAllocation::Slice& buffer_slice) const; 69 70 // Tears down all buffers allocated by this object that are not in 71 // `live_addresses`. 72 Status TearDown(const std::set<se::DeviceMemoryBase>& live_addresses, 73 absl::Span<const BufferAllocation> allocations); 74 ToString()75 std::string ToString() { 76 std::string out; 77 for (BufferAllocation::Index i = 0; i < buffers_.size(); ++i) { 78 const auto& buf = buffers_[i]; 79 absl::StrAppendFormat(&out, "Buffer %d -> %p (%d B)", i, buf.opaque(), 80 buf.size()); 81 } 82 return out; 83 } 84 85 private: 86 // An array of device pointers that stores the address of each buffer 87 // indexed by Index. Each element can point to a temporary buffer, an 88 // input buffer, or nullptr if no buffer is needed for that Index. 89 std::vector<se::DeviceMemoryBase> buffers_; 90 int device_ordinal_; 91 se::DeviceMemoryAllocator* memory_allocator_; 92 }; 93 94 // LLVM and PTXAS don't deal well with large constants, so we only emit very 95 // small constants directly in LLVM IR. Larger constants are emitted with zero 96 // initializers in LLVM IR and are later overwritten when the PTX/CUBIN is 97 // loaded. 98 bool ShouldEmitLiteralInLlvmIr(const Literal& literal); 99 100 } // namespace gpu 101 } // namespace xla 102 103 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_BUFFER_ALLOCATIONS_H_ 104