/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/gpu/gpu_virtual_mem_allocator.h"

#if CUDA_VERSION >= 10020

#include <cstring>

#include "tensorflow/core/common_runtime/device/device_id_utils.h"
#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"

namespace tensorflow {
namespace {

using ::stream_executor::gpu::GpuContext;
using ::stream_executor::gpu::GpuDevicePtr;
using ::stream_executor::gpu::GpuDriver;

// Empirically the min allocation granularity.
constexpr size_t k2MiB{2 << 20};
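// NOTE: 2 MiB is an assumption based on the granularity observed on the
// devices this test has been run against (cf. cuMemGetAllocationGranularity);
// a device reporting a larger minimum granularity would need this constant,
// and the size expectations below, adjusted.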

// Creates an allocator with 8 MiB of virtual address space.
std::unique_ptr<GpuVirtualMemAllocator> CreateAllocator() {
  PlatformDeviceId gpu_id(0);
  auto executor =
      DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(), gpu_id)
          .ValueOrDie();
  GpuContext* gpu_context = reinterpret_cast<GpuContext*>(
      executor->implementation()->GpuContextHack());
  return GpuVirtualMemAllocator::Create(
             {}, {}, *gpu_context, gpu_id,
             /*virtual_address_space_size=*/4 * k2MiB, {})
      .ValueOrDie();
}

TEST(GpuVirtualMemAllocatorTest, SimpleAlloc) {
  PlatformDeviceId gpu_id(0);
  auto executor =
      DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(), gpu_id)
          .ValueOrDie();
  GpuContext* gpu_context = reinterpret_cast<GpuContext*>(
      executor->implementation()->GpuContextHack());
  auto allocator = GpuVirtualMemAllocator::Create(
                       {}, {}, *gpu_context, gpu_id,
                       /*virtual_address_space_size=*/4 * k2MiB, {})
                       .ValueOrDie();
  size_t bytes_received;  // Ignored in this test.
  void* gpu_block =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(gpu_block, nullptr);

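  // Round-trip a small buffer through the mapped block: copy host data in,
  // copy it back out, and check that it survived. This verifies the virtual
  // addresses handed out by the allocator are actually backed by physical
  // memory.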
  constexpr size_t kBufSize{256};
  void* host_mem[2] = {GpuDriver::HostAllocate(gpu_context, kBufSize),
                       GpuDriver::HostAllocate(gpu_context, kBufSize)};
  std::memset(host_mem[0], 'z', kBufSize);
  std::memset(host_mem[1], 0, kBufSize);

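  // Target an address 2 KiB into the block, so the copy exercises an
  // interior device pointer rather than the block base address.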
  GpuDevicePtr gpu_buf = reinterpret_cast<GpuDevicePtr>(gpu_block) + 2048;
  ASSERT_TRUE(GpuDriver::SynchronousMemcpyH2D(gpu_context, gpu_buf, host_mem[0],
                                              kBufSize)
                  .ok());
  ASSERT_TRUE(GpuDriver::SynchronousMemcpyD2H(gpu_context, host_mem[1], gpu_buf,
                                              kBufSize)
                  .ok());
  for (size_t i = 0; i < kBufSize; ++i) {
    ASSERT_EQ('z', reinterpret_cast<const char*>(host_mem[1])[i]);
  }
}

TEST(GpuVirtualMemAllocatorTest, AllocPaddedUp) {
  auto allocator = CreateAllocator();
  size_t bytes_received;
  void* gpu_block =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/256, &bytes_received);
  ASSERT_NE(gpu_block, nullptr);
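  // Sub-granularity requests are padded up to a full granule: the allocator
  // reports the 2 MiB it handed out via bytes_received.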
  ASSERT_EQ(bytes_received, k2MiB);
}

TEST(GpuVirtualMemAllocatorTest, AllocsContiguous) {
  auto allocator = CreateAllocator();
  size_t bytes_received;  // Ignored in this test.
  void* first_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(first_alloc, nullptr);
  void* second_alloc = allocator->Alloc(
      /*alignment=*/0, /*num_bytes=*/2 * k2MiB, &bytes_received);
  ASSERT_NE(second_alloc, nullptr);

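  // Each allocation is expected to begin exactly where the previous one
  // ends; the allocator carves addresses out of the reserved range in order.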
  ASSERT_EQ(second_alloc, reinterpret_cast<const char*>(first_alloc) + k2MiB);

  void* third_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(third_alloc, nullptr);

  ASSERT_EQ(third_alloc,
            reinterpret_cast<const char*>(second_alloc) + 2 * k2MiB);
}

TEST(GpuVirtualMemAllocatorTest, OverAllocate) {
  auto allocator = CreateAllocator();
  size_t bytes_received;  // Ignored in this test.
  void* first_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(first_alloc, nullptr);
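  // Only 3 * k2MiB of the 4 * k2MiB address space remains, so a request for
  // 4 * k2MiB cannot be satisfied and must return nullptr.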
  void* over_alloc = allocator->Alloc(/*alignment=*/0, /*num_bytes=*/4 * k2MiB,
                                      &bytes_received);
  ASSERT_EQ(over_alloc, nullptr);
}

TEST(GpuVirtualMemAllocatorTest, FreeAtEnd) {
  auto allocator = CreateAllocator();
  size_t bytes_received;  // Ignored in this test.
  void* first_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(first_alloc, nullptr);
  void* second_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(second_alloc, nullptr);

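  // Freeing the trailing allocation returns its range to the allocator, so
  // an allocation of the same size should come back at the same address.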
  allocator->Free(second_alloc, k2MiB);

  void* re_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_EQ(re_alloc, second_alloc);
}

TEST(GpuVirtualMemAllocatorTest, FreeHole) {
  auto allocator = CreateAllocator();
  size_t bytes_received;  // Ignored in this test.
  void* first_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(first_alloc, nullptr);
  void* second_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(second_alloc, nullptr);

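  // Free the first block, leaving a hole at the start of the used range.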
  allocator->Free(first_alloc, k2MiB);

  void* third_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(third_alloc, nullptr);

  // Expect that the allocation still happens at the end; the hole left by
  // the free is not reused.
  ASSERT_EQ(third_alloc, reinterpret_cast<const char*>(second_alloc) + k2MiB);
}

TEST(GpuVirtualMemAllocatorTest, FreeRange) {
  auto allocator = CreateAllocator();
  size_t bytes_received;  // Ignored in this test.
  void* first_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(first_alloc, nullptr);
  void* second_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(second_alloc, nullptr);
  void* third_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(third_alloc, nullptr);

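  // A single Free call may span several back-to-back allocations; release
  // all three at once and check that the range is reusable from its start.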
  allocator->Free(first_alloc, 3 * k2MiB);

  void* re_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(re_alloc, nullptr);
  ASSERT_EQ(re_alloc, first_alloc);
}

}  // namespace
}  // namespace tensorflow

#endif  // CUDA_VERSION >= 10020