/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/gpu/gpu_virtual_mem_allocator.h"

#if CUDA_VERSION >= 10020

#include <cstring>

#include "tensorflow/core/common_runtime/device/device_id_utils.h"
#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"

namespace tensorflow {
namespace {

using ::stream_executor::gpu::GpuContext;
using ::stream_executor::gpu::GpuDevicePtr;
using ::stream_executor::gpu::GpuDriver;

// Empirically the minimum allocation granularity: 2 MiB (2 << 20 bytes).
constexpr size_t k2MiB{2 << 20};

// Creates an allocator with 8 MiB of virtual address space.
std::unique_ptr<GpuVirtualMemAllocator> CreateAllocator() {
  PlatformDeviceId gpu_id(0);
  auto executor =
      DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(), gpu_id)
          .ValueOrDie();
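  // Pull the underlying GpuContext out of the StreamExecutor so it can be
  // handed to GpuVirtualMemAllocator::Create below.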
  GpuContext* gpu_context = reinterpret_cast<GpuContext*>(
      executor->implementation()->GpuContextHack());
  return GpuVirtualMemAllocator::Create(
             {}, {}, *gpu_context, gpu_id,
             /*virtual_address_space_size=*/4 * k2MiB, {})
      .ValueOrDie();
}

TEST(GpuVirtualMemAllocatorTest, SimpleAlloc) {
  PlatformDeviceId gpu_id(0);
  auto executor =
      DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(), gpu_id)
          .ValueOrDie();
  GpuContext* gpu_context = reinterpret_cast<GpuContext*>(
      executor->implementation()->GpuContextHack());
  auto allocator = GpuVirtualMemAllocator::Create(
                       {}, {}, *gpu_context, gpu_id,
                       /*virtual_address_space_size=*/4 * k2MiB, {})
                       .ValueOrDie();
  size_t bytes_received;  // Ignored in this test.
  void* gpu_block =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(gpu_block, nullptr);

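  // Round-trip a byte pattern through the freshly mapped device block:
  // host -> device -> host, then verify the pattern survived.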
  constexpr size_t kBufSize{256};
  void* host_mem[2] = {GpuDriver::HostAllocate(gpu_context, kBufSize),
                       GpuDriver::HostAllocate(gpu_context, kBufSize)};
  std::memset(host_mem[0], 'z', kBufSize);
  std::memset(host_mem[1], 0, kBufSize);

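  // Copy at a 2048-byte offset into the block to check that the mapping is
  // valid beyond the base address, not just at the first byte.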
  GpuDevicePtr gpu_buf = reinterpret_cast<GpuDevicePtr>(gpu_block) + 2048;
  ASSERT_TRUE(GpuDriver::SynchronousMemcpyH2D(gpu_context, gpu_buf, host_mem[0],
                                              kBufSize)
                  .ok());
  ASSERT_TRUE(GpuDriver::SynchronousMemcpyD2H(gpu_context, host_mem[1], gpu_buf,
                                              kBufSize)
                  .ok());
  for (size_t i = 0; i < kBufSize; ++i) {
    ASSERT_EQ('z', reinterpret_cast<const char*>(host_mem[1])[i]);
  }
}

TEST(GpuVirtualMemAllocatorTest, AllocPaddedUp) {
  auto allocator = CreateAllocator();
  size_t bytes_received;
  void* gpu_block =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/256, &bytes_received);
  ASSERT_NE(gpu_block, nullptr);
  ASSERT_EQ(bytes_received, k2MiB);
}

TEST(GpuVirtualMemAllocatorTest, AllocsContiguous) {
  auto allocator = CreateAllocator();
  size_t bytes_received;  // Ignored in this test.
  void* first_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(first_alloc, nullptr);
  void* second_alloc = allocator->Alloc(
      /*alignment=*/0, /*num_bytes=*/2 * k2MiB, &bytes_received);
  ASSERT_NE(second_alloc, nullptr);

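  // Successive allocations are laid out back to back in the reserved virtual
  // address range, so each block should start where the previous one ends.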
  ASSERT_EQ(second_alloc, reinterpret_cast<const char*>(first_alloc) + k2MiB);

  void* third_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(third_alloc, nullptr);

  ASSERT_EQ(third_alloc,
            reinterpret_cast<const char*>(second_alloc) + 2 * k2MiB);
}

TEST(GpuVirtualMemAllocatorTest, OverAllocate) {
  auto allocator = CreateAllocator();
  size_t bytes_received;  // Ignored in this test.
  void* first_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(first_alloc, nullptr);
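  // Only 6 MiB of the 8 MiB virtual address space remains, so an 8 MiB
  // request cannot be satisfied and should fail cleanly.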
  void* over_alloc = allocator->Alloc(/*alignment=*/0, /*num_bytes=*/4 * k2MiB,
                                      &bytes_received);
  ASSERT_EQ(over_alloc, nullptr);
}

TEST(GpuVirtualMemAllocatorTest, FreeAtEnd) {
  auto allocator = CreateAllocator();
  size_t bytes_received;  // Ignored in this test.
  void* first_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(first_alloc, nullptr);
  void* second_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(second_alloc, nullptr);

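  // Freeing the block at the end of the used range hands its addresses back,
  // so the next allocation should land at the same spot.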
  allocator->Free(second_alloc, k2MiB);

  void* re_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_EQ(re_alloc, second_alloc);
}

TEST(GpuVirtualMemAllocatorTest, FreeHole) {
  auto allocator = CreateAllocator();
  size_t bytes_received;  // Ignored in this test.
  void* first_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(first_alloc, nullptr);
  void* second_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(second_alloc, nullptr);

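  // Free the first block, leaving a hole at the start of the used range.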
  allocator->Free(first_alloc, k2MiB);

  void* third_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(third_alloc, nullptr);

  // Expect that the allocation still happens at the end: the allocator does
  // not backfill freed holes.
  ASSERT_EQ(third_alloc, reinterpret_cast<const char*>(second_alloc) + k2MiB);
}

TEST(GpuVirtualMemAllocatorTest, FreeRange) {
  auto allocator = CreateAllocator();
  size_t bytes_received;  // Ignored in this test.
  void* first_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(first_alloc, nullptr);
  void* second_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(second_alloc, nullptr);
  void* third_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(third_alloc, nullptr);

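  // A single Free call can cover several allocations at once: this releases
  // the full 6 MiB range spanning all three blocks.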
  allocator->Free(first_alloc, 3 * k2MiB);

  void* re_alloc =
      allocator->Alloc(/*alignment=*/0, /*num_bytes=*/k2MiB, &bytes_received);
  ASSERT_NE(re_alloc, nullptr);
  ASSERT_EQ(re_alloc, first_alloc);
}

}  // namespace
}  // namespace tensorflow

#endif  // CUDA_VERSION >= 10020
186