1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include <atomic>
17
18 #include "tensorflow/core/framework/allocator.h"
19 #include "tensorflow/core/framework/allocator_registry.h"
20 #include "tensorflow/core/framework/tracking_allocator.h"
21 #include "tensorflow/core/lib/strings/strcat.h"
22 #include "tensorflow/core/lib/strings/stringprintf.h"
23 #include "tensorflow/core/platform/mem.h"
24 #include "tensorflow/core/platform/mutex.h"
25 #include "tensorflow/core/platform/types.h"
26 #include "tensorflow/core/profiler/lib/scoped_memory_debug_annotation.h"
27 #include "tensorflow/core/profiler/lib/traceme.h"
28
29 namespace tensorflow {
30
// If true, cpu allocator collects more stats.
//
// This flag is flipped by Enable/DisableCPUAllocatorStats (typically from a
// control thread) and read on every AllocateRaw/DeallocateRaw call, so it is
// stored in a std::atomic to make the cross-thread toggle well-defined.
// Sequential consistency is not required for correctness here — the flag is a
// best-effort switch, not a synchronization point — but the default atomic
// semantics keep the code simple.
static std::atomic<bool> cpu_allocator_collect_stats{false};

// Turns on collection of detailed CPU allocator statistics.
void EnableCPUAllocatorStats() { cpu_allocator_collect_stats = true; }
// Turns off collection of detailed CPU allocator statistics.
void DisableCPUAllocatorStats() { cpu_allocator_collect_stats = false; }
// Returns true iff detailed CPU allocator statistics are being collected.
bool CPUAllocatorStatsEnabled() { return cpu_allocator_collect_stats; }
37
// Cap on how many times the "total allocated memory" warning is logged.
static const int kMaxTotalAllocationWarnings = 1;

// Cap on how many times the "single large allocation" warning is logged.
static const int kMaxSingleAllocationWarnings = 5;

// If cpu_allocator_collect_stats is true, warn when the total allocated memory
// exceeds this threshold (expressed as a fraction of available RAM).
static const double kTotalAllocationWarningThreshold = 0.5;

// Individual allocations larger than this fraction of available RAM will
// trigger a warning.
static const double kLargeAllocationWarningThreshold = 0.1;
48
49 // Cache first invocation to port::AvailableRam, as it can be expensive.
LargeAllocationWarningBytes()50 static int64_t LargeAllocationWarningBytes() {
51 static int64_t value = static_cast<int64_t>(port::AvailableRam() *
52 kLargeAllocationWarningThreshold);
53 return value;
54 }
55
TotalAllocationWarningBytes()56 static int64_t TotalAllocationWarningBytes() {
57 static int64_t value = static_cast<int64_t>(port::AvailableRam() *
58 kTotalAllocationWarningThreshold);
59 return value;
60 }
61
62 namespace {
63
64 // A default Allocator for CPU devices. ProcessState::GetCPUAllocator() will
65 // return a different version that may perform better, but may also lack the
66 // optional stats triggered by the functions above. TODO(tucker): migrate all
67 // uses of cpu_allocator() except tests to use ProcessState instead.
68 class CPUAllocator : public Allocator {
69 public:
CPUAllocator()70 CPUAllocator()
71 : single_allocation_warning_count_(0),
72 total_allocation_warning_count_(0) {}
73
~CPUAllocator()74 ~CPUAllocator() override {}
75
Name()76 string Name() override { return "cpu"; }
77
AllocateRaw(size_t alignment,size_t num_bytes)78 void* AllocateRaw(size_t alignment, size_t num_bytes) override {
79 if (num_bytes > static_cast<size_t>(LargeAllocationWarningBytes()) &&
80 single_allocation_warning_count_ < kMaxSingleAllocationWarnings) {
81 ++single_allocation_warning_count_;
82 LOG(WARNING) << "Allocation of " << num_bytes << " exceeds "
83 << 100 * kLargeAllocationWarningThreshold
84 << "% of free system memory.";
85 }
86
87 void* p = port::AlignedMalloc(num_bytes, alignment);
88 if (cpu_allocator_collect_stats) {
89 const std::size_t alloc_size = port::MallocExtension_GetAllocatedSize(p);
90 mutex_lock l(mu_);
91 ++stats_.num_allocs;
92 stats_.bytes_in_use += alloc_size;
93 stats_.peak_bytes_in_use =
94 std::max<int64_t>(stats_.peak_bytes_in_use, stats_.bytes_in_use);
95 stats_.largest_alloc_size =
96 std::max<int64_t>(stats_.largest_alloc_size, alloc_size);
97
98 if (stats_.bytes_in_use > TotalAllocationWarningBytes() &&
99 total_allocation_warning_count_ < kMaxTotalAllocationWarnings) {
100 ++total_allocation_warning_count_;
101 LOG(WARNING) << "Total allocated memory " << stats_.bytes_in_use
102 << "exceeds " << 100 * kTotalAllocationWarningThreshold
103 << "% of free system memory";
104 }
105 if (p != nullptr) {
106 AddTraceMe("MemoryAllocation", p, num_bytes, alloc_size);
107 }
108 }
109 return p;
110 }
111
DeallocateRaw(void * ptr)112 void DeallocateRaw(void* ptr) override {
113 if (cpu_allocator_collect_stats) {
114 const std::size_t alloc_size =
115 port::MallocExtension_GetAllocatedSize(ptr);
116 mutex_lock l(mu_);
117 stats_.bytes_in_use -= alloc_size;
118 AddTraceMe("MemoryDeallocation", ptr, 0, alloc_size);
119 }
120 port::AlignedFree(ptr);
121 }
122
AddTraceMe(absl::string_view traceme_name,const void * chunk_ptr,std::size_t req_bytes,std::size_t alloc_bytes)123 void AddTraceMe(absl::string_view traceme_name, const void* chunk_ptr,
124 std::size_t req_bytes, std::size_t alloc_bytes) {
125 tensorflow::profiler::TraceMe::InstantActivity(
126 [this, traceme_name, chunk_ptr, req_bytes,
127 alloc_bytes]() TF_NO_THREAD_SAFETY_ANALYSIS {
128 const auto& annotation =
129 profiler::ScopedMemoryDebugAnnotation::CurrentAnnotation();
130 return tensorflow::profiler::TraceMeEncode(
131 traceme_name, {{"allocator_name", Name()},
132 {"bytes_reserved", stats_.bytes_reserved},
133 {"bytes_allocated", stats_.bytes_in_use},
134 {"peak_bytes_in_use", stats_.peak_bytes_in_use},
135 {"requested_bytes", req_bytes},
136 {"allocation_bytes", alloc_bytes},
137 {"addr", reinterpret_cast<uint64>(chunk_ptr)},
138 {"tf_op", annotation.pending_op_name},
139 {"id", annotation.pending_step_id},
140 {"region_type", annotation.pending_region_type},
141 {"data_type", annotation.pending_data_type},
142 {"shape", annotation.pending_shape_func()}});
143 },
144 /*level=*/profiler::TraceMeLevel::kInfo);
145 }
146
GetStats()147 absl::optional<AllocatorStats> GetStats() override {
148 if (!cpu_allocator_collect_stats) return absl::nullopt;
149 mutex_lock l(mu_);
150 return stats_;
151 }
152
ClearStats()153 bool ClearStats() override {
154 if (!cpu_allocator_collect_stats) return false;
155 mutex_lock l(mu_);
156 stats_.num_allocs = 0;
157 stats_.peak_bytes_in_use = stats_.bytes_in_use;
158 stats_.largest_alloc_size = 0;
159 return true;
160 }
161
AllocatedSizeSlow(const void * ptr) const162 size_t AllocatedSizeSlow(const void* ptr) const override {
163 return port::MallocExtension_GetAllocatedSize(ptr);
164 }
165
GetMemoryType() const166 AllocatorMemoryType GetMemoryType() const override {
167 return AllocatorMemoryType::kHostPageable;
168 }
169
170 private:
171 mutex mu_;
172 AllocatorStats stats_ TF_GUARDED_BY(mu_);
173
174 // Use <atomic> for single allocations to avoid mutex contention when
175 // statistics are disabled.
176 std::atomic<int> single_allocation_warning_count_;
177 int total_allocation_warning_count_ TF_GUARDED_BY(mu_);
178
179 TF_DISALLOW_COPY_AND_ASSIGN(CPUAllocator);
180 };
181
182 class CPUAllocatorFactory : public AllocatorFactory {
183 public:
CreateAllocator()184 Allocator* CreateAllocator() override { return new CPUAllocator; }
185
CreateSubAllocator(int numa_node)186 SubAllocator* CreateSubAllocator(int numa_node) override {
187 return new CPUSubAllocator(new CPUAllocator);
188 }
189
190 private:
191 class CPUSubAllocator : public SubAllocator {
192 public:
CPUSubAllocator(CPUAllocator * cpu_allocator)193 explicit CPUSubAllocator(CPUAllocator* cpu_allocator)
194 : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {}
195
Alloc(size_t alignment,size_t num_bytes,size_t * bytes_received)196 void* Alloc(size_t alignment, size_t num_bytes,
197 size_t* bytes_received) override {
198 *bytes_received = num_bytes;
199 return cpu_allocator_->AllocateRaw(alignment, num_bytes);
200 }
201
Free(void * ptr,size_t num_bytes)202 void Free(void* ptr, size_t num_bytes) override {
203 cpu_allocator_->DeallocateRaw(ptr);
204 }
205
SupportsCoalescing() const206 bool SupportsCoalescing() const override { return false; }
207
GetMemoryType() const208 AllocatorMemoryType GetMemoryType() const override {
209 return cpu_allocator_->GetMemoryType();
210 }
211
212 private:
213 CPUAllocator* cpu_allocator_;
214 };
215 };
216
217 REGISTER_MEM_ALLOCATOR("DefaultCPUAllocator", 100, CPUAllocatorFactory);
218 } // namespace
219
220 } // namespace tensorflow
221