xref: /aosp_15_r20/external/tensorflow/tensorflow/core/framework/cpu_allocator_impl.cc (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include <atomic>
17 
18 #include "tensorflow/core/framework/allocator.h"
19 #include "tensorflow/core/framework/allocator_registry.h"
20 #include "tensorflow/core/framework/tracking_allocator.h"
21 #include "tensorflow/core/lib/strings/strcat.h"
22 #include "tensorflow/core/lib/strings/stringprintf.h"
23 #include "tensorflow/core/platform/mem.h"
24 #include "tensorflow/core/platform/mutex.h"
25 #include "tensorflow/core/platform/types.h"
26 #include "tensorflow/core/profiler/lib/scoped_memory_debug_annotation.h"
27 #include "tensorflow/core/profiler/lib/traceme.h"
28 
29 namespace tensorflow {
30 
// If true, cpu allocator collects more stats.
//
// This flag is read on every AllocateRaw/DeallocateRaw call, potentially
// concurrently with Enable/Disable from another thread, so it must be an
// atomic to avoid a data race (the <atomic> header is already included).
static std::atomic<bool> cpu_allocator_collect_stats{false};

// Turns on collection of CPU allocator statistics.
void EnableCPUAllocatorStats() { cpu_allocator_collect_stats = true; }
// Turns off collection of CPU allocator statistics.
void DisableCPUAllocatorStats() { cpu_allocator_collect_stats = false; }
// Returns whether CPU allocator statistics collection is currently enabled.
bool CPUAllocatorStatsEnabled() { return cpu_allocator_collect_stats; }
37 
// Maximum number of "total allocation" warnings that will be logged.
static constexpr int kMaxTotalAllocationWarnings = 1;

// Maximum number of "single large allocation" warnings that will be logged.
static constexpr int kMaxSingleAllocationWarnings = 5;

// If cpu_allocator_collect_stats is true, warn when the total allocated memory
// exceeds this threshold (as a fraction of available system memory).
static constexpr double kTotalAllocationWarningThreshold = 0.5;

// Individual allocations larger than this fraction of available system memory
// will trigger a warning.
static constexpr double kLargeAllocationWarningThreshold = 0.1;
48 
49 // Cache first invocation to port::AvailableRam, as it can be expensive.
LargeAllocationWarningBytes()50 static int64_t LargeAllocationWarningBytes() {
51   static int64_t value = static_cast<int64_t>(port::AvailableRam() *
52                                               kLargeAllocationWarningThreshold);
53   return value;
54 }
55 
TotalAllocationWarningBytes()56 static int64_t TotalAllocationWarningBytes() {
57   static int64_t value = static_cast<int64_t>(port::AvailableRam() *
58                                               kTotalAllocationWarningThreshold);
59   return value;
60 }
61 
62 namespace {
63 
64 // A default Allocator for CPU devices.  ProcessState::GetCPUAllocator() will
65 // return a different version that may perform better, but may also lack the
66 // optional stats triggered by the functions above.  TODO(tucker): migrate all
67 // uses of cpu_allocator() except tests to use ProcessState instead.
68 class CPUAllocator : public Allocator {
69  public:
CPUAllocator()70   CPUAllocator()
71       : single_allocation_warning_count_(0),
72         total_allocation_warning_count_(0) {}
73 
~CPUAllocator()74   ~CPUAllocator() override {}
75 
Name()76   string Name() override { return "cpu"; }
77 
AllocateRaw(size_t alignment,size_t num_bytes)78   void* AllocateRaw(size_t alignment, size_t num_bytes) override {
79     if (num_bytes > static_cast<size_t>(LargeAllocationWarningBytes()) &&
80         single_allocation_warning_count_ < kMaxSingleAllocationWarnings) {
81       ++single_allocation_warning_count_;
82       LOG(WARNING) << "Allocation of " << num_bytes << " exceeds "
83                    << 100 * kLargeAllocationWarningThreshold
84                    << "% of free system memory.";
85     }
86 
87     void* p = port::AlignedMalloc(num_bytes, alignment);
88     if (cpu_allocator_collect_stats) {
89       const std::size_t alloc_size = port::MallocExtension_GetAllocatedSize(p);
90       mutex_lock l(mu_);
91       ++stats_.num_allocs;
92       stats_.bytes_in_use += alloc_size;
93       stats_.peak_bytes_in_use =
94           std::max<int64_t>(stats_.peak_bytes_in_use, stats_.bytes_in_use);
95       stats_.largest_alloc_size =
96           std::max<int64_t>(stats_.largest_alloc_size, alloc_size);
97 
98       if (stats_.bytes_in_use > TotalAllocationWarningBytes() &&
99           total_allocation_warning_count_ < kMaxTotalAllocationWarnings) {
100         ++total_allocation_warning_count_;
101         LOG(WARNING) << "Total allocated memory " << stats_.bytes_in_use
102                      << "exceeds " << 100 * kTotalAllocationWarningThreshold
103                      << "% of free system memory";
104       }
105       if (p != nullptr) {
106         AddTraceMe("MemoryAllocation", p, num_bytes, alloc_size);
107       }
108     }
109     return p;
110   }
111 
DeallocateRaw(void * ptr)112   void DeallocateRaw(void* ptr) override {
113     if (cpu_allocator_collect_stats) {
114       const std::size_t alloc_size =
115           port::MallocExtension_GetAllocatedSize(ptr);
116       mutex_lock l(mu_);
117       stats_.bytes_in_use -= alloc_size;
118       AddTraceMe("MemoryDeallocation", ptr, 0, alloc_size);
119     }
120     port::AlignedFree(ptr);
121   }
122 
AddTraceMe(absl::string_view traceme_name,const void * chunk_ptr,std::size_t req_bytes,std::size_t alloc_bytes)123   void AddTraceMe(absl::string_view traceme_name, const void* chunk_ptr,
124                   std::size_t req_bytes, std::size_t alloc_bytes) {
125     tensorflow::profiler::TraceMe::InstantActivity(
126         [this, traceme_name, chunk_ptr, req_bytes,
127          alloc_bytes]() TF_NO_THREAD_SAFETY_ANALYSIS {
128           const auto& annotation =
129               profiler::ScopedMemoryDebugAnnotation::CurrentAnnotation();
130           return tensorflow::profiler::TraceMeEncode(
131               traceme_name, {{"allocator_name", Name()},
132                              {"bytes_reserved", stats_.bytes_reserved},
133                              {"bytes_allocated", stats_.bytes_in_use},
134                              {"peak_bytes_in_use", stats_.peak_bytes_in_use},
135                              {"requested_bytes", req_bytes},
136                              {"allocation_bytes", alloc_bytes},
137                              {"addr", reinterpret_cast<uint64>(chunk_ptr)},
138                              {"tf_op", annotation.pending_op_name},
139                              {"id", annotation.pending_step_id},
140                              {"region_type", annotation.pending_region_type},
141                              {"data_type", annotation.pending_data_type},
142                              {"shape", annotation.pending_shape_func()}});
143         },
144         /*level=*/profiler::TraceMeLevel::kInfo);
145   }
146 
GetStats()147   absl::optional<AllocatorStats> GetStats() override {
148     if (!cpu_allocator_collect_stats) return absl::nullopt;
149     mutex_lock l(mu_);
150     return stats_;
151   }
152 
ClearStats()153   bool ClearStats() override {
154     if (!cpu_allocator_collect_stats) return false;
155     mutex_lock l(mu_);
156     stats_.num_allocs = 0;
157     stats_.peak_bytes_in_use = stats_.bytes_in_use;
158     stats_.largest_alloc_size = 0;
159     return true;
160   }
161 
AllocatedSizeSlow(const void * ptr) const162   size_t AllocatedSizeSlow(const void* ptr) const override {
163     return port::MallocExtension_GetAllocatedSize(ptr);
164   }
165 
GetMemoryType() const166   AllocatorMemoryType GetMemoryType() const override {
167     return AllocatorMemoryType::kHostPageable;
168   }
169 
170  private:
171   mutex mu_;
172   AllocatorStats stats_ TF_GUARDED_BY(mu_);
173 
174   // Use <atomic> for single allocations to avoid mutex contention when
175   // statistics are disabled.
176   std::atomic<int> single_allocation_warning_count_;
177   int total_allocation_warning_count_ TF_GUARDED_BY(mu_);
178 
179   TF_DISALLOW_COPY_AND_ASSIGN(CPUAllocator);
180 };
181 
182 class CPUAllocatorFactory : public AllocatorFactory {
183  public:
CreateAllocator()184   Allocator* CreateAllocator() override { return new CPUAllocator; }
185 
CreateSubAllocator(int numa_node)186   SubAllocator* CreateSubAllocator(int numa_node) override {
187     return new CPUSubAllocator(new CPUAllocator);
188   }
189 
190  private:
191   class CPUSubAllocator : public SubAllocator {
192    public:
CPUSubAllocator(CPUAllocator * cpu_allocator)193     explicit CPUSubAllocator(CPUAllocator* cpu_allocator)
194         : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {}
195 
Alloc(size_t alignment,size_t num_bytes,size_t * bytes_received)196     void* Alloc(size_t alignment, size_t num_bytes,
197                 size_t* bytes_received) override {
198       *bytes_received = num_bytes;
199       return cpu_allocator_->AllocateRaw(alignment, num_bytes);
200     }
201 
Free(void * ptr,size_t num_bytes)202     void Free(void* ptr, size_t num_bytes) override {
203       cpu_allocator_->DeallocateRaw(ptr);
204     }
205 
SupportsCoalescing() const206     bool SupportsCoalescing() const override { return false; }
207 
GetMemoryType() const208     AllocatorMemoryType GetMemoryType() const override {
209       return cpu_allocator_->GetMemoryType();
210     }
211 
212    private:
213     CPUAllocator* cpu_allocator_;
214   };
215 };
216 
217 REGISTER_MEM_ALLOCATOR("DefaultCPUAllocator", 100, CPUAllocatorFactory);
218 }  // namespace
219 
220 }  // namespace tensorflow
221