/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include <cstdio>
#include <functional>
#include <limits>
#include <queue>
#include <string>
#include <vector>

#include "tensorflow/lite/simple_memory_arena.h"

namespace tflite {
namespace {
// Same as the constant defined in tensorflow/lite/arena_planner.cc.
constexpr int32_t kNodeNotAssigned = std::numeric_limits<int32_t>::max();

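// Prints `v` as a comma-separated list in which runs of consecutive values are
// collapsed into ranges, e.g. {1, 2, 3, 7} is printed as [1-3,7]. The
// collapsing assumes `v` is sorted in ascending order, which callers below
// ensure via std::sort.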
void PrintIntVector(const std::vector<int>& v) {
  if (v.empty()) {
    printf("[]");
    return;
  }

  int range_start = v[0];
  int range_end = range_start;
  std::function<void(const char*)> print_range = [&](const char* suffix) {
    if (range_end == range_start) {
      printf("%d%s", range_start, suffix);
    } else if (range_end == range_start + 1) {
      printf("%d,%d%s", range_start, range_end, suffix);
    } else {
      printf("%d-%d%s", range_start, range_end, suffix);
    }
  };

  printf("[");
  for (int i = 1; i < v.size(); ++i) {
    int current = v[i];
    if (current == range_end + 1) {
      range_end = current;
    } else {
      print_range(",");
      range_start = range_end = current;
    }
  }
  print_range("]");
}

struct PerLayerInfo {
  PerLayerInfo() {}
  PerLayerInfo(int id, size_t bytes, const std::vector<int>& tensors)
      : node_id(id), total_bytes(bytes), live_tensors(tensors) {}
  int node_id;
  size_t total_bytes = 0;
  std::vector<int> live_tensors;
};
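// Comparator for PerLayerMinHeap: ordering by greater total_bytes makes the
// std::priority_queue keep the smallest recorded layer on top, i.e. it behaves
// as a min-heap.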
struct PerLayerInfoGreater {
  bool operator()(const PerLayerInfo& l, const PerLayerInfo& r) {
    return l.total_bytes > r.total_bytes;
  }
};
class PerLayerMinHeap
    : public std::priority_queue<PerLayerInfo, std::vector<PerLayerInfo>,
                                 PerLayerInfoGreater> {
 public:
  // Just to expose iterators to simplify iterating over contained elements.
  std::vector<PerLayerInfo>::const_iterator begin() const { return c.begin(); }
  std::vector<PerLayerInfo>::const_iterator end() const { return c.end(); }
};

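// Tracks the top-k layers (i.e. nodes in the execution plan) that consume the
// most arena memory so they can be reported at the end of the dump.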
class TopKLayers {
 public:
  TopKLayers(size_t top_k, size_t arena_size)
      : top_k_(top_k), arena_size_(arena_size) {}

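  // Records one layer's memory usage. Until top_k_ layers have been recorded,
  // every layer is kept; after that, a new layer replaces the smallest
  // recorded entry only if it uses at least as much memory.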
  void Add(int node_id, size_t total_bytes,
           const std::vector<int>& live_tensors) {
    if (topk_usage_.size() < top_k_) {
      topk_usage_.emplace(PerLayerInfo(node_id, total_bytes, live_tensors));
      return;
    }
    if (total_bytes < topk_usage_.top().total_bytes) return;
    topk_usage_.pop();
    topk_usage_.emplace(PerLayerInfo(node_id, total_bytes, live_tensors));
  }

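  // Prints the recorded layers in decreasing order of memory usage, including
  // each layer's utilization rate relative to the whole arena.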
  void Print() const {
    printf("\nTop %zu memory-consuming layers:\n",
           topk_usage_.size() < top_k_ ? topk_usage_.size() : top_k_);
    // As we use a min-heap but want to print usage in decreasing order, we
    // copy pointers to the top memory-consuming layers into a temporary
    // vector and sort that instead.
    std::vector<const PerLayerInfo*> tops;
    for (const auto& usage : topk_usage_) tops.push_back(&usage);
    std::sort(tops.begin(), tops.end(),
              [](const PerLayerInfo* l, const PerLayerInfo* r) {
                return l->total_bytes > r->total_bytes;
              });
    for (const auto* usage : tops) {
      printf(
          "Node %d: %zu bytes (%.3f MB), utilization rate: %.3f%%, %zu live "
          "tensors: ",
          usage->node_id, usage->total_bytes,
          static_cast<float>(usage->total_bytes) / (1 << 20),
          static_cast<float>(usage->total_bytes) / arena_size_ * 100.0,
          usage->live_tensors.size());
      PrintIntVector(usage->live_tensors);
      printf("\n");
    }
    printf("\n");
  }

 private:
  const size_t top_k_;
  const size_t arena_size_;
  PerLayerMinHeap topk_usage_;
};
}  // namespace

// Corresponding weak declaration found in lite/simple_memory_arena.cc
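// Linking this file into a binary provides the strong definition for that weak
// symbol. The dump includes the total arena size, each allocation's lifetime
// and size, the per-node memory consumption over the execution plan, and the
// top 5 memory-consuming nodes.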
void DumpArenaInfo(const std::string& name,
                   const std::vector<int>& execution_plan, size_t arena_size,
                   const std::vector<ArenaAllocWithUsageInterval>& allocs) {
  if (allocs.empty() || execution_plan.empty()) return;

  const int max_node_id =
      *std::max_element(execution_plan.begin(), execution_plan.end());

  printf("=== Beginning of %s ===\n", name.c_str());
  printf("Total size is %zu bytes (%.3f MB), holding %zu tensors.\n",
         arena_size, static_cast<float>(arena_size) / (1 << 20), allocs.size());
  std::vector<int> max_size_tensors;
  size_t max_tensor_size = 0;
  for (const auto& alloc_info : allocs) {
    printf("tensor %d: life_span: node [%d, %d], size:  %zu bytes (%.3f MB).\n",
           alloc_info.tensor, alloc_info.first_node,
           alloc_info.last_node == kNodeNotAssigned ? max_node_id
                                                    : alloc_info.last_node,
           alloc_info.size, static_cast<float>(alloc_info.size) / (1 << 20));
    if (alloc_info.size > max_tensor_size) {
      max_size_tensors.clear();
      max_size_tensors.push_back(alloc_info.tensor);
      max_tensor_size = alloc_info.size;
    } else if (alloc_info.size == max_tensor_size) {
      max_size_tensors.push_back(alloc_info.tensor);
    }
  }
  std::sort(max_size_tensors.begin(), max_size_tensors.end());
  printf("%zu tensors are of same max size (%zu B (%.3f MB)): ",
         max_size_tensors.size(), max_tensor_size,
         static_cast<float>(max_tensor_size) / (1 << 20));
  PrintIntVector(max_size_tensors);

  printf("\nPer-layer-info in the order of op execution:\n");
  // A straightforward way of computing per-op memory consumption, with
  // complexity on the order of O(execution_plan.size() * allocs.size()).
  std::vector<size_t> per_op_mem_bytes(execution_plan.size());
  // Track the top 5 layers that consume the most memory.
  TopKLayers top_usage(5, arena_size);
  for (int i = 0; i < execution_plan.size(); ++i) {
    const int node_id = execution_plan[i];
    size_t total_bytes = 0;
    std::vector<int> live_tensors;
    for (const auto& alloc_info : allocs) {
      if (node_id >= alloc_info.first_node && node_id <= alloc_info.last_node) {
        total_bytes += alloc_info.size;
        live_tensors.push_back(alloc_info.tensor);
      }
    }
    per_op_mem_bytes[i] = total_bytes;
    std::sort(live_tensors.begin(), live_tensors.end());
    printf(
        "Node %d: %zu bytes (%.3f MB), utilization rate: %.3f%%, %zu live "
        "tensors: ",
        node_id, total_bytes, static_cast<float>(total_bytes) / (1 << 20),
        static_cast<float>(total_bytes) / arena_size * 100.0,
        live_tensors.size());
    PrintIntVector(live_tensors);
    printf("\n");
    top_usage.Add(node_id, total_bytes, live_tensors);
  }
  top_usage.Print();
  printf("===End of %s ===\n\n", name.c_str());
}
}  // namespace tflite