/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include <cstdio>
#include <functional>
#include <limits>
#include <queue>
#include <string>
#include <vector>

#include "tensorflow/lite/simple_memory_arena.h"

namespace tflite {
namespace {
// Same as the constant defined in tensorflow/lite/arena_planner.cc.
constexpr int32_t kNodeNotAssigned = std::numeric_limits<int32_t>::max();

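// Prints `v` in a compact form, collapsing runs of consecutive integers into
// "first-last" ranges, e.g. {1, 2, 3, 7, 9, 10} is printed as
// "[1-3,7,9,10]".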
void PrintIntVector(const std::vector<int>& v) {
  if (v.empty()) {
    printf("[]");
    return;
  }

  int range_start = v[0];
  int range_end = range_start;
  std::function<void(const char*)> print_range = [&](const char* suffix) {
    if (range_end == range_start) {
      printf("%d%s", range_start, suffix);
    } else if (range_end == range_start + 1) {
      printf("%d,%d%s", range_start, range_end, suffix);
    } else {
      printf("%d-%d%s", range_start, range_end, suffix);
    }
  };

  printf("[");
  for (int i = 1; i < v.size(); ++i) {
    int current = v[i];
    if (current == range_end + 1) {
      range_end = current;
    } else {
      print_range(",");
      range_start = range_end = current;
    }
  }
  print_range("]");
}

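// Memory usage of a single node (layer): the node's id, the total bytes of
// all tensors live during its execution, and the ids of those live tensors.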
struct PerLayerInfo {
  PerLayerInfo() {}
  PerLayerInfo(int id, size_t bytes, const std::vector<int>& tensors)
      : node_id(id), total_bytes(bytes), live_tensors(tensors) {}
  int node_id = 0;
  size_t total_bytes = 0;
  std::vector<int> live_tensors;
};
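// Returns true when `l` consumes more memory than `r`. Used as the comparator
// of a std::priority_queue so that the queue acts as a min-heap keyed on
// total_bytes.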
struct PerLayerInfoGreater {
  bool operator()(const PerLayerInfo& l, const PerLayerInfo& r) {
    return l.total_bytes > r.total_bytes;
  }
};
class PerLayerMinHeap
    : public std::priority_queue<PerLayerInfo, std::vector<PerLayerInfo>,
                                 PerLayerInfoGreater> {
 public:
  // Just to expose iterators to simplify iterating over contained elements.
  std::vector<PerLayerInfo>::const_iterator begin() const { return c.begin(); }
  std::vector<PerLayerInfo>::const_iterator end() const { return c.end(); }
};

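// Tracks the `top_k` most memory-consuming layers seen so far. A min-heap is
// used so that each candidate only needs to be compared against the smallest
// entry currently tracked.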
class TopKLayers {
 public:
  TopKLayers(size_t top_k, size_t arena_size)
      : top_k_(top_k), arena_size_(arena_size) {}

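  // Records the memory usage of one node. Until `top_k_` entries have been
  // seen, every node is kept; afterwards, a new node evicts the smallest
  // tracked entry only if it consumes at least as many bytes.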
  void Add(int node_id, size_t total_bytes,
           const std::vector<int>& live_tensors) {
    if (topk_usage_.size() < top_k_) {
      topk_usage_.emplace(PerLayerInfo(node_id, total_bytes, live_tensors));
      return;
    }
    if (total_bytes < topk_usage_.top().total_bytes) return;
    topk_usage_.pop();
    topk_usage_.emplace(PerLayerInfo(node_id, total_bytes, live_tensors));
  }

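  // Prints the tracked layers in decreasing order of their memory usage.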
  void Print() const {
    printf("\nTop %zu memory-consuming layers:\n",
           topk_usage_.size() < top_k_ ? topk_usage_.size() : top_k_);
    // As we use a min-heap but want to print usage in decreasing order, we
    // collect pointers to the top memory-consuming layers in a temporary
    // vector and sort it.
    std::vector<const PerLayerInfo*> tops;
    for (const auto& usage : topk_usage_) tops.push_back(&usage);
    std::sort(tops.begin(), tops.end(),
              [](const PerLayerInfo* l, const PerLayerInfo* r) {
                return l->total_bytes > r->total_bytes;
              });
    for (const auto* usage : tops) {
      printf(
          "Node %d: %zu bytes (%.3f MB), utilization rate: %.3f%%, %zu live "
          "tensors: ",
          usage->node_id, usage->total_bytes,
          static_cast<float>(usage->total_bytes) / (1 << 20),
          static_cast<float>(usage->total_bytes) / arena_size_ * 100.0,
          usage->live_tensors.size());
      PrintIntVector(usage->live_tensors);
      printf("\n");
    }
    printf("\n");
  }

 private:
  const size_t top_k_;
  const size_t arena_size_;
  PerLayerMinHeap topk_usage_;
};
}  // namespace

// Corresponding weak declaration found in lite/simple_memory_arena.cc.
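// Dumps a human-readable summary of `allocs` within the arena `name`: each
// tensor's lifetime and size, the largest tensors, and per-node memory usage
// in execution order, followed by the top-5 memory-consuming nodes. An
// illustrative line of the per-node output (the values are hypothetical):
//   Node 3: 1048576 bytes (1.000 MB), utilization rate: 25.000%, 3 live
//   tensors: [2-4]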
void DumpArenaInfo(const std::string& name,
                   const std::vector<int>& execution_plan, size_t arena_size,
                   const std::vector<ArenaAllocWithUsageInterval>& allocs) {
  if (allocs.empty() || execution_plan.empty()) return;

  const int max_node_id =
      *std::max_element(execution_plan.begin(), execution_plan.end());

  printf("=== Beginning of %s ===\n", name.c_str());
  printf("Total size is %zu bytes (%.3f MB), holding %zu tensors.\n",
         arena_size, static_cast<float>(arena_size) / (1 << 20), allocs.size());
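  // While printing each allocation below, also track the tensor(s) of
  // maximal size.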
  std::vector<int> max_size_tensors;
  size_t max_tensor_size = 0;
  for (const auto& alloc_info : allocs) {
    printf("tensor %d: life_span: node [%d, %d], size: %zu bytes (%.3f MB).\n",
           alloc_info.tensor, alloc_info.first_node,
           alloc_info.last_node == kNodeNotAssigned ? max_node_id
                                                    : alloc_info.last_node,
           alloc_info.size, static_cast<float>(alloc_info.size) / (1 << 20));
    if (alloc_info.size > max_tensor_size) {
      max_size_tensors.clear();
      max_size_tensors.push_back(alloc_info.tensor);
      max_tensor_size = alloc_info.size;
    } else if (alloc_info.size == max_tensor_size) {
      max_size_tensors.push_back(alloc_info.tensor);
    }
  }
  std::sort(max_size_tensors.begin(), max_size_tensors.end());
  printf("%zu tensors are of the same max size (%zu B (%.3f MB)): ",
         max_size_tensors.size(), max_tensor_size,
         static_cast<float>(max_tensor_size) / (1 << 20));
  PrintIntVector(max_size_tensors);
166 printf("\nPer-layer-info in the order of op execution:\n");
167 // A straightforward way of computing per-op memory consumption
168 // in the order of O(execution_plan.size() * allocs.size().
169 std::vector<size_t> per_op_mem_bytes(execution_plan.size());
170 // Track top 5 layers that consume most memory.
171 TopKLayers top_usage(5, arena_size);
172 for (int i = 0; i < execution_plan.size(); ++i) {
173 const int node_id = execution_plan[i];
174 size_t total_bytes = 0;
175 std::vector<int> live_tensors;
176 for (const auto& alloc_info : allocs) {
177 if (node_id >= alloc_info.first_node && node_id <= alloc_info.last_node) {
178 total_bytes += alloc_info.size;
179 live_tensors.push_back(alloc_info.tensor);
180 }
181 }
182 per_op_mem_bytes[i] = total_bytes;
183 std::sort(live_tensors.begin(), live_tensors.end());
184 printf(
185 "Node %d: %zu bytes (%.3f MB), utilization rate: %.3f%%, %zu live "
186 "tensors: ",
187 node_id, total_bytes, static_cast<float>(total_bytes) / (1 << 20),
188 static_cast<float>(total_bytes) / arena_size * 100.0,
189 live_tensors.size());
190 PrintIntVector(live_tensors);
191 printf("\n");
192 top_usage.Add(node_id, total_bytes, live_tensors);
193 }
194 top_usage.Print();
195 printf("===End of %s ===\n\n", name.c_str());
196 }
}  // namespace tflite