1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/core/profiler/utils/step_intersection.h"
16
17 #include "tensorflow/core/lib/gtl/map_util.h"
18 #include "tensorflow/core/platform/logging.h"
19 #include "tensorflow/core/profiler/utils/timespan.h"
20
21 namespace tensorflow {
22 namespace profiler {
23
24 namespace {
25
26 // Returns the timespan in this step (across all cores).
StepTimespan(const PerCoreStepInfo & percore_stepinfo)27 Timespan StepTimespan(const PerCoreStepInfo& percore_stepinfo) {
28 uint64 min_ps = kuint64max;
29 uint64 max_ps = 0;
30 for (const auto& core_stepinfo : percore_stepinfo.step_info_per_core()) {
31 const auto& stepinfo = core_stepinfo.second;
32 uint64 begin_ps = stepinfo.begin_ps();
33 uint64 end_ps = begin_ps + stepinfo.duration_ps();
34 min_ps = std::min(min_ps, begin_ps);
35 max_ps = std::max(max_ps, end_ps);
36 }
37 return (min_ps < max_ps) ? Timespan::FromEndPoints(min_ps, max_ps)
38 : Timespan();
39 }
40
41 // Returns the timespan across all steps in the given step_db.
AllStepsTimespan(const StepDatabaseResult & step_db)42 Timespan AllStepsTimespan(const StepDatabaseResult& step_db) {
43 uint64 min_ps = kuint64max;
44 uint64 max_ps = 0;
45 for (const auto& step : step_db.step_sequence()) {
46 Timespan timespan = StepTimespan(step);
47 uint64 begin_ps = timespan.begin_ps();
48 uint64 end_ps = timespan.end_ps();
49 min_ps = std::min(min_ps, begin_ps);
50 max_ps = std::max(max_ps, end_ps);
51 }
52 return (min_ps < max_ps) ? Timespan::FromEndPoints(min_ps, max_ps)
53 : Timespan();
54 }
55
56 struct AlignmentInfo {
57 StepsAlignment alignment;
58 double similarity;
59 };
60
61 // Computes the similarity between the given two steps. The closer their
62 // timespans are, the larger is the similarity.
StepSimilarity(const PerCoreStepInfo & subordinate_step,const PerCoreStepInfo & chief_step)63 double StepSimilarity(const PerCoreStepInfo& subordinate_step,
64 const PerCoreStepInfo& chief_step) {
65 Timespan subordinate_timespan = StepTimespan(subordinate_step);
66 Timespan chief_timespan = StepTimespan(chief_step);
67 return chief_timespan.OverlappedDurationPs(subordinate_timespan);
68 }
69
70 // If the subordinate steps and the chief steps are aligned at the given anchor
71 // points (i.e. at the subordinate_anchor step on the subordinate sequence, at
72 // the chief_anchor step on the chief sequence), returns the corresponding
73 // AlignmentInfo.
ComputeAlignmentInfo(const StepDatabaseResult & subordinate,uint32 subordinate_anchor,const StepDatabaseResult & chief,uint32 chief_anchor)74 AlignmentInfo ComputeAlignmentInfo(const StepDatabaseResult& subordinate,
75 uint32 subordinate_anchor,
76 const StepDatabaseResult& chief,
77 uint32 chief_anchor) {
78 // Assumes that the step at subordinate_anchor on the subordinate sequence is
79 // aligned with the step at the chief_anchor on the chief sequence. Then the
80 // number of steps before the anchor is the minimum of the number of steps
81 // before the anchor in the subordinate and that before the anchor in the
82 // chief. Similarly, the number of steps after the anchor is the minimum of
83 // the number of steps after the anchor in the subordinate and that after the
84 // anchor in the chief.
85 uint32 pre_anchor_steps = std::min(subordinate_anchor, chief_anchor);
86 uint32 post_anchor_steps =
87 std::min(subordinate.step_sequence_size() - subordinate_anchor,
88 chief.step_sequence_size() - chief_anchor);
89 // total number of steps aligned = pre_anchor_steps + post_anchor_steps.
90 uint32 alignment_steps = pre_anchor_steps + post_anchor_steps;
91
92 double similarity = 0;
93 // Where the aligned steps begin on the subordinate sequence.
94 uint32 begin_subordinate_idx = subordinate_anchor - pre_anchor_steps;
95 // Where the aligned steps begin on the chief sequence.
96 uint32 begin_chief_idx = chief_anchor - pre_anchor_steps;
97
98 for (uint32 i = 0; i < alignment_steps; i++) {
99 // Accumulates the similarity at each step.
100 similarity +=
101 StepSimilarity(subordinate.step_sequence(begin_subordinate_idx + i),
102 chief.step_sequence(begin_chief_idx + i));
103 }
104 StepsAlignment alignment = {begin_subordinate_idx, begin_chief_idx,
105 alignment_steps};
106 return {alignment, similarity};
107 }
108
109 // Returns the best alignment for aligning subordinate against chief.
FindStepsAlignment(const StepDatabaseResult & subordinate,const StepDatabaseResult & chief)110 StepsAlignment FindStepsAlignment(const StepDatabaseResult& subordinate,
111 const StepDatabaseResult& chief) {
112 double max_similarity = -1;
113 StepsAlignment alignment = {0, 0, 0};
114 if (subordinate.step_sequence_size() == 0 || chief.step_sequence_size() == 0)
115 return alignment;
116 for (auto c = 0; c < chief.step_sequence_size(); c++) {
117 AlignmentInfo info =
118 ComputeAlignmentInfo(subordinate, /*subordinate_anchor=*/0, chief, c);
119 if (info.similarity <= max_similarity) continue;
120 max_similarity = info.similarity;
121 alignment = info.alignment;
122 }
123 for (auto s = 1; s < subordinate.step_sequence_size(); s++) {
124 // s starts at 1 instead of 0, because the loop above already considers
125 // (s=0, c=0).
126 AlignmentInfo info =
127 ComputeAlignmentInfo(subordinate, s, chief, /*chief_anchor=*/0);
128 if (info.similarity <= max_similarity) continue;
129 max_similarity = info.similarity;
130 alignment = info.alignment;
131 }
132 return alignment;
133 }
134
StringStepsAlignment(const StepsAlignment & alignment)135 std::string StringStepsAlignment(const StepsAlignment& alignment) {
136 return absl::StrCat(
137 "[begin_subordinate_idx: ", alignment.begin_subordinate_idx,
138 ", begin_chief_idx: ", alignment.begin_chief_idx,
139 ", num_steps: ", alignment.num_steps, "]");
140 }
141
StringDstStepNumbers(const std::vector<uint32> & step_numbers)142 std::string StringDstStepNumbers(const std::vector<uint32>& step_numbers) {
143 std::string str;
144 absl::StrAppend(&str, "[");
145 for (auto i = 0; i < step_numbers.size(); i++) {
146 if (i > 0) absl::StrAppend(&str, ", ");
147 absl::StrAppend(&str, step_numbers[i]);
148 }
149 absl::StrAppend(&str, "]");
150 return str;
151 }
152
StringSrcToDstIndexMap(uint32 src_first_step_idx,uint32 num_steps)153 std::string StringSrcToDstIndexMap(uint32 src_first_step_idx,
154 uint32 num_steps) {
155 std::string str;
156 absl::StrAppend(&str, "[");
157 for (auto i = 0; i < num_steps; i++) {
158 if (i > 0) absl::StrAppend(&str, ", ");
159 absl::StrAppend(&str, src_first_step_idx + i, ":", i);
160 }
161 absl::StrAppend(&str, "]");
162 return str;
163 }
164
165 } // namespace
166
StepIntersection(uint32 max_steps,const absl::flat_hash_map<uint32,const StepDatabaseResult * > & perhost_stepdb)167 StepIntersection::StepIntersection(
168 uint32 max_steps,
169 const absl::flat_hash_map<uint32, const StepDatabaseResult*>&
170 perhost_stepdb) {
171 empty_intersect_ = false;
172
173 // Figures out the host with the shortest timespan among their steps (called
174 // this host the "chief").
175 chief_host_id_ = kuint32max;
176 uint64 min_duration_ps = kuint64max;
177 const StepDatabaseResult* chief_step_db = nullptr;
178 for (const auto& hostid_stepdb : perhost_stepdb) {
179 auto host_id = hostid_stepdb.first;
180 const auto& step_db = hostid_stepdb.second;
181 Timespan timespan = AllStepsTimespan(*step_db);
182 if (timespan.duration_ps() < min_duration_ps) {
183 chief_host_id_ = host_id;
184 chief_step_db = step_db;
185 min_duration_ps = timespan.duration_ps();
186 }
187 }
188 if (chief_host_id_ == kuint32max) {
189 // There is no step at all on any host.
190 steps_dropped_ = 0;
191 begin_chief_idx_ = 0;
192 end_chief_idx_ = 0;
193 return;
194 }
195
196 uint32 max_begin_chief_idx = 0;
197 uint32 min_end_chief_idx = kuint32max;
198 // Aligns the steps in all hosts with those in the chief.
199 for (const auto& hostid_stepdb : perhost_stepdb) {
200 auto host_id = hostid_stepdb.first;
201 const auto& step_db = hostid_stepdb.second;
202 if (host_id == chief_host_id_) {
203 // Simply aligns with itself.
204 perhost_alignment_[host_id] = {
205 /*begin_subordinate_idx=*/0, /*begin_chief_idx=*/0,
206 static_cast<uint32>(step_db->step_sequence_size())};
207 } else {
208 perhost_alignment_[host_id] =
209 FindStepsAlignment(*step_db, *chief_step_db);
210 }
211 // Intersects this host's alignment with other hosts' alignments.
212 uint32 host_begin_chief_idx = perhost_alignment_[host_id].begin_chief_idx;
213 max_begin_chief_idx = std::max(max_begin_chief_idx, host_begin_chief_idx);
214 uint32 host_end_chief_idx = perhost_alignment_[host_id].begin_chief_idx +
215 perhost_alignment_[host_id].num_steps;
216 min_end_chief_idx = std::min(min_end_chief_idx, host_end_chief_idx);
217 }
218 if (max_begin_chief_idx > min_end_chief_idx) {
219 // The intersection is empty.
220 steps_dropped_ = 0;
221 begin_chief_idx_ = 0;
222 end_chief_idx_ = 0;
223 empty_intersect_ = true;
224 return;
225 }
226
227 begin_chief_idx_ = max_begin_chief_idx;
228
229 // Takes max_steps into account.
230 uint32 num_steps = min_end_chief_idx - max_begin_chief_idx;
231 if (num_steps > max_steps) {
232 steps_dropped_ = num_steps - max_steps;
233 // TODO(ckluk): Drops from both ends to avoid incomplete steps at the
234 // beginning and end of the profile.
235 end_chief_idx_ = max_begin_chief_idx + max_steps;
236 } else {
237 steps_dropped_ = 0;
238 end_chief_idx_ = min_end_chief_idx;
239 }
240 }
241
DstStepNumbers() const242 std::vector<uint32> StepIntersection::DstStepNumbers() const {
243 // TODO(ckluk): Honors training-loop boundaries (if more than one loop
244 // sampled).
245 std::vector<uint32> result;
246 result.reserve(NumSteps());
247 for (uint32 i = 0; i < NumSteps(); i++) {
248 result.push_back(i);
249 }
250 return result;
251 }
252
FirstStepIndex(uint32 host_id) const253 uint32 StepIntersection::FirstStepIndex(uint32 host_id) const {
254 const auto* alignment = gtl::FindOrNull(perhost_alignment_, host_id);
255 if (alignment == nullptr) return 0;
256 DCHECK(alignment->begin_chief_idx <= begin_chief_idx_);
257 uint32 shift = begin_chief_idx_ - alignment->begin_chief_idx;
258 uint32 begin_subordinate_idx = alignment->begin_subordinate_idx + shift;
259 return begin_subordinate_idx;
260 }
261
DebugString() const262 std::string StepIntersection::DebugString() const {
263 std::string str;
264 absl::StrAppend(&str, "chief host id_: ", chief_host_id_, "\n");
265 absl::StrAppend(&str, "begin_chief_idx_: ", begin_chief_idx_,
266 ", num_steps: ", NumSteps(), "\n");
267 absl::StrAppend(
268 &str, "DstStepNumbers(): ", StringDstStepNumbers(DstStepNumbers()), "\n");
269
270 std::vector<uint32> host_ids;
271 host_ids.reserve(perhost_alignment_.size());
272 for (const auto& hostid_alignment : perhost_alignment_) {
273 auto host_id = hostid_alignment.first;
274 host_ids.push_back(host_id);
275 }
276 absl::c_sort(host_ids);
277
278 absl::StrAppend(&str, "perhost_alignment:\n");
279 for (const auto host_id : host_ids) {
280 const auto* ptr = gtl::FindOrNull(perhost_alignment_, host_id);
281 if (ptr == nullptr) continue;
282 absl::StrAppend(&str, "host: ", host_id,
283 ", step-alignment: ", StringStepsAlignment(*ptr), "\n");
284 }
285 absl::StrAppend(&str, "SrcToDstIndexMap():\n");
286 for (const auto host_id : host_ids) {
287 absl::StrAppend(&str, "host: ", host_id, ", src-to-dst-index-map: ",
288 StringSrcToDstIndexMap(FirstStepIndex(host_id), NumSteps()),
289 "\n");
290 }
291 return str;
292 }
293
294 } // namespace profiler
295 } // namespace tensorflow
296