xref: /aosp_15_r20/external/tensorflow/tensorflow/core/profiler/utils/step_intersection.cc (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/core/profiler/utils/step_intersection.h"
16 
17 #include "tensorflow/core/lib/gtl/map_util.h"
18 #include "tensorflow/core/platform/logging.h"
19 #include "tensorflow/core/profiler/utils/timespan.h"
20 
21 namespace tensorflow {
22 namespace profiler {
23 
24 namespace {
25 
26 // Returns the timespan in this step (across all cores).
StepTimespan(const PerCoreStepInfo & percore_stepinfo)27 Timespan StepTimespan(const PerCoreStepInfo& percore_stepinfo) {
28   uint64 min_ps = kuint64max;
29   uint64 max_ps = 0;
30   for (const auto& core_stepinfo : percore_stepinfo.step_info_per_core()) {
31     const auto& stepinfo = core_stepinfo.second;
32     uint64 begin_ps = stepinfo.begin_ps();
33     uint64 end_ps = begin_ps + stepinfo.duration_ps();
34     min_ps = std::min(min_ps, begin_ps);
35     max_ps = std::max(max_ps, end_ps);
36   }
37   return (min_ps < max_ps) ? Timespan::FromEndPoints(min_ps, max_ps)
38                            : Timespan();
39 }
40 
41 // Returns the timespan across all steps in the given step_db.
AllStepsTimespan(const StepDatabaseResult & step_db)42 Timespan AllStepsTimespan(const StepDatabaseResult& step_db) {
43   uint64 min_ps = kuint64max;
44   uint64 max_ps = 0;
45   for (const auto& step : step_db.step_sequence()) {
46     Timespan timespan = StepTimespan(step);
47     uint64 begin_ps = timespan.begin_ps();
48     uint64 end_ps = timespan.end_ps();
49     min_ps = std::min(min_ps, begin_ps);
50     max_ps = std::max(max_ps, end_ps);
51   }
52   return (min_ps < max_ps) ? Timespan::FromEndPoints(min_ps, max_ps)
53                            : Timespan();
54 }
55 
56 struct AlignmentInfo {
57   StepsAlignment alignment;
58   double similarity;
59 };
60 
61 // Computes the similarity between the given two steps. The closer their
62 // timespans are, the larger is the similarity.
StepSimilarity(const PerCoreStepInfo & subordinate_step,const PerCoreStepInfo & chief_step)63 double StepSimilarity(const PerCoreStepInfo& subordinate_step,
64                       const PerCoreStepInfo& chief_step) {
65   Timespan subordinate_timespan = StepTimespan(subordinate_step);
66   Timespan chief_timespan = StepTimespan(chief_step);
67   return chief_timespan.OverlappedDurationPs(subordinate_timespan);
68 }
69 
70 // If the subordinate steps and the chief steps are aligned at the given anchor
71 // points (i.e. at the subordinate_anchor step on the subordinate sequence, at
72 // the chief_anchor step on the chief sequence), returns the corresponding
73 // AlignmentInfo.
ComputeAlignmentInfo(const StepDatabaseResult & subordinate,uint32 subordinate_anchor,const StepDatabaseResult & chief,uint32 chief_anchor)74 AlignmentInfo ComputeAlignmentInfo(const StepDatabaseResult& subordinate,
75                                    uint32 subordinate_anchor,
76                                    const StepDatabaseResult& chief,
77                                    uint32 chief_anchor) {
78   // Assumes that the step at subordinate_anchor on the subordinate sequence is
79   // aligned with the step at the chief_anchor on the chief sequence. Then the
80   // number of steps before the anchor is the minimum of the number of steps
81   // before the anchor in the subordinate and that before the anchor in the
82   // chief. Similarly, the number of steps after the anchor is the minimum of
83   // the number of steps after the anchor in the subordinate and that after the
84   // anchor in the chief.
85   uint32 pre_anchor_steps = std::min(subordinate_anchor, chief_anchor);
86   uint32 post_anchor_steps =
87       std::min(subordinate.step_sequence_size() - subordinate_anchor,
88                chief.step_sequence_size() - chief_anchor);
89   // total number of steps aligned = pre_anchor_steps + post_anchor_steps.
90   uint32 alignment_steps = pre_anchor_steps + post_anchor_steps;
91 
92   double similarity = 0;
93   // Where the aligned steps begin on the subordinate sequence.
94   uint32 begin_subordinate_idx = subordinate_anchor - pre_anchor_steps;
95   // Where the aligned steps begin on the chief sequence.
96   uint32 begin_chief_idx = chief_anchor - pre_anchor_steps;
97 
98   for (uint32 i = 0; i < alignment_steps; i++) {
99     // Accumulates the similarity at each step.
100     similarity +=
101         StepSimilarity(subordinate.step_sequence(begin_subordinate_idx + i),
102                        chief.step_sequence(begin_chief_idx + i));
103   }
104   StepsAlignment alignment = {begin_subordinate_idx, begin_chief_idx,
105                               alignment_steps};
106   return {alignment, similarity};
107 }
108 
109 // Returns the best alignment for aligning subordinate against chief.
FindStepsAlignment(const StepDatabaseResult & subordinate,const StepDatabaseResult & chief)110 StepsAlignment FindStepsAlignment(const StepDatabaseResult& subordinate,
111                                   const StepDatabaseResult& chief) {
112   double max_similarity = -1;
113   StepsAlignment alignment = {0, 0, 0};
114   if (subordinate.step_sequence_size() == 0 || chief.step_sequence_size() == 0)
115     return alignment;
116   for (auto c = 0; c < chief.step_sequence_size(); c++) {
117     AlignmentInfo info =
118         ComputeAlignmentInfo(subordinate, /*subordinate_anchor=*/0, chief, c);
119     if (info.similarity <= max_similarity) continue;
120     max_similarity = info.similarity;
121     alignment = info.alignment;
122   }
123   for (auto s = 1; s < subordinate.step_sequence_size(); s++) {
124     // s starts at 1 instead of 0, because the loop above already considers
125     // (s=0, c=0).
126     AlignmentInfo info =
127         ComputeAlignmentInfo(subordinate, s, chief, /*chief_anchor=*/0);
128     if (info.similarity <= max_similarity) continue;
129     max_similarity = info.similarity;
130     alignment = info.alignment;
131   }
132   return alignment;
133 }
134 
StringStepsAlignment(const StepsAlignment & alignment)135 std::string StringStepsAlignment(const StepsAlignment& alignment) {
136   return absl::StrCat(
137       "[begin_subordinate_idx: ", alignment.begin_subordinate_idx,
138       ", begin_chief_idx: ", alignment.begin_chief_idx,
139       ", num_steps: ", alignment.num_steps, "]");
140 }
141 
StringDstStepNumbers(const std::vector<uint32> & step_numbers)142 std::string StringDstStepNumbers(const std::vector<uint32>& step_numbers) {
143   std::string str;
144   absl::StrAppend(&str, "[");
145   for (auto i = 0; i < step_numbers.size(); i++) {
146     if (i > 0) absl::StrAppend(&str, ", ");
147     absl::StrAppend(&str, step_numbers[i]);
148   }
149   absl::StrAppend(&str, "]");
150   return str;
151 }
152 
StringSrcToDstIndexMap(uint32 src_first_step_idx,uint32 num_steps)153 std::string StringSrcToDstIndexMap(uint32 src_first_step_idx,
154                                    uint32 num_steps) {
155   std::string str;
156   absl::StrAppend(&str, "[");
157   for (auto i = 0; i < num_steps; i++) {
158     if (i > 0) absl::StrAppend(&str, ", ");
159     absl::StrAppend(&str, src_first_step_idx + i, ":", i);
160   }
161   absl::StrAppend(&str, "]");
162   return str;
163 }
164 
165 }  // namespace
166 
StepIntersection(uint32 max_steps,const absl::flat_hash_map<uint32,const StepDatabaseResult * > & perhost_stepdb)167 StepIntersection::StepIntersection(
168     uint32 max_steps,
169     const absl::flat_hash_map<uint32, const StepDatabaseResult*>&
170         perhost_stepdb) {
171   empty_intersect_ = false;
172 
173   // Figures out the host with the shortest timespan among their steps (called
174   // this host the "chief").
175   chief_host_id_ = kuint32max;
176   uint64 min_duration_ps = kuint64max;
177   const StepDatabaseResult* chief_step_db = nullptr;
178   for (const auto& hostid_stepdb : perhost_stepdb) {
179     auto host_id = hostid_stepdb.first;
180     const auto& step_db = hostid_stepdb.second;
181     Timespan timespan = AllStepsTimespan(*step_db);
182     if (timespan.duration_ps() < min_duration_ps) {
183       chief_host_id_ = host_id;
184       chief_step_db = step_db;
185       min_duration_ps = timespan.duration_ps();
186     }
187   }
188   if (chief_host_id_ == kuint32max) {
189     // There is no step at all on any host.
190     steps_dropped_ = 0;
191     begin_chief_idx_ = 0;
192     end_chief_idx_ = 0;
193     return;
194   }
195 
196   uint32 max_begin_chief_idx = 0;
197   uint32 min_end_chief_idx = kuint32max;
198   // Aligns the steps in all hosts with those in the chief.
199   for (const auto& hostid_stepdb : perhost_stepdb) {
200     auto host_id = hostid_stepdb.first;
201     const auto& step_db = hostid_stepdb.second;
202     if (host_id == chief_host_id_) {
203       // Simply aligns with itself.
204       perhost_alignment_[host_id] = {
205           /*begin_subordinate_idx=*/0, /*begin_chief_idx=*/0,
206           static_cast<uint32>(step_db->step_sequence_size())};
207     } else {
208       perhost_alignment_[host_id] =
209           FindStepsAlignment(*step_db, *chief_step_db);
210     }
211     // Intersects this host's alignment with other hosts' alignments.
212     uint32 host_begin_chief_idx = perhost_alignment_[host_id].begin_chief_idx;
213     max_begin_chief_idx = std::max(max_begin_chief_idx, host_begin_chief_idx);
214     uint32 host_end_chief_idx = perhost_alignment_[host_id].begin_chief_idx +
215                                 perhost_alignment_[host_id].num_steps;
216     min_end_chief_idx = std::min(min_end_chief_idx, host_end_chief_idx);
217   }
218   if (max_begin_chief_idx > min_end_chief_idx) {
219     // The intersection is empty.
220     steps_dropped_ = 0;
221     begin_chief_idx_ = 0;
222     end_chief_idx_ = 0;
223     empty_intersect_ = true;
224     return;
225   }
226 
227   begin_chief_idx_ = max_begin_chief_idx;
228 
229   // Takes max_steps into account.
230   uint32 num_steps = min_end_chief_idx - max_begin_chief_idx;
231   if (num_steps > max_steps) {
232     steps_dropped_ = num_steps - max_steps;
233     // TODO(ckluk): Drops from both ends to avoid incomplete steps at the
234     // beginning and end of the profile.
235     end_chief_idx_ = max_begin_chief_idx + max_steps;
236   } else {
237     steps_dropped_ = 0;
238     end_chief_idx_ = min_end_chief_idx;
239   }
240 }
241 
DstStepNumbers() const242 std::vector<uint32> StepIntersection::DstStepNumbers() const {
243   // TODO(ckluk): Honors training-loop boundaries (if more than one loop
244   // sampled).
245   std::vector<uint32> result;
246   result.reserve(NumSteps());
247   for (uint32 i = 0; i < NumSteps(); i++) {
248     result.push_back(i);
249   }
250   return result;
251 }
252 
FirstStepIndex(uint32 host_id) const253 uint32 StepIntersection::FirstStepIndex(uint32 host_id) const {
254   const auto* alignment = gtl::FindOrNull(perhost_alignment_, host_id);
255   if (alignment == nullptr) return 0;
256   DCHECK(alignment->begin_chief_idx <= begin_chief_idx_);
257   uint32 shift = begin_chief_idx_ - alignment->begin_chief_idx;
258   uint32 begin_subordinate_idx = alignment->begin_subordinate_idx + shift;
259   return begin_subordinate_idx;
260 }
261 
DebugString() const262 std::string StepIntersection::DebugString() const {
263   std::string str;
264   absl::StrAppend(&str, "chief host id_: ", chief_host_id_, "\n");
265   absl::StrAppend(&str, "begin_chief_idx_: ", begin_chief_idx_,
266                   ", num_steps: ", NumSteps(), "\n");
267   absl::StrAppend(
268       &str, "DstStepNumbers(): ", StringDstStepNumbers(DstStepNumbers()), "\n");
269 
270   std::vector<uint32> host_ids;
271   host_ids.reserve(perhost_alignment_.size());
272   for (const auto& hostid_alignment : perhost_alignment_) {
273     auto host_id = hostid_alignment.first;
274     host_ids.push_back(host_id);
275   }
276   absl::c_sort(host_ids);
277 
278   absl::StrAppend(&str, "perhost_alignment:\n");
279   for (const auto host_id : host_ids) {
280     const auto* ptr = gtl::FindOrNull(perhost_alignment_, host_id);
281     if (ptr == nullptr) continue;
282     absl::StrAppend(&str, "host: ", host_id,
283                     ", step-alignment: ", StringStepsAlignment(*ptr), "\n");
284   }
285   absl::StrAppend(&str, "SrcToDstIndexMap():\n");
286   for (const auto host_id : host_ids) {
287     absl::StrAppend(&str, "host: ", host_id, ", src-to-dst-index-map: ",
288                     StringSrcToDstIndexMap(FirstStepIndex(host_id), NumSteps()),
289                     "\n");
290   }
291   return str;
292 }
293 
294 }  // namespace profiler
295 }  // namespace tensorflow
296