xref: /aosp_15_r20/external/tensorflow/tensorflow/core/profiler/convert/op_stats_combiner.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_PROFILER_CONVERT_OP_STATS_COMBINER_H_
17 #define TENSORFLOW_CORE_PROFILER_CONVERT_OP_STATS_COMBINER_H_
18 
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/profiler/convert/op_metrics_db_combiner.h"
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
#include "tensorflow/core/profiler/utils/step_intersection.h"
26 
27 namespace tensorflow {
28 namespace profiler {
29 
30 // Whether a host is a coordinator.
31 bool IsCoordinator(bool no_accelerator_in_system, HardwareType hardware_type);
32 
33 // Translates the core id from single host to the one for multiple-host.
34 // We need this translation because the device_ordinal was assigned when a
35 // single host response was given. Now, we need a global core_id to distinguish
36 // it with multiple hosts.
37 uint32 GlobalCoreId(int host_id, uint32 device_ordinal);
38 
39 // Combines the src map into the dst map.
40 // The src map keys are local core_ids. The src_host_id is used to convert them
41 // into global core_ids used as keys in the dst map.
42 // REQUIRED: cores from src_host_id are not already in dst.
43 template <typename CoreIdMap>
CombineCoreIdMap(int src_host_id,const CoreIdMap & src,CoreIdMap * dst)44 void CombineCoreIdMap(int src_host_id, const CoreIdMap& src, CoreIdMap* dst) {
45   for (const auto& core_id_and_value : src) {
46     uint32 global_core_id = GlobalCoreId(src_host_id, core_id_and_value.first);
47     auto iter_and_inserted =
48         dst->insert({global_core_id, core_id_and_value.second});
49     DCHECK(iter_and_inserted.second)
50         << "Duplicated core_id: " << iter_and_inserted.first->first;
51   }
52 }
53 
54 // A struct that contains all the information that is needed to combine OpStats.
55 struct OpStatsInfo {
OpStatsInfoOpStatsInfo56   OpStatsInfo(const OpStats* op_stats, HardwareType hardware_type,
57               int src_host_id)
58       : op_stats(op_stats),
59         hardware_type(hardware_type),
60         src_host_id(src_host_id) {}
61   const OpStats* op_stats;
62   HardwareType hardware_type;
63   int src_host_id;
64 };
65 
66 // Returns true if there is no device (accelerator) in any of the hosts.
67 bool NoAcceleratorInSystem(const std::vector<OpStatsInfo>& all_op_stats_info);
68 
69 // Compute the StepIntersection to merge OpStats.
70 // Profiler will limit the number of steps to be at most <max_step_per_host>.
71 StepIntersection ComputeStepIntersectionToMergeOpStats(
72     const std::vector<OpStatsInfo>& all_op_stats_info,
73     uint32 max_step_per_host);
74 
75 // Combine all the OpStats in <all_op_stats_info> using the steps in range
76 // <step_intersection>. The result is stored in <combined_op_stats>.
77 void CombineAllOpStats(const std::vector<OpStatsInfo>& all_op_stats_info,
78                        const StepIntersection& step_intersection,
79                        OpStats* combined_op_stats);
80 
81 }  // namespace profiler
82 }  // namespace tensorflow
83 
84 #endif  // TENSORFLOW_CORE_PROFILER_CONVERT_OP_STATS_COMBINER_H_
85