xref: /aosp_15_r20/external/grpc-grpc/src/core/ext/xds/xds_client_stats.h (revision cc02d7e222339f7a4f6ba5f422e6413f4bd931f2)
1 //
2 //
3 // Copyright 2018 gRPC authors.
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License");
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 //
18 
19 #ifndef GRPC_SRC_CORE_EXT_XDS_XDS_CLIENT_STATS_H
20 #define GRPC_SRC_CORE_EXT_XDS_XDS_CLIENT_STATS_H
21 
22 #include <grpc/support/port_platform.h>
23 
24 #include <atomic>
25 #include <cstdint>
26 #include <map>
27 #include <string>
28 #include <utility>
29 
30 #include "absl/base/thread_annotations.h"
31 #include "absl/strings/str_format.h"
32 #include "absl/strings/string_view.h"
33 
34 #include "src/core/ext/xds/xds_bootstrap.h"
35 #include "src/core/lib/channel/call_tracer.h"
36 #include "src/core/lib/gpr/useful.h"
37 #include "src/core/lib/gprpp/per_cpu.h"
38 #include "src/core/lib/gprpp/ref_counted.h"
39 #include "src/core/lib/gprpp/ref_counted_ptr.h"
40 #include "src/core/lib/gprpp/sync.h"
41 #include "src/core/resolver/endpoint_addresses.h"
42 
43 namespace grpc_core {
44 
45 // Forward declaration to avoid circular dependency.
46 class XdsClient;
47 
48 // Locality name.
49 class XdsLocalityName final : public RefCounted<XdsLocalityName> {
50  public:
51   struct Less {
operatorLess52     bool operator()(const XdsLocalityName* lhs,
53                     const XdsLocalityName* rhs) const {
54       if (lhs == nullptr || rhs == nullptr) return QsortCompare(lhs, rhs);
55       return lhs->Compare(*rhs) < 0;
56     }
57 
operatorLess58     bool operator()(const RefCountedPtr<XdsLocalityName>& lhs,
59                     const RefCountedPtr<XdsLocalityName>& rhs) const {
60       return (*this)(lhs.get(), rhs.get());
61     }
62   };
63 
XdsLocalityName(std::string region,std::string zone,std::string sub_zone)64   XdsLocalityName(std::string region, std::string zone, std::string sub_zone)
65       : region_(std::move(region)),
66         zone_(std::move(zone)),
67         sub_zone_(std::move(sub_zone)),
68         human_readable_string_(
69             absl::StrFormat("{region=\"%s\", zone=\"%s\", sub_zone=\"%s\"}",
70                             region_, zone_, sub_zone_)) {}
71 
72   bool operator==(const XdsLocalityName& other) const {
73     return region_ == other.region_ && zone_ == other.zone_ &&
74            sub_zone_ == other.sub_zone_;
75   }
76 
77   bool operator!=(const XdsLocalityName& other) const {
78     return !(*this == other);
79   }
80 
Compare(const XdsLocalityName & other)81   int Compare(const XdsLocalityName& other) const {
82     int cmp_result = region_.compare(other.region_);
83     if (cmp_result != 0) return cmp_result;
84     cmp_result = zone_.compare(other.zone_);
85     if (cmp_result != 0) return cmp_result;
86     return sub_zone_.compare(other.sub_zone_);
87   }
88 
region()89   const std::string& region() const { return region_; }
zone()90   const std::string& zone() const { return zone_; }
sub_zone()91   const std::string& sub_zone() const { return sub_zone_; }
92 
human_readable_string()93   const RefCountedStringValue& human_readable_string() const {
94     return human_readable_string_;
95   }
96 
97   // Channel args traits.
ChannelArgName()98   static absl::string_view ChannelArgName() {
99     return GRPC_ARG_NO_SUBCHANNEL_PREFIX "xds_locality_name";
100   }
ChannelArgsCompare(const XdsLocalityName * a,const XdsLocalityName * b)101   static int ChannelArgsCompare(const XdsLocalityName* a,
102                                 const XdsLocalityName* b) {
103     return a->Compare(*b);
104   }
105 
106  private:
107   std::string region_;
108   std::string zone_;
109   std::string sub_zone_;
110   RefCountedStringValue human_readable_string_;
111 };
112 
113 // Drop stats for an xds cluster.
114 class XdsClusterDropStats final : public RefCounted<XdsClusterDropStats> {
115  public:
116   // The total number of requests dropped for any reason is the sum of
117   // uncategorized_drops, and dropped_requests map.
118   using CategorizedDropsMap = std::map<std::string /* category */, uint64_t>;
119   struct Snapshot {
120     uint64_t uncategorized_drops = 0;
121     // The number of requests dropped for the specific drop categories
122     // outlined in the drop_overloads field in the EDS response.
123     CategorizedDropsMap categorized_drops;
124 
125     Snapshot& operator+=(const Snapshot& other) {
126       uncategorized_drops += other.uncategorized_drops;
127       for (const auto& p : other.categorized_drops) {
128         categorized_drops[p.first] += p.second;
129       }
130       return *this;
131     }
132 
IsZeroSnapshot133     bool IsZero() const {
134       if (uncategorized_drops != 0) return false;
135       for (const auto& p : categorized_drops) {
136         if (p.second != 0) return false;
137       }
138       return true;
139     }
140   };
141 
142   XdsClusterDropStats(RefCountedPtr<XdsClient> xds_client,
143                       absl::string_view lrs_server,
144                       absl::string_view cluster_name,
145                       absl::string_view eds_service_name);
146   ~XdsClusterDropStats() override;
147 
148   // Returns a snapshot of this instance and resets all the counters.
149   Snapshot GetSnapshotAndReset();
150 
151   void AddUncategorizedDrops();
152   void AddCallDropped(const std::string& category);
153 
154  private:
155   RefCountedPtr<XdsClient> xds_client_;
156   absl::string_view lrs_server_;
157   absl::string_view cluster_name_;
158   absl::string_view eds_service_name_;
159   std::atomic<uint64_t> uncategorized_drops_{0};
160   // Protects categorized_drops_. A mutex is necessary because the length of
161   // dropped_requests can be accessed by both the picker (from data plane
162   // mutex) and the load reporting thread (from the control plane combiner).
163   Mutex mu_;
164   CategorizedDropsMap categorized_drops_ ABSL_GUARDED_BY(mu_);
165 };
166 
167 // Locality stats for an xds cluster.
168 class XdsClusterLocalityStats final
169     : public RefCounted<XdsClusterLocalityStats> {
170  public:
171   struct BackendMetric {
172     uint64_t num_requests_finished_with_metric = 0;
173     double total_metric_value = 0;
174 
175     BackendMetric& operator+=(const BackendMetric& other) {
176       num_requests_finished_with_metric +=
177           other.num_requests_finished_with_metric;
178       total_metric_value += other.total_metric_value;
179       return *this;
180     }
181 
IsZeroBackendMetric182     bool IsZero() const {
183       return num_requests_finished_with_metric == 0 && total_metric_value == 0;
184     }
185   };
186 
187   struct Snapshot {
188     uint64_t total_successful_requests = 0;
189     uint64_t total_requests_in_progress = 0;
190     uint64_t total_error_requests = 0;
191     uint64_t total_issued_requests = 0;
192     std::map<std::string, BackendMetric> backend_metrics;
193 
194     Snapshot& operator+=(const Snapshot& other) {
195       total_successful_requests += other.total_successful_requests;
196       total_requests_in_progress += other.total_requests_in_progress;
197       total_error_requests += other.total_error_requests;
198       total_issued_requests += other.total_issued_requests;
199       for (const auto& p : other.backend_metrics) {
200         backend_metrics[p.first] += p.second;
201       }
202       return *this;
203     }
204 
IsZeroSnapshot205     bool IsZero() const {
206       if (total_successful_requests != 0 || total_requests_in_progress != 0 ||
207           total_error_requests != 0 || total_issued_requests != 0) {
208         return false;
209       }
210       for (const auto& p : backend_metrics) {
211         if (!p.second.IsZero()) return false;
212       }
213       return true;
214     }
215   };
216 
217   XdsClusterLocalityStats(RefCountedPtr<XdsClient> xds_client,
218                           absl::string_view lrs_server,
219                           absl::string_view cluster_name,
220                           absl::string_view eds_service_name,
221                           RefCountedPtr<XdsLocalityName> name);
222   ~XdsClusterLocalityStats() override;
223 
224   // Returns a snapshot of this instance and resets all the counters.
225   Snapshot GetSnapshotAndReset();
226 
227   void AddCallStarted();
228   void AddCallFinished(const std::map<absl::string_view, double>* named_metrics,
229                        bool fail = false);
230 
locality_name()231   XdsLocalityName* locality_name() const { return name_.get(); }
232 
233  private:
234   struct Stats {
235     std::atomic<uint64_t> total_successful_requests{0};
236     std::atomic<uint64_t> total_requests_in_progress{0};
237     std::atomic<uint64_t> total_error_requests{0};
238     std::atomic<uint64_t> total_issued_requests{0};
239 
240     // Protects backend_metrics. A mutex is necessary because the length of
241     // backend_metrics_ can be accessed by both the callback intercepting the
242     // call's recv_trailing_metadata and the load reporting thread.
243     Mutex backend_metrics_mu;
244     std::map<std::string, BackendMetric> backend_metrics
245         ABSL_GUARDED_BY(backend_metrics_mu);
246   };
247 
248   RefCountedPtr<XdsClient> xds_client_;
249   absl::string_view lrs_server_;
250   absl::string_view cluster_name_;
251   absl::string_view eds_service_name_;
252   RefCountedPtr<XdsLocalityName> name_;
253   PerCpu<Stats> stats_{PerCpuOptions().SetMaxShards(32).SetCpusPerShard(4)};
254 };
255 
256 }  // namespace grpc_core
257 
258 #endif  // GRPC_SRC_CORE_EXT_XDS_XDS_CLIENT_STATS_H
259