// xref: /aosp_15_r20/external/grpc-grpc/src/core/load_balancing/lb_policy.h (revision cc02d7e222339f7a4f6ba5f422e6413f4bd931f2)
1 //
2 // Copyright 2015 gRPC authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #ifndef GRPC_SRC_CORE_LOAD_BALANCING_LB_POLICY_H
18 #define GRPC_SRC_CORE_LOAD_BALANCING_LB_POLICY_H
19 
20 #include <grpc/support/port_platform.h>
21 
22 #include <stddef.h>
23 #include <stdint.h>
24 
25 #include <memory>
26 #include <string>
27 #include <utility>
28 #include <vector>
29 
30 #include "absl/base/thread_annotations.h"
31 #include "absl/status/status.h"
32 #include "absl/status/statusor.h"
33 #include "absl/strings/string_view.h"
34 #include "absl/types/optional.h"
35 #include "absl/types/variant.h"
36 
37 #include <grpc/event_engine/event_engine.h>
38 #include <grpc/grpc.h>
39 #include <grpc/impl/connectivity_state.h>
40 
41 #include "src/core/lib/channel/channel_args.h"
42 #include "src/core/lib/channel/metrics.h"
43 #include "src/core/lib/debug/trace.h"
44 #include "src/core/lib/gprpp/debug_location.h"
45 #include "src/core/lib/gprpp/dual_ref_counted.h"
46 #include "src/core/lib/gprpp/orphanable.h"
47 #include "src/core/lib/gprpp/ref_counted.h"
48 #include "src/core/lib/gprpp/ref_counted_ptr.h"
49 #include "src/core/lib/gprpp/sync.h"
50 #include "src/core/lib/gprpp/work_serializer.h"
51 #include "src/core/lib/iomgr/iomgr_fwd.h"
52 #include "src/core/lib/iomgr/resolved_address.h"
53 #include "src/core/load_balancing/backend_metric_data.h"
54 #include "src/core/load_balancing/subchannel_interface.h"
55 #include "src/core/resolver/endpoint_addresses.h"
56 
57 namespace grpc_core {
58 
59 extern DebugOnlyTraceFlag grpc_trace_lb_policy_refcount;
60 
61 /// Interface for load balancing policies.
62 ///
63 /// The following concepts are used here:
64 ///
65 /// Channel: An abstraction that manages connections to backend servers
66 ///   on behalf of a client application.  The application creates a channel
67 ///   for a given server name and then sends calls (RPCs) on it, and the
68 ///   channel figures out which backend server to send each call to.  A channel
69 ///   contains a resolver, a load balancing policy (or a tree of LB policies),
70 ///   and a set of one or more subchannels.
71 ///
72 /// Subchannel: A subchannel represents a connection to one backend server.
73 ///   The LB policy decides which subchannels to create, manages the
74 ///   connectivity state of those subchannels, and decides which subchannel
75 ///   to send any given call to.
76 ///
77 /// Resolver: A plugin that takes a gRPC server URI and resolves it to a
78 ///   list of one or more addresses and a service config, as described
79 ///   in https://github.com/grpc/grpc/blob/master/doc/naming.md.  See
80 ///   resolver.h for the resolver API.
81 ///
82 /// Load Balancing (LB) Policy: A plugin that takes a list of addresses
83 ///   from the resolver, maintains and manages a subchannel for each
84 ///   backend address, and decides which subchannel to send each call on.
85 ///   An LB policy has two parts:
86 ///   - A LoadBalancingPolicy, which deals with the control plane work of
87 ///     managing subchannels.
88 ///   - A SubchannelPicker, which handles the data plane work of
89 ///     determining which subchannel a given call should be sent on.
90 
/// LoadBalancingPolicy API.
92 ///
93 /// Note: All methods with a "Locked" suffix must be called from the
94 /// work_serializer passed to the constructor.
95 ///
96 /// Any I/O done by the LB policy should be done under the pollset_set
97 /// returned by \a interested_parties().
98 // TODO(roth): Once we move to EventManager-based polling, remove the
99 // interested_parties() hooks from the API.
100 class LoadBalancingPolicy : public InternallyRefCounted<LoadBalancingPolicy> {
101  public:
102   /// Interface for accessing per-call state.
103   /// Implemented by the client channel and used by the SubchannelPicker.
104   class CallState {
105    public:
106     CallState() = default;
107     virtual ~CallState() = default;
108 
109     /// Allocates memory associated with the call, which will be
110     /// automatically freed when the call is complete.
111     /// It is more efficient to use this than to allocate memory directly
112     /// for allocations that need to be made on a per-call basis.
113     virtual void* Alloc(size_t size) = 0;
114   };
115 
116   /// Interface for accessing metadata.
117   /// Implemented by the client channel and used by the SubchannelPicker.
118   class MetadataInterface {
119    public:
120     virtual ~MetadataInterface() = default;
121 
122     //////////////////////////////////////////////////////////////////////////
123     // TODO(ctiller): DO NOT MAKE THIS A PUBLIC API YET
124     // This needs some API design to ensure we can add/remove/replace metadata
125     // keys... we're deliberately not doing so to save some time whilst
126     // cleaning up the internal metadata representation, but we should add
127     // something back before making this a public API.
128     //////////////////////////////////////////////////////////////////////////
129 
130     /// Adds a key/value pair.
131     /// Does NOT take ownership of \a key or \a value.
132     /// Implementations must ensure that the key and value remain alive
133     /// until the call ends.  If desired, they may be allocated via
134     /// CallState::Alloc().
135     virtual void Add(absl::string_view key, absl::string_view value) = 0;
136 
137     /// Produce a vector of metadata key/value strings for tests.
138     virtual std::vector<std::pair<std::string, std::string>>
139     TestOnlyCopyToVector() = 0;
140 
141     virtual absl::optional<absl::string_view> Lookup(
142         absl::string_view key, std::string* buffer) const = 0;
143   };
144 
145   /// Arguments used when picking a subchannel for a call.
146   struct PickArgs {
147     /// The path of the call.  Indicates the RPC service and method name.
148     absl::string_view path;
149     /// Initial metadata associated with the picking call.
150     /// The LB policy may use the existing metadata to influence its routing
151     /// decision, and it may add new metadata elements to be sent with the
152     /// call to the chosen backend.
153     // TODO(roth): Before making the LB policy API public, consider
154     // whether this is the right way to expose metadata to the picker.
155     // This approach means that if a pick modifies metadata but then we
156     // discard the pick because the subchannel is not connected, the
157     // metadata change will still have been made.  Maybe we actually
158     // want to somehow provide metadata changes in PickResult::Complete
159     // instead?  Or maybe we use a CallTracer that can add metadata when
160     // the call actually starts on the subchannel?
161     MetadataInterface* initial_metadata;
162     /// An interface for accessing call state.  Can be used to allocate
163     /// memory associated with the call in an efficient way.
164     CallState* call_state;
165   };
166 
167   /// Interface for accessing backend metric data.
168   /// Implemented by the client channel and used by
169   /// SubchannelCallTrackerInterface.
170   class BackendMetricAccessor {
171    public:
172     virtual ~BackendMetricAccessor() = default;
173 
174     /// Returns the backend metric data returned by the server for the call,
175     /// or null if no backend metric data was returned.
176     virtual const BackendMetricData* GetBackendMetricData() = 0;
177   };
178 
179   /// Interface for tracking subchannel calls.
180   /// Implemented by LB policy and used by the channel.
181   // TODO(roth): Before making this API public, consider whether we
182   // should just replace this with a CallTracer, similar to what Java does.
183   class SubchannelCallTrackerInterface {
184    public:
185     virtual ~SubchannelCallTrackerInterface() = default;
186 
187     /// Called when a subchannel call is started after an LB pick.
188     virtual void Start() = 0;
189 
190     /// Called when a subchannel call is completed.
191     /// The metadata may be modified by the implementation.  However, the
192     /// implementation does not take ownership, so any data that needs to be
193     /// used after returning must be copied.
194     struct FinishArgs {
195       absl::string_view peer_address;
196       absl::Status status;
197       MetadataInterface* trailing_metadata;
198       BackendMetricAccessor* backend_metric_accessor;
199     };
200     virtual void Finish(FinishArgs args) = 0;
201   };
202 
203   /// The result of picking a subchannel for a call.
204   struct PickResult {
205     /// A successful pick.
206     struct Complete {
207       /// The subchannel to be used for the call.  Must be non-null.
208       RefCountedPtr<SubchannelInterface> subchannel;
209 
210       /// Optionally set by the LB policy when it wishes to be notified
211       /// about the resulting subchannel call.
212       /// Note that if the pick is abandoned by the channel, this may never
213       /// be used.
214       std::unique_ptr<SubchannelCallTrackerInterface> subchannel_call_tracker;
215 
216       explicit Complete(
217           RefCountedPtr<SubchannelInterface> sc,
218           std::unique_ptr<SubchannelCallTrackerInterface> tracker = nullptr)
subchannelPickResult::Complete219           : subchannel(std::move(sc)),
220             subchannel_call_tracker(std::move(tracker)) {}
221     };
222 
223     /// Pick cannot be completed until something changes on the control
224     /// plane.  The client channel will queue the pick and try again the
225     /// next time the picker is updated.
226     struct Queue {};
227 
228     /// Pick failed.  If the call is wait_for_ready, the client channel
229     /// will wait for the next picker and try again; otherwise, it
230     /// will immediately fail the call with the status indicated (although
231     /// the call may be retried if the client channel is configured to do so).
232     struct Fail {
233       absl::Status status;
234 
FailPickResult::Fail235       explicit Fail(absl::Status s) : status(s) {}
236     };
237 
238     /// Pick will be dropped with the status specified.
239     /// Unlike FailPick, the call will be dropped even if it is
240     /// wait_for_ready, and retries (if configured) will be inhibited.
241     struct Drop {
242       absl::Status status;
243 
DropPickResult::Drop244       explicit Drop(absl::Status s) : status(s) {}
245     };
246 
247     // A pick result must be one of these types.
248     // Default to Queue, just to allow default construction.
249     absl::variant<Complete, Queue, Fail, Drop> result = Queue();
250 
251     PickResult() = default;
252     // NOLINTNEXTLINE(google-explicit-constructor)
PickResultPickResult253     PickResult(Complete complete) : result(std::move(complete)) {}
254     // NOLINTNEXTLINE(google-explicit-constructor)
PickResultPickResult255     PickResult(Queue queue) : result(queue) {}
256     // NOLINTNEXTLINE(google-explicit-constructor)
PickResultPickResult257     PickResult(Fail fail) : result(std::move(fail)) {}
258     // NOLINTNEXTLINE(google-explicit-constructor)
PickResultPickResult259     PickResult(Drop drop) : result(std::move(drop)) {}
260   };
261 
262   /// A subchannel picker is the object used to pick the subchannel to
263   /// use for a given call.  This is implemented by the LB policy and
264   /// used by the client channel to perform picks.
265   ///
266   /// Pickers are intended to encapsulate all of the state and logic
267   /// needed on the data plane (i.e., to actually process picks for
268   /// individual calls sent on the channel) while excluding all of the
269   /// state and logic needed on the control plane (i.e., resolver
270   /// updates, connectivity state notifications, etc); the latter should
271   /// live in the LB policy object itself.
272   class SubchannelPicker : public DualRefCounted<SubchannelPicker> {
273    public:
274     SubchannelPicker();
275 
276     virtual PickResult Pick(PickArgs args) = 0;
277 
278    protected:
Orphaned()279     void Orphaned() override {}
280   };
281 
282   /// A proxy object implemented by the client channel and used by the
283   /// LB policy to communicate with the channel.
284   class ChannelControlHelper {
285    public:
286     ChannelControlHelper() = default;
287     virtual ~ChannelControlHelper() = default;
288 
289     /// Creates a new subchannel with the specified channel args.
290     /// The args and per_address_args will be merged by the channel.
291     virtual RefCountedPtr<SubchannelInterface> CreateSubchannel(
292         const grpc_resolved_address& address,
293         const ChannelArgs& per_address_args, const ChannelArgs& args) = 0;
294 
295     /// Sets the connectivity state and returns a new picker to be used
296     /// by the client channel.
297     virtual void UpdateState(grpc_connectivity_state state,
298                              const absl::Status& status,
299                              RefCountedPtr<SubchannelPicker> picker) = 0;
300 
301     /// Requests that the resolver re-resolve.
302     virtual void RequestReresolution() = 0;
303 
304     /// Returns the channel target.
305     virtual absl::string_view GetTarget() = 0;
306 
307     /// Returns the channel authority.
308     virtual absl::string_view GetAuthority() = 0;
309 
310     /// Returns the channel credentials from the parent channel.  This can
311     /// be used to create a control-plane channel inside an LB policy.
312     virtual RefCountedPtr<grpc_channel_credentials> GetChannelCredentials() = 0;
313 
314     /// Returns the UNSAFE ChannelCredentials used to construct the channel,
315     /// including bearer tokens.  LB policies should generally have no use for
316     /// these credentials, and use of them is heavily discouraged.  These must
317     /// be used VERY carefully to avoid sending bearer tokens to untrusted
318     /// servers, as the server could then impersonate the client.  Generally,
319     /// it is safe to use these credentials only when communicating with the
320     /// backends.
321     virtual RefCountedPtr<grpc_channel_credentials>
322     GetUnsafeChannelCredentials() = 0;
323 
324     /// Returns the EventEngine to use for timers and async work.
325     virtual grpc_event_engine::experimental::EventEngine* GetEventEngine() = 0;
326 
327     /// Returns the stats plugin group for reporting metrics.
328     virtual GlobalStatsPluginRegistry::StatsPluginGroup&
329     GetStatsPluginGroup() = 0;
330 
331     /// Adds a trace message associated with the channel.
332     enum TraceSeverity { TRACE_INFO, TRACE_WARNING, TRACE_ERROR };
333     virtual void AddTraceEvent(TraceSeverity severity,
334                                absl::string_view message) = 0;
335   };
336 
337   class DelegatingChannelControlHelper;
338 
339   template <typename ParentPolicy>
340   class ParentOwningDelegatingChannelControlHelper;
341 
342   /// Interface for configuration data used by an LB policy implementation.
343   /// Individual implementations will create a subclass that adds methods to
344   /// return the parameters they need.
345   class Config : public RefCounted<Config> {
346    public:
347     ~Config() override = default;
348 
349     // Returns the load balancing policy name
350     virtual absl::string_view name() const = 0;
351   };
352 
353   /// Data passed to the UpdateLocked() method when new addresses and
354   /// config are available.
355   struct UpdateArgs {
356     /// A list of endpoints, each with one or more address, or an error
357     /// indicating a failure to obtain the list of addresses.
358     absl::StatusOr<std::shared_ptr<EndpointAddressesIterator>> addresses;
359     /// The LB policy config.
360     RefCountedPtr<Config> config;
361     /// A human-readable note providing context about the name resolution that
362     /// provided this update.  LB policies may wish to include this message
363     /// in RPC failure status messages.  For example, if the update has an
364     /// empty list of addresses, this message might say "no DNS entries
365     /// found for <name>".
366     std::string resolution_note;
367 
368     // TODO(roth): Before making this a public API, find a better
369     // abstraction for representing channel args.
370     ChannelArgs args;
371   };
372 
373   /// Args used to instantiate an LB policy.
374   struct Args {
375     /// The work_serializer under which all LB policy calls will be run.
376     std::shared_ptr<WorkSerializer> work_serializer;
377     /// Channel control helper.
378     /// Note: LB policies MUST NOT call any method on the helper from
379     /// their constructor.
380     std::unique_ptr<ChannelControlHelper> channel_control_helper;
381     /// Channel args.
382     // TODO(roth): Find a better channel args representation for this API.
383     ChannelArgs args;
384   };
385 
386   explicit LoadBalancingPolicy(Args args, intptr_t initial_refcount = 1);
387   ~LoadBalancingPolicy() override;
388 
389   // Not copyable nor movable.
390   LoadBalancingPolicy(const LoadBalancingPolicy&) = delete;
391   LoadBalancingPolicy& operator=(const LoadBalancingPolicy&) = delete;
392 
393   /// Returns the name of the LB policy.
394   virtual absl::string_view name() const = 0;
395 
396   /// Updates the policy with new data from the resolver.  Will be invoked
397   /// immediately after LB policy is constructed, and then again whenever
398   /// the resolver returns a new result.  The returned status indicates
399   /// whether the LB policy accepted the update; if non-OK, informs
400   /// polling-based resolvers that they should go into backoff delay and
401   /// eventually reattempt the resolution.
402   ///
403   /// The first time that UpdateLocked() is called, the LB policy will
404   /// generally not be able to determine the appropriate connectivity
405   /// state by the time UpdateLocked() returns (e.g., it will need to
406   /// wait for connectivity state notifications from each subchannel,
407   /// which will be delivered asynchronously).  In this case, the LB
408   /// policy should not call the helper's UpdateState() method until it
409   /// does have a clear picture of the connectivity state (e.g., it
410   /// should wait for all subchannels to report connectivity state
411   /// before calling the helper's UpdateState() method), although it is
412   /// expected to do so within some short period of time.  The parent of
413   /// the LB policy will assume that the policy's initial state is
414   /// CONNECTING and that picks should be queued.
415   virtual absl::Status UpdateLocked(UpdateArgs) = 0;  // NOLINT
416 
417   /// Tries to enter a READY connectivity state.
418   /// This is a no-op by default, since most LB policies never go into
419   /// IDLE state.
ExitIdleLocked()420   virtual void ExitIdleLocked() {}
421 
422   /// Resets connection backoff.
423   virtual void ResetBackoffLocked() = 0;
424 
interested_parties()425   grpc_pollset_set* interested_parties() const { return interested_parties_; }
426 
427   // Note: This must be invoked while holding the work_serializer.
428   void Orphan() override;
429 
430   // A picker that returns PickResult::Queue for all picks.
431   // Also calls the parent LB policy's ExitIdleLocked() method when the
432   // first pick is seen.
433   class QueuePicker final : public SubchannelPicker {
434    public:
QueuePicker(RefCountedPtr<LoadBalancingPolicy> parent)435     explicit QueuePicker(RefCountedPtr<LoadBalancingPolicy> parent)
436         : parent_(std::move(parent)) {}
437 
~QueuePicker()438     ~QueuePicker() override { parent_.reset(DEBUG_LOCATION, "QueuePicker"); }
439 
440     PickResult Pick(PickArgs args) override;
441 
442    private:
443     Mutex mu_;
444     RefCountedPtr<LoadBalancingPolicy> parent_ ABSL_GUARDED_BY(&mu_);
445   };
446 
447   // A picker that returns PickResult::Fail for all picks.
448   class TransientFailurePicker final : public SubchannelPicker {
449    public:
TransientFailurePicker(absl::Status status)450     explicit TransientFailurePicker(absl::Status status) : status_(status) {}
451 
Pick(PickArgs)452     PickResult Pick(PickArgs /*args*/) override {
453       return PickResult::Fail(status_);
454     }
455 
456    private:
457     absl::Status status_;
458   };
459 
460  protected:
work_serializer()461   std::shared_ptr<WorkSerializer> work_serializer() const {
462     return work_serializer_;
463   }
464 
channel_args()465   const ChannelArgs& channel_args() const { return channel_args_; }
466 
467   // Note: LB policies MUST NOT call any method on the helper from their
468   // constructor.
channel_control_helper()469   ChannelControlHelper* channel_control_helper() const {
470     return channel_control_helper_.get();
471   }
472 
473   /// Shuts down the policy.
474   virtual void ShutdownLocked() = 0;
475 
476  private:
477   /// Work Serializer under which LB policy actions take place.
478   std::shared_ptr<WorkSerializer> work_serializer_;
479   /// Owned pointer to interested parties in load balancing decisions.
480   grpc_pollset_set* interested_parties_;
481   /// Channel control helper.
482   std::unique_ptr<ChannelControlHelper> channel_control_helper_;
483   /// Channel args passed in.
484   // TODO(roth): Rework Args so that we don't need to capture channel args here.
485   ChannelArgs channel_args_;
486 };
487 
488 }  // namespace grpc_core
489 
490 #endif  // GRPC_SRC_CORE_LOAD_BALANCING_LB_POLICY_H
491