1 //
2 // Copyright 2015 gRPC authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #ifndef GRPC_SRC_CORE_LIB_LOAD_BALANCING_LB_POLICY_H
18 #define GRPC_SRC_CORE_LIB_LOAD_BALANCING_LB_POLICY_H
19 
20 #include <grpc/support/port_platform.h>
21 
22 #include <stddef.h>
23 #include <stdint.h>
24 
25 #include <memory>
26 #include <string>
27 #include <utility>
28 #include <vector>
29 
30 #include "absl/base/thread_annotations.h"
31 #include "absl/status/status.h"
32 #include "absl/status/statusor.h"
33 #include "absl/strings/string_view.h"
34 #include "absl/types/optional.h"
35 #include "absl/types/variant.h"
36 
37 #include <grpc/event_engine/event_engine.h>
38 #include <grpc/impl/connectivity_state.h>
39 
40 #include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h"
41 #include "src/core/lib/channel/channel_args.h"
42 #include "src/core/lib/debug/trace.h"
43 #include "src/core/lib/gprpp/debug_location.h"
44 #include "src/core/lib/gprpp/dual_ref_counted.h"
45 #include "src/core/lib/gprpp/orphanable.h"
46 #include "src/core/lib/gprpp/ref_counted.h"
47 #include "src/core/lib/gprpp/ref_counted_ptr.h"
48 #include "src/core/lib/gprpp/sync.h"
49 #include "src/core/lib/gprpp/work_serializer.h"
50 #include "src/core/lib/iomgr/iomgr_fwd.h"
51 #include "src/core/lib/load_balancing/subchannel_interface.h"
52 #include "src/core/lib/resolver/server_address.h"
53 
54 namespace grpc_core {
55 
56 extern DebugOnlyTraceFlag grpc_trace_lb_policy_refcount;
57 
58 /// Interface for load balancing policies.
59 ///
60 /// The following concepts are used here:
61 ///
62 /// Channel: An abstraction that manages connections to backend servers
63 ///   on behalf of a client application.  The application creates a channel
64 ///   for a given server name and then sends calls (RPCs) on it, and the
65 ///   channel figures out which backend server to send each call to.  A channel
66 ///   contains a resolver, a load balancing policy (or a tree of LB policies),
67 ///   and a set of one or more subchannels.
68 ///
69 /// Subchannel: A subchannel represents a connection to one backend server.
70 ///   The LB policy decides which subchannels to create, manages the
71 ///   connectivity state of those subchannels, and decides which subchannel
72 ///   to send any given call to.
73 ///
74 /// Resolver: A plugin that takes a gRPC server URI and resolves it to a
75 ///   list of one or more addresses and a service config, as described
76 ///   in https://github.com/grpc/grpc/blob/master/doc/naming.md.  See
77 ///   resolver.h for the resolver API.
78 ///
79 /// Load Balancing (LB) Policy: A plugin that takes a list of addresses
80 ///   from the resolver, maintains and manages a subchannel for each
81 ///   backend address, and decides which subchannel to send each call on.
82 ///   An LB policy has two parts:
83 ///   - A LoadBalancingPolicy, which deals with the control plane work of
84 ///     managing subchannels.
85 ///   - A SubchannelPicker, which handles the data plane work of
86 ///     determining which subchannel a given call should be sent on.
87 
/// LoadBalancingPolicy API.
///
/// Note: All methods with a "Locked" suffix must be called from the
/// work_serializer passed to the constructor.
///
/// Any I/O done by the LB policy should be done under the pollset_set
/// returned by \a interested_parties().
// TODO(roth): Once we move to EventManager-based polling, remove the
// interested_parties() hooks from the API.
97 class LoadBalancingPolicy : public InternallyRefCounted<LoadBalancingPolicy> {
98  public:
99   /// Interface for accessing per-call state.
100   /// Implemented by the client channel and used by the SubchannelPicker.
101   class CallState {
102    public:
103     CallState() = default;
104     virtual ~CallState() = default;
105 
106     /// Allocates memory associated with the call, which will be
107     /// automatically freed when the call is complete.
108     /// It is more efficient to use this than to allocate memory directly
109     /// for allocations that need to be made on a per-call basis.
110     virtual void* Alloc(size_t size) = 0;
111   };
112 
113   /// Interface for accessing metadata.
114   /// Implemented by the client channel and used by the SubchannelPicker.
115   class MetadataInterface {
116    public:
117     virtual ~MetadataInterface() = default;
118 
119     //////////////////////////////////////////////////////////////////////////
120     // TODO(ctiller): DO NOT MAKE THIS A PUBLIC API YET
121     // This needs some API design to ensure we can add/remove/replace metadata
122     // keys... we're deliberately not doing so to save some time whilst
123     // cleaning up the internal metadata representation, but we should add
124     // something back before making this a public API.
125     //////////////////////////////////////////////////////////////////////////
126 
127     /// Adds a key/value pair.
128     /// Does NOT take ownership of \a key or \a value.
129     /// Implementations must ensure that the key and value remain alive
130     /// until the call ends.  If desired, they may be allocated via
131     /// CallState::Alloc().
132     virtual void Add(absl::string_view key, absl::string_view value) = 0;
133 
134     /// Produce a vector of metadata key/value strings for tests.
135     virtual std::vector<std::pair<std::string, std::string>>
136     TestOnlyCopyToVector() = 0;
137 
138     virtual absl::optional<absl::string_view> Lookup(
139         absl::string_view key, std::string* buffer) const = 0;
140   };
141 
142   /// Arguments used when picking a subchannel for a call.
143   struct PickArgs {
144     /// The path of the call.  Indicates the RPC service and method name.
145     absl::string_view path;
146     /// Initial metadata associated with the picking call.
147     /// The LB policy may use the existing metadata to influence its routing
148     /// decision, and it may add new metadata elements to be sent with the
149     /// call to the chosen backend.
150     MetadataInterface* initial_metadata;
151     /// An interface for accessing call state.  Can be used to allocate
152     /// memory associated with the call in an efficient way.
153     CallState* call_state;
154   };
155 
156   /// Interface for accessing backend metric data.
157   /// Implemented by the client channel and used by
158   /// SubchannelCallTrackerInterface.
159   class BackendMetricAccessor {
160    public:
161     virtual ~BackendMetricAccessor() = default;
162 
163     /// Returns the backend metric data returned by the server for the call,
164     /// or null if no backend metric data was returned.
165     virtual const BackendMetricData* GetBackendMetricData() = 0;
166   };
167 
168   /// Interface for tracking subchannel calls.
169   /// Implemented by LB policy and used by the channel.
170   class SubchannelCallTrackerInterface {
171    public:
172     virtual ~SubchannelCallTrackerInterface() = default;
173 
174     /// Called when a subchannel call is started after an LB pick.
175     virtual void Start() = 0;
176 
177     /// Called when a subchannel call is completed.
178     /// The metadata may be modified by the implementation.  However, the
179     /// implementation does not take ownership, so any data that needs to be
180     /// used after returning must be copied.
181     struct FinishArgs {
182       absl::string_view peer_address;
183       absl::Status status;
184       MetadataInterface* trailing_metadata;
185       BackendMetricAccessor* backend_metric_accessor;
186     };
187     virtual void Finish(FinishArgs args) = 0;
188   };
189 
190   /// The result of picking a subchannel for a call.
191   struct PickResult {
192     /// A successful pick.
193     struct Complete {
194       /// The subchannel to be used for the call.  Must be non-null.
195       RefCountedPtr<SubchannelInterface> subchannel;
196 
197       /// Optionally set by the LB policy when it wishes to be notified
198       /// about the resulting subchannel call.
199       /// Note that if the pick is abandoned by the channel, this may never
200       /// be used.
201       std::unique_ptr<SubchannelCallTrackerInterface> subchannel_call_tracker;
202 
203       explicit Complete(
204           RefCountedPtr<SubchannelInterface> sc,
205           std::unique_ptr<SubchannelCallTrackerInterface> tracker = nullptr)
subchannelPickResult::Complete206           : subchannel(std::move(sc)),
207             subchannel_call_tracker(std::move(tracker)) {}
208     };
209 
210     /// Pick cannot be completed until something changes on the control
211     /// plane.  The client channel will queue the pick and try again the
212     /// next time the picker is updated.
213     struct Queue {};
214 
215     /// Pick failed.  If the call is wait_for_ready, the client channel
216     /// will wait for the next picker and try again; otherwise, it
217     /// will immediately fail the call with the status indicated (although
218     /// the call may be retried if the client channel is configured to do so).
219     struct Fail {
220       absl::Status status;
221 
FailPickResult::Fail222       explicit Fail(absl::Status s) : status(s) {}
223     };
224 
225     /// Pick will be dropped with the status specified.
226     /// Unlike FailPick, the call will be dropped even if it is
227     /// wait_for_ready, and retries (if configured) will be inhibited.
228     struct Drop {
229       absl::Status status;
230 
DropPickResult::Drop231       explicit Drop(absl::Status s) : status(s) {}
232     };
233 
234     // A pick result must be one of these types.
235     // Default to Queue, just to allow default construction.
236     absl::variant<Complete, Queue, Fail, Drop> result = Queue();
237 
238     PickResult() = default;
239     // NOLINTNEXTLINE(google-explicit-constructor)
PickResultPickResult240     PickResult(Complete complete) : result(std::move(complete)) {}
241     // NOLINTNEXTLINE(google-explicit-constructor)
PickResultPickResult242     PickResult(Queue queue) : result(queue) {}
243     // NOLINTNEXTLINE(google-explicit-constructor)
PickResultPickResult244     PickResult(Fail fail) : result(std::move(fail)) {}
245     // NOLINTNEXTLINE(google-explicit-constructor)
PickResultPickResult246     PickResult(Drop drop) : result(std::move(drop)) {}
247   };
248 
249   /// A subchannel picker is the object used to pick the subchannel to
250   /// use for a given call.  This is implemented by the LB policy and
251   /// used by the client channel to perform picks.
252   ///
253   /// Pickers are intended to encapsulate all of the state and logic
254   /// needed on the data plane (i.e., to actually process picks for
255   /// individual calls sent on the channel) while excluding all of the
256   /// state and logic needed on the control plane (i.e., resolver
257   /// updates, connectivity state notifications, etc); the latter should
258   /// live in the LB policy object itself.
259   ///
260   /// Currently, pickers are always accessed from within the
261   /// client_channel data plane mutex, so they do not have to be
262   /// thread-safe.
263   class SubchannelPicker : public DualRefCounted<SubchannelPicker> {
264    public:
265     SubchannelPicker();
266 
267     virtual PickResult Pick(PickArgs args) = 0;
268 
Orphan()269     void Orphan() override {}
270   };
271 
272   /// A proxy object implemented by the client channel and used by the
273   /// LB policy to communicate with the channel.
274   // TODO(roth): Once insecure builds go away, add methods for accessing
275   // channel creds.  By default, that should strip off the call creds
276   // attached to the channel creds, but there should also be a "use at
277   // your own risk" option to get the channel creds without stripping
278   // off the attached call creds.
279   class ChannelControlHelper {
280    public:
281     ChannelControlHelper() = default;
282     virtual ~ChannelControlHelper() = default;
283 
284     /// Creates a new subchannel with the specified channel args.
285     virtual RefCountedPtr<SubchannelInterface> CreateSubchannel(
286         ServerAddress address, const ChannelArgs& args) = 0;
287 
288     /// Sets the connectivity state and returns a new picker to be used
289     /// by the client channel.
290     virtual void UpdateState(grpc_connectivity_state state,
291                              const absl::Status& status,
292                              RefCountedPtr<SubchannelPicker> picker) = 0;
293 
294     /// Requests that the resolver re-resolve.
295     virtual void RequestReresolution() = 0;
296 
297     /// Returns the channel authority.
298     virtual absl::string_view GetAuthority() = 0;
299 
300     /// Returns the EventEngine to use for timers and async work.
301     virtual grpc_event_engine::experimental::EventEngine* GetEventEngine() = 0;
302 
303     /// Adds a trace message associated with the channel.
304     enum TraceSeverity { TRACE_INFO, TRACE_WARNING, TRACE_ERROR };
305     virtual void AddTraceEvent(TraceSeverity severity,
306                                absl::string_view message) = 0;
307   };
308 
309   /// Interface for configuration data used by an LB policy implementation.
310   /// Individual implementations will create a subclass that adds methods to
311   /// return the parameters they need.
312   class Config : public RefCounted<Config> {
313    public:
314     ~Config() override = default;
315 
316     // Returns the load balancing policy name
317     virtual absl::string_view name() const = 0;
318   };
319 
320   /// Data passed to the UpdateLocked() method when new addresses and
321   /// config are available.
322   struct UpdateArgs {
323     /// A list of addresses, or an error indicating a failure to obtain the
324     /// list of addresses.
325     absl::StatusOr<ServerAddressList> addresses;
326     /// The LB policy config.
327     RefCountedPtr<Config> config;
328     /// A human-readable note providing context about the name resolution that
329     /// provided this update.  LB policies may wish to include this message
330     /// in RPC failure status messages.  For example, if the update has an
331     /// empty list of addresses, this message might say "no DNS entries
332     /// found for <name>".
333     std::string resolution_note;
334 
335     // TODO(roth): Before making this a public API, find a better
336     // abstraction for representing channel args.
337     ChannelArgs args;
338   };
339 
340   /// Args used to instantiate an LB policy.
341   struct Args {
342     /// The work_serializer under which all LB policy calls will be run.
343     std::shared_ptr<WorkSerializer> work_serializer;
344     /// Channel control helper.
345     /// Note: LB policies MUST NOT call any method on the helper from
346     /// their constructor.
347     std::unique_ptr<ChannelControlHelper> channel_control_helper;
348     /// Channel args.
349     // TODO(roth): Find a better channel args representation for this API.
350     ChannelArgs args;
351   };
352 
353   explicit LoadBalancingPolicy(Args args, intptr_t initial_refcount = 1);
354   ~LoadBalancingPolicy() override;
355 
356   // Not copyable nor movable.
357   LoadBalancingPolicy(const LoadBalancingPolicy&) = delete;
358   LoadBalancingPolicy& operator=(const LoadBalancingPolicy&) = delete;
359 
360   /// Returns the name of the LB policy.
361   virtual absl::string_view name() const = 0;
362 
363   /// Updates the policy with new data from the resolver.  Will be invoked
364   /// immediately after LB policy is constructed, and then again whenever
365   /// the resolver returns a new result.  The returned status indicates
366   /// whether the LB policy accepted the update; if non-OK, informs
367   /// polling-based resolvers that they should go into backoff delay and
368   /// eventually reattempt the resolution.
369   ///
370   /// The first time that UpdateLocked() is called, the LB policy will
371   /// generally not be able to determine the appropriate connectivity
372   /// state by the time UpdateLocked() returns (e.g., it will need to
373   /// wait for connectivity state notifications from each subchannel,
374   /// which will be delivered asynchronously).  In this case, the LB
375   /// policy should not call the helper's UpdateState() method until it
376   /// does have a clear picture of the connectivity state (e.g., it
377   /// should wait for all subchannels to report connectivity state
378   /// before calling the helper's UpdateState() method), although it is
379   /// expected to do so within some short period of time.  The parent of
380   /// the LB policy will assume that the policy's initial state is
381   /// CONNECTING and that picks should be queued.
382   virtual absl::Status UpdateLocked(UpdateArgs) = 0;  // NOLINT
383 
384   /// Tries to enter a READY connectivity state.
385   /// This is a no-op by default, since most LB policies never go into
386   /// IDLE state.
ExitIdleLocked()387   virtual void ExitIdleLocked() {}
388 
389   /// Resets connection backoff.
390   virtual void ResetBackoffLocked() = 0;
391 
interested_parties()392   grpc_pollset_set* interested_parties() const { return interested_parties_; }
393 
394   // Note: This must be invoked while holding the work_serializer.
395   void Orphan() override;
396 
397   // A picker that returns PickResult::Queue for all picks.
398   // Also calls the parent LB policy's ExitIdleLocked() method when the
399   // first pick is seen.
400   class QueuePicker : public SubchannelPicker {
401    public:
QueuePicker(RefCountedPtr<LoadBalancingPolicy> parent)402     explicit QueuePicker(RefCountedPtr<LoadBalancingPolicy> parent)
403         : parent_(std::move(parent)) {}
404 
~QueuePicker()405     ~QueuePicker() override { parent_.reset(DEBUG_LOCATION, "QueuePicker"); }
406 
407     PickResult Pick(PickArgs args) override;
408 
409    private:
410     Mutex mu_;
411     RefCountedPtr<LoadBalancingPolicy> parent_ ABSL_GUARDED_BY(&mu_);
412   };
413 
414   // A picker that returns PickResult::Fail for all picks.
415   class TransientFailurePicker : public SubchannelPicker {
416    public:
TransientFailurePicker(absl::Status status)417     explicit TransientFailurePicker(absl::Status status) : status_(status) {}
418 
Pick(PickArgs)419     PickResult Pick(PickArgs /*args*/) override {
420       return PickResult::Fail(status_);
421     }
422 
423    private:
424     absl::Status status_;
425   };
426 
427  protected:
work_serializer()428   std::shared_ptr<WorkSerializer> work_serializer() const {
429     return work_serializer_;
430   }
431 
channel_args()432   const ChannelArgs& channel_args() const { return channel_args_; }
433 
434   // Note: LB policies MUST NOT call any method on the helper from their
435   // constructor.
channel_control_helper()436   ChannelControlHelper* channel_control_helper() const {
437     return channel_control_helper_.get();
438   }
439 
440   /// Shuts down the policy.
441   virtual void ShutdownLocked() = 0;
442 
443  private:
444   /// Work Serializer under which LB policy actions take place.
445   std::shared_ptr<WorkSerializer> work_serializer_;
446   /// Owned pointer to interested parties in load balancing decisions.
447   grpc_pollset_set* interested_parties_;
448   /// Channel control helper.
449   std::unique_ptr<ChannelControlHelper> channel_control_helper_;
450   /// Channel args passed in.
451   // TODO(roth): Rework Args so that we don't need to capture channel args here.
452   ChannelArgs channel_args_;
453 };
454 
455 }  // namespace grpc_core
456 
457 #endif  // GRPC_SRC_CORE_LIB_LOAD_BALANCING_LB_POLICY_H
458