//
// Copyright 2015 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#ifndef GRPC_SRC_CORE_LOAD_BALANCING_LB_POLICY_H
#define GRPC_SRC_CORE_LOAD_BALANCING_LB_POLICY_H

#include <grpc/support/port_platform.h>

#include <stddef.h>
#include <stdint.h>

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/base/thread_annotations.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "absl/types/variant.h"

#include <grpc/event_engine/event_engine.h>
#include <grpc/grpc.h>
#include <grpc/impl/connectivity_state.h>

#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/channel/metrics.h"
#include "src/core/lib/debug/trace.h"
#include "src/core/lib/gprpp/debug_location.h"
#include "src/core/lib/gprpp/dual_ref_counted.h"
#include "src/core/lib/gprpp/orphanable.h"
#include "src/core/lib/gprpp/ref_counted.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/gprpp/sync.h"
#include "src/core/lib/gprpp/work_serializer.h"
#include "src/core/lib/iomgr/iomgr_fwd.h"
#include "src/core/lib/iomgr/resolved_address.h"
#include "src/core/load_balancing/backend_metric_data.h"
#include "src/core/load_balancing/subchannel_interface.h"
#include "src/core/resolver/endpoint_addresses.h"

namespace grpc_core {

extern DebugOnlyTraceFlag grpc_trace_lb_policy_refcount;

/// Interface for load balancing policies.
///
/// The following concepts are used here:
///
/// Channel: An abstraction that manages connections to backend servers
///   on behalf of a client application.  The application creates a channel
///   for a given server name and then sends calls (RPCs) on it, and the
///   channel figures out which backend server to send each call to.  A channel
///   contains a resolver, a load balancing policy (or a tree of LB policies),
///   and a set of one or more subchannels.
///
/// Subchannel: A subchannel represents a connection to one backend server.
///   The LB policy decides which subchannels to create, manages the
///   connectivity state of those subchannels, and decides which subchannel
///   to send any given call to.
///
/// Resolver: A plugin that takes a gRPC server URI and resolves it to a
///   list of one or more addresses and a service config, as described
///   in https://github.com/grpc/grpc/blob/master/doc/naming.md.  See
///   resolver.h for the resolver API.
///
/// Load Balancing (LB) Policy: A plugin that takes a list of addresses
///   from the resolver, maintains and manages a subchannel for each
///   backend address, and decides which subchannel to send each call on.
///   An LB policy has two parts:
///   - A LoadBalancingPolicy, which deals with the control plane work of
///     managing subchannels.
///   - A SubchannelPicker, which handles the data plane work of
///     determining which subchannel a given call should be sent on.

/// LoadBalancingPolicy API.
///
/// Note: All methods with a "Locked" suffix must be called from the
/// work_serializer passed to the constructor.
///
/// Any I/O done by the LB policy should be done under the pollset_set
/// returned by \a interested_parties().
98 // TODO(roth): Once we move to EventManager-based polling, remove the 99 // interested_parties() hooks from the API. 100 class LoadBalancingPolicy : public InternallyRefCounted<LoadBalancingPolicy> { 101 public: 102 /// Interface for accessing per-call state. 103 /// Implemented by the client channel and used by the SubchannelPicker. 104 class CallState { 105 public: 106 CallState() = default; 107 virtual ~CallState() = default; 108 109 /// Allocates memory associated with the call, which will be 110 /// automatically freed when the call is complete. 111 /// It is more efficient to use this than to allocate memory directly 112 /// for allocations that need to be made on a per-call basis. 113 virtual void* Alloc(size_t size) = 0; 114 }; 115 116 /// Interface for accessing metadata. 117 /// Implemented by the client channel and used by the SubchannelPicker. 118 class MetadataInterface { 119 public: 120 virtual ~MetadataInterface() = default; 121 122 ////////////////////////////////////////////////////////////////////////// 123 // TODO(ctiller): DO NOT MAKE THIS A PUBLIC API YET 124 // This needs some API design to ensure we can add/remove/replace metadata 125 // keys... we're deliberately not doing so to save some time whilst 126 // cleaning up the internal metadata representation, but we should add 127 // something back before making this a public API. 128 ////////////////////////////////////////////////////////////////////////// 129 130 /// Adds a key/value pair. 131 /// Does NOT take ownership of \a key or \a value. 132 /// Implementations must ensure that the key and value remain alive 133 /// until the call ends. If desired, they may be allocated via 134 /// CallState::Alloc(). 135 virtual void Add(absl::string_view key, absl::string_view value) = 0; 136 137 /// Produce a vector of metadata key/value strings for tests. 
138 virtual std::vector<std::pair<std::string, std::string>> 139 TestOnlyCopyToVector() = 0; 140 141 virtual absl::optional<absl::string_view> Lookup( 142 absl::string_view key, std::string* buffer) const = 0; 143 }; 144 145 /// Arguments used when picking a subchannel for a call. 146 struct PickArgs { 147 /// The path of the call. Indicates the RPC service and method name. 148 absl::string_view path; 149 /// Initial metadata associated with the picking call. 150 /// The LB policy may use the existing metadata to influence its routing 151 /// decision, and it may add new metadata elements to be sent with the 152 /// call to the chosen backend. 153 // TODO(roth): Before making the LB policy API public, consider 154 // whether this is the right way to expose metadata to the picker. 155 // This approach means that if a pick modifies metadata but then we 156 // discard the pick because the subchannel is not connected, the 157 // metadata change will still have been made. Maybe we actually 158 // want to somehow provide metadata changes in PickResult::Complete 159 // instead? Or maybe we use a CallTracer that can add metadata when 160 // the call actually starts on the subchannel? 161 MetadataInterface* initial_metadata; 162 /// An interface for accessing call state. Can be used to allocate 163 /// memory associated with the call in an efficient way. 164 CallState* call_state; 165 }; 166 167 /// Interface for accessing backend metric data. 168 /// Implemented by the client channel and used by 169 /// SubchannelCallTrackerInterface. 170 class BackendMetricAccessor { 171 public: 172 virtual ~BackendMetricAccessor() = default; 173 174 /// Returns the backend metric data returned by the server for the call, 175 /// or null if no backend metric data was returned. 176 virtual const BackendMetricData* GetBackendMetricData() = 0; 177 }; 178 179 /// Interface for tracking subchannel calls. 180 /// Implemented by LB policy and used by the channel. 
181 // TODO(roth): Before making this API public, consider whether we 182 // should just replace this with a CallTracer, similar to what Java does. 183 class SubchannelCallTrackerInterface { 184 public: 185 virtual ~SubchannelCallTrackerInterface() = default; 186 187 /// Called when a subchannel call is started after an LB pick. 188 virtual void Start() = 0; 189 190 /// Called when a subchannel call is completed. 191 /// The metadata may be modified by the implementation. However, the 192 /// implementation does not take ownership, so any data that needs to be 193 /// used after returning must be copied. 194 struct FinishArgs { 195 absl::string_view peer_address; 196 absl::Status status; 197 MetadataInterface* trailing_metadata; 198 BackendMetricAccessor* backend_metric_accessor; 199 }; 200 virtual void Finish(FinishArgs args) = 0; 201 }; 202 203 /// The result of picking a subchannel for a call. 204 struct PickResult { 205 /// A successful pick. 206 struct Complete { 207 /// The subchannel to be used for the call. Must be non-null. 208 RefCountedPtr<SubchannelInterface> subchannel; 209 210 /// Optionally set by the LB policy when it wishes to be notified 211 /// about the resulting subchannel call. 212 /// Note that if the pick is abandoned by the channel, this may never 213 /// be used. 214 std::unique_ptr<SubchannelCallTrackerInterface> subchannel_call_tracker; 215 216 explicit Complete( 217 RefCountedPtr<SubchannelInterface> sc, 218 std::unique_ptr<SubchannelCallTrackerInterface> tracker = nullptr) subchannelPickResult::Complete219 : subchannel(std::move(sc)), 220 subchannel_call_tracker(std::move(tracker)) {} 221 }; 222 223 /// Pick cannot be completed until something changes on the control 224 /// plane. The client channel will queue the pick and try again the 225 /// next time the picker is updated. 226 struct Queue {}; 227 228 /// Pick failed. 
If the call is wait_for_ready, the client channel 229 /// will wait for the next picker and try again; otherwise, it 230 /// will immediately fail the call with the status indicated (although 231 /// the call may be retried if the client channel is configured to do so). 232 struct Fail { 233 absl::Status status; 234 FailPickResult::Fail235 explicit Fail(absl::Status s) : status(s) {} 236 }; 237 238 /// Pick will be dropped with the status specified. 239 /// Unlike FailPick, the call will be dropped even if it is 240 /// wait_for_ready, and retries (if configured) will be inhibited. 241 struct Drop { 242 absl::Status status; 243 DropPickResult::Drop244 explicit Drop(absl::Status s) : status(s) {} 245 }; 246 247 // A pick result must be one of these types. 248 // Default to Queue, just to allow default construction. 249 absl::variant<Complete, Queue, Fail, Drop> result = Queue(); 250 251 PickResult() = default; 252 // NOLINTNEXTLINE(google-explicit-constructor) PickResultPickResult253 PickResult(Complete complete) : result(std::move(complete)) {} 254 // NOLINTNEXTLINE(google-explicit-constructor) PickResultPickResult255 PickResult(Queue queue) : result(queue) {} 256 // NOLINTNEXTLINE(google-explicit-constructor) PickResultPickResult257 PickResult(Fail fail) : result(std::move(fail)) {} 258 // NOLINTNEXTLINE(google-explicit-constructor) PickResultPickResult259 PickResult(Drop drop) : result(std::move(drop)) {} 260 }; 261 262 /// A subchannel picker is the object used to pick the subchannel to 263 /// use for a given call. This is implemented by the LB policy and 264 /// used by the client channel to perform picks. 
265 /// 266 /// Pickers are intended to encapsulate all of the state and logic 267 /// needed on the data plane (i.e., to actually process picks for 268 /// individual calls sent on the channel) while excluding all of the 269 /// state and logic needed on the control plane (i.e., resolver 270 /// updates, connectivity state notifications, etc); the latter should 271 /// live in the LB policy object itself. 272 class SubchannelPicker : public DualRefCounted<SubchannelPicker> { 273 public: 274 SubchannelPicker(); 275 276 virtual PickResult Pick(PickArgs args) = 0; 277 278 protected: Orphaned()279 void Orphaned() override {} 280 }; 281 282 /// A proxy object implemented by the client channel and used by the 283 /// LB policy to communicate with the channel. 284 class ChannelControlHelper { 285 public: 286 ChannelControlHelper() = default; 287 virtual ~ChannelControlHelper() = default; 288 289 /// Creates a new subchannel with the specified channel args. 290 /// The args and per_address_args will be merged by the channel. 291 virtual RefCountedPtr<SubchannelInterface> CreateSubchannel( 292 const grpc_resolved_address& address, 293 const ChannelArgs& per_address_args, const ChannelArgs& args) = 0; 294 295 /// Sets the connectivity state and returns a new picker to be used 296 /// by the client channel. 297 virtual void UpdateState(grpc_connectivity_state state, 298 const absl::Status& status, 299 RefCountedPtr<SubchannelPicker> picker) = 0; 300 301 /// Requests that the resolver re-resolve. 302 virtual void RequestReresolution() = 0; 303 304 /// Returns the channel target. 305 virtual absl::string_view GetTarget() = 0; 306 307 /// Returns the channel authority. 308 virtual absl::string_view GetAuthority() = 0; 309 310 /// Returns the channel credentials from the parent channel. This can 311 /// be used to create a control-plane channel inside an LB policy. 
312 virtual RefCountedPtr<grpc_channel_credentials> GetChannelCredentials() = 0; 313 314 /// Returns the UNSAFE ChannelCredentials used to construct the channel, 315 /// including bearer tokens. LB policies should generally have no use for 316 /// these credentials, and use of them is heavily discouraged. These must 317 /// be used VERY carefully to avoid sending bearer tokens to untrusted 318 /// servers, as the server could then impersonate the client. Generally, 319 /// it is safe to use these credentials only when communicating with the 320 /// backends. 321 virtual RefCountedPtr<grpc_channel_credentials> 322 GetUnsafeChannelCredentials() = 0; 323 324 /// Returns the EventEngine to use for timers and async work. 325 virtual grpc_event_engine::experimental::EventEngine* GetEventEngine() = 0; 326 327 /// Returns the stats plugin group for reporting metrics. 328 virtual GlobalStatsPluginRegistry::StatsPluginGroup& 329 GetStatsPluginGroup() = 0; 330 331 /// Adds a trace message associated with the channel. 332 enum TraceSeverity { TRACE_INFO, TRACE_WARNING, TRACE_ERROR }; 333 virtual void AddTraceEvent(TraceSeverity severity, 334 absl::string_view message) = 0; 335 }; 336 337 class DelegatingChannelControlHelper; 338 339 template <typename ParentPolicy> 340 class ParentOwningDelegatingChannelControlHelper; 341 342 /// Interface for configuration data used by an LB policy implementation. 343 /// Individual implementations will create a subclass that adds methods to 344 /// return the parameters they need. 345 class Config : public RefCounted<Config> { 346 public: 347 ~Config() override = default; 348 349 // Returns the load balancing policy name 350 virtual absl::string_view name() const = 0; 351 }; 352 353 /// Data passed to the UpdateLocked() method when new addresses and 354 /// config are available. 355 struct UpdateArgs { 356 /// A list of endpoints, each with one or more address, or an error 357 /// indicating a failure to obtain the list of addresses. 
358 absl::StatusOr<std::shared_ptr<EndpointAddressesIterator>> addresses; 359 /// The LB policy config. 360 RefCountedPtr<Config> config; 361 /// A human-readable note providing context about the name resolution that 362 /// provided this update. LB policies may wish to include this message 363 /// in RPC failure status messages. For example, if the update has an 364 /// empty list of addresses, this message might say "no DNS entries 365 /// found for <name>". 366 std::string resolution_note; 367 368 // TODO(roth): Before making this a public API, find a better 369 // abstraction for representing channel args. 370 ChannelArgs args; 371 }; 372 373 /// Args used to instantiate an LB policy. 374 struct Args { 375 /// The work_serializer under which all LB policy calls will be run. 376 std::shared_ptr<WorkSerializer> work_serializer; 377 /// Channel control helper. 378 /// Note: LB policies MUST NOT call any method on the helper from 379 /// their constructor. 380 std::unique_ptr<ChannelControlHelper> channel_control_helper; 381 /// Channel args. 382 // TODO(roth): Find a better channel args representation for this API. 383 ChannelArgs args; 384 }; 385 386 explicit LoadBalancingPolicy(Args args, intptr_t initial_refcount = 1); 387 ~LoadBalancingPolicy() override; 388 389 // Not copyable nor movable. 390 LoadBalancingPolicy(const LoadBalancingPolicy&) = delete; 391 LoadBalancingPolicy& operator=(const LoadBalancingPolicy&) = delete; 392 393 /// Returns the name of the LB policy. 394 virtual absl::string_view name() const = 0; 395 396 /// Updates the policy with new data from the resolver. Will be invoked 397 /// immediately after LB policy is constructed, and then again whenever 398 /// the resolver returns a new result. The returned status indicates 399 /// whether the LB policy accepted the update; if non-OK, informs 400 /// polling-based resolvers that they should go into backoff delay and 401 /// eventually reattempt the resolution. 
402 /// 403 /// The first time that UpdateLocked() is called, the LB policy will 404 /// generally not be able to determine the appropriate connectivity 405 /// state by the time UpdateLocked() returns (e.g., it will need to 406 /// wait for connectivity state notifications from each subchannel, 407 /// which will be delivered asynchronously). In this case, the LB 408 /// policy should not call the helper's UpdateState() method until it 409 /// does have a clear picture of the connectivity state (e.g., it 410 /// should wait for all subchannels to report connectivity state 411 /// before calling the helper's UpdateState() method), although it is 412 /// expected to do so within some short period of time. The parent of 413 /// the LB policy will assume that the policy's initial state is 414 /// CONNECTING and that picks should be queued. 415 virtual absl::Status UpdateLocked(UpdateArgs) = 0; // NOLINT 416 417 /// Tries to enter a READY connectivity state. 418 /// This is a no-op by default, since most LB policies never go into 419 /// IDLE state. ExitIdleLocked()420 virtual void ExitIdleLocked() {} 421 422 /// Resets connection backoff. 423 virtual void ResetBackoffLocked() = 0; 424 interested_parties()425 grpc_pollset_set* interested_parties() const { return interested_parties_; } 426 427 // Note: This must be invoked while holding the work_serializer. 428 void Orphan() override; 429 430 // A picker that returns PickResult::Queue for all picks. 431 // Also calls the parent LB policy's ExitIdleLocked() method when the 432 // first pick is seen. 
433 class QueuePicker final : public SubchannelPicker { 434 public: QueuePicker(RefCountedPtr<LoadBalancingPolicy> parent)435 explicit QueuePicker(RefCountedPtr<LoadBalancingPolicy> parent) 436 : parent_(std::move(parent)) {} 437 ~QueuePicker()438 ~QueuePicker() override { parent_.reset(DEBUG_LOCATION, "QueuePicker"); } 439 440 PickResult Pick(PickArgs args) override; 441 442 private: 443 Mutex mu_; 444 RefCountedPtr<LoadBalancingPolicy> parent_ ABSL_GUARDED_BY(&mu_); 445 }; 446 447 // A picker that returns PickResult::Fail for all picks. 448 class TransientFailurePicker final : public SubchannelPicker { 449 public: TransientFailurePicker(absl::Status status)450 explicit TransientFailurePicker(absl::Status status) : status_(status) {} 451 Pick(PickArgs)452 PickResult Pick(PickArgs /*args*/) override { 453 return PickResult::Fail(status_); 454 } 455 456 private: 457 absl::Status status_; 458 }; 459 460 protected: work_serializer()461 std::shared_ptr<WorkSerializer> work_serializer() const { 462 return work_serializer_; 463 } 464 channel_args()465 const ChannelArgs& channel_args() const { return channel_args_; } 466 467 // Note: LB policies MUST NOT call any method on the helper from their 468 // constructor. channel_control_helper()469 ChannelControlHelper* channel_control_helper() const { 470 return channel_control_helper_.get(); 471 } 472 473 /// Shuts down the policy. 474 virtual void ShutdownLocked() = 0; 475 476 private: 477 /// Work Serializer under which LB policy actions take place. 478 std::shared_ptr<WorkSerializer> work_serializer_; 479 /// Owned pointer to interested parties in load balancing decisions. 480 grpc_pollset_set* interested_parties_; 481 /// Channel control helper. 482 std::unique_ptr<ChannelControlHelper> channel_control_helper_; 483 /// Channel args passed in. 484 // TODO(roth): Rework Args so that we don't need to capture channel args here. 
485 ChannelArgs channel_args_; 486 }; 487 488 } // namespace grpc_core 489 490 #endif // GRPC_SRC_CORE_LOAD_BALANCING_LB_POLICY_H 491