1 // 2 // Copyright 2015 gRPC authors. 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 17 #ifndef GRPC_SRC_CORE_LIB_LOAD_BALANCING_LB_POLICY_H 18 #define GRPC_SRC_CORE_LIB_LOAD_BALANCING_LB_POLICY_H 19 20 #include <grpc/support/port_platform.h> 21 22 #include <stddef.h> 23 #include <stdint.h> 24 25 #include <memory> 26 #include <string> 27 #include <utility> 28 #include <vector> 29 30 #include "absl/base/thread_annotations.h" 31 #include "absl/status/status.h" 32 #include "absl/status/statusor.h" 33 #include "absl/strings/string_view.h" 34 #include "absl/types/optional.h" 35 #include "absl/types/variant.h" 36 37 #include <grpc/event_engine/event_engine.h> 38 #include <grpc/impl/connectivity_state.h> 39 40 #include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h" 41 #include "src/core/lib/channel/channel_args.h" 42 #include "src/core/lib/debug/trace.h" 43 #include "src/core/lib/gprpp/debug_location.h" 44 #include "src/core/lib/gprpp/dual_ref_counted.h" 45 #include "src/core/lib/gprpp/orphanable.h" 46 #include "src/core/lib/gprpp/ref_counted.h" 47 #include "src/core/lib/gprpp/ref_counted_ptr.h" 48 #include "src/core/lib/gprpp/sync.h" 49 #include "src/core/lib/gprpp/work_serializer.h" 50 #include "src/core/lib/iomgr/iomgr_fwd.h" 51 #include "src/core/lib/load_balancing/subchannel_interface.h" 52 #include "src/core/lib/resolver/server_address.h" 53 54 namespace grpc_core { 55 56 extern DebugOnlyTraceFlag 
grpc_trace_lb_policy_refcount;

/// Interface for load balancing policies.
///
/// The following concepts are used here:
///
/// Channel: An abstraction that manages connections to backend servers
///   on behalf of a client application.  The application creates a channel
///   for a given server name and then sends calls (RPCs) on it, and the
///   channel figures out which backend server to send each call to.  A channel
///   contains a resolver, a load balancing policy (or a tree of LB policies),
///   and a set of one or more subchannels.
///
/// Subchannel: A subchannel represents a connection to one backend server.
///   The LB policy decides which subchannels to create, manages the
///   connectivity state of those subchannels, and decides which subchannel
///   to send any given call to.
///
/// Resolver: A plugin that takes a gRPC server URI and resolves it to a
///   list of one or more addresses and a service config, as described
///   in https://github.com/grpc/grpc/blob/master/doc/naming.md.  See
///   resolver.h for the resolver API.
///
/// Load Balancing (LB) Policy: A plugin that takes a list of addresses
///   from the resolver, maintains and manages a subchannel for each
///   backend address, and decides which subchannel to send each call on.
///   An LB policy has two parts:
///   - A LoadBalancingPolicy, which deals with the control plane work of
///     managing subchannels.
///   - A SubchannelPicker, which handles the data plane work of
///     determining which subchannel a given call should be sent on.

/// LoadBalancingPolicy API.
///
/// Note: All methods with a "Locked" suffix must be called from the
/// work_serializer passed to the constructor.
///
/// Any I/O done by the LB policy should be done under the pollset_set
/// returned by \a interested_parties().
95 // TODO(roth): Once we move to EventManager-based polling, remove the 96 // interested_parties() hooks from the API. 97 class LoadBalancingPolicy : public InternallyRefCounted<LoadBalancingPolicy> { 98 public: 99 /// Interface for accessing per-call state. 100 /// Implemented by the client channel and used by the SubchannelPicker. 101 class CallState { 102 public: 103 CallState() = default; 104 virtual ~CallState() = default; 105 106 /// Allocates memory associated with the call, which will be 107 /// automatically freed when the call is complete. 108 /// It is more efficient to use this than to allocate memory directly 109 /// for allocations that need to be made on a per-call basis. 110 virtual void* Alloc(size_t size) = 0; 111 }; 112 113 /// Interface for accessing metadata. 114 /// Implemented by the client channel and used by the SubchannelPicker. 115 class MetadataInterface { 116 public: 117 virtual ~MetadataInterface() = default; 118 119 ////////////////////////////////////////////////////////////////////////// 120 // TODO(ctiller): DO NOT MAKE THIS A PUBLIC API YET 121 // This needs some API design to ensure we can add/remove/replace metadata 122 // keys... we're deliberately not doing so to save some time whilst 123 // cleaning up the internal metadata representation, but we should add 124 // something back before making this a public API. 125 ////////////////////////////////////////////////////////////////////////// 126 127 /// Adds a key/value pair. 128 /// Does NOT take ownership of \a key or \a value. 129 /// Implementations must ensure that the key and value remain alive 130 /// until the call ends. If desired, they may be allocated via 131 /// CallState::Alloc(). 132 virtual void Add(absl::string_view key, absl::string_view value) = 0; 133 134 /// Produce a vector of metadata key/value strings for tests. 
135 virtual std::vector<std::pair<std::string, std::string>> 136 TestOnlyCopyToVector() = 0; 137 138 virtual absl::optional<absl::string_view> Lookup( 139 absl::string_view key, std::string* buffer) const = 0; 140 }; 141 142 /// Arguments used when picking a subchannel for a call. 143 struct PickArgs { 144 /// The path of the call. Indicates the RPC service and method name. 145 absl::string_view path; 146 /// Initial metadata associated with the picking call. 147 /// The LB policy may use the existing metadata to influence its routing 148 /// decision, and it may add new metadata elements to be sent with the 149 /// call to the chosen backend. 150 MetadataInterface* initial_metadata; 151 /// An interface for accessing call state. Can be used to allocate 152 /// memory associated with the call in an efficient way. 153 CallState* call_state; 154 }; 155 156 /// Interface for accessing backend metric data. 157 /// Implemented by the client channel and used by 158 /// SubchannelCallTrackerInterface. 159 class BackendMetricAccessor { 160 public: 161 virtual ~BackendMetricAccessor() = default; 162 163 /// Returns the backend metric data returned by the server for the call, 164 /// or null if no backend metric data was returned. 165 virtual const BackendMetricData* GetBackendMetricData() = 0; 166 }; 167 168 /// Interface for tracking subchannel calls. 169 /// Implemented by LB policy and used by the channel. 170 class SubchannelCallTrackerInterface { 171 public: 172 virtual ~SubchannelCallTrackerInterface() = default; 173 174 /// Called when a subchannel call is started after an LB pick. 175 virtual void Start() = 0; 176 177 /// Called when a subchannel call is completed. 178 /// The metadata may be modified by the implementation. However, the 179 /// implementation does not take ownership, so any data that needs to be 180 /// used after returning must be copied. 
181 struct FinishArgs { 182 absl::string_view peer_address; 183 absl::Status status; 184 MetadataInterface* trailing_metadata; 185 BackendMetricAccessor* backend_metric_accessor; 186 }; 187 virtual void Finish(FinishArgs args) = 0; 188 }; 189 190 /// The result of picking a subchannel for a call. 191 struct PickResult { 192 /// A successful pick. 193 struct Complete { 194 /// The subchannel to be used for the call. Must be non-null. 195 RefCountedPtr<SubchannelInterface> subchannel; 196 197 /// Optionally set by the LB policy when it wishes to be notified 198 /// about the resulting subchannel call. 199 /// Note that if the pick is abandoned by the channel, this may never 200 /// be used. 201 std::unique_ptr<SubchannelCallTrackerInterface> subchannel_call_tracker; 202 203 explicit Complete( 204 RefCountedPtr<SubchannelInterface> sc, 205 std::unique_ptr<SubchannelCallTrackerInterface> tracker = nullptr) subchannelPickResult::Complete206 : subchannel(std::move(sc)), 207 subchannel_call_tracker(std::move(tracker)) {} 208 }; 209 210 /// Pick cannot be completed until something changes on the control 211 /// plane. The client channel will queue the pick and try again the 212 /// next time the picker is updated. 213 struct Queue {}; 214 215 /// Pick failed. If the call is wait_for_ready, the client channel 216 /// will wait for the next picker and try again; otherwise, it 217 /// will immediately fail the call with the status indicated (although 218 /// the call may be retried if the client channel is configured to do so). 219 struct Fail { 220 absl::Status status; 221 FailPickResult::Fail222 explicit Fail(absl::Status s) : status(s) {} 223 }; 224 225 /// Pick will be dropped with the status specified. 226 /// Unlike FailPick, the call will be dropped even if it is 227 /// wait_for_ready, and retries (if configured) will be inhibited. 
228 struct Drop { 229 absl::Status status; 230 DropPickResult::Drop231 explicit Drop(absl::Status s) : status(s) {} 232 }; 233 234 // A pick result must be one of these types. 235 // Default to Queue, just to allow default construction. 236 absl::variant<Complete, Queue, Fail, Drop> result = Queue(); 237 238 PickResult() = default; 239 // NOLINTNEXTLINE(google-explicit-constructor) PickResultPickResult240 PickResult(Complete complete) : result(std::move(complete)) {} 241 // NOLINTNEXTLINE(google-explicit-constructor) PickResultPickResult242 PickResult(Queue queue) : result(queue) {} 243 // NOLINTNEXTLINE(google-explicit-constructor) PickResultPickResult244 PickResult(Fail fail) : result(std::move(fail)) {} 245 // NOLINTNEXTLINE(google-explicit-constructor) PickResultPickResult246 PickResult(Drop drop) : result(std::move(drop)) {} 247 }; 248 249 /// A subchannel picker is the object used to pick the subchannel to 250 /// use for a given call. This is implemented by the LB policy and 251 /// used by the client channel to perform picks. 252 /// 253 /// Pickers are intended to encapsulate all of the state and logic 254 /// needed on the data plane (i.e., to actually process picks for 255 /// individual calls sent on the channel) while excluding all of the 256 /// state and logic needed on the control plane (i.e., resolver 257 /// updates, connectivity state notifications, etc); the latter should 258 /// live in the LB policy object itself. 259 /// 260 /// Currently, pickers are always accessed from within the 261 /// client_channel data plane mutex, so they do not have to be 262 /// thread-safe. 263 class SubchannelPicker : public DualRefCounted<SubchannelPicker> { 264 public: 265 SubchannelPicker(); 266 267 virtual PickResult Pick(PickArgs args) = 0; 268 Orphan()269 void Orphan() override {} 270 }; 271 272 /// A proxy object implemented by the client channel and used by the 273 /// LB policy to communicate with the channel. 
274 // TODO(roth): Once insecure builds go away, add methods for accessing 275 // channel creds. By default, that should strip off the call creds 276 // attached to the channel creds, but there should also be a "use at 277 // your own risk" option to get the channel creds without stripping 278 // off the attached call creds. 279 class ChannelControlHelper { 280 public: 281 ChannelControlHelper() = default; 282 virtual ~ChannelControlHelper() = default; 283 284 /// Creates a new subchannel with the specified channel args. 285 virtual RefCountedPtr<SubchannelInterface> CreateSubchannel( 286 ServerAddress address, const ChannelArgs& args) = 0; 287 288 /// Sets the connectivity state and returns a new picker to be used 289 /// by the client channel. 290 virtual void UpdateState(grpc_connectivity_state state, 291 const absl::Status& status, 292 RefCountedPtr<SubchannelPicker> picker) = 0; 293 294 /// Requests that the resolver re-resolve. 295 virtual void RequestReresolution() = 0; 296 297 /// Returns the channel authority. 298 virtual absl::string_view GetAuthority() = 0; 299 300 /// Returns the EventEngine to use for timers and async work. 301 virtual grpc_event_engine::experimental::EventEngine* GetEventEngine() = 0; 302 303 /// Adds a trace message associated with the channel. 304 enum TraceSeverity { TRACE_INFO, TRACE_WARNING, TRACE_ERROR }; 305 virtual void AddTraceEvent(TraceSeverity severity, 306 absl::string_view message) = 0; 307 }; 308 309 /// Interface for configuration data used by an LB policy implementation. 310 /// Individual implementations will create a subclass that adds methods to 311 /// return the parameters they need. 312 class Config : public RefCounted<Config> { 313 public: 314 ~Config() override = default; 315 316 // Returns the load balancing policy name 317 virtual absl::string_view name() const = 0; 318 }; 319 320 /// Data passed to the UpdateLocked() method when new addresses and 321 /// config are available. 
322 struct UpdateArgs { 323 /// A list of addresses, or an error indicating a failure to obtain the 324 /// list of addresses. 325 absl::StatusOr<ServerAddressList> addresses; 326 /// The LB policy config. 327 RefCountedPtr<Config> config; 328 /// A human-readable note providing context about the name resolution that 329 /// provided this update. LB policies may wish to include this message 330 /// in RPC failure status messages. For example, if the update has an 331 /// empty list of addresses, this message might say "no DNS entries 332 /// found for <name>". 333 std::string resolution_note; 334 335 // TODO(roth): Before making this a public API, find a better 336 // abstraction for representing channel args. 337 ChannelArgs args; 338 }; 339 340 /// Args used to instantiate an LB policy. 341 struct Args { 342 /// The work_serializer under which all LB policy calls will be run. 343 std::shared_ptr<WorkSerializer> work_serializer; 344 /// Channel control helper. 345 /// Note: LB policies MUST NOT call any method on the helper from 346 /// their constructor. 347 std::unique_ptr<ChannelControlHelper> channel_control_helper; 348 /// Channel args. 349 // TODO(roth): Find a better channel args representation for this API. 350 ChannelArgs args; 351 }; 352 353 explicit LoadBalancingPolicy(Args args, intptr_t initial_refcount = 1); 354 ~LoadBalancingPolicy() override; 355 356 // Not copyable nor movable. 357 LoadBalancingPolicy(const LoadBalancingPolicy&) = delete; 358 LoadBalancingPolicy& operator=(const LoadBalancingPolicy&) = delete; 359 360 /// Returns the name of the LB policy. 361 virtual absl::string_view name() const = 0; 362 363 /// Updates the policy with new data from the resolver. Will be invoked 364 /// immediately after LB policy is constructed, and then again whenever 365 /// the resolver returns a new result. 
The returned status indicates 366 /// whether the LB policy accepted the update; if non-OK, informs 367 /// polling-based resolvers that they should go into backoff delay and 368 /// eventually reattempt the resolution. 369 /// 370 /// The first time that UpdateLocked() is called, the LB policy will 371 /// generally not be able to determine the appropriate connectivity 372 /// state by the time UpdateLocked() returns (e.g., it will need to 373 /// wait for connectivity state notifications from each subchannel, 374 /// which will be delivered asynchronously). In this case, the LB 375 /// policy should not call the helper's UpdateState() method until it 376 /// does have a clear picture of the connectivity state (e.g., it 377 /// should wait for all subchannels to report connectivity state 378 /// before calling the helper's UpdateState() method), although it is 379 /// expected to do so within some short period of time. The parent of 380 /// the LB policy will assume that the policy's initial state is 381 /// CONNECTING and that picks should be queued. 382 virtual absl::Status UpdateLocked(UpdateArgs) = 0; // NOLINT 383 384 /// Tries to enter a READY connectivity state. 385 /// This is a no-op by default, since most LB policies never go into 386 /// IDLE state. ExitIdleLocked()387 virtual void ExitIdleLocked() {} 388 389 /// Resets connection backoff. 390 virtual void ResetBackoffLocked() = 0; 391 interested_parties()392 grpc_pollset_set* interested_parties() const { return interested_parties_; } 393 394 // Note: This must be invoked while holding the work_serializer. 395 void Orphan() override; 396 397 // A picker that returns PickResult::Queue for all picks. 398 // Also calls the parent LB policy's ExitIdleLocked() method when the 399 // first pick is seen. 
400 class QueuePicker : public SubchannelPicker { 401 public: QueuePicker(RefCountedPtr<LoadBalancingPolicy> parent)402 explicit QueuePicker(RefCountedPtr<LoadBalancingPolicy> parent) 403 : parent_(std::move(parent)) {} 404 ~QueuePicker()405 ~QueuePicker() override { parent_.reset(DEBUG_LOCATION, "QueuePicker"); } 406 407 PickResult Pick(PickArgs args) override; 408 409 private: 410 Mutex mu_; 411 RefCountedPtr<LoadBalancingPolicy> parent_ ABSL_GUARDED_BY(&mu_); 412 }; 413 414 // A picker that returns PickResult::Fail for all picks. 415 class TransientFailurePicker : public SubchannelPicker { 416 public: TransientFailurePicker(absl::Status status)417 explicit TransientFailurePicker(absl::Status status) : status_(status) {} 418 Pick(PickArgs)419 PickResult Pick(PickArgs /*args*/) override { 420 return PickResult::Fail(status_); 421 } 422 423 private: 424 absl::Status status_; 425 }; 426 427 protected: work_serializer()428 std::shared_ptr<WorkSerializer> work_serializer() const { 429 return work_serializer_; 430 } 431 channel_args()432 const ChannelArgs& channel_args() const { return channel_args_; } 433 434 // Note: LB policies MUST NOT call any method on the helper from their 435 // constructor. channel_control_helper()436 ChannelControlHelper* channel_control_helper() const { 437 return channel_control_helper_.get(); 438 } 439 440 /// Shuts down the policy. 441 virtual void ShutdownLocked() = 0; 442 443 private: 444 /// Work Serializer under which LB policy actions take place. 445 std::shared_ptr<WorkSerializer> work_serializer_; 446 /// Owned pointer to interested parties in load balancing decisions. 447 grpc_pollset_set* interested_parties_; 448 /// Channel control helper. 449 std::unique_ptr<ChannelControlHelper> channel_control_helper_; 450 /// Channel args passed in. 451 // TODO(roth): Rework Args so that we don't need to capture channel args here. 
452 ChannelArgs channel_args_; 453 }; 454 455 } // namespace grpc_core 456 457 #endif // GRPC_SRC_CORE_LIB_LOAD_BALANCING_LB_POLICY_H 458