// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.logging;

import "google/protobuf/duration.proto";

option csharp_namespace = "Google.Cloud.Dataproc.Logging";
option go_package = "cloud.google.com/go/dataproc/logging/loggingpb;loggingpb";
option java_multiple_files = true;
option java_package = "com.google.cloud.dataproc.logging";

// The short version of cluster configuration for Cloud Logging.
message ClusterSize {
  // The number of primary workers in the cluster.
  int32 primary_worker_count = 1;

  // The number of secondary workers in the cluster.
  int32 secondary_worker_count = 2;
}

// The Autoscaler state.
enum AutoscalerState {
  // Default value.
  AUTOSCALER_STATE_UNSPECIFIED = 0;

  // The Autoscaler is sleeping and waiting for the next update.
  COOLDOWN = 1;

  // The Autoscaler is in the process of calculating its recommendation on
  // whether to scale the cluster, and if so, how to autoscale.
  //
  // Note: this value is numbered 6 although it is listed here out of numeric
  // order. Enum numbers are part of the contract and must not be renumbered.
  RECOMMENDING = 6;

  // The Autoscaler is scaling the cluster.
  SCALING = 2;

  // The Autoscaler has stopped.
  STOPPED = 3;

  // The Autoscaler has failed.
  FAILED = 4;

  // The Autoscaler is initializing.
  INITIALIZING = 5;
}

// The Autoscaling decision type.
enum ScalingDecisionType {
  // Default value.
  SCALING_DECISION_TYPE_UNSPECIFIED = 0;

  // Increase the number of primary and/or secondary workers.
  SCALE_UP = 1;

  // Decrease the number of primary and/or secondary workers.
  SCALE_DOWN = 2;

  // Not changing the number of primary or secondary workers.
  NO_SCALE = 3;

  // Scale the primary and secondary worker groups in different directions.
  MIXED = 4;

  // Cancel the ongoing scale down operation.
  CANCEL = 5;

  // Do not cancel the ongoing scale down operation.
  DO_NOT_CANCEL = 6;
}

// A reason why the Autoscaler didn't add or remove more workers.
enum ConstrainingFactor {
  // Default value.
  CONSTRAINING_FACTOR_UNSPECIFIED = 0;

  // The project does not have sufficient regional, global, and/or preemptible
  // quota to allocate a new VM.
  SCALING_CAPPED_DUE_TO_LACK_OF_QUOTA = 1;

  // All worker groups have reached maximum size. This message will not be
  // issued if one group reached maximum size, but workers were able to be
  // allocated to another group.
  REACHED_MAXIMUM_CLUSTER_SIZE = 2;

  // All worker groups have reached minimum size. This message will not be
  // issued if workers were able to be removed from another group that had not
  // reached minimum size.
  REACHED_MINIMUM_CLUSTER_SIZE = 3;

  // The secondary worker group cannot be scaled down by more than 1k nodes in a
  // single update request.
  SECONDARY_SCALEDOWN_SINGLE_REQUEST_LIMIT_REACHED = 4;
}

// The kind of metric input to the Autoscaling algorithm.
enum MetricType {
  // Default.
  METRIC_TYPE_UNSPECIFIED = 0;

  // The yarn memory metric.
  YARN_MEMORY = 1;

  // The yarn cores or vCPUs metric.
  YARN_CORES = 2;

  // The number of executors in Spark serverless.
  SPARK_EXECUTORS = 3;
}

// The main proto that will be converted to JSON format and then written to
// Logging.
message AutoscalerLog {
  // The current Autoscaler status.
  AutoscalerStatus status = 1;

  // Optional. The autoscaling recommendation including its inputs, outputs,
  // scaling decision, and detailed explanation.
  AutoscalerRecommendation recommendation = 2;
}

// The Autoscaler's status, including its state and other details.
message AutoscalerStatus {
  // The high-level Autoscaler state.
  AutoscalerState state = 1;

  // The detailed description of Autoscaler status.
  string details = 2;

  // The cluster update operation ID.
  string update_cluster_operation_id = 3;

  // Error message from an Autoscaler exception, if any.
  string error = 4;
}

// The inputs, outputs, and detailed explanation of the Autoscaling
// recommendation.
message AutoscalerRecommendation {
  // The input values for the Autoscaling recommendation algorithm.
  message Inputs {
    // The metrics collected by the Dataproc agent running on the cluster.
    // For example, {"avg-yarn-pending-memory": "1040 MB"}
    map<string, string> cluster_metrics = 1;

    // The cluster configuration before updating the cluster.
    ClusterSize current_cluster_size = 2;

    // The minimum worker counts for each instance group.
    ClusterSize min_worker_counts = 3;

    // The maximum worker counts for each instance group.
    ClusterSize max_worker_counts = 4;
  }

  // Autoscaler recommendations.
  message Outputs {
    // The high-level autoscaling decision, such as SCALE_UP, SCALE_DOWN,
    // NO_SCALE.
    ScalingDecisionType decision = 1;

    // The recommended cluster size.
    ClusterSize recommended_cluster_size = 2;

    // The graceful decommission timeout for downscaling operations.
    google.protobuf.Duration graceful_decommission_timeout = 3;

    // Reasons why the Autoscaler didn't add or remove more workers.
    repeated ConstrainingFactor constraints_reached = 4;

    // Less significant recommendations that are not included in the
    // `AutoscalerStatus.details` message.
    repeated string additional_recommendation_details = 5;

    // A unique id for this recommendation that should be included when opening
    // a support ticket.
    string recommendation_id = 6;

    // The metric source deciding the autoscaling recommendation.
    MetricType decision_metric = 7;
  }

  // The autoscaling algorithm inputs.
  Inputs inputs = 1;

  // The algorithm outputs for the recommended cluster size.
  Outputs outputs = 2;
}