xref: /aosp_15_r20/external/googleapis/google/cloud/dataproc/logging/autoscaler_log.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1// Copyright 2023 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.cloud.dataproc.logging;
18
19import "google/protobuf/duration.proto";
20
// Language-specific code generation options.
21option csharp_namespace = "Google.Cloud.Dataproc.Logging";
// Go import path, followed after the semicolon by the Go package name.
22option go_package = "cloud.google.com/go/dataproc/logging/loggingpb;loggingpb";
// Generate a separate .java file for each top-level message and enum.
23option java_multiple_files = true;
24option java_package = "com.google.cloud.dataproc.logging";
25
26// The short version of cluster configuration for Cloud Logging: the number
// of workers in each worker group. Besides describing a cluster size, this
// message is reused by `AutoscalerRecommendation.Inputs` to carry the minimum
// and maximum worker counts.
27message ClusterSize {
28  // The number of primary workers in the cluster.
29  int32 primary_worker_count = 1;
30
31  // The number of secondary workers in the cluster.
32  int32 secondary_worker_count = 2;
33}
34
35// The high-level Autoscaler state, reported in `AutoscalerStatus.state`.
36enum AutoscalerState {
  // Default value.
37  AUTOSCALER_STATE_UNSPECIFIED = 0;
38
39  // The Autoscaler is sleeping and waiting for the next update.
40  COOLDOWN = 1;
41
42  // The Autoscaler is in the process of calculating its recommendation on
43  // whether to scale the cluster, and if so, how to autoscale.
  //
  // NOTE: this value is numbered 6 although it is declared second. The
  // number, not the declaration order, identifies the value on the wire, so
  // it must not be renumbered.
44  RECOMMENDING = 6;
45
46  // The Autoscaler is scaling the cluster.
47  SCALING = 2;
48
49  // The Autoscaler has stopped.
50  STOPPED = 3;
51
52  // The Autoscaler has failed.
53  FAILED = 4;
54
55  // The Autoscaler is initializing.
56  INITIALIZING = 5;
57}
58
59// The Autoscaling decision type, reported in
// `AutoscalerRecommendation.Outputs.decision`.
60enum ScalingDecisionType {
  // Default value.
61  SCALING_DECISION_TYPE_UNSPECIFIED = 0;
62
63  // Increase the number of primary and/or secondary workers.
64  SCALE_UP = 1;
65
66  // Decrease the number of primary and/or secondary workers.
67  SCALE_DOWN = 2;
68
69  // Do not change the number of primary or secondary workers.
70  NO_SCALE = 3;
71
72  // Scale the primary and secondary worker groups in different directions.
73  MIXED = 4;
74
75  // Cancel the ongoing scale down operation.
76  CANCEL = 5;
77
78  // Do not cancel the ongoing scale down operation.
79  DO_NOT_CANCEL = 6;
80}
81
// A reason why the Autoscaler didn't add or remove more workers, reported in
// `AutoscalerRecommendation.Outputs.constraints_reached`.
82enum ConstrainingFactor {
  // Default value.
83  CONSTRAINING_FACTOR_UNSPECIFIED = 0;
84
85  // The project does not have sufficient regional, global, and/or preemptible
86  // quota to allocate a new VM.
87  SCALING_CAPPED_DUE_TO_LACK_OF_QUOTA = 1;
88
89  // All worker groups have reached maximum size. This message will not be
90  // issued if one group reached maximum size, but workers were able to be
91  // allocated to another group.
92  REACHED_MAXIMUM_CLUSTER_SIZE = 2;
93
94  // All worker groups have reached minimum size. This message will not be
95  // issued if workers were able to be removed from another group that had not
96  // reached minimum size.
97  REACHED_MINIMUM_CLUSTER_SIZE = 3;
98
99  // The secondary worker group cannot be scaled down by more than 1k nodes in a
100  // single update request.
101  SECONDARY_SCALEDOWN_SINGLE_REQUEST_LIMIT_REACHED = 4;
102}
103
104// The kind of metric input to the Autoscaling algorithm, reported in
// `AutoscalerRecommendation.Outputs.decision_metric`.
105enum MetricType {
106  // Default value.
107  METRIC_TYPE_UNSPECIFIED = 0;
108
109  // The YARN memory metric.
110  YARN_MEMORY = 1;
111
112  // The YARN cores or vCPUs metric.
113  YARN_CORES = 2;
114
115  // The number of executors in Spark serverless.
116  SPARK_EXECUTORS = 3;
117}
118
119// The main proto of this file. It is converted to JSON format and then
120// written to Logging.
121message AutoscalerLog {
122  // The current Autoscaler status.
123  AutoscalerStatus status = 1;
124
125  // Optional. The autoscaling recommendation, including its inputs, outputs,
126  // scaling decision, and detailed explanation.
127  AutoscalerRecommendation recommendation = 2;
128}
129
130// The Autoscaler's status, including its state and other details.
131message AutoscalerStatus {
132  // The high-level Autoscaler state.
133  AutoscalerState state = 1;
134
135  // A detailed, human-readable description of the Autoscaler status.
136  string details = 2;
137
138  // The ID of the cluster update operation.
139  string update_cluster_operation_id = 3;
140
141  // The error message from an Autoscaler exception, if any.
142  string error = 4;
143}
144
145// The inputs, outputs, and detailed explanation of the Autoscaling
146// recommendation.
147message AutoscalerRecommendation {
148  // The input values for the Autoscaling recommendation algorithm.
149  message Inputs {
150    // The metrics collected by the Dataproc agent running on the cluster.
151    // For example, {"avg-yarn-pending-memory": "1040 MB"}
152    map<string, string> cluster_metrics = 1;
153
154    // The cluster size before updating the cluster.
155    ClusterSize current_cluster_size = 2;
156
157    // The minimum worker counts for each instance group.
158    ClusterSize min_worker_counts = 3;
159
160    // The maximum worker counts for each instance group.
161    ClusterSize max_worker_counts = 4;
162  }
163
164  // The outputs of the Autoscaling recommendation algorithm.
165  message Outputs {
166    // The high-level autoscaling decision, such as SCALE_UP, SCALE_DOWN, or
167    // NO_SCALE.
168    ScalingDecisionType decision = 1;
169
170    // The recommended cluster size.
171    ClusterSize recommended_cluster_size = 2;
172
173    // The graceful decommission timeout for downscaling operations.
174    google.protobuf.Duration graceful_decommission_timeout = 3;
175
176    // Reasons why the Autoscaler didn't add or remove more workers.
177    repeated ConstrainingFactor constraints_reached = 4;
178
179    // Less significant recommendations that are not included in the
180    // `AutoscalerStatus.details` message.
181    repeated string additional_recommendation_details = 5;
182
183    // A unique ID for this recommendation that should be included when opening
184    // a support ticket.
185    string recommendation_id = 6;
186
187    // The metric source deciding the autoscaling recommendation.
188    MetricType decision_metric = 7;
189  }
190
191  // The autoscaling algorithm inputs.
192  Inputs inputs = 1;
193
194  // The algorithm outputs for the recommended cluster size.
195  Outputs outputs = 2;
196}
197