// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/cloud/aiplatform/v1/accelerator_type.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "MachineResourcesProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// Specification of a single machine.
message MachineSpec {
  // Immutable. The type of the machine.
  //
  // See the [list of machine types supported for
  // prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types).
  //
  // See the [list of machine types supported for custom
  // training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types).
  //
  // For [DeployedModel][google.cloud.aiplatform.v1.DeployedModel] this field is
  // optional, and the default value is `n1-standard-2`. For
  // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob] or as
  // part of [WorkerPoolSpec][google.cloud.aiplatform.v1.WorkerPoolSpec] this
  // field is required.
  string machine_type = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The type of accelerator(s) that may be attached to the machine
  // as per
  // [accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count].
  AcceleratorType accelerator_type = 2
      [(google.api.field_behavior) = IMMUTABLE];

  // The number of accelerators to attach to the machine.
  int32 accelerator_count = 3;
}

// A description of resources that are dedicated to a DeployedModel, and
// that need a higher degree of manual configuration.
message DedicatedResources {
  // Required. Immutable. The specification of a single machine used by the
  // prediction.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The minimum number of machine replicas this
  // DeployedModel will always be deployed on. This value must be greater than
  // or equal to 1.
  //
  // If traffic against the DeployedModel increases, it may dynamically be
  // deployed onto more replicas, and as traffic decreases, some of these extra
  // replicas may be freed.
  int32 min_replica_count = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The maximum number of replicas this DeployedModel may be
  // deployed on when the traffic against it increases. If the requested value
  // is too large, the deployment will error, but if deployment succeeds then
  // the ability to scale the model to that many replicas is guaranteed (barring
  // service outages). If traffic against the DeployedModel increases beyond
  // what its replicas at maximum may handle, a portion of the traffic will be
  // dropped. If this value is not provided,
  // [min_replica_count][google.cloud.aiplatform.v1.DedicatedResources.min_replica_count]
  // will be used as the default value.
  //
  // The value of this field impacts the charge against Vertex CPU and GPU
  // quotas. Specifically, you will be charged for (max_replica_count *
  // number of cores in the selected machine type) and (max_replica_count *
  // number of GPUs per replica in the selected machine type).
  int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The metric specifications that override a resource
  // utilization metric (CPU utilization, accelerator's duty cycle, and so on)
  // target value (default to 60 if not set). At most one entry is allowed per
  // metric.
  //
  // If
  // [machine_spec.accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count]
  // is above 0, the autoscaling will be based on both CPU utilization and
  // accelerator's duty cycle metrics. It scales up when either metric exceeds
  // its target value and scales down if both metrics are under their target
  // values. The default target value is 60 for both metrics.
  //
  // If
  // [machine_spec.accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count]
  // is 0, the autoscaling will be based on CPU utilization metric only with
  // default target value 60 if not explicitly set.
  //
  // For example, in the case of Online Prediction, if you want to override
  // target CPU utilization to 80, you should set
  // [autoscaling_metric_specs.metric_name][google.cloud.aiplatform.v1.AutoscalingMetricSpec.metric_name]
  // to `aiplatform.googleapis.com/prediction/online/cpu/utilization` and
  // [autoscaling_metric_specs.target][google.cloud.aiplatform.v1.AutoscalingMetricSpec.target]
  // to `80`.
  repeated AutoscalingMetricSpec autoscaling_metric_specs = 4
      [(google.api.field_behavior) = IMMUTABLE];
}

// A description of resources that to a large degree are decided by Vertex AI,
// and require only a modest additional configuration.
// Each Model supporting these resources documents its specific guidelines.
message AutomaticResources {
  // Immutable. The minimum number of replicas this DeployedModel will always
  // be deployed on. If traffic against it increases, it may dynamically be
  // deployed onto more replicas up to
  // [max_replica_count][google.cloud.aiplatform.v1.AutomaticResources.max_replica_count],
  // and as traffic decreases, some of these extra replicas may be freed. If the
  // requested value is too large, the deployment will error.
  int32 min_replica_count = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The maximum number of replicas this DeployedModel may be
  // deployed on when the traffic against it increases. If the requested value
  // is too large, the deployment will error, but if deployment succeeds then
  // the ability to scale the model to that many replicas is guaranteed (barring
  // service outages). If traffic against the DeployedModel increases beyond
  // what its replicas at maximum may handle, a portion of the traffic will be
  // dropped. If this value is not provided, no upper bound for scaling under
  // heavy traffic will be assumed, though Vertex AI may be unable to scale
  // beyond a certain replica number.
  int32 max_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];
}

// A description of resources that are used for performing batch operations, are
// dedicated to a Model, and need manual configuration.
message BatchDedicatedResources {
  // Required. Immutable. The specification of a single machine.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The number of machine replicas used at the start of the batch
  // operation. If not set, Vertex AI decides the starting number, not greater
  // than
  // [max_replica_count][google.cloud.aiplatform.v1.BatchDedicatedResources.max_replica_count].
  int32 starting_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The maximum number of machine replicas the batch operation may
  // be scaled to. The default value is 10.
  int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];
}

// Statistics information about resource consumption.
message ResourcesConsumed {
  // Output only. The number of replica hours used. Note that many replicas may
  // run in parallel, and additionally any given work may be queued for some
  // time. Therefore this value is not strictly related to wall time.
  double replica_hours = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Represents the spec of disk options.
message DiskSpec {
  // Type of the boot disk (default is "pd-ssd").
  // Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
  // "pd-standard" (Persistent Disk Hard Disk Drive).
  string boot_disk_type = 1;

  // Size in GB of the boot disk (default is 100GB).
  int32 boot_disk_size_gb = 2;
}

// Represents a mount configuration for Network File System (NFS) to mount.
message NfsMount {
  // Required. IP address of the NFS server.
  string server = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Source path exported from the NFS server.
  // Has to start with '/', and combined with the IP address, it indicates
  // the source mount path in the form of `server:path`.
  string path = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Destination mount path. The NFS will be mounted for the user
  // under /mnt/nfs/<mount_point>.
  string mount_point = 3 [(google.api.field_behavior) = REQUIRED];
}

// The metric specification that defines the target resource utilization
// (CPU utilization, accelerator's duty cycle, and so on) for calculating the
// desired replica count.
message AutoscalingMetricSpec {
  // Required. The resource metric name.
  // Supported metrics:
  //
  // * For Online Prediction:
  // * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle`
  // * `aiplatform.googleapis.com/prediction/online/cpu/utilization`
  string metric_name = 1 [(google.api.field_behavior) = REQUIRED];

  // The target resource utilization in percentage (1% - 100%) for the given
  // metric; once the real usage deviates from the target by a certain
  // percentage, the machine replicas change. The default value is 60
  // (representing 60%) if not provided.
  int32 target = 2;
}