1// Copyright 2023 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.cloud.aiplatform.v1;
18
19import "google/api/field_behavior.proto";
20import "google/cloud/aiplatform/v1/accelerator_type.proto";
21
22option csharp_namespace = "Google.Cloud.AIPlatform.V1";
23option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
24option java_multiple_files = true;
25option java_outer_classname = "MachineResourcesProto";
26option java_package = "com.google.cloud.aiplatform.v1";
27option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
28option ruby_package = "Google::Cloud::AIPlatform::V1";
29
// Describes the configuration of one machine.
message MachineSpec {
  // Immutable. The machine type to use.
  //
  // Supported machine types are listed separately for
  // [prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types)
  // and for [custom
  // training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types).
  //
  // This field is optional for
  // [DeployedModel][google.cloud.aiplatform.v1.DeployedModel], where it
  // defaults to `n1-standard-2`. It is required for
  // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob] and
  // when used as part of
  // [WorkerPoolSpec][google.cloud.aiplatform.v1.WorkerPoolSpec].
  string machine_type = 1 [
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The kind of accelerator(s) that may be attached to the
  // machine, in accordance with
  // [accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count].
  AcceleratorType accelerator_type = 2 [
    (google.api.field_behavior) = IMMUTABLE
  ];

  // How many accelerators to attach to the machine.
  int32 accelerator_count = 3;
}
56
// Resources reserved exclusively for a DeployedModel that call for a greater
// amount of manual configuration.
message DedicatedResources {
  // Required. Immutable. Specification of a single machine that serves the
  // prediction.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The smallest number of machine replicas on which
  // this DeployedModel is always deployed. Must be at least 1.
  //
  // When traffic to the DeployedModel grows, it may be dynamically deployed
  // onto additional replicas; when traffic drops, some of those additional
  // replicas may be released.
  int32 min_replica_count = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The largest number of replicas this DeployedModel may be
  // deployed on as its traffic grows. A requested value that is too large
  // causes the deployment to fail; once a deployment succeeds, however, the
  // ability to scale the model up to that many replicas is guaranteed
  // (barring service outages). Should traffic to the DeployedModel exceed
  // what the maximum number of replicas can handle, part of the traffic is
  // dropped. When this value is not provided,
  // [min_replica_count][google.cloud.aiplatform.v1.DedicatedResources.min_replica_count]
  // is used as the default.
  //
  // This field affects the charge against Vertex CPU and GPU quotas: you are
  // charged for (max_replica_count * number of cores in the selected machine
  // type) and for (max_replica_count * number of GPUs per replica in the
  // selected machine type).
  int32 max_replica_count = 3 [
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. Metric specifications that override the target value of a
  // resource utilization metric (CPU utilization, accelerator's duty cycle,
  // and so on). A metric's target defaults to 60 when not set. Each metric
  // may have at most one entry.
  //
  // When
  // [machine_spec.accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count]
  // is greater than 0, autoscaling considers both the CPU utilization and the
  // accelerator's duty cycle metrics: it scales up when either metric exceeds
  // its target value, and scales down if both metrics are below their target
  // values. Both metrics have a default target value of 60.
  //
  // When
  // [machine_spec.accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count]
  // is 0, autoscaling considers the CPU utilization metric alone, with a
  // default target value of 60 unless one is explicitly set.
  //
  // For example, to override the target CPU utilization to 80 for Online
  // Prediction, set
  // [autoscaling_metric_specs.metric_name][google.cloud.aiplatform.v1.AutoscalingMetricSpec.metric_name]
  // to `aiplatform.googleapis.com/prediction/online/cpu/utilization` and
  // [autoscaling_metric_specs.target][google.cloud.aiplatform.v1.AutoscalingMetricSpec.target]
  // to `80`.
  repeated AutoscalingMetricSpec autoscaling_metric_specs = 4 [
    (google.api.field_behavior) = IMMUTABLE
  ];
}
121
// Resources whose configuration is largely determined by Vertex AI and that
// need only a modest amount of additional configuration.
// Every Model that supports these resources documents its own guidelines.
message AutomaticResources {
  // Immutable. The smallest number of replicas on which this DeployedModel is
  // always deployed. When its traffic grows, it may be dynamically deployed
  // onto additional replicas, up to
  // [max_replica_count][google.cloud.aiplatform.v1.AutomaticResources.max_replica_count];
  // when traffic drops, some of those additional replicas may be released. A
  // requested value that is too large causes the deployment to fail.
  int32 min_replica_count = 1 [
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The largest number of replicas this DeployedModel may be
  // deployed on as its traffic grows. A requested value that is too large
  // causes the deployment to fail; once a deployment succeeds, however, the
  // ability to scale the model up to that many replicas is guaranteed
  // (barring service outages). Should traffic to the DeployedModel exceed
  // what the maximum number of replicas can handle, part of the traffic is
  // dropped. When this value is not provided, no upper bound on scaling under
  // heavy traffic is assumed, although Vertex AI may be unable to scale
  // beyond a certain number of replicas.
  int32 max_replica_count = 2 [
    (google.api.field_behavior) = IMMUTABLE
  ];
}
145
// Resources used to carry out batch operations that are dedicated to a Model
// and require manual configuration.
message BatchDedicatedResources {
  // Required. Immutable. Specification of a single machine.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. How many machine replicas are used when the batch operation
  // starts. When not set, Vertex AI chooses the starting number, which is not
  // greater than
  // [max_replica_count][google.cloud.aiplatform.v1.BatchDedicatedResources.max_replica_count].
  int32 starting_replica_count = 2 [
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The largest number of machine replicas to which the batch
  // operation may be scaled. Defaults to 10.
  int32 max_replica_count = 3 [
    (google.api.field_behavior) = IMMUTABLE
  ];
}
164
// Statistics about the resources that were consumed.
message ResourcesConsumed {
  // Output only. Number of replica hours used. Because many replicas may run
  // in parallel, and any given work may additionally be queued for some time,
  // this value is not strictly related to wall time.
  double replica_hours = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY
  ];
}
172
// Describes the disk options.
message DiskSpec {
  // The boot disk type. Defaults to "pd-ssd".
  // Accepted values are "pd-ssd" (Persistent Disk Solid State Drive) and
  // "pd-standard" (Persistent Disk Hard Disk Drive).
  string boot_disk_type = 1;

  // The boot disk size, in GB. Defaults to 100GB.
  int32 boot_disk_size_gb = 2;
}
183
// A mount configuration for a Network File System (NFS).
message NfsMount {
  // Required. The IP address of the NFS server.
  string server = 1 [
    (google.api.field_behavior) = REQUIRED
  ];

  // Required. The source path exported from the NFS server.
  // It must begin with '/'. Together with the IP address, it identifies the
  // source mount path in the form `server:path`.
  string path = 2 [
    (google.api.field_behavior) = REQUIRED
  ];

  // Required. The destination mount path. The NFS is mounted for the user
  // under /mnt/nfs/<mount_point>.
  string mount_point = 3 [
    (google.api.field_behavior) = REQUIRED
  ];
}
198
// A metric specification defining the target resource utilization (CPU
// utilization, accelerator's duty cycle, and so on) used to calculate the
// desired replica count.
message AutoscalingMetricSpec {
  // Required. Name of the resource metric.
  // The supported metrics are:
  //
  // * For Online Prediction:
  // * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle`
  // * `aiplatform.googleapis.com/prediction/online/cpu/utilization`
  string metric_name = 1 [
    (google.api.field_behavior) = REQUIRED
  ];

  // Target resource utilization for the given metric, as a percentage
  // (1% - 100%). The machine replicas change once the real usage deviates
  // from the target by a certain percentage. Defaults to 60 (representing
  // 60%) when not provided.
  int32 target = 2;
}
217