// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/encryption_spec.proto";
import "google/cloud/aiplatform/v1/explanation.proto";
import "google/cloud/aiplatform/v1/io.proto";
import "google/cloud/aiplatform/v1/machine_resources.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "EndpointProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// A resource into which Models are deployed. After deployment, the Endpoint
// is called to obtain predictions and explanations.
message Endpoint {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/Endpoint"
    pattern: "projects/{project}/locations/{location}/endpoints/{endpoint}"
    pattern: "projects/{project}/locations/{location}/publishers/{publisher}/models/{model}"
  };

  // Output only. The resource name of the Endpoint.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The display name of the Endpoint.
  // The name can be up to 128 characters long and can consist of any UTF-8
  // characters.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // The description of the Endpoint.
  string description = 3;

  // Output only. The models deployed in this Endpoint.
  // To add or remove DeployedModels use
  // [EndpointService.DeployModel][google.cloud.aiplatform.v1.EndpointService.DeployModel]
  // and
  // [EndpointService.UndeployModel][google.cloud.aiplatform.v1.EndpointService.UndeployModel]
  // respectively.
  repeated DeployedModel deployed_models = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // A map from a DeployedModel's ID to the percentage of this Endpoint's
  // traffic that should be forwarded to that DeployedModel.
  //
  // If a DeployedModel's ID is not listed in this map, then it receives no
  // traffic.
  //
  // The traffic percentage values must add up to 100, or the map must be
  // empty if the Endpoint is not to accept any traffic at the moment.
  map<string, int32> traffic_split = 5;

  // Used to perform consistent read-modify-write updates. If not set, a blind
  // "overwrite" update happens.
  string etag = 6;

  // The labels with user-defined metadata to organize your Endpoints.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  map<string, string> labels = 7;

  // Output only. Timestamp when this Endpoint was created.
  google.protobuf.Timestamp create_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Timestamp when this Endpoint was last updated.
  google.protobuf.Timestamp update_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Customer-managed encryption key spec for an Endpoint. If set, this
  // Endpoint and all sub-resources of this Endpoint will be secured by
  // this key.
  EncryptionSpec encryption_spec = 10;

  // Optional. The full name of the Google Compute Engine
  // [network](https://cloud.google.com/compute/docs/networks-and-firewalls#networks)
  // to which the Endpoint should be peered.
  //
  // Private services access must already be configured for the network. If
  // left unspecified, the Endpoint is not peered with any network.
  //
  // Only one of the fields,
  // [network][google.cloud.aiplatform.v1.Endpoint.network] or
  // [enable_private_service_connect][google.cloud.aiplatform.v1.Endpoint.enable_private_service_connect],
  // can be set.
  //
  // [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert):
  // `projects/{project}/global/networks/{network}`.
  // Where `{project}` is a project number, as in `12345`, and `{network}` is
  // the network name.
  string network = 13 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.resource_reference) = { type: "compute.googleapis.com/Network" }
  ];

  // Deprecated: If true, expose the Endpoint via private service connect.
  //
  // Only one of the fields,
  // [network][google.cloud.aiplatform.v1.Endpoint.network] or
  // [enable_private_service_connect][google.cloud.aiplatform.v1.Endpoint.enable_private_service_connect],
  // can be set.
  bool enable_private_service_connect = 17 [deprecated = true];

  // Output only. Resource name of the Model Monitoring job associated with
  // this Endpoint if monitoring is enabled by
  // [JobService.CreateModelDeploymentMonitoringJob][google.cloud.aiplatform.v1.JobService.CreateModelDeploymentMonitoringJob].
  // Format:
  // `projects/{project}/locations/{location}/modelDeploymentMonitoringJobs/{model_deployment_monitoring_job}`
  string model_deployment_monitoring_job = 14 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/ModelDeploymentMonitoringJob"
    }
  ];

  // Configures the request-response logging for online prediction.
  PredictRequestResponseLoggingConfig predict_request_response_logging_config =
      18;
}

// A deployment of a Model. Endpoints contain one or more DeployedModels.
message DeployedModel {
  // The prediction (for example, the machine) resources that the DeployedModel
  // uses. The user is billed for the resources (at least their minimal amount)
  // even if the DeployedModel receives no traffic.
  // Not all Models support all resource types. See
  // [Model.supported_deployment_resources_types][google.cloud.aiplatform.v1.Model.supported_deployment_resources_types].
  // Required except for Large Model Deploy use cases.
  oneof prediction_resources {
    // A description of resources that are dedicated to the DeployedModel, and
    // that need a higher degree of manual configuration.
    DedicatedResources dedicated_resources = 7;

    // A description of resources that to a large degree are decided by Vertex
    // AI, and require only a modest additional configuration.
    AutomaticResources automatic_resources = 8;
  }

  // Immutable. The ID of the DeployedModel. If not provided upon deployment,
  // Vertex AI will generate a value for this ID.
  //
  // This value should be 1-10 characters, and valid characters are `/[0-9]/`.
  string id = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Required. The resource name of the Model that this is the deployment of.
  // Note that the Model may be in a different location than the DeployedModel's
  // Endpoint.
  //
  // The resource name may contain a version ID or version alias to specify
  // the version, for example
  // `projects/{project}/locations/{location}/models/{model}@2` or
  // `projects/{project}/locations/{location}/models/{model}@golden`.
  // If no version is specified, the default version will be deployed.
  string model = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Model"
    }
  ];

  // Output only. The version ID of the model that is deployed.
  string model_version_id = 18 [(google.api.field_behavior) = OUTPUT_ONLY];

  // The display name of the DeployedModel. If not provided upon creation,
  // the Model's display_name is used.
  string display_name = 3;

  // Output only. Timestamp when the DeployedModel was created.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Explanation configuration for this DeployedModel.
  //
  // When deploying a Model using
  // [EndpointService.DeployModel][google.cloud.aiplatform.v1.EndpointService.DeployModel],
  // this value overrides the value of
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec].
  // All fields of
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // are optional in the request. If a field of
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // is not populated, the value of the same field of
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec]
  // is inherited. If the corresponding
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec]
  // is not populated, all fields of the
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // will be used for the explanation configuration.
  ExplanationSpec explanation_spec = 9;

  // The service account that the DeployedModel's container runs as. Specify the
  // email address of the service account. If this service account is not
  // specified, the container runs as a service account that doesn't have access
  // to the resource project.
  //
  // Users deploying the Model must have the `iam.serviceAccounts.actAs`
  // permission on this service account.
  string service_account = 11;

  // For custom-trained Models and AutoML Tabular Models, the container of the
  // DeployedModel instances will send `stderr` and `stdout` streams to
  // Cloud Logging by default. Note that these logs incur costs, which are
  // subject to [Cloud Logging
  // pricing](https://cloud.google.com/logging/pricing).
  //
  // Users can disable container logging by setting this flag to true.
  bool disable_container_logging = 15;

  // If true, online prediction access logs are sent to Cloud
  // Logging.
  // These logs are like standard server access logs, containing
  // information like timestamp and latency for each prediction request.
  //
  // Note that logs may incur a cost, especially if your project
  // receives prediction requests at a high queries per second rate (QPS).
  // Estimate your costs before enabling this option.
  bool enable_access_logging = 13;

  // Output only. Provide paths for users to send predict/explain/health
  // requests directly to the deployed model services running on Cloud via
  // private services access. This field is populated if
  // [network][google.cloud.aiplatform.v1.Endpoint.network] is configured.
  PrivateEndpoints private_endpoints = 14
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// PrivateEndpoints proto is used to provide paths for users to send
// requests privately.
// To send requests via private service access, use predict_http_uri,
// explain_http_uri or health_http_uri. To send requests via private service
// connect, use service_attachment.
message PrivateEndpoints {
  // Output only. HTTP(S) path to send prediction requests.
  string predict_http_uri = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. HTTP(S) path to send explain requests.
  string explain_http_uri = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. HTTP(S) path to send health check requests.
  string health_http_uri = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The name of the service attachment resource. Populated if
  // private service connect is enabled.
  string service_attachment = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Configuration for logging request-response to a BigQuery table.
message PredictRequestResponseLoggingConfig {
  // Whether logging is enabled.
  bool enabled = 1;

  // Fraction of requests to be logged, in the range (0, 1].
  double sampling_rate = 2;

  // BigQuery table for logging.
  // If only given a project, a new dataset will be created with name
  // `logging_<endpoint-display-name>_<endpoint-id>` where
  // `<endpoint-display-name>` will be made BigQuery-dataset-name compatible
  // (e.g. most special characters will become underscores). If no table name
  // is given, a new table will be created with name
  // `request_response_logging`.
  BigQueryDestination bigquery_destination = 3;
}
