notebooks/v1/execution.proto

// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.notebooks.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/notebooks/apiv1/notebookspb;notebookspb";
option java_multiple_files = true;
option java_outer_classname = "ExecutionProto";
option java_package = "com.google.cloud.notebooks.v1";
option (google.api.resource_definition) = {
  type: "aiplatform.googleapis.com/Tensorboard"
  pattern: "projects/{project}/locations/{location}/tensorboards/{tensorboard}"
};

// The description a notebook execution workload.
message ExecutionTemplate {
  // Required. Specifies the machine types, the number of replicas for workers
  // and parameter servers.
  enum ScaleTier {
    // Unspecified Scale Tier.
    SCALE_TIER_UNSPECIFIED = 0;

    // A single worker instance. This tier is suitable for learning how to use
    // Cloud ML, and for experimenting with new models using small datasets.
    BASIC = 1;

    // Many workers and a few parameter servers.
    STANDARD_1 = 2;

    // A large number of workers with many parameter servers.
    PREMIUM_1 = 3;

    // A single worker instance with a K80 GPU.
    BASIC_GPU = 4;

    // A single worker instance with a Cloud TPU.
    BASIC_TPU = 5;

    // The CUSTOM tier is not a set tier, but rather enables you to use your
    // own cluster specification. When you use this tier, set values to
    // configure your processing cluster according to these guidelines:
    //
    // *   You _must_ set `ExecutionTemplate.masterType` to specify the type
    //     of machine to use for your master node. This is the only required
    //     setting.
    CUSTOM = 6;
  }

  // Hardware accelerator types for AI Platform Training jobs.
  enum SchedulerAcceleratorType {
    // Unspecified accelerator type. Default to no GPU.
    SCHEDULER_ACCELERATOR_TYPE_UNSPECIFIED = 0;

    // Nvidia Tesla K80 GPU.
    NVIDIA_TESLA_K80 = 1;

    // Nvidia Tesla P100 GPU.
    NVIDIA_TESLA_P100 = 2;

    // Nvidia Tesla V100 GPU.
    NVIDIA_TESLA_V100 = 3;

    // Nvidia Tesla P4 GPU.
    NVIDIA_TESLA_P4 = 4;

    // Nvidia Tesla T4 GPU.
    NVIDIA_TESLA_T4 = 5;

    // Nvidia Tesla A100 GPU.
    NVIDIA_TESLA_A100 = 10;

    // TPU v2.
    TPU_V2 = 6;

    // TPU v3.
    TPU_V3 = 7;
  }

  // Definition of a hardware accelerator. Note that not all combinations
  // of `type` and `core_count` are valid. Check [GPUs on
  // Compute Engine](https://cloud.google.com/compute/docs/gpus) to find a valid
  // combination. TPUs are not supported.
  message SchedulerAcceleratorConfig {
    // Type of this accelerator.
    SchedulerAcceleratorType type = 1;

    // Count of cores of this accelerator.
    int64 core_count = 2;
  }

  // The backend used for this execution.
  enum JobType {
    // No type specified.
    JOB_TYPE_UNSPECIFIED = 0;

    // Custom Job in `aiplatform.googleapis.com`.
    // Default value for an execution.
    VERTEX_AI = 1;

    // Run execution on a cluster with Dataproc as a job.
    // https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs
    DATAPROC = 2;
  }

  // Parameters used in Dataproc JobType executions.
  message DataprocParameters {
    // URI for cluster used to run Dataproc execution.
    // Format: `projects/{PROJECT_ID}/regions/{REGION}/clusters/{CLUSTER_NAME}`
    string cluster = 1;
  }

  // Parameters used in Vertex AI JobType executions.
  message VertexAIParameters {
    // The full name of the Compute Engine
    // [network](https://cloud.google.com/compute/docs/networks-and-firewalls#networks)
    // to which the Job should be peered. For example,
    // `projects/12345/global/networks/myVPC`.
    // [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert)
    // is of the form `projects/{project}/global/networks/{network}`.
    // Where `{project}` is a project number, as in `12345`, and `{network}` is
    // a network name.
    //
    // Private services access must already be configured for the network. If
    // left unspecified, the job is not peered with any network.
    string network = 1;

    // Environment variables.
    // At most 100 environment variables can be specified and unique.
    // Example: `GCP_BUCKET=gs://my-bucket/samples/`
    map<string, string> env = 2;
  }

  // Required. Scale tier of the hardware used for notebook execution.
  // DEPRECATED Will be discontinued. As right now only CUSTOM is supported.
  ScaleTier scale_tier = 1 [
    deprecated = true,
    (google.api.field_behavior) = REQUIRED
  ];

  // Specifies the type of virtual machine to use for your training
  // job's master worker. You must specify this field when `scaleTier` is set to
  // `CUSTOM`.
  //
  // You can use certain Compute Engine machine types directly in this field.
  // The following types are supported:
  //
  // - `n1-standard-4`
  // - `n1-standard-8`
  // - `n1-standard-16`
  // - `n1-standard-32`
  // - `n1-standard-64`
  // - `n1-standard-96`
  // - `n1-highmem-2`
  // - `n1-highmem-4`
  // - `n1-highmem-8`
  // - `n1-highmem-16`
  // - `n1-highmem-32`
  // - `n1-highmem-64`
  // - `n1-highmem-96`
  // - `n1-highcpu-16`
  // - `n1-highcpu-32`
  // - `n1-highcpu-64`
  // - `n1-highcpu-96`
  //
  //
  // Alternatively, you can use the following legacy machine types:
  //
  // - `standard`
  // - `large_model`
  // - `complex_model_s`
  // - `complex_model_m`
  // - `complex_model_l`
  // - `standard_gpu`
  // - `complex_model_m_gpu`
  // - `complex_model_l_gpu`
  // - `standard_p100`
  // - `complex_model_m_p100`
  // - `standard_v100`
  // - `large_model_v100`
  // - `complex_model_m_v100`
  // - `complex_model_l_v100`
  //
  //
  // Finally, if you want to use a TPU for training, specify `cloud_tpu` in this
  // field. Learn more about the [special configuration options for training
  // with
  // TPU](https://cloud.google.com/ai-platform/training/docs/using-tpus#configuring_a_custom_tpu_machine).
  string master_type = 2;

  // Configuration (count and accelerator type) for hardware running notebook
  // execution.
  SchedulerAcceleratorConfig accelerator_config = 3;

  // Labels for execution.
  // If execution is scheduled, a field included will be 'nbs-scheduled'.
  // Otherwise, it is an immediate execution, and an included field will be
  // 'nbs-immediate'. Use fields to efficiently index between various types of
  // executions.
  map<string, string> labels = 4;

  // Path to the notebook file to execute.
  // Must be in a Google Cloud Storage bucket.
  // Format: `gs://{bucket_name}/{folder}/{notebook_file_name}`
  // Ex: `gs://notebook_user/scheduled_notebooks/sentiment_notebook.ipynb`
  string input_notebook_file = 5;

  // Container Image URI to a DLVM
  // Example: 'gcr.io/deeplearning-platform-release/base-cu100'
  // More examples can be found at:
  // https://cloud.google.com/ai-platform/deep-learning-containers/docs/choosing-container
  string container_image_uri = 6;

  // Path to the notebook folder to write to.
  // Must be in a Google Cloud Storage bucket path.
  // Format: `gs://{bucket_name}/{folder}`
  // Ex: `gs://notebook_user/scheduled_notebooks`
  string output_notebook_folder = 7;

  // Parameters to be overridden in the notebook during execution.
  // Ref https://papermill.readthedocs.io/en/latest/usage-parameterize.html on
  // how to specifying parameters in the input notebook and pass them here
  // in an YAML file.
  // Ex: `gs://notebook_user/scheduled_notebooks/sentiment_notebook_params.yaml`
  string params_yaml_file = 8;

  // Parameters used within the 'input_notebook_file' notebook.
  string parameters = 9;

  // The email address of a service account to use when running the execution.
  // You must have the `iam.serviceAccounts.actAs` permission for the specified
  // service account.
  string service_account = 10;

  // The type of Job to be used on this execution.
  JobType job_type = 11;

  // Parameters for an execution type.
  // NOTE: There are currently no extra parameters for VertexAI jobs.
  oneof job_parameters {
    // Parameters used in Dataproc JobType executions.
    DataprocParameters dataproc_parameters = 12;

    // Parameters used in Vertex AI JobType executions.
    VertexAIParameters vertex_ai_parameters = 13;
  }

  // Name of the kernel spec to use. This must be specified if the
  // kernel spec name on the execution target does not match the name in the
  // input notebook file.
  string kernel_spec = 14;

  // The name of a Vertex AI [Tensorboard] resource to which this execution
  // will upload Tensorboard logs.
  // Format:
  // `projects/{project}/locations/{location}/tensorboards/{tensorboard}`
  string tensorboard = 15 [(google.api.resource_reference) = {
                             type: "aiplatform.googleapis.com/Tensorboard"
                           }];
}

// The definition of a single executed notebook.
message Execution {
  option (google.api.resource) = {
    type: "notebooks.googleapis.com/Execution"
    pattern: "projects/{project}/location/{location}/executions/{execution}"
  };

  // Enum description of the state of the underlying AIP job.
  enum State {
    // The job state is unspecified.
    STATE_UNSPECIFIED = 0;

    // The job has been just created and processing has not yet begun.
    QUEUED = 1;

    // The service is preparing to execution the job.
    PREPARING = 2;

    // The job is in progress.
    RUNNING = 3;

    // The job completed successfully.
    SUCCEEDED = 4;

    // The job failed.
    // `error_message` should contain the details of the failure.
    FAILED = 5;

    // The job is being cancelled.
    // `error_message` should describe the reason for the cancellation.
    CANCELLING = 6;

    // The job has been cancelled.
    // `error_message` should describe the reason for the cancellation.
    CANCELLED = 7;

    // The job has become expired (relevant to Vertex AI jobs)
    // https://cloud.google.com/vertex-ai/docs/reference/rest/v1/JobState
    EXPIRED = 9;

    // The Execution is being created.
    INITIALIZING = 10;
  }

  // execute metadata including name, hardware spec, region, labels, etc.
  ExecutionTemplate execution_template = 1;

  // Output only. The resource name of the execute. Format:
  // `projects/{project_id}/locations/{location}/executions/{execution_id}`
  string name = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Name used for UI purposes.
  // Name can only contain alphanumeric characters and underscores '_'.
  string display_name = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // A brief description of this execution.
  string description = 4;

  // Output only. Time the Execution was instantiated.
  google.protobuf.Timestamp create_time = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time the Execution was last updated.
  google.protobuf.Timestamp update_time = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. State of the underlying AI Platform job.
  State state = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output notebook file generated by this execution
  string output_notebook_file = 8;

  // Output only. The URI of the external job used to execute the notebook.
  string job_uri = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
}