// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.genomics.v1alpha2;

import "google/api/annotations.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/code.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1alpha2;genomics";
option java_multiple_files = true;
option java_outer_classname = "PipelinesProto";
option java_package = "com.google.genomics.v1a";

// A service for running genomics pipelines.
service PipelinesV1Alpha2 {
  // Creates a pipeline that can be run later. Create takes a Pipeline that
  // has all fields other than `pipelineId` populated, and then returns
  // the same pipeline with `pipelineId` populated. This id can be used
  // to run the pipeline.
  //
  // Caller must have WRITE permission to the project.
  rpc CreatePipeline(CreatePipelineRequest) returns (Pipeline) {
    option (google.api.http) = {
      post: "/v1alpha2/pipelines"
      body: "pipeline"
    };
  }

  // Runs a pipeline. If `pipelineId` is specified in the request, then
  // run a saved pipeline. If `ephemeralPipeline` is specified, then run
  // that pipeline once without saving a copy.
  //
  // The caller must have READ permission to the project where the pipeline
  // is stored and WRITE permission to the project where the pipeline will be
  // run, as VMs will be created and storage will be used.
  rpc RunPipeline(RunPipelineRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1alpha2/pipelines:run"
      body: "*"
    };
  }

  // Retrieves a pipeline based on ID.
  //
  // Caller must have READ permission to the project.
  rpc GetPipeline(GetPipelineRequest) returns (Pipeline) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines/{pipeline_id}"
    };
  }

  // Lists pipelines.
  //
  // Caller must have READ permission to the project.
  rpc ListPipelines(ListPipelinesRequest) returns (ListPipelinesResponse) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines"
    };
  }

  // Deletes a pipeline based on ID.
  //
  // Caller must have WRITE permission to the project.
  rpc DeletePipeline(DeletePipelineRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1alpha2/pipelines/{pipeline_id}"
    };
  }

  // Gets controller configuration information. Should only be called
  // by VMs created by the Pipelines Service and not by end users.
  rpc GetControllerConfig(GetControllerConfigRequest)
      returns (ControllerConfig) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines:getControllerConfig"
    };
  }

  // Sets status of a given operation. Any new timestamps (as determined by
  // description) are appended to TimestampEvents. Should only be called by VMs
  // created by the Pipelines Service and not by end users.
  rpc SetOperationStatus(SetOperationStatusRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      put: "/v1alpha2/pipelines:setOperationStatus"
      body: "*"
    };
  }
}
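
// An illustrative call sequence for the two ways of running a pipeline
// described above. The paths come from the HTTP bindings declared on this
// service; the IDs and request bodies are made-up placeholders.
//
// ```
//   POST /v1alpha2/pipelines      (body: the Pipeline to create)                   ->  Pipeline{pipelineId: "abc123", ...}
//   POST /v1alpha2/pipelines:run  {pipelineId: "abc123", pipelineArgs: {...}}      ->  Operation
//
//   or, to run once without saving:
//   POST /v1alpha2/pipelines:run  {ephemeralPipeline: {...}, pipelineArgs: {...}}  ->  Operation
// ```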

// Describes a Compute Engine resource that is being managed by a running
// [pipeline][google.genomics.v1alpha2.Pipeline].
message ComputeEngine {
  // The instance on which the operation is running.
  string instance_name = 1;

  // The availability zone in which the instance resides.
  string zone = 2;

  // The machine type of the instance.
  string machine_type = 3;

  // The names of the disks that were created for this pipeline.
  repeated string disk_names = 4;
}

// Runtime metadata that will be populated in the
// [runtimeMetadata][google.genomics.v1.OperationMetadata.runtime_metadata]
// field of the Operation associated with a RunPipeline execution.
message RuntimeMetadata {
  // Execution information specific to Google Compute Engine.
  ComputeEngine compute_engine = 1;
}

// The pipeline object. Represents a transformation from a set of input
// parameters to a set of output parameters. The transformation is defined
// as a docker image and command to run within that image. Each pipeline
// is run on a Google Compute Engine VM. A pipeline can be created with the
// `create` method and then later run with the `run` method, or a pipeline can
// be defined and run all at once with the `run` method.
message Pipeline {
  // Required. The project in which to create the pipeline. The caller must have
  // WRITE access.
  string project_id = 1;

  // Required. A user specified pipeline name that does not have to be unique.
  // This name can be used for filtering Pipelines in ListPipelines.
  string name = 2;

  // User-specified description.
  string description = 3;

  // Input parameters of the pipeline.
  repeated PipelineParameter input_parameters = 8;

  // Output parameters of the pipeline.
  repeated PipelineParameter output_parameters = 9;

  // Required. The executor indicates in which environment the pipeline runs.
  oneof executor {
    // Specifies the docker run information.
    DockerExecutor docker = 5;
  }

  // Required. Specifies resource requirements for the pipeline run.
  // Required fields:
  //
  // *
  // [minimumCpuCores][google.genomics.v1alpha2.PipelineResources.minimum_cpu_cores]
  //
  // *
  // [minimumRamGb][google.genomics.v1alpha2.PipelineResources.minimum_ram_gb]
  PipelineResources resources = 6;

  // Unique pipeline id that is generated by the service when CreatePipeline
  // is called. Cannot be specified in the Pipeline used in the
  // CreatePipelineRequest, and will be populated in the response to
  // CreatePipeline and all subsequent Get and List calls. Indicates that the
  // service has registered this pipeline.
  string pipeline_id = 7;
}
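
// A minimal illustrative Pipeline, written in the same informal notation as
// the examples in the PipelineParameter documentation below. All names,
// images, and values are made-up placeholders.
//
// ```
// {
//   projectId: "my-project",
//   name: "count-lines",
//   inputParameters:  [{name: "input_file",  localCopy: {path: "in.txt",  disk: "data"}}],
//   outputParameters: [{name: "output_file", localCopy: {path: "out.txt", disk: "data"}}],
//   docker: {imageName: "ubuntu", cmd: "wc -l < /mnt/data/in.txt > /mnt/data/out.txt"},
//   resources: {minimumCpuCores: 1, minimumRamGb: 3.75,
//               disks: [{name: "data", type: PERSISTENT_HDD, mountPoint: "/mnt/data"}]}
// }
// ```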

// The request to create a pipeline. The pipeline field here should not have
// `pipelineId` populated, as that will be populated by the server.
message CreatePipelineRequest {
  // The pipeline to create. Should not have `pipelineId` populated.
  Pipeline pipeline = 1;
}

// The pipeline run arguments.
message RunPipelineArgs {
  // Required. The project in which to run the pipeline. The caller must have
  // WRITER access to all Google Cloud services and resources (e.g. Google
  // Compute Engine) that will be used.
  string project_id = 1;

  // Pipeline input arguments; keys are defined in the pipeline documentation.
  // All input parameters that do not have default values must be specified.
  // If parameters with defaults are specified here, the defaults will be
  // overridden.
  map<string, string> inputs = 2;

  // Pipeline output arguments; keys are defined in the pipeline
  // documentation. All output parameters without default values
  // must be specified. If parameters with defaults are specified
  // here, the defaults will be overridden.
  map<string, string> outputs = 3;

  // The Google Cloud Service Account that will be used to access data and
  // services. By default, the compute service account associated with
  // `projectId` is used.
  ServiceAccount service_account = 4;

  // This field is deprecated. Use `labels` instead. Client-specified pipeline
  // operation identifier.
  string client_id = 5;

  // Specifies resource requirements/overrides for the pipeline run.
  PipelineResources resources = 6;

  // Required. Logging options. Used by the service to communicate results
  // to the user.
  LoggingOptions logging = 7;

  // How long to keep the VM up after a failure (for example, the docker
  // command failed or copying input or output files failed). While the VM is
  // up, one can ssh into the VM to debug. Default is 0; maximum allowed value
  // is 1 day.
  google.protobuf.Duration keep_vm_alive_on_failure_duration = 8;

  // Labels to apply to this pipeline run. Labels will also be applied to
  // compute resources (VM, disks) created by this pipeline run. When listing
  // operations, operations can be [filtered by labels]
  // [google.longrunning.ListOperationsRequest.filter].
  // Label keys may not be empty; label values may be empty. Non-empty labels
  // must be 1-63 characters long, and comply with [RFC1035]
  // (https://www.ietf.org/rfc/rfc1035.txt).
  // Specifically, the name must be 1-63 characters long and match the regular
  // expression `[a-z]([-a-z0-9]*[a-z0-9])?` which means the first
  // character must be a lowercase letter, and all following characters must be
  // a dash, lowercase letter, or digit, except the last character, which cannot
  // be a dash.
  map<string, string> labels = 9;
}
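
// An illustrative RunPipelineArgs for the Pipeline sketched above; the
// project, bucket, and label values are made-up placeholders.
//
// ```
// {
//   projectId: "my-project",
//   inputs:  {"input_file":  "gs://my-bucket/in.txt"},
//   outputs: {"output_file": "gs://my-bucket/results/out.txt"},
//   logging: {gcsPath: "gs://my-bucket/logs"},
//   labels:  {"run-group": "batch-1"}
// }
// ```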

// The request to run a pipeline. If `pipelineId` is specified, it
// refers to a saved pipeline created with CreatePipeline and set as
// the `pipelineId` of the returned Pipeline object. If
// `ephemeralPipeline` is specified, that pipeline is run once
// with the given args and not saved. It is an error to specify both
// `pipelineId` and `ephemeralPipeline`. `pipelineArgs`
// must be specified.
message RunPipelineRequest {
  oneof pipeline {
    // The already created pipeline to run.
    string pipeline_id = 1;

    // A new pipeline object to run once and then delete.
    Pipeline ephemeral_pipeline = 2;
  }

  // The arguments to use when running this pipeline.
  RunPipelineArgs pipeline_args = 3;
}

// A request to get a saved pipeline by id.
message GetPipelineRequest {
  // Caller must have READ access to the project in which this pipeline
  // is defined.
  string pipeline_id = 1;
}

// A request to list pipelines in a given project. Pipelines can be
// filtered by name using `namePrefix`: all pipelines with names that
// begin with `namePrefix` will be returned. Uses standard pagination:
// `pageSize` indicates how many pipelines to return, and
// `pageToken` comes from a previous ListPipelinesResponse to
// indicate offset.
message ListPipelinesRequest {
  // Required. The name of the project to search for pipelines. Caller
  // must have READ access to this project.
  string project_id = 1;

  // Pipelines with names that match this prefix should be
  // returned. If unspecified, all pipelines in the project, up to
  // `pageSize`, will be returned.
  string name_prefix = 2;

  // Number of pipelines to return at once. Defaults to 256, and max
  // is 2048.
  int32 page_size = 3;

  // Token to use to indicate where to start getting results.
  // If unspecified, returns the first page of results.
  string page_token = 4;
}
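
// Illustrative pagination over ListPipelines (project, prefix, and token are
// placeholders): the first request omits `pageToken`; each following request
// passes the `nextPageToken` from the previous response until it is empty.
//
// ```
//   {projectId: "my-project", namePrefix: "count-", pageSize: 100}
//   {projectId: "my-project", namePrefix: "count-", pageSize: 100, pageToken: <nextPageToken from the first response>}
// ```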

// The response of ListPipelines. Contains at most `pageSize`
// pipelines. If it contains `pageSize` pipelines, and more pipelines
// exist, then `nextPageToken` will be populated and should be
// used as the `pageToken` argument to a subsequent ListPipelines
// request.
message ListPipelinesResponse {
  // The matched pipelines.
  repeated Pipeline pipelines = 1;

  // The token to use to get the next page of results.
  string next_page_token = 2;
}

// The request to delete a saved pipeline by ID.
message DeletePipelineRequest {
  // Caller must have WRITE access to the project in which this pipeline
  // is defined.
  string pipeline_id = 1;
}

// Request to get controller configuration. Should only be used
// by VMs created by the Pipelines Service and not by end users.
message GetControllerConfigRequest {
  // The operation to retrieve controller configuration for.
  string operation_id = 1;

  uint64 validation_token = 2;
}

// Stores the information that the controller will fetch from the
// server in order to run. Should only be used by VMs created by the
// Pipelines Service and not by end users.
message ControllerConfig {
  message RepeatedString {
    repeated string values = 1;
  }

  string image = 1;

  string cmd = 2;

  string gcs_log_path = 3;

  string machine_type = 4;

  map<string, string> vars = 5;

  map<string, string> disks = 6;

  map<string, RepeatedString> gcs_sources = 7;

  map<string, RepeatedString> gcs_sinks = 8;
}

// Stores the list of events and times they occurred for major events in job
// execution.
message TimestampEvent {
  // String indicating the type of event.
  string description = 1;

  // The time this event occurred.
  google.protobuf.Timestamp timestamp = 2;
}

// Request to set operation status. Should only be used by VMs
// created by the Pipelines Service and not by end users.
message SetOperationStatusRequest {
  string operation_id = 1;

  repeated TimestampEvent timestamp_events = 2;

  google.rpc.Code error_code = 3;

  string error_message = 4;

  uint64 validation_token = 5;
}

// A Google Cloud Service Account.
message ServiceAccount {
  // Email address of the service account. Defaults to `default`,
  // which uses the compute service account associated with the project.
  string email = 1;

  // List of scopes to be enabled for this service account on the VM.
  // The following scopes are automatically included:
  //
  // * https://www.googleapis.com/auth/compute
  // * https://www.googleapis.com/auth/devstorage.full_control
  // * https://www.googleapis.com/auth/genomics
  // * https://www.googleapis.com/auth/logging.write
  // * https://www.googleapis.com/auth/monitoring.write
  repeated string scopes = 2;
}

// The logging options for the pipeline run.
message LoggingOptions {
  // The location in Google Cloud Storage to which the pipeline logs
  // will be copied. Can be specified as a fully qualified directory
  // path, in which case logs will be output with a unique identifier
  // as the filename in that directory, or as a fully specified path,
  // which must end in `.log`, in which case that path will be
  // used, and the user must ensure that logs are not
  // overwritten. Stdout and stderr logs from the run are also
  // generated and output as `-stdout.log` and `-stderr.log`.
  string gcs_path = 1;
}
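
// Two illustrative `gcsPath` values (the bucket and file names are made up);
// the resulting log file names follow the description above and are not
// guaranteed here.
//
// ```
//   gcsPath: "gs://my-bucket/logs/"          (directory: the service picks a unique log file name)
//   gcsPath: "gs://my-bucket/logs/run1.log"  (fully specified path ending in `.log`)
// ```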

// The system resources for the pipeline run.
message PipelineResources {
  // A Google Compute Engine disk resource specification.
  message Disk {
    // The types of disks that may be attached to VMs.
    enum Type {
      // Default disk type. Use one of the other options below.
      TYPE_UNSPECIFIED = 0;

      // Specifies a Google Compute Engine persistent hard disk. See
      // https://cloud.google.com/compute/docs/disks/#pdspecs for details.
      PERSISTENT_HDD = 1;

      // Specifies a Google Compute Engine persistent solid-state disk. See
      // https://cloud.google.com/compute/docs/disks/#pdspecs for details.
      PERSISTENT_SSD = 2;

      // Specifies a Google Compute Engine local SSD.
      // See https://cloud.google.com/compute/docs/disks/local-ssd for details.
      LOCAL_SSD = 3;
    }

    // Required. The name of the disk that can be used in the pipeline
    // parameters. Must be 1 - 63 characters.
    // The name "boot" is reserved for system use.
    string name = 1;

    // Required. The type of the disk to create.
    Type type = 2;

    // The size of the disk. Defaults to 500 (GB).
    // This field is not applicable for local SSD.
    int32 size_gb = 3;

    // The full or partial URL of the persistent disk to attach. See
    // https://cloud.google.com/compute/docs/reference/latest/instances#resource
    // and
    // https://cloud.google.com/compute/docs/disks/persistent-disks#snapshots
    // for more details.
    string source = 4;

    // Deprecated. Disks created by the Pipelines API will be deleted at the end
    // of the pipeline run, regardless of what this field is set to.
    bool auto_delete = 6;

    // Required at create time and cannot be overridden at run time.
    // Specifies the path in the docker container where files on
    // this disk should be located. For example, if `mountPoint`
    // is `/mnt/disk`, and the parameter has `localPath`
    // `inputs/file.txt`, the docker container can access the data at
    // `/mnt/disk/inputs/file.txt`.
    string mount_point = 8;
  }

  // The minimum number of cores to use. Defaults to 1.
  int32 minimum_cpu_cores = 1;

  // Whether to use preemptible VMs. Defaults to `false`. To use this, it must
  // be true at both create time and run time; it cannot be true at run time if
  // false at create time.
  bool preemptible = 2;

  // The minimum amount of RAM to use. Defaults to 3.75 (GB).
  double minimum_ram_gb = 3;

  // Disks to attach.
  repeated Disk disks = 4;

  // List of Google Compute Engine availability zones to which resource
  // creation will be restricted. If empty, any zone may be chosen.
  repeated string zones = 5;

  // The size of the boot disk. Defaults to 10 (GB).
  int32 boot_disk_size_gb = 6;

  // Whether to assign an external IP to the instance. This is an experimental
  // feature that may go away. Defaults to false.
  // Corresponds to the `--no_address` flag for [gcloud compute instances create]
  // (https://cloud.google.com/sdk/gcloud/reference/compute/instances/create).
  // To use this, it must be true at both create time and run time; it cannot be
  // true at run time if false at create time. If you need to ssh into a
  // private-IP VM for debugging, you can ssh to a public VM and then ssh into
  // the private VM's internal IP. If noAddress is set, this pipeline run may
  // only load docker images from Google Container Registry and not Docker Hub.
  // **Note: To use this option, your project must be in the Google Access for
  // Private IPs Early Access Program.**
  bool no_address = 7;
}
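
// An illustrative PipelineResources value, for example as a run-time override
// in RunPipelineArgs.resources. All values are made-up placeholders.
//
// ```
// {
//   minimumCpuCores: 2,
//   minimumRamGb: 7.5,
//   preemptible: true,   (only honored if also true at create time, see above)
//   zones: ["us-central1-a", "us-central1-b"],
//   disks: [{name: "data", type: PERSISTENT_SSD, sizeGb: 100, mountPoint: "/mnt/data"}]
// }
// ```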

// Parameters facilitate setting and delivering data into the
// pipeline's execution environment. They are defined at create time,
// with optional defaults, and can be overridden at run time.
//
// If `localCopy` is unset, then the parameter specifies a string that
// is passed as-is into the pipeline, as the value of the environment
// variable with the given name. A default value can be optionally
// specified at create time. The default can be overridden at run time
// using the inputs map. If no default is given, a value must be
// supplied at runtime.
//
// If `localCopy` is defined, then the parameter specifies a data
// source or sink, both in Google Cloud Storage and on the Docker container
// where the pipeline computation is run. The [service account associated with
// the Pipeline][google.genomics.v1alpha2.RunPipelineArgs.service_account] (by
// default the project's Compute Engine service account) must have access to the
// Google Cloud Storage paths.
//
// At run time, the Google Cloud Storage paths can be overridden if a default
// was provided at create time, or must be set otherwise. The pipeline runner
// should add a key/value pair to either the inputs or outputs map. The
// indicated data copies will be carried out before/after pipeline execution,
// just as if the corresponding arguments were provided to `gsutil cp`.
//
// For example: Given the following `PipelineParameter`, specified
// in the `inputParameters` list:
//
// ```
// {name: "input_file", localCopy: {path: "file.txt", disk: "pd1"}}
// ```
//
// where `disk` is defined in the `PipelineResources` object as:
//
// ```
// {name: "pd1", mountPoint: "/mnt/disk/"}
// ```
//
// We create a disk named `pd1`, mount it on the host VM, and map
// `/mnt/pd1` to `/mnt/disk` in the docker container. At
// runtime, an entry for `input_file` would be required in the inputs
// map, such as:
//
// ```
//   inputs["input_file"] = "gs://my-bucket/bar.txt"
// ```
//
// This would generate the following gsutil call:
//
// ```
//   gsutil cp gs://my-bucket/bar.txt /mnt/pd1/file.txt
// ```
//
// The file `/mnt/pd1/file.txt` maps to `/mnt/disk/file.txt` in the
// Docker container. Acceptable paths are:
//
// <table>
//   <thead>
//     <tr><th>Google Cloud Storage path</th><th>Local path</th></tr>
//   </thead>
//   <tbody>
//     <tr><td>file</td><td>file</td></tr>
//     <tr><td>glob</td><td>directory</td></tr>
//   </tbody>
// </table>
//
// For outputs, the direction of the copy is reversed:
//
// ```
//   gsutil cp /mnt/disk/file.txt gs://my-bucket/bar.txt
// ```
//
// Acceptable paths are:
//
// <table>
//   <thead>
//     <tr><th>Local path</th><th>Google Cloud Storage path</th></tr>
//   </thead>
//   <tbody>
//     <tr><td>file</td><td>file</td></tr>
//     <tr>
//       <td>file</td>
//       <td>directory - directory must already exist</td>
//     </tr>
//     <tr>
//       <td>glob</td>
//       <td>directory - directory will be created if it doesn't exist</td></tr>
//   </tbody>
// </table>
//
// One restriction due to Docker limitations is that for outputs that are found
// on the boot disk, the local path cannot be a glob and must be a file.
message PipelineParameter {
  // LocalCopy defines how a remote file should be copied to and from the VM.
  message LocalCopy {
    // Required. The path within the user's docker container where
    // this input should be localized to and from, relative to the specified
    // disk's mount point. For example: file.txt.
    string path = 1;

    // Required. The name of the disk where this parameter is
    // located. Can be the name of one of the disks specified in the
    // Resources field, or "boot", which represents the Docker
    // instance's boot disk and has a mount point of `/`.
    string disk = 2;
  }

  // Required. Name of the parameter - the pipeline runner uses this string
  // as the key to the input and output maps in RunPipeline.
  string name = 1;

  // Human-readable description.
  string description = 2;

  // The default value for this parameter. Can be overridden at runtime.
  // If `localCopy` is present, then this must be a Google Cloud Storage path
  // beginning with `gs://`.
  string default_value = 5;

  // If present, this parameter is marked for copying to and from the VM.
  // `LocalCopy` indicates where on the VM the file should be. The value
  // given to this parameter (either at runtime or using `defaultValue`)
  // must be the remote path where the file should be.
  LocalCopy local_copy = 6;
}

// The Docker executor specification.
message DockerExecutor {
  // Required. Image name from either Docker Hub or Google Container Registry.
  // Users that run pipelines must have READ access to the image.
  string image_name = 1;

  // Required. The command or newline delimited script to run. The command
  // string will be executed within a bash shell.
  //
  // If the command exits with a non-zero exit code, output parameter
  // de-localization will be skipped and the pipeline operation's
  // [`error`][google.longrunning.Operation.error] field will be populated.
  //
  // Maximum command string length is 16384.
  string cmd = 2;
}
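
// An illustrative DockerExecutor; the image and script are made-up
// placeholders. Because `cmd` runs under a bash shell, a newline-delimited
// script is acceptable.
//
// ```
// {
//   imageName: "ubuntu",
//   cmd: "set -o errexit\nwc -l < /mnt/data/in.txt > /mnt/data/out.txt"
// }
// ```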