// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.genomics.v1alpha2;

import "google/api/annotations.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/code.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1alpha2;genomics";
option java_multiple_files = true;
option java_outer_classname = "PipelinesProto";
option java_package = "com.google.genomics.v1a";

// A service for running genomics pipelines.
service PipelinesV1Alpha2 {
  // Creates a pipeline that can be run later. Create takes a Pipeline that
  // has all fields other than `pipelineId` populated, and then returns
  // the same pipeline with `pipelineId` populated. This id can be used
  // to run the pipeline.
  //
  // Caller must have WRITE permission to the project.
  rpc CreatePipeline(CreatePipelineRequest) returns (Pipeline) {
    option (google.api.http) = {
      post: "/v1alpha2/pipelines"
      body: "pipeline"
    };
  }

  // Runs a pipeline. If `pipelineId` is specified in the request, then
  // run a saved pipeline. If `ephemeralPipeline` is specified, then run
  // that pipeline once without saving a copy.
  //
  // The caller must have READ permission to the project where the pipeline
  // is stored and WRITE permission to the project where the pipeline will be
  // run, as VMs will be created and storage will be used.
  rpc RunPipeline(RunPipelineRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1alpha2/pipelines:run"
      body: "*"
    };
  }

  // Retrieves a pipeline based on ID.
  //
  // Caller must have READ permission to the project.
  rpc GetPipeline(GetPipelineRequest) returns (Pipeline) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines/{pipeline_id}"
    };
  }

  // Lists pipelines.
  //
  // Caller must have READ permission to the project.
  rpc ListPipelines(ListPipelinesRequest) returns (ListPipelinesResponse) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines"
    };
  }

  // Deletes a pipeline based on ID.
  //
  // Caller must have WRITE permission to the project.
  rpc DeletePipeline(DeletePipelineRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1alpha2/pipelines/{pipeline_id}"
    };
  }

  // Gets controller configuration information. Should only be called
  // by VMs created by the Pipelines Service and not by end users.
  rpc GetControllerConfig(GetControllerConfigRequest)
      returns (ControllerConfig) {
    option (google.api.http) = {
      get: "/v1alpha2/pipelines:getControllerConfig"
    };
  }

  // Sets status of a given operation. Any new timestamps (as determined by
  // description) are appended to TimestampEvents. Should only be called by VMs
  // created by the Pipelines Service and not by end users.
  rpc SetOperationStatus(SetOperationStatusRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      put: "/v1alpha2/pipelines:setOperationStatus"
      body: "*"
    };
  }
}

// Describes a Compute Engine resource that is being managed by a running
// [pipeline][google.genomics.v1alpha2.Pipeline].
message ComputeEngine {
  // The instance on which the operation is running.
  string instance_name = 1;

  // The availability zone in which the instance resides.
  string zone = 2;

  // The machine type of the instance.
  string machine_type = 3;

  // The names of the disks that were created for this pipeline.
  repeated string disk_names = 4;
}

// Runtime metadata that will be populated in the
// [runtimeMetadata][google.genomics.v1.OperationMetadata.runtime_metadata]
// field of the Operation associated with a RunPipeline execution.
message RuntimeMetadata {
  // Execution information specific to Google Compute Engine.
  ComputeEngine compute_engine = 1;
}

// The pipeline object. Represents a transformation from a set of input
// parameters to a set of output parameters. The transformation is defined
// as a docker image and command to run within that image. Each pipeline
// is run on a Google Compute Engine VM. A pipeline can be created with the
// `create` method and then later run with the `run` method, or a pipeline can
// be defined and run all at once with the `run` method.
message Pipeline {
  // Required. The project in which to create the pipeline. The caller must
  // have WRITE access.
  string project_id = 1;

  // Required. A user specified pipeline name that does not have to be unique.
  // This name can be used for filtering Pipelines in ListPipelines.
  string name = 2;

  // User-specified description.
  string description = 3;

  // Input parameters of the pipeline.
  repeated PipelineParameter input_parameters = 8;

  // Output parameters of the pipeline.
  repeated PipelineParameter output_parameters = 9;

  // Required. The executor indicates in which environment the pipeline runs.
  oneof executor {
    // Specifies the docker run information.
    DockerExecutor docker = 5;
  }

  // Required. Specifies resource requirements for the pipeline run.
  // Required fields:
  //
  // *
  // [minimumCpuCores][google.genomics.v1alpha2.PipelineResources.minimum_cpu_cores]
  //
  // *
  // [minimumRamGb][google.genomics.v1alpha2.PipelineResources.minimum_ram_gb]
  PipelineResources resources = 6;

  // Unique pipeline id that is generated by the service when CreatePipeline
  // is called. Cannot be specified in the Pipeline used in the
  // CreatePipelineRequest, and will be populated in the response to
  // CreatePipeline and all subsequent Get and List calls. Indicates that the
  // service has registered this pipeline.
  string pipeline_id = 7;
}

// The request to create a pipeline. The pipeline field here should not have
// `pipelineId` populated, as that will be populated by the server.
message CreatePipelineRequest {
  // The pipeline to create. Should not have `pipelineId` populated.
  Pipeline pipeline = 1;
}

// The pipeline run arguments.
message RunPipelineArgs {
  // Required. The project in which to run the pipeline. The caller must have
  // WRITER access to all Google Cloud services and resources (for example,
  // Google Compute Engine) that will be used.
  string project_id = 1;

  // Pipeline input arguments; keys are defined in the pipeline documentation.
  // All input parameters that do not have default values must be specified.
  // If parameters with defaults are specified here, the defaults will be
  // overridden.
  map<string, string> inputs = 2;

  // Pipeline output arguments; keys are defined in the pipeline
  // documentation. All output parameters without default values
  // must be specified. If parameters with defaults are specified
  // here, the defaults will be overridden.
  map<string, string> outputs = 3;

  // The Google Cloud Service Account that will be used to access data and
  // services. By default, the compute service account associated with
  // `projectId` is used.
  ServiceAccount service_account = 4;

  // This field is deprecated. Use `labels` instead. Client-specified pipeline
  // operation identifier.
  string client_id = 5;

  // Specifies resource requirements/overrides for the pipeline run.
  PipelineResources resources = 6;

  // Required. Logging options. Used by the service to communicate results
  // to the user.
  LoggingOptions logging = 7;

  // How long to keep the VM up after a failure (for example docker command
  // failed, copying input or output files failed, etc). While the VM is up,
  // one can ssh into the VM to debug. Default is 0; maximum allowed value is
  // 1 day.
  google.protobuf.Duration keep_vm_alive_on_failure_duration = 8;

  // Labels to apply to this pipeline run. Labels will also be applied to
  // compute resources (VM, disks) created by this pipeline run. When listing
  // operations, operations can be [filtered by labels]
  // [google.longrunning.ListOperationsRequest.filter].
  // Label keys may not be empty; label values may be empty. Non-empty labels
  // must be 1-63 characters long, and comply with [RFC1035]
  // (https://www.ietf.org/rfc/rfc1035.txt).
  // Specifically, the name must be 1-63 characters long and match the regular
  // expression `[a-z]([-a-z0-9]*[a-z0-9])?` which means the first
  // character must be a lowercase letter, and all following characters must
  // be a dash, lowercase letter, or digit, except the last character, which
  // cannot be a dash.
  map<string, string> labels = 9;
}

// The request to run a pipeline. If `pipelineId` is specified, it
// refers to a saved pipeline created with CreatePipeline and set as
// the `pipelineId` of the returned Pipeline object. If
// `ephemeralPipeline` is specified, that pipeline is run once
// with the given args and not saved. It is an error to specify both
// `pipelineId` and `ephemeralPipeline`. `pipelineArgs`
// must be specified.
message RunPipelineRequest {
  oneof pipeline {
    // The already created pipeline to run.
    string pipeline_id = 1;

    // A new pipeline object to run once and then delete.
    Pipeline ephemeral_pipeline = 2;
  }

  // The arguments to use when running this pipeline.
  RunPipelineArgs pipeline_args = 3;
}

// A request to get a saved pipeline by id.
message GetPipelineRequest {
  // Caller must have READ access to the project in which this pipeline
  // is defined.
  string pipeline_id = 1;
}

// A request to list pipelines in a given project. Pipelines can be
// filtered by name using `namePrefix`: all pipelines with names that
// begin with `namePrefix` will be returned. Uses standard pagination:
// `pageSize` indicates how many pipelines to return, and
// `pageToken` comes from a previous ListPipelinesResponse to
// indicate offset.
message ListPipelinesRequest {
  // Required. The name of the project to search for pipelines. Caller
  // must have READ access to this project.
  string project_id = 1;

  // Pipelines with names that match this prefix should be
  // returned. If unspecified, all pipelines in the project, up to
  // `pageSize`, will be returned.
  string name_prefix = 2;

  // Number of pipelines to return at once. Defaults to 256, and max
  // is 2048.
  int32 page_size = 3;

  // Token to use to indicate where to start getting results.
  // If unspecified, returns the first page of results.
  string page_token = 4;
}

// The response of ListPipelines. Contains at most `pageSize`
// pipelines. If it contains `pageSize` pipelines, and more pipelines
// exist, then `nextPageToken` will be populated and should be
// used as the `pageToken` argument to a subsequent ListPipelines
// request.
message ListPipelinesResponse {
  // The matched pipelines.
  repeated Pipeline pipelines = 1;

  // The token to use to get the next page of results.
  string next_page_token = 2;
}

// The request to delete a saved pipeline by ID.
message DeletePipelineRequest {
  // Caller must have WRITE access to the project in which this pipeline
  // is defined.
  string pipeline_id = 1;
}

// Request to get controller configuration. Should only be used
// by VMs created by the Pipelines Service and not by end users.
message GetControllerConfigRequest {
  // The operation to retrieve controller configuration for.
  string operation_id = 1;

  // NOTE(review): undocumented in the original; presumably a token that
  // proves the calling VM is authorized to fetch this configuration --
  // confirm against the service implementation.
  uint64 validation_token = 2;
}

// Stores the information that the controller will fetch from the
// server in order to run. Should only be used by VMs created by the
// Pipelines Service and not by end users.
message ControllerConfig {
  // Wrapper for a list of strings, used as a map value below
  // (proto3 map values cannot be `repeated` directly).
  message RepeatedString {
    repeated string values = 1;
  }

  // Docker image to run -- presumably mirrors
  // DockerExecutor.image_name; confirm against the service.
  string image = 1;

  // Command to execute -- presumably mirrors DockerExecutor.cmd; confirm.
  string cmd = 2;

  // Google Cloud Storage path for logs -- presumably derived from
  // LoggingOptions.gcs_path; confirm.
  string gcs_log_path = 3;

  // Machine type of the VM the controller runs on.
  string machine_type = 4;

  // NOTE(review): undocumented in the original; presumably environment
  // variables for the run -- confirm value semantics.
  map<string, string> vars = 5;

  // Disks keyed by name -- NOTE(review): value semantics (presumably
  // mount points) are not documented here; confirm.
  map<string, string> disks = 6;

  // Google Cloud Storage source paths, keyed by parameter name --
  // presumably files to localize onto the VM before running; confirm.
  map<string, RepeatedString> gcs_sources = 7;

  // Google Cloud Storage sink paths, keyed by parameter name --
  // presumably files to de-localize from the VM after running; confirm.
  map<string, RepeatedString> gcs_sinks = 8;
}

// Stores the list of events and times they occurred for major events in job
// execution.
message TimestampEvent {
  // String indicating the type of event.
  string description = 1;

  // The time this event occurred.
  google.protobuf.Timestamp timestamp = 2;
}

// Request to set operation status. Should only be used by VMs
// created by the Pipelines Service and not by end users.
message SetOperationStatusRequest {
  // The operation whose status is being set.
  string operation_id = 1;

  // Timestamp events to record. New events (as determined by description)
  // are appended to the operation's existing TimestampEvents; see
  // SetOperationStatus.
  repeated TimestampEvent timestamp_events = 2;

  // Error status of the operation -- presumably google.rpc.Code.OK
  // (the zero value) when the operation succeeded; confirm.
  google.rpc.Code error_code = 3;

  // Human-readable message accompanying `error_code`.
  string error_message = 4;

  // NOTE(review): undocumented in the original; presumably a token that
  // proves the calling VM is authorized to update this operation --
  // confirm against the service implementation.
  uint64 validation_token = 5;
}

// A Google Cloud Service Account.
message ServiceAccount {
  // Email address of the service account. Defaults to `default`,
  // which uses the compute service account associated with the project.
  string email = 1;

  // List of scopes to be enabled for this service account on the VM.
  // The following scopes are automatically included:
  //
  // * https://www.googleapis.com/auth/compute
  // * https://www.googleapis.com/auth/devstorage.full_control
  // * https://www.googleapis.com/auth/genomics
  // * https://www.googleapis.com/auth/logging.write
  // * https://www.googleapis.com/auth/monitoring.write
  repeated string scopes = 2;
}

// The logging options for the pipeline run.
message LoggingOptions {
  // The location in Google Cloud Storage to which the pipeline logs
  // will be copied. Can be specified as a fully qualified directory
  // path, in which case logs will be output with a unique identifier
  // as the filename in that directory, or as a fully specified path,
  // which must end in `.log`, in which case that path will be
  // used, and the user must ensure that logs are not
  // overwritten. Stdout and stderr logs from the run are also
  // generated and output as `-stdout.log` and `-stderr.log`.
  string gcs_path = 1;
}

// The system resources for the pipeline run.
message PipelineResources {
  // A Google Compute Engine disk resource specification.
  message Disk {
    // The types of disks that may be attached to VMs.
    enum Type {
      // Default disk type. Use one of the other options below.
      TYPE_UNSPECIFIED = 0;

      // Specifies a Google Compute Engine persistent hard disk. See
      // https://cloud.google.com/compute/docs/disks/#pdspecs for details.
      PERSISTENT_HDD = 1;

      // Specifies a Google Compute Engine persistent solid-state disk. See
      // https://cloud.google.com/compute/docs/disks/#pdspecs for details.
      PERSISTENT_SSD = 2;

      // Specifies a Google Compute Engine local SSD.
      // See https://cloud.google.com/compute/docs/disks/local-ssd for details.
      LOCAL_SSD = 3;
    }

    // Required. The name of the disk that can be used in the pipeline
    // parameters. Must be 1 - 63 characters.
    // The name "boot" is reserved for system use.
    string name = 1;

    // Required. The type of the disk to create.
    Type type = 2;

    // The size of the disk. Defaults to 500 (GB).
    // This field is not applicable for local SSD.
    int32 size_gb = 3;

    // The full or partial URL of the persistent disk to attach. See
    // https://cloud.google.com/compute/docs/reference/latest/instances#resource
    // and
    // https://cloud.google.com/compute/docs/disks/persistent-disks#snapshots
    // for more details.
    string source = 4;

    // Deprecated. Disks created by the Pipelines API will be deleted at the
    // end of the pipeline run, regardless of what this field is set to.
    bool auto_delete = 6;

    // Required at create time and cannot be overridden at run time.
    // Specifies the path in the docker container where files on
    // this disk should be located. For example, if `mountPoint`
    // is `/mnt/disk`, and the parameter has `localPath`
    // `inputs/file.txt`, the docker container can access the data at
    // `/mnt/disk/inputs/file.txt`.
    string mount_point = 8;
  }

  // The minimum number of cores to use. Defaults to 1.
  int32 minimum_cpu_cores = 1;

  // Whether to use preemptible VMs. Defaults to `false`. In order to use
  // this, must be true for both create time and run time. Cannot be true at
  // run time if false at create time.
  bool preemptible = 2;

  // The minimum amount of RAM to use. Defaults to 3.75 (GB).
  double minimum_ram_gb = 3;

  // Disks to attach.
  repeated Disk disks = 4;

  // List of Google Compute Engine availability zones to which resource
  // creation will be restricted. If empty, any zone may be chosen.
  repeated string zones = 5;

  // The size of the boot disk. Defaults to 10 (GB).
  int32 boot_disk_size_gb = 6;

  // Whether to assign an external IP to the instance. This is an experimental
  // feature that may go away. Defaults to false.
  // Corresponds to `--no_address` flag for [gcloud compute instances create]
  // (https://cloud.google.com/sdk/gcloud/reference/compute/instances/create).
  // In order to use this, must be true for both create time and run time.
  // Cannot be true at run time if false at create time. If you need to ssh
  // into a private IP VM for debugging, you can ssh to a public VM and then
  // ssh into the private VM's Internal IP. If noAddress is set, this pipeline
  // run may only load docker images from Google Container Registry and not
  // Docker Hub.
  // ** Note: To use this option, your project must be in Google Access for
  // Private IPs Early Access Program.**
  bool no_address = 7;
}

// Parameters facilitate setting and delivering data into the
// pipeline's execution environment. They are defined at create time,
// with optional defaults, and can be overridden at run time.
//
// If `localCopy` is unset, then the parameter specifies a string that
// is passed as-is into the pipeline, as the value of the environment
// variable with the given name. A default value can be optionally
// specified at create time. The default can be overridden at run time
// using the inputs map. If no default is given, a value must be
// supplied at runtime.
//
// If `localCopy` is defined, then the parameter specifies a data
// source or sink, both in Google Cloud Storage and on the Docker container
// where the pipeline computation is run. The [service account associated
// with the Pipeline][google.genomics.v1alpha2.RunPipelineArgs.service_account]
// (by default the project's Compute Engine service account) must have access
// to the Google Cloud Storage paths.
//
// At run time, the Google Cloud Storage paths can be overridden if a default
// was provided at create time, or must be set otherwise. The pipeline runner
// should add a key/value pair to either the inputs or outputs map. The
// indicated data copies will be carried out before/after pipeline execution,
// just as if the corresponding arguments were provided to `gsutil cp`.
//
// For example: Given the following `PipelineParameter`, specified
// in the `inputParameters` list:
//
// ```
// {name: "input_file", localCopy: {path: "file.txt", disk: "pd1"}}
// ```
//
// where `disk` is defined in the `PipelineResources` object as:
//
// ```
// {name: "pd1", mountPoint: "/mnt/disk/"}
// ```
//
// We create a disk named `pd1`, mount it on the host VM, and map
// `/mnt/pd1` to `/mnt/disk` in the docker container. At
// runtime, an entry for `input_file` would be required in the inputs
// map, such as:
//
// ```
//   inputs["input_file"] = "gs://my-bucket/bar.txt"
// ```
//
// This would generate the following gsutil call:
//
// ```
//   gsutil cp gs://my-bucket/bar.txt /mnt/pd1/file.txt
// ```
//
// The file `/mnt/pd1/file.txt` maps to `/mnt/disk/file.txt` in the
// Docker container. Acceptable paths are:
//
// <table>
//   <thead>
//     <tr><th>Google Cloud storage path</th><th>Local path</th></tr>
//   </thead>
//   <tbody>
//     <tr><td>file</td><td>file</td></tr>
//     <tr><td>glob</td><td>directory</td></tr>
//   </tbody>
// </table>
//
// For outputs, the direction of the copy is reversed:
//
// ```
//   gsutil cp /mnt/disk/file.txt gs://my-bucket/bar.txt
// ```
//
// Acceptable paths are:
//
// <table>
//   <thead>
//     <tr><th>Local path</th><th>Google Cloud Storage path</th></tr>
//   </thead>
//   <tbody>
//     <tr><td>file</td><td>file</td></tr>
//     <tr>
//       <td>file</td>
//       <td>directory - directory must already exist</td>
//     </tr>
//     <tr>
//       <td>glob</td>
//       <td>directory - directory will be created if it doesn't exist</td>
//     </tr>
//   </tbody>
// </table>
//
// One restriction due to docker limitations, is that for outputs that are
// found on the boot disk, the local path cannot be a glob and must be a file.
message PipelineParameter {
  // LocalCopy defines how a remote file should be copied to and from the VM.
  message LocalCopy {
    // Required. The path within the user's docker container where
    // this input should be localized to and from, relative to the specified
    // disk's mount point. For example: file.txt.
    string path = 1;

    // Required. The name of the disk where this parameter is
    // located. Can be the name of one of the disks specified in the
    // Resources field, or "boot", which represents the Docker
    // instance's boot disk and has a mount point of `/`.
    string disk = 2;
  }

  // Required. Name of the parameter - the pipeline runner uses this string
  // as the key to the input and output maps in RunPipeline.
  string name = 1;

  // Human-readable description.
  string description = 2;

  // The default value for this parameter. Can be overridden at runtime.
  // If `localCopy` is present, then this must be a Google Cloud Storage path
  // beginning with `gs://`.
  string default_value = 5;

  // If present, this parameter is marked for copying to and from the VM.
  // `LocalCopy` indicates where on the VM the file should be. The value
  // given to this parameter (either at runtime or using `defaultValue`)
  // must be the remote path where the file should be.
  LocalCopy local_copy = 6;
}

// The Docker executor specification.
message DockerExecutor {
  // Required. Image name from either Docker Hub or Google Container Registry.
  // Users that run pipelines must have READ access to the image.
  string image_name = 1;

  // Required. The command or newline delimited script to run. The command
  // string will be executed within a bash shell.
  //
  // If the command exits with a non-zero exit code, output parameter
  // de-localization will be skipped and the pipeline operation's
  // [`error`][google.longrunning.Operation.error] field will be populated.
  //
  // Maximum command string length is 16384.
  string cmd = 2;
}