// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.genomics.v1;

import "google/api/annotations.proto";
import "google/genomics/v1/range.proto";
import "google/genomics/v1/readalignment.proto";
import "google/genomics/v1/readgroupset.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "ReadsProto";
option java_package = "com.google.genomics.v1";

// A service for streaming reads. It exposes a single server-streaming RPC;
// see [StreamReads][google.genomics.v1.StreamingReadService.StreamReads].
service StreamingReadService {
  // Returns a stream of all the reads matching the search request, ordered
  // by reference name, position, and ID.
  rpc StreamReads(StreamReadsRequest) returns (stream StreamReadsResponse) {
    option (google.api.http) = {
      post: "/v1/reads:stream"
      body: "*"
    };
  }
}

// The Readstore. A data store for DNA sequencing Reads.
service ReadServiceV1 {
  // Creates read group sets by asynchronously importing the provided
  // information.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // The caller must have WRITE permissions to the dataset.
  //
  // ## Notes on [BAM](https://samtools.github.io/hts-specs/SAMv1.pdf) import
  //
  // - Tags will be converted to strings - tag types are not preserved
  // - Comments (`@CO`) in the input file header will not be preserved
  // - Original header order of references (`@SQ`) will not be preserved
  // - Any reverse stranded unmapped reads will be reverse complemented, and
  //   their qualities (also the "BQ" and "OQ" tags, if any) will be reversed
  // - Unmapped reads will be stripped of positional information (reference
  //   name and position)
  rpc ImportReadGroupSets(ImportReadGroupSetsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets:import"
      body: "*"
    };
  }

  // Exports a read group set to a BAM file in Google Cloud Storage.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Note that currently there may be some differences between exported BAM
  // files and the original BAM file at the time of import. See
  // [ImportReadGroupSets][google.genomics.v1.ReadServiceV1.ImportReadGroupSets]
  // for caveats.
  rpc ExportReadGroupSet(ExportReadGroupSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/{read_group_set_id}:export"
      body: "*"
    };
  }

  // Searches for read group sets matching the criteria.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReadGroupSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L135).
  rpc SearchReadGroupSets(SearchReadGroupSetsRequest)
      returns (SearchReadGroupSetsResponse) {
    option (google.api.http) = {
      post: "/v1/readgroupsets/search"
      body: "*"
    };
  }

  // Updates a read group set.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // This method supports patch semantics.
  rpc UpdateReadGroupSet(UpdateReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      patch: "/v1/readgroupsets/{read_group_set_id}"
      body: "read_group_set"
    };
  }

  // Deletes a read group set.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc DeleteReadGroupSet(DeleteReadGroupSetRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/readgroupsets/{read_group_set_id}"
    };
  }

  // Gets a read group set by ID.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  rpc GetReadGroupSet(GetReadGroupSetRequest) returns (ReadGroupSet) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}"
    };
  }

  // Lists fixed width coverage buckets for a read group set, each of which
  // corresponds to a range of a reference sequence. Each bucket summarizes
  // coverage information across its corresponding genomic range.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Coverage is defined as the number of reads which are aligned to a given
  // base in the reference sequence. Coverage buckets are available at several
  // precomputed bucket widths, enabling retrieval of various coverage 'zoom
  // levels'. The caller must have READ permissions for the target read group
  // set.
  rpc ListCoverageBuckets(ListCoverageBucketsRequest)
      returns (ListCoverageBucketsResponse) {
    option (google.api.http) = {
      get: "/v1/readgroupsets/{read_group_set_id}/coveragebuckets"
    };
  }

  // Gets a list of reads for one or more read group sets.
  //
  // For the definitions of read group sets and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Reads search operates over a genomic coordinate space of reference sequence
  // & position defined over the reference sequences to which the requested
  // read group sets are aligned.
  //
  // If a target positional range is specified, search returns all reads whose
  // alignment to the reference genome overlaps the range. A query which
  // specifies only read group set IDs yields all reads in those read group
  // sets, including unmapped reads.
  //
  // All reads returned (including reads on subsequent pages) are ordered by
  // genomic coordinate (by reference sequence, then position). Reads with
  // equivalent genomic coordinates are returned in an unspecified order. This
  // order is consistent, such that two queries for the same content (regardless
  // of page size) yield reads in the same order across their respective streams
  // of paginated responses.
  //
  // Implements
  // [GlobalAllianceApi.searchReads](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L85).
  rpc SearchReads(SearchReadsRequest) returns (SearchReadsResponse) {
    option (google.api.http) = {
      post: "/v1/reads/search"
      body: "*"
    };
  }
}

// The read group set search request.
message SearchReadGroupSetsRequest {
  // Restricts this query to read group sets within the given datasets. At least
  // one ID must be provided.
  repeated string dataset_ids = 1;

  // Only return read group sets for which a substring of the name matches this
  // string.
  string name = 3;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 2;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 256. The maximum value is 1024.
  int32 page_size = 4;
}

// The read group set search response.
message SearchReadGroupSetsResponse {
  // The list of matching read group sets.
  repeated ReadGroupSet read_group_sets = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}

// The read group set import request.
message ImportReadGroupSetsRequest {
  // The available strategies for partitioning imported read groups into read
  // group sets.
  enum PartitionStrategy {
    // No partition strategy was specified.
    // NOTE(review): `PER_FILE_PER_SAMPLE` is documented as the default
    // behavior; presumably it is applied when this value is sent - confirm.
    PARTITION_STRATEGY_UNSPECIFIED = 0;

    // In most cases, this strategy yields one read group set per file. This is
    // the default behavior.
    //
    // Allocate one read group set per file per sample. For BAM files, read
    // groups are considered to share a sample if they have identical sample
    // names. Furthermore, all reads for each file which do not belong to a read
    // group, if any, will be grouped into a single read group set per-file.
    PER_FILE_PER_SAMPLE = 1;

    // Includes all read groups in all imported files into a single read group
    // set. Requires that the headers for all imported files are equivalent. All
    // reads which do not belong to a read group, if any, will be grouped into a
    // separate read group set.
    MERGE_ALL = 2;
  }

  // Required. The ID of the dataset these read group sets will belong to. The
  // caller must have WRITE permissions to this dataset.
  string dataset_id = 1;

  // The reference set to which the imported read group sets are aligned, if
  // any. The reference names of this reference set must be a superset of those
  // found in the imported file headers. If no reference set id is provided, a
  // best effort is made to associate with a matching reference set.
  string reference_set_id = 4;

  // A list of URIs pointing at [BAM
  // files](https://samtools.github.io/hts-specs/SAMv1.pdf)
  // in Google Cloud Storage.
  // Those URIs can include wildcards (*), but do not add or remove
  // matching files before import has completed.
  //
  // Note that Google Cloud Storage object listing is only eventually
  // consistent: files added may not be immediately visible to
  // everyone. Thus, if using a wildcard it is preferable not to start
  // the import immediately after the files are created.
  repeated string source_uris = 2;

  // The partition strategy describes how read groups are partitioned into read
  // group sets.
  PartitionStrategy partition_strategy = 5;
}

// The read group set import response.
message ImportReadGroupSetsResponse {
  // IDs of the read group sets that were created.
  repeated string read_group_set_ids = 1;
}

// The read group set export request.
message ExportReadGroupSetRequest {
  // Required. The Google Cloud project ID that owns this
  // export. The caller must have WRITE access to this project.
  string project_id = 1;

  // Required. A Google Cloud Storage URI for the exported BAM file.
  // The currently authenticated user must have write access to the new file.
  // An error will be returned if the URI already contains data.
  string export_uri = 2;

  // Required. The ID of the read group set to export. The caller must have
  // READ access to this read group set.
  string read_group_set_id = 3;

  // The reference names to export. If this is not specified, all reference
  // sequences, including unmapped reads, are exported.
  // Use `*` to export only unmapped reads.
  repeated string reference_names = 4;
}

// The read group set update request.
message UpdateReadGroupSetRequest {
  // The ID of the read group set to be updated. The caller must have WRITE
  // permissions to the dataset associated with this read group set.
  string read_group_set_id = 1;

  // The new read group set data. See `updateMask` for details on mutability of
  // fields.
  ReadGroupSet read_group_set = 2;

  // An optional mask specifying which fields to update. Supported fields:
  //
  // * [name][google.genomics.v1.ReadGroupSet.name].
  // * [referenceSetId][google.genomics.v1.ReadGroupSet.reference_set_id].
  //
  // Leaving `updateMask` unset is equivalent to specifying all mutable
  // fields.
  google.protobuf.FieldMask update_mask = 3;
}

// The read group set delete request.
message DeleteReadGroupSetRequest {
  // The ID of the read group set to be deleted. The caller must have WRITE
  // permissions to the dataset associated with this read group set.
  string read_group_set_id = 1;
}

// The read group set get request.
message GetReadGroupSetRequest {
  // The ID of the read group set.
  string read_group_set_id = 1;
}

// The coverage bucket list request.
message ListCoverageBucketsRequest {
  // Required. The ID of the read group set over which coverage is requested.
  string read_group_set_id = 1;

  // The name of the reference to query, within the reference set associated
  // with this query. Optional.
  string reference_name = 3;

  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified. Defaults to 0.
  int64 start = 4;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified. If unset or 0, defaults
  // to the length of the reference.
  int64 end = 5;

  // The desired width of each reported coverage bucket in base pairs. This
  // will be rounded down to the nearest precomputed bucket width; the value
  // of which is returned as `bucketWidth` in the response. Defaults
  // to infinity (each bucket spans an entire reference sequence) or the length
  // of the target range, if specified. The smallest precomputed
  // `bucketWidth` is currently 2048 base pairs; this is subject to
  // change.
  int64 target_bucket_width = 6;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 7;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 1024. The maximum value is 2048.
  int32 page_size = 8;
}

// A bucket over which read coverage has been precomputed. A bucket corresponds
// to a specific range of the reference sequence.
message CoverageBucket {
  // The genomic coordinate range spanned by this bucket.
  Range range = 1;

  // The average number of reads which are aligned to each individual
  // reference base in this bucket.
  float mean_coverage = 2;
}

// The coverage bucket list response.
message ListCoverageBucketsResponse {
  // The length of each coverage bucket in base pairs. Note that buckets at the
  // end of a reference sequence may be shorter. This value is omitted if the
  // bucket width is infinity (the default behaviour, with no range or
  // `targetBucketWidth`).
  int64 bucket_width = 1;

  // The coverage buckets. The list of buckets is sparse; a bucket with 0
  // overlapping reads is not returned. A bucket never crosses more than one
  // reference sequence. Each bucket has width `bucketWidth`, unless
  // its end is the end of the reference sequence.
  repeated CoverageBucket coverage_buckets = 2;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 3;
}

// The read search request.
message SearchReadsRequest {
  // The IDs of the read group sets within which to search for reads. All
  // specified read group sets must be aligned against a common set of reference
  // sequences; this defines the genomic coordinates for the query. Must specify
  // one of `readGroupSetIds` or `readGroupIds`.
  repeated string read_group_set_ids = 1;

  // The IDs of the read groups within which to search for reads. All specified
  // read groups must belong to the same read group sets. Must specify one of
  // `readGroupSetIds` or `readGroupIds`.
  repeated string read_group_ids = 5;

  // The reference sequence name, for example `chr1`, `1`, or `chrX`. If set to
  // `*`, only unmapped reads are returned. If unspecified, all reads (mapped
  // and unmapped) are returned.
  string reference_name = 7;

  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified.
  int64 start = 8;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified.
  int64 end = 9;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 3;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 256. The maximum value is 2048.
  int32 page_size = 4;
}

// The read search response.
message SearchReadsResponse {
  // The list of matching alignments sorted by mapped genomic coordinate,
  // if any, ascending in position within the same reference. Unmapped reads,
  // which have no position, are returned contiguously and are sorted in
  // ascending lexicographic order by fragment name.
  repeated Read alignments = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}

// The stream reads request.
message StreamReadsRequest {
  // The Google Cloud project ID which will be billed
  // for this access. The caller must have WRITE access to this project.
  // Required.
  string project_id = 1;

  // The ID of the read group set from which to stream reads.
  string read_group_set_id = 2;

  // The reference sequence name, for example `chr1`,
  // `1`, or `chrX`. If set to `*`, only unmapped reads are
  // returned.
  string reference_name = 3;

  // The start position of the range on the reference, 0-based inclusive. If
  // specified, `referenceName` must also be specified.
  int64 start = 4;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified, `referenceName` must also be specified.
  int64 end = 5;

  // Restricts results to a shard containing approximately `1/totalShards`
  // of the normal response payload for this query. Results from a sharded
  // request are disjoint from those returned by all queries which differ only
  // in their shard parameter. A shard may yield 0 results; this is especially
  // likely for large values of `totalShards`.
  //
  // Valid values are `[0, totalShards)`.
  int32 shard = 6;

  // Specifying `totalShards` causes a disjoint subset of the normal response
  // payload to be returned for each query with a unique `shard` parameter
  // specified. A best effort is made to yield equally sized shards. Sharding
  // can be used to distribute processing amongst workers, where each worker is
  // assigned a unique `shard` number and all workers specify the same
  // `totalShards` number. The union of reads returned for all sharded queries
  // `[0, totalShards)` is equal to those returned by a single unsharded query.
  //
  // Queries for different values of `totalShards` with common divisors will
  // share shard boundaries. For example, streaming `shard` 2 of 5
  // `totalShards` yields the same results as streaming `shard`s 4 and 5 of 10
  // `totalShards`. This property can be leveraged for adaptive retries.
  int32 total_shards = 7;
}

// The stream reads response. One such message is sent per element of the
// response stream of
// [StreamReads][google.genomics.v1.StreamingReadService.StreamReads].
message StreamReadsResponse {
  // The reads carried by this message of the response stream.
  repeated Read alignments = 1;
}