// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.genomics.v1;

import "google/api/annotations.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "ReferencesProto";
option java_package = "com.google.genomics.v1";

// Service for searching and retrieving reference sets, references, and the
// bases of a reference. Each method is also exposed over HTTP via the
// `google.api.http` binding declared on it.
service ReferenceServiceV1 {
  // Searches for reference sets which match the given criteria.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReferenceSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L71).
  rpc SearchReferenceSets(SearchReferenceSetsRequest)
      returns (SearchReferenceSetsResponse) {
    option (google.api.http) = {
      post: "/v1/referencesets/search"
      body: "*"
    };
  }

  // Gets a reference set.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.getReferenceSet](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L83).
  rpc GetReferenceSet(GetReferenceSetRequest) returns (ReferenceSet) {
    option (google.api.http) = {
      get: "/v1/referencesets/{reference_set_id}"
    };
  }

  // Searches for references which match the given criteria.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.searchReferences](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L146).
  rpc SearchReferences(SearchReferencesRequest)
      returns (SearchReferencesResponse) {
    option (google.api.http) = {
      post: "/v1/references/search"
      body: "*"
    };
  }

  // Gets a reference.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.getReference](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L158).
  rpc GetReference(GetReferenceRequest) returns (Reference) {
    option (google.api.http) = {
      get: "/v1/references/{reference_id}"
    };
  }

  // Lists the bases in a reference, optionally restricted to a range.
  //
  // For the definitions of references and other genomics resources, see
  // [Fundamentals of Google
  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
  //
  // Implements
  // [GlobalAllianceApi.getReferenceBases](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L221).
  rpc ListBases(ListBasesRequest) returns (ListBasesResponse) {
    option (google.api.http) = {
      get: "/v1/references/{reference_id}/bases"
    };
  }
}

// A reference is a canonical assembled DNA sequence, intended to act as a
// reference coordinate space for other genomic annotations. A single reference
// might represent the human chromosome 1 or mitochondrial DNA, for instance. A
// reference belongs to one or more reference sets.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message Reference {
  // The server-generated reference ID, unique across all references.
  string id = 1;

  // The length of this reference's sequence.
  int64 length = 2;

  // MD5 of the upper-case sequence excluding all whitespace characters (this
  // is equivalent to SQ:M5 in SAM). This value is represented in lower case
  // hexadecimal format.
  string md5checksum = 3;

  // The name of this reference, for example `22`.
  string name = 4;

  // The URI from which the sequence was obtained. Typically specifies a FASTA
  // format file.
  string source_uri = 5;

  // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally
  // with a version number, for example `GCF_000001405.26`.
  repeated string source_accessions = 6;

  // ID from http://www.ncbi.nlm.nih.gov/taxonomy. For example, 9606 for human.
  int32 ncbi_taxon_id = 7;
}

// A reference set is a set of references which typically comprise a reference
// assembly for a species, such as `GRCh38` which is representative
// of the human genome. A reference set defines a common coordinate space for
// comparing reference-aligned experimental data. A reference set contains 1 or
// more references.
//
// For more genomics resource definitions, see [Fundamentals of Google
// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
message ReferenceSet {
  // The server-generated reference set ID, unique across all reference sets.
  string id = 1;

  // The IDs of the reference objects that are part of this set.
  // `Reference.md5checksum` must be unique within this set.
  repeated string reference_ids = 2;

  // Order-independent MD5 checksum which identifies this reference set. The
  // checksum is computed by sorting all lower case hexadecimal string
  // `reference.md5checksum` values (for all references in this set) in
  // ascending lexicographic order, concatenating, and taking the MD5 of that
  // value. The resulting value is represented in lower case hexadecimal format.
  string md5checksum = 3;

  // ID from http://www.ncbi.nlm.nih.gov/taxonomy (for example, 9606 for human)
  // indicating the species which this reference set is intended to model. Note
  // that contained references may specify a different `ncbiTaxonId`, as
  // assemblies may contain reference sequences which do not belong to the
  // modeled species, for example EBV in a human reference genome.
  int32 ncbi_taxon_id = 4;

  // Free text description of this reference set.
  string description = 5;

  // Public id of this reference set, such as `GRCh37`.
  string assembly_id = 6;

  // The URI from which the references were obtained.
  string source_uri = 7;

  // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally
  // with a version number, for example `NC_000001.11`.
  repeated string source_accessions = 8;
}

// Request message for `ReferenceServiceV1.SearchReferenceSets`.
message SearchReferenceSetsRequest {
  // If present, return reference sets for which the
  // [md5checksum][google.genomics.v1.ReferenceSet.md5checksum] matches exactly.
  repeated string md5checksums = 1;

  // If present, return reference sets for which a prefix of any of
  // [sourceAccessions][google.genomics.v1.ReferenceSet.source_accessions]
  // match any of these strings. Accession numbers typically have a main number
  // and a version, for example `NC_000001.11`.
  repeated string accessions = 2;

  // If present, return reference sets for which a substring of their
  // `assemblyId` matches this string (case insensitive).
  string assembly_id = 3;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 4;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 1024. The maximum value is 4096.
  int32 page_size = 5;
}

// Response message for `ReferenceServiceV1.SearchReferenceSets`.
message SearchReferenceSetsResponse {
  // The matching reference sets.
  repeated ReferenceSet reference_sets = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}

// Request message for `ReferenceServiceV1.GetReferenceSet`.
message GetReferenceSetRequest {
  // The ID of the reference set.
  string reference_set_id = 1;
}

// Request message for `ReferenceServiceV1.SearchReferences`.
message SearchReferencesRequest {
  // If present, return references for which the
  // [md5checksum][google.genomics.v1.Reference.md5checksum] matches exactly.
  repeated string md5checksums = 1;

  // If present, return references for which a prefix of any of
  // [sourceAccessions][google.genomics.v1.Reference.source_accessions] match
  // any of these strings. Accession numbers typically have a main number and a
  // version, for example `GCF_000001405.26`.
  repeated string accessions = 2;

  // If present, return only references which belong to this reference set.
  string reference_set_id = 3;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 4;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 1024. The maximum value is 4096.
  int32 page_size = 5;
}

// Response message for `ReferenceServiceV1.SearchReferences`.
message SearchReferencesResponse {
  // The matching references.
  repeated Reference references = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}

// Request message for `ReferenceServiceV1.GetReference`.
message GetReferenceRequest {
  // The ID of the reference.
  string reference_id = 1;
}

// Request message for `ReferenceServiceV1.ListBases`.
message ListBasesRequest {
  // The ID of the reference.
  string reference_id = 1;

  // The start position (0-based) of this query. Defaults to 0.
  int64 start = 2;

  // The end position (0-based, exclusive) of this query. Defaults to the length
  // of this reference.
  int64 end = 3;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 4;

  // The maximum number of bases to return in a single page. If unspecified,
  // defaults to 200Kbp (kilo base pairs). The maximum value is 10Mbp (mega base
  // pairs).
  int32 page_size = 5;
}

// Response message for `ReferenceServiceV1.ListBases`.
message ListBasesResponse {
  // The offset position (0-based) of the given `sequence` from the
  // start of this `Reference`. This value will differ for each page
  // in a paginated request.
  int64 offset = 1;

  // A substring of the bases that make up this reference.
  string sequence = 2;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 3;
}