// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.bigtable.v2;

import "google/api/field_behavior.proto";

option csharp_namespace = "Google.Cloud.Bigtable.V2";
option go_package = "google.golang.org/genproto/googleapis/bigtable/v2;bigtable";
option java_multiple_files = true;
option java_outer_classname = "DataProto";
option java_package = "com.google.bigtable.v2";
option php_namespace = "Google\\Cloud\\Bigtable\\V2";
option ruby_package = "Google::Cloud::Bigtable::V2";

// Specifies the complete (requested) contents of a single row of a table.
// Rows which exceed 256MiB in size cannot be read in full.
message Row {
  // The unique key which identifies this row within its table. This is the same
  // key that's used to identify the row in, for example, a MutateRowRequest.
  // May contain any non-empty byte string up to 4KiB in length.
  bytes key = 1;

  // May be empty, but only if the entire row is empty.
  // The mutual ordering of column families is not specified.
  repeated Family families = 2;
}

// Specifies (some of) the contents of a single row/column family intersection
// of a table.
message Family {
  // The unique key which identifies this family within its row. This is the
  // same key that's used to identify the family in, for example, a RowFilter
  // which sets its `family_name_regex_filter` field.
  // Must match `[-_.a-zA-Z0-9]+`, except that AggregatingRowProcessors may
  // produce cells in a sentinel family with an empty name.
  // Must be no greater than 64 characters in length.
  string name = 1;

  // Must not be empty. Sorted in order of increasing `qualifier`.
  repeated Column columns = 2;
}

// Specifies (some of) the contents of a single row/column intersection of a
// table.
message Column {
  // The unique key which identifies this column within its family. This is the
  // same key that's used to identify the column in, for example, a RowFilter
  // which sets its `column_qualifier_regex_filter` field.
  // May contain any byte string, including the empty string, up to 16KiB in
  // length.
  bytes qualifier = 1;

  // Must not be empty. Sorted in order of decreasing `timestamp_micros`.
  repeated Cell cells = 2;
}

// Specifies (some of) the contents of a single row/column/timestamp of a table.
message Cell {
  // The cell's stored timestamp, which also uniquely identifies it within
  // its column.
  // Values are always expressed in microseconds, but individual tables may set
  // a coarser granularity to further restrict the allowed values. For
  // example, a table which specifies millisecond granularity will only allow
  // values of `timestamp_micros` which are multiples of 1000.
  int64 timestamp_micros = 1;

  // The value stored in the cell.
  // May contain any byte string, including the empty string, up to 100MiB in
  // length.
  bytes value = 2;

  // Labels applied to the cell by a [RowFilter][google.bigtable.v2.RowFilter].
  repeated string labels = 3;
}

// `Value` represents a dynamically typed value.
// The typed fields in `Value` are used as a transport encoding for the actual
// value (which may be of a more complex type). See the documentation of the
// `Type` message for more details.
message Value {
  // Options for transporting values within the protobuf type system. A given
  // `kind` may support more than one `type` and vice versa. On write, this is
  // roughly analogous to a GoogleSQL literal.
  //
  // The value is `NULL` if none of the fields in `kind` is set. If `type` is
  // also omitted on write, we will infer it based on the schema.
  oneof kind {
    // Represents a raw byte sequence with no type information.
    // The `type` field must be omitted.
    bytes raw_value = 8;

    // Represents a raw cell timestamp with no type information.
    // The `type` field must be omitted.
    int64 raw_timestamp_micros = 9;

    // Represents a typed value transported as an integer.
    // Default type for writes: `Int64`
    int64 int_value = 6;
  }
}

// Specifies a contiguous range of rows.
message RowRange {
  // The row key at which to start the range.
  // If neither field is set, interpreted as the empty string, inclusive.
  oneof start_key {
    // Used when giving an inclusive lower bound for the range.
    bytes start_key_closed = 1;

    // Used when giving an exclusive lower bound for the range.
    bytes start_key_open = 2;
  }

  // The row key at which to end the range.
  // If neither field is set, interpreted as the infinite row key, exclusive.
  oneof end_key {
    // Used when giving an exclusive upper bound for the range.
    bytes end_key_open = 3;

    // Used when giving an inclusive upper bound for the range.
    bytes end_key_closed = 4;
  }
}

// Specifies a non-contiguous set of rows.
message RowSet {
  // Single rows included in the set.
  repeated bytes row_keys = 1;

  // Contiguous row ranges included in the set.
  repeated RowRange row_ranges = 2;
}

// Specifies a contiguous range of columns within a single column family.
// The range spans from <column_family>:<start_qualifier> to
// <column_family>:<end_qualifier>, where both bounds can be either
// inclusive or exclusive.
message ColumnRange {
  // The name of the column family within which this range falls.
  string family_name = 1;

  // The column qualifier at which to start the range (within `family_name`).
  // If neither field is set, interpreted as the empty string, inclusive.
  oneof start_qualifier {
    // Used when giving an inclusive lower bound for the range.
    bytes start_qualifier_closed = 2;

    // Used when giving an exclusive lower bound for the range.
    bytes start_qualifier_open = 3;
  }

  // The column qualifier at which to end the range (within `family_name`).
  // If neither field is set, interpreted as the infinite string, exclusive.
  oneof end_qualifier {
    // Used when giving an inclusive upper bound for the range.
    bytes end_qualifier_closed = 4;

    // Used when giving an exclusive upper bound for the range.
    bytes end_qualifier_open = 5;
  }
}

// Specifies a contiguous range of microsecond timestamps.
message TimestampRange {
  // Inclusive lower bound. If left empty, interpreted as 0.
  int64 start_timestamp_micros = 1;

  // Exclusive upper bound. If left empty, interpreted as infinity.
  int64 end_timestamp_micros = 2;
}

// Specifies a contiguous range of raw byte values.
message ValueRange {
  // The value at which to start the range.
  // If neither field is set, interpreted as the empty string, inclusive.
  oneof start_value {
    // Used when giving an inclusive lower bound for the range.
    bytes start_value_closed = 1;

    // Used when giving an exclusive lower bound for the range.
    bytes start_value_open = 2;
  }

  // The value at which to end the range.
  // If neither field is set, interpreted as the infinite string, exclusive.
  oneof end_value {
    // Used when giving an inclusive upper bound for the range.
    bytes end_value_closed = 3;

    // Used when giving an exclusive upper bound for the range.
    bytes end_value_open = 4;
  }
}

// Takes a row as input and produces an alternate view of the row based on
// specified rules. For example, a RowFilter might trim down a row to include
// just the cells from columns matching a given regular expression, or might
// return all the cells of a row but not their values. More complicated filters
// can be composed out of these components to express requests such as, "within
// every column of a particular family, give just the two most recent cells
// which are older than timestamp X."
//
// There are two broad categories of RowFilters (true filters and transformers),
// as well as two ways to compose simple filters into more complex ones
// (chains and interleaves). They work as follows:
//
// * True filters alter the input row by excluding some of its cells wholesale
// from the output row. An example of a true filter is the `value_regex_filter`,
// which excludes cells whose values don't match the specified pattern. All
// regex true filters use RE2 syntax (https://github.com/google/re2/wiki/Syntax)
// in raw byte mode (RE2::Latin1), and are evaluated as full matches. An
// important point to keep in mind is that `RE2(.)` is equivalent by default to
// `RE2([^\n])`, meaning that it does not match newlines. When attempting to
// match an arbitrary byte, you should therefore use the escape sequence `\C`,
// which may need to be further escaped as `\\C` in your client language.
//
// * Transformers alter the input row by changing the values of some of its
// cells in the output, without excluding them completely. Currently, the only
// supported transformer is the `strip_value_transformer`, which replaces every
// cell's value with the empty string.
//
// * Chains and interleaves are described in more detail in the
// RowFilter.Chain and RowFilter.Interleave documentation.
//
// The total serialized size of a RowFilter message must not
// exceed 20480 bytes, and RowFilters may not be nested within each other
// (in Chains or Interleaves) to a depth of more than 20.
message RowFilter {
  // A RowFilter which sends rows through several RowFilters in sequence.
  message Chain {
    // The elements of `filters` are chained together to process the input row:
    // in row -> f(0) -> intermediate row -> f(1) -> ... -> f(N) -> out row
    // The full chain is executed atomically.
    repeated RowFilter filters = 1;
  }

  // A RowFilter which sends each row to each of several component
  // RowFilters and interleaves the results.
  message Interleave {
    // The elements of `filters` all process a copy of the input row, and the
    // results are pooled, sorted, and combined into a single output row.
    // If multiple cells are produced with the same column and timestamp,
    // they will all appear in the output row in an unspecified mutual order.
    // Consider the following example, with three filters:
    //
    //                                  input row
    //                                      |
    //            -----------------------------------------------------
    //            |                         |                         |
    //           f(0)                      f(1)                      f(2)
    //            |                         |                         |
    //     1: foo,bar,10,x             foo,bar,10,z              far,bar,7,a
    //     2: foo,blah,11,z            far,blah,5,x              far,blah,5,x
    //            |                         |                         |
    //            -----------------------------------------------------
    //                                      |
    //     1:                      foo,bar,10,z   // could have switched with #2
    //     2:                      foo,bar,10,x   // could have switched with #1
    //     3:                      foo,blah,11,z
    //     4:                      far,bar,7,a
    //     5:                      far,blah,5,x   // identical to #6
    //     6:                      far,blah,5,x   // identical to #5
    //
    // All interleaved filters are executed atomically.
    repeated RowFilter filters = 1;
  }

  // A RowFilter which evaluates one of two possible RowFilters, depending on
  // whether or not a predicate RowFilter outputs any cells from the input row.
  //
  // IMPORTANT NOTE: The predicate filter does not execute atomically with the
  // true and false filters, which may lead to inconsistent or unexpected
  // results. Additionally, Condition filters have poor performance, especially
  // when filters are set for the false condition.
  message Condition {
    // If `predicate_filter` outputs any cells, then `true_filter` will be
    // evaluated on the input row. Otherwise, `false_filter` will be evaluated.
    RowFilter predicate_filter = 1;

    // The filter to apply to the input row if `predicate_filter` returns any
    // results. If not provided, no results will be returned in the true case.
    RowFilter true_filter = 2;

    // The filter to apply to the input row if `predicate_filter` does not
    // return any results. If not provided, no results will be returned in the
    // false case.
    RowFilter false_filter = 3;
  }

  // Which of the possible RowFilter types to apply. If none are set, this
  // RowFilter returns all cells in the input row.
  oneof filter {
    // Applies several RowFilters to the data in sequence, progressively
    // narrowing the results.
    Chain chain = 1;

    // Applies several RowFilters to the data in parallel and combines the
    // results.
    Interleave interleave = 2;

    // Applies one of two possible RowFilters to the data based on the output of
    // a predicate RowFilter.
    Condition condition = 3;

    // ADVANCED USE ONLY.
    // Hook for introspection into the RowFilter. Outputs all cells directly to
    // the output of the read rather than to any parent filter. Consider the
    // following example:
    //
    //     Chain(
    //       FamilyRegex("A"),
    //       Interleave(
    //         All(),
    //         Chain(Label("foo"), Sink())
    //       ),
    //       QualifierRegex("B")
    //     )
    //
    //                         A,A,1,w
    //                         A,B,2,x
    //                         B,B,4,z
    //                            |
    //                     FamilyRegex("A")
    //                            |
    //                         A,A,1,w
    //                         A,B,2,x
    //                            |
    //               +------------+-------------+
    //               |                          |
    //             All()                    Label(foo)
    //               |                          |
    //            A,A,1,w              A,A,1,w,labels:[foo]
    //            A,B,2,x              A,B,2,x,labels:[foo]
    //               |                          |
    //               |                        Sink() --------------+
    //               |                          |                  |
    //               +------------+      x------+          A,A,1,w,labels:[foo]
    //                            |                        A,B,2,x,labels:[foo]
    //                         A,A,1,w                             |
    //                         A,B,2,x                             |
    //                            |                                |
    //                    QualifierRegex("B")                      |
    //                            |                                |
    //                         A,B,2,x                             |
    //                            |                                |
    //                            +--------------------------------+
    //                            |
    //                         A,A,1,w,labels:[foo]
    //                         A,B,2,x,labels:[foo]  // could be switched
    //                         A,B,2,x               // could be switched
    //
    // Despite being excluded by the qualifier filter, a copy of every cell
    // that reaches the sink is present in the final result.
    //
    // As with an [Interleave][google.bigtable.v2.RowFilter.Interleave],
    // duplicate cells are possible, and appear in an unspecified mutual order.
    // In this case we have a duplicate with column "A:B" and timestamp 2,
    // because one copy passed through the all filter while the other was
    // passed through the label and sink. Note that one copy has label "foo",
    // while the other does not.
    //
    // Cannot be used within the `predicate_filter`, `true_filter`, or
    // `false_filter` of a [Condition][google.bigtable.v2.RowFilter.Condition].
    bool sink = 16;

    // Matches all cells, regardless of input. Functionally equivalent to
    // leaving `filter` unset, but included for completeness.
    bool pass_all_filter = 17;

    // Does not match any cells, regardless of input. Useful for temporarily
    // disabling just part of a filter.
    bool block_all_filter = 18;

    // Matches only cells from rows whose keys satisfy the given RE2 regex. In
    // other words, passes through the entire row when the key matches, and
    // otherwise produces an empty row.
    // Note that, since row keys can contain arbitrary bytes, the `\C` escape
    // sequence must be used if a true wildcard is desired. The `.` character
    // will not match the new line character `\n`, which may be present in a
    // binary key.
    bytes row_key_regex_filter = 4;

    // Matches all cells from a row with probability p, and matches no cells
    // from the row with probability 1-p.
    double row_sample_filter = 14;

    // Matches only cells from columns whose families satisfy the given RE2
    // regex. For technical reasons, the regex must not contain the `:`
    // character, even if it is not being used as a literal.
    // Note that, since column families cannot contain the new line character
    // `\n`, it is sufficient to use `.` as a full wildcard when matching
    // column family names.
    string family_name_regex_filter = 5;

    // Matches only cells from columns whose qualifiers satisfy the given RE2
    // regex.
    // Note that, since column qualifiers can contain arbitrary bytes, the `\C`
    // escape sequence must be used if a true wildcard is desired. The `.`
    // character will not match the new line character `\n`, which may be
    // present in a binary qualifier.
    bytes column_qualifier_regex_filter = 6;

    // Matches only cells from columns within the given range.
    ColumnRange column_range_filter = 7;

    // Matches only cells with timestamps within the given range.
    TimestampRange timestamp_range_filter = 8;

    // Matches only cells with values that satisfy the given regular expression.
    // Note that, since cell values can contain arbitrary bytes, the `\C` escape
    // sequence must be used if a true wildcard is desired. The `.` character
    // will not match the new line character `\n`, which may be present in a
    // binary value.
    bytes value_regex_filter = 9;

    // Matches only cells with values that fall within the given range.
    ValueRange value_range_filter = 15;

    // Skips the first N cells of each row, matching all subsequent cells.
    // If duplicate cells are present, as is possible when using an Interleave,
    // each copy of the cell is counted separately.
    int32 cells_per_row_offset_filter = 10;

    // Matches only the first N cells of each row.
    // If duplicate cells are present, as is possible when using an Interleave,
    // each copy of the cell is counted separately.
    int32 cells_per_row_limit_filter = 11;

    // Matches only the most recent N cells within each column. For example,
    // if N=2, this filter would match column `foo:bar` at timestamps 10 and 9,
    // skip all earlier cells in `foo:bar`, and then begin matching again in
    // column `foo:bar2`.
    // If duplicate cells are present, as is possible when using an Interleave,
    // each copy of the cell is counted separately.
    int32 cells_per_column_limit_filter = 12;

    // Replaces each cell's value with the empty string.
    bool strip_value_transformer = 13;

    // Applies the given label to all cells in the output row. This allows
    // the client to determine which results were produced from which part of
    // the filter.
    //
    // Values must be at most 15 characters in length, and match the RE2
    // pattern `[a-z0-9\\-]+`
    //
    // Due to a technical limitation, it is not currently possible to apply
    // multiple labels to a cell. As a result, a Chain may have no more than
    // one sub-filter which contains an `apply_label_transformer`. It is okay
    // for an Interleave to contain multiple `apply_label_transformers`, as they
    // will be applied to separate copies of the input. This may be relaxed in
    // the future.
    string apply_label_transformer = 19;
  }
}

// Specifies a particular change to be made to the contents of a row.
message Mutation {
  // A Mutation which sets the value of the specified cell.
  message SetCell {
    // The name of the family into which new data should be written.
    // Must match `[-_.a-zA-Z0-9]+`
    string family_name = 1;

    // The qualifier of the column into which new data should be written.
    // Can be any byte string, including the empty string.
    bytes column_qualifier = 2;

    // The timestamp of the cell into which new data should be written.
    // Use -1 for current Bigtable server time.
    // Otherwise, the client should set this value itself, noting that the
    // default value is a timestamp of zero if the field is left unspecified.
    // Values must match the granularity of the table (e.g. micros, millis).
    int64 timestamp_micros = 3;

    // The value to be written into the specified cell.
    bytes value = 4;
  }

  // A Mutation which incrementally updates a cell in an `Aggregate` family.
  message AddToCell {
    // The name of the `Aggregate` family into which new data should be added.
    // This must be a family with a `value_type` of `Aggregate`.
    // Format: `[-_.a-zA-Z0-9]+`
    string family_name = 1;

    // The qualifier of the column into which new data should be added. This
    // must be a `raw_value`.
    Value column_qualifier = 2;

    // The timestamp of the cell to which new data should be added. This must
    // be a `raw_timestamp_micros` that matches the table's `granularity`.
    Value timestamp = 3;

    // The input value to be accumulated into the specified cell. This must be
    // compatible with the family's `value_type.input_type`.
    Value input = 4;
  }

  // A Mutation which deletes cells from the specified column, optionally
  // restricting the deletions to a given timestamp range.
  message DeleteFromColumn {
    // The name of the family from which cells should be deleted.
    // Must match `[-_.a-zA-Z0-9]+`
    string family_name = 1;

    // The qualifier of the column from which cells should be deleted.
    // Can be any byte string, including the empty string.
    bytes column_qualifier = 2;

    // The range of timestamps within which cells should be deleted.
    TimestampRange time_range = 3;
  }

  // A Mutation which deletes all cells from the specified column family.
  message DeleteFromFamily {
    // The name of the family from which cells should be deleted.
    // Must match `[-_.a-zA-Z0-9]+`
    string family_name = 1;
  }

  // A Mutation which deletes all cells from the containing row.
  message DeleteFromRow {}

  // Which of the possible Mutation types to apply.
  oneof mutation {
    // Set a cell's value.
    SetCell set_cell = 1;

    // Incrementally updates an `Aggregate` cell.
    AddToCell add_to_cell = 5;

    // Deletes cells from a column.
    DeleteFromColumn delete_from_column = 2;

    // Deletes cells from a column family.
    DeleteFromFamily delete_from_family = 3;

    // Deletes cells from the entire row.
    DeleteFromRow delete_from_row = 4;
  }
}

// Specifies an atomic read/modify/write operation on the latest value of the
// specified column.
message ReadModifyWriteRule {
  // The name of the family to which the read/modify/write should be applied.
  // Must match `[-_.a-zA-Z0-9]+`
  string family_name = 1;

  // The qualifier of the column to which the read/modify/write should be
  // applied.
  // Can be any byte string, including the empty string.
  bytes column_qualifier = 2;

  // The rule used to determine the column's new latest value from its current
  // latest value.
  oneof rule {
    // Rule specifying that `append_value` be appended to the existing value.
    // If the targeted cell is unset, it will be treated as containing the
    // empty string.
    bytes append_value = 3;

    // Rule specifying that `increment_amount` be added to the existing value.
    // If the targeted cell is unset, it will be treated as containing a zero.
    // Otherwise, the targeted cell must contain an 8-byte value (interpreted
    // as a 64-bit big-endian signed integer), or the entire request will fail.
    int64 increment_amount = 4;
  }
}

// NOTE: This API is intended to be used by Apache Beam BigtableIO.
// A partition of a change stream.
message StreamPartition {
  // The row range covered by this partition, specified as the half-open
  // interval [`start_key_closed`, `end_key_open`).
  RowRange row_range = 1;
}

// NOTE: This API is intended to be used by Apache Beam BigtableIO.
// The information required to continue reading the data from multiple
// `StreamPartitions` from where a previous read left off.
message StreamContinuationTokens {
  // List of continuation tokens.
  repeated StreamContinuationToken tokens = 1;
}

// NOTE: This API is intended to be used by Apache Beam BigtableIO.
// The information required to continue reading the data from a
// `StreamPartition` from where a previous read left off.
message StreamContinuationToken {
  // The partition that this token applies to.
  StreamPartition partition = 1;

  // An encoded position in the stream to restart reading from.
  string token = 2;
}