xref: /aosp_15_r20/external/googleapis/google/bigtable/v2/data.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1// Copyright 2023 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.bigtable.v2;
18
19import "google/api/field_behavior.proto";
20
21option csharp_namespace = "Google.Cloud.Bigtable.V2";
22option go_package = "google.golang.org/genproto/googleapis/bigtable/v2;bigtable";
23option java_multiple_files = true;
24option java_outer_classname = "DataProto";
25option java_package = "com.google.bigtable.v2";
26option php_namespace = "Google\\Cloud\\Bigtable\\V2";
27option ruby_package = "Google::Cloud::Bigtable::V2";
28
29// Specifies the complete (requested) contents of a single row of a table.
30// Rows which exceed 256MiB in size cannot be read in full.
31message Row {
32  // The unique key which identifies this row within its table. This is the same
33  // key that's used to identify the row in, for example, a MutateRowRequest.
34  // May contain any non-empty byte string up to 4KiB in length.
35  bytes key = 1;
36
  // The column families that make up the contents of this row.
37  // May be empty, but only if the entire row is empty.
38  // The mutual ordering of column families is not specified.
39  repeated Family families = 2;
40}
41
42// Specifies (some of) the contents of a single row/column family intersection
43// of a table.
44message Family {
45  // The unique key which identifies this family within its row. This is the
46  // same key that's used to identify the family in, for example, a RowFilter
47  // which sets its `family_name_regex_filter` field.
48  // Must match `[-_.a-zA-Z0-9]+`, except that AggregatingRowProcessors may
49  // produce cells in a sentinel family with an empty name.
50  // Must be no greater than 64 characters in length.
51  string name = 1;
52
  // The columns of this family.
53  // Must not be empty. Sorted in order of increasing `qualifier`.
54  repeated Column columns = 2;
55}
56
57// Specifies (some of) the contents of a single row/column intersection of a
58// table.
59message Column {
60  // The unique key which identifies this column within its family. This is the
61  // same key that's used to identify the column in, for example, a RowFilter
62  // which sets its `column_qualifier_regex_filter` field.
63  // May contain any byte string, including the empty string, up to 16KiB in
64  // length.
65  bytes qualifier = 1;
66
  // The cells of this column.
67  // Must not be empty. Sorted in order of decreasing `timestamp_micros`.
68  repeated Cell cells = 2;
69}
70
71// Specifies (some of) the contents of a single row/column/timestamp of a table.
72message Cell {
73  // The cell's stored timestamp, which also uniquely identifies it within
74  // its column.
75  // Values are always expressed in microseconds, but individual tables may set
76  // a coarser granularity to further restrict the allowed values. For
77  // example, a table which specifies millisecond granularity will only allow
78  // values of `timestamp_micros` which are multiples of 1000.
79  int64 timestamp_micros = 1;
80
81  // The value stored in the cell.
82  // May contain any byte string, including the empty string, up to 100MiB in
83  // length.
84  bytes value = 2;
85
86  // Labels applied to the cell by a [RowFilter][google.bigtable.v2.RowFilter].
  // See the `apply_label_transformer` member of `RowFilter.filter`.
87  repeated string labels = 3;
88}
89
90// `Value` represents a dynamically typed value.
91// The typed fields in `Value` are used as a transport encoding for the actual
92// value (which may be of a more complex type). See the documentation of the
93// `Type` message for more details.
//
// NOTE(review): neither a `type` field on this message nor a `Type` message is
// declared in this file; the references to them in the comments here
// presumably describe definitions that live elsewhere or are not yet
// published -- confirm.
94message Value {
95  // Options for transporting values within the protobuf type system. A given
96  // `kind` may support more than one `type` and vice versa. On write, this is
97  // roughly analogous to a GoogleSQL literal.
98  //
99  // The value is `NULL` if none of the fields in `kind` is set. If `type` is
100  // also omitted on write, we will infer it based on the schema.
101  oneof kind {
102    // Represents a raw byte sequence with no type information.
103    // The `type` field must be omitted.
104    bytes raw_value = 8;
105
106    // Represents a raw cell timestamp with no type information.
107    // The `type` field must be omitted.
108    int64 raw_timestamp_micros = 9;
109
110    // Represents a typed value transported as an integer.
111    // Default type for writes: `Int64`
112    int64 int_value = 6;
113  }
114}
115
116// Specifies a contiguous range of rows.
//
// Each bound is a oneof, so at most one of its closed/open variants can be
// set at a time.
117message RowRange {
118  // The row key at which to start the range.
119  // If neither field is set, interpreted as the empty string, inclusive.
120  oneof start_key {
121    // Used when giving an inclusive lower bound for the range.
122    bytes start_key_closed = 1;
123
124    // Used when giving an exclusive lower bound for the range.
125    bytes start_key_open = 2;
126  }
127
128  // The row key at which to end the range.
129  // If neither field is set, interpreted as the infinite row key, exclusive.
130  oneof end_key {
131    // Used when giving an exclusive upper bound for the range.
132    bytes end_key_open = 3;
133
134    // Used when giving an inclusive upper bound for the range.
135    bytes end_key_closed = 4;
136  }
137}
138
139// Specifies a non-contiguous set of rows.
// A row is included in the set if it is listed in `row_keys` or falls within
// any of the `row_ranges`.
140message RowSet {
141  // Single rows included in the set.
142  repeated bytes row_keys = 1;
143
144  // Contiguous row ranges included in the set.
145  repeated RowRange row_ranges = 2;
146}
147
148// Specifies a contiguous range of columns within a single column family.
149// The range spans from <family_name>:<start_qualifier> to
150// <family_name>:<end_qualifier>, where both bounds can be either
151// inclusive or exclusive.
152message ColumnRange {
153  // The name of the column family within which this range falls.
154  string family_name = 1;
155
156  // The column qualifier at which to start the range (within `family_name`).
157  // If neither field is set, interpreted as the empty string, inclusive.
158  oneof start_qualifier {
159    // Used when giving an inclusive lower bound for the range.
160    bytes start_qualifier_closed = 2;
161
162    // Used when giving an exclusive lower bound for the range.
163    bytes start_qualifier_open = 3;
164  }
165
166  // The column qualifier at which to end the range (within `family_name`).
167  // If neither field is set, interpreted as the infinite string, exclusive.
168  oneof end_qualifier {
169    // Used when giving an inclusive upper bound for the range.
170    bytes end_qualifier_closed = 4;
171
172    // Used when giving an exclusive upper bound for the range.
173    bytes end_qualifier_open = 5;
174  }
175}
176
177// Specifies a contiguous range of microsecond timestamps.
//
// Both fields are proto3 scalars without explicit presence, so an explicit 0
// cannot be distinguished from a field that was left unset.
178message TimestampRange {
179  // Inclusive lower bound. If left empty, interpreted as 0.
180  int64 start_timestamp_micros = 1;
181
182  // Exclusive upper bound. If left empty, interpreted as infinity.
183  int64 end_timestamp_micros = 2;
184}
185
186// Specifies a contiguous range of raw byte values.
//
// Each bound is a oneof, so at most one of its closed/open variants can be
// set at a time.
187message ValueRange {
188  // The value at which to start the range.
189  // If neither field is set, interpreted as the empty string, inclusive.
190  oneof start_value {
191    // Used when giving an inclusive lower bound for the range.
192    bytes start_value_closed = 1;
193
194    // Used when giving an exclusive lower bound for the range.
195    bytes start_value_open = 2;
196  }
197
198  // The value at which to end the range.
199  // If neither field is set, interpreted as the infinite string, exclusive.
200  oneof end_value {
201    // Used when giving an inclusive upper bound for the range.
202    bytes end_value_closed = 3;
203
204    // Used when giving an exclusive upper bound for the range.
205    bytes end_value_open = 4;
206  }
207}
208
209// Takes a row as input and produces an alternate view of the row based on
210// specified rules. For example, a RowFilter might trim down a row to include
211// just the cells from columns matching a given regular expression, or might
212// return all the cells of a row but not their values. More complicated filters
213// can be composed out of these components to express requests such as, "within
214// every column of a particular family, give just the two most recent cells
215// which are older than timestamp X."
216//
217// There are two broad categories of RowFilters (true filters and transformers),
218// as well as two ways to compose simple filters into more complex ones
219// (chains and interleaves). They work as follows:
220//
221// * True filters alter the input row by excluding some of its cells wholesale
222// from the output row. An example of a true filter is the `value_regex_filter`,
223// which excludes cells whose values don't match the specified pattern. All
224// regex true filters use RE2 syntax (https://github.com/google/re2/wiki/Syntax)
225// in raw byte mode (RE2::Latin1), and are evaluated as full matches. An
226// important point to keep in mind is that `RE2(.)` is equivalent by default to
227// `RE2([^\n])`, meaning that it does not match newlines. When attempting to
228// match an arbitrary byte, you should therefore use the escape sequence `\C`,
229// which may need to be further escaped as `\\C` in your client language.
230//
231// * Transformers alter the input row by changing the values of some of its
232// cells in the output, without excluding them completely. Currently, the only
233// supported transformer is the `strip_value_transformer`, which replaces every
234// cell's value with the empty string.
235//
236// * Chains and interleaves are described in more detail in the
237// RowFilter.Chain and RowFilter.Interleave documentation.
238//
239// The total serialized size of a RowFilter message must not
240// exceed 20480 bytes, and RowFilters may not be nested within each other
241// (in Chains or Interleaves) to a depth of more than 20.
242message RowFilter {
243  // A RowFilter which sends rows through several RowFilters in sequence.
244  message Chain {
245    // The elements of `filters` are chained together to process the input row:
246    // in row -> f(0) -> intermediate row -> f(1) -> ... -> f(N) -> out row
247    // The full chain is executed atomically.
248    repeated RowFilter filters = 1;
249  }
250
251  // A RowFilter which sends each row to each of several component
252  // RowFilters and interleaves the results.
253  message Interleave {
254    // The elements of `filters` all process a copy of the input row, and the
255    // results are pooled, sorted, and combined into a single output row.
256    // If multiple cells are produced with the same column and timestamp,
257    // they will all appear in the output row in an unspecified mutual order.
258    // Consider the following example, with three filters:
259    //
260    //                                  input row
261    //                                      |
262    //            -----------------------------------------------------
263    //            |                         |                         |
264    //           f(0)                      f(1)                      f(2)
265    //            |                         |                         |
266    //     1: foo,bar,10,x             foo,bar,10,z              far,bar,7,a
267    //     2: foo,blah,11,z            far,blah,5,x              far,blah,5,x
268    //            |                         |                         |
269    //            -----------------------------------------------------
270    //                                      |
271    //     1:                      foo,bar,10,z   // could have switched with #2
272    //     2:                      foo,bar,10,x   // could have switched with #1
273    //     3:                      foo,blah,11,z
274    //     4:                      far,bar,7,a
275    //     5:                      far,blah,5,x   // identical to #6
276    //     6:                      far,blah,5,x   // identical to #5
277    //
278    // All interleaved filters are executed atomically.
279    repeated RowFilter filters = 1;
280  }
281
282  // A RowFilter which evaluates one of two possible RowFilters, depending on
283  // whether or not a predicate RowFilter outputs any cells from the input row.
284  //
285  // IMPORTANT NOTE: The predicate filter does not execute atomically with the
286  // true and false filters, which may lead to inconsistent or unexpected
287  // results. Additionally, Condition filters have poor performance, especially
288  // when filters are set for the false condition.
289  message Condition {
290    // If `predicate_filter` outputs any cells, then `true_filter` will be
291    // evaluated on the input row. Otherwise, `false_filter` will be evaluated.
292    RowFilter predicate_filter = 1;
293
294    // The filter to apply to the input row if `predicate_filter` returns any
295    // results. If not provided, no results will be returned in the true case.
296    RowFilter true_filter = 2;
297
298    // The filter to apply to the input row if `predicate_filter` does not
299    // return any results. If not provided, no results will be returned in the
300    // false case.
301    RowFilter false_filter = 3;
302  }
303
304  // Which of the possible RowFilter types to apply. If none are set, this
305  // RowFilter returns all cells in the input row.
306  oneof filter {
307    // Applies several RowFilters to the data in sequence, progressively
308    // narrowing the results.
309    Chain chain = 1;
310
311    // Applies several RowFilters to the data in parallel and combines the
312    // results.
313    Interleave interleave = 2;
314
315    // Applies one of two possible RowFilters to the data based on the output of
316    // a predicate RowFilter.
317    Condition condition = 3;
318
319    // ADVANCED USE ONLY.
320    // Hook for introspection into the RowFilter. Outputs all cells directly to
321    // the output of the read rather than to any parent filter. Consider the
322    // following example:
323    //
324    //     Chain(
325    //       FamilyRegex("A"),
326    //       Interleave(
327    //         All(),
328    //         Chain(Label("foo"), Sink())
329    //       ),
330    //       QualifierRegex("B")
331    //     )
332    //
333    //                         A,A,1,w
334    //                         A,B,2,x
335    //                         B,B,4,z
336    //                            |
337    //                     FamilyRegex("A")
338    //                            |
339    //                         A,A,1,w
340    //                         A,B,2,x
341    //                            |
342    //               +------------+-------------+
343    //               |                          |
344    //             All()                    Label(foo)
345    //               |                          |
346    //            A,A,1,w              A,A,1,w,labels:[foo]
347    //            A,B,2,x              A,B,2,x,labels:[foo]
348    //               |                          |
349    //               |                        Sink() --------------+
350    //               |                          |                  |
351    //               +------------+      x------+          A,A,1,w,labels:[foo]
352    //                            |                        A,B,2,x,labels:[foo]
353    //                         A,A,1,w                             |
354    //                         A,B,2,x                             |
355    //                            |                                |
356    //                    QualifierRegex("B")                      |
357    //                            |                                |
358    //                         A,B,2,x                             |
359    //                            |                                |
360    //                            +--------------------------------+
361    //                            |
362    //                         A,A,1,w,labels:[foo]
363    //                         A,B,2,x,labels:[foo]  // could be switched
364    //                         A,B,2,x               // could be switched
365    //
366    // Despite being excluded by the qualifier filter, a copy of every cell
367    // that reaches the sink is present in the final result.
368    //
369    // As with an [Interleave][google.bigtable.v2.RowFilter.Interleave],
370    // duplicate cells are possible, and appear in an unspecified mutual order.
371    // In this case we have a duplicate with column "A:B" and timestamp 2,
372    // because one copy passed through the all filter while the other was
373    // passed through the label and sink. Note that one copy has label "foo",
374    // while the other does not.
375    //
376    // Cannot be used within the `predicate_filter`, `true_filter`, or
377    // `false_filter` of a [Condition][google.bigtable.v2.RowFilter.Condition].
378    bool sink = 16;
379
380    // Matches all cells, regardless of input. Functionally equivalent to
381    // leaving `filter` unset, but included for completeness.
382    bool pass_all_filter = 17;
383
384    // Does not match any cells, regardless of input. Useful for temporarily
385    // disabling just part of a filter.
386    bool block_all_filter = 18;
387
388    // Matches only cells from rows whose keys satisfy the given RE2 regex. In
389    // other words, passes through the entire row when the key matches, and
390    // otherwise produces an empty row.
391    // Note that, since row keys can contain arbitrary bytes, the `\C` escape
392    // sequence must be used if a true wildcard is desired. The `.` character
393    // will not match the new line character `\n`, which may be present in a
394    // binary key.
395    bytes row_key_regex_filter = 4;
396
397    // Matches all cells from a row with probability p, and matches no cells
398    // from the row with probability 1-p.
    // NOTE(review): `p` is presumably the value of this field -- confirm.
399    double row_sample_filter = 14;
400
401    // Matches only cells from columns whose families satisfy the given RE2
402    // regex. For technical reasons, the regex must not contain the `:`
403    // character, even if it is not being used as a literal.
404    // Note that, since column families cannot contain the new line character
405    // `\n`, it is sufficient to use `.` as a full wildcard when matching
406    // column family names.
407    string family_name_regex_filter = 5;
408
409    // Matches only cells from columns whose qualifiers satisfy the given RE2
410    // regex.
411    // Note that, since column qualifiers can contain arbitrary bytes, the `\C`
412    // escape sequence must be used if a true wildcard is desired. The `.`
413    // character will not match the new line character `\n`, which may be
414    // present in a binary qualifier.
415    bytes column_qualifier_regex_filter = 6;
416
417    // Matches only cells from columns within the given range.
418    ColumnRange column_range_filter = 7;
419
420    // Matches only cells with timestamps within the given range.
421    TimestampRange timestamp_range_filter = 8;
422
423    // Matches only cells with values that satisfy the given regular expression.
424    // Note that, since cell values can contain arbitrary bytes, the `\C` escape
425    // sequence must be used if a true wildcard is desired. The `.` character
426    // will not match the new line character `\n`, which may be present in a
427    // binary value.
428    bytes value_regex_filter = 9;
429
430    // Matches only cells with values that fall within the given range.
431    ValueRange value_range_filter = 15;
432
433    // Skips the first N cells of each row, matching all subsequent cells.
434    // If duplicate cells are present, as is possible when using an Interleave,
435    // each copy of the cell is counted separately.
    // NOTE(review): here and in the two limit filters below, N is presumably
    // the value of the field -- confirm.
436    int32 cells_per_row_offset_filter = 10;
437
438    // Matches only the first N cells of each row.
439    // If duplicate cells are present, as is possible when using an Interleave,
440    // each copy of the cell is counted separately.
441    int32 cells_per_row_limit_filter = 11;
442
443    // Matches only the most recent N cells within each column. For example,
444    // if N=2, this filter would match column `foo:bar` at timestamps 10 and 9,
445    // skip all earlier cells in `foo:bar`, and then begin matching again in
446    // column `foo:bar2`.
447    // If duplicate cells are present, as is possible when using an Interleave,
448    // each copy of the cell is counted separately.
449    int32 cells_per_column_limit_filter = 12;
450
451    // Replaces each cell's value with the empty string.
452    bool strip_value_transformer = 13;
453
454    // Applies the given label to all cells in the output row. This allows
455    // the client to determine which results were produced from which part of
456    // the filter.
457    //
458    // Values must be at most 15 characters in length, and match the RE2
459    // pattern `[a-z0-9\\-]+`
460    //
461    // Due to a technical limitation, it is not currently possible to apply
462    // multiple labels to a cell. As a result, a Chain may have no more than
463    // one sub-filter which contains an `apply_label_transformer`. It is okay
464    // for an Interleave to contain multiple `apply_label_transformers`, as they
465    // will be applied to separate copies of the input. This may be relaxed in
466    // the future.
467    string apply_label_transformer = 19;
468  }
469}
470
471// Specifies a particular change to be made to the contents of a row.
472message Mutation {
473  // A Mutation which sets the value of the specified cell.
474  message SetCell {
475    // The name of the family into which new data should be written.
476    // Must match `[-_.a-zA-Z0-9]+`
477    string family_name = 1;
478
479    // The qualifier of the column into which new data should be written.
480    // Can be any byte string, including the empty string.
481    bytes column_qualifier = 2;
482
483    // The timestamp of the cell into which new data should be written.
484    // Use -1 for current Bigtable server time.
485    // Otherwise, the client should set this value itself, noting that the
486    // default value is a timestamp of zero if the field is left unspecified.
487    // Values must match the granularity of the table (e.g. micros, millis).
488    int64 timestamp_micros = 3;
489
490    // The value to be written into the specified cell.
491    bytes value = 4;
492  }
493
494  // A Mutation which incrementally updates a cell in an `Aggregate` family.
495  message AddToCell {
496    // The name of the `Aggregate` family into which new data should be added.
497    // This must be a family with a `value_type` of `Aggregate`.
498    // Format: `[-_.a-zA-Z0-9]+`
499    string family_name = 1;
500
501    // The qualifier of the column into which new data should be added. This
502    // must be a `raw_value`.
503    Value column_qualifier = 2;
504
505    // The timestamp of the cell to which new data should be added. This must
506    // be a `raw_timestamp_micros` that matches the table's `granularity`.
507    Value timestamp = 3;
508
509    // The input value to be accumulated into the specified cell. This must be
510    // compatible with the family's `value_type.input_type`.
511    Value input = 4;
512  }
513
514  // A Mutation which deletes cells from the specified column, optionally
515  // restricting the deletions to a given timestamp range.
516  message DeleteFromColumn {
517    // The name of the family from which cells should be deleted.
518    // Must match `[-_.a-zA-Z0-9]+`
519    string family_name = 1;
520
521    // The qualifier of the column from which cells should be deleted.
522    // Can be any byte string, including the empty string.
523    bytes column_qualifier = 2;
524
525    // The range of timestamps within which cells should be deleted.
526    TimestampRange time_range = 3;
527  }
528
529  // A Mutation which deletes all cells from the specified column family.
530  message DeleteFromFamily {
531    // The name of the family from which cells should be deleted.
532    // Must match `[-_.a-zA-Z0-9]+`
533    string family_name = 1;
534  }
535
536  // A Mutation which deletes all cells from the containing row.
537  message DeleteFromRow {}
538
539  // Which of the possible Mutation types to apply.
540  oneof mutation {
541    // Sets a cell's value.
542    SetCell set_cell = 1;
543
544    // Incrementally updates an `Aggregate` cell.
545    AddToCell add_to_cell = 5;
546
547    // Deletes cells from a column.
548    DeleteFromColumn delete_from_column = 2;
549
550    // Deletes cells from a column family.
551    DeleteFromFamily delete_from_family = 3;
552
553    // Deletes cells from the entire row.
554    DeleteFromRow delete_from_row = 4;
555  }
556}
557
558// Specifies an atomic read/modify/write operation on the latest value of the
559// specified column.
560message ReadModifyWriteRule {
561  // The name of the family to which the read/modify/write should be applied.
562  // Must match `[-_.a-zA-Z0-9]+`
563  string family_name = 1;
564
565  // The qualifier of the column to which the read/modify/write should be
566  // applied.
567  // Can be any byte string, including the empty string.
568  bytes column_qualifier = 2;
569
570  // The rule used to determine the column's new latest value from its current
571  // latest value.
  // As a oneof, at most one of `append_value` and `increment_amount` can be
  // set at a time.
572  oneof rule {
573    // Rule specifying that `append_value` be appended to the existing value.
574    // If the targeted cell is unset, it will be treated as containing the
575    // empty string.
576    bytes append_value = 3;
577
578    // Rule specifying that `increment_amount` be added to the existing value.
579    // If the targeted cell is unset, it will be treated as containing a zero.
580    // Otherwise, the targeted cell must contain an 8-byte value (interpreted
581    // as a 64-bit big-endian signed integer), or the entire request will fail.
582    int64 increment_amount = 4;
583  }
584}
585
586// NOTE: This API is intended to be used by Apache Beam BigtableIO.
587// A partition of a change stream.
588message StreamPartition {
589  // The row range covered by this partition, specified as
590  // [`start_key_closed`, `end_key_open`).
591  RowRange row_range = 1;
592}
593
594// NOTE: This API is intended to be used by Apache Beam BigtableIO.
595// The information required to continue reading the data from multiple
596// `StreamPartitions` from where a previous read left off.
597message StreamContinuationTokens {
598  // List of continuation tokens. Each entry names the partition it applies to
  // and carries an encoded position within the stream.
599  repeated StreamContinuationToken tokens = 1;
600}
601
602// NOTE: This API is intended to be used by Apache Beam BigtableIO.
603// The information required to continue reading the data from a
604// `StreamPartition` from where a previous read left off.
605message StreamContinuationToken {
606  // The partition that this token applies to.
607  StreamPartition partition = 1;
608
609  // An encoded position in the stream to restart reading from.
  // NOTE(review): the encoding is not described in this file; presumably the
  // value is opaque to callers -- confirm.
610  string token = 2;
611}
612