1// Copyright 2023 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14syntax = "proto3";
15
16package cobalt;
17
18import "window_size.proto";
19
20option java_multiple_files = true;
21option java_package = "com.google.cobalt";
22
23////////////////////////////////////////////////////////////////////////////////
24// NOTE: This file is used by the Cobalt client and the Cobalt servers.
25// The source-of-truth of this file is located in Cobalt's open source code
26// repository, and the file is copied to Android where it is used by the Cobalt
27// client. Do not edit the copy of this file in this Android repo as those edits
28// will be overwritten when the file is next copied.
29////////////////////////////////////////////////////////////////////////////////
30
31// A Report analyzes Events that were logged to Cobalt and emits an aggregated
32// output that may then be queried or visualized by an analyst user of Cobalt.
33//
34// A Report is associated with a Metric and this means that the Report analyzes
35// the Events that were logged to that Metric. The first step occurs on a
36// device where Cobalt analyzes the logged Events in order to form Observations.
37//
38// An Observation is built for a particular Report. The type of observation,
39// including which of several privacy-oriented Encodings is used or not, depends
40// on the Report type.
41//
42// The Observations are sent to the Cobalt Shuffler which shuffles them in order
43// to break linkability between Observations and linkability with the
44// originating device. Next the shuffled Observations are sent to the Analyzer
45// which aggregates Observations from all devices in order to generate a report.
46//
47// There are multiple types of Metrics and multiple types of Reports. Each
48// Report type is compatible with only some of the Metric types.
49//
50// A ReportDefinition defines a Cobalt Report to be generated.
51// An instance of ReportDefinition is always associated with an instance of
52// MetricDefinition called the owning MetricDefinition.
53// Next ID: 33
54message ReportDefinition {
55  reserved 4, 5, 6, 7, 8, 9, 11, 14, 15, 16, 12, 20, 21, 30, 31, 101, 102;
56  reserved "aggregation_type", "aggregation_window", "candidate_lis", "dp_release_config",
57      "expected_population_size", "expected_string_set_size", "export_location_override",
58      "local_privacy_noise_level", "output_location", "percentiles", "threshold", "window_size",
59      "use_poisson_mechanism_for_privacy", "prob_bit_flip", "candidate_file", "privacy_level",
60      "poisson_mean";
61
62  // Unique name for this Report within its owning MetricDefinition.
63  // The name must obey the syntax of a C variable name and must have length
64  // at most 64. The integer |id| field is the stable identifier for a report
65  // so this name may be changed. However doing this may affect the
66  // names and locations of some artifacts produced by Cobalt's report
67  // generation pipeline.
68  string report_name = 1;
69
70  // The unique integer ID for this report within its owning metric.
71  // The user must manually set this |id| field. This is the stable identifier
72  // for a report and should not be changed once data collection begins.
73  uint32 id = 2;
74
75  // A Report has one of the following types.
76  // Next standard report type ID: 22
77  enum ReportType {
    // The numbers and names below are reserved so that they cannot be reused
    // by a new report type (presumably they belonged to removed report types).
78    reserved 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 19, 9999;
79    reserved "CUSTOM_RAW_DUMP", "EVENT_COMPONENT_OCCURRENCE_COUNT", "HIGH_FREQUENCY_STRING_COUNTS",
80        "INT_RANGE_HISTOGRAM", "NUMERIC_AGGREGATION", "NUMERIC_PERF_RAW_DUMP",
81        "PER_DEVICE_HISTOGRAM", "PER_DEVICE_NUMERIC_STATS", "SIMPLE_OCCURRENCE_COUNT",
82        "STRING_COUNTS_WITH_THRESHOLD", "STRING_HISTOGRAMS", "UNIQUE_N_DAY_ACTIVES";
83
    // Zero value; proto3 uses it as the default when no report type is set.
84    REPORT_TYPE_UNSET = 0;
85
86    // For each system_profile SP and each event_vector EV, produces the total
87    // count of all occurrences on all devices in the fleet with system profile
88    // SP of the event associated with EV over the course of the report day.
89    // For example, a report of this type might give the total number of times
90    // a medium, red widget was used across the fleet yesterday.
91    //
92    // Input metric types: OCCURRENCE
93    //
94    // Local aggregation: COUNT
95    // Local aggregation period: 1 hour
96    // Global aggregation: OCCURRENCE_COUNTS
97    // System Profile Selection Policy: REPORT_ALL
98    //
99    // Output report row type: OccurrenceCountReportRow
100    // (See report_row.proto)
101    //
102    // ReportDefinition fields particular to this type:
103    //    none
104    FLEETWIDE_OCCURRENCE_COUNTS = 11;
105
106    // For each system_profile SP and each event_vector EV, produces the count
107    // of the number of unique devices with system profile SP for which EV
108    // “is accepted” during the aggregation period, which must be DAYS_1,
109    // DAYS_7, DAYS_28 or DAYS_30.
110    //
111    // There are different versions of what “is accepted” means depending on
112    // which local aggregation procedure is specified:
113    //
114    // AT_LEAST_ONCE. In this case EV is accepted if EV was logged at least once
115    // during the aggregation period. For example, a report of this type might
116    // give the total number of devices with system profile SP on which a
117    // medium, red widget was used at least once in the seven-day period
118    // ending yesterday.
119    //
120    // SELECT_FIRST, SELECT_MOST_COMMON. In this case EV is accepted if the
121    // category selection procedure selected EV. For example, a report of this
122    // type using SELECT_MOST_COMMON might give the total number of devices
123    // with system profile SP on which most of the widgets used during the
124    // seven-day period ending yesterday were medium-red.
125    //
126    // NOTE: Using a local aggregation procedure of AT_LEAST_ONCE or
127    // SELECT_FIRST, in combination with setting expedited_sending, results in
128    // the count being sent by the device when the event occurs (instead of at
129    // the end of the day). This can be desirable for having data for the
130    // current day appear faster in the reports output by Cobalt.
131    //
132    // Input metric types: OCCURRENCE
133    //
134    // Local aggregation: AT_LEAST_ONCE, SELECT_FIRST, or SELECT_MOST_COMMON
135    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
136    // Global aggregation: OCCURRENCE_COUNTS
137    //
138    // Output report row type: OccurrenceCountReportRow
139    // (See report_row.proto)
140    //
141    // ReportDefinition fields particular to this type:
142    //   - local_aggregation_procedure
143    //   - local_aggregation_period
144    //   - expedited_sending
145    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
146    //     uniqueness, REPORT_ALL may be useful in some cases)
147    UNIQUE_DEVICE_COUNTS = 12;
148
149    // For each system_profile SP and each event_vector EV, produces an
150    // int-range histogram such that in each int range bucket it gives the
151    // number of unique devices with system_profile SP for which an integer
152    // value, aggregated locally on device over the aggregation period,
153    // associated with EV, falls into the bucket.
154    //
155    // There are two versions of this depending on the metric type:
156    //
157    // With metrics of type OCCURRENCE the integer values are occurrence counts.
158    // For example, for the integer bucket 10-100, a report of this type might
159    // give the number of devices with system profile SP on which a medium,
160    // red widget was used between 10 and 100 times in the seven-day period
161    // ending yesterday.
162    //
163    // With metrics of type INTEGER the integer values are computed statistics.
164    // For example, for the integer bucket 10-100, a report of this type that
165    // specifies the MINIMUM local aggregation procedure might give the number
166    // of devices with system profile SP on which the minimum temperature of a
167    // medium red widget over the seven-day period ending yesterday was between
168    // 10 and 100 degrees.
169    //
170    // Input metric types: OCCURRENCE or INTEGER
171    //
172    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
173    //                    NUMERIC_STAT (used with INTEGER metrics)
174    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
175    // Global aggregation: INTEGER_HISTOGRAMS
176    //
177    // Output report row type: IntegerHistogramReportRow
178    // (See report_row.proto)
179    //
180    // ReportDefinition fields particular to this type:
181    //   - local_aggregation_procedure (only when the metric type is INTEGER)
182    //   - local_aggregation_period
183    //   - int_buckets (this is used only on the server for reports without
184    //     added privacy, but is used on the client for reports with added
185    //     privacy)
186    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
187    //     uniqueness, REPORT_ALL may be useful in some cases)
188    UNIQUE_DEVICE_HISTOGRAMS = 13;
189
190    // For each system_profile SP and each event_vector EV, produces an
191    // int-range histogram such that in each int range bucket it gives the
192    // number of values, associated with EV, from devices
193    // with system_profile SP, that fall into the bucket, where each device
194    // computes one such value per hour.
195    //
196    // Computationally this report type is identical to
197    // UNIQUE_DEVICE_HISTOGRAMS except that the local aggregation period
198    // used is one hour and so the counts in each bucket are not interpreted
199    // as a number of unique devices.
200    //
201    // There are two versions of this depending on the metric type:
202    //
203    // With metrics of type OCCURRENCE the integer values are occurrence counts.
204    // For example, for the integer bucket 10-100, a report of this type might
205    // give the number of times that the hourly count of medium red widgets
206    // used was between 10 and 100 over devices with system profile SP,
207    // yesterday.
208    //
209    // With metrics of type INTEGER the integer values are computed statistics.
210    // For example, for the integer bucket 10-100, a report of this type that
211    // specifies the MINIMUM local aggregation procedure might give the number
212    // of times that the minimum temperature over an hour of all medium red
213    // widgets used was between 10 and 100 degrees over all devices with
214    // system profile SP, yesterday.
215    //
216    // Input metric types: OCCURRENCE or INTEGER
217    //
218    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
219    //                    NUMERIC_STAT (used with INTEGER metrics)
220    // Local aggregation period: one hour
221    // Global aggregation: INTEGER_HISTOGRAMS
222    //
223    // Output report row type: IntegerHistogramReportRow
224    // (See report_row.proto)
225    //
226    // ReportDefinition fields particular to this type:
227    //   - local_aggregation_procedure (only when the metric type is INTEGER)
228    //   - int_buckets (this is used only on the server for reports without
229    //     added privacy, but is used on the client for reports with added
230    //     privacy)
231    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
232    //     uniqueness, REPORT_ALL may be useful in some cases)
233    HOURLY_VALUE_HISTOGRAMS = 14;
234
235    // For each system_profile SP and each event_vector EV, produces an
236    // int-range histogram such that in each int range bucket it gives the
237    // number of integer measurements, associated with EV, logged on devices
238    // with system_profile SP, that fall into the bucket. Here we are counting
239    // each value logged by the instrumented code individually and so the rate
240    // at which values are being recorded is arbitrary and varies from device
241    // to device. For example, for the integer bucket 10-100, a report of this
242    // type might give the number of times that a medium red widget's
243    // temperature was measured as being between 10 and 100 degrees over all
244    // devices with system profile SP, yesterday. The rate at which these
245    // widget temperature measurements are taken is arbitrary and may vary
246    // from device to device.
247    //
248    // Input metric types: INTEGER or INTEGER_HISTOGRAM
249    //
250    // Local aggregation: INTEGER_HISTOGRAM
251    // Local aggregation period: one hour
252    // Global aggregation: INTEGER_HISTOGRAMS
253    // System Profile Selection Policy: REPORT_ALL
254    //
255    // Output report row type: IntegerHistogramReportRow
256    // (See report_row.proto)
257    //
258    // ReportDefinition fields particular to this type:
259    //   - int_buckets (Only with metric_type = INTEGER)
260    FLEETWIDE_HISTOGRAMS = 15;
261
262    // For each system_profile SP and each event_vector EV, produces the sum
263    // and count of many integer measurements associated with EV, logged on
264    // devices with system_profile SP. Here we are counting each value logged
265    // by the instrumented code individually and so the rate at which values are
266    // being recorded is arbitrary and varies from device to device. This allows
267    // us to produce a fleetwide mean. For example, a report of this type might
268    // give the mean of all temperature measurements of medium-red widgets
269    // yesterday, across all devices with system profile SP, regardless of how
270    // many temperature measurements were taken on each device individually.
271    //
272    // Input metric types: INTEGER
273    //
274    // Local aggregation: SUM_AND_COUNT
275    // Local aggregation period: one hour
276    // Global aggregation: SUM_AND_COUNTS
277    // System Profile Selection Policy: REPORT_ALL
278    //
279    // Output report row type: SumAndCountReportRow
280    // (See report_row.proto)
281    //
282    // ReportDefinition fields particular to this type:
283    //   none
284    FLEETWIDE_MEANS = 16;
285
286    // For each system_profile SP and each event_vector EV, produces several
287    // numeric statistics (e.g. 95%-ile) over a set of integers associated
288    // with EV, collected from all devices with system_profile SP. Each unique
289    // device contributes a single value and so the distribution of the values
290    // may be thought of as a distribution of unique devices.
291    //
292    // There are different versions of this depending on the metric type:
293    //
294    // With metrics of type OCCURRENCE the integer values are occurrence counts
295    // over the course of the aggregation period. For example a report of this
296    // type might give the 95%-ile of the counts of medium-red widgets used by
297    // each device over the 7-day period ending yesterday.
298    //
299    // With metrics of type INTEGER the integer values are computed statistics.
300    // For example, a report of this type that specifies the MINIMUM local
301    // aggregation procedure might give the 95%-ile of the minimum temperature
302    // over the 7-day period ending yesterday of all medium-red widgets over
303    // all devices with system profile SP.
304    //
305    // Input metric types: OCCURRENCE or INTEGER
306    //
307    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
308    //                    NUMERIC_STAT (used with INTEGER metrics)
309    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
310    // Global aggregation: NUMERIC_STATS
311    // System Profile Selection Policy: REPORT_ALL
312    //
313    // Output report row type: NumericStatsReportRow
314    // (See report_row.proto)
315    //
316    // ReportDefinition fields particular to this type:
317    //   - local_aggregation_procedure (only when the metric type is INTEGER)
318    //   - local_aggregation_period
319    UNIQUE_DEVICE_NUMERIC_STATS = 17;
320
321    // For each system_profile SP and each event_vector EV, produces several
322    // numeric statistics (e.g. 95%-ile) over a set of integers associated
323    // with EV, collected from all devices with system_profile SP. Each unique
324    // device contributes a value every hour and so the distribution of the
325    // values may NOT be thought of as a distribution of unique devices.
326    //
327    // Computationally this report type is identical to
328    // UNIQUE_DEVICE_NUMERIC_STATS except that the local aggregation period
329    // used is one hour.
330    //
331    // There are different versions of this depending on the metric type:
332    //
333    // With metrics of type OCCURRENCE the integer values are occurrence counts
334    // over the course of the hour. For example a report of this
335    // type might give the 95%-ile of the counts of medium-red widgets used in
336    // any one hour period on any device with system profile SP, yesterday.
337    //
338    // With metrics of type INTEGER the integer values are computed statistics.
339    // For example, a report of this type that specifies the MINIMUM local
340    // aggregation procedure might give the 95%-ile of the minimum temperature
341    // over any one-hour period of medium-red widget use on any device
342    // with system profile SP, yesterday.
343    //
344    // Input metric types: OCCURRENCE or INTEGER
345    //
346    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
347    //                    NUMERIC_STAT (used with INTEGER metrics)
348    // Local aggregation period: 1 hour
349    // Global aggregation: NUMERIC_STATS
350    // System Profile Selection Policy: REPORT_ALL
351    //
352    // Output report row type: NumericStatsReportRow
353    // (See report_row.proto)
354    //
355    // ReportDefinition fields particular to this type:
356    //   - local_aggregation_procedure (only when the metric type is INTEGER)
357    HOURLY_VALUE_NUMERIC_STATS = 18;
358
359    // For each system_profile SP and each event_vector EV, produces the total
360    // count of all occurrences of a string value on all devices in the fleet
361    // with system profile SP of the event associated with EV over the course
362    // of the report day.
363    //
364    // Input metric types: STRING
365    //
366    // Local aggregation: STRING_HISTOGRAM
367    // Local aggregation period: 1 hour
368    // Global aggregation: STRING_HISTOGRAMS
369    // System Profile Selection Policy: REPORT_ALL
370    //
371    // Output report row type: StringCountReportRow
372    // (See report_row.proto)
373    //
374    // ReportDefinition fields particular to this type:
375    //   - string_buffer_max
376    STRING_COUNTS = 20;
377
378    // For each system_profile SP, each event_vector EV, and each string value
379    // produces the count of the number of unique devices with system profile
380    // SP on which the string value was logged in connection with the EV during
381    // the aggregation period, which must be DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
382    //
383    // This is similar to the AT_LEAST_ONCE local aggregation procedure for
384    // UNIQUE_DEVICE_COUNTS. For example, a report of this type might
385    // give the total number of devices with system profile SP on which a
386    // medium, red widget was used in conjunction with the component name
387    // "widget-consumer" at least once in the seven-day period ending
388    // yesterday.
389    //
390    // Input metric types: STRING
391    //
392    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
393    // Global aggregation: STRING_HISTOGRAMS
394    //
395    // Output report row type: StringCountReportRow
396    // (See report_row.proto)
397    //
398    // ReportDefinition fields particular to this type:
399    //   - local_aggregation_period
400    //   - string_buffer_max
401    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
402    //     uniqueness, REPORT_ALL may be useful in some cases)
403    UNIQUE_DEVICE_STRING_COUNTS = 21;
404  }
405  ReportType report_type = 3;
406
407  ////////////////  Fields for reports with privacy enabled  /////////////////
408
409  // When reporting numerical values with privacy, the values are mapped to
410  // indices from 0 to num_index_points-1 with a randomized rounding method.
411  //
412  // In the future, the value of this field will be computed by the registry
413  // parser as a function of other privacy-related fields and an estimate of the
414  // user population size. For now, it should be set manually in the Cobalt
415  // registry in consultation with the Cobalt team.
416  //
417  // TODO(b/278932979): update this comment once the field is populated by
418  // the registry parser.
419  uint32 num_index_points = 22;
420
421  // When reporting strings with privacy, the strings are counted using a linear
422  // sketch.
423  //
424  // In the future, the value of this field will be computed by the registry
425  // parser as a function of other privacy-related fields and an estimate of the
426  // user population size. For now, it should be set manually in the Cobalt
427  // registry in consultation with the Cobalt team.
428  //
429  // TODO(b/278932979): update this comment once the field is populated by
430  // the registry parser.
431  StringSketchParameters string_sketch_params = 27;
432
433  // These fields specify the range of values that can be reported by a device
434  // in the specified local_aggregation_period. If the true value to be reported
435  // falls outside the specified range, the value is clipped.
436  //
437  // For FLEETWIDE_OCCURRENCE_COUNTS, UNIQUE_DEVICE_NUMERIC_STATS and
438  // HOURLY_VALUE_NUMERIC_STATS, the range applies to the total numerical value
439  // computed for the device over the aggregation period specified in the
440  // report.
441  //
442  // For FLEETWIDE_MEANS, the range applies to the per-device sum of the value
443  // to be averaged over one hour. (For FLEETWIDE_MEANS, the `max_count` field
444  // is also required in order to bound the `count` value.)
445  //
446  // If a privacy_mechanism other than DE_IDENTIFICATION is specified, these fields
447  // are required for reports of type:
448  // * FLEETWIDE_OCCURRENCE_COUNTS
449  // * UNIQUE_DEVICE_NUMERIC_STATS
450  // * HOURLY_VALUE_NUMERIC_STATS
451  // * FLEETWIDE_MEANS
452  int64 min_value = 23;
453  int64 max_value = 24;
454
455  // This field specifies the maximum count to be reported by a device in the
456  // specified local_aggregation_period. If the true count is greater than
457  // max_count, then the count will be reported as max_count.
458  //
459  // For FLEETWIDE_HISTOGRAMS, the bound applies to the count for each
460  // individual histogram bucket over the aggregation period of one hour. For
461  // STRING_COUNTS, it applies to the count for each string over one hour.
462  //
463  // For FLEETWIDE_MEANS, the bound applies to the per-device count of the
464  // values to be averaged over one hour.
465  //
466  // If a privacy_mechanism other than DE_IDENTIFICATION is specified, this field is
467  // required for reports of type:
468  // * FLEETWIDE_HISTOGRAMS
469  // * FLEETWIDE_MEANS
470  // * STRING_COUNTS
471  uint64 max_count = 25;
472
473  ////////////////  Fields specific to some report types /////////////////
474
475  // A specification of integer-range buckets for a histogram.
476  //
477  // This field is for reports of type UNIQUE_DEVICE_HISTOGRAMS,
478  // HOURLY_VALUE_HISTOGRAMS, and FLEETWIDE_HISTOGRAMS -- but for
479  // FLEETWIDE_HISTOGRAMS only with metrics of type INTEGER, not with metrics of
480  // type INTEGER_HISTOGRAM, because in that case the MetricDefinition already
481  // contains an instance of IntegerBuckets.
482  IntegerBuckets int_buckets = 10;
483
484  // The interval with which clients will generate and upload observations.
485  enum ReportingInterval {
    // Zero value; no interval was chosen. For FLEETWIDE_OCCURRENCE_COUNTS
    // reports an unset interval defaults to 1 hour (see the
    // reporting_interval field).
486    REPORTING_INTERVAL_UNSET = 0;
    // Observations are generated and uploaded at a one-hour interval.
487    HOURS_1 = 1;
    // Observations are generated and uploaded at a one-day interval.
488    DAYS_1 = 2;
489  }
490
491  // This field is optional for FLEETWIDE_OCCURRENCE_COUNTS reports, and is only
492  // supported by some client platforms. If not set, the reporting interval
493  // defaults to 1 hour for FLEETWIDE_OCCURRENCE_COUNTS reports.
494  ReportingInterval reporting_interval = 32;
495
496  // This field can be used with all Report types. When set, the generated
497  // report will exclude an Observation if there are not at least
498  // |reporting_threshold| number of distinct devices reporting Observations
499  // with the same ObservationMetadata.
500  uint32 reporting_threshold = 13;
501
502  // The on-device function computed on the metric during the aggregation
503  // window.
504  enum LocalAggregationProcedure {
    // Zero value; no local aggregation procedure has been specified.
505    LOCAL_AGGREGATION_PROCEDURE_UNSET = 0;
506
507    // Numerical statistic aggregation procedures to be used with reports
508    // of type UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS,
509    // UNIQUE_DEVICE_NUMERIC_STATS and HOURLY_VALUE_NUMERIC_STATS.
    // Per the ReportType comments, these report types use this field only
    // when the owning metric is of type INTEGER.
510    // TODO(fxbug.dev/87151): Rename these to remove the '_PROCEDURE' suffix.
511    SUM_PROCEDURE = 1;
512    MIN_PROCEDURE = 2;
513    MAX_PROCEDURE = 3;
514    MEAN = 4;
515    MEDIAN = 5;
516    // The value of N is set in the field
517    // |local_aggregation_procedure_percentile_n|.
518    PERCENTILE_N = 6;
519
520    // Logical aggregation procedures to be used with reports of type
521    // UNIQUE_DEVICE_COUNTS.
    // AT_LEAST_ONCE accepts an event vector if it was logged at least once
    // during the aggregation period.
522    AT_LEAST_ONCE = 7;
    // SELECT_FIRST and SELECT_MOST_COMMON accept an event vector if the
    // category selection procedure selected it.
523    SELECT_FIRST = 8;
524    SELECT_MOST_COMMON = 9;
525  }
526
527  // This field is required for reports of type
528  // UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS,
529  // UNIQUE_DEVICE_NUMERIC_STATS, HOURLY_VALUE_NUMERIC_STATS
530  // and UNIQUE_DEVICE_COUNTS. Different report types support
531  // different values of this field. See the comments on the
532  // enum values in LocalAggregationProcedure.
533  LocalAggregationProcedure local_aggregation_procedure = 17;
534
535  // This field is required when
536  // local_aggregation_procedure = PERCENTILE_N.
537  // In this case it gives the value of N to use. Otherwise this field is
538  // ignored.
539  uint32 local_aggregation_procedure_percentile_n = 18;
540
541  // Time window over which the metric is aggregated. The local aggregation
542  // period is specified for UNIQUE_DEVICE_* report types.
543  WindowSize local_aggregation_period = 19;
544
545  // The maximum number of distinct event vectors for which an instance of the Cobalt
546  // client should produce an observation, for a given local aggregation period. Event
547  // vectors are prioritized in order of first arrival during the aggregation period.
548  //
549  // For example, if a report has an event_vector_buffer_max of 10, and 12 distinct event
550  // vectors are logged for this metric over an aggregation period, then Cobalt will send
551  // observations of the first 10 event vectors for that aggregation period and drop the
552  // last 2.
553  //
554  // If this field is unset, the registry parser assigns to it the total number of event
555  // vectors for the report's parent metric (i.e., the product over all metric dimensions
556  // of the number of event codes per dimension).
557  //
558  // The report's project will be charged against a resource budget for this value
559  // so project owners are encouraged to set this as small as possible.  For example,
560  // the report's parent metric may include a dimension with thousands of event codes,
561  // but it is expected that any one device will log only a few distinct event vectors
562  // per day. In that case we may set event_vector_buffer_max to a relatively small number,
563  // say 20. For reports which use differential privacy, setting event_vector_buffer_max
564  // to a smaller number will improve the signal for event vectors which are included in
565  // observations.
566  uint64 event_vector_buffer_max = 26;
567
568  // The maximum number of distinct strings that Cobalt must keep in its in-memory buffer
569  // on any single device. During local aggregation for reports of type STRING_COUNTS and
570  // UNIQUE_DEVICE_STRING_COUNTS, Cobalt will keep track of this many distinct strings per
571  // aggregation period. The report's project will be charged against a resource budget for this
572  // value so project owners are encouraged to set this as small as possible. A STRING metric
573  // includes a file of candidate strings that may contain many thousands of strings. But it is
574  // expected that any one device will log only a few of these strings per day. We may set
575  // string_buffer_max to a relatively small number, say 20.
576  //
577  // This is a required field for reports of type STRING_COUNTS and UNIQUE_DEVICE_STRING_COUNTS.
578  uint32 string_buffer_max = 28;
579
580  // For reports of type UNIQUE_DEVICE_COUNTS, send observations as soon as the
581  // event occurs, instead of waiting for the end of the day.
582  //
583  // This can only be enabled when using a local aggregation procedure of
584  // AT_LEAST_ONCE or SELECT_FIRST, and when the privacy mechanism is
585  // DE_IDENTIFICATION. When used with a system_profile_selection of REPORT_ALL
586  // or SELECT_FIRST, enabling this is recommended as Cobalt will send the count
587  // for the current day when the event occurs instead of at the end of the day.
588  // For a system_profile_selection of SELECT_LAST, this may also be desirable,
589  // though it may result in a slight change in the current day's system profile
590  // that is used, as Cobalt won't wait until the end of the day to determine
591  // the final system profile, but will instead send the count immediately with
592  // the system profile that is currently active on the device.
593  bool expedited_sending = 29;
594
595  ///////////////////  Fields used by all report types ///////////////////
596  // Next id: 109
597
598  // The list of SystemProfileFields to include in each row of the report.
599  // Optional.
600  repeated SystemProfileField system_profile_field = 100;
601
602  // The list of Experiments to include in each row of the report.
603  //
604  // Each report row lists the intersection of the experiment ids active on the device and
605  // experiment ids specified in this field.
606  //
607  // The specified experiment ids must be found in one of the project's experiments_namespaces.
608  repeated int64 experiment_id = 104;
609
610  // This field is required for reports of type UNIQUE_DEVICE_COUNTS,
611  // UNIQUE_DEVICE_HISTOGRAMS, UNIQUE_DEVICE_STRING_COUNTS, and
612  // HOURLY_VALUE_HISTOGRAMS. The value for these reports must be SELECT_LAST,
613  // SELECT_FIRST, or occasionally REPORT_ALL.
614  //
615  // If the system profile value changed during the aggregation window specified
616  // for this report, system_profile_selection specifies which system profile to
617  // report for each device.
618  SystemProfileSelectionPolicy system_profile_selection = 103;
619
620  // Maximum ReleaseStage for which this Report is allowed to be collected.
621  ReleaseStage max_release_stage = 105;
622
623  // If true, the Report can be collected even if the user/device has not
624  // consented. This field can only be set to true on reports that use privacy
625  // mechanisms that include differential privacy (i.e. not DE_IDENTIFICATION).
626  // It exists for collecting anonymized data that is allowed even when consent
627  // is not given. These use cases need to be specially approved by privacy
628  // reviewers.
629  bool exempt_from_consent = 108;
630
631  // New Privacy API
632
633  // Identifies which privacy protection is applied to the report.
634  enum PrivacyMechanism {
635    PRIVACY_MECHANISM_UNSPECIFIED = 0;
636    // If you specify this value, the data will be de-identified without
637    // additional privacy protections.
638    DE_IDENTIFICATION = 1;
639    // If you specify this value, the data will be protected with Shuffled
640    // Differential Privacy guarantees (e.g., the noise will be added on the
641    // devices).
642    SHUFFLED_DIFFERENTIAL_PRIVACY = 2;
643  }
644
645  // The privacy protection that is applied to the report.
646  PrivacyMechanism privacy_mechanism = 106;
647
648  // Groups all parameters needed for SHUFFLED_DIFFERENTIAL_PRIVACY mode.
649  message ShuffledDifferentialPrivacyConfig {
650    // An upper bound on the amount of information that can be learned about a
651    // device from a report that includes that device. Lower values correspond
652    // to higher privacy.
653    // Must be > 0.
654    double epsilon = 1;
655    // The risk of the epsilon guarantee not holding. This is usually set to
656    // 1 over the expected number of participating devices.
657    // Must be > 0 and < 1.
658    double delta = 2;
659    // The generated report will exclude an Observation unless at least
660    // |reporting_threshold| distinct devices report Observations with the same
661    // ObservationMetadata.
662    uint32 reporting_threshold = 3;
663
664    // The mean number of observations added per index point when performing the
665    // Poisson mechanism encoding for Cobalt reports. Required.
666    //
667    // In the future, the value of this field will be computed by the registry
668    // parser as a function of other fields in this
669    // ShuffledDifferentialPrivacyConfig. For now, it should be set manually in
670    // the Cobalt registry in consultation with the Cobalt team.
671    //
672    // TODO(b/295053509): update this comment once the field is auto populated by
673    // the registry parser.
674    double poisson_mean = 4;
675
676    // If true, skip validation of |poisson_mean|. Meant only for end-to-end
677    // tests whose traffic volume would not allow a reasonable |poisson_mean|.
678    bool skip_poisson_mean_validation_test_only = 5;
679
680
681    // The report fields that affect how a device participates in a private
682    // report.
683    //
684    // Note: this field is populated by the registry parser and must not be set
685    // manually.
686    DevicePrivacyDependencySet device_privacy_dependency_set = 6;
687
688    // Captures the report fields a device _must_ use in order to properly
689    // make contributions, real and fabricated, to a report.
690    //
691    // Any report field that changes how observations are encoded or how noise
692    // is fabricated must result in a new value.
693    enum DevicePrivacyDependencySet {
694      DEVICE_PRIVACY_DEPENDENCY_SET_UNSET = 0;
695
696      // Captures:
697      // - Fields that affect index points:
698      //  * metric dimensions
699      //  * num_index_points
700      //  * string_sketch_params
701      //  * min_value
702      //  * max_value
703      //  * max_count
704      //  * int_buckets
705      //
706      // - Fields that affect sparsity:
707      //  * event_vector_buffer_max
708      //  * string_buffer_max
709      //
710      // - poisson_mean
711      V1 = 1;
712    }
713  }
714
715  // If privacy_mechanism is SHUFFLED_DIFFERENTIAL_PRIVACY, privacy_config must
716  // hold a valid ShuffledDifferentialPrivacyConfig; otherwise leave it empty.
717  oneof privacy_config {
718    ShuffledDifferentialPrivacyConfig shuffled_dp = 107;
719  }
720}
721
722// A specification for the SystemProfile selection policy.
723enum SystemProfileSelectionPolicy {
724  // Use the default value. For reports of type FLEETWIDE_OCCURRENCE_COUNTS,
725  // FLEETWIDE_HISTOGRAMS, FLEETWIDE_MEANS, UNIQUE_DEVICE_NUMERIC_STATS,
726  // HOURLY_VALUE_NUMERIC_STATS, and STRING_COUNTS this will resolve to
727  // 'REPORT_ALL' and should not be changed. For all other report types,
728  // SELECT_DEFAULT must not be used.
729  SELECT_DEFAULT = 0;
730
731  // Always report the last SystemProfile seen in the aggregation window. This
732  // will be the last SystemProfile seen *at the time of an event* in the
733  // aggregation window.
734  SELECT_LAST = 1;
735
736  // Always report the first SystemProfile seen in the aggregation window. This
737  // will be the first SystemProfile seen *at the time of an event* in the
738  // aggregation window.
739  SELECT_FIRST = 2;
740
741  // Report all system profiles in the aggregation window. For most report
742  // types, this is the most sensible value to use. For reports that depend on
743  // some concept of uniqueness (such as UNIQUE_DEVICE_COUNTS,
744  // UNIQUE_DEVICE_HISTOGRAMS, UNIQUE_DEVICE_STRING_COUNTS, and
745  // HOURLY_VALUE_HISTOGRAMS) this may not be the best choice, since it will no
746  // longer be the case that a single device will only upload one observation
747  // per time period (it will upload one observation per time period *per unique
748  // system_profile*).
749  REPORT_ALL = 3;
750}
751
752// A specification of a field from SystemProfile. These are used in a
753// ReportDefinition to specify which fields should be included in the generated
754// Observations and reports.
755//
756// For a description of the meaning of each field, see the fields in the
757// SystemProfile in: cobalt/proto/common.proto
758enum SystemProfileField {
759  OS = 0;
760  ARCH = 1;
761  BOARD_NAME = 2;
762  PRODUCT_NAME = 3;
763  SYSTEM_VERSION = 4;
764  APP_VERSION = 10;
765  CHANNEL = 5;
766  BUILD_TYPE = 7;
767  EXPERIMENT_IDS = 9;
768  reserved 6, 8;  // Reserved so that these numbers cannot be reused.
769  reserved "REALM", "EXPERIMENT_TOKENS";  // Likewise for these names.
770}
771
772// Stages in the release cycle of a component. Each Cobalt customer determines
773// its current ReleaseStage when initializing the CobaltService. Each Metric
774// and Report can declare the maximum ReleaseStage for which it is allowed to
775// be collected. For example, a DEBUG Metric will not be collected from a
776// device running a FISHFOOD release.
777enum ReleaseStage {
778  RELEASE_STAGE_NOT_SET = 0;
779
780  // A test build. Also called "eng". Only use this value when the device is
781  // running test builds as all metrics/reports will be collected.
782  DEBUG = 10;
783  // Small, internal prototype. Used for testing a new feature internally,
784  // usually within the team or a small group.
785  FISHFOOD = 20;
786  // An internal release for testing with internal users.
787  DOGFOOD = 40;
788  // An open beta, for testing with internal and external users.
789  OPEN_BETA = 60;
790
791  // Generally-available. The final stage of a release. Also called
792  // "production". If unsure of which release stage the device is running, it
793  // is safest to fall back to this value (which is the default if no value is
794  // set), to avoid inadvertently collecting metric/report data.
795  GA = 99;
796}
797
798// ExponentialIntegerBuckets is used to define a partition of the integers into
799// a finite number of exponentially increasing buckets.
800//
801// Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1.
802//
803// The bucket boundaries are:
804// a[0] = floor
805// a[1] = floor + initial_step
806// a[2] = floor + initial_step * step_multiplier
807// a[3] = floor + initial_step * step_multiplier ^ 2
808// a[4] = floor + initial_step * step_multiplier ^ 3
809// and in general, for i = 1, 2, 3 ... n (with ^ denoting exponentiation):
810// a[i] = floor + initial_step * step_multiplier ^ (i-1)
811//
812// Then, the buckets are defined as follows:
813// Bucket 0 is the underflow bucket: (-infinity, floor)
814// Bucket i for 0 < i < n+1: [a[i-1], a[i])
815// Bucket n+1 is the overflow bucket: [a[n], +infinity)
816//
817// Examples:
818// floor = 0
819// num_buckets = 3
820// initial_step = 10
821// step_multiplier = 10
822// Then, the buckets are:
823// (-infinity, 0), [0, 10), [10, 100), [100, 1000), [1000, +infinity)
824//
825// floor = 0
826// num_buckets = 3
827// initial_step = 2
828// step_multiplier = 2
829// Then, the buckets are:
830// (-infinity, 0), [0, 2), [2, 4), [4, 8), [8, +infinity)
831//
832// floor = 10
833// num_buckets = 3
834// initial_step = 2
835// step_multiplier = 2
836// Then, the buckets are:
837// (-infinity, 10), [10, 12), [12, 14), [14, 18), [18, +infinity)
838//
839// floor = 0
840// num_buckets = 3
841// initial_step = 100
842// step_multiplier = 10
843// Then, the buckets are:
844// (-infinity, 0), [0, 100), [100, 1000), [1000, 10000), [10000, +infinity)
845//
846message ExponentialIntegerBuckets {
847  int64 floor = 1;  // a[0]: the inclusive lower bound of bucket 1.
848
849  // Must be at least one.
850  uint32 num_buckets = 2;
851
852  // Must be at least one.
853  uint32 initial_step = 3;
854
855  // Must be at least one.
856  uint32 step_multiplier = 4;
857}
858
859// LinearIntegerBuckets is used to define a partition of the integers into a
860// finite number of buckets of equal size.
861//
862// Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1.
863// Bucket 0 is the underflow bucket: (-infinity, floor)
864// Bucket n+1 is the overflow bucket: [floor + step_size * n, +infinity)
865//
866// For i = 1 to n, the bucket i is defined as
867// [floor + step_size * (i-1), floor + step_size * i)
868//
869// Example: floor = 0, num_buckets = 3, step_size = 10.
870// (-infinity, 0), [0, 10), [10, 20), [20, 30), [30, +infinity)
871message LinearIntegerBuckets {
872  int64 floor = 1;  // The inclusive lower bound of bucket 1.
873
874  // Must be at least one.
875  uint32 num_buckets = 2;
876
877  // Must be at least one.
878  uint32 step_size = 3;
879}
880
881message IntegerBuckets {
882  oneof buckets {  // At most one bucketing scheme can be set.
883    ExponentialIntegerBuckets exponential = 1;
884    LinearIntegerBuckets linear = 2;
885  }
886
887  // If set to true, the report data is not padded with empty buckets, so a
888  // histogram need not contain a row for every bucket. Buckets with a zero
889  // count may still occur if data is logged that contains a zero count. This
890  // field cannot be set on reports with added privacy.
891  bool sparse_output = 3;
892}
893
894message StringSketchParameters {
895  // Number of hashes in the Count-Min Sketch.
896  int32 num_hashes = 1;
897
898  // Number of cells per hash in the Count-Min Sketch.
899  int32 num_cells_per_hash = 2;
900}
901