// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package cobalt;

import "window_size.proto";

option java_multiple_files = true;
option java_package = "com.google.cobalt";

////////////////////////////////////////////////////////////////////////////////
// NOTE: This file is used by the Cobalt client and the Cobalt servers.
// The source-of-truth of this file is located in Cobalt's open source code
// repository, and the file is copied to Android where it is used by the Cobalt
// client. Do not edit the copy of this file in this Android repo as those edits
// will be overwritten when the file is next copied.
////////////////////////////////////////////////////////////////////////////////

// A Report analyzes Events that were logged to Cobalt and emits an aggregated
// output that may then be queried or visualized by an analyst user of Cobalt.
//
// A Report is associated with a Metric and this means that the Report analyzes
// the Events that were logged to that Metric. The first step occurs on a
// device where Cobalt analyzes the logged Events in order to form Observations.
//
// An Observation is built for a particular Report. The type of observation,
// including which of several privacy-oriented Encodings is used or not, depends
// on the Report type.
//
// The Observations are sent to the Cobalt Shuffler which shuffles them in order
// to break linkability between Observations and linkability with the
// originating device. Next the shuffled Observations are sent to the Analyzer
// which aggregates Observations from all devices in order to generate a report.
//
// There are multiple types of Metrics and multiple types of Reports. Each
// Report type is compatible with only some of the Metric types.
//
// A ReportDefinition defines a Cobalt Report to be generated.
// An instance of ReportDefinition is always associated with an instance of
// MetricDefinition called the owning MetricDefinition.
// Next ID: 33
message ReportDefinition {
  // Field numbers and names of deleted fields. They stay reserved so that they
  // can never be reused with a different meaning.
  // NOTE(review): "candidate_lis" looks like a truncated "candidate_list" --
  // confirm against the upstream source-of-truth before correcting it.
  reserved 4 to 9, 11, 12, 14 to 16, 20, 21, 30, 31, 101, 102;
  reserved "aggregation_type", "aggregation_window", "candidate_lis", "dp_release_config",
      "expected_population_size", "expected_string_set_size", "export_location_override",
      "local_privacy_noise_level", "output_location", "percentiles", "threshold", "window_size",
      "use_poisson_mechanism_for_privacy", "prob_bit_flip", "candidate_file", "privacy_level",
      "poisson_mean";

  // Unique name for this Report within its owning MetricDefinition.
  // The name must obey the syntax of a C variable name and must have length
  // at most 64. The integer |id| field is the stable identifier for a report
  // so this name may be changed. However doing this may affect the
  // names and locations of some artifacts produced by Cobalt's report
  // generation pipeline.
  string report_name = 1;

  // The unique integer ID for this report within its owning metric.
  // The user must manually set this |id| field. This is the stable identifier
  // for a report and should not be changed once data collection begins.
  uint32 id = 2;

  // A Report has one of the following types.
  // Next standard report type ID: 22
  enum ReportType {
    // Numbers and names of deleted report types; never reuse them.
    reserved 1 to 10, 19, 9999;
    reserved "CUSTOM_RAW_DUMP", "EVENT_COMPONENT_OCCURRENCE_COUNT", "HIGH_FREQUENCY_STRING_COUNTS",
        "INT_RANGE_HISTOGRAM", "NUMERIC_AGGREGATION", "NUMERIC_PERF_RAW_DUMP",
        "PER_DEVICE_HISTOGRAM", "PER_DEVICE_NUMERIC_STATS", "SIMPLE_OCCURRENCE_COUNT",
        "STRING_COUNTS_WITH_THRESHOLD", "STRING_HISTOGRAMS", "UNIQUE_N_DAY_ACTIVES";

    REPORT_TYPE_UNSET = 0;

    // For each system_profile SP and each event_vector EV, produces the total
    // count of all occurrences on all devices in the fleet with system profile
    // SP of the event associated with EV over the course of the report day.
    // For example, a report of this type might give the total number of times
    // a medium, red widget was used across the fleet yesterday.
    //
    // Input metric types: OCCURRENCE
    //
    // Local aggregation: COUNT
    // Local aggregation period: 1 hour
    // Global aggregation: OCCURRENCE_COUNTS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: OccurrenceCountReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - reporting_interval (optional; defaults to 1 hour when not set)
    FLEETWIDE_OCCURRENCE_COUNTS = 11;

    // For each system_profile SP and each event_vector EV, produces the count
    // of the number of unique devices with system profile SP for which EV
    // "is accepted" during the aggregation period, which must be DAYS_1,
    // DAYS_7, DAYS_28 or DAYS_30.
    //
    // There are different versions of what "is accepted" means depending on
    // which local aggregation procedure is specified:
    //
    // AT_LEAST_ONCE. In this case EV is accepted if EV was logged at least once
    // during the aggregation period. For example, a report of this type might
    // give the total number of devices with system profile SP on which a
    // medium, red widget was used at least once in the seven-day period
    // ending yesterday.
    //
    // SELECT_FIRST, SELECT_MOST_COMMON. In this case EV is accepted if the
    // category selection procedure selected EV. For example, a report of this
    // type using SELECT_MOST_COMMON might give the total number of devices
    // with system profile SP on which most of the widgets used during the
    // seven-day period ending yesterday were medium-red.
    //
    // NOTE: Using a local aggregation procedure of AT_LEAST_ONCE or
    // SELECT_FIRST, in combination with setting expedited_sending, results in
    // the count being sent by the device when the event occurs (instead of at
    // the end of the day). This can be desirable for having data for the
    // current day appear faster in the reports output by Cobalt.
    //
    // Input metric types: OCCURRENCE
    //
    // Local aggregation: AT_LEAST_ONCE, SELECT_FIRST, or SELECT_MOST_COMMON
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: OCCURRENCE_COUNTS
    //
    // Output report row type: OccurrenceCountReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure
    //   - local_aggregation_period
    //   - expedited_sending
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    UNIQUE_DEVICE_COUNTS = 12;

    // For each system_profile SP and each event_vector EV, produces an
    // int-range histogram such that in each int range bucket it gives the
    // number of unique devices with system_profile SP for which an integer
    // value, aggregated locally on device over the aggregation period,
    // associated with EV, falls into the bucket.
    //
    // There are two versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts.
    // For example, for the integer bucket 10-100, a report of this type might
    // give the number of devices with system profile SP on which a medium,
    // red widget was used between 10 and 100 times in the seven-day period
    // ending yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, for the integer bucket 10-100, a report of this type that
    // specifies the MINIMUM local aggregation procedure might give the number
    // of devices with system profile SP on which the minimum temperature of a
    // medium red widget over the seven-day period ending yesterday was between
    // 10 and 100 degrees.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //                    NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: INTEGER_HISTOGRAMS
    //
    // Output report row type: IntegerHistogramReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    //   - local_aggregation_period
    //   - int_buckets (this is used only on the server for reports without
    //     added privacy, but is used on the client for reports with added
    //     privacy)
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    UNIQUE_DEVICE_HISTOGRAMS = 13;

    // For each system_profile SP and each event_vector EV, produces an
    // int-range histogram such that in each int range bucket it gives the
    // number of values, associated with EV, from devices
    // with system_profile SP, that fall into the bucket, where each device
    // computes one such value per hour.
    //
    // Computationally this report type is identical to
    // UNIQUE_DEVICE_HISTOGRAMS except that the local aggregation period
    // used is one hour and so the counts in each bucket are not interpreted
    // as a number of unique devices.
    //
    // There are two versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts.
    // For example, for the integer bucket 10-100, a report of this type might
    // give the number of times that the hourly count of medium red widgets
    // used was between 10 and 100 over devices with system profile SP,
    // yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, for the integer bucket 10-100, a report of this type that
    // specifies the MINIMUM local aggregation procedure might give the number
    // of times that the minimum temperature over an hour of all medium red
    // widgets used was between 10 and 100 degrees over all devices with
    // system profile SP, yesterday.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //                    NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: one hour
    // Global aggregation: INTEGER_HISTOGRAMS
    //
    // Output report row type: IntegerHistogramReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    //   - int_buckets (this is used only on the server for reports without
    //     added privacy, but is used on the client for reports with added
    //     privacy)
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    HOURLY_VALUE_HISTOGRAMS = 14;

    // For each system_profile SP and each event_vector EV, produces an
    // int-range histogram such that in each int range bucket it gives the
    // number of integer measurements, associated with EV, logged on devices
    // with system_profile SP, that fall into the bucket. Here we are counting
    // each value logged by the instrumented code individually and so the rate
    // at which values are being recorded is arbitrary and varies from device
    // to device. For example, for the integer bucket 10-100, a report of this
    // type might give the number of times that a medium red widget's
    // temperature was measured as being between 10 and 100 degrees over all
    // devices with system profile SP, yesterday. The rate at which these
    // widget temperature measurements are taken is arbitrary and may vary
    // from device to device.
    //
    // Input metric types: INTEGER or INTEGER_HISTOGRAM
    //
    // Local aggregation: INTEGER_HISTOGRAM
    // Local aggregation period: one hour
    // Global aggregation: INTEGER_HISTOGRAMS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: IntegerHistogramReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - int_buckets (Only with metric_type = INTEGER)
    FLEETWIDE_HISTOGRAMS = 15;

    // For each system_profile SP and each event_vector EV, produces the sum
    // and count of many integer measurements associated with EV, logged on
    // devices with system_profile SP. Here we are counting each value logged
    // by the instrumented code individually and so the rate at which values are
    // being recorded is arbitrary and varies from device to device. This allows
    // us to produce a fleetwide mean. For example, a report of this type might
    // give the mean of all temperature measurements of medium-red widgets
    // yesterday, across all devices with system profile SP, regardless of how
    // many temperature measurements were taken on each device individually.
    //
    // Input metric types: INTEGER
    //
    // Local aggregation: SUM_AND_COUNT
    // Local aggregation period: one hour
    // Global aggregation: SUM_AND_COUNTS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: SumAndCountReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   none
    FLEETWIDE_MEANS = 16;

    // For each system_profile SP and each event_vector EV, produces several
    // numeric statistics (e.g. 95%-ile) over a set of integers associated
    // with EV, collected from all devices with system_profile SP. Each unique
    // device contributes a single value and so the distribution of the values
    // may be thought of as a distribution of unique devices.
    //
    // There are different versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts
    // over the course of the aggregation period. For example a report of this
    // type might give the 95%-ile of the counts of medium-red widgets used by
    // each device over the 7-day period ending yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, a report of this type that specifies the MINIMUM local
    // aggregation procedure might give the 95%-ile of the minimum temperature
    // over the 7-day period ending yesterday of all medium-red widgets over
    // all devices with system profile SP.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //                    NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: NUMERIC_STATS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: NumericStatsReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    //   - local_aggregation_period
    UNIQUE_DEVICE_NUMERIC_STATS = 17;

    // For each system_profile SP and each event_vector EV, produces several
    // numeric statistics (e.g. 95%-ile) over a set of integers associated
    // with EV, collected from all devices with system_profile SP. Each unique
    // device contributes a value every hour and so the distribution of the
    // values may NOT be thought of as a distribution of unique devices.
    //
    // Computationally this report type is identical to
    // UNIQUE_DEVICE_NUMERIC_STATS except that the local aggregation period
    // used is one hour.
    //
    // There are different versions of this depending on the metric type:
    //
    // With metrics of type OCCURRENCE the integer values are occurrence counts
    // over the course of the hour. For example a report of this
    // type might give the 95%-ile of the counts of medium-red widgets used in
    // any one hour period on any device with system profile SP, yesterday.
    //
    // With metrics of type INTEGER the integer values are computed statistics.
    // For example, a report of this type that specifies the MINIMUM local
    // aggregation procedure might give the 95%-ile of the minimum temperature
    // over any one-hour period of medium-red widget use on any device
    // with system profile SP, yesterday.
    //
    // Input metric types: OCCURRENCE or INTEGER
    //
    // Local aggregation: COUNT_AS_INTEGER (used with OCCURRENCE metrics) or
    //                    NUMERIC_STAT (used with INTEGER metrics)
    // Local aggregation period: 1 hour
    // Global aggregation: NUMERIC_STATS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: NumericStatsReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_procedure (only when the metric type is INTEGER)
    HOURLY_VALUE_NUMERIC_STATS = 18;

    // For each system_profile SP and each event_vector EV, produces the total
    // count of all occurrences of a string value on all devices in the fleet
    // with system profile SP of the event associated with EV over the course
    // of the report day.
    //
    // Input metric types: STRING
    //
    // Local aggregation: STRING_HISTOGRAM
    // Local aggregation period: 1 hour
    // Global aggregation: STRING_HISTOGRAMS
    // System Profile Selection Policy: REPORT_ALL
    //
    // Output report row type: StringCountReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - string_buffer_max
    STRING_COUNTS = 20;

    // For each system_profile SP, each event_vector EV, and each string value
    // produces the count of the number of unique devices with system profile
    // SP on which the string value was logged in connection with the EV during
    // the aggregation period, which must be DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    //
    // This is similar to the AT_LEAST_ONCE local aggregation procedure for
    // UNIQUE_DEVICE_COUNTS. For example, a report of this type might
    // give the total number of devices with system profile SP on which a
    // medium, red widget was used in conjunction with the component name
    // "widget-consumer" at least once in the seven-day period ending
    // yesterday.
    //
    // Input metric types: STRING
    //
    // Local aggregation period: DAYS_1, DAYS_7, DAYS_28 or DAYS_30.
    // Global aggregation: STRING_HISTOGRAMS
    //
    // Output report row type: StringCountReportRow
    // (See report_row.proto)
    //
    // ReportDefinition fields particular to this type:
    //   - local_aggregation_period
    //   - string_buffer_max
    //   - system_profile_selection (SELECT_FIRST and SELECT_LAST will maintain
    //     uniqueness, REPORT_ALL may be useful in some cases)
    UNIQUE_DEVICE_STRING_COUNTS = 21;
  }
  ReportType report_type = 3;

  //////////////// Fields for reports with privacy enabled /////////////////

  // When reporting numerical values with privacy, the values are mapped to
  // indices from 0 to num_index_points-1 with a randomized rounding method.
  //
  // In the future, the value of this field will be computed by the registry
  // parser as a function of other privacy-related fields and an estimate of the
  // user population size. For now, it should be set manually in the Cobalt
  // registry in consultation with the Cobalt team.
  //
  // TODO(b/278932979): update this comment once the field is populated by
  // the registry parser.
  uint32 num_index_points = 22;

  // When reporting strings with privacy, the strings are counted using a linear
  // sketch.
  //
  // In the future, the value of this field will be computed by the registry
  // parser as a function of other privacy-related fields and an estimate of the
  // user population size. For now, it should be set manually in the Cobalt
  // registry in consultation with the Cobalt team.
  //
  // TODO(b/278932979): update this comment once the field is populated by
  // the registry parser.
  StringSketchParameters string_sketch_params = 27;

  // These fields specify the range of values that can be reported by a device
  // in the specified local_aggregation_period. If the true value to be reported
  // falls outside the specified range, the value is clipped.
  //
  // For FLEETWIDE_OCCURRENCE_COUNTS, UNIQUE_DEVICE_NUMERIC_STATS and
  // HOURLY_VALUE_NUMERIC_STATS, the range applies to the total numerical value
  // computed for the device over the aggregation period specified in the
  // report.
  //
  // For FLEETWIDE_MEANS, the range applies to the per-device sum of the value
  // to be averaged over one hour. (For FLEETWIDE_MEANS, the `max_count` field
  // is also required in order to bound the `count` value.)
  //
  // If a privacy_mechanism other than DE_IDENTIFICATION is specified, these
  // fields are required for reports of type:
  //   * FLEETWIDE_OCCURRENCE_COUNTS
  //   * UNIQUE_DEVICE_NUMERIC_STATS
  //   * HOURLY_VALUE_NUMERIC_STATS
  //   * FLEETWIDE_MEANS
  int64 min_value = 23;
  // Upper end of the clipping range described on |min_value|.
  int64 max_value = 24;

  // This field specifies the maximum count to be reported by a device in the
  // specified local_aggregation_period. If the true count is greater than
  // max_count, then the count will be reported as max_count.
  //
  // For FLEETWIDE_HISTOGRAMS, the bound applies to the count for each
  // individual histogram bucket over the aggregation period of one hour. For
  // STRING_COUNTS, it applies to the count for each string over one hour.
  //
  // For FLEETWIDE_MEANS, the bound applies to the per-device count of the
  // values to be averaged over one hour.
  //
  // If a privacy_mechanism other than DE_IDENTIFICATION is specified, this
  // field is required for reports of type:
  //   * FLEETWIDE_HISTOGRAMS
  //   * FLEETWIDE_MEANS
  //   * STRING_COUNTS
  uint64 max_count = 25;

  //////////////// Fields specific to some report types /////////////////

  // A specification of integer-range buckets for a histogram.
  //
  // This field is for reports of type UNIQUE_DEVICE_HISTOGRAMS,
  // HOURLY_VALUE_HISTOGRAMS, and FLEETWIDE_HISTOGRAMS -- but for
  // FLEETWIDE_HISTOGRAMS only with metrics of type INTEGER, not with metrics of
  // type INTEGER_HISTOGRAM, because in that case the MetricDefinition already
  // contains an instance of IntegerBuckets.
  IntegerBuckets int_buckets = 10;

  // The interval with which clients will generate and upload observations.
  enum ReportingInterval {
    REPORTING_INTERVAL_UNSET = 0;
    HOURS_1 = 1;
    DAYS_1 = 2;
  }

  // This field is optional for FLEETWIDE_OCCURRENCE_COUNTS reports, and is only
  // supported by some client platforms. If not set, the reporting interval
  // defaults to 1 hour for FLEETWIDE_OCCURRENCE_COUNTS reports.
  ReportingInterval reporting_interval = 32;

  // This field can be used with all Report types. When set, the generated
  // report will exclude an Observation if there are not at least
  // |reporting_threshold| number of distinct devices reporting Observations
  // with the same ObservationMetadata.
  uint32 reporting_threshold = 13;

  // The on-device function computed on the metric during the aggregation
  // window.
  enum LocalAggregationProcedure {
    LOCAL_AGGREGATION_PROCEDURE_UNSET = 0;

    // Numerical statistic aggregation procedures to be used with reports
    // of type UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS,
    // UNIQUE_DEVICE_NUMERIC_STATS and HOURLY_VALUE_NUMERIC_STATS.
    // TODO(fxbug.dev/87151): Rename these to remove the '_PROCEDURE' suffix.
    SUM_PROCEDURE = 1;
    MIN_PROCEDURE = 2;
    MAX_PROCEDURE = 3;
    MEAN = 4;
    MEDIAN = 5;
    // The value of N is set in the field
    // |local_aggregation_procedure_percentile_n|.
    PERCENTILE_N = 6;

    // Logical aggregation procedures to be used with reports of type
    // UNIQUE_DEVICE_COUNTS
    AT_LEAST_ONCE = 7;
    SELECT_FIRST = 8;
    SELECT_MOST_COMMON = 9;
  }

  // This field is required for reports of type
  // UNIQUE_DEVICE_HISTOGRAMS, HOURLY_VALUE_HISTOGRAMS,
  // UNIQUE_DEVICE_NUMERIC_STATS, HOURLY_VALUE_NUMERIC_STATS
  // and UNIQUE_DEVICE_COUNTS. Different report types support
  // different values of this field. See the comments on the
  // enum values in LocalAggregationProcedure.
  LocalAggregationProcedure local_aggregation_procedure = 17;

  // This field is required when
  // local_aggregation_procedure = PERCENTILE_N.
  // In this case it gives the value of N to use. Otherwise this field is
  // ignored.
  uint32 local_aggregation_procedure_percentile_n = 18;

  // Time window over which the metric is aggregated. The local aggregation
  // period is specified for UNIQUE_DEVICE_* report types.
  WindowSize local_aggregation_period = 19;

  // The maximum number of distinct event vectors for which an instance of the Cobalt
  // client should produce an observation, for a given local aggregation period. Event
  // vectors are prioritized in order of first arrival during the aggregation period.
  //
  // For example, if a report has an event_vector_buffer_max of 10, and 12 distinct event
  // vectors are logged for this metric over an aggregation period, then Cobalt will send
  // observations of the first 10 event vectors for that aggregation period and drop the
  // last 2.
  //
  // If this field is unset, the registry parser assigns to it the total number of event
  // vectors for the report's parent metric (i.e., the product over all metric dimensions
  // of the number of event codes per dimension).
  //
  // The report's project will be charged against a resource budget for this value
  // so project owners are encouraged to set this as small as possible. For example,
  // the report's parent metric may include a dimension with thousands of event codes,
  // but it is expected that any one device will log only a few distinct event vectors
  // per day. In that case we may set event_vector_buffer_max to a relatively small number,
  // say 20. For reports which use differential privacy, setting event_vector_buffer_max
  // to a smaller number will improve the signal for event vectors which are included in
  // observations.
  uint64 event_vector_buffer_max = 26;

  // The maximum number of distinct strings that Cobalt must keep in its in-memory buffer
  // on any single device. During local aggregation for reports of type STRING_COUNTS and
  // UNIQUE_DEVICE_STRING_COUNTS, Cobalt will keep track of this many distinct strings per
  // aggregation period. The report's project will be charged against a resource budget for this
  // value so project owners are encouraged to set this as small as possible. A STRING metric
  // includes a file of candidate strings that may contain many thousands of strings. But it is
  // expected that any one device will log only a few of these strings per day. We may set
  // string_buffer_max to a relatively small number, say 20.
  //
  // This is a required field for reports of type STRING_COUNTS and UNIQUE_DEVICE_STRING_COUNTS.
  uint32 string_buffer_max = 28;

  // For reports of type UNIQUE_DEVICE_COUNTS, send observations as soon as the
  // event occurs, instead of waiting for the end of the day.
  //
  // This can only be enabled when using a local aggregation procedure of
  // AT_LEAST_ONCE or SELECT_FIRST, and when the privacy mechanism is
  // DE_IDENTIFICATION. When used with a system_profile_selection of REPORT_ALL
  // or SELECT_FIRST, enabling this is recommended as Cobalt will send the count
  // for the current day when the event occurs instead of at the end of the day.
  // For a system_profile_selection of SELECT_LAST, this may also be desirable,
  // though it may result in a slight change in the current day's system profile
  // that is used, as Cobalt won't wait until the end of the day to determine
  // the final system profile, but will instead send the count immediately with
  // the system profile that is currently active on the device.
  bool expedited_sending = 29;

  /////////////////// Fields used by all report types ///////////////////
  // Next id: 109

  // The list of SystemProfileFields to include in each row of the report.
  // Optional.
  repeated SystemProfileField system_profile_field = 100;

  // The list of Experiments to include in each row of the report.
  //
  // Each report row lists the intersection of the experiment ids active on the device and
  // experiment ids specified in this field.
  //
  // The specified experiment ids must be found in one of the project's experiments_namespaces.
  repeated int64 experiment_id = 104;

  // This field is required for reports of type UNIQUE_DEVICE_COUNTS,
  // UNIQUE_DEVICE_HISTOGRAMS, UNIQUE_DEVICE_STRING_COUNTS, and
  // HOURLY_VALUE_HISTOGRAMS. The value for these reports must be SELECT_LAST,
  // SELECT_FIRST, or occasionally REPORT_ALL.
  //
  // If the system profile value changed during the aggregation window specified
  // for this report, system_profile_selection specifies which system profile to
  // report for each device.
  SystemProfileSelectionPolicy system_profile_selection = 103;

  // Maximum ReleaseStage for which this Report is allowed to be collected.
  ReleaseStage max_release_stage = 105;

  // Report can be collected even if the user/device has not consented.
  // This field can only be set to true on reports that use privacy mechanisms
  // that include differential privacy (i.e. not DE_IDENTIFICATION). The use of
  // this field is for collecting anonymized data that is allowed even when
  // the consent is not given. These use cases need to be specially approved
  // by privacy reviewers.
  bool exempt_from_consent = 108;

  // New Privacy API

  // This enum identifies what privacy protection is applied to the report.
  enum PrivacyMechanism {
    PRIVACY_MECHANISM_UNSPECIFIED = 0;
    // If you specify this value the data will be de-identified without
    // additional privacy protections.
    DE_IDENTIFICATION = 1;
    // If you specify this value the data will be protected with Shuffled
    // Differential Privacy guarantees (e.g., the noise will be added on the
    // devices)
    SHUFFLED_DIFFERENTIAL_PRIVACY = 2;
  }

  // This field identifies what privacy protection is applied to the report.
  PrivacyMechanism privacy_mechanism = 106;

  // The object for grouping all parameters needed for SHUFFLED DP mode.
  message ShuffledDifferentialPrivacyConfig {
    // This field represents an upper bound on the amount of information which
    // can be learned about a device from a report including that device.
    // Lower values correspond to higher privacy.
    // Epsilon must be > 0.
    double epsilon = 1;
    // This field represents the risk of the epsilon guarantee not holding. This
    // is usually set as 1 over the expected number of participating devices.
    // Delta must be > 0 and < 1.
    double delta = 2;
    // The generated report will exclude an Observation if there are not at
    // least |reporting_threshold| number of distinct devices reporting
    // Observations with the same ObservationMetadata.
    uint32 reporting_threshold = 3;

    // The mean number of observations added per index point when performing the
    // Poisson mechanism encoding for Cobalt reports. Required.
    //
    // In the future, the value of this field will be computed by the registry
    // parser as a function of other fields in this
    // ShuffledDifferentialPrivacyConfig. For now, it should be set manually in
    // the Cobalt registry in consultation with the Cobalt team.
    //
    // TODO(b/295053509): update this comment once the field is auto populated by
    // the registry parser.
    double poisson_mean = 4;

    // If true, skip validating the |poisson_mean| value. This is meant to be used only
    // in end-to-end tests where the traffic volume would not allow a reasonable |poisson_mean|.
    bool skip_poisson_mean_validation_test_only = 5;

    // The report fields that affect how a device participates in a private
    // report.
    //
    // Note, this field is populated by the registry parser and must not be set
    // manually.
    DevicePrivacyDependencySet device_privacy_dependency_set = 6;

    // Captures the report fields a device _must_ use in order to properly
    // make contributions, real and fabricated, to a report.
    //
    // Any report field that changes how observations are encoded or noise is
    // fabricated must result in a new value.
    enum DevicePrivacyDependencySet {
      DEVICE_PRIVACY_DEPENDENCY_SET_UNSET = 0;

      // Captures:
      //   - Fields that affect index points include:
      //     * metric dimensions
      //     * num_index_points
      //     * string_sketch_params
      //     * min_value
      //     * max_value
      //     * max_count
      //     * int_buckets
      //
      //   - Fields that affect sparsity are:
      //     * event_vector_buffer_max
      //     * string_buffer_max
      //
      //   - poisson_mean
      V1 = 1;
    }
  }

  // If privacy_mechanism is SHUFFLED_DIFFERENTIAL_PRIVACY then privacy_config
  // must contain a valid ShuffledDifferentialPrivacyConfig; otherwise it must
  // be left empty.
  oneof privacy_config {
    ShuffledDifferentialPrivacyConfig shuffled_dp = 107;
  }
}

// A specification for SystemProfile selection policy.
enum SystemProfileSelectionPolicy {
  // Use the default value. For reports of type FLEETWIDE_OCCURRENCE_COUNTS,
  // FLEETWIDE_HISTOGRAMS, FLEETWIDE_MEANS, UNIQUE_DEVICE_NUMERIC_STATS,
  // HOURLY_VALUE_NUMERIC_STATS, and STRING_COUNTS this will resolve to
  // 'REPORT_ALL' and should not be changed. For all other report types,
  // SELECT_DEFAULT must not be used.
  SELECT_DEFAULT = 0;

  // Always report the last SystemProfile seen in the aggregation window. This
  // will be the last SystemProfile seen *at the time of an event* in the
  // aggregation window.
  SELECT_LAST = 1;

  // Always report the first SystemProfile seen in the aggregation window. This
  // will be the first SystemProfile seen *at the time of an event* in the
  // aggregation window.
  SELECT_FIRST = 2;

  // Report all system profiles in the aggregation window. For most report
  // types, this is the most sensible value to use. For reports that depend on
  // some concept of uniqueness (such as UNIQUE_DEVICE_COUNTS,
  // UNIQUE_DEVICE_HISTOGRAMS, UNIQUE_DEVICE_STRING_COUNTS, and
  // HOURLY_VALUE_HISTOGRAMS) this may not be the best choice, since it will no
  // longer be the case that a single device will only upload one observation
  // per time period (It will upload one observation per time period *per unique
  // system_profile*).
  REPORT_ALL = 3;
}

// A specification of a field from SystemProfile. These are used in a
// ReportDefinition to specify which fields should be included in the generated
// Observations and reports.
//
// For a description of the meaning of each field, see the fields in the
// SystemProfile in: cobalt/proto/common.proto
enum SystemProfileField {
  // Numbers and names of removed values; kept reserved so they are not reused.
  reserved 6, 8;
  reserved "REALM", "EXPERIMENT_TOKENS";

  OS = 0;
  ARCH = 1;
  BOARD_NAME = 2;
  PRODUCT_NAME = 3;
  SYSTEM_VERSION = 4;
  APP_VERSION = 10;
  CHANNEL = 5;
  BUILD_TYPE = 7;
  EXPERIMENT_IDS = 9;
}

// Stages in the release cycle of a component. Each Cobalt customer determines
// its current ReleaseStage when initializing the CobaltService. Each Metric
// and Report can declare the maximum ReleaseStage for which it is allowed to
// be collected. For example a DEBUG Metric will not be collected from a device
// running a FISHFOOD release.
enum ReleaseStage {
  RELEASE_STAGE_NOT_SET = 0;

  // A test build. Also called "eng". Only use this value when the device is
  // running test builds as all metrics/reports will be collected.
782 DEBUG = 10; 783 // Small, internal prototype. Used for testing a new feature internally, 784 // usually within the team or a small group. 785 FISHFOOD = 20; 786 // An internal release for testing with internal users. 787 DOGFOOD = 40; 788 // An open beta, for testing with internal and external users. 789 OPEN_BETA = 60; 790 791 // Generally-available. The final stage of a release. Also called 792 // "production". If unsure of which release stage the device is running, it 793 // is safest to fallback to this value (which is the default if no value is 794 // set), to avoid inadvertently collecting metric/report data. 795 GA = 99; 796} 797 798// ExponentialIntegerBuckets is used to define a partition of the integers into 799// a finite number of exponentially increasing buckets. 800// 801// Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1. 802// 803// The bucket boundaries are: 804// a[0] = floor 805// a[1] = floor + initial_step 806// a[2] = floor + initial_step * step_multiplier 807// a[3] = floor + initial_step * step_multiplier ^ 2 808// a[4] = floor + initial_step * step_multiplier ^ 3 809// and in general, for i = 1, 2, 3 ... 
n 810// a[i] = floor + initial_step * step_multiplier ^ (i-1) 811// 812// Then, the buckets are defined as follows: 813// Bucket 0 is the underflow bucket: (-infinity, floor) 814// Bucket i for 0 < i < n+1: [a[i-1], a[i]) 815// Bucket n+1 is the overflow bucket: [a[n], +infinity) 816// 817// Examples: 818// floor = 0 819// num_buckets = 3 820// initial_step = 10 821// step_multiplier = 10 822// Then, the buckets are: 823// (-infinity, 0), [0, 10), [10, 100), [100, 1000), [1000, +infinity) 824// 825// floor = 0 826// num_buckets = 3 827// initial_step = 2 828// step_multiplier = 2 829// Then, the buckets are: 830// (-infinity, 0), [0, 2), [2, 4), [4, 8), [8, +infinity) 831// 832// floor = 10 833// num_buckets = 3 834// initial_step = 2 835// step_multiplier = 2 836// Then, the buckets are: 837// (-infinity, 10), [10, 12), [12, 14), [14, 18), [18, +infinity) 838// 839// floor = 0 840// num_buckets = 3 841// initial_step = 100 842// step_multiplier = 10 843// Then, the buckets are: 844// (-infinity, 0), [0, 100), [100, 1000), [1000, 10000), [10000, +infinity) 845// 846message ExponentialIntegerBuckets { 847 int64 floor = 1; 848 849 // num_buckets must be at least 1. 850 uint32 num_buckets = 2; 851 852 // Must be at least one. 853 uint32 initial_step = 3; 854 855 // Must be at least one. 856 uint32 step_multiplier = 4; 857} 858 859// LinearIntegerBuckets is used to define a partition of the integers into a 860// finite number of buckets of equal size. 861// 862// Let n = num_buckets. Then there are n+2 buckets indexed 0,...,n+1. 863// Bucket 0 is the underflow bucket: (-infinity, floor) 864// Bucket n+1 is the overflow bucket: [lower + step_size * n, +infinity) 865// 866// For i = 1 to n, the bucket i is defined as 867// [floor + step_size * (i-1), floor + step_size * i) 868// 869// Example: floor = 0, num_buckets = 3, step_size = 10. 
870// (-infinity, 0), [0, 10), [10, 20), [20, 30), [30, +inifinity) 871message LinearIntegerBuckets { 872 int64 floor = 1; 873 874 // Must be at least one. 875 uint32 num_buckets = 2; 876 877 // Must be at least one. 878 uint32 step_size = 3; 879} 880 881message IntegerBuckets { 882 oneof buckets { 883 ExponentialIntegerBuckets exponential = 1; 884 LinearIntegerBuckets linear = 2; 885 } 886 887 // If set to true, empty buckets will not be added to the report data such 888 // that all histograms contain a row for every bucket. Buckets with a zero 889 // count may still occur if data is logged that contains a zero count. This 890 // field can not be set on reports with added privacy. 891 bool sparse_output = 3; 892} 893 894message StringSketchParameters { 895 // Number of hashes in Count-Min Sketch. 896 int32 num_hashes = 1; 897 898 // Number of cells per hash in Count-Min Sketch. 899 int32 num_cells_per_hash = 2; 900} 901