xref: /aosp_15_r20/external/cronet/components/metrics/metrics_data_validation.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2021 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_METRICS_METRICS_DATA_VALIDATION_H_
6 #define COMPONENTS_METRICS_METRICS_DATA_VALIDATION_H_
7 
8 #include "base/feature_list.h"
9 #include "base/metrics/field_trial_params.h"
10 #include "base/time/time.h"
11 
12 // Features and functions in this file are necessary to set up artificial A/B
13 // experiments that help us better assess the accuracy and power of our field
14 // trial data. None of the code in this file should have any impact on the
15 // client's experience.
16 namespace metrics {
17 
18 // Declares a feature that is only used for testing.
19 namespace internal {
20 BASE_DECLARE_FEATURE(kPseudoMetricsEffectFeature);
21 }  // namespace internal
22 
23 // Feature used to assess the reliability of field trial data by sending
24 // artificial, non-uniform data drawn from a log-normal distribution.
25 BASE_DECLARE_FEATURE(kNonUniformityValidationFeature);
26 
27 // Parameters of the log-normal distribution: the default mean, the delta
28 // that is applied to the default mean (the actual mean equals
29 // mean + log(1 + delta)), and the standard deviation of the distribution
30 // that is being generated. These parameters are carefully calculated so that
31 // ~0.01% of data drawn from the distribution falls in the underflow bucket
32 // and ~0.01% of data falls in the overflow bucket. They also leave us enough
33 // wiggle room to shift the mean using delta in experiments without losing
34 // precision badly because of data in the overflow bucket.
35 //
36 // These numbers are derived from the following calculation:
37 // u := the lower threshold for the overflow bucket (in this case, 10000).
38 // l := the upper threshold for the smallest bucket (in this case, 1).
39 // p := the probability that an observation will fall in the highest bucket (in
40 //   this case, 0.01%) and also the probability that an observation will fall in
41 //   the lowest bucket.
42 //
43 // mean = (log(u) + log(l)) / 2
44 // sd = (log(u) - log(l)) / (2 * qnorm(1-p))
45 //
46 // At this point, experiments should only control the delta, not the mean or
47 // the standard deviation. All three are feature params so that they can be
48 // configured from the server side if we want.
49 extern const base::FeatureParam<double> kLogNormalMean;
50 extern const base::FeatureParam<double> kLogNormalDelta;
51 extern const base::FeatureParam<double> kLogNormalStdDev;
52 
53 // To assess whether we are able to accurately detect a statistically
54 // significant difference in our field trial data, we set up pseudo metrics for
55 // some of our key metrics. Each pseudo metric value is a linear
56 // transformation (ax + b) of the real value (x). The multiplicative factor (a)
57 // and additive factor (b) are controlled by field trial experiments.
58 //
59 // Returns the sample value for a pseudo metric given the |sample| from the real
60 // metric and the assigned field trial group. The input type is double because
61 // we don't want to lose precision before applying the transformation.
62 double GetPseudoMetricsSample(double sample);
63 
64 // TimeDelta overload: returns the TimeDelta for a pseudo metric given the
65 // |sample| from the real metric and the assigned field trial group. The
66 // additive factor (b) is expressed in milliseconds.
67 base::TimeDelta GetPseudoMetricsSample(base::TimeDelta sample);
68 
69 }  // namespace metrics
70 
71 #endif  // COMPONENTS_METRICS_METRICS_DATA_VALIDATION_H_
72