1 // Copyright 2021 The Chromium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef COMPONENTS_METRICS_METRICS_DATA_VALIDATION_H_ 6 #define COMPONENTS_METRICS_METRICS_DATA_VALIDATION_H_ 7 8 #include "base/feature_list.h" 9 #include "base/metrics/field_trial_params.h" 10 #include "base/time/time.h" 11 12 // Features and functions in this file are necessary to set up artificial A / B 13 // experiments that help us better assess the accuracy and power of our field 14 // trial data. All code in this file should not have any impact on client's 15 // experience. 16 namespace metrics { 17 18 // Only used for testing. 19 namespace internal { 20 BASE_DECLARE_FEATURE(kPseudoMetricsEffectFeature); 21 } // namespace internal 22 23 // Used to assess the reliability of field trial data by sending artificial 24 // non-uniform data drawn from a log normal distribution. 25 BASE_DECLARE_FEATURE(kNonUniformityValidationFeature); 26 27 // The parameters for the log normal distribution. They refer to the default 28 // mean, the delta that would be applied to the default mean (the actual mean 29 // equals mean + log(1 + delta)) and the standard deviation of the distribution 30 // that's being generated. These parameters are carefully calculated so that 31 // ~0.01% of data drawn from the distribution would fall in the underflow bucket 32 // and ~0.01% of data in the overflow bucket. And they also leave us enough 33 // wiggle room to shift mean using delta in experiments without losing precision 34 // badly because of data in the overflow bucket. 35 // 36 // The way we get these numbers are based on the following calculation: 37 // u := the lower threshold for the overflow bucket (in this case, 10000). 38 // l := the upper threshold for the smallest bucket (in this case, 1). 39 // p := the probability that an observation will fall in the highest bucket (in 40 // this case, 0.01%) and also the probability that an observation will fall in 41 // the lowest bucket. 42 // 43 // mean = (log(u) + log(l)) / 2 44 // sd = (log(u) - log(l)) / (2 * qnorm(1-p)) 45 // 46 // At this point, experiments should only control the delta but not mean and 47 // stdDev. Putting them in feature params so that we can configure them from the 48 // server side if we want. 49 extern const base::FeatureParam<double> kLogNormalMean; 50 extern const base::FeatureParam<double> kLogNormalDelta; 51 extern const base::FeatureParam<double> kLogNormalStdDev; 52 53 // In order to assess if we're able to accurately detect a statistically 54 // significant difference in our field trial data, we set up pseudo metrics for 55 // some of our key metrics. Values of these pseudo metrics are the linear 56 // transformation (ax + b) of real values (x). The multiplicative factor (a) and 57 // additive factor (b) are controlled by field trial experiments. 58 // 59 // Returns the sample value for a pseudo metric given the |sample| from the real 60 // metric and the assigned field trial group. The input type is double because 61 // we don't want to lose precision before applying transformation. 62 double GetPseudoMetricsSample(double sample); 63 64 // Returns the TimeDelta for a pseudo metric given the |sample| from the real 65 // metric and the assigned field trial group. The unit of the additive factor 66 // (b) is milliseconds. 67 base::TimeDelta GetPseudoMetricsSample(base::TimeDelta sample); 68 69 } // namespace metrics 70 71 #endif // COMPONENTS_METRICS_METRICS_DATA_VALIDATION_H_ 72