xref: /aosp_15_r20/external/autotest/utils/frozen_chromite/third_party/infra_libs/ts_mon/common/distribution.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1# Copyright 2015 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import bisect
6import collections
7
8
9class _Bucketer(object):
10  """Bucketing function for histograms recorded by the Distribution class."""
11
12  def __init__(self, width, growth_factor, num_finite_buckets, scale=1.0):
13    """The bucket sizes are controlled by width and growth_factor, and the total
14    number of buckets is set by num_finite_buckets:
15
16    Args:
17      width: fixed size of each bucket (ignores |scale|).
18      growth_factor: if non-zero, the size of each bucket increases by another
19          multiplicative factor of this factor (see lower bound formula below).
20      num_finite_buckets: the number of finite buckets.  There are two
21          additional buckets - an underflow and an overflow bucket - that have
22          lower and upper bounds of Infinity.
23      scale: overall scale factor to apply to buckets, if using geometric
24          buckets.
25
26    Specify a width for fixed-size buckets or specify a growth_factor for bucket
27    sizes that follow a geometric progression.  Specifying both is not valid.
28
29    For fixed-size buckets::
30
31      The i'th bucket covers the interval [(i-1) * width, i * width),  where i
32      ranges from 1 to num_finite_buckets, inclusive:
33
34      bucket number                   lower bound      upper bound
35      i == 0 (underflow)              -inf             0
36      1 <= i <= num_buckets           (i-1) * width    i * width
37      i == num_buckets+1 (overflow)   (i-1) * width    +inf
38
39    For geometric buckets::
40
41      The i'th bucket covers the interval [factor^(i-1), factor^i) * scale
42      where i ranges from 1 to num_finite_buckets inclusive.
43
44      bucket number                   lower bound            upper bound
45      i == 0 (underflow)              -inf                   scale
46      1 <= i <= num_buckets           factor^(i-1) * scale   factor^i * scale
47      i == num_buckets+1 (overflow)   factor^(i-1) * scale   +inf
48    """
49
50    if num_finite_buckets < 0:
51      raise ValueError('num_finite_buckets must be >= 0 (was %d)' %
52          num_finite_buckets)
53    if width != 0 and growth_factor != 0:
54      raise ValueError('a Bucketer must be created with either a width or a '
55                       'growth factor, not both')
56
57    self.width = width
58    self.growth_factor = growth_factor
59    self.num_finite_buckets = num_finite_buckets
60    self.total_buckets = num_finite_buckets + 2
61    self.underflow_bucket = 0
62    self.overflow_bucket = self.total_buckets - 1
63    self.scale = scale
64
65    if width != 0:
66      self._lower_bounds = [float('-Inf')] + self._linear_bounds()
67    else:
68      self._lower_bounds = [float('-Inf')] + self._exponential_bounds()
69
70    # Sanity check the bucket lower bounds we created.
71    assert len(self._lower_bounds) == self.total_buckets
72    assert all(x < y for x, y in zip(
73        self._lower_bounds, self._lower_bounds[1:])), (
74        'bucket boundaries must be monotonically increasing')
75
76  def __eq__(self, other):
77    return (type(self) is type(other) and
78            self.width == other.width and
79            self.growth_factor == other.growth_factor and
80            self.num_finite_buckets == other.num_finite_buckets and
81            self.scale == other.scale)
82
83  def _linear_bounds(self):
84    return [self.width * i for i in range(self.num_finite_buckets + 1)]
85
86  def _exponential_bounds(self):
87    return [
88        self.scale * self.growth_factor ** i
89        for i in range(self.num_finite_buckets + 1)]
90
91  def bucket_for_value(self, value):
92    """Returns the index of the bucket that this value belongs to."""
93
94    # bisect.bisect_left is wrong because the buckets are of [lower, upper) form
95    return bisect.bisect(self._lower_bounds, value) - 1
96
97  def bucket_boundaries(self, bucket):
98    """Returns a tuple that is the [lower, upper) bounds of this bucket.
99
100    The lower bound of the first bucket is -Infinity, and the upper bound of the
101    last bucket is +Infinity.
102    """
103
104    if bucket < 0 or bucket >= self.total_buckets:
105      raise IndexError('bucket %d out of range' % bucket)
106    if bucket == self.total_buckets - 1:
107      return (self._lower_bounds[bucket], float('Inf'))
108    return (self._lower_bounds[bucket], self._lower_bounds[bucket + 1])
109
110
111def FixedWidthBucketer(width, num_finite_buckets=100):
112  """Convenience function that returns a fixed width Bucketer."""
113  return _Bucketer(width=width, growth_factor=0.0,
114      num_finite_buckets=num_finite_buckets)
115
116
117def GeometricBucketer(growth_factor=10**0.2, num_finite_buckets=100,
118                      scale=1.0):
119  """Convenience function that returns a geometric progression Bucketer."""
120  return _Bucketer(width=0, growth_factor=growth_factor,
121      num_finite_buckets=num_finite_buckets, scale=scale)
122
123
124class Distribution(object):
125  """Holds a histogram distribution.
126
127  Buckets are chosen for values by the provided Bucketer.
128  """
129
130  def __init__(self, bucketer):
131    self.bucketer = bucketer
132    self.sum = 0
133    self.count = 0
134    self.buckets = collections.defaultdict(int)
135
136  def add(self, value):
137    self.buckets[self.bucketer.bucket_for_value(value)] += 1
138    self.sum += value
139    self.count += 1
140