1# Copyright 2015 The Chromium Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5import bisect 6import collections 7 8 9class _Bucketer(object): 10 """Bucketing function for histograms recorded by the Distribution class.""" 11 12 def __init__(self, width, growth_factor, num_finite_buckets, scale=1.0): 13 """The bucket sizes are controlled by width and growth_factor, and the total 14 number of buckets is set by num_finite_buckets: 15 16 Args: 17 width: fixed size of each bucket (ignores |scale|). 18 growth_factor: if non-zero, the size of each bucket increases by another 19 multiplicative factor of this factor (see lower bound formula below). 20 num_finite_buckets: the number of finite buckets. There are two 21 additional buckets - an underflow and an overflow bucket - that have 22 lower and upper bounds of Infinity. 23 scale: overall scale factor to apply to buckets, if using geometric 24 buckets. 25 26 Specify a width for fixed-size buckets or specify a growth_factor for bucket 27 sizes that follow a geometric progression. Specifying both is not valid. 28 29 For fixed-size buckets:: 30 31 The i'th bucket covers the interval [(i-1) * width, i * width), where i 32 ranges from 1 to num_finite_buckets, inclusive: 33 34 bucket number lower bound upper bound 35 i == 0 (underflow) -inf 0 36 1 <= i <= num_buckets (i-1) * width i * width 37 i == num_buckets+1 (overflow) (i-1) * width +inf 38 39 For geometric buckets:: 40 41 The i'th bucket covers the interval [factor^(i-1), factor^i) * scale 42 where i ranges from 1 to num_finite_buckets inclusive. 43 44 bucket number lower bound upper bound 45 i == 0 (underflow) -inf scale 46 1 <= i <= num_buckets factor^(i-1) * scale factor^i * scale 47 i == num_buckets+1 (overflow) factor^(i-1) * scale +inf 48 """ 49 50 if num_finite_buckets < 0: 51 raise ValueError('num_finite_buckets must be >= 0 (was %d)' % 52 num_finite_buckets) 53 if width != 0 and growth_factor != 0: 54 raise ValueError('a Bucketer must be created with either a width or a ' 55 'growth factor, not both') 56 57 self.width = width 58 self.growth_factor = growth_factor 59 self.num_finite_buckets = num_finite_buckets 60 self.total_buckets = num_finite_buckets + 2 61 self.underflow_bucket = 0 62 self.overflow_bucket = self.total_buckets - 1 63 self.scale = scale 64 65 if width != 0: 66 self._lower_bounds = [float('-Inf')] + self._linear_bounds() 67 else: 68 self._lower_bounds = [float('-Inf')] + self._exponential_bounds() 69 70 # Sanity check the bucket lower bounds we created. 71 assert len(self._lower_bounds) == self.total_buckets 72 assert all(x < y for x, y in zip( 73 self._lower_bounds, self._lower_bounds[1:])), ( 74 'bucket boundaries must be monotonically increasing') 75 76 def __eq__(self, other): 77 return (type(self) is type(other) and 78 self.width == other.width and 79 self.growth_factor == other.growth_factor and 80 self.num_finite_buckets == other.num_finite_buckets and 81 self.scale == other.scale) 82 83 def _linear_bounds(self): 84 return [self.width * i for i in range(self.num_finite_buckets + 1)] 85 86 def _exponential_bounds(self): 87 return [ 88 self.scale * self.growth_factor ** i 89 for i in range(self.num_finite_buckets + 1)] 90 91 def bucket_for_value(self, value): 92 """Returns the index of the bucket that this value belongs to.""" 93 94 # bisect.bisect_left is wrong because the buckets are of [lower, upper) form 95 return bisect.bisect(self._lower_bounds, value) - 1 96 97 def bucket_boundaries(self, bucket): 98 """Returns a tuple that is the [lower, upper) bounds of this bucket. 99 100 The lower bound of the first bucket is -Infinity, and the upper bound of the 101 last bucket is +Infinity. 102 """ 103 104 if bucket < 0 or bucket >= self.total_buckets: 105 raise IndexError('bucket %d out of range' % bucket) 106 if bucket == self.total_buckets - 1: 107 return (self._lower_bounds[bucket], float('Inf')) 108 return (self._lower_bounds[bucket], self._lower_bounds[bucket + 1]) 109 110 111def FixedWidthBucketer(width, num_finite_buckets=100): 112 """Convenience function that returns a fixed width Bucketer.""" 113 return _Bucketer(width=width, growth_factor=0.0, 114 num_finite_buckets=num_finite_buckets) 115 116 117def GeometricBucketer(growth_factor=10**0.2, num_finite_buckets=100, 118 scale=1.0): 119 """Convenience function that returns a geometric progression Bucketer.""" 120 return _Bucketer(width=0, growth_factor=growth_factor, 121 num_finite_buckets=num_finite_buckets, scale=scale) 122 123 124class Distribution(object): 125 """Holds a histogram distribution. 126 127 Buckets are chosen for values by the provided Bucketer. 128 """ 129 130 def __init__(self, bucketer): 131 self.bucketer = bucketer 132 self.sum = 0 133 self.count = 0 134 self.buckets = collections.defaultdict(int) 135 136 def add(self, value): 137 self.buckets[self.bucketer.bucket_for_value(value)] += 1 138 self.sum += value 139 self.count += 1 140