// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package trace

import (
	"cmp"
	"math"
	"slices"
)

// mud is an updatable mutator utilization distribution.
//
// This is a continuous distribution of duration over mutator
// utilization. For example, the integral from mutator utilization a
// to b is the total duration during which the mutator utilization was
// in the range [a, b].
//
// This distribution is *not* normalized (it is not a probability
// distribution). This makes it easier to work with as it's being
// updated.
//
// It is represented as the sum of scaled uniform distribution
// functions and Dirac delta functions (which are treated as
// degenerate uniform distributions).
type mud struct {
	sorted, unsorted []edge

	// trackMass is the inverse cumulative sum to track as the
	// distribution is updated.
	trackMass float64
	// trackBucket is the bucket in which trackMass falls. If the
	// total mass of the distribution is < trackMass, this is
	// len(hist).
	trackBucket int
	// trackSum is the cumulative sum of hist[:trackBucket]. Once
	// trackSum >= trackMass, trackBucket must be recomputed.
	trackSum float64

	// hist is a histogram of distribution mass, with one bucket
	// per 1/mudDegree of mutator utilization.
	hist [mudDegree]float64
}
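// As an illustrative sketch (not part of the original API surface), a
// caller might build up a distribution like this, assuming durations
// measured in nanoseconds:
//
//	var d mud
//	d.add(0.0, 0.5, 100) // 100ns spread uniformly over utilization [0, 0.5]
//	d.add(0.8, 0.8, 50)  // a 50ns Dirac delta at utilization 0.8
//
// The integral of d over [0, 0.25] is then 50ns, and the total mass of
// the distribution is 150ns.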

const (
	// mudDegree is the number of buckets in the MUD summary
	// histogram.
	mudDegree = 1024
)

type edge struct {
	// At x, the function increases by delta.
	x, delta float64
	// Additionally at x is a Dirac delta function with area dirac.
	dirac float64
}
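// The density of the distribution is a step function, and each edge
// records where that step function changes. As a hypothetical example,
// continuing the sketch above, the same two add calls produce:
//
//	d.add(0.0, 0.5, 100) // appends edge{0, 200, 0} and edge{0.5, -200, 0}
//	d.add(0.8, 0.8, 50)  // appends edge{0.8, 0, 50}
//
// so the density is 200 on [0, 0.5), 0 elsewhere, plus a point mass of
// 50 at utilization 0.8.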

// add adds a uniform function over [l, r] scaled so the total weight
// of the uniform is area. If l==r, this adds a Dirac delta function.
func (d *mud) add(l, r, area float64) {
	if area == 0 {
		return
	}

	if r < l {
		l, r = r, l
	}

	// Add the edges.
	if l == r {
		d.unsorted = append(d.unsorted, edge{l, 0, area})
	} else {
		delta := area / (r - l)
		d.unsorted = append(d.unsorted, edge{l, delta, 0}, edge{r, -delta, 0})
	}

	// Update the histogram.
	h := &d.hist
	// Split l into a bucket index and a fractional position within
	// that bucket, clamping to the last bucket.
	lbFloat, lf := math.Modf(l * mudDegree)
	lb := int(lbFloat)
	if lb >= mudDegree {
		lb, lf = mudDegree-1, 1
	}
	if l == r {
		h[lb] += area
	} else {
		rbFloat, rf := math.Modf(r * mudDegree)
		rb := int(rbFloat)
		if rb >= mudDegree {
			rb, rf = mudDegree-1, 1
		}
		if lb == rb {
			h[lb] += area
		} else {
			// Spread the area evenly over the covered buckets,
			// giving the end buckets only their covered fraction.
			perBucket := area / (r - l) / mudDegree
			h[lb] += perBucket * (1 - lf)
			h[rb] += perBucket * rf
			for i := lb + 1; i < rb; i++ {
				h[i] += perBucket
			}
		}
	}

	// Update mass tracking.
	if thresh := float64(d.trackBucket) / mudDegree; l < thresh {
		if r < thresh {
			d.trackSum += area
		} else {
			d.trackSum += area * (thresh - l) / (r - l)
		}
		if d.trackSum >= d.trackMass {
			// The tracked mass now falls in a different
			// bucket. Recompute the inverse cumulative sum.
			d.setTrackMass(d.trackMass)
		}
	}
}
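// A hedged usage sketch (callers and values here are hypothetical):
// feeding a sequence of fixed-length windows, each with a measured
// mutator utilization, produces a distribution whose total mass is the
// time covered so far.
//
//	var d mud
//	for _, u := range []float64{0.9, 0.4, 0.4, 1.0} {
//		d.add(u, u, 1e6) // each window is 1ms with a single utilization
//	}
//
// After this, the integral of d over [0, 0.5] is 2ms, contributed by
// the two 0.4 windows.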

// setTrackMass sets the mass to track the inverse cumulative sum for.
//
// Specifically, mass is a cumulative duration, and the mutator
// utilization bounds for this duration can be queried using
// approxInvCumulativeSum.
func (d *mud) setTrackMass(mass float64) {
	d.trackMass = mass

	// Find the bucket currently containing trackMass by computing
	// the cumulative sum.
	sum := 0.0
	for i, val := range d.hist[:] {
		newSum := sum + val
		if newSum > mass {
			// mass falls in bucket i.
			d.trackBucket = i
			d.trackSum = sum
			return
		}
		sum = newSum
	}
	d.trackBucket = len(d.hist)
	d.trackSum = sum
}

// approxInvCumulativeSum is like invCumulativeSum, but specifically
// operates on the tracked mass and returns a lower and upper bound
// approximation of the inverse cumulative sum.
//
// The true inverse cumulative sum will be in the range [lower, upper).
func (d *mud) approxInvCumulativeSum() (float64, float64, bool) {
	if d.trackBucket == len(d.hist) {
		return math.NaN(), math.NaN(), false
	}
	return float64(d.trackBucket) / mudDegree, float64(d.trackBucket+1) / mudDegree, true
}
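// A hedged sketch of the tracking workflow (values are hypothetical):
// track a target cumulative duration up front, keep adding mass, and
// then read back the utilization range that contains that duration.
//
//	var d mud
//	d.setTrackMass(5e6) // track the 5ms point of the cumulative sum
//	// ... many d.add(...) calls as the trace is processed ...
//	if lo, hi, ok := d.approxInvCumulativeSum(); ok {
//		// The 5ms point of the cumulative sum falls at a mutator
//		// utilization somewhere in [lo, hi).
//	}
//
// The bounds are one histogram bucket (1/mudDegree) apart, which keeps
// this query cheap compared to invCumulativeSum, which must sort and
// walk the edge list.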

// invCumulativeSum returns x such that the integral of d from -∞ to x
// is y. If the total weight of d is less than y, it returns the
// maximum of the distribution and false.
//
// Specifically, y is a cumulative duration, and invCumulativeSum
// returns the mutator utilization x such that at least y time has
// been spent with mutator utilization <= x.
func (d *mud) invCumulativeSum(y float64) (float64, bool) {
	if len(d.sorted) == 0 && len(d.unsorted) == 0 {
		return math.NaN(), false
	}

	// Sort edges.
	edges := d.unsorted
	slices.SortFunc(edges, func(a, b edge) int {
		return cmp.Compare(a.x, b.x)
	})
	// Merge with sorted edges.
	d.unsorted = nil
	if d.sorted == nil {
		d.sorted = edges
	} else {
		oldSorted := d.sorted
		newSorted := make([]edge, len(oldSorted)+len(edges))
		i, j := 0, 0
		for o := range newSorted {
			if i >= len(oldSorted) {
				copy(newSorted[o:], edges[j:])
				break
			} else if j >= len(edges) {
				copy(newSorted[o:], oldSorted[i:])
				break
			} else if oldSorted[i].x < edges[j].x {
				newSorted[o] = oldSorted[i]
				i++
			} else {
				newSorted[o] = edges[j]
				j++
			}
		}
		d.sorted = newSorted
	}

	// Traverse edges in order computing a cumulative sum.
	csum, rate, prevX := 0.0, 0.0, 0.0
	for _, e := range d.sorted {
		newCsum := csum + (e.x-prevX)*rate
		if newCsum >= y {
			// y was exceeded between the previous edge
			// and this one.
			if rate == 0 {
				// Anywhere between prevX and
				// e.x will do. We return e.x
				// because that takes care of
				// the y==0 case naturally.
				return e.x, true
			}
			return (y-csum)/rate + prevX, true
		}
		newCsum += e.dirac
		if newCsum >= y {
			// y was exceeded by the Dirac delta at e.x.
			return e.x, true
		}
		csum, prevX = newCsum, e.x
		rate += e.delta
	}
	return prevX, false
}
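// A hypothetical worked example: for a distribution built with only
// d.add(0.0, 0.5, 100), the sorted edges are edge{0, 200, 0} and
// edge{0.5, -200, 0}, so the cumulative sum rises linearly from 0 to
// 100 over [0, 0.5]. d.invCumulativeSum(50) then returns (0.25, true),
// since 50 units of duration accumulate by utilization 0.25, while
// d.invCumulativeSum(150) returns (0.5, false) because the total
// weight of the distribution is only 100.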