blob: 9bf4286c794b8febfd7091fe998011ce5f42a1f0 [file] [log] [blame]
Don Newton98fd8812019-09-23 15:15:02 -04001// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package trace
6
7// This file implements histogramming for RPC statistics collection.
8
9import (
10 "bytes"
11 "fmt"
12 "html/template"
13 "log"
14 "math"
15 "sync"
16
17 "golang.org/x/net/internal/timeseries"
18)
19
20const (
21 bucketCount = 38
22)
23
24// histogram keeps counts of values in buckets that are spaced
25// out in powers of 2: 0-1, 2-3, 4-7...
26// histogram implements timeseries.Observable
27type histogram struct {
28 sum int64 // running total of measurements
29 sumOfSquares float64 // square of running total
30 buckets []int64 // bucketed values for histogram
31 value int // holds a single value as an optimization
32 valueCount int64 // number of values recorded for single value
33}
34
35// AddMeasurement records a value measurement observation to the histogram.
36func (h *histogram) addMeasurement(value int64) {
37 // TODO: assert invariant
38 h.sum += value
39 h.sumOfSquares += float64(value) * float64(value)
40
41 bucketIndex := getBucket(value)
42
43 if h.valueCount == 0 || (h.valueCount > 0 && h.value == bucketIndex) {
44 h.value = bucketIndex
45 h.valueCount++
46 } else {
47 h.allocateBuckets()
48 h.buckets[bucketIndex]++
49 }
50}
51
52func (h *histogram) allocateBuckets() {
53 if h.buckets == nil {
54 h.buckets = make([]int64, bucketCount)
55 h.buckets[h.value] = h.valueCount
56 h.value = 0
57 h.valueCount = -1
58 }
59}
60
61func log2(i int64) int {
62 n := 0
63 for ; i >= 0x100; i >>= 8 {
64 n += 8
65 }
66 for ; i > 0; i >>= 1 {
67 n += 1
68 }
69 return n
70}
71
72func getBucket(i int64) (index int) {
73 index = log2(i) - 1
74 if index < 0 {
75 index = 0
76 }
77 if index >= bucketCount {
78 index = bucketCount - 1
79 }
80 return
81}
82
83// Total returns the number of recorded observations.
84func (h *histogram) total() (total int64) {
85 if h.valueCount >= 0 {
86 total = h.valueCount
87 }
88 for _, val := range h.buckets {
89 total += int64(val)
90 }
91 return
92}
93
94// Average returns the average value of recorded observations.
95func (h *histogram) average() float64 {
96 t := h.total()
97 if t == 0 {
98 return 0
99 }
100 return float64(h.sum) / float64(t)
101}
102
103// Variance returns the variance of recorded observations.
104func (h *histogram) variance() float64 {
105 t := float64(h.total())
106 if t == 0 {
107 return 0
108 }
109 s := float64(h.sum) / t
110 return h.sumOfSquares/t - s*s
111}
112
113// StandardDeviation returns the standard deviation of recorded observations.
114func (h *histogram) standardDeviation() float64 {
115 return math.Sqrt(h.variance())
116}
117
118// PercentileBoundary estimates the value that the given fraction of recorded
119// observations are less than.
120func (h *histogram) percentileBoundary(percentile float64) int64 {
121 total := h.total()
122
123 // Corner cases (make sure result is strictly less than Total())
124 if total == 0 {
125 return 0
126 } else if total == 1 {
127 return int64(h.average())
128 }
129
130 percentOfTotal := round(float64(total) * percentile)
131 var runningTotal int64
132
133 for i := range h.buckets {
134 value := h.buckets[i]
135 runningTotal += value
136 if runningTotal == percentOfTotal {
137 // We hit an exact bucket boundary. If the next bucket has data, it is a
138 // good estimate of the value. If the bucket is empty, we interpolate the
139 // midpoint between the next bucket's boundary and the next non-zero
140 // bucket. If the remaining buckets are all empty, then we use the
141 // boundary for the next bucket as the estimate.
142 j := uint8(i + 1)
143 min := bucketBoundary(j)
144 if runningTotal < total {
145 for h.buckets[j] == 0 {
146 j++
147 }
148 }
149 max := bucketBoundary(j)
150 return min + round(float64(max-min)/2)
151 } else if runningTotal > percentOfTotal {
152 // The value is in this bucket. Interpolate the value.
153 delta := runningTotal - percentOfTotal
154 percentBucket := float64(value-delta) / float64(value)
155 bucketMin := bucketBoundary(uint8(i))
156 nextBucketMin := bucketBoundary(uint8(i + 1))
157 bucketSize := nextBucketMin - bucketMin
158 return bucketMin + round(percentBucket*float64(bucketSize))
159 }
160 }
161 return bucketBoundary(bucketCount - 1)
162}
163
164// Median returns the estimated median of the observed values.
165func (h *histogram) median() int64 {
166 return h.percentileBoundary(0.5)
167}
168
169// Add adds other to h.
170func (h *histogram) Add(other timeseries.Observable) {
171 o := other.(*histogram)
172 if o.valueCount == 0 {
173 // Other histogram is empty
174 } else if h.valueCount >= 0 && o.valueCount > 0 && h.value == o.value {
175 // Both have a single bucketed value, aggregate them
176 h.valueCount += o.valueCount
177 } else {
178 // Two different values necessitate buckets in this histogram
179 h.allocateBuckets()
180 if o.valueCount >= 0 {
181 h.buckets[o.value] += o.valueCount
182 } else {
183 for i := range h.buckets {
184 h.buckets[i] += o.buckets[i]
185 }
186 }
187 }
188 h.sumOfSquares += o.sumOfSquares
189 h.sum += o.sum
190}
191
192// Clear resets the histogram to an empty state, removing all observed values.
193func (h *histogram) Clear() {
194 h.buckets = nil
195 h.value = 0
196 h.valueCount = 0
197 h.sum = 0
198 h.sumOfSquares = 0
199}
200
201// CopyFrom copies from other, which must be a *histogram, into h.
202func (h *histogram) CopyFrom(other timeseries.Observable) {
203 o := other.(*histogram)
204 if o.valueCount == -1 {
205 h.allocateBuckets()
206 copy(h.buckets, o.buckets)
207 }
208 h.sum = o.sum
209 h.sumOfSquares = o.sumOfSquares
210 h.value = o.value
211 h.valueCount = o.valueCount
212}
213
214// Multiply scales the histogram by the specified ratio.
215func (h *histogram) Multiply(ratio float64) {
216 if h.valueCount == -1 {
217 for i := range h.buckets {
218 h.buckets[i] = int64(float64(h.buckets[i]) * ratio)
219 }
220 } else {
221 h.valueCount = int64(float64(h.valueCount) * ratio)
222 }
223 h.sum = int64(float64(h.sum) * ratio)
224 h.sumOfSquares = h.sumOfSquares * ratio
225}
226
227// New creates a new histogram.
228func (h *histogram) New() timeseries.Observable {
229 r := new(histogram)
230 r.Clear()
231 return r
232}
233
234func (h *histogram) String() string {
235 return fmt.Sprintf("%d, %f, %d, %d, %v",
236 h.sum, h.sumOfSquares, h.value, h.valueCount, h.buckets)
237}
238
239// round returns the closest int64 to the argument
240func round(in float64) int64 {
241 return int64(math.Floor(in + 0.5))
242}
243
244// bucketBoundary returns the first value in the bucket.
245func bucketBoundary(bucket uint8) int64 {
246 if bucket == 0 {
247 return 0
248 }
249 return 1 << bucket
250}
251
252// bucketData holds data about a specific bucket for use in distTmpl.
253type bucketData struct {
254 Lower, Upper int64
255 N int64
256 Pct, CumulativePct float64
257 GraphWidth int
258}
259
260// data holds data about a Distribution for use in distTmpl.
261type data struct {
262 Buckets []*bucketData
263 Count, Median int64
264 Mean, StandardDeviation float64
265}
266
267// maxHTMLBarWidth is the maximum width of the HTML bar for visualizing buckets.
268const maxHTMLBarWidth = 350.0
269
270// newData returns data representing h for use in distTmpl.
271func (h *histogram) newData() *data {
272 // Force the allocation of buckets to simplify the rendering implementation
273 h.allocateBuckets()
274 // We scale the bars on the right so that the largest bar is
275 // maxHTMLBarWidth pixels in width.
276 maxBucket := int64(0)
277 for _, n := range h.buckets {
278 if n > maxBucket {
279 maxBucket = n
280 }
281 }
282 total := h.total()
283 barsizeMult := maxHTMLBarWidth / float64(maxBucket)
284 var pctMult float64
285 if total == 0 {
286 pctMult = 1.0
287 } else {
288 pctMult = 100.0 / float64(total)
289 }
290
291 buckets := make([]*bucketData, len(h.buckets))
292 runningTotal := int64(0)
293 for i, n := range h.buckets {
294 if n == 0 {
295 continue
296 }
297 runningTotal += n
298 var upperBound int64
299 if i < bucketCount-1 {
300 upperBound = bucketBoundary(uint8(i + 1))
301 } else {
302 upperBound = math.MaxInt64
303 }
304 buckets[i] = &bucketData{
305 Lower: bucketBoundary(uint8(i)),
306 Upper: upperBound,
307 N: n,
308 Pct: float64(n) * pctMult,
309 CumulativePct: float64(runningTotal) * pctMult,
310 GraphWidth: int(float64(n) * barsizeMult),
311 }
312 }
313 return &data{
314 Buckets: buckets,
315 Count: total,
316 Median: h.median(),
317 Mean: h.average(),
318 StandardDeviation: h.standardDeviation(),
319 }
320}
321
322func (h *histogram) html() template.HTML {
323 buf := new(bytes.Buffer)
324 if err := distTmpl().Execute(buf, h.newData()); err != nil {
325 buf.Reset()
326 log.Printf("net/trace: couldn't execute template: %v", err)
327 }
328 return template.HTML(buf.String())
329}
330
331var distTmplCache *template.Template
332var distTmplOnce sync.Once
333
334func distTmpl() *template.Template {
335 distTmplOnce.Do(func() {
336 // Input: data
337 distTmplCache = template.Must(template.New("distTmpl").Parse(`
338<table>
339<tr>
340 <td style="padding:0.25em">Count: {{.Count}}</td>
341 <td style="padding:0.25em">Mean: {{printf "%.0f" .Mean}}</td>
342 <td style="padding:0.25em">StdDev: {{printf "%.0f" .StandardDeviation}}</td>
343 <td style="padding:0.25em">Median: {{.Median}}</td>
344</tr>
345</table>
346<hr>
347<table>
348{{range $b := .Buckets}}
349{{if $b}}
350 <tr>
351 <td style="padding:0 0 0 0.25em">[</td>
352 <td style="text-align:right;padding:0 0.25em">{{.Lower}},</td>
353 <td style="text-align:right;padding:0 0.25em">{{.Upper}})</td>
354 <td style="text-align:right;padding:0 0.25em">{{.N}}</td>
355 <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .Pct}}%</td>
356 <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .CumulativePct}}%</td>
357 <td><div style="background-color: blue; height: 1em; width: {{.GraphWidth}};"></div></td>
358 </tr>
359{{end}}
360{{end}}
361</table>
362`))
363 })
364 return distTmplCache
365}