// Copyright 2014 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
13
14package prometheus
15
16import (
17 "fmt"
18 "math"
19 "sort"
20 "sync"
21 "time"
22
23 "github.com/beorn7/perks/quantile"
24 "github.com/golang/protobuf/proto"
25
26 dto "github.com/prometheus/client_model/go"
27)
28
// quantileLabel is used for the label that defines the quantile in a
// summary. Because of that, it is rejected as a variable or constant label
// name when a summary is constructed.
const quantileLabel = "quantile"
32
33// A Summary captures individual observations from an event or sample stream and
34// summarizes them in a manner similar to traditional summary statistics: 1. sum
35// of observations, 2. observation count, 3. rank estimations.
36//
37// A typical use-case is the observation of request latencies. By default, a
38// Summary provides the median, the 90th and the 99th percentile of the latency
39// as rank estimations. However, the default behavior will change in the
40// upcoming v0.10 of the library. There will be no rank estimations at all by
41// default. For a sane transition, it is recommended to set the desired rank
42// estimations explicitly.
43//
44// Note that the rank estimations cannot be aggregated in a meaningful way with
45// the Prometheus query language (i.e. you cannot average or add them). If you
46// need aggregatable quantiles (e.g. you want the 99th percentile latency of all
47// queries served across all instances of a service), consider the Histogram
48// metric type. See the Prometheus documentation for more details.
49//
50// To create Summary instances, use NewSummary.
type Summary interface {
	// A Summary is both a single Metric and a Collector.
	Metric
	Collector

	// Observe adds a single observation to the summary.
	Observe(float64)
}
58
59// DefObjectives are the default Summary quantile values.
60//
61// Deprecated: DefObjectives will not be used as the default objectives in
62// v0.10 of the library. The default Summary will have no quantiles then.
63var (
64 DefObjectives = map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}
65
66 errQuantileLabelNotAllowed = fmt.Errorf(
67 "%q is not allowed as label name in summaries", quantileLabel,
68 )
69)
70
// Default values for SummaryOpts. Each of them is applied by the constructor
// when the corresponding SummaryOpts field is left at its zero value.
const (
	// DefMaxAge is the default duration for which observations stay
	// relevant.
	DefMaxAge = 10 * time.Minute

	// DefAgeBuckets is the default number of buckets used to calculate the
	// age of observations.
	DefAgeBuckets = 5

	// DefBufCap is the standard buffer size for collecting Summary
	// observations.
	DefBufCap = 500
)
82
83// SummaryOpts bundles the options for creating a Summary metric. It is
84// mandatory to set Name to a non-empty string. While all other fields are
85// optional and can safely be left at their zero value, it is recommended to set
86// a help string and to explicitly set the Objectives field to the desired value
87// as the default value will change in the upcoming v0.10 of the library.
type SummaryOpts struct {
	// Namespace, Subsystem, and Name are components of the fully-qualified
	// name of the Summary (created by joining these components with
	// "_"). Only Name is mandatory, the others merely help structuring the
	// name. Note that the fully-qualified name of the Summary must be a
	// valid Prometheus metric name.
	Namespace string
	Subsystem string
	Name      string

	// Help provides information about this Summary.
	//
	// Metrics with the same fully-qualified name must have the same Help
	// string.
	Help string

	// ConstLabels are used to attach fixed labels to this metric. Metrics
	// with the same fully-qualified name must have the same label names in
	// their ConstLabels.
	//
	// Due to the way a Summary is represented in the Prometheus text format
	// and how it is handled by the Prometheus server internally, "quantile"
	// is an illegal label name. Construction of a Summary or SummaryVec
	// will panic if this label name is used in ConstLabels.
	//
	// ConstLabels are only used rarely. In particular, do not use them to
	// attach the same labels to all your metrics. Those use cases are
	// better covered by target labels set by the scraping Prometheus
	// server, or by one specific metric (e.g. a build_info or a
	// machine_role metric). See also
	// https://prometheus.io/docs/instrumenting/writing_exporters/#target-labels,-not-static-scraped-labels
	ConstLabels Labels

	// Objectives defines the quantile rank estimates with their respective
	// absolute error. If Objectives[q] = e, then the value reported for q
	// will be the φ-quantile value for some φ between q-e and q+e. The
	// default value is DefObjectives. It is used if Objectives is left at
	// its zero value (i.e. nil). To create a Summary without Objectives,
	// set it to an empty map (i.e. map[float64]float64{}).
	//
	// Deprecated: Note that the current value of DefObjectives is
	// deprecated. It will be replaced by an empty map in v0.10 of the
	// library. Please explicitly set Objectives to the desired value.
	Objectives map[float64]float64

	// MaxAge defines the duration for which an observation stays relevant
	// for the summary. It must not be negative; construction panics on a
	// negative value. If left at zero, the default value DefMaxAge is used.
	MaxAge time.Duration

	// AgeBuckets is the number of buckets used to exclude observations that
	// are older than MaxAge from the summary. A higher number has a
	// resource penalty, so only increase it if the higher resolution is
	// really required. For very high observation rates, you might want to
	// reduce the number of age buckets. With only one age bucket, you will
	// effectively see a complete reset of the summary each time MaxAge has
	// passed. The default value is DefAgeBuckets.
	AgeBuckets uint32

	// BufCap defines the default sample stream buffer size. The default
	// value of DefBufCap should suffice for most uses. If there is a need
	// to increase the value, a multiple of 500 is recommended (because that
	// is the internal buffer size of the underlying package
	// "github.com/beorn7/perks/quantile").
	BufCap uint32
}
153
// Note on the sliding-window decay algorithm: The Merge method of
// perks/quantile is actually not working as advertised - and it might be
// unfixable, as the underlying algorithm is apparently not capable of merging
// summaries in the first place. To avoid using Merge, we are currently adding
// observations to _each_ age bucket, i.e. the effort to add a sample is
// essentially multiplied by the number of age buckets. When rotating age
// buckets, we empty the previous head stream. On scrape time, we simply take
// the quantiles from the head stream (no merging required). Result: More effort
// on observation time, less effort on scrape time, which is exactly the
// opposite of what we try to accomplish, but at least the results are correct.
//
// The quite elegant previous contraption to merge the age buckets efficiently
// on scrape time (see the code up to commit
// 6b9530d72ea715f0ba612c0120e6e09fbf1d49d0) can't be used anymore.
168
169// NewSummary creates a new Summary based on the provided SummaryOpts.
170func NewSummary(opts SummaryOpts) Summary {
171 return newSummary(
172 NewDesc(
173 BuildFQName(opts.Namespace, opts.Subsystem, opts.Name),
174 opts.Help,
175 nil,
176 opts.ConstLabels,
177 ),
178 opts,
179 )
180}
181
182func newSummary(desc *Desc, opts SummaryOpts, labelValues ...string) Summary {
183 if len(desc.variableLabels) != len(labelValues) {
184 panic(makeInconsistentCardinalityError(desc.fqName, desc.variableLabels, labelValues))
185 }
186
187 for _, n := range desc.variableLabels {
188 if n == quantileLabel {
189 panic(errQuantileLabelNotAllowed)
190 }
191 }
192 for _, lp := range desc.constLabelPairs {
193 if lp.GetName() == quantileLabel {
194 panic(errQuantileLabelNotAllowed)
195 }
196 }
197
198 if opts.Objectives == nil {
199 opts.Objectives = DefObjectives
200 }
201
202 if opts.MaxAge < 0 {
203 panic(fmt.Errorf("illegal max age MaxAge=%v", opts.MaxAge))
204 }
205 if opts.MaxAge == 0 {
206 opts.MaxAge = DefMaxAge
207 }
208
209 if opts.AgeBuckets == 0 {
210 opts.AgeBuckets = DefAgeBuckets
211 }
212
213 if opts.BufCap == 0 {
214 opts.BufCap = DefBufCap
215 }
216
217 s := &summary{
218 desc: desc,
219
220 objectives: opts.Objectives,
221 sortedObjectives: make([]float64, 0, len(opts.Objectives)),
222
223 labelPairs: makeLabelPairs(desc, labelValues),
224
225 hotBuf: make([]float64, 0, opts.BufCap),
226 coldBuf: make([]float64, 0, opts.BufCap),
227 streamDuration: opts.MaxAge / time.Duration(opts.AgeBuckets),
228 }
229 s.headStreamExpTime = time.Now().Add(s.streamDuration)
230 s.hotBufExpTime = s.headStreamExpTime
231
232 for i := uint32(0); i < opts.AgeBuckets; i++ {
233 s.streams = append(s.streams, s.newStream())
234 }
235 s.headStream = s.streams[0]
236
237 for qu := range s.objectives {
238 s.sortedObjectives = append(s.sortedObjectives, qu)
239 }
240 sort.Float64s(s.sortedObjectives)
241
242 s.init(s) // Init self-collection.
243 return s
244}
245
246type summary struct {
247 selfCollector
248
249 bufMtx sync.Mutex // Protects hotBuf and hotBufExpTime.
250 mtx sync.Mutex // Protects every other moving part.
251 // Lock bufMtx before mtx if both are needed.
252
253 desc *Desc
254
255 objectives map[float64]float64
256 sortedObjectives []float64
257
258 labelPairs []*dto.LabelPair
259
260 sum float64
261 cnt uint64
262
263 hotBuf, coldBuf []float64
264
265 streams []*quantile.Stream
266 streamDuration time.Duration
267 headStream *quantile.Stream
268 headStreamIdx int
269 headStreamExpTime, hotBufExpTime time.Time
270}
271
// Desc returns the descriptor stored in the summary.
func (s *summary) Desc() *Desc {
	return s.desc
}
275
276func (s *summary) Observe(v float64) {
277 s.bufMtx.Lock()
278 defer s.bufMtx.Unlock()
279
280 now := time.Now()
281 if now.After(s.hotBufExpTime) {
282 s.asyncFlush(now)
283 }
284 s.hotBuf = append(s.hotBuf, v)
285 if len(s.hotBuf) == cap(s.hotBuf) {
286 s.asyncFlush(now)
287 }
288}
289
290func (s *summary) Write(out *dto.Metric) error {
291 sum := &dto.Summary{}
292 qs := make([]*dto.Quantile, 0, len(s.objectives))
293
294 s.bufMtx.Lock()
295 s.mtx.Lock()
296 // Swap bufs even if hotBuf is empty to set new hotBufExpTime.
297 s.swapBufs(time.Now())
298 s.bufMtx.Unlock()
299
300 s.flushColdBuf()
301 sum.SampleCount = proto.Uint64(s.cnt)
302 sum.SampleSum = proto.Float64(s.sum)
303
304 for _, rank := range s.sortedObjectives {
305 var q float64
306 if s.headStream.Count() == 0 {
307 q = math.NaN()
308 } else {
309 q = s.headStream.Query(rank)
310 }
311 qs = append(qs, &dto.Quantile{
312 Quantile: proto.Float64(rank),
313 Value: proto.Float64(q),
314 })
315 }
316
317 s.mtx.Unlock()
318
319 if len(qs) > 0 {
320 sort.Sort(quantSort(qs))
321 }
322 sum.Quantile = qs
323
324 out.Summary = sum
325 out.Label = s.labelPairs
326 return nil
327}
328
// newStream returns a new quantile stream targeted at the objectives of the
// summary.
func (s *summary) newStream() *quantile.Stream {
	return quantile.NewTargeted(s.objectives)
}
332
333// asyncFlush needs bufMtx locked.
334func (s *summary) asyncFlush(now time.Time) {
335 s.mtx.Lock()
336 s.swapBufs(now)
337
338 // Unblock the original goroutine that was responsible for the mutation
339 // that triggered the compaction. But hold onto the global non-buffer
340 // state mutex until the operation finishes.
341 go func() {
342 s.flushColdBuf()
343 s.mtx.Unlock()
344 }()
345}
346
347// rotateStreams needs mtx AND bufMtx locked.
348func (s *summary) maybeRotateStreams() {
349 for !s.hotBufExpTime.Equal(s.headStreamExpTime) {
350 s.headStream.Reset()
351 s.headStreamIdx++
352 if s.headStreamIdx >= len(s.streams) {
353 s.headStreamIdx = 0
354 }
355 s.headStream = s.streams[s.headStreamIdx]
356 s.headStreamExpTime = s.headStreamExpTime.Add(s.streamDuration)
357 }
358}
359
360// flushColdBuf needs mtx locked.
361func (s *summary) flushColdBuf() {
362 for _, v := range s.coldBuf {
363 for _, stream := range s.streams {
364 stream.Insert(v)
365 }
366 s.cnt++
367 s.sum += v
368 }
369 s.coldBuf = s.coldBuf[0:0]
370 s.maybeRotateStreams()
371}
372
373// swapBufs needs mtx AND bufMtx locked, coldBuf must be empty.
374func (s *summary) swapBufs(now time.Time) {
375 if len(s.coldBuf) != 0 {
376 panic("coldBuf is not empty")
377 }
378 s.hotBuf, s.coldBuf = s.coldBuf, s.hotBuf
379 // hotBuf is now empty and gets new expiration set.
380 for now.After(s.hotBufExpTime) {
381 s.hotBufExpTime = s.hotBufExpTime.Add(s.streamDuration)
382 }
383}
384
385type quantSort []*dto.Quantile
386
387func (s quantSort) Len() int {
388 return len(s)
389}
390
391func (s quantSort) Swap(i, j int) {
392 s[i], s[j] = s[j], s[i]
393}
394
395func (s quantSort) Less(i, j int) bool {
396 return s[i].GetQuantile() < s[j].GetQuantile()
397}
398
399// SummaryVec is a Collector that bundles a set of Summaries that all share the
400// same Desc, but have different values for their variable labels. This is used
401// if you want to count the same thing partitioned by various dimensions
402// (e.g. HTTP request latencies, partitioned by status code and method). Create
403// instances with NewSummaryVec.
type SummaryVec struct {
	// metricVec is embedded; the lookup and currying methods of SummaryVec
	// delegate to it.
	*metricVec
}
407
408// NewSummaryVec creates a new SummaryVec based on the provided SummaryOpts and
409// partitioned by the given label names.
410//
411// Due to the way a Summary is represented in the Prometheus text format and how
412// it is handled by the Prometheus server internally, “quantile” is an illegal
413// label name. NewSummaryVec will panic if this label name is used.
414func NewSummaryVec(opts SummaryOpts, labelNames []string) *SummaryVec {
415 for _, ln := range labelNames {
416 if ln == quantileLabel {
417 panic(errQuantileLabelNotAllowed)
418 }
419 }
420 desc := NewDesc(
421 BuildFQName(opts.Namespace, opts.Subsystem, opts.Name),
422 opts.Help,
423 labelNames,
424 opts.ConstLabels,
425 )
426 return &SummaryVec{
427 metricVec: newMetricVec(desc, func(lvs ...string) Metric {
428 return newSummary(desc, opts, lvs...)
429 }),
430 }
431}
432
433// GetMetricWithLabelValues returns the Summary for the given slice of label
434// values (same order as the VariableLabels in Desc). If that combination of
435// label values is accessed for the first time, a new Summary is created.
436//
437// It is possible to call this method without using the returned Summary to only
438// create the new Summary but leave it at its starting value, a Summary without
439// any observations.
440//
441// Keeping the Summary for later use is possible (and should be considered if
442// performance is critical), but keep in mind that Reset, DeleteLabelValues and
443// Delete can be used to delete the Summary from the SummaryVec. In that case,
444// the Summary will still exist, but it will not be exported anymore, even if a
445// Summary with the same label values is created later. See also the CounterVec
446// example.
447//
448// An error is returned if the number of label values is not the same as the
449// number of VariableLabels in Desc (minus any curried labels).
450//
451// Note that for more than one label value, this method is prone to mistakes
452// caused by an incorrect order of arguments. Consider GetMetricWith(Labels) as
453// an alternative to avoid that type of mistake. For higher label numbers, the
454// latter has a much more readable (albeit more verbose) syntax, but it comes
455// with a performance overhead (for creating and processing the Labels map).
456// See also the GaugeVec example.
457func (v *SummaryVec) GetMetricWithLabelValues(lvs ...string) (Observer, error) {
458 metric, err := v.metricVec.getMetricWithLabelValues(lvs...)
459 if metric != nil {
460 return metric.(Observer), err
461 }
462 return nil, err
463}
464
465// GetMetricWith returns the Summary for the given Labels map (the label names
466// must match those of the VariableLabels in Desc). If that label map is
467// accessed for the first time, a new Summary is created. Implications of
468// creating a Summary without using it and keeping the Summary for later use are
469// the same as for GetMetricWithLabelValues.
470//
471// An error is returned if the number and names of the Labels are inconsistent
472// with those of the VariableLabels in Desc (minus any curried labels).
473//
474// This method is used for the same purpose as
475// GetMetricWithLabelValues(...string). See there for pros and cons of the two
476// methods.
477func (v *SummaryVec) GetMetricWith(labels Labels) (Observer, error) {
478 metric, err := v.metricVec.getMetricWith(labels)
479 if metric != nil {
480 return metric.(Observer), err
481 }
482 return nil, err
483}
484
485// WithLabelValues works as GetMetricWithLabelValues, but panics where
486// GetMetricWithLabelValues would have returned an error. Not returning an
487// error allows shortcuts like
488// myVec.WithLabelValues("404", "GET").Observe(42.21)
489func (v *SummaryVec) WithLabelValues(lvs ...string) Observer {
490 s, err := v.GetMetricWithLabelValues(lvs...)
491 if err != nil {
492 panic(err)
493 }
494 return s
495}
496
497// With works as GetMetricWith, but panics where GetMetricWithLabels would have
498// returned an error. Not returning an error allows shortcuts like
499// myVec.With(prometheus.Labels{"code": "404", "method": "GET"}).Observe(42.21)
500func (v *SummaryVec) With(labels Labels) Observer {
501 s, err := v.GetMetricWith(labels)
502 if err != nil {
503 panic(err)
504 }
505 return s
506}
507
508// CurryWith returns a vector curried with the provided labels, i.e. the
509// returned vector has those labels pre-set for all labeled operations performed
510// on it. The cardinality of the curried vector is reduced accordingly. The
511// order of the remaining labels stays the same (just with the curried labels
512// taken out of the sequence – which is relevant for the
513// (GetMetric)WithLabelValues methods). It is possible to curry a curried
514// vector, but only with labels not yet used for currying before.
515//
516// The metrics contained in the SummaryVec are shared between the curried and
517// uncurried vectors. They are just accessed differently. Curried and uncurried
518// vectors behave identically in terms of collection. Only one must be
519// registered with a given registry (usually the uncurried version). The Reset
520// method deletes all metrics, even if called on a curried vector.
521func (v *SummaryVec) CurryWith(labels Labels) (ObserverVec, error) {
522 vec, err := v.curryWith(labels)
523 if vec != nil {
524 return &SummaryVec{vec}, err
525 }
526 return nil, err
527}
528
529// MustCurryWith works as CurryWith but panics where CurryWith would have
530// returned an error.
531func (v *SummaryVec) MustCurryWith(labels Labels) ObserverVec {
532 vec, err := v.CurryWith(labels)
533 if err != nil {
534 panic(err)
535 }
536 return vec
537}
538
// constSummary is the Metric returned by NewConstSummary. It carries fixed
// values for count, sum, and quantiles.
type constSummary struct {
	desc       *Desc
	count      uint64
	sum        float64
	quantiles  map[float64]float64 // Maps ranks to quantile values.
	labelPairs []*dto.LabelPair
}
546
// Desc returns the descriptor stored in the constSummary.
func (s *constSummary) Desc() *Desc {
	return s.desc
}
550
551func (s *constSummary) Write(out *dto.Metric) error {
552 sum := &dto.Summary{}
553 qs := make([]*dto.Quantile, 0, len(s.quantiles))
554
555 sum.SampleCount = proto.Uint64(s.count)
556 sum.SampleSum = proto.Float64(s.sum)
557
558 for rank, q := range s.quantiles {
559 qs = append(qs, &dto.Quantile{
560 Quantile: proto.Float64(rank),
561 Value: proto.Float64(q),
562 })
563 }
564
565 if len(qs) > 0 {
566 sort.Sort(quantSort(qs))
567 }
568 sum.Quantile = qs
569
570 out.Summary = sum
571 out.Label = s.labelPairs
572
573 return nil
574}
575
576// NewConstSummary returns a metric representing a Prometheus summary with fixed
577// values for the count, sum, and quantiles. As those parameters cannot be
578// changed, the returned value does not implement the Summary interface (but
579// only the Metric interface). Users of this package will not have much use for
580// it in regular operations. However, when implementing custom Collectors, it is
581// useful as a throw-away metric that is generated on the fly to send it to
582// Prometheus in the Collect method.
583//
584// quantiles maps ranks to quantile values. For example, a median latency of
585// 0.23s and a 99th percentile latency of 0.56s would be expressed as:
586// map[float64]float64{0.5: 0.23, 0.99: 0.56}
587//
588// NewConstSummary returns an error if the length of labelValues is not
589// consistent with the variable labels in Desc or if Desc is invalid.
590func NewConstSummary(
591 desc *Desc,
592 count uint64,
593 sum float64,
594 quantiles map[float64]float64,
595 labelValues ...string,
596) (Metric, error) {
597 if desc.err != nil {
598 return nil, desc.err
599 }
600 if err := validateLabelValues(labelValues, len(desc.variableLabels)); err != nil {
601 return nil, err
602 }
603 return &constSummary{
604 desc: desc,
605 count: count,
606 sum: sum,
607 quantiles: quantiles,
608 labelPairs: makeLabelPairs(desc, labelValues),
609 }, nil
610}
611
612// MustNewConstSummary is a version of NewConstSummary that panics where
613// NewConstMetric would have returned an error.
614func MustNewConstSummary(
615 desc *Desc,
616 count uint64,
617 sum float64,
618 quantiles map[float64]float64,
619 labelValues ...string,
620) Metric {
621 m, err := NewConstSummary(desc, count, sum, quantiles, labelValues...)
622 if err != nil {
623 panic(err)
624 }
625 return m
626}