blob: dc9247fed976c3e9073e00ed759867236b125531 [file] [log] [blame]
// Copyright 2018 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
13
14package prometheus
15
16import (
17 "runtime"
18 "runtime/debug"
19 "sync"
20 "time"
21)
22
23type goCollector struct {
24 goroutinesDesc *Desc
25 threadsDesc *Desc
26 gcDesc *Desc
27 goInfoDesc *Desc
28
29 // ms... are memstats related.
30 msLast *runtime.MemStats // Previously collected memstats.
31 msLastTimestamp time.Time
32 msMtx sync.Mutex // Protects msLast and msLastTimestamp.
33 msMetrics memStatsMetrics
34 msRead func(*runtime.MemStats) // For mocking in tests.
35 msMaxWait time.Duration // Wait time for fresh memstats.
36 msMaxAge time.Duration // Maximum allowed age of old memstats.
37}
38
39// NewGoCollector returns a collector that exports metrics about the current Go
40// process. This includes memory stats. To collect those, runtime.ReadMemStats
41// is called. This requires to “stop the world”, which usually only happens for
42// garbage collection (GC). Take the following implications into account when
43// deciding whether to use the Go collector:
44//
45// 1. The performance impact of stopping the world is the more relevant the more
46// frequently metrics are collected. However, with Go1.9 or later the
47// stop-the-world time per metrics collection is very short (~25µs) so that the
48// performance impact will only matter in rare cases. However, with older Go
49// versions, the stop-the-world duration depends on the heap size and can be
50// quite significant (~1.7 ms/GiB as per
51// https://go-review.googlesource.com/c/go/+/34937).
52//
53// 2. During an ongoing GC, nothing else can stop the world. Therefore, if the
54// metrics collection happens to coincide with GC, it will only complete after
55// GC has finished. Usually, GC is fast enough to not cause problems. However,
56// with a very large heap, GC might take multiple seconds, which is enough to
57// cause scrape timeouts in common setups. To avoid this problem, the Go
58// collector will use the memstats from a previous collection if
59// runtime.ReadMemStats takes more than 1s. However, if there are no previously
60// collected memstats, or their collection is more than 5m ago, the collection
61// will block until runtime.ReadMemStats succeeds. (The problem might be solved
62// in Go1.13, see https://github.com/golang/go/issues/19812 for the related Go
63// issue.)
64func NewGoCollector() Collector {
65 return &goCollector{
66 goroutinesDesc: NewDesc(
67 "go_goroutines",
68 "Number of goroutines that currently exist.",
69 nil, nil),
70 threadsDesc: NewDesc(
71 "go_threads",
72 "Number of OS threads created.",
73 nil, nil),
74 gcDesc: NewDesc(
75 "go_gc_duration_seconds",
76 "A summary of the GC invocation durations.",
77 nil, nil),
78 goInfoDesc: NewDesc(
79 "go_info",
80 "Information about the Go environment.",
81 nil, Labels{"version": runtime.Version()}),
82 msLast: &runtime.MemStats{},
83 msRead: runtime.ReadMemStats,
84 msMaxWait: time.Second,
85 msMaxAge: 5 * time.Minute,
86 msMetrics: memStatsMetrics{
87 {
88 desc: NewDesc(
89 memstatNamespace("alloc_bytes"),
90 "Number of bytes allocated and still in use.",
91 nil, nil,
92 ),
93 eval: func(ms *runtime.MemStats) float64 { return float64(ms.Alloc) },
94 valType: GaugeValue,
95 }, {
96 desc: NewDesc(
97 memstatNamespace("alloc_bytes_total"),
98 "Total number of bytes allocated, even if freed.",
99 nil, nil,
100 ),
101 eval: func(ms *runtime.MemStats) float64 { return float64(ms.TotalAlloc) },
102 valType: CounterValue,
103 }, {
104 desc: NewDesc(
105 memstatNamespace("sys_bytes"),
106 "Number of bytes obtained from system.",
107 nil, nil,
108 ),
109 eval: func(ms *runtime.MemStats) float64 { return float64(ms.Sys) },
110 valType: GaugeValue,
111 }, {
112 desc: NewDesc(
113 memstatNamespace("lookups_total"),
114 "Total number of pointer lookups.",
115 nil, nil,
116 ),
117 eval: func(ms *runtime.MemStats) float64 { return float64(ms.Lookups) },
118 valType: CounterValue,
119 }, {
120 desc: NewDesc(
121 memstatNamespace("mallocs_total"),
122 "Total number of mallocs.",
123 nil, nil,
124 ),
125 eval: func(ms *runtime.MemStats) float64 { return float64(ms.Mallocs) },
126 valType: CounterValue,
127 }, {
128 desc: NewDesc(
129 memstatNamespace("frees_total"),
130 "Total number of frees.",
131 nil, nil,
132 ),
133 eval: func(ms *runtime.MemStats) float64 { return float64(ms.Frees) },
134 valType: CounterValue,
135 }, {
136 desc: NewDesc(
137 memstatNamespace("heap_alloc_bytes"),
138 "Number of heap bytes allocated and still in use.",
139 nil, nil,
140 ),
141 eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapAlloc) },
142 valType: GaugeValue,
143 }, {
144 desc: NewDesc(
145 memstatNamespace("heap_sys_bytes"),
146 "Number of heap bytes obtained from system.",
147 nil, nil,
148 ),
149 eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapSys) },
150 valType: GaugeValue,
151 }, {
152 desc: NewDesc(
153 memstatNamespace("heap_idle_bytes"),
154 "Number of heap bytes waiting to be used.",
155 nil, nil,
156 ),
157 eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapIdle) },
158 valType: GaugeValue,
159 }, {
160 desc: NewDesc(
161 memstatNamespace("heap_inuse_bytes"),
162 "Number of heap bytes that are in use.",
163 nil, nil,
164 ),
165 eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapInuse) },
166 valType: GaugeValue,
167 }, {
168 desc: NewDesc(
169 memstatNamespace("heap_released_bytes"),
170 "Number of heap bytes released to OS.",
171 nil, nil,
172 ),
173 eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapReleased) },
174 valType: GaugeValue,
175 }, {
176 desc: NewDesc(
177 memstatNamespace("heap_objects"),
178 "Number of allocated objects.",
179 nil, nil,
180 ),
181 eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapObjects) },
182 valType: GaugeValue,
183 }, {
184 desc: NewDesc(
185 memstatNamespace("stack_inuse_bytes"),
186 "Number of bytes in use by the stack allocator.",
187 nil, nil,
188 ),
189 eval: func(ms *runtime.MemStats) float64 { return float64(ms.StackInuse) },
190 valType: GaugeValue,
191 }, {
192 desc: NewDesc(
193 memstatNamespace("stack_sys_bytes"),
194 "Number of bytes obtained from system for stack allocator.",
195 nil, nil,
196 ),
197 eval: func(ms *runtime.MemStats) float64 { return float64(ms.StackSys) },
198 valType: GaugeValue,
199 }, {
200 desc: NewDesc(
201 memstatNamespace("mspan_inuse_bytes"),
202 "Number of bytes in use by mspan structures.",
203 nil, nil,
204 ),
205 eval: func(ms *runtime.MemStats) float64 { return float64(ms.MSpanInuse) },
206 valType: GaugeValue,
207 }, {
208 desc: NewDesc(
209 memstatNamespace("mspan_sys_bytes"),
210 "Number of bytes used for mspan structures obtained from system.",
211 nil, nil,
212 ),
213 eval: func(ms *runtime.MemStats) float64 { return float64(ms.MSpanSys) },
214 valType: GaugeValue,
215 }, {
216 desc: NewDesc(
217 memstatNamespace("mcache_inuse_bytes"),
218 "Number of bytes in use by mcache structures.",
219 nil, nil,
220 ),
221 eval: func(ms *runtime.MemStats) float64 { return float64(ms.MCacheInuse) },
222 valType: GaugeValue,
223 }, {
224 desc: NewDesc(
225 memstatNamespace("mcache_sys_bytes"),
226 "Number of bytes used for mcache structures obtained from system.",
227 nil, nil,
228 ),
229 eval: func(ms *runtime.MemStats) float64 { return float64(ms.MCacheSys) },
230 valType: GaugeValue,
231 }, {
232 desc: NewDesc(
233 memstatNamespace("buck_hash_sys_bytes"),
234 "Number of bytes used by the profiling bucket hash table.",
235 nil, nil,
236 ),
237 eval: func(ms *runtime.MemStats) float64 { return float64(ms.BuckHashSys) },
238 valType: GaugeValue,
239 }, {
240 desc: NewDesc(
241 memstatNamespace("gc_sys_bytes"),
242 "Number of bytes used for garbage collection system metadata.",
243 nil, nil,
244 ),
245 eval: func(ms *runtime.MemStats) float64 { return float64(ms.GCSys) },
246 valType: GaugeValue,
247 }, {
248 desc: NewDesc(
249 memstatNamespace("other_sys_bytes"),
250 "Number of bytes used for other system allocations.",
251 nil, nil,
252 ),
253 eval: func(ms *runtime.MemStats) float64 { return float64(ms.OtherSys) },
254 valType: GaugeValue,
255 }, {
256 desc: NewDesc(
257 memstatNamespace("next_gc_bytes"),
258 "Number of heap bytes when next garbage collection will take place.",
259 nil, nil,
260 ),
261 eval: func(ms *runtime.MemStats) float64 { return float64(ms.NextGC) },
262 valType: GaugeValue,
263 }, {
264 desc: NewDesc(
265 memstatNamespace("last_gc_time_seconds"),
266 "Number of seconds since 1970 of last garbage collection.",
267 nil, nil,
268 ),
269 eval: func(ms *runtime.MemStats) float64 { return float64(ms.LastGC) / 1e9 },
270 valType: GaugeValue,
271 }, {
272 desc: NewDesc(
273 memstatNamespace("gc_cpu_fraction"),
274 "The fraction of this program's available CPU time used by the GC since the program started.",
275 nil, nil,
276 ),
277 eval: func(ms *runtime.MemStats) float64 { return ms.GCCPUFraction },
278 valType: GaugeValue,
279 },
280 },
281 }
282}
283
// memstatNamespace returns s prefixed with "go_memstats_", yielding the full
// name of a memstats metric.
func memstatNamespace(s string) string {
	const prefix = "go_memstats_"
	return prefix + s
}
287
288// Describe returns all descriptions of the collector.
289func (c *goCollector) Describe(ch chan<- *Desc) {
290 ch <- c.goroutinesDesc
291 ch <- c.threadsDesc
292 ch <- c.gcDesc
293 ch <- c.goInfoDesc
294 for _, i := range c.msMetrics {
295 ch <- i.desc
296 }
297}
298
299// Collect returns the current state of all metrics of the collector.
300func (c *goCollector) Collect(ch chan<- Metric) {
301 var (
302 ms = &runtime.MemStats{}
303 done = make(chan struct{})
304 )
305 // Start reading memstats first as it might take a while.
306 go func() {
307 c.msRead(ms)
308 c.msMtx.Lock()
309 c.msLast = ms
310 c.msLastTimestamp = time.Now()
311 c.msMtx.Unlock()
312 close(done)
313 }()
314
315 ch <- MustNewConstMetric(c.goroutinesDesc, GaugeValue, float64(runtime.NumGoroutine()))
316 n, _ := runtime.ThreadCreateProfile(nil)
317 ch <- MustNewConstMetric(c.threadsDesc, GaugeValue, float64(n))
318
319 var stats debug.GCStats
320 stats.PauseQuantiles = make([]time.Duration, 5)
321 debug.ReadGCStats(&stats)
322
323 quantiles := make(map[float64]float64)
324 for idx, pq := range stats.PauseQuantiles[1:] {
325 quantiles[float64(idx+1)/float64(len(stats.PauseQuantiles)-1)] = pq.Seconds()
326 }
327 quantiles[0.0] = stats.PauseQuantiles[0].Seconds()
328 ch <- MustNewConstSummary(c.gcDesc, uint64(stats.NumGC), stats.PauseTotal.Seconds(), quantiles)
329
330 ch <- MustNewConstMetric(c.goInfoDesc, GaugeValue, 1)
331
332 timer := time.NewTimer(c.msMaxWait)
333 select {
334 case <-done: // Our own ReadMemStats succeeded in time. Use it.
335 timer.Stop() // Important for high collection frequencies to not pile up timers.
336 c.msCollect(ch, ms)
337 return
338 case <-timer.C: // Time out, use last memstats if possible. Continue below.
339 }
340 c.msMtx.Lock()
341 if time.Since(c.msLastTimestamp) < c.msMaxAge {
342 // Last memstats are recent enough. Collect from them under the lock.
343 c.msCollect(ch, c.msLast)
344 c.msMtx.Unlock()
345 return
346 }
347 // If we are here, the last memstats are too old or don't exist. We have
348 // to wait until our own ReadMemStats finally completes. For that to
349 // happen, we have to release the lock.
350 c.msMtx.Unlock()
351 <-done
352 c.msCollect(ch, ms)
353}
354
355func (c *goCollector) msCollect(ch chan<- Metric, ms *runtime.MemStats) {
356 for _, i := range c.msMetrics {
357 ch <- MustNewConstMetric(i.desc, i.valType, i.eval(ms))
358 }
359}
360
361// memStatsMetrics provide description, value, and value type for memstat metrics.
362type memStatsMetrics []struct {
363 desc *Desc
364 eval func(*runtime.MemStats) float64
365 valType ValueType
366}
367
368// NewBuildInfoCollector returns a collector collecting a single metric
369// "go_build_info" with the constant value 1 and three labels "path", "version",
370// and "checksum". Their label values contain the main module path, version, and
371// checksum, respectively. The labels will only have meaningful values if the
372// binary is built with Go module support and from source code retrieved from
373// the source repository (rather than the local file system). This is usually
374// accomplished by building from outside of GOPATH, specifying the full address
375// of the main package, e.g. "GO111MODULE=on go run
376// github.com/prometheus/client_golang/examples/random". If built without Go
377// module support, all label values will be "unknown". If built with Go module
378// support but using the source code from the local file system, the "path" will
379// be set appropriately, but "checksum" will be empty and "version" will be
380// "(devel)".
381//
382// This collector uses only the build information for the main module. See
383// https://github.com/povilasv/prommod for an example of a collector for the
384// module dependencies.
385func NewBuildInfoCollector() Collector {
386 path, version, sum := readBuildInfo()
387 c := &selfCollector{MustNewConstMetric(
388 NewDesc(
389 "go_build_info",
390 "Build information about the main Go module.",
391 nil, Labels{"path": path, "version": version, "checksum": sum},
392 ),
393 GaugeValue, 1)}
394 c.init(c.self)
395 return c
396}