khenaidoo | ab1f7bd | 2019-11-14 14:00:27 -0500 | [diff] [blame^] | 1 | // Copyright 2018 The Prometheus Authors |
| 2 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 3 | // you may not use this file except in compliance with the License. |
| 4 | // You may obtain a copy of the License at |
| 5 | // |
| 6 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 7 | // |
| 8 | // Unless required by applicable law or agreed to in writing, software |
| 9 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 11 | // See the License for the specific language governing permissions and |
| 12 | // limitations under the License. |
| 13 | |
| 14 | package prometheus |
| 15 | |
| 16 | import ( |
| 17 | "runtime" |
| 18 | "runtime/debug" |
| 19 | "sync" |
| 20 | "time" |
| 21 | ) |
| 22 | |
| 23 | type goCollector struct { |
| 24 | goroutinesDesc *Desc |
| 25 | threadsDesc *Desc |
| 26 | gcDesc *Desc |
| 27 | goInfoDesc *Desc |
| 28 | |
| 29 | // ms... are memstats related. |
| 30 | msLast *runtime.MemStats // Previously collected memstats. |
| 31 | msLastTimestamp time.Time |
| 32 | msMtx sync.Mutex // Protects msLast and msLastTimestamp. |
| 33 | msMetrics memStatsMetrics |
| 34 | msRead func(*runtime.MemStats) // For mocking in tests. |
| 35 | msMaxWait time.Duration // Wait time for fresh memstats. |
| 36 | msMaxAge time.Duration // Maximum allowed age of old memstats. |
| 37 | } |
| 38 | |
| 39 | // NewGoCollector returns a collector that exports metrics about the current Go |
| 40 | // process. This includes memory stats. To collect those, runtime.ReadMemStats |
| 41 | // is called. This requires to “stop the world”, which usually only happens for |
| 42 | // garbage collection (GC). Take the following implications into account when |
| 43 | // deciding whether to use the Go collector: |
| 44 | // |
| 45 | // 1. The performance impact of stopping the world is the more relevant the more |
| 46 | // frequently metrics are collected. However, with Go1.9 or later the |
| 47 | // stop-the-world time per metrics collection is very short (~25µs) so that the |
| 48 | // performance impact will only matter in rare cases. However, with older Go |
| 49 | // versions, the stop-the-world duration depends on the heap size and can be |
| 50 | // quite significant (~1.7 ms/GiB as per |
| 51 | // https://go-review.googlesource.com/c/go/+/34937). |
| 52 | // |
| 53 | // 2. During an ongoing GC, nothing else can stop the world. Therefore, if the |
| 54 | // metrics collection happens to coincide with GC, it will only complete after |
| 55 | // GC has finished. Usually, GC is fast enough to not cause problems. However, |
| 56 | // with a very large heap, GC might take multiple seconds, which is enough to |
| 57 | // cause scrape timeouts in common setups. To avoid this problem, the Go |
| 58 | // collector will use the memstats from a previous collection if |
| 59 | // runtime.ReadMemStats takes more than 1s. However, if there are no previously |
| 60 | // collected memstats, or their collection is more than 5m ago, the collection |
| 61 | // will block until runtime.ReadMemStats succeeds. (The problem might be solved |
| 62 | // in Go1.13, see https://github.com/golang/go/issues/19812 for the related Go |
| 63 | // issue.) |
| 64 | func NewGoCollector() Collector { |
| 65 | return &goCollector{ |
| 66 | goroutinesDesc: NewDesc( |
| 67 | "go_goroutines", |
| 68 | "Number of goroutines that currently exist.", |
| 69 | nil, nil), |
| 70 | threadsDesc: NewDesc( |
| 71 | "go_threads", |
| 72 | "Number of OS threads created.", |
| 73 | nil, nil), |
| 74 | gcDesc: NewDesc( |
| 75 | "go_gc_duration_seconds", |
| 76 | "A summary of the GC invocation durations.", |
| 77 | nil, nil), |
| 78 | goInfoDesc: NewDesc( |
| 79 | "go_info", |
| 80 | "Information about the Go environment.", |
| 81 | nil, Labels{"version": runtime.Version()}), |
| 82 | msLast: &runtime.MemStats{}, |
| 83 | msRead: runtime.ReadMemStats, |
| 84 | msMaxWait: time.Second, |
| 85 | msMaxAge: 5 * time.Minute, |
| 86 | msMetrics: memStatsMetrics{ |
| 87 | { |
| 88 | desc: NewDesc( |
| 89 | memstatNamespace("alloc_bytes"), |
| 90 | "Number of bytes allocated and still in use.", |
| 91 | nil, nil, |
| 92 | ), |
| 93 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.Alloc) }, |
| 94 | valType: GaugeValue, |
| 95 | }, { |
| 96 | desc: NewDesc( |
| 97 | memstatNamespace("alloc_bytes_total"), |
| 98 | "Total number of bytes allocated, even if freed.", |
| 99 | nil, nil, |
| 100 | ), |
| 101 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.TotalAlloc) }, |
| 102 | valType: CounterValue, |
| 103 | }, { |
| 104 | desc: NewDesc( |
| 105 | memstatNamespace("sys_bytes"), |
| 106 | "Number of bytes obtained from system.", |
| 107 | nil, nil, |
| 108 | ), |
| 109 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.Sys) }, |
| 110 | valType: GaugeValue, |
| 111 | }, { |
| 112 | desc: NewDesc( |
| 113 | memstatNamespace("lookups_total"), |
| 114 | "Total number of pointer lookups.", |
| 115 | nil, nil, |
| 116 | ), |
| 117 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.Lookups) }, |
| 118 | valType: CounterValue, |
| 119 | }, { |
| 120 | desc: NewDesc( |
| 121 | memstatNamespace("mallocs_total"), |
| 122 | "Total number of mallocs.", |
| 123 | nil, nil, |
| 124 | ), |
| 125 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.Mallocs) }, |
| 126 | valType: CounterValue, |
| 127 | }, { |
| 128 | desc: NewDesc( |
| 129 | memstatNamespace("frees_total"), |
| 130 | "Total number of frees.", |
| 131 | nil, nil, |
| 132 | ), |
| 133 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.Frees) }, |
| 134 | valType: CounterValue, |
| 135 | }, { |
| 136 | desc: NewDesc( |
| 137 | memstatNamespace("heap_alloc_bytes"), |
| 138 | "Number of heap bytes allocated and still in use.", |
| 139 | nil, nil, |
| 140 | ), |
| 141 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapAlloc) }, |
| 142 | valType: GaugeValue, |
| 143 | }, { |
| 144 | desc: NewDesc( |
| 145 | memstatNamespace("heap_sys_bytes"), |
| 146 | "Number of heap bytes obtained from system.", |
| 147 | nil, nil, |
| 148 | ), |
| 149 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapSys) }, |
| 150 | valType: GaugeValue, |
| 151 | }, { |
| 152 | desc: NewDesc( |
| 153 | memstatNamespace("heap_idle_bytes"), |
| 154 | "Number of heap bytes waiting to be used.", |
| 155 | nil, nil, |
| 156 | ), |
| 157 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapIdle) }, |
| 158 | valType: GaugeValue, |
| 159 | }, { |
| 160 | desc: NewDesc( |
| 161 | memstatNamespace("heap_inuse_bytes"), |
| 162 | "Number of heap bytes that are in use.", |
| 163 | nil, nil, |
| 164 | ), |
| 165 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapInuse) }, |
| 166 | valType: GaugeValue, |
| 167 | }, { |
| 168 | desc: NewDesc( |
| 169 | memstatNamespace("heap_released_bytes"), |
| 170 | "Number of heap bytes released to OS.", |
| 171 | nil, nil, |
| 172 | ), |
| 173 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapReleased) }, |
| 174 | valType: GaugeValue, |
| 175 | }, { |
| 176 | desc: NewDesc( |
| 177 | memstatNamespace("heap_objects"), |
| 178 | "Number of allocated objects.", |
| 179 | nil, nil, |
| 180 | ), |
| 181 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.HeapObjects) }, |
| 182 | valType: GaugeValue, |
| 183 | }, { |
| 184 | desc: NewDesc( |
| 185 | memstatNamespace("stack_inuse_bytes"), |
| 186 | "Number of bytes in use by the stack allocator.", |
| 187 | nil, nil, |
| 188 | ), |
| 189 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.StackInuse) }, |
| 190 | valType: GaugeValue, |
| 191 | }, { |
| 192 | desc: NewDesc( |
| 193 | memstatNamespace("stack_sys_bytes"), |
| 194 | "Number of bytes obtained from system for stack allocator.", |
| 195 | nil, nil, |
| 196 | ), |
| 197 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.StackSys) }, |
| 198 | valType: GaugeValue, |
| 199 | }, { |
| 200 | desc: NewDesc( |
| 201 | memstatNamespace("mspan_inuse_bytes"), |
| 202 | "Number of bytes in use by mspan structures.", |
| 203 | nil, nil, |
| 204 | ), |
| 205 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.MSpanInuse) }, |
| 206 | valType: GaugeValue, |
| 207 | }, { |
| 208 | desc: NewDesc( |
| 209 | memstatNamespace("mspan_sys_bytes"), |
| 210 | "Number of bytes used for mspan structures obtained from system.", |
| 211 | nil, nil, |
| 212 | ), |
| 213 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.MSpanSys) }, |
| 214 | valType: GaugeValue, |
| 215 | }, { |
| 216 | desc: NewDesc( |
| 217 | memstatNamespace("mcache_inuse_bytes"), |
| 218 | "Number of bytes in use by mcache structures.", |
| 219 | nil, nil, |
| 220 | ), |
| 221 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.MCacheInuse) }, |
| 222 | valType: GaugeValue, |
| 223 | }, { |
| 224 | desc: NewDesc( |
| 225 | memstatNamespace("mcache_sys_bytes"), |
| 226 | "Number of bytes used for mcache structures obtained from system.", |
| 227 | nil, nil, |
| 228 | ), |
| 229 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.MCacheSys) }, |
| 230 | valType: GaugeValue, |
| 231 | }, { |
| 232 | desc: NewDesc( |
| 233 | memstatNamespace("buck_hash_sys_bytes"), |
| 234 | "Number of bytes used by the profiling bucket hash table.", |
| 235 | nil, nil, |
| 236 | ), |
| 237 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.BuckHashSys) }, |
| 238 | valType: GaugeValue, |
| 239 | }, { |
| 240 | desc: NewDesc( |
| 241 | memstatNamespace("gc_sys_bytes"), |
| 242 | "Number of bytes used for garbage collection system metadata.", |
| 243 | nil, nil, |
| 244 | ), |
| 245 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.GCSys) }, |
| 246 | valType: GaugeValue, |
| 247 | }, { |
| 248 | desc: NewDesc( |
| 249 | memstatNamespace("other_sys_bytes"), |
| 250 | "Number of bytes used for other system allocations.", |
| 251 | nil, nil, |
| 252 | ), |
| 253 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.OtherSys) }, |
| 254 | valType: GaugeValue, |
| 255 | }, { |
| 256 | desc: NewDesc( |
| 257 | memstatNamespace("next_gc_bytes"), |
| 258 | "Number of heap bytes when next garbage collection will take place.", |
| 259 | nil, nil, |
| 260 | ), |
| 261 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.NextGC) }, |
| 262 | valType: GaugeValue, |
| 263 | }, { |
| 264 | desc: NewDesc( |
| 265 | memstatNamespace("last_gc_time_seconds"), |
| 266 | "Number of seconds since 1970 of last garbage collection.", |
| 267 | nil, nil, |
| 268 | ), |
| 269 | eval: func(ms *runtime.MemStats) float64 { return float64(ms.LastGC) / 1e9 }, |
| 270 | valType: GaugeValue, |
| 271 | }, { |
| 272 | desc: NewDesc( |
| 273 | memstatNamespace("gc_cpu_fraction"), |
| 274 | "The fraction of this program's available CPU time used by the GC since the program started.", |
| 275 | nil, nil, |
| 276 | ), |
| 277 | eval: func(ms *runtime.MemStats) float64 { return ms.GCCPUFraction }, |
| 278 | valType: GaugeValue, |
| 279 | }, |
| 280 | }, |
| 281 | } |
| 282 | } |
| 283 | |
// memstatNamespace returns the full metric name for the memstats metric with
// the given suffix s by prepending the common "go_memstats_" prefix.
func memstatNamespace(s string) string {
	const prefix = "go_memstats_"
	return prefix + s
}
| 287 | |
| 288 | // Describe returns all descriptions of the collector. |
| 289 | func (c *goCollector) Describe(ch chan<- *Desc) { |
| 290 | ch <- c.goroutinesDesc |
| 291 | ch <- c.threadsDesc |
| 292 | ch <- c.gcDesc |
| 293 | ch <- c.goInfoDesc |
| 294 | for _, i := range c.msMetrics { |
| 295 | ch <- i.desc |
| 296 | } |
| 297 | } |
| 298 | |
| 299 | // Collect returns the current state of all metrics of the collector. |
| 300 | func (c *goCollector) Collect(ch chan<- Metric) { |
| 301 | var ( |
| 302 | ms = &runtime.MemStats{} |
| 303 | done = make(chan struct{}) |
| 304 | ) |
| 305 | // Start reading memstats first as it might take a while. |
| 306 | go func() { |
| 307 | c.msRead(ms) |
| 308 | c.msMtx.Lock() |
| 309 | c.msLast = ms |
| 310 | c.msLastTimestamp = time.Now() |
| 311 | c.msMtx.Unlock() |
| 312 | close(done) |
| 313 | }() |
| 314 | |
| 315 | ch <- MustNewConstMetric(c.goroutinesDesc, GaugeValue, float64(runtime.NumGoroutine())) |
| 316 | n, _ := runtime.ThreadCreateProfile(nil) |
| 317 | ch <- MustNewConstMetric(c.threadsDesc, GaugeValue, float64(n)) |
| 318 | |
| 319 | var stats debug.GCStats |
| 320 | stats.PauseQuantiles = make([]time.Duration, 5) |
| 321 | debug.ReadGCStats(&stats) |
| 322 | |
| 323 | quantiles := make(map[float64]float64) |
| 324 | for idx, pq := range stats.PauseQuantiles[1:] { |
| 325 | quantiles[float64(idx+1)/float64(len(stats.PauseQuantiles)-1)] = pq.Seconds() |
| 326 | } |
| 327 | quantiles[0.0] = stats.PauseQuantiles[0].Seconds() |
| 328 | ch <- MustNewConstSummary(c.gcDesc, uint64(stats.NumGC), stats.PauseTotal.Seconds(), quantiles) |
| 329 | |
| 330 | ch <- MustNewConstMetric(c.goInfoDesc, GaugeValue, 1) |
| 331 | |
| 332 | timer := time.NewTimer(c.msMaxWait) |
| 333 | select { |
| 334 | case <-done: // Our own ReadMemStats succeeded in time. Use it. |
| 335 | timer.Stop() // Important for high collection frequencies to not pile up timers. |
| 336 | c.msCollect(ch, ms) |
| 337 | return |
| 338 | case <-timer.C: // Time out, use last memstats if possible. Continue below. |
| 339 | } |
| 340 | c.msMtx.Lock() |
| 341 | if time.Since(c.msLastTimestamp) < c.msMaxAge { |
| 342 | // Last memstats are recent enough. Collect from them under the lock. |
| 343 | c.msCollect(ch, c.msLast) |
| 344 | c.msMtx.Unlock() |
| 345 | return |
| 346 | } |
| 347 | // If we are here, the last memstats are too old or don't exist. We have |
| 348 | // to wait until our own ReadMemStats finally completes. For that to |
| 349 | // happen, we have to release the lock. |
| 350 | c.msMtx.Unlock() |
| 351 | <-done |
| 352 | c.msCollect(ch, ms) |
| 353 | } |
| 354 | |
| 355 | func (c *goCollector) msCollect(ch chan<- Metric, ms *runtime.MemStats) { |
| 356 | for _, i := range c.msMetrics { |
| 357 | ch <- MustNewConstMetric(i.desc, i.valType, i.eval(ms)) |
| 358 | } |
| 359 | } |
| 360 | |
| 361 | // memStatsMetrics provide description, value, and value type for memstat metrics. |
| 362 | type memStatsMetrics []struct { |
| 363 | desc *Desc |
| 364 | eval func(*runtime.MemStats) float64 |
| 365 | valType ValueType |
| 366 | } |
| 367 | |
| 368 | // NewBuildInfoCollector returns a collector collecting a single metric |
| 369 | // "go_build_info" with the constant value 1 and three labels "path", "version", |
| 370 | // and "checksum". Their label values contain the main module path, version, and |
| 371 | // checksum, respectively. The labels will only have meaningful values if the |
| 372 | // binary is built with Go module support and from source code retrieved from |
| 373 | // the source repository (rather than the local file system). This is usually |
| 374 | // accomplished by building from outside of GOPATH, specifying the full address |
| 375 | // of the main package, e.g. "GO111MODULE=on go run |
| 376 | // github.com/prometheus/client_golang/examples/random". If built without Go |
| 377 | // module support, all label values will be "unknown". If built with Go module |
| 378 | // support but using the source code from the local file system, the "path" will |
| 379 | // be set appropriately, but "checksum" will be empty and "version" will be |
| 380 | // "(devel)". |
| 381 | // |
| 382 | // This collector uses only the build information for the main module. See |
| 383 | // https://github.com/povilasv/prommod for an example of a collector for the |
| 384 | // module dependencies. |
| 385 | func NewBuildInfoCollector() Collector { |
| 386 | path, version, sum := readBuildInfo() |
| 387 | c := &selfCollector{MustNewConstMetric( |
| 388 | NewDesc( |
| 389 | "go_build_info", |
| 390 | "Build information about the main Go module.", |
| 391 | nil, Labels{"path": path, "version": version, "checksum": sum}, |
| 392 | ), |
| 393 | GaugeValue, 1)} |
| 394 | c.init(c.self) |
| 395 | return c |
| 396 | } |