blob: 10f8a475fca787acfc99872433a4dba95edeeb9f [file] [log] [blame]
// Copyright 2015 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package etcdserver
16
17import (
18 goruntime "runtime"
19 "time"
20
21 "github.com/coreos/etcd/pkg/runtime"
22 "github.com/coreos/etcd/version"
23 "github.com/prometheus/client_golang/prometheus"
24)
25
26var (
27 hasLeader = prometheus.NewGauge(prometheus.GaugeOpts{
28 Namespace: "etcd",
29 Subsystem: "server",
30 Name: "has_leader",
31 Help: "Whether or not a leader exists. 1 is existence, 0 is not.",
32 })
33 isLeader = prometheus.NewGauge(prometheus.GaugeOpts{
34 Namespace: "etcd",
35 Subsystem: "server",
36 Name: "is_leader",
37 Help: "Whether or not this member is a leader. 1 if is, 0 otherwise.",
38 })
39 leaderChanges = prometheus.NewCounter(prometheus.CounterOpts{
40 Namespace: "etcd",
41 Subsystem: "server",
42 Name: "leader_changes_seen_total",
43 Help: "The number of leader changes seen.",
44 })
45 heartbeatSendFailures = prometheus.NewCounter(prometheus.CounterOpts{
46 Namespace: "etcd",
47 Subsystem: "server",
48 Name: "heartbeat_send_failures_total",
49 Help: "The total number of leader heartbeat send failures (likely overloaded from slow disk).",
50 })
51 slowApplies = prometheus.NewCounter(prometheus.CounterOpts{
52 Namespace: "etcd",
53 Subsystem: "server",
54 Name: "slow_apply_total",
55 Help: "The total number of slow apply requests (likely overloaded from slow disk).",
56 })
57 proposalsCommitted = prometheus.NewGauge(prometheus.GaugeOpts{
58 Namespace: "etcd",
59 Subsystem: "server",
60 Name: "proposals_committed_total",
61 Help: "The total number of consensus proposals committed.",
62 })
63 proposalsApplied = prometheus.NewGauge(prometheus.GaugeOpts{
64 Namespace: "etcd",
65 Subsystem: "server",
66 Name: "proposals_applied_total",
67 Help: "The total number of consensus proposals applied.",
68 })
69 proposalsPending = prometheus.NewGauge(prometheus.GaugeOpts{
70 Namespace: "etcd",
71 Subsystem: "server",
72 Name: "proposals_pending",
73 Help: "The current number of pending proposals to commit.",
74 })
75 proposalsFailed = prometheus.NewCounter(prometheus.CounterOpts{
76 Namespace: "etcd",
77 Subsystem: "server",
78 Name: "proposals_failed_total",
79 Help: "The total number of failed proposals seen.",
80 })
81 leaseExpired = prometheus.NewCounter(prometheus.CounterOpts{
82 Namespace: "etcd_debugging",
83 Subsystem: "server",
84 Name: "lease_expired_total",
85 Help: "The total number of expired leases.",
86 })
87 slowReadIndex = prometheus.NewCounter(prometheus.CounterOpts{
88 Namespace: "etcd",
89 Subsystem: "server",
90 Name: "slow_read_indexes_total",
91 Help: "The total number of pending read indexes not in sync with leader's or timed out read index requests.",
92 })
93 readIndexFailed = prometheus.NewCounter(prometheus.CounterOpts{
94 Namespace: "etcd",
95 Subsystem: "server",
96 Name: "read_indexes_failed_total",
97 Help: "The total number of failed read indexes seen.",
98 })
99 quotaBackendBytes = prometheus.NewGauge(prometheus.GaugeOpts{
100 Namespace: "etcd",
101 Subsystem: "server",
102 Name: "quota_backend_bytes",
103 Help: "Current backend storage quota size in bytes.",
104 })
105 currentVersion = prometheus.NewGaugeVec(prometheus.GaugeOpts{
106 Namespace: "etcd",
107 Subsystem: "server",
108 Name: "version",
109 Help: "Which version is running. 1 for 'server_version' label with current version.",
110 },
111 []string{"server_version"})
112 currentGoVersion = prometheus.NewGaugeVec(prometheus.GaugeOpts{
113 Namespace: "etcd",
114 Subsystem: "server",
115 Name: "go_version",
116 Help: "Which Go version server is running with. 1 for 'server_go_version' label with current version.",
117 },
118 []string{"server_go_version"})
119 serverID = prometheus.NewGaugeVec(prometheus.GaugeOpts{
120 Namespace: "etcd",
121 Subsystem: "server",
122 Name: "id",
123 Help: "Server or member ID in hexadecimal format. 1 for 'server_id' label with current ID.",
124 },
125 []string{"server_id"})
126)
127
128func init() {
129 prometheus.MustRegister(hasLeader)
130 prometheus.MustRegister(isLeader)
131 prometheus.MustRegister(leaderChanges)
132 prometheus.MustRegister(heartbeatSendFailures)
133 prometheus.MustRegister(slowApplies)
134 prometheus.MustRegister(proposalsCommitted)
135 prometheus.MustRegister(proposalsApplied)
136 prometheus.MustRegister(proposalsPending)
137 prometheus.MustRegister(proposalsFailed)
138 prometheus.MustRegister(leaseExpired)
139 prometheus.MustRegister(slowReadIndex)
140 prometheus.MustRegister(readIndexFailed)
141 prometheus.MustRegister(quotaBackendBytes)
142 prometheus.MustRegister(currentVersion)
143 prometheus.MustRegister(currentGoVersion)
144 prometheus.MustRegister(serverID)
145
146 currentVersion.With(prometheus.Labels{
147 "server_version": version.Version,
148 }).Set(1)
149 currentGoVersion.With(prometheus.Labels{
150 "server_go_version": goruntime.Version(),
151 }).Set(1)
152}
153
154func monitorFileDescriptor(done <-chan struct{}) {
155 ticker := time.NewTicker(5 * time.Second)
156 defer ticker.Stop()
157 for {
158 used, err := runtime.FDUsage()
159 if err != nil {
160 plog.Errorf("cannot monitor file descriptor usage (%v)", err)
161 return
162 }
163 limit, err := runtime.FDLimit()
164 if err != nil {
165 plog.Errorf("cannot monitor file descriptor usage (%v)", err)
166 return
167 }
168 if used >= limit/5*4 {
169 plog.Warningf("80%% of the file descriptor limit is used [used = %d, limit = %d]", used, limit)
170 }
171 select {
172 case <-ticker.C:
173 case <-done:
174 return
175 }
176 }
177}