blob: 2f6a0a7b2aebd0dd5f7033580f56f600b4d29f68 [file] [log] [blame]
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15package etcdhttp
16
17import (
18 "context"
19 "encoding/json"
20 "net/http"
21 "time"
22
23 "github.com/coreos/etcd/etcdserver"
24 "github.com/coreos/etcd/etcdserver/etcdserverpb"
25 "github.com/coreos/etcd/raft"
26
27 "github.com/prometheus/client_golang/prometheus"
28 "github.com/prometheus/client_golang/prometheus/promhttp"
29)
30
// URL paths on which this package registers its HTTP handlers.
const (
	// PathMetrics is the path that serves the Prometheus metrics handler.
	PathMetrics = "/metrics"
	// PathHealth is the path that serves the health handler.
	PathHealth = "/health"
)
35
36// HandleMetricsHealth registers metrics and health handlers.
37func HandleMetricsHealth(mux *http.ServeMux, srv etcdserver.ServerV2) {
38 mux.Handle(PathMetrics, promhttp.Handler())
39 mux.Handle(PathHealth, NewHealthHandler(func() Health { return checkHealth(srv) }))
40}
41
42// HandlePrometheus registers prometheus handler on '/metrics'.
43func HandlePrometheus(mux *http.ServeMux) {
44 mux.Handle(PathMetrics, promhttp.Handler())
45}
46
47// NewHealthHandler handles '/health' requests.
48func NewHealthHandler(hfunc func() Health) http.HandlerFunc {
49 return func(w http.ResponseWriter, r *http.Request) {
50 if r.Method != http.MethodGet {
51 w.Header().Set("Allow", http.MethodGet)
52 http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
53 plog.Warningf("/health error (status code %d)", http.StatusMethodNotAllowed)
54 return
55 }
56 h := hfunc()
57 d, _ := json.Marshal(h)
58 if h.Health != "true" {
59 http.Error(w, string(d), http.StatusServiceUnavailable)
60 return
61 }
62 w.WriteHeader(http.StatusOK)
63 w.Write(d)
64 }
65}
66
67var (
68 healthSuccess = prometheus.NewCounter(prometheus.CounterOpts{
69 Namespace: "etcd",
70 Subsystem: "server",
71 Name: "health_success",
72 Help: "The total number of successful health checks",
73 })
74 healthFailed = prometheus.NewCounter(prometheus.CounterOpts{
75 Namespace: "etcd",
76 Subsystem: "server",
77 Name: "health_failures",
78 Help: "The total number of failed health checks",
79 })
80)
81
82func init() {
83 prometheus.MustRegister(healthSuccess)
84 prometheus.MustRegister(healthFailed)
85}
86
// Health is the payload reported for an etcd server health check. It is
// JSON-encoded as an object with a single "health" key.
//
// TODO: remove manual parsing in etcdctl cluster-health
type Health struct {
	// Health is the string "true" when the server is considered healthy;
	// code in this file sets it to "false" otherwise.
	Health string `json:"health"`
}
92
93// TODO: server NOSPACE, etcdserver.ErrNoLeader in health API
94
95func checkHealth(srv etcdserver.ServerV2) Health {
96 h := Health{Health: "true"}
97
98 as := srv.Alarms()
99 if len(as) > 0 {
100 h.Health = "false"
101 for _, v := range as {
102 plog.Warningf("/health error due to an alarm %s", v.String())
103 }
104 }
105
106 if h.Health == "true" {
107 if uint64(srv.Leader()) == raft.None {
108 h.Health = "false"
109 plog.Warningf("/health error; no leader (status code %d)", http.StatusServiceUnavailable)
110 }
111 }
112
113 if h.Health == "true" {
114 ctx, cancel := context.WithTimeout(context.Background(), time.Second)
115 _, err := srv.Do(ctx, etcdserverpb.Request{Method: "QGET"})
116 cancel()
117 if err != nil {
118 h.Health = "false"
119 plog.Warningf("/health error; QGET failed %v (status code %d)", err, http.StatusServiceUnavailable)
120 }
121 }
122
123 if h.Health == "true" {
124 healthSuccess.Inc()
125 plog.Infof("/health OK (status code %d)", http.StatusOK)
126 } else {
127 healthFailed.Inc()
128 }
129 return h
130}