// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
| 14 | |
| 15 | package rafthttp |
| 16 | |
| 17 | import ( |
| 18 | "time" |
| 19 | |
| 20 | "github.com/prometheus/client_golang/prometheus" |
| 21 | "github.com/xiang90/probing" |
| 22 | ) |
| 23 | |
| 24 | var ( |
| 25 | // proberInterval must be shorter than read timeout. |
| 26 | // Or the connection will time-out. |
| 27 | proberInterval = ConnReadTimeout - time.Second |
| 28 | statusMonitoringInterval = 30 * time.Second |
| 29 | statusErrorInterval = 5 * time.Second |
| 30 | ) |
| 31 | |
// RoundTripperNameSnapshot names the round-tripper that sends merged
// snapshot messages.
const RoundTripperNameSnapshot = "ROUND_TRIPPER_SNAPSHOT"

// RoundTripperNameRaftMessage names the round-tripper that sends every Raft
// message other than "snap.Message".
const RoundTripperNameRaftMessage = "ROUND_TRIPPER_RAFT_MESSAGE"
| 39 | |
| 40 | func addPeerToProber(p probing.Prober, id string, us []string, roundTripperName string, rttSecProm *prometheus.HistogramVec) { |
| 41 | hus := make([]string, len(us)) |
| 42 | for i := range us { |
| 43 | hus[i] = us[i] + ProbingPrefix |
| 44 | } |
| 45 | |
| 46 | p.AddHTTP(id, proberInterval, hus) |
| 47 | |
| 48 | s, err := p.Status(id) |
| 49 | if err != nil { |
| 50 | plog.Errorf("failed to add peer %s into prober", id) |
| 51 | } else { |
| 52 | go monitorProbingStatus(s, id, roundTripperName, rttSecProm) |
| 53 | } |
| 54 | } |
| 55 | |
| 56 | func monitorProbingStatus(s probing.Status, id string, roundTripperName string, rttSecProm *prometheus.HistogramVec) { |
| 57 | // set the first interval short to log error early. |
| 58 | interval := statusErrorInterval |
| 59 | for { |
| 60 | select { |
| 61 | case <-time.After(interval): |
| 62 | if !s.Health() { |
| 63 | plog.Warningf("health check for peer %s could not connect: %v (prober %q)", id, s.Err(), roundTripperName) |
| 64 | interval = statusErrorInterval |
| 65 | } else { |
| 66 | interval = statusMonitoringInterval |
| 67 | } |
| 68 | if s.ClockDiff() > time.Second { |
| 69 | plog.Warningf("the clock difference against peer %s is too high [%v > %v] (prober %q)", id, s.ClockDiff(), time.Second, roundTripperName) |
| 70 | } |
| 71 | rttSecProm.WithLabelValues(id).Observe(s.SRTT().Seconds()) |
| 72 | case <-s.StopNotify(): |
| 73 | return |
| 74 | } |
| 75 | } |
| 76 | } |