Abhilash S.L | 3b49463 | 2019-07-16 15:51:09 +0530 | [diff] [blame^] | 1 | // Copyright 2019 The etcd Authors |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | package tracker |
| 16 | |
| 17 | import ( |
| 18 | "fmt" |
| 19 | "sort" |
| 20 | |
| 21 | "go.etcd.io/etcd/raft/quorum" |
| 22 | ) |
| 23 | |
| 24 | // Config reflects the configuration tracked in a ProgressTracker. |
| 25 | type Config struct { |
| 26 | Voters quorum.JointConfig |
| 27 | // Learners is a set of IDs corresponding to the learners active in the |
| 28 | // current configuration. |
| 29 | // |
| 30 | // Invariant: Learners and Voters does not intersect, i.e. if a peer is in |
| 31 | // either half of the joint config, it can't be a learner; if it is a |
| 32 | // learner it can't be in either half of the joint config. This invariant |
| 33 | // simplifies the implementation since it allows peers to have clarity about |
| 34 | // its current role without taking into account joint consensus. |
| 35 | Learners map[uint64]struct{} |
| 36 | // TODO(tbg): when we actually carry out joint consensus changes and turn a |
| 37 | // voter into a learner, we cannot add the learner when entering the joint |
| 38 | // state. This is because this would violate the invariant that the inter- |
| 39 | // section of voters and learners is empty. For example, assume a Voter is |
| 40 | // removed and immediately re-added as a learner (or in other words, it is |
| 41 | // demoted). |
| 42 | // |
| 43 | // Initially, the configuration will be |
| 44 | // |
| 45 | // voters: {1 2 3} |
| 46 | // learners: {} |
| 47 | // |
| 48 | // and we want to demote 3. Entering the joint configuration, we naively get |
| 49 | // |
| 50 | // voters: {1 2} & {1 2 3} |
| 51 | // learners: {3} |
| 52 | // |
| 53 | // but this violates the invariant (3 is both voter and learner). Instead, |
| 54 | // we have |
| 55 | // |
| 56 | // voters: {1 2} & {1 2 3} |
| 57 | // learners: {} |
| 58 | // next_learners: {3} |
| 59 | // |
| 60 | // Where 3 is now still purely a voter, but we are remembering the intention |
| 61 | // to make it a learner upon transitioning into the final configuration: |
| 62 | // |
| 63 | // voters: {1 2} |
| 64 | // learners: {3} |
| 65 | // next_learners: {} |
| 66 | // |
| 67 | // Note that next_learners is not used while adding a learner that is not |
| 68 | // also a voter in the joint config. In this case, the learner is added |
| 69 | // to Learners right away when entering the joint configuration, so that it |
| 70 | // is caught up as soon as possible. |
| 71 | // |
| 72 | // NextLearners map[uint64]struct{} |
| 73 | } |
| 74 | |
| 75 | func (c *Config) String() string { |
| 76 | if len(c.Learners) == 0 { |
| 77 | return fmt.Sprintf("voters=%s", c.Voters) |
| 78 | } |
| 79 | return fmt.Sprintf( |
| 80 | "voters=%s learners=%s", |
| 81 | c.Voters, quorum.MajorityConfig(c.Learners).String(), |
| 82 | ) |
| 83 | } |
| 84 | |
| 85 | // ProgressTracker tracks the currently active configuration and the information |
| 86 | // known about the nodes and learners in it. In particular, it tracks the match |
| 87 | // index for each peer which in turn allows reasoning about the committed index. |
| 88 | type ProgressTracker struct { |
| 89 | Config |
| 90 | |
| 91 | Progress map[uint64]*Progress |
| 92 | |
| 93 | Votes map[uint64]bool |
| 94 | |
| 95 | MaxInflight int |
| 96 | } |
| 97 | |
| 98 | // MakeProgressTracker initializes a ProgressTracker. |
| 99 | func MakeProgressTracker(maxInflight int) ProgressTracker { |
| 100 | p := ProgressTracker{ |
| 101 | MaxInflight: maxInflight, |
| 102 | Config: Config{ |
| 103 | Voters: quorum.JointConfig{ |
| 104 | quorum.MajorityConfig{}, |
| 105 | // TODO(tbg): this will be mostly empty, so make it a nil pointer |
| 106 | // in the common case. |
| 107 | quorum.MajorityConfig{}, |
| 108 | }, |
| 109 | Learners: map[uint64]struct{}{}, |
| 110 | }, |
| 111 | Votes: map[uint64]bool{}, |
| 112 | Progress: map[uint64]*Progress{}, |
| 113 | } |
| 114 | return p |
| 115 | } |
| 116 | |
| 117 | // IsSingleton returns true if (and only if) there is only one voting member |
| 118 | // (i.e. the leader) in the current configuration. |
| 119 | func (p *ProgressTracker) IsSingleton() bool { |
| 120 | return len(p.Voters[0]) == 1 && len(p.Voters[1]) == 0 |
| 121 | } |
| 122 | |
| 123 | type matchAckIndexer map[uint64]*Progress |
| 124 | |
| 125 | var _ quorum.AckedIndexer = matchAckIndexer(nil) |
| 126 | |
| 127 | // AckedIndex implements IndexLookuper. |
| 128 | func (l matchAckIndexer) AckedIndex(id uint64) (quorum.Index, bool) { |
| 129 | pr, ok := l[id] |
| 130 | if !ok { |
| 131 | return 0, false |
| 132 | } |
| 133 | return quorum.Index(pr.Match), true |
| 134 | } |
| 135 | |
| 136 | // Committed returns the largest log index known to be committed based on what |
| 137 | // the voting members of the group have acknowledged. |
| 138 | func (p *ProgressTracker) Committed() uint64 { |
| 139 | return uint64(p.Voters.CommittedIndex(matchAckIndexer(p.Progress))) |
| 140 | } |
| 141 | |
| 142 | // RemoveAny removes this peer, which *must* be tracked as a voter or learner, |
| 143 | // from the tracker. |
| 144 | func (p *ProgressTracker) RemoveAny(id uint64) { |
| 145 | _, okPR := p.Progress[id] |
| 146 | _, okV1 := p.Voters[0][id] |
| 147 | _, okV2 := p.Voters[1][id] |
| 148 | _, okL := p.Learners[id] |
| 149 | |
| 150 | okV := okV1 || okV2 |
| 151 | |
| 152 | if !okPR { |
| 153 | panic("attempting to remove unknown peer %x") |
| 154 | } else if !okV && !okL { |
| 155 | panic("attempting to remove unknown peer %x") |
| 156 | } else if okV && okL { |
| 157 | panic(fmt.Sprintf("peer %x is both voter and learner", id)) |
| 158 | } |
| 159 | |
| 160 | delete(p.Voters[0], id) |
| 161 | delete(p.Voters[1], id) |
| 162 | delete(p.Learners, id) |
| 163 | delete(p.Progress, id) |
| 164 | } |
| 165 | |
| 166 | // InitProgress initializes a new progress for the given node or learner. The |
| 167 | // node may not exist yet in either form or a panic will ensue. |
| 168 | func (p *ProgressTracker) InitProgress(id, match, next uint64, isLearner bool) { |
| 169 | if pr := p.Progress[id]; pr != nil { |
| 170 | panic(fmt.Sprintf("peer %x already tracked as node %v", id, pr)) |
| 171 | } |
| 172 | if !isLearner { |
| 173 | p.Voters[0][id] = struct{}{} |
| 174 | } else { |
| 175 | p.Learners[id] = struct{}{} |
| 176 | } |
| 177 | p.Progress[id] = &Progress{Next: next, Match: match, Inflights: NewInflights(p.MaxInflight), IsLearner: isLearner} |
| 178 | } |
| 179 | |
| 180 | // Visit invokes the supplied closure for all tracked progresses. |
| 181 | func (p *ProgressTracker) Visit(f func(id uint64, pr *Progress)) { |
| 182 | for id, pr := range p.Progress { |
| 183 | f(id, pr) |
| 184 | } |
| 185 | } |
| 186 | |
| 187 | // QuorumActive returns true if the quorum is active from the view of the local |
| 188 | // raft state machine. Otherwise, it returns false. |
| 189 | func (p *ProgressTracker) QuorumActive() bool { |
| 190 | votes := map[uint64]bool{} |
| 191 | p.Visit(func(id uint64, pr *Progress) { |
| 192 | if pr.IsLearner { |
| 193 | return |
| 194 | } |
| 195 | votes[id] = pr.RecentActive |
| 196 | }) |
| 197 | |
| 198 | return p.Voters.VoteResult(votes) == quorum.VoteWon |
| 199 | } |
| 200 | |
| 201 | // VoterNodes returns a sorted slice of voters. |
| 202 | func (p *ProgressTracker) VoterNodes() []uint64 { |
| 203 | m := p.Voters.IDs() |
| 204 | nodes := make([]uint64, 0, len(m)) |
| 205 | for id := range m { |
| 206 | nodes = append(nodes, id) |
| 207 | } |
| 208 | sort.Slice(nodes, func(i, j int) bool { return nodes[i] < nodes[j] }) |
| 209 | return nodes |
| 210 | } |
| 211 | |
| 212 | // LearnerNodes returns a sorted slice of learners. |
| 213 | func (p *ProgressTracker) LearnerNodes() []uint64 { |
| 214 | nodes := make([]uint64, 0, len(p.Learners)) |
| 215 | for id := range p.Learners { |
| 216 | nodes = append(nodes, id) |
| 217 | } |
| 218 | sort.Slice(nodes, func(i, j int) bool { return nodes[i] < nodes[j] }) |
| 219 | return nodes |
| 220 | } |
| 221 | |
| 222 | // ResetVotes prepares for a new round of vote counting via recordVote. |
| 223 | func (p *ProgressTracker) ResetVotes() { |
| 224 | p.Votes = map[uint64]bool{} |
| 225 | } |
| 226 | |
| 227 | // RecordVote records that the node with the given id voted for this Raft |
| 228 | // instance if v == true (and declined it otherwise). |
| 229 | func (p *ProgressTracker) RecordVote(id uint64, v bool) { |
| 230 | _, ok := p.Votes[id] |
| 231 | if !ok { |
| 232 | p.Votes[id] = v |
| 233 | } |
| 234 | } |
| 235 | |
| 236 | // TallyVotes returns the number of granted and rejected Votes, and whether the |
| 237 | // election outcome is known. |
| 238 | func (p *ProgressTracker) TallyVotes() (granted int, rejected int, _ quorum.VoteResult) { |
| 239 | // Make sure to populate granted/rejected correctly even if the Votes slice |
| 240 | // contains members no longer part of the configuration. This doesn't really |
| 241 | // matter in the way the numbers are used (they're informational), but might |
| 242 | // as well get it right. |
| 243 | for id, pr := range p.Progress { |
| 244 | if pr.IsLearner { |
| 245 | continue |
| 246 | } |
| 247 | v, voted := p.Votes[id] |
| 248 | if !voted { |
| 249 | continue |
| 250 | } |
| 251 | if v { |
| 252 | granted++ |
| 253 | } else { |
| 254 | rejected++ |
| 255 | } |
| 256 | } |
| 257 | result := p.Voters.VoteResult(p.Votes) |
| 258 | return granted, rejected, result |
| 259 | } |