blob: 4b3396fbe170bc7f90b01322dd5a8b6d57ab9435 [file] [log] [blame]
Abhilash S.L3b494632019-07-16 15:51:09 +05301// Copyright 2019 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package tracker
16
17import (
18 "fmt"
19 "sort"
20
21 "go.etcd.io/etcd/raft/quorum"
22)
23
24// Config reflects the configuration tracked in a ProgressTracker.
25type Config struct {
26 Voters quorum.JointConfig
27 // Learners is a set of IDs corresponding to the learners active in the
28 // current configuration.
29 //
30 // Invariant: Learners and Voters does not intersect, i.e. if a peer is in
31 // either half of the joint config, it can't be a learner; if it is a
32 // learner it can't be in either half of the joint config. This invariant
33 // simplifies the implementation since it allows peers to have clarity about
34 // its current role without taking into account joint consensus.
35 Learners map[uint64]struct{}
36 // TODO(tbg): when we actually carry out joint consensus changes and turn a
37 // voter into a learner, we cannot add the learner when entering the joint
38 // state. This is because this would violate the invariant that the inter-
39 // section of voters and learners is empty. For example, assume a Voter is
40 // removed and immediately re-added as a learner (or in other words, it is
41 // demoted).
42 //
43 // Initially, the configuration will be
44 //
45 // voters: {1 2 3}
46 // learners: {}
47 //
48 // and we want to demote 3. Entering the joint configuration, we naively get
49 //
50 // voters: {1 2} & {1 2 3}
51 // learners: {3}
52 //
53 // but this violates the invariant (3 is both voter and learner). Instead,
54 // we have
55 //
56 // voters: {1 2} & {1 2 3}
57 // learners: {}
58 // next_learners: {3}
59 //
60 // Where 3 is now still purely a voter, but we are remembering the intention
61 // to make it a learner upon transitioning into the final configuration:
62 //
63 // voters: {1 2}
64 // learners: {3}
65 // next_learners: {}
66 //
67 // Note that next_learners is not used while adding a learner that is not
68 // also a voter in the joint config. In this case, the learner is added
69 // to Learners right away when entering the joint configuration, so that it
70 // is caught up as soon as possible.
71 //
72 // NextLearners map[uint64]struct{}
73}
74
75func (c *Config) String() string {
76 if len(c.Learners) == 0 {
77 return fmt.Sprintf("voters=%s", c.Voters)
78 }
79 return fmt.Sprintf(
80 "voters=%s learners=%s",
81 c.Voters, quorum.MajorityConfig(c.Learners).String(),
82 )
83}
84
85// ProgressTracker tracks the currently active configuration and the information
86// known about the nodes and learners in it. In particular, it tracks the match
87// index for each peer which in turn allows reasoning about the committed index.
88type ProgressTracker struct {
89 Config
90
91 Progress map[uint64]*Progress
92
93 Votes map[uint64]bool
94
95 MaxInflight int
96}
97
98// MakeProgressTracker initializes a ProgressTracker.
99func MakeProgressTracker(maxInflight int) ProgressTracker {
100 p := ProgressTracker{
101 MaxInflight: maxInflight,
102 Config: Config{
103 Voters: quorum.JointConfig{
104 quorum.MajorityConfig{},
105 // TODO(tbg): this will be mostly empty, so make it a nil pointer
106 // in the common case.
107 quorum.MajorityConfig{},
108 },
109 Learners: map[uint64]struct{}{},
110 },
111 Votes: map[uint64]bool{},
112 Progress: map[uint64]*Progress{},
113 }
114 return p
115}
116
117// IsSingleton returns true if (and only if) there is only one voting member
118// (i.e. the leader) in the current configuration.
119func (p *ProgressTracker) IsSingleton() bool {
120 return len(p.Voters[0]) == 1 && len(p.Voters[1]) == 0
121}
122
123type matchAckIndexer map[uint64]*Progress
124
125var _ quorum.AckedIndexer = matchAckIndexer(nil)
126
127// AckedIndex implements IndexLookuper.
128func (l matchAckIndexer) AckedIndex(id uint64) (quorum.Index, bool) {
129 pr, ok := l[id]
130 if !ok {
131 return 0, false
132 }
133 return quorum.Index(pr.Match), true
134}
135
136// Committed returns the largest log index known to be committed based on what
137// the voting members of the group have acknowledged.
138func (p *ProgressTracker) Committed() uint64 {
139 return uint64(p.Voters.CommittedIndex(matchAckIndexer(p.Progress)))
140}
141
142// RemoveAny removes this peer, which *must* be tracked as a voter or learner,
143// from the tracker.
144func (p *ProgressTracker) RemoveAny(id uint64) {
145 _, okPR := p.Progress[id]
146 _, okV1 := p.Voters[0][id]
147 _, okV2 := p.Voters[1][id]
148 _, okL := p.Learners[id]
149
150 okV := okV1 || okV2
151
152 if !okPR {
153 panic("attempting to remove unknown peer %x")
154 } else if !okV && !okL {
155 panic("attempting to remove unknown peer %x")
156 } else if okV && okL {
157 panic(fmt.Sprintf("peer %x is both voter and learner", id))
158 }
159
160 delete(p.Voters[0], id)
161 delete(p.Voters[1], id)
162 delete(p.Learners, id)
163 delete(p.Progress, id)
164}
165
166// InitProgress initializes a new progress for the given node or learner. The
167// node may not exist yet in either form or a panic will ensue.
168func (p *ProgressTracker) InitProgress(id, match, next uint64, isLearner bool) {
169 if pr := p.Progress[id]; pr != nil {
170 panic(fmt.Sprintf("peer %x already tracked as node %v", id, pr))
171 }
172 if !isLearner {
173 p.Voters[0][id] = struct{}{}
174 } else {
175 p.Learners[id] = struct{}{}
176 }
177 p.Progress[id] = &Progress{Next: next, Match: match, Inflights: NewInflights(p.MaxInflight), IsLearner: isLearner}
178}
179
180// Visit invokes the supplied closure for all tracked progresses.
181func (p *ProgressTracker) Visit(f func(id uint64, pr *Progress)) {
182 for id, pr := range p.Progress {
183 f(id, pr)
184 }
185}
186
187// QuorumActive returns true if the quorum is active from the view of the local
188// raft state machine. Otherwise, it returns false.
189func (p *ProgressTracker) QuorumActive() bool {
190 votes := map[uint64]bool{}
191 p.Visit(func(id uint64, pr *Progress) {
192 if pr.IsLearner {
193 return
194 }
195 votes[id] = pr.RecentActive
196 })
197
198 return p.Voters.VoteResult(votes) == quorum.VoteWon
199}
200
201// VoterNodes returns a sorted slice of voters.
202func (p *ProgressTracker) VoterNodes() []uint64 {
203 m := p.Voters.IDs()
204 nodes := make([]uint64, 0, len(m))
205 for id := range m {
206 nodes = append(nodes, id)
207 }
208 sort.Slice(nodes, func(i, j int) bool { return nodes[i] < nodes[j] })
209 return nodes
210}
211
212// LearnerNodes returns a sorted slice of learners.
213func (p *ProgressTracker) LearnerNodes() []uint64 {
214 nodes := make([]uint64, 0, len(p.Learners))
215 for id := range p.Learners {
216 nodes = append(nodes, id)
217 }
218 sort.Slice(nodes, func(i, j int) bool { return nodes[i] < nodes[j] })
219 return nodes
220}
221
222// ResetVotes prepares for a new round of vote counting via recordVote.
223func (p *ProgressTracker) ResetVotes() {
224 p.Votes = map[uint64]bool{}
225}
226
227// RecordVote records that the node with the given id voted for this Raft
228// instance if v == true (and declined it otherwise).
229func (p *ProgressTracker) RecordVote(id uint64, v bool) {
230 _, ok := p.Votes[id]
231 if !ok {
232 p.Votes[id] = v
233 }
234}
235
236// TallyVotes returns the number of granted and rejected Votes, and whether the
237// election outcome is known.
238func (p *ProgressTracker) TallyVotes() (granted int, rejected int, _ quorum.VoteResult) {
239 // Make sure to populate granted/rejected correctly even if the Votes slice
240 // contains members no longer part of the configuration. This doesn't really
241 // matter in the way the numbers are used (they're informational), but might
242 // as well get it right.
243 for id, pr := range p.Progress {
244 if pr.IsLearner {
245 continue
246 }
247 v, voted := p.Votes[id]
248 if !voted {
249 continue
250 }
251 if v {
252 granted++
253 } else {
254 rejected++
255 }
256 }
257 result := p.Voters.VoteResult(p.Votes)
258 return granted, rejected, result
259}