blob: 8858a36b6341364ac2c248e1301ece607aa3c163 [file] [log] [blame]
Scott Baker8461e152019-10-01 14:44:30 -07001// Copyright 2019 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package quorum
16
17import (
18 "fmt"
19 "math"
20 "sort"
21 "strings"
22)
23
24// MajorityConfig is a set of IDs that uses majority quorums to make decisions.
25type MajorityConfig map[uint64]struct{}
26
27func (c MajorityConfig) String() string {
28 sl := make([]uint64, 0, len(c))
29 for id := range c {
30 sl = append(sl, id)
31 }
32 sort.Slice(sl, func(i, j int) bool { return sl[i] < sl[j] })
33 var buf strings.Builder
34 buf.WriteByte('(')
35 for i := range sl {
36 if i > 0 {
37 buf.WriteByte(' ')
38 }
39 fmt.Fprint(&buf, sl[i])
40 }
41 buf.WriteByte(')')
42 return buf.String()
43}
44
45// Describe returns a (multi-line) representation of the commit indexes for the
46// given lookuper.
47func (c MajorityConfig) Describe(l AckedIndexer) string {
48 if len(c) == 0 {
49 return "<empty majority quorum>"
50 }
51 type tup struct {
52 id uint64
53 idx Index
54 ok bool // idx found?
55 bar int // length of bar displayed for this tup
56 }
57
58 // Below, populate .bar so that the i-th largest commit index has bar i (we
59 // plot this as sort of a progress bar). The actual code is a bit more
60 // complicated and also makes sure that equal index => equal bar.
61
62 n := len(c)
63 info := make([]tup, 0, n)
64 for id := range c {
65 idx, ok := l.AckedIndex(id)
66 info = append(info, tup{id: id, idx: idx, ok: ok})
67 }
68
69 // Sort by index
70 sort.Slice(info, func(i, j int) bool {
71 if info[i].idx == info[j].idx {
72 return info[i].id < info[j].id
73 }
74 return info[i].idx < info[j].idx
75 })
76
77 // Populate .bar.
78 for i := range info {
79 if i > 0 && info[i-1].idx < info[i].idx {
80 info[i].bar = i
81 }
82 }
83
84 // Sort by ID.
85 sort.Slice(info, func(i, j int) bool {
86 return info[i].id < info[j].id
87 })
88
89 var buf strings.Builder
90
91 // Print.
92 fmt.Fprint(&buf, strings.Repeat(" ", n)+" idx\n")
93 for i := range info {
94 bar := info[i].bar
95 if !info[i].ok {
96 fmt.Fprint(&buf, "?"+strings.Repeat(" ", n))
97 } else {
98 fmt.Fprint(&buf, strings.Repeat("x", bar)+">"+strings.Repeat(" ", n-bar))
99 }
100 fmt.Fprintf(&buf, " %5d (id=%d)\n", info[i].idx, info[i].id)
101 }
102 return buf.String()
103}
104
105// Slice returns the MajorityConfig as a sorted slice.
106func (c MajorityConfig) Slice() []uint64 {
107 var sl []uint64
108 for id := range c {
109 sl = append(sl, id)
110 }
111 sort.Slice(sl, func(i, j int) bool { return sl[i] < sl[j] })
112 return sl
113}
114
115func insertionSort(sl []uint64) {
116 a, b := 0, len(sl)
117 for i := a + 1; i < b; i++ {
118 for j := i; j > a && sl[j] < sl[j-1]; j-- {
119 sl[j], sl[j-1] = sl[j-1], sl[j]
120 }
121 }
122}
123
124// CommittedIndex computes the committed index from those supplied via the
125// provided AckedIndexer (for the active config).
126func (c MajorityConfig) CommittedIndex(l AckedIndexer) Index {
127 n := len(c)
128 if n == 0 {
129 // This plays well with joint quorums which, when one half is the zero
130 // MajorityConfig, should behave like the other half.
131 return math.MaxUint64
132 }
133
134 // Use an on-stack slice to collect the committed indexes when n <= 7
135 // (otherwise we alloc). The alternative is to stash a slice on
136 // MajorityConfig, but this impairs usability (as is, MajorityConfig is just
137 // a map, and that's nice). The assumption is that running with a
138 // replication factor of >7 is rare, and in cases in which it happens
139 // performance is a lesser concern (additionally the performance
140 // implications of an allocation here are far from drastic).
141 var stk [7]uint64
142 var srt []uint64
143 if len(stk) >= n {
144 srt = stk[:n]
145 } else {
146 srt = make([]uint64, n)
147 }
148
149 {
150 // Fill the slice with the indexes observed. Any unused slots will be
151 // left as zero; these correspond to voters that may report in, but
152 // haven't yet. We fill from the right (since the zeroes will end up on
153 // the left after sorting below anyway).
154 i := n - 1
155 for id := range c {
156 if idx, ok := l.AckedIndex(id); ok {
157 srt[i] = uint64(idx)
158 i--
159 }
160 }
161 }
162
163 // Sort by index. Use a bespoke algorithm (copied from the stdlib's sort
164 // package) to keep srt on the stack.
165 insertionSort(srt)
166
167 // The smallest index into the array for which the value is acked by a
168 // quorum. In other words, from the end of the slice, move n/2+1 to the
169 // left (accounting for zero-indexing).
170 pos := n - (n/2 + 1)
171 return Index(srt[pos])
172}
173
174// VoteResult takes a mapping of voters to yes/no (true/false) votes and returns
175// a result indicating whether the vote is pending (i.e. neither a quorum of
176// yes/no has been reached), won (a quorum of yes has been reached), or lost (a
177// quorum of no has been reached).
178func (c MajorityConfig) VoteResult(votes map[uint64]bool) VoteResult {
179 if len(c) == 0 {
180 // By convention, the elections on an empty config win. This comes in
181 // handy with joint quorums because it'll make a half-populated joint
182 // quorum behave like a majority quorum.
183 return VoteWon
184 }
185
186 ny := [2]int{} // vote counts for no and yes, respectively
187
188 var missing int
189 for id := range c {
190 v, ok := votes[id]
191 if !ok {
192 missing++
193 continue
194 }
195 if v {
196 ny[1]++
197 } else {
198 ny[0]++
199 }
200 }
201
202 q := len(c)/2 + 1
203 if ny[1] >= q {
204 return VoteWon
205 }
206 if ny[1]+missing >= q {
207 return VotePending
208 }
209 return VoteLost
210}