blob: ba5c5411d5fc3e0daed1cd9d041c294d7c54b8e3 [file] [log] [blame]
khenaidood948f772021-08-11 17:49:24 -04001// Copyright 2015 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package membership
16
17import (
18 "bytes"
19 "context"
20 "crypto/sha1"
21 "encoding/binary"
22 "encoding/json"
23 "fmt"
24 "path"
25 "sort"
26 "strings"
27 "sync"
28 "time"
29
30 "github.com/coreos/etcd/mvcc/backend"
31 "github.com/coreos/etcd/pkg/netutil"
32 "github.com/coreos/etcd/pkg/types"
33 "github.com/coreos/etcd/raft"
34 "github.com/coreos/etcd/raft/raftpb"
35 "github.com/coreos/etcd/store"
36 "github.com/coreos/etcd/version"
37
38 "github.com/coreos/go-semver/semver"
39 "github.com/prometheus/client_golang/prometheus"
40)
41
42// RaftCluster is a list of Members that belong to the same raft cluster
43type RaftCluster struct {
44 id types.ID
45 token string
46
47 store store.Store
48 be backend.Backend
49
50 sync.Mutex // guards the fields below
51 version *semver.Version
52 members map[types.ID]*Member
53 // removed contains the ids of removed members in the cluster.
54 // removed id cannot be reused.
55 removed map[types.ID]bool
56}
57
58func NewClusterFromURLsMap(token string, urlsmap types.URLsMap) (*RaftCluster, error) {
59 c := NewCluster(token)
60 for name, urls := range urlsmap {
61 m := NewMember(name, urls, token, nil)
62 if _, ok := c.members[m.ID]; ok {
63 return nil, fmt.Errorf("member exists with identical ID %v", m)
64 }
65 if uint64(m.ID) == raft.None {
66 return nil, fmt.Errorf("cannot use %x as member id", raft.None)
67 }
68 c.members[m.ID] = m
69 }
70 c.genID()
71 return c, nil
72}
73
74func NewClusterFromMembers(token string, id types.ID, membs []*Member) *RaftCluster {
75 c := NewCluster(token)
76 c.id = id
77 for _, m := range membs {
78 c.members[m.ID] = m
79 }
80 return c
81}
82
83func NewCluster(token string) *RaftCluster {
84 return &RaftCluster{
85 token: token,
86 members: make(map[types.ID]*Member),
87 removed: make(map[types.ID]bool),
88 }
89}
90
91func (c *RaftCluster) ID() types.ID { return c.id }
92
93func (c *RaftCluster) Members() []*Member {
94 c.Lock()
95 defer c.Unlock()
96 var ms MembersByID
97 for _, m := range c.members {
98 ms = append(ms, m.Clone())
99 }
100 sort.Sort(ms)
101 return []*Member(ms)
102}
103
104func (c *RaftCluster) Member(id types.ID) *Member {
105 c.Lock()
106 defer c.Unlock()
107 return c.members[id].Clone()
108}
109
110// MemberByName returns a Member with the given name if exists.
111// If more than one member has the given name, it will panic.
112func (c *RaftCluster) MemberByName(name string) *Member {
113 c.Lock()
114 defer c.Unlock()
115 var memb *Member
116 for _, m := range c.members {
117 if m.Name == name {
118 if memb != nil {
119 plog.Panicf("two members with the given name %q exist", name)
120 }
121 memb = m
122 }
123 }
124 return memb.Clone()
125}
126
127func (c *RaftCluster) MemberIDs() []types.ID {
128 c.Lock()
129 defer c.Unlock()
130 var ids []types.ID
131 for _, m := range c.members {
132 ids = append(ids, m.ID)
133 }
134 sort.Sort(types.IDSlice(ids))
135 return ids
136}
137
138func (c *RaftCluster) IsIDRemoved(id types.ID) bool {
139 c.Lock()
140 defer c.Unlock()
141 return c.removed[id]
142}
143
144// PeerURLs returns a list of all peer addresses.
145// The returned list is sorted in ascending lexicographical order.
146func (c *RaftCluster) PeerURLs() []string {
147 c.Lock()
148 defer c.Unlock()
149 urls := make([]string, 0)
150 for _, p := range c.members {
151 urls = append(urls, p.PeerURLs...)
152 }
153 sort.Strings(urls)
154 return urls
155}
156
157// ClientURLs returns a list of all client addresses.
158// The returned list is sorted in ascending lexicographical order.
159func (c *RaftCluster) ClientURLs() []string {
160 c.Lock()
161 defer c.Unlock()
162 urls := make([]string, 0)
163 for _, p := range c.members {
164 urls = append(urls, p.ClientURLs...)
165 }
166 sort.Strings(urls)
167 return urls
168}
169
170func (c *RaftCluster) String() string {
171 c.Lock()
172 defer c.Unlock()
173 b := &bytes.Buffer{}
174 fmt.Fprintf(b, "{ClusterID:%s ", c.id)
175 var ms []string
176 for _, m := range c.members {
177 ms = append(ms, fmt.Sprintf("%+v", m))
178 }
179 fmt.Fprintf(b, "Members:[%s] ", strings.Join(ms, " "))
180 var ids []string
181 for id := range c.removed {
182 ids = append(ids, id.String())
183 }
184 fmt.Fprintf(b, "RemovedMemberIDs:[%s]}", strings.Join(ids, " "))
185 return b.String()
186}
187
188func (c *RaftCluster) genID() {
189 mIDs := c.MemberIDs()
190 b := make([]byte, 8*len(mIDs))
191 for i, id := range mIDs {
192 binary.BigEndian.PutUint64(b[8*i:], uint64(id))
193 }
194 hash := sha1.Sum(b)
195 c.id = types.ID(binary.BigEndian.Uint64(hash[:8]))
196}
197
198func (c *RaftCluster) SetID(id types.ID) { c.id = id }
199
200func (c *RaftCluster) SetStore(st store.Store) { c.store = st }
201
202func (c *RaftCluster) SetBackend(be backend.Backend) {
203 c.be = be
204 mustCreateBackendBuckets(c.be)
205}
206
207func (c *RaftCluster) Recover(onSet func(*semver.Version)) {
208 c.Lock()
209 defer c.Unlock()
210
211 c.members, c.removed = membersFromStore(c.store)
212 c.version = clusterVersionFromStore(c.store)
213 mustDetectDowngrade(c.version)
214 onSet(c.version)
215
216 for _, m := range c.members {
217 plog.Infof("added member %s %v to cluster %s from store", m.ID, m.PeerURLs, c.id)
218 }
219 if c.version != nil {
220 plog.Infof("set the cluster version to %v from store", version.Cluster(c.version.String()))
221 }
222}
223
224// ValidateConfigurationChange takes a proposed ConfChange and
225// ensures that it is still valid.
226func (c *RaftCluster) ValidateConfigurationChange(cc raftpb.ConfChange) error {
227 members, removed := membersFromStore(c.store)
228 id := types.ID(cc.NodeID)
229 if removed[id] {
230 return ErrIDRemoved
231 }
232 switch cc.Type {
233 case raftpb.ConfChangeAddNode:
234 if members[id] != nil {
235 return ErrIDExists
236 }
237 urls := make(map[string]bool)
238 for _, m := range members {
239 for _, u := range m.PeerURLs {
240 urls[u] = true
241 }
242 }
243 m := new(Member)
244 if err := json.Unmarshal(cc.Context, m); err != nil {
245 plog.Panicf("unmarshal member should never fail: %v", err)
246 }
247 for _, u := range m.PeerURLs {
248 if urls[u] {
249 return ErrPeerURLexists
250 }
251 }
252 case raftpb.ConfChangeRemoveNode:
253 if members[id] == nil {
254 return ErrIDNotFound
255 }
256 case raftpb.ConfChangeUpdateNode:
257 if members[id] == nil {
258 return ErrIDNotFound
259 }
260 urls := make(map[string]bool)
261 for _, m := range members {
262 if m.ID == id {
263 continue
264 }
265 for _, u := range m.PeerURLs {
266 urls[u] = true
267 }
268 }
269 m := new(Member)
270 if err := json.Unmarshal(cc.Context, m); err != nil {
271 plog.Panicf("unmarshal member should never fail: %v", err)
272 }
273 for _, u := range m.PeerURLs {
274 if urls[u] {
275 return ErrPeerURLexists
276 }
277 }
278 default:
279 plog.Panicf("ConfChange type should be either AddNode, RemoveNode or UpdateNode")
280 }
281 return nil
282}
283
284// AddMember adds a new Member into the cluster, and saves the given member's
285// raftAttributes into the store. The given member should have empty attributes.
286// A Member with a matching id must not exist.
287func (c *RaftCluster) AddMember(m *Member) {
288 c.Lock()
289 defer c.Unlock()
290 if c.store != nil {
291 mustSaveMemberToStore(c.store, m)
292 }
293 if c.be != nil {
294 mustSaveMemberToBackend(c.be, m)
295 }
296
297 c.members[m.ID] = m
298
299 plog.Infof("added member %s %v to cluster %s", m.ID, m.PeerURLs, c.id)
300}
301
302// RemoveMember removes a member from the store.
303// The given id MUST exist, or the function panics.
304func (c *RaftCluster) RemoveMember(id types.ID) {
305 c.Lock()
306 defer c.Unlock()
307 if c.store != nil {
308 mustDeleteMemberFromStore(c.store, id)
309 }
310 if c.be != nil {
311 mustDeleteMemberFromBackend(c.be, id)
312 }
313
314 delete(c.members, id)
315 c.removed[id] = true
316
317 plog.Infof("removed member %s from cluster %s", id, c.id)
318}
319
320func (c *RaftCluster) UpdateAttributes(id types.ID, attr Attributes) {
321 c.Lock()
322 defer c.Unlock()
323 if m, ok := c.members[id]; ok {
324 m.Attributes = attr
325 if c.store != nil {
326 mustUpdateMemberAttrInStore(c.store, m)
327 }
328 if c.be != nil {
329 mustSaveMemberToBackend(c.be, m)
330 }
331 return
332 }
333 _, ok := c.removed[id]
334 if !ok {
335 plog.Panicf("error updating attributes of unknown member %s", id)
336 }
337 plog.Warningf("skipped updating attributes of removed member %s", id)
338}
339
340func (c *RaftCluster) UpdateRaftAttributes(id types.ID, raftAttr RaftAttributes) {
341 c.Lock()
342 defer c.Unlock()
343
344 c.members[id].RaftAttributes = raftAttr
345 if c.store != nil {
346 mustUpdateMemberInStore(c.store, c.members[id])
347 }
348 if c.be != nil {
349 mustSaveMemberToBackend(c.be, c.members[id])
350 }
351
352 plog.Noticef("updated member %s %v in cluster %s", id, raftAttr.PeerURLs, c.id)
353}
354
355func (c *RaftCluster) Version() *semver.Version {
356 c.Lock()
357 defer c.Unlock()
358 if c.version == nil {
359 return nil
360 }
361 return semver.Must(semver.NewVersion(c.version.String()))
362}
363
364func (c *RaftCluster) SetVersion(ver *semver.Version, onSet func(*semver.Version)) {
365 c.Lock()
366 defer c.Unlock()
367 if c.version != nil {
368 plog.Noticef("updated the cluster version from %v to %v", version.Cluster(c.version.String()), version.Cluster(ver.String()))
369 } else {
370 plog.Noticef("set the initial cluster version to %v", version.Cluster(ver.String()))
371 }
372 oldVer := c.version
373 c.version = ver
374 mustDetectDowngrade(c.version)
375 if c.store != nil {
376 mustSaveClusterVersionToStore(c.store, ver)
377 }
378 if c.be != nil {
379 mustSaveClusterVersionToBackend(c.be, ver)
380 }
381 if oldVer != nil {
382 ClusterVersionMetrics.With(prometheus.Labels{"cluster_version": version.Cluster(oldVer.String())}).Set(0)
383 }
384 ClusterVersionMetrics.With(prometheus.Labels{"cluster_version": version.Cluster(ver.String())}).Set(1)
385 onSet(ver)
386}
387
388func (c *RaftCluster) IsReadyToAddNewMember() bool {
389 nmembers := 1
390 nstarted := 0
391
392 for _, member := range c.members {
393 if member.IsStarted() {
394 nstarted++
395 }
396 nmembers++
397 }
398
399 if nstarted == 1 && nmembers == 2 {
400 // a case of adding a new node to 1-member cluster for restoring cluster data
401 // https://github.com/coreos/etcd/blob/master/Documentation/v2/admin_guide.md#restoring-the-cluster
402
403 plog.Debugf("The number of started member is 1. This cluster can accept add member request.")
404 return true
405 }
406
407 nquorum := nmembers/2 + 1
408 if nstarted < nquorum {
409 plog.Warningf("Reject add member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum)
410 return false
411 }
412
413 return true
414}
415
416func (c *RaftCluster) IsReadyToRemoveMember(id uint64) bool {
417 nmembers := 0
418 nstarted := 0
419
420 for _, member := range c.members {
421 if uint64(member.ID) == id {
422 continue
423 }
424
425 if member.IsStarted() {
426 nstarted++
427 }
428 nmembers++
429 }
430
431 nquorum := nmembers/2 + 1
432 if nstarted < nquorum {
433 plog.Warningf("Reject remove member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum)
434 return false
435 }
436
437 return true
438}
439
440func membersFromStore(st store.Store) (map[types.ID]*Member, map[types.ID]bool) {
441 members := make(map[types.ID]*Member)
442 removed := make(map[types.ID]bool)
443 e, err := st.Get(StoreMembersPrefix, true, true)
444 if err != nil {
445 if isKeyNotFound(err) {
446 return members, removed
447 }
448 plog.Panicf("get storeMembers should never fail: %v", err)
449 }
450 for _, n := range e.Node.Nodes {
451 var m *Member
452 m, err = nodeToMember(n)
453 if err != nil {
454 plog.Panicf("nodeToMember should never fail: %v", err)
455 }
456 members[m.ID] = m
457 }
458
459 e, err = st.Get(storeRemovedMembersPrefix, true, true)
460 if err != nil {
461 if isKeyNotFound(err) {
462 return members, removed
463 }
464 plog.Panicf("get storeRemovedMembers should never fail: %v", err)
465 }
466 for _, n := range e.Node.Nodes {
467 removed[MustParseMemberIDFromKey(n.Key)] = true
468 }
469 return members, removed
470}
471
472func clusterVersionFromStore(st store.Store) *semver.Version {
473 e, err := st.Get(path.Join(storePrefix, "version"), false, false)
474 if err != nil {
475 if isKeyNotFound(err) {
476 return nil
477 }
478 plog.Panicf("unexpected error (%v) when getting cluster version from store", err)
479 }
480 return semver.Must(semver.NewVersion(*e.Node.Value))
481}
482
483// ValidateClusterAndAssignIDs validates the local cluster by matching the PeerURLs
484// with the existing cluster. If the validation succeeds, it assigns the IDs
485// from the existing cluster to the local cluster.
486// If the validation fails, an error will be returned.
487func ValidateClusterAndAssignIDs(local *RaftCluster, existing *RaftCluster) error {
488 ems := existing.Members()
489 lms := local.Members()
490 if len(ems) != len(lms) {
491 return fmt.Errorf("member count is unequal")
492 }
493 sort.Sort(MembersByPeerURLs(ems))
494 sort.Sort(MembersByPeerURLs(lms))
495
496 ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
497 defer cancel()
498 for i := range ems {
499 if ok, err := netutil.URLStringsEqual(ctx, ems[i].PeerURLs, lms[i].PeerURLs); !ok {
500 return fmt.Errorf("unmatched member while checking PeerURLs (%v)", err)
501 }
502 lms[i].ID = ems[i].ID
503 }
504 local.members = make(map[types.ID]*Member)
505 for _, m := range lms {
506 local.members[m.ID] = m
507 }
508 return nil
509}
510
511func mustDetectDowngrade(cv *semver.Version) {
512 lv := semver.Must(semver.NewVersion(version.Version))
513 // only keep major.minor version for comparison against cluster version
514 lv = &semver.Version{Major: lv.Major, Minor: lv.Minor}
515 if cv != nil && lv.LessThan(*cv) {
516 plog.Fatalf("cluster cannot be downgraded (current version: %s is lower than determined cluster version: %s).", version.Version, version.Cluster(cv.String()))
517 }
518}