khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 1 | // Copyright 2015 The etcd Authors |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | package membership |
| 16 | |
| 17 | import ( |
| 18 | "bytes" |
| 19 | "context" |
| 20 | "crypto/sha1" |
| 21 | "encoding/binary" |
| 22 | "encoding/json" |
| 23 | "fmt" |
| 24 | "path" |
| 25 | "sort" |
| 26 | "strings" |
| 27 | "sync" |
| 28 | "time" |
| 29 | |
| 30 | "github.com/coreos/etcd/mvcc/backend" |
| 31 | "github.com/coreos/etcd/pkg/netutil" |
| 32 | "github.com/coreos/etcd/pkg/types" |
| 33 | "github.com/coreos/etcd/raft" |
| 34 | "github.com/coreos/etcd/raft/raftpb" |
| 35 | "github.com/coreos/etcd/store" |
| 36 | "github.com/coreos/etcd/version" |
| 37 | |
| 38 | "github.com/coreos/go-semver/semver" |
| 39 | "github.com/prometheus/client_golang/prometheus" |
| 40 | ) |
| 41 | |
| 42 | // RaftCluster is a list of Members that belong to the same raft cluster |
| 43 | type RaftCluster struct { |
| 44 | id types.ID |
| 45 | token string |
| 46 | |
| 47 | store store.Store |
| 48 | be backend.Backend |
| 49 | |
| 50 | sync.Mutex // guards the fields below |
| 51 | version *semver.Version |
| 52 | members map[types.ID]*Member |
| 53 | // removed contains the ids of removed members in the cluster. |
| 54 | // removed id cannot be reused. |
| 55 | removed map[types.ID]bool |
| 56 | } |
| 57 | |
| 58 | func NewClusterFromURLsMap(token string, urlsmap types.URLsMap) (*RaftCluster, error) { |
| 59 | c := NewCluster(token) |
| 60 | for name, urls := range urlsmap { |
| 61 | m := NewMember(name, urls, token, nil) |
| 62 | if _, ok := c.members[m.ID]; ok { |
| 63 | return nil, fmt.Errorf("member exists with identical ID %v", m) |
| 64 | } |
| 65 | if uint64(m.ID) == raft.None { |
| 66 | return nil, fmt.Errorf("cannot use %x as member id", raft.None) |
| 67 | } |
| 68 | c.members[m.ID] = m |
| 69 | } |
| 70 | c.genID() |
| 71 | return c, nil |
| 72 | } |
| 73 | |
| 74 | func NewClusterFromMembers(token string, id types.ID, membs []*Member) *RaftCluster { |
| 75 | c := NewCluster(token) |
| 76 | c.id = id |
| 77 | for _, m := range membs { |
| 78 | c.members[m.ID] = m |
| 79 | } |
| 80 | return c |
| 81 | } |
| 82 | |
| 83 | func NewCluster(token string) *RaftCluster { |
| 84 | return &RaftCluster{ |
| 85 | token: token, |
| 86 | members: make(map[types.ID]*Member), |
| 87 | removed: make(map[types.ID]bool), |
| 88 | } |
| 89 | } |
| 90 | |
| 91 | func (c *RaftCluster) ID() types.ID { return c.id } |
| 92 | |
| 93 | func (c *RaftCluster) Members() []*Member { |
| 94 | c.Lock() |
| 95 | defer c.Unlock() |
| 96 | var ms MembersByID |
| 97 | for _, m := range c.members { |
| 98 | ms = append(ms, m.Clone()) |
| 99 | } |
| 100 | sort.Sort(ms) |
| 101 | return []*Member(ms) |
| 102 | } |
| 103 | |
| 104 | func (c *RaftCluster) Member(id types.ID) *Member { |
| 105 | c.Lock() |
| 106 | defer c.Unlock() |
| 107 | return c.members[id].Clone() |
| 108 | } |
| 109 | |
| 110 | // MemberByName returns a Member with the given name if exists. |
| 111 | // If more than one member has the given name, it will panic. |
| 112 | func (c *RaftCluster) MemberByName(name string) *Member { |
| 113 | c.Lock() |
| 114 | defer c.Unlock() |
| 115 | var memb *Member |
| 116 | for _, m := range c.members { |
| 117 | if m.Name == name { |
| 118 | if memb != nil { |
| 119 | plog.Panicf("two members with the given name %q exist", name) |
| 120 | } |
| 121 | memb = m |
| 122 | } |
| 123 | } |
| 124 | return memb.Clone() |
| 125 | } |
| 126 | |
| 127 | func (c *RaftCluster) MemberIDs() []types.ID { |
| 128 | c.Lock() |
| 129 | defer c.Unlock() |
| 130 | var ids []types.ID |
| 131 | for _, m := range c.members { |
| 132 | ids = append(ids, m.ID) |
| 133 | } |
| 134 | sort.Sort(types.IDSlice(ids)) |
| 135 | return ids |
| 136 | } |
| 137 | |
| 138 | func (c *RaftCluster) IsIDRemoved(id types.ID) bool { |
| 139 | c.Lock() |
| 140 | defer c.Unlock() |
| 141 | return c.removed[id] |
| 142 | } |
| 143 | |
| 144 | // PeerURLs returns a list of all peer addresses. |
| 145 | // The returned list is sorted in ascending lexicographical order. |
| 146 | func (c *RaftCluster) PeerURLs() []string { |
| 147 | c.Lock() |
| 148 | defer c.Unlock() |
| 149 | urls := make([]string, 0) |
| 150 | for _, p := range c.members { |
| 151 | urls = append(urls, p.PeerURLs...) |
| 152 | } |
| 153 | sort.Strings(urls) |
| 154 | return urls |
| 155 | } |
| 156 | |
| 157 | // ClientURLs returns a list of all client addresses. |
| 158 | // The returned list is sorted in ascending lexicographical order. |
| 159 | func (c *RaftCluster) ClientURLs() []string { |
| 160 | c.Lock() |
| 161 | defer c.Unlock() |
| 162 | urls := make([]string, 0) |
| 163 | for _, p := range c.members { |
| 164 | urls = append(urls, p.ClientURLs...) |
| 165 | } |
| 166 | sort.Strings(urls) |
| 167 | return urls |
| 168 | } |
| 169 | |
| 170 | func (c *RaftCluster) String() string { |
| 171 | c.Lock() |
| 172 | defer c.Unlock() |
| 173 | b := &bytes.Buffer{} |
| 174 | fmt.Fprintf(b, "{ClusterID:%s ", c.id) |
| 175 | var ms []string |
| 176 | for _, m := range c.members { |
| 177 | ms = append(ms, fmt.Sprintf("%+v", m)) |
| 178 | } |
| 179 | fmt.Fprintf(b, "Members:[%s] ", strings.Join(ms, " ")) |
| 180 | var ids []string |
| 181 | for id := range c.removed { |
| 182 | ids = append(ids, id.String()) |
| 183 | } |
| 184 | fmt.Fprintf(b, "RemovedMemberIDs:[%s]}", strings.Join(ids, " ")) |
| 185 | return b.String() |
| 186 | } |
| 187 | |
| 188 | func (c *RaftCluster) genID() { |
| 189 | mIDs := c.MemberIDs() |
| 190 | b := make([]byte, 8*len(mIDs)) |
| 191 | for i, id := range mIDs { |
| 192 | binary.BigEndian.PutUint64(b[8*i:], uint64(id)) |
| 193 | } |
| 194 | hash := sha1.Sum(b) |
| 195 | c.id = types.ID(binary.BigEndian.Uint64(hash[:8])) |
| 196 | } |
| 197 | |
| 198 | func (c *RaftCluster) SetID(id types.ID) { c.id = id } |
| 199 | |
| 200 | func (c *RaftCluster) SetStore(st store.Store) { c.store = st } |
| 201 | |
| 202 | func (c *RaftCluster) SetBackend(be backend.Backend) { |
| 203 | c.be = be |
| 204 | mustCreateBackendBuckets(c.be) |
| 205 | } |
| 206 | |
| 207 | func (c *RaftCluster) Recover(onSet func(*semver.Version)) { |
| 208 | c.Lock() |
| 209 | defer c.Unlock() |
| 210 | |
| 211 | c.members, c.removed = membersFromStore(c.store) |
| 212 | c.version = clusterVersionFromStore(c.store) |
| 213 | mustDetectDowngrade(c.version) |
| 214 | onSet(c.version) |
| 215 | |
| 216 | for _, m := range c.members { |
| 217 | plog.Infof("added member %s %v to cluster %s from store", m.ID, m.PeerURLs, c.id) |
| 218 | } |
| 219 | if c.version != nil { |
| 220 | plog.Infof("set the cluster version to %v from store", version.Cluster(c.version.String())) |
| 221 | } |
| 222 | } |
| 223 | |
| 224 | // ValidateConfigurationChange takes a proposed ConfChange and |
| 225 | // ensures that it is still valid. |
| 226 | func (c *RaftCluster) ValidateConfigurationChange(cc raftpb.ConfChange) error { |
| 227 | members, removed := membersFromStore(c.store) |
| 228 | id := types.ID(cc.NodeID) |
| 229 | if removed[id] { |
| 230 | return ErrIDRemoved |
| 231 | } |
| 232 | switch cc.Type { |
| 233 | case raftpb.ConfChangeAddNode: |
| 234 | if members[id] != nil { |
| 235 | return ErrIDExists |
| 236 | } |
| 237 | urls := make(map[string]bool) |
| 238 | for _, m := range members { |
| 239 | for _, u := range m.PeerURLs { |
| 240 | urls[u] = true |
| 241 | } |
| 242 | } |
| 243 | m := new(Member) |
| 244 | if err := json.Unmarshal(cc.Context, m); err != nil { |
| 245 | plog.Panicf("unmarshal member should never fail: %v", err) |
| 246 | } |
| 247 | for _, u := range m.PeerURLs { |
| 248 | if urls[u] { |
| 249 | return ErrPeerURLexists |
| 250 | } |
| 251 | } |
| 252 | case raftpb.ConfChangeRemoveNode: |
| 253 | if members[id] == nil { |
| 254 | return ErrIDNotFound |
| 255 | } |
| 256 | case raftpb.ConfChangeUpdateNode: |
| 257 | if members[id] == nil { |
| 258 | return ErrIDNotFound |
| 259 | } |
| 260 | urls := make(map[string]bool) |
| 261 | for _, m := range members { |
| 262 | if m.ID == id { |
| 263 | continue |
| 264 | } |
| 265 | for _, u := range m.PeerURLs { |
| 266 | urls[u] = true |
| 267 | } |
| 268 | } |
| 269 | m := new(Member) |
| 270 | if err := json.Unmarshal(cc.Context, m); err != nil { |
| 271 | plog.Panicf("unmarshal member should never fail: %v", err) |
| 272 | } |
| 273 | for _, u := range m.PeerURLs { |
| 274 | if urls[u] { |
| 275 | return ErrPeerURLexists |
| 276 | } |
| 277 | } |
| 278 | default: |
| 279 | plog.Panicf("ConfChange type should be either AddNode, RemoveNode or UpdateNode") |
| 280 | } |
| 281 | return nil |
| 282 | } |
| 283 | |
| 284 | // AddMember adds a new Member into the cluster, and saves the given member's |
| 285 | // raftAttributes into the store. The given member should have empty attributes. |
| 286 | // A Member with a matching id must not exist. |
| 287 | func (c *RaftCluster) AddMember(m *Member) { |
| 288 | c.Lock() |
| 289 | defer c.Unlock() |
| 290 | if c.store != nil { |
| 291 | mustSaveMemberToStore(c.store, m) |
| 292 | } |
| 293 | if c.be != nil { |
| 294 | mustSaveMemberToBackend(c.be, m) |
| 295 | } |
| 296 | |
| 297 | c.members[m.ID] = m |
| 298 | |
| 299 | plog.Infof("added member %s %v to cluster %s", m.ID, m.PeerURLs, c.id) |
| 300 | } |
| 301 | |
| 302 | // RemoveMember removes a member from the store. |
| 303 | // The given id MUST exist, or the function panics. |
| 304 | func (c *RaftCluster) RemoveMember(id types.ID) { |
| 305 | c.Lock() |
| 306 | defer c.Unlock() |
| 307 | if c.store != nil { |
| 308 | mustDeleteMemberFromStore(c.store, id) |
| 309 | } |
| 310 | if c.be != nil { |
| 311 | mustDeleteMemberFromBackend(c.be, id) |
| 312 | } |
| 313 | |
| 314 | delete(c.members, id) |
| 315 | c.removed[id] = true |
| 316 | |
| 317 | plog.Infof("removed member %s from cluster %s", id, c.id) |
| 318 | } |
| 319 | |
| 320 | func (c *RaftCluster) UpdateAttributes(id types.ID, attr Attributes) { |
| 321 | c.Lock() |
| 322 | defer c.Unlock() |
| 323 | if m, ok := c.members[id]; ok { |
| 324 | m.Attributes = attr |
| 325 | if c.store != nil { |
| 326 | mustUpdateMemberAttrInStore(c.store, m) |
| 327 | } |
| 328 | if c.be != nil { |
| 329 | mustSaveMemberToBackend(c.be, m) |
| 330 | } |
| 331 | return |
| 332 | } |
| 333 | _, ok := c.removed[id] |
| 334 | if !ok { |
| 335 | plog.Panicf("error updating attributes of unknown member %s", id) |
| 336 | } |
| 337 | plog.Warningf("skipped updating attributes of removed member %s", id) |
| 338 | } |
| 339 | |
| 340 | func (c *RaftCluster) UpdateRaftAttributes(id types.ID, raftAttr RaftAttributes) { |
| 341 | c.Lock() |
| 342 | defer c.Unlock() |
| 343 | |
| 344 | c.members[id].RaftAttributes = raftAttr |
| 345 | if c.store != nil { |
| 346 | mustUpdateMemberInStore(c.store, c.members[id]) |
| 347 | } |
| 348 | if c.be != nil { |
| 349 | mustSaveMemberToBackend(c.be, c.members[id]) |
| 350 | } |
| 351 | |
| 352 | plog.Noticef("updated member %s %v in cluster %s", id, raftAttr.PeerURLs, c.id) |
| 353 | } |
| 354 | |
| 355 | func (c *RaftCluster) Version() *semver.Version { |
| 356 | c.Lock() |
| 357 | defer c.Unlock() |
| 358 | if c.version == nil { |
| 359 | return nil |
| 360 | } |
| 361 | return semver.Must(semver.NewVersion(c.version.String())) |
| 362 | } |
| 363 | |
| 364 | func (c *RaftCluster) SetVersion(ver *semver.Version, onSet func(*semver.Version)) { |
| 365 | c.Lock() |
| 366 | defer c.Unlock() |
| 367 | if c.version != nil { |
| 368 | plog.Noticef("updated the cluster version from %v to %v", version.Cluster(c.version.String()), version.Cluster(ver.String())) |
| 369 | } else { |
| 370 | plog.Noticef("set the initial cluster version to %v", version.Cluster(ver.String())) |
| 371 | } |
| 372 | oldVer := c.version |
| 373 | c.version = ver |
| 374 | mustDetectDowngrade(c.version) |
| 375 | if c.store != nil { |
| 376 | mustSaveClusterVersionToStore(c.store, ver) |
| 377 | } |
| 378 | if c.be != nil { |
| 379 | mustSaveClusterVersionToBackend(c.be, ver) |
| 380 | } |
| 381 | if oldVer != nil { |
| 382 | ClusterVersionMetrics.With(prometheus.Labels{"cluster_version": version.Cluster(oldVer.String())}).Set(0) |
| 383 | } |
| 384 | ClusterVersionMetrics.With(prometheus.Labels{"cluster_version": version.Cluster(ver.String())}).Set(1) |
| 385 | onSet(ver) |
| 386 | } |
| 387 | |
| 388 | func (c *RaftCluster) IsReadyToAddNewMember() bool { |
| 389 | nmembers := 1 |
| 390 | nstarted := 0 |
| 391 | |
| 392 | for _, member := range c.members { |
| 393 | if member.IsStarted() { |
| 394 | nstarted++ |
| 395 | } |
| 396 | nmembers++ |
| 397 | } |
| 398 | |
| 399 | if nstarted == 1 && nmembers == 2 { |
| 400 | // a case of adding a new node to 1-member cluster for restoring cluster data |
| 401 | // https://github.com/coreos/etcd/blob/master/Documentation/v2/admin_guide.md#restoring-the-cluster |
| 402 | |
| 403 | plog.Debugf("The number of started member is 1. This cluster can accept add member request.") |
| 404 | return true |
| 405 | } |
| 406 | |
| 407 | nquorum := nmembers/2 + 1 |
| 408 | if nstarted < nquorum { |
| 409 | plog.Warningf("Reject add member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum) |
| 410 | return false |
| 411 | } |
| 412 | |
| 413 | return true |
| 414 | } |
| 415 | |
| 416 | func (c *RaftCluster) IsReadyToRemoveMember(id uint64) bool { |
| 417 | nmembers := 0 |
| 418 | nstarted := 0 |
| 419 | |
| 420 | for _, member := range c.members { |
| 421 | if uint64(member.ID) == id { |
| 422 | continue |
| 423 | } |
| 424 | |
| 425 | if member.IsStarted() { |
| 426 | nstarted++ |
| 427 | } |
| 428 | nmembers++ |
| 429 | } |
| 430 | |
| 431 | nquorum := nmembers/2 + 1 |
| 432 | if nstarted < nquorum { |
| 433 | plog.Warningf("Reject remove member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum) |
| 434 | return false |
| 435 | } |
| 436 | |
| 437 | return true |
| 438 | } |
| 439 | |
| 440 | func membersFromStore(st store.Store) (map[types.ID]*Member, map[types.ID]bool) { |
| 441 | members := make(map[types.ID]*Member) |
| 442 | removed := make(map[types.ID]bool) |
| 443 | e, err := st.Get(StoreMembersPrefix, true, true) |
| 444 | if err != nil { |
| 445 | if isKeyNotFound(err) { |
| 446 | return members, removed |
| 447 | } |
| 448 | plog.Panicf("get storeMembers should never fail: %v", err) |
| 449 | } |
| 450 | for _, n := range e.Node.Nodes { |
| 451 | var m *Member |
| 452 | m, err = nodeToMember(n) |
| 453 | if err != nil { |
| 454 | plog.Panicf("nodeToMember should never fail: %v", err) |
| 455 | } |
| 456 | members[m.ID] = m |
| 457 | } |
| 458 | |
| 459 | e, err = st.Get(storeRemovedMembersPrefix, true, true) |
| 460 | if err != nil { |
| 461 | if isKeyNotFound(err) { |
| 462 | return members, removed |
| 463 | } |
| 464 | plog.Panicf("get storeRemovedMembers should never fail: %v", err) |
| 465 | } |
| 466 | for _, n := range e.Node.Nodes { |
| 467 | removed[MustParseMemberIDFromKey(n.Key)] = true |
| 468 | } |
| 469 | return members, removed |
| 470 | } |
| 471 | |
| 472 | func clusterVersionFromStore(st store.Store) *semver.Version { |
| 473 | e, err := st.Get(path.Join(storePrefix, "version"), false, false) |
| 474 | if err != nil { |
| 475 | if isKeyNotFound(err) { |
| 476 | return nil |
| 477 | } |
| 478 | plog.Panicf("unexpected error (%v) when getting cluster version from store", err) |
| 479 | } |
| 480 | return semver.Must(semver.NewVersion(*e.Node.Value)) |
| 481 | } |
| 482 | |
| 483 | // ValidateClusterAndAssignIDs validates the local cluster by matching the PeerURLs |
| 484 | // with the existing cluster. If the validation succeeds, it assigns the IDs |
| 485 | // from the existing cluster to the local cluster. |
| 486 | // If the validation fails, an error will be returned. |
| 487 | func ValidateClusterAndAssignIDs(local *RaftCluster, existing *RaftCluster) error { |
| 488 | ems := existing.Members() |
| 489 | lms := local.Members() |
| 490 | if len(ems) != len(lms) { |
| 491 | return fmt.Errorf("member count is unequal") |
| 492 | } |
| 493 | sort.Sort(MembersByPeerURLs(ems)) |
| 494 | sort.Sort(MembersByPeerURLs(lms)) |
| 495 | |
| 496 | ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second) |
| 497 | defer cancel() |
| 498 | for i := range ems { |
| 499 | if ok, err := netutil.URLStringsEqual(ctx, ems[i].PeerURLs, lms[i].PeerURLs); !ok { |
| 500 | return fmt.Errorf("unmatched member while checking PeerURLs (%v)", err) |
| 501 | } |
| 502 | lms[i].ID = ems[i].ID |
| 503 | } |
| 504 | local.members = make(map[types.ID]*Member) |
| 505 | for _, m := range lms { |
| 506 | local.members[m.ID] = m |
| 507 | } |
| 508 | return nil |
| 509 | } |
| 510 | |
| 511 | func mustDetectDowngrade(cv *semver.Version) { |
| 512 | lv := semver.Must(semver.NewVersion(version.Version)) |
| 513 | // only keep major.minor version for comparison against cluster version |
| 514 | lv = &semver.Version{Major: lv.Major, Minor: lv.Minor} |
| 515 | if cv != nil && lv.LessThan(*cv) { |
| 516 | plog.Fatalf("cluster cannot be downgraded (current version: %s is lower than determined cluster version: %s).", version.Version, version.Cluster(cv.String())) |
| 517 | } |
| 518 | } |