| // Copyright 2015 The etcd Authors |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package raft |
| |
| import ( |
| "errors" |
| "sync" |
| |
| pb "go.etcd.io/etcd/raft/raftpb" |
| ) |
| |
| // ErrCompacted is returned by Storage.Entries/Compact when a requested |
| // index is unavailable because it predates the last snapshot. |
| var ErrCompacted = errors.New("requested index is unavailable due to compaction") |
| |
| // ErrSnapOutOfDate is returned by Storage.CreateSnapshot when a requested |
| // index is older than the existing snapshot. |
| var ErrSnapOutOfDate = errors.New("requested index is older than the existing snapshot") |
| |
| // ErrUnavailable is returned by Storage interface when the requested log entries |
| // are unavailable. |
| var ErrUnavailable = errors.New("requested entry at index is unavailable") |
| |
| // ErrSnapshotTemporarilyUnavailable is returned by the Storage interface when the required |
| // snapshot is temporarily unavailable. |
| var ErrSnapshotTemporarilyUnavailable = errors.New("snapshot is temporarily unavailable") |
| |
| // Storage is an interface that may be implemented by the application |
| // to retrieve log entries from storage. |
| // |
| // If any Storage method returns an error, the raft instance will |
| // become inoperable and refuse to participate in elections; the |
| // application is responsible for cleanup and recovery in this case. |
| type Storage interface { |
| // TODO(tbg): split this into two interfaces, LogStorage and StateStorage. |
| |
| // InitialState returns the saved HardState and ConfState information. |
| InitialState() (pb.HardState, pb.ConfState, error) |
| // Entries returns a slice of log entries in the range [lo,hi). |
| // MaxSize limits the total size of the log entries returned, but |
| // Entries returns at least one entry if any. |
| Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) |
| // Term returns the term of entry i, which must be in the range |
| // [FirstIndex()-1, LastIndex()]. The term of the entry before |
| // FirstIndex is retained for matching purposes even though the |
| // rest of that entry may not be available. |
| Term(i uint64) (uint64, error) |
| // LastIndex returns the index of the last entry in the log. |
| LastIndex() (uint64, error) |
| // FirstIndex returns the index of the first log entry that is |
| // possibly available via Entries (older entries have been incorporated |
| // into the latest Snapshot; if storage only contains the dummy entry the |
| // first log entry is not available). |
| FirstIndex() (uint64, error) |
| // Snapshot returns the most recent snapshot. |
| // If snapshot is temporarily unavailable, it should return ErrSnapshotTemporarilyUnavailable, |
| // so raft state machine could know that Storage needs some time to prepare |
| // snapshot and call Snapshot later. |
| Snapshot() (pb.Snapshot, error) |
| } |
| |
| // MemoryStorage implements the Storage interface backed by an |
| // in-memory array. |
| type MemoryStorage struct { |
| // Protects access to all fields. Most methods of MemoryStorage are |
| // run on the raft goroutine, but Append() is run on an application |
| // goroutine. |
| sync.Mutex |
| |
| hardState pb.HardState |
| snapshot pb.Snapshot |
| // ents[i] has raft log position i+snapshot.Metadata.Index |
| ents []pb.Entry |
| } |
| |
| // NewMemoryStorage creates an empty MemoryStorage. |
| func NewMemoryStorage() *MemoryStorage { |
| return &MemoryStorage{ |
| // When starting from scratch populate the list with a dummy entry at term zero. |
| ents: make([]pb.Entry, 1), |
| } |
| } |
| |
| // InitialState implements the Storage interface. |
| func (ms *MemoryStorage) InitialState() (pb.HardState, pb.ConfState, error) { |
| return ms.hardState, ms.snapshot.Metadata.ConfState, nil |
| } |
| |
| // SetHardState saves the current HardState. |
| func (ms *MemoryStorage) SetHardState(st pb.HardState) error { |
| ms.Lock() |
| defer ms.Unlock() |
| ms.hardState = st |
| return nil |
| } |
| |
| // Entries implements the Storage interface. |
| func (ms *MemoryStorage) Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) { |
| ms.Lock() |
| defer ms.Unlock() |
| offset := ms.ents[0].Index |
| if lo <= offset { |
| return nil, ErrCompacted |
| } |
| if hi > ms.lastIndex()+1 { |
| raftLogger.Panicf("entries' hi(%d) is out of bound lastindex(%d)", hi, ms.lastIndex()) |
| } |
| // only contains dummy entries. |
| if len(ms.ents) == 1 { |
| return nil, ErrUnavailable |
| } |
| |
| ents := ms.ents[lo-offset : hi-offset] |
| return limitSize(ents, maxSize), nil |
| } |
| |
| // Term implements the Storage interface. |
| func (ms *MemoryStorage) Term(i uint64) (uint64, error) { |
| ms.Lock() |
| defer ms.Unlock() |
| offset := ms.ents[0].Index |
| if i < offset { |
| return 0, ErrCompacted |
| } |
| if int(i-offset) >= len(ms.ents) { |
| return 0, ErrUnavailable |
| } |
| return ms.ents[i-offset].Term, nil |
| } |
| |
| // LastIndex implements the Storage interface. |
| func (ms *MemoryStorage) LastIndex() (uint64, error) { |
| ms.Lock() |
| defer ms.Unlock() |
| return ms.lastIndex(), nil |
| } |
| |
| func (ms *MemoryStorage) lastIndex() uint64 { |
| return ms.ents[0].Index + uint64(len(ms.ents)) - 1 |
| } |
| |
| // FirstIndex implements the Storage interface. |
| func (ms *MemoryStorage) FirstIndex() (uint64, error) { |
| ms.Lock() |
| defer ms.Unlock() |
| return ms.firstIndex(), nil |
| } |
| |
| func (ms *MemoryStorage) firstIndex() uint64 { |
| return ms.ents[0].Index + 1 |
| } |
| |
| // Snapshot implements the Storage interface. |
| func (ms *MemoryStorage) Snapshot() (pb.Snapshot, error) { |
| ms.Lock() |
| defer ms.Unlock() |
| return ms.snapshot, nil |
| } |
| |
| // ApplySnapshot overwrites the contents of this Storage object with |
| // those of the given snapshot. |
| func (ms *MemoryStorage) ApplySnapshot(snap pb.Snapshot) error { |
| ms.Lock() |
| defer ms.Unlock() |
| |
| //handle check for old snapshot being applied |
| msIndex := ms.snapshot.Metadata.Index |
| snapIndex := snap.Metadata.Index |
| if msIndex >= snapIndex { |
| return ErrSnapOutOfDate |
| } |
| |
| ms.snapshot = snap |
| ms.ents = []pb.Entry{{Term: snap.Metadata.Term, Index: snap.Metadata.Index}} |
| return nil |
| } |
| |
| // CreateSnapshot makes a snapshot which can be retrieved with Snapshot() and |
| // can be used to reconstruct the state at that point. |
| // If any configuration changes have been made since the last compaction, |
| // the result of the last ApplyConfChange must be passed in. |
| func (ms *MemoryStorage) CreateSnapshot(i uint64, cs *pb.ConfState, data []byte) (pb.Snapshot, error) { |
| ms.Lock() |
| defer ms.Unlock() |
| if i <= ms.snapshot.Metadata.Index { |
| return pb.Snapshot{}, ErrSnapOutOfDate |
| } |
| |
| offset := ms.ents[0].Index |
| if i > ms.lastIndex() { |
| raftLogger.Panicf("snapshot %d is out of bound lastindex(%d)", i, ms.lastIndex()) |
| } |
| |
| ms.snapshot.Metadata.Index = i |
| ms.snapshot.Metadata.Term = ms.ents[i-offset].Term |
| if cs != nil { |
| ms.snapshot.Metadata.ConfState = *cs |
| } |
| ms.snapshot.Data = data |
| return ms.snapshot, nil |
| } |
| |
| // Compact discards all log entries prior to compactIndex. |
| // It is the application's responsibility to not attempt to compact an index |
| // greater than raftLog.applied. |
| func (ms *MemoryStorage) Compact(compactIndex uint64) error { |
| ms.Lock() |
| defer ms.Unlock() |
| offset := ms.ents[0].Index |
| if compactIndex <= offset { |
| return ErrCompacted |
| } |
| if compactIndex > ms.lastIndex() { |
| raftLogger.Panicf("compact %d is out of bound lastindex(%d)", compactIndex, ms.lastIndex()) |
| } |
| |
| i := compactIndex - offset |
| ents := make([]pb.Entry, 1, 1+uint64(len(ms.ents))-i) |
| ents[0].Index = ms.ents[i].Index |
| ents[0].Term = ms.ents[i].Term |
| ents = append(ents, ms.ents[i+1:]...) |
| ms.ents = ents |
| return nil |
| } |
| |
| // Append the new entries to storage. |
| // TODO (xiangli): ensure the entries are continuous and |
| // entries[0].Index > ms.entries[0].Index |
| func (ms *MemoryStorage) Append(entries []pb.Entry) error { |
| if len(entries) == 0 { |
| return nil |
| } |
| |
| ms.Lock() |
| defer ms.Unlock() |
| |
| first := ms.firstIndex() |
| last := entries[0].Index + uint64(len(entries)) - 1 |
| |
| // shortcut if there is no new entry. |
| if last < first { |
| return nil |
| } |
| // truncate compacted entries |
| if first > entries[0].Index { |
| entries = entries[first-entries[0].Index:] |
| } |
| |
| offset := entries[0].Index - ms.ents[0].Index |
| switch { |
| case uint64(len(ms.ents)) > offset: |
| ms.ents = append([]pb.Entry{}, ms.ents[:offset]...) |
| ms.ents = append(ms.ents, entries...) |
| case uint64(len(ms.ents)) == offset: |
| ms.ents = append(ms.ents, entries...) |
| default: |
| raftLogger.Panicf("missing log entry [last: %d, append at: %d]", |
| ms.lastIndex(), entries[0].Index) |
| } |
| return nil |
| } |