blob: 77183b793cee415cbaedf915958a2ba485439054 [file] [log] [blame]
William Kurkianea869482019-04-09 15:16:11 -04001// Copyright 2015 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package raft
16
17import (
18 "errors"
19
20 pb "go.etcd.io/etcd/raft/raftpb"
Abhilash S.L3b494632019-07-16 15:51:09 +053021 "go.etcd.io/etcd/raft/tracker"
William Kurkianea869482019-04-09 15:16:11 -040022)
23
var (
	// ErrStepLocalMsg is returned when the caller tries to step a local raft
	// message.
	ErrStepLocalMsg = errors.New("raft: cannot step raft local message")

	// ErrStepPeerNotFound is returned when the caller tries to step a response
	// message but no peer is found in raft.prs for the sending node.
	ErrStepPeerNotFound = errors.New("raft: cannot step as peer not found")
)
30
// RawNode is a thread-unsafe Node.
// The methods of this struct correspond to the methods of Node and are described
// more fully there.
type RawNode struct {
	// raft is the underlying state machine that every method delegates to.
	raft *raft
	// prevSoftSt is the SoftState recorded at construction and replaced by
	// commitReady whenever a Ready carries a non-nil SoftState.
	prevSoftSt *SoftState
	// prevHardSt is the HardState recorded at construction and replaced by
	// commitReady whenever a Ready carries a non-empty HardState.
	prevHardSt pb.HardState
}
39
40func (rn *RawNode) newReady() Ready {
41 return newReady(rn.raft, rn.prevSoftSt, rn.prevHardSt)
42}
43
44func (rn *RawNode) commitReady(rd Ready) {
45 if rd.SoftState != nil {
46 rn.prevSoftSt = rd.SoftState
47 }
48 if !IsEmptyHardState(rd.HardState) {
49 rn.prevHardSt = rd.HardState
50 }
51
52 // If entries were applied (or a snapshot), update our cursor for
53 // the next Ready. Note that if the current HardState contains a
54 // new Commit index, this does not mean that we're also applying
55 // all of the new entries due to commit pagination by size.
56 if index := rd.appliedCursor(); index > 0 {
57 rn.raft.raftLog.appliedTo(index)
58 }
59
60 if len(rd.Entries) > 0 {
61 e := rd.Entries[len(rd.Entries)-1]
62 rn.raft.raftLog.stableTo(e.Index, e.Term)
63 }
64 if !IsEmptySnap(rd.Snapshot) {
65 rn.raft.raftLog.stableSnapTo(rd.Snapshot.Metadata.Index)
66 }
67 if len(rd.ReadStates) != 0 {
68 rn.raft.readStates = nil
69 }
70}
71
72// NewRawNode returns a new RawNode given configuration and a list of raft peers.
73func NewRawNode(config *Config, peers []Peer) (*RawNode, error) {
74 if config.ID == 0 {
75 panic("config.ID must not be zero")
76 }
77 r := newRaft(config)
78 rn := &RawNode{
79 raft: r,
80 }
81 lastIndex, err := config.Storage.LastIndex()
82 if err != nil {
83 panic(err) // TODO(bdarnell)
84 }
85 // If the log is empty, this is a new RawNode (like StartNode); otherwise it's
86 // restoring an existing RawNode (like RestartNode).
87 // TODO(bdarnell): rethink RawNode initialization and whether the application needs
88 // to be able to tell us when it expects the RawNode to exist.
89 if lastIndex == 0 {
90 r.becomeFollower(1, None)
91 ents := make([]pb.Entry, len(peers))
92 for i, peer := range peers {
93 cc := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: peer.ID, Context: peer.Context}
94 data, err := cc.Marshal()
95 if err != nil {
96 panic("unexpected marshal error")
97 }
98
99 ents[i] = pb.Entry{Type: pb.EntryConfChange, Term: 1, Index: uint64(i + 1), Data: data}
100 }
101 r.raftLog.append(ents...)
102 r.raftLog.committed = uint64(len(ents))
103 for _, peer := range peers {
Abhilash S.L3b494632019-07-16 15:51:09 +0530104 r.applyConfChange(pb.ConfChange{NodeID: peer.ID, Type: pb.ConfChangeAddNode})
William Kurkianea869482019-04-09 15:16:11 -0400105 }
106 }
107
108 // Set the initial hard and soft states after performing all initialization.
109 rn.prevSoftSt = r.softState()
110 if lastIndex == 0 {
111 rn.prevHardSt = emptyState
112 } else {
113 rn.prevHardSt = r.hardState()
114 }
115
116 return rn, nil
117}
118
119// Tick advances the internal logical clock by a single tick.
120func (rn *RawNode) Tick() {
121 rn.raft.tick()
122}
123
124// TickQuiesced advances the internal logical clock by a single tick without
125// performing any other state machine processing. It allows the caller to avoid
126// periodic heartbeats and elections when all of the peers in a Raft group are
127// known to be at the same state. Expected usage is to periodically invoke Tick
128// or TickQuiesced depending on whether the group is "active" or "quiesced".
129//
130// WARNING: Be very careful about using this method as it subverts the Raft
131// state machine. You should probably be using Tick instead.
132func (rn *RawNode) TickQuiesced() {
133 rn.raft.electionElapsed++
134}
135
136// Campaign causes this RawNode to transition to candidate state.
137func (rn *RawNode) Campaign() error {
138 return rn.raft.Step(pb.Message{
139 Type: pb.MsgHup,
140 })
141}
142
143// Propose proposes data be appended to the raft log.
144func (rn *RawNode) Propose(data []byte) error {
145 return rn.raft.Step(pb.Message{
146 Type: pb.MsgProp,
147 From: rn.raft.id,
148 Entries: []pb.Entry{
149 {Data: data},
150 }})
151}
152
153// ProposeConfChange proposes a config change.
154func (rn *RawNode) ProposeConfChange(cc pb.ConfChange) error {
155 data, err := cc.Marshal()
156 if err != nil {
157 return err
158 }
159 return rn.raft.Step(pb.Message{
160 Type: pb.MsgProp,
161 Entries: []pb.Entry{
162 {Type: pb.EntryConfChange, Data: data},
163 },
164 })
165}
166
167// ApplyConfChange applies a config change to the local node.
168func (rn *RawNode) ApplyConfChange(cc pb.ConfChange) *pb.ConfState {
Abhilash S.L3b494632019-07-16 15:51:09 +0530169 cs := rn.raft.applyConfChange(cc)
170 return &cs
William Kurkianea869482019-04-09 15:16:11 -0400171}
172
173// Step advances the state machine using the given message.
174func (rn *RawNode) Step(m pb.Message) error {
175 // ignore unexpected local messages receiving over network
176 if IsLocalMsg(m.Type) {
177 return ErrStepLocalMsg
178 }
Abhilash S.L3b494632019-07-16 15:51:09 +0530179 if pr := rn.raft.prs.Progress[m.From]; pr != nil || !IsResponseMsg(m.Type) {
William Kurkianea869482019-04-09 15:16:11 -0400180 return rn.raft.Step(m)
181 }
182 return ErrStepPeerNotFound
183}
184
185// Ready returns the current point-in-time state of this RawNode.
186func (rn *RawNode) Ready() Ready {
187 rd := rn.newReady()
188 rn.raft.msgs = nil
189 rn.raft.reduceUncommittedSize(rd.CommittedEntries)
190 return rd
191}
192
193// HasReady called when RawNode user need to check if any Ready pending.
194// Checking logic in this method should be consistent with Ready.containsUpdates().
195func (rn *RawNode) HasReady() bool {
196 r := rn.raft
197 if !r.softState().equal(rn.prevSoftSt) {
198 return true
199 }
200 if hardSt := r.hardState(); !IsEmptyHardState(hardSt) && !isHardStateEqual(hardSt, rn.prevHardSt) {
201 return true
202 }
203 if r.raftLog.unstable.snapshot != nil && !IsEmptySnap(*r.raftLog.unstable.snapshot) {
204 return true
205 }
206 if len(r.msgs) > 0 || len(r.raftLog.unstableEntries()) > 0 || r.raftLog.hasNextEnts() {
207 return true
208 }
209 if len(r.readStates) != 0 {
210 return true
211 }
212 return false
213}
214
// Advance notifies the RawNode that the application has applied and saved
// progress in the last Ready results. It delegates to commitReady, passing rd
// through unchanged.
func (rn *RawNode) Advance(rd Ready) {
	rn.commitReady(rd)
}
220
221// Status returns the current status of the given group.
222func (rn *RawNode) Status() *Status {
223 status := getStatus(rn.raft)
224 return &status
225}
226
227// StatusWithoutProgress returns a Status without populating the Progress field
228// (and returns the Status as a value to avoid forcing it onto the heap). This
229// is more performant if the Progress is not required. See WithProgress for an
230// allocation-free way to introspect the Progress.
231func (rn *RawNode) StatusWithoutProgress() Status {
232 return getStatusWithoutProgress(rn.raft)
233}
234
// ProgressType indicates the type of replica a Progress corresponds to.
// WithProgress passes one of these values to its visitor alongside each
// Progress.
type ProgressType byte

const (
	// ProgressTypePeer accompanies a Progress for a regular peer replica.
	// It is the zero value of ProgressType.
	ProgressTypePeer ProgressType = iota
	// ProgressTypeLearner accompanies a Progress for a learner replica,
	// i.e. one whose IsLearner flag is set.
	ProgressTypeLearner
)
244
245// WithProgress is a helper to introspect the Progress for this node and its
246// peers.
Abhilash S.L3b494632019-07-16 15:51:09 +0530247func (rn *RawNode) WithProgress(visitor func(id uint64, typ ProgressType, pr tracker.Progress)) {
248 rn.raft.prs.Visit(func(id uint64, pr *tracker.Progress) {
249 typ := ProgressTypePeer
250 if pr.IsLearner {
251 typ = ProgressTypeLearner
252 }
253 p := *pr
254 p.Inflights = nil
255 visitor(id, typ, p)
256 })
William Kurkianea869482019-04-09 15:16:11 -0400257}
258
259// ReportUnreachable reports the given node is not reachable for the last send.
260func (rn *RawNode) ReportUnreachable(id uint64) {
261 _ = rn.raft.Step(pb.Message{Type: pb.MsgUnreachable, From: id})
262}
263
264// ReportSnapshot reports the status of the sent snapshot.
265func (rn *RawNode) ReportSnapshot(id uint64, status SnapshotStatus) {
266 rej := status == SnapshotFailure
267
268 _ = rn.raft.Step(pb.Message{Type: pb.MsgSnapStatus, From: id, Reject: rej})
269}
270
271// TransferLeader tries to transfer leadership to the given transferee.
272func (rn *RawNode) TransferLeader(transferee uint64) {
273 _ = rn.raft.Step(pb.Message{Type: pb.MsgTransferLeader, From: transferee})
274}
275
276// ReadIndex requests a read state. The read state will be set in ready.
277// Read State has a read index. Once the application advances further than the read
278// index, any linearizable read requests issued before the read request can be
279// processed safely. The read state will have the same rctx attached.
280func (rn *RawNode) ReadIndex(rctx []byte) {
281 _ = rn.raft.Step(pb.Message{Type: pb.MsgReadIndex, Entries: []pb.Entry{{Data: rctx}}})
282}