// Recovered from a repository blame view
// (blob 23d62ec2fb0204f1f3646409008815bd93f063f0).
syntax = "proto2";
package raftpb;

import "gogoproto/gogo.proto";

// File-wide gogoproto code-generation switches (see the gogoproto extensions
// documentation for exact semantics):
//   - marshaler_all / sizer_all / unmarshaler_all: generate Marshal, Size and
//     Unmarshal code for every message in this file.
//   - goproto_getters_all = false: do not generate Get* accessor methods.
//   - goproto_enum_prefix_all = false: do not prefix generated enum constants
//     with the enum type name.
option (gogoproto.marshaler_all) = true;
option (gogoproto.sizer_all) = true;
option (gogoproto.unmarshaler_all) = true;
option (gogoproto.goproto_getters_all) = false;
option (gogoproto.goproto_enum_prefix_all) = false;

// EntryType identifies the kind of an Entry (see Entry.Type). The numeric
// values are part of the wire format and must not be changed.
enum EntryType {
  // Default (zero) value.
  EntryNormal = 0;
  // Corresponds to pb.ConfChange.
  EntryConfChange = 1;
  // Corresponds to pb.ConfChangeV2.
  EntryConfChangeV2 = 2;
}

// Entry is a single record tagged with a term, an index and an EntryType,
// carrying an opaque Data payload.
//
// Term and Index are declared ahead of Type even though Type has the lower
// field number. Keep this declaration order: presumably it determines the
// field order of the generated struct, which the alignment notes below rely
// on (TODO confirm against the generated code).
message Entry {
  // Must be 64-bit aligned for atomic operations.
  optional uint64 Term = 2 [(gogoproto.nullable) = false];
  // Must be 64-bit aligned for atomic operations.
  optional uint64 Index = 3 [(gogoproto.nullable) = false];
  // Kind of entry; see EntryType.
  optional EntryType Type = 1 [(gogoproto.nullable) = false];
  // Payload bytes. The only field in this message without the non-nullable
  // option.
  optional bytes Data = 4;
}

// SnapshotMetadata is the metadata attached to a Snapshot: a ConfState plus
// an index and a term.
// NOTE(review): presumably index/term identify the last log entry covered by
// the snapshot; this file does not state it, so confirm against the users.
message SnapshotMetadata {
  // Membership configuration recorded with the snapshot.
  optional ConfState conf_state = 1 [(gogoproto.nullable) = false];
  optional uint64 index = 2 [(gogoproto.nullable) = false];
  optional uint64 term = 3 [(gogoproto.nullable) = false];
}

// Snapshot pairs an opaque data payload with its SnapshotMetadata.
message Snapshot {
  // Snapshot payload; the format is not defined in this file.
  optional bytes data = 1;
  // Metadata describing this snapshot.
  optional SnapshotMetadata metadata = 2 [(gogoproto.nullable) = false];
}

// MessageType enumerates the kinds of Message (see Message.type).
//
// The numeric values are part of the wire format: never renumber or reuse
// them; add new kinds with a fresh number at the end.
// NOTE(review): the meaning of each kind is not documented in this file.
enum MessageType {
  // Default (zero) value.
  MsgHup = 0;
  MsgBeat = 1;
  MsgProp = 2;
  MsgApp = 3;
  MsgAppResp = 4;
  MsgVote = 5;
  MsgVoteResp = 6;
  MsgSnap = 7;
  MsgHeartbeat = 8;
  MsgHeartbeatResp = 9;
  MsgUnreachable = 10;
  MsgSnapStatus = 11;
  MsgCheckQuorum = 12;
  MsgTransferLeader = 13;
  MsgTimeoutNow = 14;
  MsgReadIndex = 15;
  MsgReadIndexResp = 16;
  MsgPreVote = 17;
  MsgPreVoteResp = 18;
}

// Message is the envelope for every MessageType. Besides the scalar fields it
// can carry a batch of entries, a snapshot and an opaque context.
//
// All fields except context are marked non-nullable via gogoproto.
// NOTE(review): which fields are meaningful for which MessageType is not
// specified in this file.
message Message {
  // Kind of message; see MessageType.
  optional MessageType type = 1 [(gogoproto.nullable) = false];
  // Receiver and sender identifiers.
  optional uint64 to = 2 [(gogoproto.nullable) = false];
  optional uint64 from = 3 [(gogoproto.nullable) = false];
  optional uint64 term = 4 [(gogoproto.nullable) = false];
  optional uint64 logTerm = 5 [(gogoproto.nullable) = false];
  optional uint64 index = 6 [(gogoproto.nullable) = false];
  // Entries carried by this message, if any.
  repeated Entry entries = 7 [(gogoproto.nullable) = false];
  optional uint64 commit = 8 [(gogoproto.nullable) = false];
  // Snapshot carried by this message, if any.
  optional Snapshot snapshot = 9 [(gogoproto.nullable) = false];
  optional bool reject = 10 [(gogoproto.nullable) = false];
  optional uint64 rejectHint = 11 [(gogoproto.nullable) = false];
  // Opaque bytes; interpretation is not defined in this file.
  optional bytes context = 12;
}

// HardState groups three counters: term, vote and commit.
// NOTE(review): presumably this is the state a node must persist; this file
// does not say so, so confirm against the users of this message.
message HardState {
  optional uint64 term = 1 [(gogoproto.nullable) = false];
  optional uint64 vote = 2 [(gogoproto.nullable) = false];
  optional uint64 commit = 3 [(gogoproto.nullable) = false];
}

// ConfChangeTransition specifies the behavior of a configuration change with
// respect to joint consensus. It is carried in ConfChangeV2.transition.
enum ConfChangeTransition {
  // Automatically use the simple protocol if possible, otherwise fall back
  // to ConfChangeTransitionJointImplicit. Most applications will want to use
  // this.
  ConfChangeTransitionAuto = 0;
  // Use joint consensus unconditionally, and transition out of it
  // automatically (by proposing a zero configuration change).
  //
  // This option is suitable for applications that want to minimize the time
  // spent in the joint configuration and do not store the joint configuration
  // in the state machine (outside of InitialState).
  ConfChangeTransitionJointImplicit = 1;
  // Use joint consensus and remain in the joint configuration until the
  // application proposes a no-op configuration change. This is suitable for
  // applications that want to explicitly control the transitions, for example
  // to use a custom payload (via the Context field).
  ConfChangeTransitionJointExplicit = 2;
}

// ConfState describes a membership configuration: the incoming config
// (voters, learners) and, when the configuration is joint, the outgoing
// config (voters_outgoing) plus the learners that take effect once the
// outgoing config is removed (learners_next).
message ConfState {
  // The voters in the incoming config. (If the configuration is not joint,
  // then the outgoing config is empty).
  repeated uint64 voters = 1;
  // The learners in the incoming config.
  repeated uint64 learners = 2;
  // The voters in the outgoing config.
  repeated uint64 voters_outgoing = 3;
  // The nodes that will become learners when the outgoing config is removed.
  // These nodes are necessarily currently voters in the outgoing config
  // (voters_outgoing), or they would have been added to the incoming config
  // right away.
  repeated uint64 learners_next = 4;
  // If set, the config is joint and Raft will automatically transition into
  // the final config (i.e. remove the outgoing config) when this is safe.
  optional bool auto_leave = 5 [(gogoproto.nullable) = false];
}

// ConfChangeType names the operation applied to a single node in a
// configuration change. It is used by ConfChange.type and
// ConfChangeSingle.type. The numeric values are part of the wire format.
enum ConfChangeType {
  // Default (zero) value.
  ConfChangeAddNode = 0;
  ConfChangeRemoveNode = 1;
  ConfChangeUpdateNode = 2;
  ConfChangeAddLearnerNode = 3;
}

// ConfChange describes one configuration change: an operation (type) applied
// to a node (node_id), with an optional opaque context.
//
// Field numbers do not follow declaration order (id is field 1 but is
// declared last); the numbers are the wire contract and must stay as they
// are.
message ConfChange {
  // Operation to perform; see ConfChangeType.
  optional ConfChangeType type = 2 [(gogoproto.nullable) = false];
  // Node the operation applies to. customname sets the generated Go field
  // name to "NodeID".
  optional uint64 node_id = 3 [(gogoproto.nullable) = false, (gogoproto.customname) = "NodeID"];
  // Opaque bytes attached to the change.
  optional bytes context = 4;

  // NB: this is used only by etcd to thread through a unique identifier.
  // Ideally it should really use the Context instead. No counterpart to
  // this field exists in ConfChangeV2.
  optional uint64 id = 1 [(gogoproto.nullable) = false, (gogoproto.customname) = "ID"];
}

// ConfChangeSingle is an individual configuration change operation. Multiple
// such operations can be carried out atomically via a ConfChangeV2.
message ConfChangeSingle {
  // Operation to perform; see ConfChangeType.
  optional ConfChangeType type = 1 [(gogoproto.nullable) = false];
  // Node the operation applies to. customname sets the generated Go field
  // name to "NodeID".
  optional uint64 node_id = 2 [(gogoproto.nullable) = false, (gogoproto.customname) = "NodeID"];
}

// ConfChangeV2 messages initiate configuration changes. They support both the
// simple "one at a time" membership change protocol and full Joint Consensus
// allowing for arbitrary changes in membership.
//
// The supplied context is treated as an opaque payload and can be used to
// attach an action on the state machine to the application of the config change
// proposal. Note that contrary to Joint Consensus as outlined in the Raft
// paper[1], configuration changes become active when they are *applied* to the
// state machine (not when they are appended to the log).
//
// The simple protocol can be used whenever only a single change is made.
//
// Non-simple changes require the use of Joint Consensus, for which two
// configuration changes are run. The first configuration change specifies the
// desired changes and transitions the Raft group into the joint configuration,
// in which quorum requires a majority of both the pre-changes and post-changes
// configuration. Joint Consensus avoids entering fragile intermediate
// configurations that could compromise survivability. For example, without the
// use of Joint Consensus and running across three availability zones with a
// replication factor of three, it is not possible to replace a voter without
// entering an intermediate configuration that does not survive the outage of
// one availability zone.
//
// The provided ConfChangeTransition specifies how (and whether) Joint Consensus
// is used, and assigns the task of leaving the joint configuration either to
// Raft or the application. Leaving the joint configuration is accomplished by
// proposing a ConfChangeV2 with only and optionally the Context field
// populated.
//
// For details on Raft membership changes, see:
//
// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf
message ConfChangeV2 {
  // How (and whether) Joint Consensus is used; see ConfChangeTransition.
  optional ConfChangeTransition transition = 1 [(gogoproto.nullable) = false];
  // The individual operations that make up this change. Left empty when the
  // message is proposed to leave the joint configuration (see above).
  repeated ConfChangeSingle changes = 2 [(gogoproto.nullable) = false];
  // Opaque payload; see the description above.
  optional bytes context = 3;
}