// Schema for package raftpb. The file uses proto2 syntax, so every singular
// field below carries an explicit `optional` label.
syntax = "proto2";
package raftpb;

import "gogoproto/gogo.proto";

// File-wide gogoproto code-generation switches (semantics as described in the
// gogoproto extensions documentation):
//   - marshaler_all / sizer_all / unmarshaler_all: generate Marshal, Size and
//     Unmarshal methods for every message in this file.
//   - goproto_getters_all = false: do not generate getter methods.
//   - goproto_enum_prefix_all = false: do not prefix generated enum constant
//     names with the enum type name.
option (gogoproto.marshaler_all) = true;
option (gogoproto.sizer_all) = true;
option (gogoproto.unmarshaler_all) = true;
option (gogoproto.goproto_getters_all) = false;
option (gogoproto.goproto_enum_prefix_all) = false;

// EntryType is the type tag stored in Entry.Type.
//
// The numeric values are part of the wire format; add new values with fresh
// numbers rather than renumbering existing ones.
enum EntryType {
  // Zero value, and therefore the proto2 default when Entry.Type is unset.
  EntryNormal = 0;
  EntryConfChange = 1; // corresponds to pb.ConfChange
  EntryConfChangeV2 = 2; // corresponds to pb.ConfChangeV2
}

// Entry is a single raft entry: a Term/Index pair, a type tag and an optional
// opaque payload.
//
// Declaration order deliberately differs from field-number order: Term and
// Index (numbers 2 and 3) are declared before Type (number 1).
// NOTE(review): presumably this keeps the two uint64 fields first in the
// generated struct so the alignment requirement below holds; do not reorder
// the declarations or change the field numbers.
message Entry {
  optional uint64 Term = 2 [(gogoproto.nullable) = false]; // must be 64-bit aligned for atomic operations
  optional uint64 Index = 3 [(gogoproto.nullable) = false]; // must be 64-bit aligned for atomic operations
  optional EntryType Type = 1 [(gogoproto.nullable) = false];
  // Payload bytes. The only field of this message without nullable = false.
  optional bytes Data = 4;
}

// SnapshotMetadata is the descriptive part of a Snapshot: a ConfState plus an
// index and a term.
// NOTE(review): index/term presumably identify the last entry covered by the
// snapshot; that is not stated in this file, so confirm before relying on it.
message SnapshotMetadata {
  optional ConfState conf_state = 1 [(gogoproto.nullable) = false];
  optional uint64 index = 2 [(gogoproto.nullable) = false];
  optional uint64 term = 3 [(gogoproto.nullable) = false];
}

// Snapshot pairs an opaque data blob with the SnapshotMetadata describing it.
message Snapshot {
  // Snapshot payload; this schema places no structure on the bytes.
  optional bytes data = 1;
  optional SnapshotMetadata metadata = 2 [(gogoproto.nullable) = false];
}

// MessageType enumerates the kinds of Message; it is the type of Message.type.
//
// Several values come in request/response pairs (MsgApp/MsgAppResp,
// MsgVote/MsgVoteResp, MsgHeartbeat/MsgHeartbeatResp,
// MsgReadIndex/MsgReadIndexResp, MsgPreVote/MsgPreVoteResp).
//
// The numeric values are part of the wire format; add new values with fresh
// numbers rather than renumbering existing ones.
enum MessageType {
  // Zero value, and therefore the proto2 default when Message.type is unset.
  MsgHup = 0;
  MsgBeat = 1;
  MsgProp = 2;
  MsgApp = 3;
  MsgAppResp = 4;
  MsgVote = 5;
  MsgVoteResp = 6;
  MsgSnap = 7;
  MsgHeartbeat = 8;
  MsgHeartbeatResp = 9;
  MsgUnreachable = 10;
  MsgSnapStatus = 11;
  MsgCheckQuorum = 12;
  MsgTransferLeader = 13;
  MsgTimeoutNow = 14;
  MsgReadIndex = 15;
  MsgReadIndexResp = 16;
  MsgPreVote = 17;
  MsgPreVoteResp = 18;
}

// Message is the envelope for every MessageType. Which of the fields are
// meaningful for a given type is not specified in this file.
//
// Every field except `context` is marked nullable = false.
message Message {
  // Discriminator selecting the kind of message.
  optional MessageType type = 1 [(gogoproto.nullable) = false];
  // NOTE(review): to/from are presumably the recipient and sender node IDs;
  // confirm against the consumers of this message.
  optional uint64 to = 2 [(gogoproto.nullable) = false];
  optional uint64 from = 3 [(gogoproto.nullable) = false];
  optional uint64 term = 4 [(gogoproto.nullable) = false];
  optional uint64 logTerm = 5 [(gogoproto.nullable) = false];
  optional uint64 index = 6 [(gogoproto.nullable) = false];
  // Zero or more entries carried by the message.
  repeated Entry entries = 7 [(gogoproto.nullable) = false];
  optional uint64 commit = 8 [(gogoproto.nullable) = false];
  optional Snapshot snapshot = 9 [(gogoproto.nullable) = false];
  optional bool reject = 10 [(gogoproto.nullable) = false];
  optional uint64 rejectHint = 11 [(gogoproto.nullable) = false];
  // Opaque bytes; this schema places no structure on them.
  optional bytes context = 12;
}

// HardState groups three uint64 values: term, vote and commit.
// NOTE(review): presumably the per-node state that must be persisted durably;
// that contract is not stated in this file, so confirm with the consumers.
message HardState {
  optional uint64 term = 1 [(gogoproto.nullable) = false];
  optional uint64 vote = 2 [(gogoproto.nullable) = false];
  optional uint64 commit = 3 [(gogoproto.nullable) = false];
}

// ConfChangeTransition specifies the behavior of a configuration change with
// respect to joint consensus. It is the type of ConfChangeV2.transition.
enum ConfChangeTransition {
  // Automatically use the simple protocol if possible, otherwise fall back
  // to ConfChangeTransitionJointImplicit. Most applications will want to use
  // this. As the zero value it is also the proto2 default.
  ConfChangeTransitionAuto = 0;
  // Use joint consensus unconditionally, and transition out of it
  // automatically (by proposing a zero configuration change).
  //
  // This option is suitable for applications that want to minimize the time
  // spent in the joint configuration and do not store the joint configuration
  // in the state machine (outside of InitialState).
  ConfChangeTransitionJointImplicit = 1;
  // Use joint consensus and remain in the joint configuration until the
  // application proposes a no-op configuration change. This is suitable for
  // applications that want to explicitly control the transitions, for example
  // to use a custom payload (via the Context field).
  ConfChangeTransitionJointExplicit = 2;
}

// ConfState describes a (possibly joint) membership configuration as sets of
// node IDs. It is embedded in SnapshotMetadata.conf_state.
message ConfState {
  // The voters in the incoming config. (If the configuration is not joint,
  // then the outgoing config is empty).
  repeated uint64 voters = 1;
  // The learners in the incoming config.
  repeated uint64 learners = 2;
  // The voters in the outgoing config.
  repeated uint64 voters_outgoing = 3;
  // The nodes that will become learners when the outgoing config is removed.
  // These nodes are necessarily currently in voters_outgoing (or they would
  // have been added to the incoming config right away).
  repeated uint64 learners_next = 4;
  // If set, the config is joint and Raft will automatically transition into
  // the final config (i.e. remove the outgoing config) when this is safe.
  optional bool auto_leave = 5 [(gogoproto.nullable) = false];
}

// ConfChangeType names the operation applied to a single node by a
// configuration change. It is the type of ConfChange.type and
// ConfChangeSingle.type.
//
// The numeric values are part of the wire format; add new values with fresh
// numbers rather than renumbering existing ones.
enum ConfChangeType {
  // Zero value, and therefore the proto2 default when the type is unset.
  ConfChangeAddNode = 0;
  ConfChangeRemoveNode = 1;
  ConfChangeUpdateNode = 2;
  ConfChangeAddLearnerNode = 3;
}

// ConfChange is the original, single-operation configuration change message:
// one ConfChangeType applied to one node, plus an optional opaque context.
//
// Field numbers do not follow declaration order (`id` is number 1 but is
// declared last); keep both the numbers and the declaration order as they are.
message ConfChange {
  optional ConfChangeType type = 2 [(gogoproto.nullable) = false];
  // customname sets the generated Go field name to NodeID.
  optional uint64 node_id = 3 [(gogoproto.nullable) = false, (gogoproto.customname) = "NodeID"];
  optional bytes context = 4;

  // NB: this is used only by etcd to thread through a unique identifier.
  // Ideally it should really use the Context instead. No counterpart to
  // this field exists in ConfChangeV2.
  optional uint64 id = 1 [(gogoproto.nullable) = false, (gogoproto.customname) = "ID"];
}

// ConfChangeSingle is an individual configuration change operation. Multiple
// such operations can be carried out atomically via a ConfChangeV2.
message ConfChangeSingle {
  // The operation to perform.
  optional ConfChangeType type = 1 [(gogoproto.nullable) = false];
  // The node the operation applies to; customname sets the generated Go field
  // name to NodeID.
  optional uint64 node_id = 2 [(gogoproto.nullable) = false, (gogoproto.customname) = "NodeID"];
}

// ConfChangeV2 messages initiate configuration changes. They support both the
// simple "one at a time" membership change protocol and full Joint Consensus
// allowing for arbitrary changes in membership.
//
// The supplied context is treated as an opaque payload and can be used to
// attach an action on the state machine to the application of the config change
// proposal. Note that contrary to Joint Consensus as outlined in the Raft
// paper[1], configuration changes become active when they are *applied* to the
// state machine (not when they are appended to the log).
//
// The simple protocol can be used whenever only a single change is made.
//
// Non-simple changes require the use of Joint Consensus, for which two
// configuration changes are run. The first configuration change specifies the
// desired changes and transitions the Raft group into the joint configuration,
// in which quorum requires a majority of both the pre-changes and post-changes
// configuration. Joint Consensus avoids entering fragile intermediate
// configurations that could compromise survivability. For example, without the
// use of Joint Consensus and running across three availability zones with a
// replication factor of three, it is not possible to replace a voter without
// entering an intermediate configuration that does not survive the outage of
// one availability zone.
//
// The provided ConfChangeTransition specifies how (and whether) Joint Consensus
// is used, and assigns the task of leaving the joint configuration either to
// Raft or the application. Leaving the joint configuration is accomplished by
// proposing a ConfChangeV2 in which at most the Context field is populated
// (populating it is optional).
//
// For details on Raft membership changes, see:
//
// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf
message ConfChangeV2 {
  // How (and whether) Joint Consensus is used for this change.
  optional ConfChangeTransition transition = 1 [(gogoproto.nullable) = false];
  // The individual operations to carry out together.
  repeated ConfChangeSingle changes = 2 [(gogoproto.nullable) = false];
  // Opaque payload; see the message comment above.
  optional bytes context = 3;
}