khenaidoo | ab1f7bd | 2019-11-14 14:00:27 -0500 | [diff] [blame] | 1 | // Copyright 2015 The etcd Authors |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | package etcdserver |
| 16 | |
| 17 | import ( |
| 18 | "context" |
| 19 | "fmt" |
| 20 | "path/filepath" |
| 21 | "sort" |
| 22 | "strings" |
| 23 | "time" |
| 24 | |
| 25 | "go.etcd.io/etcd/pkg/netutil" |
| 26 | "go.etcd.io/etcd/pkg/transport" |
| 27 | "go.etcd.io/etcd/pkg/types" |
| 28 | |
| 29 | bolt "go.etcd.io/bbolt" |
| 30 | "go.uber.org/zap" |
| 31 | "go.uber.org/zap/zapcore" |
| 32 | ) |
| 33 | |
// ServerConfig holds the configuration of etcd as taken from the command line or discovery.
type ServerConfig struct {
	// Name is the local member name; it is the key looked up in
	// InitialPeerURLsMap when the configuration is verified.
	Name string
	// DiscoveryURL being non-empty makes ShouldDiscover report true.
	DiscoveryURL   string
	DiscoveryProxy string
	ClientURLs     types.URLs
	// PeerURLs is compared against InitialPeerURLsMap[Name] by
	// advertiseMatchesCluster during bootstrap verification.
	PeerURLs types.URLs
	// DataDir is the directory under which MemberDir ("member") is placed.
	DataDir string
	// DedicatedWALDir, when non-empty, makes etcd write the WAL to this
	// directory rather than to dataDir/member/wal.
	DedicatedWALDir string

	SnapshotCount uint64

	// SnapshotCatchUpEntries is the number of entries kept for a slow follower
	// to catch up after the raft storage entries are compacted.
	// We expect the follower to have millisecond-level latency with the leader.
	// The max throughput is around 10K, so keeping 5K entries is enough to
	// help a follower catch up.
	// WARNING: only change this for tests. Always use "DefaultSnapshotCatchUpEntries".
	SnapshotCatchUpEntries uint64

	MaxSnapFiles uint
	MaxWALFiles  uint

	// BackendBatchInterval is the maximum time before the backend transaction is committed.
	BackendBatchInterval time.Duration
	// BackendBatchLimit is the maximum number of operations before the backend transaction is committed.
	BackendBatchLimit int

	// BackendFreelistType is the type of the backend boltdb freelist.
	BackendFreelistType bolt.FreelistType

	InitialPeerURLsMap  types.URLsMap
	InitialClusterToken string
	NewCluster          bool
	PeerTLSInfo         transport.TLSInfo

	CORS map[string]struct{}

	// HostWhitelist lists acceptable hostnames from client requests.
	// If the server is insecure (no TLS), it only accepts requests
	// whose Host header value exists in this whitelist.
	HostWhitelist map[string]struct{}

	// TickMs and ElectionTicks are multiplied together to obtain the
	// election timeout in milliseconds (see electionTimeout).
	TickMs        uint
	ElectionTicks int

	// If InitialElectionTickAdvance is true, the local member fast-forwards
	// election ticks to speed up the "initial" leader election trigger. This
	// benefits the case of larger election ticks. For instance, a cross
	// datacenter deployment may require a longer election timeout of 10 seconds.
	// If true, the local node does not need to wait up to 10 seconds. Instead,
	// it forwards its election ticks to 8 seconds, leaving only 2 seconds
	// before leader election.
	//
	// Major assumptions are that:
	//  - the cluster has no active leader, thus advancing ticks enables faster
	//    leader election, or
	//  - the cluster already has an established leader, and a rejoining follower
	//    is likely to receive heartbeats from the leader after the tick advance
	//    and before the election timeout.
	//
	// However, when the network from the leader to the rejoining follower is
	// congested, and the follower does not receive a leader heartbeat within
	// the remaining election ticks, a disruptive election has to happen, thus
	// affecting cluster availability.
	//
	// Disabling this would slow down the initial bootstrap process for cross
	// datacenter deployments. Make your own tradeoffs by configuring
	// --initial-election-tick-advance at the cost of slow initial bootstrap.
	//
	// If single-node, it advances ticks regardless.
	//
	// See https://github.com/etcd-io/etcd/issues/9333 for more detail.
	InitialElectionTickAdvance bool

	// BootstrapTimeout, when zero, is replaced by a one-second default
	// (see bootstrapTimeout).
	BootstrapTimeout time.Duration

	AutoCompactionRetention time.Duration
	AutoCompactionMode      string
	CompactionBatchLimit    int
	QuotaBackendBytes       int64
	MaxTxnOps               uint

	// MaxRequestBytes is the maximum request size to send over raft.
	MaxRequestBytes uint

	StrictReconfigCheck bool

	// ClientCertAuthEnabled is true when the cert has been signed by the client CA.
	ClientCertAuthEnabled bool

	AuthToken  string
	BcryptCost uint

	// InitialCorruptCheck is true to check data corruption on boot
	// before serving any peer/client traffic.
	InitialCorruptCheck bool
	CorruptCheckTime    time.Duration

	// PreVote is true to enable Raft Pre-Vote.
	PreVote bool

	// Logger logs server-side operations.
	// If not nil, it disables "capnslog" and uses the given logger.
	Logger *zap.Logger

	// LoggerConfig is the server logger configuration for the Raft logger.
	// Must be either: "LoggerConfig != nil" or "LoggerCore != nil && LoggerWriteSyncer != nil".
	LoggerConfig *zap.Config
	// LoggerCore is the "zapcore.Core" for the raft logger.
	// Must be either: "LoggerConfig != nil" or "LoggerCore != nil && LoggerWriteSyncer != nil".
	LoggerCore        zapcore.Core
	LoggerWriteSyncer zapcore.WriteSyncer

	Debug bool

	ForceNewCluster bool

	// EnableLeaseCheckpoint enables the primary lessor to persist lease
	// remainingTTL to prevent indefinite auto-renewal of long-lived leases.
	EnableLeaseCheckpoint bool
	// LeaseCheckpointInterval is the wait duration between lease checkpoints.
	LeaseCheckpointInterval time.Duration

	EnableGRPCGateway bool
}
| 161 | |
| 162 | // VerifyBootstrap sanity-checks the initial config for bootstrap case |
| 163 | // and returns an error for things that should never happen. |
| 164 | func (c *ServerConfig) VerifyBootstrap() error { |
| 165 | if err := c.hasLocalMember(); err != nil { |
| 166 | return err |
| 167 | } |
| 168 | if err := c.advertiseMatchesCluster(); err != nil { |
| 169 | return err |
| 170 | } |
| 171 | if checkDuplicateURL(c.InitialPeerURLsMap) { |
| 172 | return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap) |
| 173 | } |
| 174 | if c.InitialPeerURLsMap.String() == "" && c.DiscoveryURL == "" { |
| 175 | return fmt.Errorf("initial cluster unset and no discovery URL found") |
| 176 | } |
| 177 | return nil |
| 178 | } |
| 179 | |
| 180 | // VerifyJoinExisting sanity-checks the initial config for join existing cluster |
| 181 | // case and returns an error for things that should never happen. |
| 182 | func (c *ServerConfig) VerifyJoinExisting() error { |
| 183 | // The member has announced its peer urls to the cluster before starting; no need to |
| 184 | // set the configuration again. |
| 185 | if err := c.hasLocalMember(); err != nil { |
| 186 | return err |
| 187 | } |
| 188 | if checkDuplicateURL(c.InitialPeerURLsMap) { |
| 189 | return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap) |
| 190 | } |
| 191 | if c.DiscoveryURL != "" { |
| 192 | return fmt.Errorf("discovery URL should not be set when joining existing initial cluster") |
| 193 | } |
| 194 | return nil |
| 195 | } |
| 196 | |
| 197 | // hasLocalMember checks that the cluster at least contains the local server. |
| 198 | func (c *ServerConfig) hasLocalMember() error { |
| 199 | if urls := c.InitialPeerURLsMap[c.Name]; urls == nil { |
| 200 | return fmt.Errorf("couldn't find local name %q in the initial cluster configuration", c.Name) |
| 201 | } |
| 202 | return nil |
| 203 | } |
| 204 | |
| 205 | // advertiseMatchesCluster confirms peer URLs match those in the cluster peer list. |
| 206 | func (c *ServerConfig) advertiseMatchesCluster() error { |
| 207 | urls, apurls := c.InitialPeerURLsMap[c.Name], c.PeerURLs.StringSlice() |
| 208 | urls.Sort() |
| 209 | sort.Strings(apurls) |
| 210 | ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second) |
| 211 | defer cancel() |
| 212 | ok, err := netutil.URLStringsEqual(ctx, c.Logger, apurls, urls.StringSlice()) |
| 213 | if ok { |
| 214 | return nil |
| 215 | } |
| 216 | |
| 217 | initMap, apMap := make(map[string]struct{}), make(map[string]struct{}) |
| 218 | for _, url := range c.PeerURLs { |
| 219 | apMap[url.String()] = struct{}{} |
| 220 | } |
| 221 | for _, url := range c.InitialPeerURLsMap[c.Name] { |
| 222 | initMap[url.String()] = struct{}{} |
| 223 | } |
| 224 | |
| 225 | missing := []string{} |
| 226 | for url := range initMap { |
| 227 | if _, ok := apMap[url]; !ok { |
| 228 | missing = append(missing, url) |
| 229 | } |
| 230 | } |
| 231 | if len(missing) > 0 { |
| 232 | for i := range missing { |
| 233 | missing[i] = c.Name + "=" + missing[i] |
| 234 | } |
| 235 | mstr := strings.Join(missing, ",") |
| 236 | apStr := strings.Join(apurls, ",") |
| 237 | return fmt.Errorf("--initial-cluster has %s but missing from --initial-advertise-peer-urls=%s (%v)", mstr, apStr, err) |
| 238 | } |
| 239 | |
| 240 | for url := range apMap { |
| 241 | if _, ok := initMap[url]; !ok { |
| 242 | missing = append(missing, url) |
| 243 | } |
| 244 | } |
| 245 | if len(missing) > 0 { |
| 246 | mstr := strings.Join(missing, ",") |
| 247 | umap := types.URLsMap(map[string]types.URLs{c.Name: c.PeerURLs}) |
| 248 | return fmt.Errorf("--initial-advertise-peer-urls has %s but missing from --initial-cluster=%s", mstr, umap.String()) |
| 249 | } |
| 250 | |
| 251 | // resolved URLs from "--initial-advertise-peer-urls" and "--initial-cluster" did not match or failed |
| 252 | apStr := strings.Join(apurls, ",") |
| 253 | umap := types.URLsMap(map[string]types.URLs{c.Name: c.PeerURLs}) |
| 254 | return fmt.Errorf("failed to resolve %s to match --initial-cluster=%s (%v)", apStr, umap.String(), err) |
| 255 | } |
| 256 | |
| 257 | func (c *ServerConfig) MemberDir() string { return filepath.Join(c.DataDir, "member") } |
| 258 | |
| 259 | func (c *ServerConfig) WALDir() string { |
| 260 | if c.DedicatedWALDir != "" { |
| 261 | return c.DedicatedWALDir |
| 262 | } |
| 263 | return filepath.Join(c.MemberDir(), "wal") |
| 264 | } |
| 265 | |
| 266 | func (c *ServerConfig) SnapDir() string { return filepath.Join(c.MemberDir(), "snap") } |
| 267 | |
| 268 | func (c *ServerConfig) ShouldDiscover() bool { return c.DiscoveryURL != "" } |
| 269 | |
| 270 | // ReqTimeout returns timeout for request to finish. |
| 271 | func (c *ServerConfig) ReqTimeout() time.Duration { |
| 272 | // 5s for queue waiting, computation and disk IO delay |
| 273 | // + 2 * election timeout for possible leader election |
| 274 | return 5*time.Second + 2*time.Duration(c.ElectionTicks*int(c.TickMs))*time.Millisecond |
| 275 | } |
| 276 | |
| 277 | func (c *ServerConfig) electionTimeout() time.Duration { |
| 278 | return time.Duration(c.ElectionTicks*int(c.TickMs)) * time.Millisecond |
| 279 | } |
| 280 | |
| 281 | func (c *ServerConfig) peerDialTimeout() time.Duration { |
| 282 | // 1s for queue wait and election timeout |
| 283 | return time.Second + time.Duration(c.ElectionTicks*int(c.TickMs))*time.Millisecond |
| 284 | } |
| 285 | |
| 286 | func checkDuplicateURL(urlsmap types.URLsMap) bool { |
| 287 | um := make(map[string]bool) |
| 288 | for _, urls := range urlsmap { |
| 289 | for _, url := range urls { |
| 290 | u := url.String() |
| 291 | if um[u] { |
| 292 | return true |
| 293 | } |
| 294 | um[u] = true |
| 295 | } |
| 296 | } |
| 297 | return false |
| 298 | } |
| 299 | |
| 300 | func (c *ServerConfig) bootstrapTimeout() time.Duration { |
| 301 | if c.BootstrapTimeout != 0 { |
| 302 | return c.BootstrapTimeout |
| 303 | } |
| 304 | return time.Second |
| 305 | } |
| 306 | |
| 307 | func (c *ServerConfig) backendPath() string { return filepath.Join(c.SnapDir(), "db") } |