// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15package etcdserver
16
17import (
18 "context"
19 "fmt"
20 "path/filepath"
21 "sort"
22 "strings"
23 "time"
24
25 "go.etcd.io/etcd/pkg/netutil"
26 "go.etcd.io/etcd/pkg/transport"
27 "go.etcd.io/etcd/pkg/types"
28
29 bolt "go.etcd.io/bbolt"
30 "go.uber.org/zap"
31 "go.uber.org/zap/zapcore"
32)
33
34// ServerConfig holds the configuration of etcd as taken from the command line or discovery.
35type ServerConfig struct {
36 Name string
37 DiscoveryURL string
38 DiscoveryProxy string
39 ClientURLs types.URLs
40 PeerURLs types.URLs
41 DataDir string
42 // DedicatedWALDir config will make the etcd to write the WAL to the WALDir
43 // rather than the dataDir/member/wal.
44 DedicatedWALDir string
45
46 SnapshotCount uint64
47
48 // SnapshotCatchUpEntries is the number of entries for a slow follower
49 // to catch-up after compacting the raft storage entries.
50 // We expect the follower has a millisecond level latency with the leader.
51 // The max throughput is around 10K. Keep a 5K entries is enough for helping
52 // follower to catch up.
53 // WARNING: only change this for tests. Always use "DefaultSnapshotCatchUpEntries"
54 SnapshotCatchUpEntries uint64
55
56 MaxSnapFiles uint
57 MaxWALFiles uint
58
59 // BackendBatchInterval is the maximum time before commit the backend transaction.
60 BackendBatchInterval time.Duration
61 // BackendBatchLimit is the maximum operations before commit the backend transaction.
62 BackendBatchLimit int
63
64 // BackendFreelistType is the type of the backend boltdb freelist.
65 BackendFreelistType bolt.FreelistType
66
67 InitialPeerURLsMap types.URLsMap
68 InitialClusterToken string
69 NewCluster bool
70 PeerTLSInfo transport.TLSInfo
71
72 CORS map[string]struct{}
73
74 // HostWhitelist lists acceptable hostnames from client requests.
75 // If server is insecure (no TLS), server only accepts requests
76 // whose Host header value exists in this white list.
77 HostWhitelist map[string]struct{}
78
79 TickMs uint
80 ElectionTicks int
81
82 // InitialElectionTickAdvance is true, then local member fast-forwards
83 // election ticks to speed up "initial" leader election trigger. This
84 // benefits the case of larger election ticks. For instance, cross
85 // datacenter deployment may require longer election timeout of 10-second.
86 // If true, local node does not need wait up to 10-second. Instead,
87 // forwards its election ticks to 8-second, and have only 2-second left
88 // before leader election.
89 //
90 // Major assumptions are that:
91 // - cluster has no active leader thus advancing ticks enables faster
92 // leader election, or
93 // - cluster already has an established leader, and rejoining follower
94 // is likely to receive heartbeats from the leader after tick advance
95 // and before election timeout.
96 //
97 // However, when network from leader to rejoining follower is congested,
98 // and the follower does not receive leader heartbeat within left election
99 // ticks, disruptive election has to happen thus affecting cluster
100 // availabilities.
101 //
102 // Disabling this would slow down initial bootstrap process for cross
103 // datacenter deployments. Make your own tradeoffs by configuring
104 // --initial-election-tick-advance at the cost of slow initial bootstrap.
105 //
106 // If single-node, it advances ticks regardless.
107 //
108 // See https://github.com/etcd-io/etcd/issues/9333 for more detail.
109 InitialElectionTickAdvance bool
110
111 BootstrapTimeout time.Duration
112
113 AutoCompactionRetention time.Duration
114 AutoCompactionMode string
115 CompactionBatchLimit int
116 QuotaBackendBytes int64
117 MaxTxnOps uint
118
119 // MaxRequestBytes is the maximum request size to send over raft.
120 MaxRequestBytes uint
121
122 StrictReconfigCheck bool
123
124 // ClientCertAuthEnabled is true when cert has been signed by the client CA.
125 ClientCertAuthEnabled bool
126
127 AuthToken string
128 BcryptCost uint
129
130 // InitialCorruptCheck is true to check data corruption on boot
131 // before serving any peer/client traffic.
132 InitialCorruptCheck bool
133 CorruptCheckTime time.Duration
134
135 // PreVote is true to enable Raft Pre-Vote.
136 PreVote bool
137
138 // Logger logs server-side operations.
139 // If not nil, it disables "capnslog" and uses the given logger.
140 Logger *zap.Logger
141
142 // LoggerConfig is server logger configuration for Raft logger.
143 // Must be either: "LoggerConfig != nil" or "LoggerCore != nil && LoggerWriteSyncer != nil".
144 LoggerConfig *zap.Config
145 // LoggerCore is "zapcore.Core" for raft logger.
146 // Must be either: "LoggerConfig != nil" or "LoggerCore != nil && LoggerWriteSyncer != nil".
147 LoggerCore zapcore.Core
148 LoggerWriteSyncer zapcore.WriteSyncer
149
150 Debug bool
151
152 ForceNewCluster bool
153
154 // EnableLeaseCheckpoint enables primary lessor to persist lease remainingTTL to prevent indefinite auto-renewal of long lived leases.
155 EnableLeaseCheckpoint bool
156 // LeaseCheckpointInterval time.Duration is the wait duration between lease checkpoints.
157 LeaseCheckpointInterval time.Duration
158
159 EnableGRPCGateway bool
160}
161
162// VerifyBootstrap sanity-checks the initial config for bootstrap case
163// and returns an error for things that should never happen.
164func (c *ServerConfig) VerifyBootstrap() error {
165 if err := c.hasLocalMember(); err != nil {
166 return err
167 }
168 if err := c.advertiseMatchesCluster(); err != nil {
169 return err
170 }
171 if checkDuplicateURL(c.InitialPeerURLsMap) {
172 return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap)
173 }
174 if c.InitialPeerURLsMap.String() == "" && c.DiscoveryURL == "" {
175 return fmt.Errorf("initial cluster unset and no discovery URL found")
176 }
177 return nil
178}
179
180// VerifyJoinExisting sanity-checks the initial config for join existing cluster
181// case and returns an error for things that should never happen.
182func (c *ServerConfig) VerifyJoinExisting() error {
183 // The member has announced its peer urls to the cluster before starting; no need to
184 // set the configuration again.
185 if err := c.hasLocalMember(); err != nil {
186 return err
187 }
188 if checkDuplicateURL(c.InitialPeerURLsMap) {
189 return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap)
190 }
191 if c.DiscoveryURL != "" {
192 return fmt.Errorf("discovery URL should not be set when joining existing initial cluster")
193 }
194 return nil
195}
196
197// hasLocalMember checks that the cluster at least contains the local server.
198func (c *ServerConfig) hasLocalMember() error {
199 if urls := c.InitialPeerURLsMap[c.Name]; urls == nil {
200 return fmt.Errorf("couldn't find local name %q in the initial cluster configuration", c.Name)
201 }
202 return nil
203}
204
205// advertiseMatchesCluster confirms peer URLs match those in the cluster peer list.
206func (c *ServerConfig) advertiseMatchesCluster() error {
207 urls, apurls := c.InitialPeerURLsMap[c.Name], c.PeerURLs.StringSlice()
208 urls.Sort()
209 sort.Strings(apurls)
210 ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
211 defer cancel()
212 ok, err := netutil.URLStringsEqual(ctx, c.Logger, apurls, urls.StringSlice())
213 if ok {
214 return nil
215 }
216
217 initMap, apMap := make(map[string]struct{}), make(map[string]struct{})
218 for _, url := range c.PeerURLs {
219 apMap[url.String()] = struct{}{}
220 }
221 for _, url := range c.InitialPeerURLsMap[c.Name] {
222 initMap[url.String()] = struct{}{}
223 }
224
225 missing := []string{}
226 for url := range initMap {
227 if _, ok := apMap[url]; !ok {
228 missing = append(missing, url)
229 }
230 }
231 if len(missing) > 0 {
232 for i := range missing {
233 missing[i] = c.Name + "=" + missing[i]
234 }
235 mstr := strings.Join(missing, ",")
236 apStr := strings.Join(apurls, ",")
237 return fmt.Errorf("--initial-cluster has %s but missing from --initial-advertise-peer-urls=%s (%v)", mstr, apStr, err)
238 }
239
240 for url := range apMap {
241 if _, ok := initMap[url]; !ok {
242 missing = append(missing, url)
243 }
244 }
245 if len(missing) > 0 {
246 mstr := strings.Join(missing, ",")
247 umap := types.URLsMap(map[string]types.URLs{c.Name: c.PeerURLs})
248 return fmt.Errorf("--initial-advertise-peer-urls has %s but missing from --initial-cluster=%s", mstr, umap.String())
249 }
250
251 // resolved URLs from "--initial-advertise-peer-urls" and "--initial-cluster" did not match or failed
252 apStr := strings.Join(apurls, ",")
253 umap := types.URLsMap(map[string]types.URLs{c.Name: c.PeerURLs})
254 return fmt.Errorf("failed to resolve %s to match --initial-cluster=%s (%v)", apStr, umap.String(), err)
255}
256
257func (c *ServerConfig) MemberDir() string { return filepath.Join(c.DataDir, "member") }
258
259func (c *ServerConfig) WALDir() string {
260 if c.DedicatedWALDir != "" {
261 return c.DedicatedWALDir
262 }
263 return filepath.Join(c.MemberDir(), "wal")
264}
265
266func (c *ServerConfig) SnapDir() string { return filepath.Join(c.MemberDir(), "snap") }
267
268func (c *ServerConfig) ShouldDiscover() bool { return c.DiscoveryURL != "" }
269
270// ReqTimeout returns timeout for request to finish.
271func (c *ServerConfig) ReqTimeout() time.Duration {
272 // 5s for queue waiting, computation and disk IO delay
273 // + 2 * election timeout for possible leader election
274 return 5*time.Second + 2*time.Duration(c.ElectionTicks*int(c.TickMs))*time.Millisecond
275}
276
277func (c *ServerConfig) electionTimeout() time.Duration {
278 return time.Duration(c.ElectionTicks*int(c.TickMs)) * time.Millisecond
279}
280
281func (c *ServerConfig) peerDialTimeout() time.Duration {
282 // 1s for queue wait and election timeout
283 return time.Second + time.Duration(c.ElectionTicks*int(c.TickMs))*time.Millisecond
284}
285
286func checkDuplicateURL(urlsmap types.URLsMap) bool {
287 um := make(map[string]bool)
288 for _, urls := range urlsmap {
289 for _, url := range urls {
290 u := url.String()
291 if um[u] {
292 return true
293 }
294 um[u] = true
295 }
296 }
297 return false
298}
299
300func (c *ServerConfig) bootstrapTimeout() time.Duration {
301 if c.BootstrapTimeout != 0 {
302 return c.BootstrapTimeout
303 }
304 return time.Second
305}
306
307func (c *ServerConfig) backendPath() string { return filepath.Join(c.SnapDir(), "db") }