blob: d6000a85c6fe68283a20d41cba2ac27603be8231 [file] [log] [blame]
Prince Pereirac1c21d62021-04-22 08:38:15 +00001// Copyright 2016 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package clientv3
16
17import (
18 "context"
19 "errors"
20 "fmt"
21 "net"
22 "os"
23 "strconv"
24 "strings"
25 "sync"
26 "time"
27
28 "github.com/google/uuid"
29 "go.etcd.io/etcd/clientv3/balancer"
30 "go.etcd.io/etcd/clientv3/balancer/picker"
31 "go.etcd.io/etcd/clientv3/balancer/resolver/endpoint"
32 "go.etcd.io/etcd/clientv3/credentials"
33 "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
34 "go.etcd.io/etcd/pkg/logutil"
35 "go.uber.org/zap"
36 "google.golang.org/grpc"
37 "google.golang.org/grpc/codes"
38 grpccredentials "google.golang.org/grpc/credentials"
39 "google.golang.org/grpc/keepalive"
40 "google.golang.org/grpc/metadata"
41 "google.golang.org/grpc/status"
42)
43
44var (
45 ErrNoAvailableEndpoints = errors.New("etcdclient: no available endpoints")
46 ErrOldCluster = errors.New("etcdclient: old cluster version")
47
48 roundRobinBalancerName = fmt.Sprintf("etcd-%s", picker.RoundrobinBalanced.String())
49)
50
51func init() {
52 lg := zap.NewNop()
53 if os.Getenv("ETCD_CLIENT_DEBUG") != "" {
54 lcfg := logutil.DefaultZapLoggerConfig
55 lcfg.Level = zap.NewAtomicLevelAt(zap.DebugLevel)
56
57 var err error
58 lg, err = lcfg.Build() // info level logging
59 if err != nil {
60 panic(err)
61 }
62 }
63
64 // TODO: support custom balancer
65 balancer.RegisterBuilder(balancer.Config{
66 Policy: picker.RoundrobinBalanced,
67 Name: roundRobinBalancerName,
68 Logger: lg,
69 })
70}
71
72// Client provides and manages an etcd v3 client session.
73type Client struct {
74 Cluster
75 KV
76 Lease
77 Watcher
78 Auth
79 Maintenance
80
81 conn *grpc.ClientConn
82
83 cfg Config
84 creds grpccredentials.TransportCredentials
85 resolverGroup *endpoint.ResolverGroup
86 mu *sync.RWMutex
87
88 ctx context.Context
89 cancel context.CancelFunc
90
91 // Username is a user name for authentication.
92 Username string
93 // Password is a password for authentication.
94 Password string
95 authTokenBundle credentials.Bundle
96
97 callOpts []grpc.CallOption
98
99 lg *zap.Logger
100}
101
102// New creates a new etcdv3 client from a given configuration.
103func New(cfg Config) (*Client, error) {
104 if len(cfg.Endpoints) == 0 {
105 return nil, ErrNoAvailableEndpoints
106 }
107
108 return newClient(&cfg)
109}
110
111// NewCtxClient creates a client with a context but no underlying grpc
112// connection. This is useful for embedded cases that override the
113// service interface implementations and do not need connection management.
114func NewCtxClient(ctx context.Context) *Client {
115 cctx, cancel := context.WithCancel(ctx)
116 return &Client{ctx: cctx, cancel: cancel}
117}
118
119// NewFromURL creates a new etcdv3 client from a URL.
120func NewFromURL(url string) (*Client, error) {
121 return New(Config{Endpoints: []string{url}})
122}
123
124// NewFromURLs creates a new etcdv3 client from URLs.
125func NewFromURLs(urls []string) (*Client, error) {
126 return New(Config{Endpoints: urls})
127}
128
129// Close shuts down the client's etcd connections.
130func (c *Client) Close() error {
131 c.cancel()
132 if c.Watcher != nil {
133 c.Watcher.Close()
134 }
135 if c.Lease != nil {
136 c.Lease.Close()
137 }
138 if c.resolverGroup != nil {
139 c.resolverGroup.Close()
140 }
141 if c.conn != nil {
142 return toErr(c.ctx, c.conn.Close())
143 }
144 return c.ctx.Err()
145}
146
147// Ctx is a context for "out of band" messages (e.g., for sending
148// "clean up" message when another context is canceled). It is
149// canceled on client Close().
150func (c *Client) Ctx() context.Context { return c.ctx }
151
152// Endpoints lists the registered endpoints for the client.
153func (c *Client) Endpoints() []string {
154 // copy the slice; protect original endpoints from being changed
155 c.mu.RLock()
156 defer c.mu.RUnlock()
157 eps := make([]string, len(c.cfg.Endpoints))
158 copy(eps, c.cfg.Endpoints)
159 return eps
160}
161
162// SetEndpoints updates client's endpoints.
163func (c *Client) SetEndpoints(eps ...string) {
164 c.mu.Lock()
165 defer c.mu.Unlock()
166 c.cfg.Endpoints = eps
167 c.resolverGroup.SetEndpoints(eps)
168}
169
170// Sync synchronizes client's endpoints with the known endpoints from the etcd membership.
171func (c *Client) Sync(ctx context.Context) error {
172 mresp, err := c.MemberList(ctx)
173 if err != nil {
174 return err
175 }
176 var eps []string
177 for _, m := range mresp.Members {
178 eps = append(eps, m.ClientURLs...)
179 }
180 c.SetEndpoints(eps...)
181 return nil
182}
183
184func (c *Client) autoSync() {
185 if c.cfg.AutoSyncInterval == time.Duration(0) {
186 return
187 }
188
189 for {
190 select {
191 case <-c.ctx.Done():
192 return
193 case <-time.After(c.cfg.AutoSyncInterval):
194 ctx, cancel := context.WithTimeout(c.ctx, 5*time.Second)
195 err := c.Sync(ctx)
196 cancel()
197 if err != nil && err != c.ctx.Err() {
198 lg.Lvl(4).Infof("Auto sync endpoints failed: %v", err)
199 }
200 }
201 }
202}
203
204func (c *Client) processCreds(scheme string) (creds grpccredentials.TransportCredentials) {
205 creds = c.creds
206 switch scheme {
207 case "unix":
208 case "http":
209 creds = nil
210 case "https", "unixs":
211 if creds != nil {
212 break
213 }
214 creds = credentials.NewBundle(credentials.Config{}).TransportCredentials()
215 default:
216 creds = nil
217 }
218 return creds
219}
220
221// dialSetupOpts gives the dial opts prior to any authentication.
222func (c *Client) dialSetupOpts(creds grpccredentials.TransportCredentials, dopts ...grpc.DialOption) (opts []grpc.DialOption, err error) {
223 if c.cfg.DialKeepAliveTime > 0 {
224 params := keepalive.ClientParameters{
225 Time: c.cfg.DialKeepAliveTime,
226 Timeout: c.cfg.DialKeepAliveTimeout,
227 PermitWithoutStream: c.cfg.PermitWithoutStream,
228 }
229 opts = append(opts, grpc.WithKeepaliveParams(params))
230 }
231 opts = append(opts, dopts...)
232
233 // Provide a net dialer that supports cancelation and timeout.
234 f := func(dialEp string, t time.Duration) (net.Conn, error) {
235 proto, host, _ := endpoint.ParseEndpoint(dialEp)
236 select {
237 case <-c.ctx.Done():
238 return nil, c.ctx.Err()
239 default:
240 }
241 dialer := &net.Dialer{Timeout: t}
242 return dialer.DialContext(c.ctx, proto, host)
243 }
244 opts = append(opts, grpc.WithDialer(f))
245
246 if creds != nil {
247 opts = append(opts, grpc.WithTransportCredentials(creds))
248 } else {
249 opts = append(opts, grpc.WithInsecure())
250 }
251
252 // Interceptor retry and backoff.
253 // TODO: Replace all of clientv3/retry.go with interceptor based retry, or with
254 // https://github.com/grpc/proposal/blob/master/A6-client-retries.md#retry-policy
255 // once it is available.
256 rrBackoff := withBackoff(c.roundRobinQuorumBackoff(defaultBackoffWaitBetween, defaultBackoffJitterFraction))
257 opts = append(opts,
258 // Disable stream retry by default since go-grpc-middleware/retry does not support client streams.
259 // Streams that are safe to retry are enabled individually.
260 grpc.WithStreamInterceptor(c.streamClientInterceptor(c.lg, withMax(0), rrBackoff)),
261 grpc.WithUnaryInterceptor(c.unaryClientInterceptor(c.lg, withMax(defaultUnaryMaxRetries), rrBackoff)),
262 )
263
264 return opts, nil
265}
266
267// Dial connects to a single endpoint using the client's config.
268func (c *Client) Dial(ep string) (*grpc.ClientConn, error) {
269 creds := c.directDialCreds(ep)
270 // Use the grpc passthrough resolver to directly dial a single endpoint.
271 // This resolver passes through the 'unix' and 'unixs' endpoints schemes used
272 // by etcd without modification, allowing us to directly dial endpoints and
273 // using the same dial functions that we use for load balancer dialing.
274 return c.dial(fmt.Sprintf("passthrough:///%s", ep), creds)
275}
276
277func (c *Client) getToken(ctx context.Context) error {
278 var err error // return last error in a case of fail
279 var auth *authenticator
280
281 eps := c.Endpoints()
282 for _, ep := range eps {
283 // use dial options without dopts to avoid reusing the client balancer
284 var dOpts []grpc.DialOption
285 _, host, _ := endpoint.ParseEndpoint(ep)
286 target := c.resolverGroup.Target(host)
287 creds := c.dialWithBalancerCreds(ep)
288 dOpts, err = c.dialSetupOpts(creds, c.cfg.DialOptions...)
289 if err != nil {
290 err = fmt.Errorf("failed to configure auth dialer: %v", err)
291 continue
292 }
293 dOpts = append(dOpts, grpc.WithBalancerName(roundRobinBalancerName))
294 auth, err = newAuthenticator(ctx, target, dOpts, c)
295 if err != nil {
296 continue
297 }
298 defer auth.close()
299
300 var resp *AuthenticateResponse
301 resp, err = auth.authenticate(ctx, c.Username, c.Password)
302 if err != nil {
303 // return err without retrying other endpoints
304 if err == rpctypes.ErrAuthNotEnabled {
305 return err
306 }
307 continue
308 }
309
310 c.authTokenBundle.UpdateAuthToken(resp.Token)
311 return nil
312 }
313
314 return err
315}
316
317// dialWithBalancer dials the client's current load balanced resolver group. The scheme of the host
318// of the provided endpoint determines the scheme used for all endpoints of the client connection.
319func (c *Client) dialWithBalancer(ep string, dopts ...grpc.DialOption) (*grpc.ClientConn, error) {
320 _, host, _ := endpoint.ParseEndpoint(ep)
321 target := c.resolverGroup.Target(host)
322 creds := c.dialWithBalancerCreds(ep)
323 return c.dial(target, creds, dopts...)
324}
325
326// dial configures and dials any grpc balancer target.
327func (c *Client) dial(target string, creds grpccredentials.TransportCredentials, dopts ...grpc.DialOption) (*grpc.ClientConn, error) {
328 opts, err := c.dialSetupOpts(creds, dopts...)
329 if err != nil {
330 return nil, fmt.Errorf("failed to configure dialer: %v", err)
331 }
332
333 if c.Username != "" && c.Password != "" {
334 c.authTokenBundle = credentials.NewBundle(credentials.Config{})
335
336 ctx, cancel := c.ctx, func() {}
337 if c.cfg.DialTimeout > 0 {
338 ctx, cancel = context.WithTimeout(ctx, c.cfg.DialTimeout)
339 }
340
341 err = c.getToken(ctx)
342 if err != nil {
343 if toErr(ctx, err) != rpctypes.ErrAuthNotEnabled {
344 if err == ctx.Err() && ctx.Err() != c.ctx.Err() {
345 err = context.DeadlineExceeded
346 }
347 cancel()
348 return nil, err
349 }
350 } else {
351 opts = append(opts, grpc.WithPerRPCCredentials(c.authTokenBundle.PerRPCCredentials()))
352 }
353 cancel()
354 }
355
356 opts = append(opts, c.cfg.DialOptions...)
357
358 dctx := c.ctx
359 if c.cfg.DialTimeout > 0 {
360 var cancel context.CancelFunc
361 dctx, cancel = context.WithTimeout(c.ctx, c.cfg.DialTimeout)
362 defer cancel() // TODO: Is this right for cases where grpc.WithBlock() is not set on the dial options?
363 }
364
365 conn, err := grpc.DialContext(dctx, target, opts...)
366 if err != nil {
367 return nil, err
368 }
369 return conn, nil
370}
371
372func (c *Client) directDialCreds(ep string) grpccredentials.TransportCredentials {
373 _, hostPort, scheme := endpoint.ParseEndpoint(ep)
374 creds := c.creds
375 if len(scheme) != 0 {
376 creds = c.processCreds(scheme)
377 if creds != nil {
378 clone := creds.Clone()
379 // Set the server name must to the endpoint hostname without port since grpc
380 // otherwise attempts to check if x509 cert is valid for the full endpoint
381 // including the scheme and port, which fails.
382 host, _ := endpoint.ParseHostPort(hostPort)
383 clone.OverrideServerName(host)
384 creds = clone
385 }
386 }
387 return creds
388}
389
390func (c *Client) dialWithBalancerCreds(ep string) grpccredentials.TransportCredentials {
391 _, _, scheme := endpoint.ParseEndpoint(ep)
392 creds := c.creds
393 if len(scheme) != 0 {
394 creds = c.processCreds(scheme)
395 }
396 return creds
397}
398
399// WithRequireLeader requires client requests to only succeed
400// when the cluster has a leader.
401func WithRequireLeader(ctx context.Context) context.Context {
402 md := metadata.Pairs(rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader)
403 return metadata.NewOutgoingContext(ctx, md)
404}
405
406func newClient(cfg *Config) (*Client, error) {
407 if cfg == nil {
408 cfg = &Config{}
409 }
410 var creds grpccredentials.TransportCredentials
411 if cfg.TLS != nil {
412 creds = credentials.NewBundle(credentials.Config{TLSConfig: cfg.TLS}).TransportCredentials()
413 }
414
415 // use a temporary skeleton client to bootstrap first connection
416 baseCtx := context.TODO()
417 if cfg.Context != nil {
418 baseCtx = cfg.Context
419 }
420
421 ctx, cancel := context.WithCancel(baseCtx)
422 client := &Client{
423 conn: nil,
424 cfg: *cfg,
425 creds: creds,
426 ctx: ctx,
427 cancel: cancel,
428 mu: new(sync.RWMutex),
429 callOpts: defaultCallOpts,
430 }
431
432 lcfg := logutil.DefaultZapLoggerConfig
433 if cfg.LogConfig != nil {
434 lcfg = *cfg.LogConfig
435 }
436 var err error
437 client.lg, err = lcfg.Build()
438 if err != nil {
439 return nil, err
440 }
441
442 if cfg.Username != "" && cfg.Password != "" {
443 client.Username = cfg.Username
444 client.Password = cfg.Password
445 }
446 if cfg.MaxCallSendMsgSize > 0 || cfg.MaxCallRecvMsgSize > 0 {
447 if cfg.MaxCallRecvMsgSize > 0 && cfg.MaxCallSendMsgSize > cfg.MaxCallRecvMsgSize {
448 return nil, fmt.Errorf("gRPC message recv limit (%d bytes) must be greater than send limit (%d bytes)", cfg.MaxCallRecvMsgSize, cfg.MaxCallSendMsgSize)
449 }
450 callOpts := []grpc.CallOption{
451 defaultFailFast,
452 defaultMaxCallSendMsgSize,
453 defaultMaxCallRecvMsgSize,
454 }
455 if cfg.MaxCallSendMsgSize > 0 {
456 callOpts[1] = grpc.MaxCallSendMsgSize(cfg.MaxCallSendMsgSize)
457 }
458 if cfg.MaxCallRecvMsgSize > 0 {
459 callOpts[2] = grpc.MaxCallRecvMsgSize(cfg.MaxCallRecvMsgSize)
460 }
461 client.callOpts = callOpts
462 }
463
464 // Prepare a 'endpoint://<unique-client-id>/' resolver for the client and create a endpoint target to pass
465 // to dial so the client knows to use this resolver.
466 client.resolverGroup, err = endpoint.NewResolverGroup(fmt.Sprintf("client-%s", uuid.New().String()))
467 if err != nil {
468 client.cancel()
469 return nil, err
470 }
471 client.resolverGroup.SetEndpoints(cfg.Endpoints)
472
473 if len(cfg.Endpoints) < 1 {
474 return nil, fmt.Errorf("at least one Endpoint must is required in client config")
475 }
476 dialEndpoint := cfg.Endpoints[0]
477
478 // Use a provided endpoint target so that for https:// without any tls config given, then
479 // grpc will assume the certificate server name is the endpoint host.
480 conn, err := client.dialWithBalancer(dialEndpoint, grpc.WithBalancerName(roundRobinBalancerName))
481 if err != nil {
482 client.cancel()
483 client.resolverGroup.Close()
484 return nil, err
485 }
486 // TODO: With the old grpc balancer interface, we waited until the dial timeout
487 // for the balancer to be ready. Is there an equivalent wait we should do with the new grpc balancer interface?
488 client.conn = conn
489
490 client.Cluster = NewCluster(client)
491 client.KV = NewKV(client)
492 client.Lease = NewLease(client)
493 client.Watcher = NewWatcher(client)
494 client.Auth = NewAuth(client)
495 client.Maintenance = NewMaintenance(client)
496
497 if cfg.RejectOldCluster {
498 if err := client.checkVersion(); err != nil {
499 client.Close()
500 return nil, err
501 }
502 }
503
504 go client.autoSync()
505 return client, nil
506}
507
508// roundRobinQuorumBackoff retries against quorum between each backoff.
509// This is intended for use with a round robin load balancer.
510func (c *Client) roundRobinQuorumBackoff(waitBetween time.Duration, jitterFraction float64) backoffFunc {
511 return func(attempt uint) time.Duration {
512 // after each round robin across quorum, backoff for our wait between duration
513 n := uint(len(c.Endpoints()))
514 quorum := (n/2 + 1)
515 if attempt%quorum == 0 {
516 c.lg.Debug("backoff", zap.Uint("attempt", attempt), zap.Uint("quorum", quorum), zap.Duration("waitBetween", waitBetween), zap.Float64("jitterFraction", jitterFraction))
517 return jitterUp(waitBetween, jitterFraction)
518 }
519 c.lg.Debug("backoff skipped", zap.Uint("attempt", attempt), zap.Uint("quorum", quorum))
520 return 0
521 }
522}
523
524func (c *Client) checkVersion() (err error) {
525 var wg sync.WaitGroup
526
527 eps := c.Endpoints()
528 errc := make(chan error, len(eps))
529 ctx, cancel := context.WithCancel(c.ctx)
530 if c.cfg.DialTimeout > 0 {
531 cancel()
532 ctx, cancel = context.WithTimeout(c.ctx, c.cfg.DialTimeout)
533 }
534
535 wg.Add(len(eps))
536 for _, ep := range eps {
537 // if cluster is current, any endpoint gives a recent version
538 go func(e string) {
539 defer wg.Done()
540 resp, rerr := c.Status(ctx, e)
541 if rerr != nil {
542 errc <- rerr
543 return
544 }
545 vs := strings.Split(resp.Version, ".")
546 maj, min := 0, 0
547 if len(vs) >= 2 {
548 var serr error
549 if maj, serr = strconv.Atoi(vs[0]); serr != nil {
550 errc <- serr
551 return
552 }
553 if min, serr = strconv.Atoi(vs[1]); serr != nil {
554 errc <- serr
555 return
556 }
557 }
558 if maj < 3 || (maj == 3 && min < 2) {
559 rerr = ErrOldCluster
560 }
561 errc <- rerr
562 }(ep)
563 }
564 // wait for success
565 for range eps {
566 if err = <-errc; err == nil {
567 break
568 }
569 }
570 cancel()
571 wg.Wait()
572 return err
573}
574
575// ActiveConnection returns the current in-use connection
576func (c *Client) ActiveConnection() *grpc.ClientConn { return c.conn }
577
578// isHaltErr returns true if the given error and context indicate no forward
579// progress can be made, even after reconnecting.
580func isHaltErr(ctx context.Context, err error) bool {
581 if ctx != nil && ctx.Err() != nil {
582 return true
583 }
584 if err == nil {
585 return false
586 }
587 ev, _ := status.FromError(err)
588 // Unavailable codes mean the system will be right back.
589 // (e.g., can't connect, lost leader)
590 // Treat Internal codes as if something failed, leaving the
591 // system in an inconsistent state, but retrying could make progress.
592 // (e.g., failed in middle of send, corrupted frame)
593 // TODO: are permanent Internal errors possible from grpc?
594 return ev.Code() != codes.Unavailable && ev.Code() != codes.Internal
595}
596
597// isUnavailableErr returns true if the given error is an unavailable error
598func isUnavailableErr(ctx context.Context, err error) bool {
599 if ctx != nil && ctx.Err() != nil {
600 return false
601 }
602 if err == nil {
603 return false
604 }
605 ev, ok := status.FromError(err)
606 if ok {
607 // Unavailable codes mean the system will be right back.
608 // (e.g., can't connect, lost leader)
609 return ev.Code() == codes.Unavailable
610 }
611 return false
612}
613
614func toErr(ctx context.Context, err error) error {
615 if err == nil {
616 return nil
617 }
618 err = rpctypes.Error(err)
619 if _, ok := err.(rpctypes.EtcdError); ok {
620 return err
621 }
622 if ev, ok := status.FromError(err); ok {
623 code := ev.Code()
624 switch code {
625 case codes.DeadlineExceeded:
626 fallthrough
627 case codes.Canceled:
628 if ctx.Err() != nil {
629 err = ctx.Err()
630 }
631 }
632 }
633 return err
634}
635
// canceledByCaller reports whether err is exactly context.Canceled or
// context.DeadlineExceeded while stopCtx is already done. It returns false
// when stopCtx is still live or err is nil.
func canceledByCaller(stopCtx context.Context, err error) bool {
	if stopCtx.Err() == nil || err == nil {
		return false
	}

	switch err {
	case context.Canceled, context.DeadlineExceeded:
		return true
	}
	return false
}
643
644// IsConnCanceled returns true, if error is from a closed gRPC connection.
645// ref. https://github.com/grpc/grpc-go/pull/1854
646func IsConnCanceled(err error) bool {
647 if err == nil {
648 return false
649 }
650
651 // >= gRPC v1.23.x
652 s, ok := status.FromError(err)
653 if ok {
654 // connection is canceled or server has already closed the connection
655 return s.Code() == codes.Canceled || s.Message() == "transport is closing"
656 }
657
658 // >= gRPC v1.10.x
659 if err == context.Canceled {
660 return true
661 }
662
663 // <= gRPC v1.7.x returns 'errors.New("grpc: the client connection is closing")'
664 return strings.Contains(err.Error(), "grpc: the client connection is closing")
665}