// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package clientv3

import (
	"context"
	"errors"
	"net/url"
	"strings"
	"sync"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	healthpb "google.golang.org/grpc/health/grpc_health_v1"
	"google.golang.org/grpc/status"
)

const (
	minHealthRetryDuration = 3 * time.Second
	unknownService         = "unknown service grpc.health.v1.Health"
)

// ErrNoAddrAvilable is returned by Get() when the balancer does not have
// an active connection to any endpoint at the time of the call.
// This error is returned only when opts.BlockingWait is true.
var ErrNoAddrAvilable = status.Error(codes.Unavailable, "there is no address available")

// healthCheckFunc reports whether the endpoint ep is currently healthy.
type healthCheckFunc func(ep string) (bool, error)

type notifyMsg int

const (
	notifyReset notifyMsg = iota
	notifyNext
)

// healthBalancer does the bare minimum to expose multiple endpoints
// to the gRPC reconnection code path.
type healthBalancer struct {
	// addrs are the client's endpoint addresses for grpc.
	addrs []grpc.Address

	// eps holds the raw endpoints from the client.
	eps []string

	// notifyCh notifies grpc of the set of addresses for connecting.
	notifyCh chan []grpc.Address

	// readyc closes once the first connection is up.
	readyc    chan struct{}
	readyOnce sync.Once

	// healthCheck checks an endpoint's health.
	healthCheck        healthCheckFunc
	healthCheckTimeout time.Duration

	unhealthyMu        sync.RWMutex
	unhealthyHostPorts map[string]time.Time

	// mu protects all fields below.
	mu sync.RWMutex

	// upc closes when pinAddr transitions from empty to non-empty or the balancer closes.
	upc chan struct{}

	// downc closes when grpc calls down() on pinAddr.
	downc chan struct{}

	// stopc is closed to signal updateNotifyLoop should stop.
	stopc    chan struct{}
	stopOnce sync.Once
	wg       sync.WaitGroup

	// donec closes when all goroutines have exited.
	donec chan struct{}

	// updateAddrsC notifies updateNotifyLoop to update addrs.
	updateAddrsC chan notifyMsg

	// grpc issues TLS cert checks using the string passed into dial, so
	// that string must be the host. To recover the full scheme://host URL,
	// keep a map from hosts to the original endpoints.
	hostPort2ep map[string]string

	// pinAddr is the currently pinned address; set to the empty string on
	// initialization and shutdown.
	pinAddr string

	closed bool
}

// newHealthBalancer creates a healthBalancer for the given endpoints, using hc
// to probe endpoint health and gray-listing failed endpoints for at least
// timeout (raised to minHealthRetryDuration if smaller).
func newHealthBalancer(eps []string, timeout time.Duration, hc healthCheckFunc) *healthBalancer {
	notifyCh := make(chan []grpc.Address)
	addrs := eps2addrs(eps)
	hb := &healthBalancer{
		addrs:              addrs,
		eps:                eps,
		notifyCh:           notifyCh,
		readyc:             make(chan struct{}),
		healthCheck:        hc,
		unhealthyHostPorts: make(map[string]time.Time),
		upc:                make(chan struct{}),
		stopc:              make(chan struct{}),
		downc:              make(chan struct{}),
		donec:              make(chan struct{}),
		updateAddrsC:       make(chan notifyMsg),
		hostPort2ep:        getHostPort2ep(eps),
	}
	if timeout < minHealthRetryDuration {
		timeout = minHealthRetryDuration
	}
	hb.healthCheckTimeout = timeout

	close(hb.downc)
	go hb.updateNotifyLoop()
	hb.wg.Add(1)
	go func() {
		defer hb.wg.Done()
		hb.updateUnhealthy()
	}()
	return hb
}
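
// A minimal usage sketch (illustrative only, not from this file): the client is
// assumed to build the balancer from its configured endpoints plus a
// grpcHealthCheck-backed closure, then pass it to grpc.Dial through the legacy
// grpc.WithBalancer dial option. "c", "cfg", and "target" are assumed names.
//
//	hc := func(ep string) (bool, error) { return grpcHealthCheck(c, ep) }
//	hb := newHealthBalancer(cfg.Endpoints, cfg.DialTimeout, hc)
//	conn, err := grpc.Dial(target, grpc.WithInsecure(), grpc.WithBalancer(hb))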

func (b *healthBalancer) Start(target string, config grpc.BalancerConfig) error { return nil }

func (b *healthBalancer) ConnectNotify() <-chan struct{} {
	b.mu.Lock()
	defer b.mu.Unlock()
	return b.upc
}

func (b *healthBalancer) ready() <-chan struct{} { return b.readyc }

func (b *healthBalancer) endpoint(hostPort string) string {
	b.mu.RLock()
	defer b.mu.RUnlock()
	return b.hostPort2ep[hostPort]
}

func (b *healthBalancer) pinned() string {
	b.mu.RLock()
	defer b.mu.RUnlock()
	return b.pinAddr
}

func (b *healthBalancer) hostPortError(hostPort string, err error) {
	if b.endpoint(hostPort) == "" {
		logger.Lvl(4).Infof("clientv3/balancer: %q is stale (skip marking as unhealthy on %q)", hostPort, err.Error())
		return
	}

	b.unhealthyMu.Lock()
	b.unhealthyHostPorts[hostPort] = time.Now()
	b.unhealthyMu.Unlock()
	logger.Lvl(4).Infof("clientv3/balancer: %q is marked unhealthy (%q)", hostPort, err.Error())
}

func (b *healthBalancer) removeUnhealthy(hostPort, msg string) {
	if b.endpoint(hostPort) == "" {
		logger.Lvl(4).Infof("clientv3/balancer: %q was not in unhealthy (%q)", hostPort, msg)
		return
	}

	b.unhealthyMu.Lock()
	delete(b.unhealthyHostPorts, hostPort)
	b.unhealthyMu.Unlock()
	logger.Lvl(4).Infof("clientv3/balancer: %q is removed from unhealthy (%q)", hostPort, msg)
}

func (b *healthBalancer) countUnhealthy() (count int) {
	b.unhealthyMu.RLock()
	count = len(b.unhealthyHostPorts)
	b.unhealthyMu.RUnlock()
	return count
}

func (b *healthBalancer) isUnhealthy(hostPort string) (unhealthy bool) {
	b.unhealthyMu.RLock()
	_, unhealthy = b.unhealthyHostPorts[hostPort]
	b.unhealthyMu.RUnlock()
	return unhealthy
}

func (b *healthBalancer) cleanupUnhealthy() {
	b.unhealthyMu.Lock()
	for k, v := range b.unhealthyHostPorts {
		if time.Since(v) > b.healthCheckTimeout {
			delete(b.unhealthyHostPorts, k)
			logger.Lvl(4).Infof("clientv3/balancer: removed %q from unhealthy after %v", k, b.healthCheckTimeout)
		}
	}
	b.unhealthyMu.Unlock()
}

func (b *healthBalancer) liveAddrs() ([]grpc.Address, map[string]struct{}) {
	unhealthyCnt := b.countUnhealthy()

	b.mu.RLock()
	defer b.mu.RUnlock()

	hbAddrs := b.addrs
	if len(b.addrs) == 1 || unhealthyCnt == 0 || unhealthyCnt == len(b.addrs) {
		liveHostPorts := make(map[string]struct{}, len(b.hostPort2ep))
		for k := range b.hostPort2ep {
			liveHostPorts[k] = struct{}{}
		}
		return hbAddrs, liveHostPorts
	}

	addrs := make([]grpc.Address, 0, len(b.addrs)-unhealthyCnt)
	liveHostPorts := make(map[string]struct{}, len(addrs))
	for _, addr := range b.addrs {
		if !b.isUnhealthy(addr.Addr) {
			addrs = append(addrs, addr)
			liveHostPorts[addr.Addr] = struct{}{}
		}
	}
	return addrs, liveHostPorts
}

func (b *healthBalancer) updateUnhealthy() {
	for {
		select {
		case <-time.After(b.healthCheckTimeout):
			b.cleanupUnhealthy()
			pinned := b.pinned()
			if pinned == "" || b.isUnhealthy(pinned) {
				select {
				case b.updateAddrsC <- notifyNext:
				case <-b.stopc:
					return
				}
			}
		case <-b.stopc:
			return
		}
	}
}

func (b *healthBalancer) updateAddrs(eps ...string) {
	np := getHostPort2ep(eps)

	b.mu.Lock()
	defer b.mu.Unlock()

	match := len(np) == len(b.hostPort2ep)
	if match {
		for k, v := range np {
			if b.hostPort2ep[k] != v {
				match = false
				break
			}
		}
	}
	if match {
		// same endpoints, so no need to update address
		return
	}

	b.hostPort2ep = np
	b.addrs, b.eps = eps2addrs(eps), eps

	b.unhealthyMu.Lock()
	b.unhealthyHostPorts = make(map[string]time.Time)
	b.unhealthyMu.Unlock()
}

func (b *healthBalancer) next() {
	b.mu.RLock()
	downc := b.downc
	b.mu.RUnlock()
	select {
	case b.updateAddrsC <- notifyNext:
	case <-b.stopc:
	}
	// wait until disconnect so new RPCs are not issued on old connection
	select {
	case <-downc:
	case <-b.stopc:
	}
}
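
// How the retry path is expected to use next() (a sketch; the caller shown is
// assumed, not taken from this file): after an RPC fails with a connection
// error, the client asks the balancer to rotate endpoints and waits for the
// old connection to go down before reissuing the RPC.
//
//	if isConnUnavailable(err) { // hypothetical predicate for connection errors
//		c.balancer.next()
//		continue // retry the RPC on the newly pinned endpoint
//	}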

func (b *healthBalancer) updateNotifyLoop() {
	defer close(b.donec)

	for {
		b.mu.RLock()
		upc, downc, addr := b.upc, b.downc, b.pinAddr
		b.mu.RUnlock()
		// downc or upc should be closed
		select {
		case <-downc:
			downc = nil
		default:
		}
		select {
		case <-upc:
			upc = nil
		default:
		}
		switch {
		case downc == nil && upc == nil:
			// stale
			select {
			case <-b.stopc:
				return
			default:
			}
		case downc == nil:
			b.notifyAddrs(notifyReset)
			select {
			case <-upc:
			case msg := <-b.updateAddrsC:
				b.notifyAddrs(msg)
			case <-b.stopc:
				return
			}
		case upc == nil:
			select {
			// close connections that are not the pinned address
			case b.notifyCh <- []grpc.Address{{Addr: addr}}:
			case <-downc:
			case <-b.stopc:
				return
			}
			select {
			case <-downc:
				b.notifyAddrs(notifyReset)
			case msg := <-b.updateAddrsC:
				b.notifyAddrs(msg)
			case <-b.stopc:
				return
			}
		}
	}
}

func (b *healthBalancer) notifyAddrs(msg notifyMsg) {
	if msg == notifyNext {
		select {
		case b.notifyCh <- []grpc.Address{}:
		case <-b.stopc:
			return
		}
	}
	b.mu.RLock()
	pinAddr := b.pinAddr
	downc := b.downc
	b.mu.RUnlock()
	addrs, hostPorts := b.liveAddrs()

	var waitDown bool
	if pinAddr != "" {
		_, ok := hostPorts[pinAddr]
		waitDown = !ok
	}

	select {
	case b.notifyCh <- addrs:
		if waitDown {
			select {
			case <-downc:
			case <-b.stopc:
			}
		}
	case <-b.stopc:
	}
}

func (b *healthBalancer) Up(addr grpc.Address) func(error) {
	if !b.mayPin(addr) {
		return func(err error) {}
	}

	b.mu.Lock()
	defer b.mu.Unlock()

	// gRPC might call Up after it has called Close. We add this check
	// to "fix" it up at the application layer; otherwise, closing the
	// already-closed b.upc would panic.
	if b.closed {
		return func(err error) {}
	}

	// gRPC might call Up on a stale address.
	// Prevent updating pinAddr with a stale address.
	if !hasAddr(b.addrs, addr.Addr) {
		return func(err error) {}
	}

	if b.pinAddr != "" {
		logger.Lvl(4).Infof("clientv3/balancer: %q is up but not pinned (already pinned %q)", addr.Addr, b.pinAddr)
		return func(err error) {}
	}

	// notify waiting Get()s and pin the first connected address
	close(b.upc)
	b.downc = make(chan struct{})
	b.pinAddr = addr.Addr
	logger.Lvl(4).Infof("clientv3/balancer: pin %q", addr.Addr)

	// notify client that a connection is up
	b.readyOnce.Do(func() { close(b.readyc) })

	return func(err error) {
		// If connected to a black-hole endpoint or a killed server, the gRPC ping
		// timeout will induce a network I/O error, and gRPC will keep retrying
		// until it succeeds; finding a healthy endpoint on retry could take
		// several timeouts and redials. To avoid wasting retries, gray-list
		// unhealthy endpoints.
		b.hostPortError(addr.Addr, err)

		b.mu.Lock()
		b.upc = make(chan struct{})
		close(b.downc)
		b.pinAddr = ""
		b.mu.Unlock()
		logger.Lvl(4).Infof("clientv3/balancer: unpin %q (%q)", addr.Addr, err.Error())
	}
}

func (b *healthBalancer) mayPin(addr grpc.Address) bool {
	if b.endpoint(addr.Addr) == "" { // stale host:port
		return false
	}

	b.unhealthyMu.RLock()
	unhealthyCnt := len(b.unhealthyHostPorts)
	failedTime, bad := b.unhealthyHostPorts[addr.Addr]
	b.unhealthyMu.RUnlock()

	b.mu.RLock()
	skip := len(b.addrs) == 1 || unhealthyCnt == 0 || len(b.addrs) == unhealthyCnt
	b.mu.RUnlock()
	if skip || !bad {
		return true
	}

	// Prevent an isolated member's endpoint from being retried indefinitely:
	//  1. keepalive pings detect a GoAway with http2.ErrCodeEnhanceYourCalm
	//  2. the balancer's Up unpins with "grpc: failed with network I/O error"
	//  3. the gRPC health check still reports SERVING, so the balancer retries the pin
	// Instead, return before the gRPC health check if the endpoint failed
	// within the health-check timeout.
	if elapsed := time.Since(failedTime); elapsed < b.healthCheckTimeout {
		logger.Lvl(4).Infof("clientv3/balancer: %q is up but not pinned (failed %v ago, require minimum %v after failure)", addr.Addr, elapsed, b.healthCheckTimeout)
		return false
	}

	if ok, _ := b.healthCheck(addr.Addr); ok {
		b.removeUnhealthy(addr.Addr, "health check success")
		return true
	}

	b.hostPortError(addr.Addr, errors.New("health check failed"))
	return false
}

func (b *healthBalancer) Get(ctx context.Context, opts grpc.BalancerGetOptions) (grpc.Address, func(), error) {
	var (
		addr   string
		closed bool
	)

	// If opts.BlockingWait is false (for fail-fast RPCs), it should return
	// an address it has notified via Notify immediately instead of blocking.
	if !opts.BlockingWait {
		b.mu.RLock()
		closed = b.closed
		addr = b.pinAddr
		b.mu.RUnlock()
		if closed {
			return grpc.Address{Addr: ""}, nil, grpc.ErrClientConnClosing
		}
		if addr == "" {
			return grpc.Address{Addr: ""}, nil, ErrNoAddrAvilable
		}
		return grpc.Address{Addr: addr}, func() {}, nil
	}

	for {
		b.mu.RLock()
		ch := b.upc
		b.mu.RUnlock()
		select {
		case <-ch:
		case <-b.donec:
			return grpc.Address{Addr: ""}, nil, grpc.ErrClientConnClosing
		case <-ctx.Done():
			return grpc.Address{Addr: ""}, nil, ctx.Err()
		}
		b.mu.RLock()
		closed = b.closed
		addr = b.pinAddr
		b.mu.RUnlock()
		// Close(), which sets b.closed = true, can be called before Get();
		// Get() must exit if the balancer is closed.
		if closed {
			return grpc.Address{Addr: ""}, nil, grpc.ErrClientConnClosing
		}
		if addr != "" {
			break
		}
	}
	return grpc.Address{Addr: addr}, func() {}, nil
}

func (b *healthBalancer) Notify() <-chan []grpc.Address { return b.notifyCh }

func (b *healthBalancer) Close() error {
	b.mu.Lock()
	// Guard against gRPC calling Close twice. TODO: remove this check
	// once we are sure that gRPC won't call Close twice.
	if b.closed {
		b.mu.Unlock()
		<-b.donec
		return nil
	}
	b.closed = true
	b.stopOnce.Do(func() { close(b.stopc) })
	b.pinAddr = ""

	// Consider the following scenario:
	// 1. upc is not closed; no pinned address
	// 2. client issues an RPC, calling invoke(), which calls Get(), enters the for loop, and blocks
	// 3. client.conn.Close() calls balancer.Close(); closed = true
	// 4. the for loop in Get() never exits since ctx is the context passed in by the client and may never be canceled
	// We must close upc so Get() stops blocking on upc.
	select {
	case <-b.upc:
	default:
		// terminate all waiting Get()s
		close(b.upc)
	}

	b.mu.Unlock()
	b.wg.Wait()

	// wait for updateNotifyLoop to finish
	<-b.donec
	close(b.notifyCh)

	return nil
}

// grpcHealthCheck dials ep and queries the standard gRPC health service,
// reporting whether the endpoint is SERVING. Servers that do not register
// the health service (etcd < v3.3.0) are treated as healthy.
func grpcHealthCheck(client *Client, ep string) (bool, error) {
	conn, err := client.dial(ep)
	if err != nil {
		return false, err
	}
	defer conn.Close()
	cli := healthpb.NewHealthClient(conn)
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	resp, err := cli.Check(ctx, &healthpb.HealthCheckRequest{})
	cancel()
	if err != nil {
		if s, ok := status.FromError(err); ok && s.Code() == codes.Unavailable {
			if s.Message() == unknownService { // etcd < v3.3.0
				return true, nil
			}
		}
		return false, err
	}
	return resp.Status == healthpb.HealthCheckResponse_SERVING, nil
}
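
// Illustrative call pattern (assumed, not part of the original source): with an
// existing *Client named "c", a single endpoint can be probed directly; the
// unknownService fallback above means servers without the health service
// (etcd < v3.3.0) still count as healthy.
//
//	healthy, err := grpcHealthCheck(c, "10.0.0.1:2379")
//	if err != nil {
//		// dial or RPC failure: treat the endpoint as unhealthy
//	} else if !healthy {
//		// the server answered but is not SERVING
//	}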

// hasAddr reports whether targetAddr is present in addrs.
func hasAddr(addrs []grpc.Address, targetAddr string) bool {
	for _, addr := range addrs {
		if targetAddr == addr.Addr {
			return true
		}
	}
	return false
}

// getHost returns the host part of an endpoint URL, or the endpoint itself
// when it has no scheme or cannot be parsed.
func getHost(ep string) string {
	url, uerr := url.Parse(ep)
	if uerr != nil || !strings.Contains(ep, "://") {
		return ep
	}
	return url.Host
}

// eps2addrs converts raw endpoints into grpc.Addresses holding only the host portion.
func eps2addrs(eps []string) []grpc.Address {
	addrs := make([]grpc.Address, len(eps))
	for i := range eps {
		addrs[i].Addr = getHost(eps[i])
	}
	return addrs
}

// getHostPort2ep maps each endpoint's host:port back to the original endpoint
// string so the full scheme://host URL can be recovered later.
func getHostPort2ep(eps []string) map[string]string {
	hm := make(map[string]string, len(eps))
	for i := range eps {
		_, host, _ := parseEndpoint(eps[i])
		hm[host] = eps[i]
	}
	return hm
}
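
// Worked example (values assumed) for the two endpoint-mapping helpers above,
// relying on parseEndpoint, which is defined elsewhere in this package:
//
//	eps := []string{"https://10.0.0.1:2379", "10.0.0.2:2379"}
//	eps2addrs(eps)      // []grpc.Address{{Addr: "10.0.0.1:2379"}, {Addr: "10.0.0.2:2379"}}
//	getHostPort2ep(eps) // map["10.0.0.1:2379"] = "https://10.0.0.1:2379",
//	                    // map["10.0.0.2:2379"] = "10.0.0.2:2379"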