blob: 2153767354dec7df4fc552263cbd5b61f49028b8 [file] [log] [blame]
Scott Baker2d897982019-09-24 11:50:08 -07001// Copyright 2018 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package balancer
16
17import (
18 "context"
19 "errors"
20 "io/ioutil"
21 "net/url"
22 "strings"
23 "sync"
24 "time"
25
26 "google.golang.org/grpc"
27 "google.golang.org/grpc/codes"
28 "google.golang.org/grpc/grpclog"
29 healthpb "google.golang.org/grpc/health/grpc_health_v1"
30 "google.golang.org/grpc/status"
31)
32
33// TODO: replace with something better
34var lg = grpclog.NewLoggerV2(ioutil.Discard, ioutil.Discard, ioutil.Discard)
35
36const (
37 minHealthRetryDuration = 3 * time.Second
38 unknownService = "unknown service grpc.health.v1.Health"
39)
40
41// ErrNoAddrAvailable is returned by Get() when the balancer does not have
42// any active connection to endpoints at the time.
43// This error is returned only when opts.BlockingWait is true.
44var ErrNoAddrAvailable = status.Error(codes.Unavailable, "there is no address available")
45
46type NotifyMsg int
47
48const (
49 NotifyReset NotifyMsg = iota
50 NotifyNext
51)
52
53// GRPC17Health does the bare minimum to expose multiple eps
54// to the grpc reconnection code path
55type GRPC17Health struct {
56 // addrs are the client's endpoint addresses for grpc
57 addrs []grpc.Address
58
59 // eps holds the raw endpoints from the client
60 eps []string
61
62 // notifyCh notifies grpc of the set of addresses for connecting
63 notifyCh chan []grpc.Address
64
65 // readyc closes once the first connection is up
66 readyc chan struct{}
67 readyOnce sync.Once
68
69 // healthCheck checks an endpoint's health.
70 healthCheck func(ep string) (bool, error)
71 healthCheckTimeout time.Duration
72
73 unhealthyMu sync.RWMutex
74 unhealthyHostPorts map[string]time.Time
75
76 // mu protects all fields below.
77 mu sync.RWMutex
78
79 // upc closes when pinAddr transitions from empty to non-empty or the balancer closes.
80 upc chan struct{}
81
82 // downc closes when grpc calls down() on pinAddr
83 downc chan struct{}
84
85 // stopc is closed to signal updateNotifyLoop should stop.
86 stopc chan struct{}
87 stopOnce sync.Once
88 wg sync.WaitGroup
89
90 // donec closes when all goroutines are exited
91 donec chan struct{}
92
93 // updateAddrsC notifies updateNotifyLoop to update addrs.
94 updateAddrsC chan NotifyMsg
95
96 // grpc issues TLS cert checks using the string passed into dial so
97 // that string must be the host. To recover the full scheme://host URL,
98 // have a map from hosts to the original endpoint.
99 hostPort2ep map[string]string
100
101 // pinAddr is the currently pinned address; set to the empty string on
102 // initialization and shutdown.
103 pinAddr string
104
105 closed bool
106}
107
108// DialFunc defines gRPC dial function.
109type DialFunc func(ep string, dopts ...grpc.DialOption) (*grpc.ClientConn, error)
110
111// NewGRPC17Health returns a new health balancer with gRPC v1.7.
112func NewGRPC17Health(
113 eps []string,
114 timeout time.Duration,
115 dialFunc DialFunc,
116) *GRPC17Health {
117 notifyCh := make(chan []grpc.Address)
118 addrs := eps2addrs(eps)
119 hb := &GRPC17Health{
120 addrs: addrs,
121 eps: eps,
122 notifyCh: notifyCh,
123 readyc: make(chan struct{}),
124 healthCheck: func(ep string) (bool, error) { return grpcHealthCheck(ep, dialFunc) },
125 unhealthyHostPorts: make(map[string]time.Time),
126 upc: make(chan struct{}),
127 stopc: make(chan struct{}),
128 downc: make(chan struct{}),
129 donec: make(chan struct{}),
130 updateAddrsC: make(chan NotifyMsg),
131 hostPort2ep: getHostPort2ep(eps),
132 }
133 if timeout < minHealthRetryDuration {
134 timeout = minHealthRetryDuration
135 }
136 hb.healthCheckTimeout = timeout
137
138 close(hb.downc)
139 go hb.updateNotifyLoop()
140 hb.wg.Add(1)
141 go func() {
142 defer hb.wg.Done()
143 hb.updateUnhealthy()
144 }()
145 return hb
146}
147
148func (b *GRPC17Health) Start(target string, config grpc.BalancerConfig) error { return nil }
149
150func (b *GRPC17Health) ConnectNotify() <-chan struct{} {
151 b.mu.Lock()
152 defer b.mu.Unlock()
153 return b.upc
154}
155
156func (b *GRPC17Health) UpdateAddrsC() chan NotifyMsg { return b.updateAddrsC }
157func (b *GRPC17Health) StopC() chan struct{} { return b.stopc }
158
159func (b *GRPC17Health) Ready() <-chan struct{} { return b.readyc }
160
161func (b *GRPC17Health) Endpoint(hostPort string) string {
162 b.mu.RLock()
163 defer b.mu.RUnlock()
164 return b.hostPort2ep[hostPort]
165}
166
167func (b *GRPC17Health) Pinned() string {
168 b.mu.RLock()
169 defer b.mu.RUnlock()
170 return b.pinAddr
171}
172
173func (b *GRPC17Health) HostPortError(hostPort string, err error) {
174 if b.Endpoint(hostPort) == "" {
175 lg.Infof("clientv3/balancer: %q is stale (skip marking as unhealthy on %q)", hostPort, err.Error())
176 return
177 }
178
179 b.unhealthyMu.Lock()
180 b.unhealthyHostPorts[hostPort] = time.Now()
181 b.unhealthyMu.Unlock()
182 lg.Infof("clientv3/balancer: %q is marked unhealthy (%q)", hostPort, err.Error())
183}
184
185func (b *GRPC17Health) removeUnhealthy(hostPort, msg string) {
186 if b.Endpoint(hostPort) == "" {
187 lg.Infof("clientv3/balancer: %q was not in unhealthy (%q)", hostPort, msg)
188 return
189 }
190
191 b.unhealthyMu.Lock()
192 delete(b.unhealthyHostPorts, hostPort)
193 b.unhealthyMu.Unlock()
194 lg.Infof("clientv3/balancer: %q is removed from unhealthy (%q)", hostPort, msg)
195}
196
197func (b *GRPC17Health) countUnhealthy() (count int) {
198 b.unhealthyMu.RLock()
199 count = len(b.unhealthyHostPorts)
200 b.unhealthyMu.RUnlock()
201 return count
202}
203
204func (b *GRPC17Health) isUnhealthy(hostPort string) (unhealthy bool) {
205 b.unhealthyMu.RLock()
206 _, unhealthy = b.unhealthyHostPorts[hostPort]
207 b.unhealthyMu.RUnlock()
208 return unhealthy
209}
210
211func (b *GRPC17Health) cleanupUnhealthy() {
212 b.unhealthyMu.Lock()
213 for k, v := range b.unhealthyHostPorts {
214 if time.Since(v) > b.healthCheckTimeout {
215 delete(b.unhealthyHostPorts, k)
216 lg.Infof("clientv3/balancer: removed %q from unhealthy after %v", k, b.healthCheckTimeout)
217 }
218 }
219 b.unhealthyMu.Unlock()
220}
221
222func (b *GRPC17Health) liveAddrs() ([]grpc.Address, map[string]struct{}) {
223 unhealthyCnt := b.countUnhealthy()
224
225 b.mu.RLock()
226 defer b.mu.RUnlock()
227
228 hbAddrs := b.addrs
229 if len(b.addrs) == 1 || unhealthyCnt == 0 || unhealthyCnt == len(b.addrs) {
230 liveHostPorts := make(map[string]struct{}, len(b.hostPort2ep))
231 for k := range b.hostPort2ep {
232 liveHostPorts[k] = struct{}{}
233 }
234 return hbAddrs, liveHostPorts
235 }
236
237 addrs := make([]grpc.Address, 0, len(b.addrs)-unhealthyCnt)
238 liveHostPorts := make(map[string]struct{}, len(addrs))
239 for _, addr := range b.addrs {
240 if !b.isUnhealthy(addr.Addr) {
241 addrs = append(addrs, addr)
242 liveHostPorts[addr.Addr] = struct{}{}
243 }
244 }
245 return addrs, liveHostPorts
246}
247
248func (b *GRPC17Health) updateUnhealthy() {
249 for {
250 select {
251 case <-time.After(b.healthCheckTimeout):
252 b.cleanupUnhealthy()
253 pinned := b.Pinned()
254 if pinned == "" || b.isUnhealthy(pinned) {
255 select {
256 case b.updateAddrsC <- NotifyNext:
257 case <-b.stopc:
258 return
259 }
260 }
261 case <-b.stopc:
262 return
263 }
264 }
265}
266
267// NeedUpdate returns true if all connections are down or
268// addresses do not include current pinned address.
269func (b *GRPC17Health) NeedUpdate() bool {
270 // updating notifyCh can trigger new connections,
271 // need update addrs if all connections are down
272 // or addrs does not include pinAddr.
273 b.mu.RLock()
274 update := !hasAddr(b.addrs, b.pinAddr)
275 b.mu.RUnlock()
276 return update
277}
278
279func (b *GRPC17Health) UpdateAddrs(eps ...string) {
280 np := getHostPort2ep(eps)
281
282 b.mu.Lock()
283 defer b.mu.Unlock()
284
285 match := len(np) == len(b.hostPort2ep)
286 if match {
287 for k, v := range np {
288 if b.hostPort2ep[k] != v {
289 match = false
290 break
291 }
292 }
293 }
294 if match {
295 // same endpoints, so no need to update address
296 return
297 }
298
299 b.hostPort2ep = np
300 b.addrs, b.eps = eps2addrs(eps), eps
301
302 b.unhealthyMu.Lock()
303 b.unhealthyHostPorts = make(map[string]time.Time)
304 b.unhealthyMu.Unlock()
305}
306
307func (b *GRPC17Health) Next() {
308 b.mu.RLock()
309 downc := b.downc
310 b.mu.RUnlock()
311 select {
312 case b.updateAddrsC <- NotifyNext:
313 case <-b.stopc:
314 }
315 // wait until disconnect so new RPCs are not issued on old connection
316 select {
317 case <-downc:
318 case <-b.stopc:
319 }
320}
321
322func (b *GRPC17Health) updateNotifyLoop() {
323 defer close(b.donec)
324
325 for {
326 b.mu.RLock()
327 upc, downc, addr := b.upc, b.downc, b.pinAddr
328 b.mu.RUnlock()
329 // downc or upc should be closed
330 select {
331 case <-downc:
332 downc = nil
333 default:
334 }
335 select {
336 case <-upc:
337 upc = nil
338 default:
339 }
340 switch {
341 case downc == nil && upc == nil:
342 // stale
343 select {
344 case <-b.stopc:
345 return
346 default:
347 }
348 case downc == nil:
349 b.notifyAddrs(NotifyReset)
350 select {
351 case <-upc:
352 case msg := <-b.updateAddrsC:
353 b.notifyAddrs(msg)
354 case <-b.stopc:
355 return
356 }
357 case upc == nil:
358 select {
359 // close connections that are not the pinned address
360 case b.notifyCh <- []grpc.Address{{Addr: addr}}:
361 case <-downc:
362 case <-b.stopc:
363 return
364 }
365 select {
366 case <-downc:
367 b.notifyAddrs(NotifyReset)
368 case msg := <-b.updateAddrsC:
369 b.notifyAddrs(msg)
370 case <-b.stopc:
371 return
372 }
373 }
374 }
375}
376
377func (b *GRPC17Health) notifyAddrs(msg NotifyMsg) {
378 if msg == NotifyNext {
379 select {
380 case b.notifyCh <- []grpc.Address{}:
381 case <-b.stopc:
382 return
383 }
384 }
385 b.mu.RLock()
386 pinAddr := b.pinAddr
387 downc := b.downc
388 b.mu.RUnlock()
389 addrs, hostPorts := b.liveAddrs()
390
391 var waitDown bool
392 if pinAddr != "" {
393 _, ok := hostPorts[pinAddr]
394 waitDown = !ok
395 }
396
397 select {
398 case b.notifyCh <- addrs:
399 if waitDown {
400 select {
401 case <-downc:
402 case <-b.stopc:
403 }
404 }
405 case <-b.stopc:
406 }
407}
408
409func (b *GRPC17Health) Up(addr grpc.Address) func(error) {
410 if !b.mayPin(addr) {
411 return func(err error) {}
412 }
413
414 b.mu.Lock()
415 defer b.mu.Unlock()
416
417 // gRPC might call Up after it called Close. We add this check
418 // to "fix" it up at application layer. Otherwise, will panic
419 // if b.upc is already closed.
420 if b.closed {
421 return func(err error) {}
422 }
423
424 // gRPC might call Up on a stale address.
425 // Prevent updating pinAddr with a stale address.
426 if !hasAddr(b.addrs, addr.Addr) {
427 return func(err error) {}
428 }
429
430 if b.pinAddr != "" {
431 lg.Infof("clientv3/balancer: %q is up but not pinned (already pinned %q)", addr.Addr, b.pinAddr)
432 return func(err error) {}
433 }
434
435 // notify waiting Get()s and pin first connected address
436 close(b.upc)
437 b.downc = make(chan struct{})
438 b.pinAddr = addr.Addr
439 lg.Infof("clientv3/balancer: pin %q", addr.Addr)
440
441 // notify client that a connection is up
442 b.readyOnce.Do(func() { close(b.readyc) })
443
444 return func(err error) {
445 // If connected to a black hole endpoint or a killed server, the gRPC ping
446 // timeout will induce a network I/O error, and retrying until success;
447 // finding healthy endpoint on retry could take several timeouts and redials.
448 // To avoid wasting retries, gray-list unhealthy endpoints.
449 b.HostPortError(addr.Addr, err)
450
451 b.mu.Lock()
452 b.upc = make(chan struct{})
453 close(b.downc)
454 b.pinAddr = ""
455 b.mu.Unlock()
456 lg.Infof("clientv3/balancer: unpin %q (%q)", addr.Addr, err.Error())
457 }
458}
459
460func (b *GRPC17Health) mayPin(addr grpc.Address) bool {
461 if b.Endpoint(addr.Addr) == "" { // stale host:port
462 return false
463 }
464
465 b.unhealthyMu.RLock()
466 unhealthyCnt := len(b.unhealthyHostPorts)
467 failedTime, bad := b.unhealthyHostPorts[addr.Addr]
468 b.unhealthyMu.RUnlock()
469
470 b.mu.RLock()
471 skip := len(b.addrs) == 1 || unhealthyCnt == 0 || len(b.addrs) == unhealthyCnt
472 b.mu.RUnlock()
473 if skip || !bad {
474 return true
475 }
476
477 // prevent isolated member's endpoint from being infinitely retried, as follows:
478 // 1. keepalive pings detects GoAway with http2.ErrCodeEnhanceYourCalm
479 // 2. balancer 'Up' unpins with grpc: failed with network I/O error
480 // 3. grpc-healthcheck still SERVING, thus retry to pin
481 // instead, return before grpc-healthcheck if failed within healthcheck timeout
482 if elapsed := time.Since(failedTime); elapsed < b.healthCheckTimeout {
483 lg.Infof("clientv3/balancer: %q is up but not pinned (failed %v ago, require minimum %v after failure)", addr.Addr, elapsed, b.healthCheckTimeout)
484 return false
485 }
486
487 if ok, _ := b.healthCheck(addr.Addr); ok {
488 b.removeUnhealthy(addr.Addr, "health check success")
489 return true
490 }
491
492 b.HostPortError(addr.Addr, errors.New("health check failed"))
493 return false
494}
495
496func (b *GRPC17Health) Get(ctx context.Context, opts grpc.BalancerGetOptions) (grpc.Address, func(), error) {
497 var (
498 addr string
499 closed bool
500 )
501
502 // If opts.BlockingWait is false (for fail-fast RPCs), it should return
503 // an address it has notified via Notify immediately instead of blocking.
504 if !opts.BlockingWait {
505 b.mu.RLock()
506 closed = b.closed
507 addr = b.pinAddr
508 b.mu.RUnlock()
509 if closed {
510 return grpc.Address{Addr: ""}, nil, grpc.ErrClientConnClosing
511 }
512 if addr == "" {
513 return grpc.Address{Addr: ""}, nil, ErrNoAddrAvailable
514 }
515 return grpc.Address{Addr: addr}, func() {}, nil
516 }
517
518 for {
519 b.mu.RLock()
520 ch := b.upc
521 b.mu.RUnlock()
522 select {
523 case <-ch:
524 case <-b.donec:
525 return grpc.Address{Addr: ""}, nil, grpc.ErrClientConnClosing
526 case <-ctx.Done():
527 return grpc.Address{Addr: ""}, nil, ctx.Err()
528 }
529 b.mu.RLock()
530 closed = b.closed
531 addr = b.pinAddr
532 b.mu.RUnlock()
533 // Close() which sets b.closed = true can be called before Get(), Get() must exit if balancer is closed.
534 if closed {
535 return grpc.Address{Addr: ""}, nil, grpc.ErrClientConnClosing
536 }
537 if addr != "" {
538 break
539 }
540 }
541 return grpc.Address{Addr: addr}, func() {}, nil
542}
543
544func (b *GRPC17Health) Notify() <-chan []grpc.Address { return b.notifyCh }
545
546func (b *GRPC17Health) Close() error {
547 b.mu.Lock()
548 // In case gRPC calls close twice. TODO: remove the checking
549 // when we are sure that gRPC wont call close twice.
550 if b.closed {
551 b.mu.Unlock()
552 <-b.donec
553 return nil
554 }
555 b.closed = true
556 b.stopOnce.Do(func() { close(b.stopc) })
557 b.pinAddr = ""
558
559 // In the case of following scenario:
560 // 1. upc is not closed; no pinned address
561 // 2. client issues an RPC, calling invoke(), which calls Get(), enters for loop, blocks
562 // 3. client.conn.Close() calls balancer.Close(); closed = true
563 // 4. for loop in Get() never exits since ctx is the context passed in by the client and may not be canceled
564 // we must close upc so Get() exits from blocking on upc
565 select {
566 case <-b.upc:
567 default:
568 // terminate all waiting Get()s
569 close(b.upc)
570 }
571
572 b.mu.Unlock()
573 b.wg.Wait()
574
575 // wait for updateNotifyLoop to finish
576 <-b.donec
577 close(b.notifyCh)
578
579 return nil
580}
581
582func grpcHealthCheck(ep string, dialFunc func(ep string, dopts ...grpc.DialOption) (*grpc.ClientConn, error)) (bool, error) {
583 conn, err := dialFunc(ep)
584 if err != nil {
585 return false, err
586 }
587 defer conn.Close()
588 cli := healthpb.NewHealthClient(conn)
589 ctx, cancel := context.WithTimeout(context.Background(), time.Second)
590 resp, err := cli.Check(ctx, &healthpb.HealthCheckRequest{})
591 cancel()
592 if err != nil {
593 if s, ok := status.FromError(err); ok && s.Code() == codes.Unavailable {
594 if s.Message() == unknownService { // etcd < v3.3.0
595 return true, nil
596 }
597 }
598 return false, err
599 }
600 return resp.Status == healthpb.HealthCheckResponse_SERVING, nil
601}
602
603func hasAddr(addrs []grpc.Address, targetAddr string) bool {
604 for _, addr := range addrs {
605 if targetAddr == addr.Addr {
606 return true
607 }
608 }
609 return false
610}
611
612func getHost(ep string) string {
613 url, uerr := url.Parse(ep)
614 if uerr != nil || !strings.Contains(ep, "://") {
615 return ep
616 }
617 return url.Host
618}
619
620func eps2addrs(eps []string) []grpc.Address {
621 addrs := make([]grpc.Address, len(eps))
622 for i := range eps {
623 addrs[i].Addr = getHost(eps[i])
624 }
625 return addrs
626}
627
628func getHostPort2ep(eps []string) map[string]string {
629 hm := make(map[string]string, len(eps))
630 for i := range eps {
631 _, host, _ := parseEndpoint(eps[i])
632 hm[host] = eps[i]
633 }
634 return hm
635}
636
637func parseEndpoint(endpoint string) (proto string, host string, scheme string) {
638 proto = "tcp"
639 host = endpoint
640 url, uerr := url.Parse(endpoint)
641 if uerr != nil || !strings.Contains(endpoint, "://") {
642 return proto, host, scheme
643 }
644 scheme = url.Scheme
645
646 // strip scheme:// prefix since grpc dials by host
647 host = url.Host
648 switch url.Scheme {
649 case "http", "https":
650 case "unix", "unixs":
651 proto = "unix"
652 host = url.Host + url.Path
653 default:
654 proto, host = "", ""
655 }
656 return proto, host, scheme
657}