khenaidoo | d948f77 | 2021-08-11 17:49:24 -0400 | [diff] [blame] | 1 | // Copyright 2015 The etcd Authors |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | // Package discovery provides an implementation of the cluster discovery that |
| 16 | // is used by etcd. |
| 17 | package discovery |
| 18 | |
| 19 | import ( |
| 20 | "context" |
| 21 | "errors" |
| 22 | "fmt" |
| 23 | "math" |
| 24 | "net/http" |
| 25 | "net/url" |
| 26 | "path" |
| 27 | "sort" |
| 28 | "strconv" |
| 29 | "strings" |
| 30 | "time" |
| 31 | |
| 32 | "github.com/coreos/etcd/client" |
| 33 | "github.com/coreos/etcd/pkg/transport" |
| 34 | "github.com/coreos/etcd/pkg/types" |
| 35 | |
| 36 | "github.com/coreos/pkg/capnslog" |
| 37 | "github.com/jonboulle/clockwork" |
| 38 | ) |
| 39 | |
var (
	// plog is the package-level logger used by the discovery code.
	plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "discovery")

	// ErrInvalidURL is returned when the collected member values cannot be
	// parsed into a URLs map.
	ErrInvalidURL = errors.New("discovery: invalid URL")
	// ErrBadSizeKey is returned when the value stored under the size key
	// cannot be parsed as an unsigned integer.
	ErrBadSizeKey = errors.New("discovery: size key is bad")
	// ErrSizeNotFound is returned when the lookup of the size key reports
	// that the key does not exist.
	ErrSizeNotFound = errors.New("discovery: size key not found")
	// ErrTokenNotFound indicates a missing discovery token.
	// NOTE(review): not returned by any code visible in this file; confirm
	// where it is used before relying on it.
	ErrTokenNotFound = errors.New("discovery: token not found")
	// ErrDuplicateID is returned when registering this member fails because
	// a key for the same member ID already exists.
	ErrDuplicateID = errors.New("discovery: found duplicate id")
	// ErrDuplicateName is returned when the number of distinct names in the
	// resulting URLs map differs from the expected cluster size.
	ErrDuplicateName = errors.New("discovery: found duplicate name")
	// ErrFullCluster is returned when the expected number of members have
	// registered ahead of this member.
	ErrFullCluster = errors.New("discovery: cluster is full")
	// ErrTooManyRetries is returned once the retry counter has reached
	// nRetries.
	ErrTooManyRetries = errors.New("discovery: too many retries")
	// ErrBadDiscoveryEndpoint is returned when the client reports that the
	// discovery endpoint answered with invalid JSON.
	ErrBadDiscoveryEndpoint = errors.New("discovery: bad discovery endpoint")
)
| 53 | |
var (
	// Number of retries discovery will attempt before giving up and erroring out.
	nRetries = uint(math.MaxUint32)
	// maxExpoentialRetries (sic) caps the exponent used for the retry backoff:
	// the delay doubles with every retry until this many retries have been
	// made and then stays constant at 2^maxExpoentialRetries seconds.
	maxExpoentialRetries = uint(8)
)
| 59 | |
| 60 | // JoinCluster will connect to the discovery service at the given url, and |
| 61 | // register the server represented by the given id and config to the cluster |
| 62 | func JoinCluster(durl, dproxyurl string, id types.ID, config string) (string, error) { |
| 63 | d, err := newDiscovery(durl, dproxyurl, id) |
| 64 | if err != nil { |
| 65 | return "", err |
| 66 | } |
| 67 | return d.joinCluster(config) |
| 68 | } |
| 69 | |
| 70 | // GetCluster will connect to the discovery service at the given url and |
| 71 | // retrieve a string describing the cluster |
| 72 | func GetCluster(durl, dproxyurl string) (string, error) { |
| 73 | d, err := newDiscovery(durl, dproxyurl, 0) |
| 74 | if err != nil { |
| 75 | return "", err |
| 76 | } |
| 77 | return d.getCluster() |
| 78 | } |
| 79 | |
// discovery holds the state needed to talk to a discovery service on behalf
// of a single member.
type discovery struct {
	// cluster is the discovery token, taken from the path of the discovery
	// URL; it is the key under which members register.
	cluster string
	// id identifies this member; its string form names this member's key.
	id types.ID
	// c is the keys API client used for every request to the service.
	c client.KeysAPI
	// retries counts the backoff rounds performed so far.
	retries uint
	// url is the discovery endpoint with its path removed; it is only used
	// in log messages here.
	url *url.URL

	// clock provides Sleep for the retry backoff.
	clock clockwork.Clock
}
| 89 | |
| 90 | // newProxyFunc builds a proxy function from the given string, which should |
| 91 | // represent a URL that can be used as a proxy. It performs basic |
| 92 | // sanitization of the URL and returns any error encountered. |
| 93 | func newProxyFunc(proxy string) (func(*http.Request) (*url.URL, error), error) { |
| 94 | if proxy == "" { |
| 95 | return nil, nil |
| 96 | } |
| 97 | // Do a small amount of URL sanitization to help the user |
| 98 | // Derived from net/http.ProxyFromEnvironment |
| 99 | proxyURL, err := url.Parse(proxy) |
| 100 | if err != nil || !strings.HasPrefix(proxyURL.Scheme, "http") { |
| 101 | // proxy was bogus. Try prepending "http://" to it and |
| 102 | // see if that parses correctly. If not, we ignore the |
| 103 | // error and complain about the original one |
| 104 | var err2 error |
| 105 | proxyURL, err2 = url.Parse("http://" + proxy) |
| 106 | if err2 == nil { |
| 107 | err = nil |
| 108 | } |
| 109 | } |
| 110 | if err != nil { |
| 111 | return nil, fmt.Errorf("invalid proxy address %q: %v", proxy, err) |
| 112 | } |
| 113 | |
| 114 | plog.Infof("using proxy %q", proxyURL.String()) |
| 115 | return http.ProxyURL(proxyURL), nil |
| 116 | } |
| 117 | |
| 118 | func newDiscovery(durl, dproxyurl string, id types.ID) (*discovery, error) { |
| 119 | u, err := url.Parse(durl) |
| 120 | if err != nil { |
| 121 | return nil, err |
| 122 | } |
| 123 | token := u.Path |
| 124 | u.Path = "" |
| 125 | pf, err := newProxyFunc(dproxyurl) |
| 126 | if err != nil { |
| 127 | return nil, err |
| 128 | } |
| 129 | |
| 130 | // TODO: add ResponseHeaderTimeout back when watch on discovery service writes header early |
| 131 | tr, err := transport.NewTransport(transport.TLSInfo{}, 30*time.Second) |
| 132 | if err != nil { |
| 133 | return nil, err |
| 134 | } |
| 135 | tr.Proxy = pf |
| 136 | cfg := client.Config{ |
| 137 | Transport: tr, |
| 138 | Endpoints: []string{u.String()}, |
| 139 | } |
| 140 | c, err := client.New(cfg) |
| 141 | if err != nil { |
| 142 | return nil, err |
| 143 | } |
| 144 | dc := client.NewKeysAPIWithPrefix(c, "") |
| 145 | return &discovery{ |
| 146 | cluster: token, |
| 147 | c: dc, |
| 148 | id: id, |
| 149 | url: u, |
| 150 | clock: clockwork.NewRealClock(), |
| 151 | }, nil |
| 152 | } |
| 153 | |
| 154 | func (d *discovery) joinCluster(config string) (string, error) { |
| 155 | // fast path: if the cluster is full, return the error |
| 156 | // do not need to register to the cluster in this case. |
| 157 | if _, _, _, err := d.checkCluster(); err != nil { |
| 158 | return "", err |
| 159 | } |
| 160 | |
| 161 | if err := d.createSelf(config); err != nil { |
| 162 | // Fails, even on a timeout, if createSelf times out. |
| 163 | // TODO(barakmich): Retrying the same node might want to succeed here |
| 164 | // (ie, createSelf should be idempotent for discovery). |
| 165 | return "", err |
| 166 | } |
| 167 | |
| 168 | nodes, size, index, err := d.checkCluster() |
| 169 | if err != nil { |
| 170 | return "", err |
| 171 | } |
| 172 | |
| 173 | all, err := d.waitNodes(nodes, size, index) |
| 174 | if err != nil { |
| 175 | return "", err |
| 176 | } |
| 177 | |
| 178 | return nodesToCluster(all, size) |
| 179 | } |
| 180 | |
| 181 | func (d *discovery) getCluster() (string, error) { |
| 182 | nodes, size, index, err := d.checkCluster() |
| 183 | if err != nil { |
| 184 | if err == ErrFullCluster { |
| 185 | return nodesToCluster(nodes, size) |
| 186 | } |
| 187 | return "", err |
| 188 | } |
| 189 | |
| 190 | all, err := d.waitNodes(nodes, size, index) |
| 191 | if err != nil { |
| 192 | return "", err |
| 193 | } |
| 194 | return nodesToCluster(all, size) |
| 195 | } |
| 196 | |
| 197 | func (d *discovery) createSelf(contents string) error { |
| 198 | ctx, cancel := context.WithTimeout(context.Background(), client.DefaultRequestTimeout) |
| 199 | resp, err := d.c.Create(ctx, d.selfKey(), contents) |
| 200 | cancel() |
| 201 | if err != nil { |
| 202 | if eerr, ok := err.(client.Error); ok && eerr.Code == client.ErrorCodeNodeExist { |
| 203 | return ErrDuplicateID |
| 204 | } |
| 205 | return err |
| 206 | } |
| 207 | |
| 208 | // ensure self appears on the server we connected to |
| 209 | w := d.c.Watcher(d.selfKey(), &client.WatcherOptions{AfterIndex: resp.Node.CreatedIndex - 1}) |
| 210 | _, err = w.Next(context.Background()) |
| 211 | return err |
| 212 | } |
| 213 | |
| 214 | func (d *discovery) checkCluster() ([]*client.Node, uint64, uint64, error) { |
| 215 | configKey := path.Join("/", d.cluster, "_config") |
| 216 | ctx, cancel := context.WithTimeout(context.Background(), client.DefaultRequestTimeout) |
| 217 | // find cluster size |
| 218 | resp, err := d.c.Get(ctx, path.Join(configKey, "size"), nil) |
| 219 | cancel() |
| 220 | if err != nil { |
| 221 | if eerr, ok := err.(*client.Error); ok && eerr.Code == client.ErrorCodeKeyNotFound { |
| 222 | return nil, 0, 0, ErrSizeNotFound |
| 223 | } |
| 224 | if err == client.ErrInvalidJSON { |
| 225 | return nil, 0, 0, ErrBadDiscoveryEndpoint |
| 226 | } |
| 227 | if ce, ok := err.(*client.ClusterError); ok { |
| 228 | plog.Error(ce.Detail()) |
| 229 | return d.checkClusterRetry() |
| 230 | } |
| 231 | return nil, 0, 0, err |
| 232 | } |
| 233 | size, err := strconv.ParseUint(resp.Node.Value, 10, 0) |
| 234 | if err != nil { |
| 235 | return nil, 0, 0, ErrBadSizeKey |
| 236 | } |
| 237 | |
| 238 | ctx, cancel = context.WithTimeout(context.Background(), client.DefaultRequestTimeout) |
| 239 | resp, err = d.c.Get(ctx, d.cluster, nil) |
| 240 | cancel() |
| 241 | if err != nil { |
| 242 | if ce, ok := err.(*client.ClusterError); ok { |
| 243 | plog.Error(ce.Detail()) |
| 244 | return d.checkClusterRetry() |
| 245 | } |
| 246 | return nil, 0, 0, err |
| 247 | } |
| 248 | var nodes []*client.Node |
| 249 | // append non-config keys to nodes |
| 250 | for _, n := range resp.Node.Nodes { |
| 251 | if !(path.Base(n.Key) == path.Base(configKey)) { |
| 252 | nodes = append(nodes, n) |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | snodes := sortableNodes{nodes} |
| 257 | sort.Sort(snodes) |
| 258 | |
| 259 | // find self position |
| 260 | for i := range nodes { |
| 261 | if path.Base(nodes[i].Key) == path.Base(d.selfKey()) { |
| 262 | break |
| 263 | } |
| 264 | if uint64(i) >= size-1 { |
| 265 | return nodes[:size], size, resp.Index, ErrFullCluster |
| 266 | } |
| 267 | } |
| 268 | return nodes, size, resp.Index, nil |
| 269 | } |
| 270 | |
| 271 | func (d *discovery) logAndBackoffForRetry(step string) { |
| 272 | d.retries++ |
| 273 | // logAndBackoffForRetry stops exponential backoff when the retries are more than maxExpoentialRetries and is set to a constant backoff afterward. |
| 274 | retries := d.retries |
| 275 | if retries > maxExpoentialRetries { |
| 276 | retries = maxExpoentialRetries |
| 277 | } |
| 278 | retryTimeInSecond := time.Duration(0x1<<retries) * time.Second |
| 279 | plog.Infof("%s: error connecting to %s, retrying in %s", step, d.url, retryTimeInSecond) |
| 280 | d.clock.Sleep(retryTimeInSecond) |
| 281 | } |
| 282 | |
| 283 | func (d *discovery) checkClusterRetry() ([]*client.Node, uint64, uint64, error) { |
| 284 | if d.retries < nRetries { |
| 285 | d.logAndBackoffForRetry("cluster status check") |
| 286 | return d.checkCluster() |
| 287 | } |
| 288 | return nil, 0, 0, ErrTooManyRetries |
| 289 | } |
| 290 | |
| 291 | func (d *discovery) waitNodesRetry() ([]*client.Node, error) { |
| 292 | if d.retries < nRetries { |
| 293 | d.logAndBackoffForRetry("waiting for other nodes") |
| 294 | nodes, n, index, err := d.checkCluster() |
| 295 | if err != nil { |
| 296 | return nil, err |
| 297 | } |
| 298 | return d.waitNodes(nodes, n, index) |
| 299 | } |
| 300 | return nil, ErrTooManyRetries |
| 301 | } |
| 302 | |
| 303 | func (d *discovery) waitNodes(nodes []*client.Node, size uint64, index uint64) ([]*client.Node, error) { |
| 304 | if uint64(len(nodes)) > size { |
| 305 | nodes = nodes[:size] |
| 306 | } |
| 307 | // watch from the next index |
| 308 | w := d.c.Watcher(d.cluster, &client.WatcherOptions{AfterIndex: index, Recursive: true}) |
| 309 | all := make([]*client.Node, len(nodes)) |
| 310 | copy(all, nodes) |
| 311 | for _, n := range all { |
| 312 | if path.Base(n.Key) == path.Base(d.selfKey()) { |
| 313 | plog.Noticef("found self %s in the cluster", path.Base(d.selfKey())) |
| 314 | } else { |
| 315 | plog.Noticef("found peer %s in the cluster", path.Base(n.Key)) |
| 316 | } |
| 317 | } |
| 318 | |
| 319 | // wait for others |
| 320 | for uint64(len(all)) < size { |
| 321 | plog.Noticef("found %d peer(s), waiting for %d more", len(all), int(size-uint64(len(all)))) |
| 322 | resp, err := w.Next(context.Background()) |
| 323 | if err != nil { |
| 324 | if ce, ok := err.(*client.ClusterError); ok { |
| 325 | plog.Error(ce.Detail()) |
| 326 | return d.waitNodesRetry() |
| 327 | } |
| 328 | return nil, err |
| 329 | } |
| 330 | plog.Noticef("found peer %s in the cluster", path.Base(resp.Node.Key)) |
| 331 | all = append(all, resp.Node) |
| 332 | } |
| 333 | plog.Noticef("found %d needed peer(s)", len(all)) |
| 334 | return all, nil |
| 335 | } |
| 336 | |
| 337 | func (d *discovery) selfKey() string { |
| 338 | return path.Join("/", d.cluster, d.id.String()) |
| 339 | } |
| 340 | |
| 341 | func nodesToCluster(ns []*client.Node, size uint64) (string, error) { |
| 342 | s := make([]string, len(ns)) |
| 343 | for i, n := range ns { |
| 344 | s[i] = n.Value |
| 345 | } |
| 346 | us := strings.Join(s, ",") |
| 347 | m, err := types.NewURLsMap(us) |
| 348 | if err != nil { |
| 349 | return us, ErrInvalidURL |
| 350 | } |
| 351 | if uint64(m.Len()) != size { |
| 352 | return us, ErrDuplicateName |
| 353 | } |
| 354 | return us, nil |
| 355 | } |
| 356 | |
| 357 | type sortableNodes struct{ Nodes []*client.Node } |
| 358 | |
| 359 | func (ns sortableNodes) Len() int { return len(ns.Nodes) } |
| 360 | func (ns sortableNodes) Less(i, j int) bool { |
| 361 | return ns.Nodes[i].CreatedIndex < ns.Nodes[j].CreatedIndex |
| 362 | } |
| 363 | func (ns sortableNodes) Swap(i, j int) { ns.Nodes[i], ns.Nodes[j] = ns.Nodes[j], ns.Nodes[i] } |