/*
 * Copyright 2018-2023 Open Networking Foundation (ONF) and the ONF Contributors

 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at

 * http://www.apache.org/licenses/LICENSE-2.0

 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | package kvstore |
| 17 | |
| 18 | import ( |
| 19 | "context" |
| 20 | "errors" |
| 21 | "fmt" |
| 22 | "os" |
| 23 | "strconv" |
| 24 | "sync" |
| 25 | "time" |
| 26 | |
| 27 | "github.com/opencord/voltha-lib-go/v7/pkg/log" |
| 28 | v3Client "go.etcd.io/etcd/clientv3" |
| 29 | v3rpcTypes "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes" |
| 30 | ) |
| 31 | |
// Names of the environment variables read by NewEtcdCustomClient to override
// the pool defaults.
const (
	// poolCapacityEnvName holds an integer that replaces defaultMaxPoolCapacity.
	poolCapacityEnvName = "VOLTHA_ETCD_CLIENT_POOL_CAPACITY"
	// maxUsageEnvName holds an integer that replaces defaultMaxPoolUsage.
	maxUsageEnvName = "VOLTHA_ETCD_CLIENT_MAX_USAGE"
)
| 36 | |
// Pool settings used by NewEtcdCustomClient when the matching environment
// variable is absent or is not a valid integer.
const (
	defaultMaxPoolCapacity = 1000 // Default size of an Etcd Client pool
	defaultMaxPoolUsage    = 100  // Maximum concurrent request an Etcd Client is allowed to process
)
| 41 | |
// EtcdClient represents the Etcd KV store client.
type EtcdClient struct {
	// pool supplies the etcd clients used by every operation: a client is
	// borrowed with pool.Get and handed back with pool.Put.
	pool EtcdClientAllocator
	// watchedChannels maps a watched key to a []map[chan *Event]v3Client.Watcher;
	// each Watch call on the key appends one single-entry map to that slice.
	watchedChannels sync.Map
	// watchedClients maps a watched key to the etcd client that serves the
	// watches on that key, so the client is shared between watchers.
	watchedClients map[string]*v3Client.Client
	// watchedClientsLock guards watchedClients.
	watchedClientsLock sync.RWMutex
}
| 49 | |
| 50 | // NewEtcdCustomClient returns a new client for the Etcd KV store allowing |
| 51 | // the called to specify etcd client configuration |
| 52 | func NewEtcdCustomClient(ctx context.Context, addr string, timeout time.Duration, level log.LogLevel) (*EtcdClient, error) { |
| 53 | // Get the capacity and max usage from the environment |
| 54 | capacity := defaultMaxPoolCapacity |
| 55 | maxUsage := defaultMaxPoolUsage |
| 56 | if capacityStr, present := os.LookupEnv(poolCapacityEnvName); present { |
| 57 | if val, err := strconv.Atoi(capacityStr); err == nil { |
| 58 | capacity = val |
| 59 | logger.Infow(ctx, "env-variable-set", log.Fields{"pool-capacity": capacity}) |
| 60 | } else { |
| 61 | logger.Warnw(ctx, "invalid-capacity-value", log.Fields{"error": err, "capacity": capacityStr}) |
| 62 | } |
| 63 | } |
| 64 | if maxUsageStr, present := os.LookupEnv(maxUsageEnvName); present { |
| 65 | if val, err := strconv.Atoi(maxUsageStr); err == nil { |
| 66 | maxUsage = val |
| 67 | logger.Infow(ctx, "env-variable-set", log.Fields{"max-usage": maxUsage}) |
| 68 | } else { |
| 69 | logger.Warnw(ctx, "invalid-max-usage-value", log.Fields{"error": err, "max-usage": maxUsageStr}) |
| 70 | } |
| 71 | } |
| 72 | |
| 73 | var err error |
| 74 | |
| 75 | pool, err := NewRoundRobinEtcdClientAllocator([]string{addr}, timeout, capacity, maxUsage, level) |
| 76 | if err != nil { |
| 77 | logger.Errorw(ctx, "failed-to-create-rr-client", log.Fields{ |
| 78 | "error": err, |
| 79 | }) |
| 80 | } |
| 81 | |
| 82 | logger.Infow(ctx, "etcd-pool-created", log.Fields{"capacity": capacity, "max-usage": maxUsage}) |
| 83 | |
| 84 | return &EtcdClient{pool: pool, |
| 85 | watchedClients: make(map[string]*v3Client.Client), |
| 86 | }, nil |
| 87 | } |
| 88 | |
// NewEtcdClient returns a new client for the Etcd KV store. It forwards all of
// its arguments unchanged to NewEtcdCustomClient and returns that function's
// result.
func NewEtcdClient(ctx context.Context, addr string, timeout time.Duration, level log.LogLevel) (*EtcdClient, error) {
	return NewEtcdCustomClient(ctx, addr, timeout, level)
}
| 93 | |
| 94 | // IsConnectionUp returns whether the connection to the Etcd KV store is up. If a timeout occurs then |
| 95 | // it is assumed the connection is down or unreachable. |
| 96 | func (c *EtcdClient) IsConnectionUp(ctx context.Context) bool { |
| 97 | // Let's try to get a non existent key. If the connection is up then there will be no error returned. |
| 98 | if _, err := c.Get(ctx, "non-existent-key"); err != nil { |
| 99 | return false |
| 100 | } |
| 101 | return true |
| 102 | } |
| 103 | |
| 104 | // List returns an array of key-value pairs with key as a prefix. Timeout defines how long the function will |
| 105 | // wait for a response |
| 106 | func (c *EtcdClient) List(ctx context.Context, key string) (map[string]*KVPair, error) { |
| 107 | client, err := c.pool.Get(ctx) |
| 108 | if err != nil { |
| 109 | return nil, err |
| 110 | } |
| 111 | defer c.pool.Put(client) |
| 112 | resp, err := client.Get(ctx, key, v3Client.WithPrefix()) |
| 113 | |
| 114 | if err != nil { |
| 115 | logger.Error(ctx, err) |
| 116 | return nil, err |
| 117 | } |
| 118 | m := make(map[string]*KVPair) |
| 119 | for _, ev := range resp.Kvs { |
| 120 | m[string(ev.Key)] = NewKVPair(string(ev.Key), ev.Value, "", ev.Lease, ev.Version) |
| 121 | } |
| 122 | return m, nil |
| 123 | } |
| 124 | |
| 125 | // Get returns a key-value pair for a given key. Timeout defines how long the function will |
| 126 | // wait for a response |
| 127 | func (c *EtcdClient) Get(ctx context.Context, key string) (*KVPair, error) { |
| 128 | client, err := c.pool.Get(ctx) |
| 129 | if err != nil { |
| 130 | return nil, err |
| 131 | } |
| 132 | defer c.pool.Put(client) |
| 133 | |
| 134 | attempt := 0 |
| 135 | |
| 136 | startLoop: |
| 137 | for { |
| 138 | resp, err := client.Get(ctx, key) |
| 139 | if err != nil { |
| 140 | switch err { |
| 141 | case context.Canceled: |
| 142 | logger.Warnw(ctx, "context-cancelled", log.Fields{"error": err}) |
| 143 | case context.DeadlineExceeded: |
| 144 | logger.Warnw(ctx, "context-deadline-exceeded", log.Fields{"error": err, "context": ctx}) |
| 145 | case v3rpcTypes.ErrEmptyKey: |
| 146 | logger.Warnw(ctx, "etcd-client-error", log.Fields{"error": err}) |
| 147 | case v3rpcTypes.ErrLeaderChanged, |
| 148 | v3rpcTypes.ErrGRPCNoLeader, |
| 149 | v3rpcTypes.ErrTimeout, |
| 150 | v3rpcTypes.ErrTimeoutDueToLeaderFail, |
| 151 | v3rpcTypes.ErrTimeoutDueToConnectionLost: |
| 152 | // Retry for these server errors |
| 153 | attempt += 1 |
| 154 | if er := backoff(ctx, attempt); er != nil { |
| 155 | logger.Warnw(ctx, "get-retries-failed", log.Fields{"key": key, "error": er, "attempt": attempt}) |
| 156 | return nil, err |
| 157 | } |
| 158 | logger.Warnw(ctx, "retrying-get", log.Fields{"key": key, "error": err, "attempt": attempt}) |
| 159 | goto startLoop |
| 160 | default: |
| 161 | logger.Warnw(ctx, "etcd-server-error", log.Fields{"error": err}) |
| 162 | } |
| 163 | return nil, err |
| 164 | } |
| 165 | |
| 166 | for _, ev := range resp.Kvs { |
| 167 | // Only one value is returned |
| 168 | return NewKVPair(string(ev.Key), ev.Value, "", ev.Lease, ev.Version), nil |
| 169 | } |
| 170 | return nil, nil |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | // Put writes a key-value pair to the KV store. Value can only be a string or []byte since the etcd API |
| 175 | // accepts only a string as a value for a put operation. Timeout defines how long the function will |
| 176 | // wait for a response |
| 177 | func (c *EtcdClient) Put(ctx context.Context, key string, value interface{}) error { |
| 178 | |
| 179 | // Validate that we can convert value to a string as etcd API expects a string |
| 180 | var val string |
| 181 | var err error |
| 182 | if val, err = ToString(value); err != nil { |
| 183 | return fmt.Errorf("unexpected-type-%T", value) |
| 184 | } |
| 185 | |
| 186 | client, err := c.pool.Get(ctx) |
| 187 | if err != nil { |
| 188 | return err |
| 189 | } |
| 190 | defer c.pool.Put(client) |
| 191 | |
| 192 | attempt := 0 |
| 193 | startLoop: |
| 194 | for { |
| 195 | _, err = client.Put(ctx, key, val) |
| 196 | if err != nil { |
| 197 | switch err { |
| 198 | case context.Canceled: |
| 199 | logger.Warnw(ctx, "context-cancelled", log.Fields{"error": err}) |
| 200 | case context.DeadlineExceeded: |
| 201 | logger.Warnw(ctx, "context-deadline-exceeded", log.Fields{"error": err, "context": ctx}) |
| 202 | case v3rpcTypes.ErrEmptyKey: |
| 203 | logger.Warnw(ctx, "etcd-client-error", log.Fields{"error": err}) |
| 204 | case v3rpcTypes.ErrLeaderChanged, |
| 205 | v3rpcTypes.ErrGRPCNoLeader, |
| 206 | v3rpcTypes.ErrTimeout, |
| 207 | v3rpcTypes.ErrTimeoutDueToLeaderFail, |
| 208 | v3rpcTypes.ErrTimeoutDueToConnectionLost: |
| 209 | // Retry for these server errors |
| 210 | attempt += 1 |
| 211 | if er := backoff(ctx, attempt); er != nil { |
| 212 | logger.Warnw(ctx, "put-retries-failed", log.Fields{"key": key, "error": er, "attempt": attempt}) |
| 213 | return err |
| 214 | } |
| 215 | logger.Warnw(ctx, "retrying-put", log.Fields{"key": key, "error": err, "attempt": attempt}) |
| 216 | goto startLoop |
| 217 | default: |
| 218 | logger.Warnw(ctx, "etcd-server-error", log.Fields{"error": err}) |
| 219 | } |
| 220 | return err |
| 221 | } |
| 222 | return nil |
| 223 | } |
| 224 | } |
| 225 | |
| 226 | // Delete removes a key from the KV store. Timeout defines how long the function will |
| 227 | // wait for a response |
| 228 | func (c *EtcdClient) Delete(ctx context.Context, key string) error { |
| 229 | client, err := c.pool.Get(ctx) |
| 230 | if err != nil { |
| 231 | return err |
| 232 | } |
| 233 | defer c.pool.Put(client) |
| 234 | |
| 235 | attempt := 0 |
| 236 | startLoop: |
| 237 | for { |
| 238 | _, err = client.Delete(ctx, key) |
| 239 | if err != nil { |
| 240 | switch err { |
| 241 | case context.Canceled: |
| 242 | logger.Warnw(ctx, "context-cancelled", log.Fields{"error": err}) |
| 243 | case context.DeadlineExceeded: |
| 244 | logger.Warnw(ctx, "context-deadline-exceeded", log.Fields{"error": err, "context": ctx}) |
| 245 | case v3rpcTypes.ErrEmptyKey: |
| 246 | logger.Warnw(ctx, "etcd-client-error", log.Fields{"error": err}) |
| 247 | case v3rpcTypes.ErrLeaderChanged, |
| 248 | v3rpcTypes.ErrGRPCNoLeader, |
| 249 | v3rpcTypes.ErrTimeout, |
| 250 | v3rpcTypes.ErrTimeoutDueToLeaderFail, |
| 251 | v3rpcTypes.ErrTimeoutDueToConnectionLost: |
| 252 | // Retry for these server errors |
| 253 | attempt += 1 |
| 254 | if er := backoff(ctx, attempt); er != nil { |
| 255 | logger.Warnw(ctx, "delete-retries-failed", log.Fields{"key": key, "error": er, "attempt": attempt}) |
| 256 | return err |
| 257 | } |
| 258 | logger.Warnw(ctx, "retrying-delete", log.Fields{"key": key, "error": err, "attempt": attempt}) |
| 259 | goto startLoop |
| 260 | default: |
| 261 | logger.Warnw(ctx, "etcd-server-error", log.Fields{"error": err}) |
| 262 | } |
| 263 | return err |
| 264 | } |
| 265 | logger.Debugw(ctx, "key(s)-deleted", log.Fields{"key": key}) |
| 266 | return nil |
| 267 | } |
| 268 | } |
| 269 | |
| 270 | func (c *EtcdClient) DeleteWithPrefix(ctx context.Context, prefixKey string) error { |
| 271 | |
| 272 | client, err := c.pool.Get(ctx) |
| 273 | if err != nil { |
| 274 | return err |
| 275 | } |
| 276 | defer c.pool.Put(client) |
| 277 | |
| 278 | //delete the prefix |
| 279 | if _, err := client.Delete(ctx, prefixKey, v3Client.WithPrefix()); err != nil { |
| 280 | logger.Errorw(ctx, "failed-to-delete-prefix-key", log.Fields{"key": prefixKey, "error": err}) |
| 281 | return err |
| 282 | } |
| 283 | logger.Debugw(ctx, "key(s)-deleted", log.Fields{"key": prefixKey}) |
| 284 | return nil |
| 285 | } |
| 286 | |
| 287 | // Watch provides the watch capability on a given key. It returns a channel onto which the callee needs to |
| 288 | // listen to receive Events. |
| 289 | func (c *EtcdClient) Watch(ctx context.Context, key string, withPrefix bool) chan *Event { |
| 290 | var err error |
| 291 | // Reuse the Etcd client when multiple callees are watching the same key. |
| 292 | c.watchedClientsLock.Lock() |
| 293 | client, exist := c.watchedClients[key] |
| 294 | if !exist { |
| 295 | client, err = c.pool.Get(ctx) |
| 296 | if err != nil { |
| 297 | logger.Errorw(ctx, "failed-to-an-etcd-client", log.Fields{"key": key, "error": err}) |
| 298 | c.watchedClientsLock.Unlock() |
| 299 | return nil |
| 300 | } |
| 301 | c.watchedClients[key] = client |
| 302 | } |
| 303 | c.watchedClientsLock.Unlock() |
| 304 | |
| 305 | w := v3Client.NewWatcher(client) |
| 306 | ctx, cancel := context.WithCancel(ctx) |
| 307 | var channel v3Client.WatchChan |
| 308 | if withPrefix { |
| 309 | channel = w.Watch(ctx, key, v3Client.WithPrefix()) |
| 310 | } else { |
| 311 | channel = w.Watch(ctx, key) |
| 312 | } |
| 313 | |
| 314 | // Create a new channel |
| 315 | ch := make(chan *Event, maxClientChannelBufferSize) |
| 316 | |
| 317 | // Keep track of the created channels so they can be closed when required |
| 318 | channelMap := make(map[chan *Event]v3Client.Watcher) |
| 319 | channelMap[ch] = w |
| 320 | channelMaps := c.addChannelMap(key, channelMap) |
| 321 | |
| 322 | // Changing the log field (from channelMaps) as the underlying logger cannot format the map of channels into a |
| 323 | // json format. |
| 324 | logger.Debugw(ctx, "watched-channels", log.Fields{"len": len(channelMaps)}) |
| 325 | // Launch a go routine to listen for updates |
| 326 | go c.listenForKeyChange(ctx, channel, ch, cancel) |
| 327 | |
| 328 | return ch |
| 329 | |
| 330 | } |
| 331 | |
| 332 | func (c *EtcdClient) addChannelMap(key string, channelMap map[chan *Event]v3Client.Watcher) []map[chan *Event]v3Client.Watcher { |
| 333 | var channels interface{} |
| 334 | var exists bool |
| 335 | |
| 336 | if channels, exists = c.watchedChannels.Load(key); exists { |
| 337 | channels = append(channels.([]map[chan *Event]v3Client.Watcher), channelMap) |
| 338 | } else { |
| 339 | channels = []map[chan *Event]v3Client.Watcher{channelMap} |
| 340 | } |
| 341 | c.watchedChannels.Store(key, channels) |
| 342 | |
| 343 | return channels.([]map[chan *Event]v3Client.Watcher) |
| 344 | } |
| 345 | |
| 346 | func (c *EtcdClient) removeChannelMap(key string, pos int) []map[chan *Event]v3Client.Watcher { |
| 347 | var channels interface{} |
| 348 | var exists bool |
| 349 | |
| 350 | if channels, exists = c.watchedChannels.Load(key); exists { |
| 351 | channels = append(channels.([]map[chan *Event]v3Client.Watcher)[:pos], channels.([]map[chan *Event]v3Client.Watcher)[pos+1:]...) |
| 352 | c.watchedChannels.Store(key, channels) |
| 353 | } |
| 354 | |
| 355 | return channels.([]map[chan *Event]v3Client.Watcher) |
| 356 | } |
| 357 | |
| 358 | func (c *EtcdClient) getChannelMaps(key string) ([]map[chan *Event]v3Client.Watcher, bool) { |
| 359 | var channels interface{} |
| 360 | var exists bool |
| 361 | |
| 362 | channels, exists = c.watchedChannels.Load(key) |
| 363 | |
| 364 | if channels == nil { |
| 365 | return nil, exists |
| 366 | } |
| 367 | |
| 368 | return channels.([]map[chan *Event]v3Client.Watcher), exists |
| 369 | } |
| 370 | |
| 371 | // CloseWatch closes a specific watch. Both the key and the channel are required when closing a watch as there |
| 372 | // may be multiple listeners on the same key. The previously created channel serves as a key |
| 373 | func (c *EtcdClient) CloseWatch(ctx context.Context, key string, ch chan *Event) { |
| 374 | // Get the array of channels mapping |
| 375 | var watchedChannels []map[chan *Event]v3Client.Watcher |
| 376 | var ok bool |
| 377 | |
| 378 | if watchedChannels, ok = c.getChannelMaps(key); !ok { |
| 379 | logger.Warnw(ctx, "key-has-no-watched-channels", log.Fields{"key": key}) |
| 380 | return |
| 381 | } |
| 382 | // Look for the channels |
| 383 | var pos = -1 |
| 384 | for i, chMap := range watchedChannels { |
| 385 | if t, ok := chMap[ch]; ok { |
| 386 | logger.Debug(ctx, "channel-found") |
| 387 | // Close the etcd watcher before the client channel. This should close the etcd channel as well |
| 388 | if err := t.Close(); err != nil { |
| 389 | logger.Errorw(ctx, "watcher-cannot-be-closed", log.Fields{"key": key, "error": err}) |
| 390 | } |
| 391 | pos = i |
| 392 | break |
| 393 | } |
| 394 | } |
| 395 | |
| 396 | channelMaps, _ := c.getChannelMaps(key) |
| 397 | // Remove that entry if present |
| 398 | if pos >= 0 { |
| 399 | channelMaps = c.removeChannelMap(key, pos) |
| 400 | } |
| 401 | |
| 402 | // If we don't have any keys being watched then return the Etcd client to the pool |
| 403 | if len(channelMaps) == 0 { |
| 404 | c.watchedClientsLock.Lock() |
| 405 | // Sanity |
| 406 | if client, ok := c.watchedClients[key]; ok { |
| 407 | c.pool.Put(client) |
| 408 | delete(c.watchedClients, key) |
| 409 | } |
| 410 | c.watchedClientsLock.Unlock() |
| 411 | } |
| 412 | logger.Infow(ctx, "watcher-channel-exiting", log.Fields{"key": key, "channel": channelMaps}) |
| 413 | } |
| 414 | |
| 415 | func (c *EtcdClient) listenForKeyChange(ctx context.Context, channel v3Client.WatchChan, ch chan<- *Event, cancel context.CancelFunc) { |
| 416 | logger.Debug(ctx, "start-listening-on-channel ...") |
| 417 | defer cancel() |
| 418 | defer close(ch) |
| 419 | for resp := range channel { |
| 420 | for _, ev := range resp.Events { |
| 421 | ch <- NewEvent(getEventType(ev), ev.Kv.Key, ev.Kv.Value, ev.Kv.Version) |
| 422 | } |
| 423 | } |
| 424 | logger.Debug(ctx, "stop-listening-on-channel ...") |
| 425 | } |
| 426 | |
| 427 | func getEventType(event *v3Client.Event) int { |
| 428 | switch event.Type { |
| 429 | case v3Client.EventTypePut: |
| 430 | return PUT |
| 431 | case v3Client.EventTypeDelete: |
| 432 | return DELETE |
| 433 | } |
| 434 | return UNKNOWN |
| 435 | } |
| 436 | |
// Close shuts down the etcd client pool used by this KV store client by
// calling Close on the pool with ctx.
func (c *EtcdClient) Close(ctx context.Context) {
	logger.Debug(ctx, "closing-etcd-pool")
	c.pool.Close(ctx)
}
| 442 | |
// The APIs below are not used.

// errUnimplemented is the error returned by every deprecated reservation and
// lock method of EtcdClient.
var errUnimplemented = errors.New("deprecated")
| 445 | |
// Reserve performs no work: it ignores its arguments and always returns
// (nil, errUnimplemented).
//
// Deprecated: Reserve is not implemented by EtcdClient.
func (c *EtcdClient) Reserve(ctx context.Context, key string, value interface{}, ttl time.Duration) (interface{}, error) {
	return nil, errUnimplemented
}
| 450 | |
// ReleaseAllReservations performs no work and always returns errUnimplemented.
//
// Deprecated: ReleaseAllReservations is not implemented by EtcdClient.
func (c *EtcdClient) ReleaseAllReservations(ctx context.Context) error {
	return errUnimplemented
}
| 455 | |
// ReleaseReservation performs no work: it ignores key and always returns
// errUnimplemented.
//
// Deprecated: ReleaseReservation is not implemented by EtcdClient.
func (c *EtcdClient) ReleaseReservation(ctx context.Context, key string) error {
	return errUnimplemented
}
| 460 | |
// RenewReservation performs no work: it ignores key and always returns
// errUnimplemented.
//
// Deprecated: RenewReservation is not implemented by EtcdClient.
func (c *EtcdClient) RenewReservation(ctx context.Context, key string) error {
	return errUnimplemented
}
| 465 | |
// AcquireLock performs no work: it ignores lockName and timeout and always
// returns errUnimplemented.
//
// Deprecated: AcquireLock is not implemented by EtcdClient.
func (c *EtcdClient) AcquireLock(ctx context.Context, lockName string, timeout time.Duration) error {
	return errUnimplemented
}
| 470 | |
// ReleaseLock performs no work: it ignores lockName and always returns
// errUnimplemented.
//
// Deprecated: ReleaseLock is not implemented by EtcdClient.
func (c *EtcdClient) ReleaseLock(lockName string) error {
	return errUnimplemented
}