blob: 1f63debdb38fe6f7c57e1143247580d1de91a858 [file] [log] [blame]
Richard Jankowski215a3e22018-10-04 13:56:11 -04001/*
2 * Copyright 2018-present Open Networking Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Two voltha cores receive the same request; each tries to acquire ownership of the request
19 * by writing its identifier (e.g. container name or pod name) to the transaction key named
Richard Jankowskie4d77662018-10-17 13:53:21 -040020 * after the serial number of the request. The core that loses the race for acquisition
21 * monitors the progress of the core actually serving the request by watching for changes
22 * in the value of the transaction key. Once the request is complete, the
23 * serving core closes the transaction by invoking the KVTransaction's Close method, which
 * replaces the value of the transaction (i.e. serial number) key with the string
 * TRANSACTION-COMPLETE. The standby core observes this update, stops watching the transaction,
26 * and then deletes the transaction key.
27 *
Richard Jankowski215a3e22018-10-04 13:56:11 -040028 */
npujar1d86a522019-11-14 17:11:16 +053029
Richard Jankowski215a3e22018-10-04 13:56:11 -040030package core
31
32import (
npujar1d86a522019-11-14 17:11:16 +053033 "time"
34
Scott Baker807addd2019-10-24 15:16:21 -070035 "github.com/opencord/voltha-lib-go/v2/pkg/db/kvstore"
36 "github.com/opencord/voltha-lib-go/v2/pkg/log"
Kent Hagerman46dcd9d2019-09-18 16:42:59 -040037 "google.golang.org/grpc/codes"
38 "google.golang.org/grpc/status"
Richard Jankowski215a3e22018-10-04 13:56:11 -040039)
40
// Transaction acquisition results. The numeric values are also used as
// indexes into the txnState name table, so the order must be preserved.
const (
	// UNKNOWN is the zero value: no acquisition outcome was determined.
	UNKNOWN = iota
	// SeizedBySelf means this Core reserved the transaction key itself.
	SeizedBySelf
	// CompletedByOther means the transaction key was observed holding the
	// completion marker written by the Core that served the request.
	CompletedByOther
	// AbandonedByOther means the watched transaction key was deleted, or no
	// peer created it before the watch timed out.
	AbandonedByOther
	// AbandonedWatchBySelf means this Core stopped watching because the
	// stored transaction value could not be read as a string.
	AbandonedWatchBySelf
)
49
Kent Hagerman46dcd9d2019-09-18 16:42:59 -040050var errorTransactionNotAcquired = status.Error(codes.Canceled, "transaction-not-acquired")
51
// TransactionComplete is the value Close writes to a transaction key to signal
// that the request has been processed; Watch compares key values against it.
const TransactionComplete = "TRANSACTION-COMPLETE"
56
// Lease tuning constants. They ensure that the Core processing a request holds
// on to the transaction until it either completes the request (successfully or
// by timing out) or the Core itself crashes (e.g. a server failure).
//
// While a request is being processed, the owning Core renews the transaction
// lease every (reservation duration / NumTxnRenewalPerRequest) seconds, but
// never more often than every MinTxnRenewalIntervalInSec seconds. Once the
// request completes, the Core stops renewing and sets the transaction value to
// TransactionComplete. If the processing Core crashes, the lease is no longer
// renewed, the KV store drops the key after its TTL, and the Core watching the
// transaction takes over. A transaction that completes in less than
// MinTxnRenewalIntervalInSec seconds is therefore never renewed.
const (
	// NumTxnRenewalPerRequest divides the reservation duration to obtain the
	// lease renewal interval.
	NumTxnRenewalPerRequest = 2
	// MinTxnRenewalIntervalInSec is the lower bound, in seconds, of the lease
	// renewal interval.
	MinTxnRenewalIntervalInSec = 3
	// MinTxnReservationDurationInSec is the lower bound, in seconds, applied
	// to the duration requested in Acquired.
	MinTxnReservationDurationInSec = 5
)
72
npujar1d86a522019-11-14 17:11:16 +053073// TransactionContext represent transaction context attributes
Richard Jankowski215a3e22018-10-04 13:56:11 -040074type TransactionContext struct {
khenaidoo09771ef2019-10-11 14:25:02 -040075 kvClient kvstore.Client
76 kvOperationTimeout int
77 owner string
78 txnPrefix string
Richard Jankowski215a3e22018-10-04 13:56:11 -040079}
khenaidoo89b0e942018-10-21 21:11:33 -040080
Richard Jankowski215a3e22018-10-04 13:56:11 -040081var ctx *TransactionContext
82
// txnState holds the loggable name of each acquisition result, indexed by the
// result's numeric value.
var txnState = []string{
	"UNKNOWN",                 // 0
	"SEIZED-BY-SELF",          // 1
	"COMPLETED-BY-OTHER",      // 2
	"ABANDONED-BY-OTHER",      // 3
	"ABANDONED_WATCH_BY_SELF", // 4
}
Richard Jankowski215a3e22018-10-04 13:56:11 -040089
90func init() {
npujar1d86a522019-11-14 17:11:16 +053091 _, err := log.AddPackage(log.JSON, log.DebugLevel, nil)
92 if err != nil {
93 log.Errorw("unable-to-register-package-to-the-log-map", log.Fields{"error": err})
94 }
Richard Jankowski215a3e22018-10-04 13:56:11 -040095}
96
npujar1d86a522019-11-14 17:11:16 +053097// NewTransactionContext creates transaction context instance
Richard Jankowski215a3e22018-10-04 13:56:11 -040098func NewTransactionContext(
khenaidoo89b0e942018-10-21 21:11:33 -040099 owner string,
100 txnPrefix string,
101 kvClient kvstore.Client,
khenaidoo09771ef2019-10-11 14:25:02 -0400102 kvOpTimeout int) *TransactionContext {
Richard Jankowski215a3e22018-10-04 13:56:11 -0400103
khenaidoo89b0e942018-10-21 21:11:33 -0400104 return &TransactionContext{
khenaidoo09771ef2019-10-11 14:25:02 -0400105 owner: owner,
106 txnPrefix: txnPrefix,
107 kvClient: kvClient,
108 kvOperationTimeout: kvOpTimeout}
Richard Jankowski215a3e22018-10-04 13:56:11 -0400109}
110
111/*
112 * Before instantiating a KVTransaction, a TransactionContext must be created.
npujar1d86a522019-11-14 17:11:16 +0530113 * The parameters stored in the context govern the behavior of all KVTransaction
Richard Jankowski215a3e22018-10-04 13:56:11 -0400114 * instances.
115 *
116 * :param owner: The owner (i.e. voltha core name) of a transaction
117 * :param txnPrefix: The key prefix under which all transaction IDs, or serial numbers,
118 * will be created (e.g. "service/voltha/transactions")
119 * :param kvClient: The client API used for all interactions with the KV store. Currently
120 * only the etcd client is supported.
Richard Jankowski199fd862019-03-18 14:49:51 -0400121 * :param: kvOpTimeout: The maximum time, in seconds, to be taken by any KV operation
122 * used by this package
Richard Jankowski215a3e22018-10-04 13:56:11 -0400123 */
npujar1d86a522019-11-14 17:11:16 +0530124
125// SetTransactionContext creates new transaction context
Richard Jankowski215a3e22018-10-04 13:56:11 -0400126func SetTransactionContext(owner string,
khenaidoo89b0e942018-10-21 21:11:33 -0400127 txnPrefix string,
128 kvClient kvstore.Client,
khenaidoo09771ef2019-10-11 14:25:02 -0400129 kvOpTimeout int) error {
Richard Jankowski215a3e22018-10-04 13:56:11 -0400130
khenaidoo09771ef2019-10-11 14:25:02 -0400131 ctx = NewTransactionContext(owner, txnPrefix, kvClient, kvOpTimeout)
khenaidoo89b0e942018-10-21 21:11:33 -0400132 return nil
Richard Jankowski215a3e22018-10-04 13:56:11 -0400133}
134
// KVTransaction represents one request transaction tracked in the KV store.
type KVTransaction struct {
	// monitorCh is created when this Core reserves the transaction and is
	// closed by Close to stop the lease-renewal goroutine. It stays nil when
	// the transaction was not reserved by this Core.
	monitorCh chan int
	// txnID is the serial number of the request.
	txnID string
	// txnKey is the KV store key: the context's transaction prefix followed
	// by txnID.
	txnKey string
}
141
142/*
143 * A KVTransaction constructor
144 *
145 * :param txnId: The serial number of a voltha request.
Richard Jankowskie4d77662018-10-17 13:53:21 -0400146 * :return: A KVTransaction instance
Richard Jankowski215a3e22018-10-04 13:56:11 -0400147 */
npujar1d86a522019-11-14 17:11:16 +0530148
149// NewKVTransaction creates KV transaction instance
150func NewKVTransaction(txnID string) *KVTransaction {
khenaidoo89b0e942018-10-21 21:11:33 -0400151 return &KVTransaction{
npujar1d86a522019-11-14 17:11:16 +0530152 txnID: txnID,
153 txnKey: ctx.txnPrefix + txnID}
Richard Jankowski215a3e22018-10-04 13:56:11 -0400154}
155
156/*
khenaidoo09771ef2019-10-11 14:25:02 -0400157 * Acquired is invoked by a Core, upon reception of a request, to reserve the transaction key in the KV store. The
158 * request may be resource specific (i.e will include an ID for that resource) or may be generic (i.e. list a set of
159 * resources). If the request is resource specific then this function should be invoked with the ownedByMe flag to
160 * indicate whether this Core owns this resource. In the case where this Core owns this resource or it is a generic
161 * request then we will proceed to reserve the transaction key in the KV store for a minimum time specified by the
162 * minDuration param. If the reservation request fails (i.e. the other Core got the reservation before this one - this
163 * can happen only for generic request) then the Core will start to watch for changes to the key to determine
164 * whether the other Core completed the transaction successfully or the Core just died. If the Core does not own the
165 * resource then we will proceed to watch the transaction key.
Richard Jankowski215a3e22018-10-04 13:56:11 -0400166 *
khenaidoo09771ef2019-10-11 14:25:02 -0400167 * :param minDuration: minimum time to reserve the transaction key in the KV store
168 * :param ownedByMe: specify whether the request is about a resource owned or not. If it's absent then this is a
169 * generic request that has no specific resource ID (e.g. list)
170 *
171 * :return: A boolean specifying whether the resource was acquired. An error is return in case this function is invoked
172 * for a resource that is nonexistent.
Richard Jankowski215a3e22018-10-04 13:56:11 -0400173 */
npujar1d86a522019-11-14 17:11:16 +0530174
175// Acquired aquires transaction status
khenaidoo09771ef2019-10-11 14:25:02 -0400176func (c *KVTransaction) Acquired(minDuration int64, ownedByMe ...bool) (bool, error) {
khenaidoo89b0e942018-10-21 21:11:33 -0400177 var acquired bool
npujar1d86a522019-11-14 17:11:16 +0530178 var currOwner string
khenaidoo89b0e942018-10-21 21:11:33 -0400179 var res int
Richard Jankowski215a3e22018-10-04 13:56:11 -0400180
khenaidoo89b0e942018-10-21 21:11:33 -0400181 // Convert milliseconds to seconds, rounding up
182 // The reservation TTL is specified in seconds
khenaidoo09771ef2019-10-11 14:25:02 -0400183 durationInSecs := minDuration / 1000
184 if remainder := minDuration % 1000; remainder > 0 {
khenaidoo89b0e942018-10-21 21:11:33 -0400185 durationInSecs++
186 }
npujar1d86a522019-11-14 17:11:16 +0530187 if durationInSecs < int64(MinTxnReservationDurationInSec) {
188 durationInSecs = int64(MinTxnReservationDurationInSec)
khenaidoo09771ef2019-10-11 14:25:02 -0400189 }
190 genericRequest := true
191 resourceOwned := false
192 if len(ownedByMe) > 0 {
193 genericRequest = false
194 resourceOwned = ownedByMe[0]
195 }
196 if resourceOwned || genericRequest {
197 // Keep the reservation longer that the minDuration (which is really the request timeout) to ensure the
198 // transaction key stays in the KV store until after the Core finalize a request timeout condition (which is
199 // a success from a request completion perspective).
200 if err := c.tryToReserveTxn(durationInSecs * 2); err == nil {
npujar1d86a522019-11-14 17:11:16 +0530201 res = SeizedBySelf
khenaidoo09771ef2019-10-11 14:25:02 -0400202 } else {
203 log.Debugw("watch-other-server",
npujar1d86a522019-11-14 17:11:16 +0530204 log.Fields{"transactionId": c.txnID, "owner": currOwner, "timeout": durationInSecs})
khenaidoo09771ef2019-10-11 14:25:02 -0400205 res = c.Watch(durationInSecs)
206 }
207 } else {
208 res = c.Watch(durationInSecs)
209 }
210 switch res {
npujar1d86a522019-11-14 17:11:16 +0530211 case SeizedBySelf, AbandonedByOther:
khenaidoo09771ef2019-10-11 14:25:02 -0400212 acquired = true
213 default:
214 acquired = false
215 }
npujar1d86a522019-11-14 17:11:16 +0530216 log.Debugw("acquire-transaction-status", log.Fields{"transactionId": c.txnID, "acquired": acquired, "result": txnState[res]})
khenaidoo09771ef2019-10-11 14:25:02 -0400217 return acquired, nil
218}
Richard Jankowski215a3e22018-10-04 13:56:11 -0400219
khenaidoo09771ef2019-10-11 14:25:02 -0400220func (c *KVTransaction) tryToReserveTxn(durationInSecs int64) error {
npujar1d86a522019-11-14 17:11:16 +0530221 var currOwner string
khenaidoo09771ef2019-10-11 14:25:02 -0400222 var res int
npujar1d86a522019-11-14 17:11:16 +0530223 var err error
224 value, _ := ctx.kvClient.Reserve(c.txnKey, ctx.owner, durationInSecs)
khenaidoo89b0e942018-10-21 21:11:33 -0400225 if value != nil {
khenaidoo09771ef2019-10-11 14:25:02 -0400226 if currOwner, err = kvstore.ToString(value); err != nil { // This should never happen
npujar1d86a522019-11-14 17:11:16 +0530227 log.Errorw("unexpected-owner-type", log.Fields{"transactionId": c.txnID, "error": err})
khenaidoo09771ef2019-10-11 14:25:02 -0400228 return err
229 }
230 if currOwner == ctx.owner {
npujar1d86a522019-11-14 17:11:16 +0530231 log.Debugw("acquired-transaction", log.Fields{"transactionId": c.txnID, "result": txnState[res]})
khenaidoo09771ef2019-10-11 14:25:02 -0400232 // Setup the monitoring channel
233 c.monitorCh = make(chan int)
234 go c.holdOnToTxnUntilProcessingCompleted(c.txnKey, ctx.owner, durationInSecs)
235 return nil
khenaidoo89b0e942018-10-21 21:11:33 -0400236 }
237 }
khenaidoo09771ef2019-10-11 14:25:02 -0400238 return status.Error(codes.PermissionDenied, "reservation-denied")
Richard Jankowski215a3e22018-10-04 13:56:11 -0400239}
240
npujar1d86a522019-11-14 17:11:16 +0530241// Watch watches transaction
khenaidoo09771ef2019-10-11 14:25:02 -0400242func (c *KVTransaction) Watch(durationInSecs int64) int {
Richard Jankowski199fd862019-03-18 14:49:51 -0400243 var res int
244
245 events := ctx.kvClient.Watch(c.txnKey)
A R Karthick43ba1fb2019-10-03 16:24:21 +0000246 defer ctx.kvClient.CloseWatch(c.txnKey, events)
Richard Jankowski199fd862019-03-18 14:49:51 -0400247
khenaidoo09771ef2019-10-11 14:25:02 -0400248 transactionWasAcquiredByOther := false
249
250 //Check whether the transaction was already completed by the other Core before we got here.
251 if kvp, _ := ctx.kvClient.Get(c.txnKey, ctx.kvOperationTimeout); kvp != nil {
252 transactionWasAcquiredByOther = true
253 if val, err := kvstore.ToString(kvp.Value); err == nil {
npujar1d86a522019-11-14 17:11:16 +0530254 if val == TransactionComplete {
255 res = CompletedByOther
khenaidoo09771ef2019-10-11 14:25:02 -0400256 // Do an immediate delete of the transaction in the KV Store to free up KV Storage faster
npujar1d86a522019-11-14 17:11:16 +0530257 err = c.Delete()
258 if err != nil {
259 log.Errorw("unable-to-delete-the-transaction", log.Fields{"error": err})
260 }
khenaidoo09771ef2019-10-11 14:25:02 -0400261 return res
262 }
263 } else {
264 // An unexpected value - let's get out of here as something did not go according to plan
npujar1d86a522019-11-14 17:11:16 +0530265 res = AbandonedWatchBySelf
266 log.Debugw("cannot-read-transaction-value", log.Fields{"txn": c.txnID, "error": err})
khenaidoo09771ef2019-10-11 14:25:02 -0400267 return res
268 }
269 }
270
A R Karthick919f6db2019-08-29 18:14:56 +0000271 for {
272 select {
A R Karthick919f6db2019-08-29 18:14:56 +0000273 case event := <-events:
khenaidoo09771ef2019-10-11 14:25:02 -0400274 transactionWasAcquiredByOther = true
npujar1d86a522019-11-14 17:11:16 +0530275 log.Debugw("received-event", log.Fields{"txn": c.txnID, "type": event.EventType})
A R Karthick919f6db2019-08-29 18:14:56 +0000276 if event.EventType == kvstore.DELETE {
277 // The other core failed to process the request
npujar1d86a522019-11-14 17:11:16 +0530278 res = AbandonedByOther
A R Karthick919f6db2019-08-29 18:14:56 +0000279 } else if event.EventType == kvstore.PUT {
280 key, e1 := kvstore.ToString(event.Key)
281 val, e2 := kvstore.ToString(event.Value)
khenaidoo09771ef2019-10-11 14:25:02 -0400282 if e1 == nil && e2 == nil && key == c.txnKey {
npujar1d86a522019-11-14 17:11:16 +0530283 if val == TransactionComplete {
284 res = CompletedByOther
khenaidoo09771ef2019-10-11 14:25:02 -0400285 // Successful request completion has been detected. Remove the transaction key
npujar1d86a522019-11-14 17:11:16 +0530286 err := c.Delete()
287 if err != nil {
288 log.Errorw("unable-to-delete-the-transaction", log.Fields{"error": err})
289 }
A R Karthick919f6db2019-08-29 18:14:56 +0000290 } else {
khenaidoo09771ef2019-10-11 14:25:02 -0400291 log.Debugw("Ignoring-PUT-event", log.Fields{"val": val, "key": key})
A R Karthick919f6db2019-08-29 18:14:56 +0000292 continue
293 }
khenaidoo09771ef2019-10-11 14:25:02 -0400294 } else {
npujar1d86a522019-11-14 17:11:16 +0530295 log.Warnw("received-unexpected-PUT-event", log.Fields{"txn": c.txnID, "key": key, "ctxKey": c.txnKey})
A R Karthick919f6db2019-08-29 18:14:56 +0000296 }
Richard Jankowski199fd862019-03-18 14:49:51 -0400297 }
khenaidoo09771ef2019-10-11 14:25:02 -0400298 case <-time.After(time.Duration(durationInSecs) * time.Second):
299 // Corner case: In the case where the Core owning the device dies and before this Core takes ownership of
300 // this device there is a window where new requests will end up being watched instead of being processed.
301 // Grab the request if the other Core did not create the transaction in the KV store.
302 // TODO: Use a peer-monitoring probe to switch over (still relies on the probe frequency). This will
303 // guarantee that the peer is actually gone instead of limiting the time the peer can get hold of a
304 // request.
305 if !transactionWasAcquiredByOther {
npujar1d86a522019-11-14 17:11:16 +0530306 log.Debugw("timeout-no-peer", log.Fields{"txId": c.txnID})
307 res = AbandonedByOther
khenaidoo09771ef2019-10-11 14:25:02 -0400308 } else {
309 continue
310 }
Richard Jankowski199fd862019-03-18 14:49:51 -0400311 }
A R Karthick919f6db2019-08-29 18:14:56 +0000312 break
Richard Jankowski199fd862019-03-18 14:49:51 -0400313 }
314 return res
315}
316
npujar1d86a522019-11-14 17:11:16 +0530317// Close closes transaction
Richard Jankowskie4d77662018-10-17 13:53:21 -0400318func (c *KVTransaction) Close() error {
npujar1d86a522019-11-14 17:11:16 +0530319 log.Debugw("close", log.Fields{"txn": c.txnID})
khenaidoo09771ef2019-10-11 14:25:02 -0400320 // Stop monitoring the key (applies only when there has been no transaction switch over)
321 if c.monitorCh != nil {
322 close(c.monitorCh)
npujar1d86a522019-11-14 17:11:16 +0530323 err := ctx.kvClient.Put(c.txnKey, TransactionComplete, ctx.kvOperationTimeout)
324
325 if err != nil {
326 log.Errorw("unable-to-write-a-key-value-pair-to-the-KV-store", log.Fields{"error": err})
327 }
khenaidoo09771ef2019-10-11 14:25:02 -0400328 }
329 return nil
Richard Jankowski215a3e22018-10-04 13:56:11 -0400330}
331
npujar1d86a522019-11-14 17:11:16 +0530332// Delete deletes transaction
Richard Jankowskie4d77662018-10-17 13:53:21 -0400333func (c *KVTransaction) Delete() error {
npujar1d86a522019-11-14 17:11:16 +0530334 log.Debugw("delete", log.Fields{"txn": c.txnID})
sbarbari17d7e222019-11-05 10:02:29 -0500335 return ctx.kvClient.Delete(c.txnKey, ctx.kvOperationTimeout)
khenaidoo09771ef2019-10-11 14:25:02 -0400336}
337
338// holdOnToTxnUntilProcessingCompleted renews the transaction lease until the transaction is complete. durationInSecs
339// is used to calculate the frequency at which the Core processing the transaction renews the lease. This function
340// exits only when the transaction is Closed, i.e completed.
341func (c *KVTransaction) holdOnToTxnUntilProcessingCompleted(key string, owner string, durationInSecs int64) {
npujar1d86a522019-11-14 17:11:16 +0530342 log.Debugw("holdOnToTxnUntilProcessingCompleted", log.Fields{"txn": c.txnID})
343 renewInterval := durationInSecs / NumTxnRenewalPerRequest
344 if renewInterval < MinTxnRenewalIntervalInSec {
345 renewInterval = MinTxnRenewalIntervalInSec
khenaidoo09771ef2019-10-11 14:25:02 -0400346 }
347forLoop:
348 for {
349 select {
350 case <-c.monitorCh:
npujar1d86a522019-11-14 17:11:16 +0530351 log.Debugw("transaction-renewal-exits", log.Fields{"txn": c.txnID})
khenaidoo09771ef2019-10-11 14:25:02 -0400352 break forLoop
353 case <-time.After(time.Duration(renewInterval) * time.Second):
354 if err := ctx.kvClient.RenewReservation(c.txnKey); err != nil {
355 // Log and continue.
356 log.Warnw("transaction-renewal-failed", log.Fields{"txnId": c.txnKey, "error": err})
357 }
358 }
359 }
Richard Jankowski215a3e22018-10-04 13:56:11 -0400360}