blob: 1b4370da46bd2c93f880f9d65b86d72604ac0878 [file] [log] [blame]
Richard Jankowski215a3e22018-10-04 13:56:11 -04001/*
2 * Copyright 2018-present Open Networking Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Two voltha cores receive the same request; each tries to acquire ownership of the request
19 * by writing its identifier (e.g. container name or pod name) to the transaction key named
Richard Jankowskie4d77662018-10-17 13:53:21 -040020 * after the serial number of the request. The core that loses the race for acquisition
21 * monitors the progress of the core actually serving the request by watching for changes
22 * in the value of the transaction key. Once the request is complete, the
23 * serving core closes the transaction by invoking the KVTransaction's Close method, which
Richard Jankowski215a3e22018-10-04 13:56:11 -040024 * replaces the value of the transaction (i.e. serial number) key with the string
25 * TRANSACTION_COMPLETE. The standby core observes this update, stops watching the transaction,
26 * and then deletes the transaction key.
27 *
Richard Jankowski215a3e22018-10-04 13:56:11 -040028 */
29package core
30
31import (
khenaidoo2c6a0992019-04-29 13:46:56 -040032 "github.com/opencord/voltha-go/common/log"
khenaidoo89b0e942018-10-21 21:11:33 -040033 "github.com/opencord/voltha-go/db/kvstore"
Kent Hagerman46dcd9d2019-09-18 16:42:59 -040034 "google.golang.org/grpc/codes"
35 "google.golang.org/grpc/status"
khenaidoo89b0e942018-10-21 21:11:33 -040036 "time"
Richard Jankowski215a3e22018-10-04 13:56:11 -040037)
38
// Possible outcomes of an attempt to acquire a transaction. The numeric
// values double as indexes into txnState, so the order here must not change.
const (
	// UNKNOWN is the zero value: no outcome has been determined.
	UNKNOWN = iota
	// SEIZED_BY_SELF means this Core reserved the transaction key.
	SEIZED_BY_SELF
	// COMPLETED_BY_OTHER means the other Core marked the transaction complete.
	COMPLETED_BY_OTHER
	// ABANDONED_BY_OTHER means the transaction key disappeared, or was never
	// created, before the other Core completed the request.
	ABANDONED_BY_OTHER
	// ABANDONED_WATCH_BY_SELF means this Core gave up watching the transaction.
	ABANDONED_WATCH_BY_SELF
)
47
Kent Hagerman46dcd9d2019-09-18 16:42:59 -040048var errorTransactionNotAcquired = status.Error(codes.Canceled, "transaction-not-acquired")
khenaidoo09771ef2019-10-11 14:25:02 -040049var errorTransactionInvalidId = status.Error(codes.Canceled, "transaction-invalid-id")
Kent Hagerman46dcd9d2019-09-18 16:42:59 -040050
// TRANSACTION_COMPLETE is the value written to a transaction key by Close to
// signal that the request has been fully processed.
const TRANSACTION_COMPLETE = "TRANSACTION-COMPLETE"
54
// Lease-renewal tuning constants.
//
// They ensure that the Core processing a request holds on to the transaction
// until it either completes it (successfully or by timing out) or the Core
// itself crashes (e.g. a server failure). For a request with a timeout of x
// seconds, the processing Core renews the transaction lease every
// x/NUM_TXN_RENEWAL_PER_REQUEST seconds. Once the request completes, the Core
// stops renewing and sets the transaction value to TRANSACTION_COMPLETE. If the
// processing Core crashes, the lease is no longer renewed, the KV store deletes
// the transaction after its renewal period, and the Core watching the
// transaction takes over.
//
// Because MIN_TXN_RENEWAL_INTERVAL_IN_SEC is 3 seconds, a transaction that
// completes within 3 seconds is never renewed.
const (
	// NUM_TXN_RENEWAL_PER_REQUEST is the number of renewals per request duration.
	NUM_TXN_RENEWAL_PER_REQUEST = 2
	// MIN_TXN_RENEWAL_INTERVAL_IN_SEC is the lower bound on the renewal interval.
	MIN_TXN_RENEWAL_INTERVAL_IN_SEC = 3
	// MIN_TXN_RESERVATION_DURATION_IN_SEC is the lower bound on the duration
	// computed by Acquired.
	MIN_TXN_RESERVATION_DURATION_IN_SEC = 5
)
69
Richard Jankowski215a3e22018-10-04 13:56:11 -040070type TransactionContext struct {
khenaidoo09771ef2019-10-11 14:25:02 -040071 kvClient kvstore.Client
72 kvOperationTimeout int
73 owner string
74 txnPrefix string
Richard Jankowski215a3e22018-10-04 13:56:11 -040075}
khenaidoo89b0e942018-10-21 21:11:33 -040076
Richard Jankowski215a3e22018-10-04 13:56:11 -040077var ctx *TransactionContext
78
// txnState maps a transaction acquisition result (UNKNOWN through
// ABANDONED_WATCH_BY_SELF) to the label used in log messages. Entries are
// positional and must stay in the same order as the result constants.
var txnState = []string{
	"UNKNOWN",
	"SEIZED-BY-SELF",
	"COMPLETED-BY-OTHER",
	"ABANDONED-BY-OTHER",
	"ABANDONED_WATCH_BY_SELF",
}
Richard Jankowski215a3e22018-10-04 13:56:11 -040085
86func init() {
Richard Jankowski199fd862019-03-18 14:49:51 -040087 log.AddPackage(log.JSON, log.DebugLevel, nil)
Richard Jankowski215a3e22018-10-04 13:56:11 -040088}
89
90func NewTransactionContext(
khenaidoo89b0e942018-10-21 21:11:33 -040091 owner string,
92 txnPrefix string,
93 kvClient kvstore.Client,
khenaidoo09771ef2019-10-11 14:25:02 -040094 kvOpTimeout int) *TransactionContext {
Richard Jankowski215a3e22018-10-04 13:56:11 -040095
khenaidoo89b0e942018-10-21 21:11:33 -040096 return &TransactionContext{
khenaidoo09771ef2019-10-11 14:25:02 -040097 owner: owner,
98 txnPrefix: txnPrefix,
99 kvClient: kvClient,
100 kvOperationTimeout: kvOpTimeout}
Richard Jankowski215a3e22018-10-04 13:56:11 -0400101}
102
103/*
104 * Before instantiating a KVTransaction, a TransactionContext must be created.
105 * The parameters stored in the context govern the behaviour of all KVTransaction
106 * instances.
107 *
108 * :param owner: The owner (i.e. voltha core name) of a transaction
109 * :param txnPrefix: The key prefix under which all transaction IDs, or serial numbers,
110 * will be created (e.g. "service/voltha/transactions")
111 * :param kvClient: The client API used for all interactions with the KV store. Currently
112 * only the etcd client is supported.
Richard Jankowski199fd862019-03-18 14:49:51 -0400113 * :param: kvOpTimeout: The maximum time, in seconds, to be taken by any KV operation
114 * used by this package
Richard Jankowski215a3e22018-10-04 13:56:11 -0400115 */
116func SetTransactionContext(owner string,
khenaidoo89b0e942018-10-21 21:11:33 -0400117 txnPrefix string,
118 kvClient kvstore.Client,
khenaidoo09771ef2019-10-11 14:25:02 -0400119 kvOpTimeout int) error {
Richard Jankowski215a3e22018-10-04 13:56:11 -0400120
khenaidoo09771ef2019-10-11 14:25:02 -0400121 ctx = NewTransactionContext(owner, txnPrefix, kvClient, kvOpTimeout)
khenaidoo89b0e942018-10-21 21:11:33 -0400122 return nil
Richard Jankowski215a3e22018-10-04 13:56:11 -0400123}
124
// KVTransaction represents a single request transaction tracked in the KV
// store.
type KVTransaction struct {
	// monitorCh is created when this Core reserves the transaction and is
	// closed by Close to stop the lease-renewal goroutine. It stays nil when
	// the transaction was never reserved by this Core.
	monitorCh chan int
	// txnId is the serial number of the request.
	txnId string
	// txnKey is the KV store key: the context prefix followed by txnId.
	txnKey string
}
130
131/*
132 * A KVTransaction constructor
133 *
134 * :param txnId: The serial number of a voltha request.
Richard Jankowskie4d77662018-10-17 13:53:21 -0400135 * :return: A KVTransaction instance
Richard Jankowski215a3e22018-10-04 13:56:11 -0400136 */
Richard Jankowskie4d77662018-10-17 13:53:21 -0400137func NewKVTransaction(txnId string) *KVTransaction {
khenaidoo89b0e942018-10-21 21:11:33 -0400138 return &KVTransaction{
139 txnId: txnId,
140 txnKey: ctx.txnPrefix + txnId}
Richard Jankowski215a3e22018-10-04 13:56:11 -0400141}
142
143/*
khenaidoo09771ef2019-10-11 14:25:02 -0400144 * Acquired is invoked by a Core, upon reception of a request, to reserve the transaction key in the KV store. The
145 * request may be resource specific (i.e will include an ID for that resource) or may be generic (i.e. list a set of
146 * resources). If the request is resource specific then this function should be invoked with the ownedByMe flag to
147 * indicate whether this Core owns this resource. In the case where this Core owns this resource or it is a generic
148 * request then we will proceed to reserve the transaction key in the KV store for a minimum time specified by the
149 * minDuration param. If the reservation request fails (i.e. the other Core got the reservation before this one - this
150 * can happen only for generic request) then the Core will start to watch for changes to the key to determine
151 * whether the other Core completed the transaction successfully or the Core just died. If the Core does not own the
152 * resource then we will proceed to watch the transaction key.
Richard Jankowski215a3e22018-10-04 13:56:11 -0400153 *
khenaidoo09771ef2019-10-11 14:25:02 -0400154 * :param minDuration: minimum time to reserve the transaction key in the KV store
155 * :param ownedByMe: specify whether the request is about a resource owned or not. If it's absent then this is a
156 * generic request that has no specific resource ID (e.g. list)
157 *
158 * :return: A boolean specifying whether the resource was acquired. An error is return in case this function is invoked
159 * for a resource that is nonexistent.
Richard Jankowski215a3e22018-10-04 13:56:11 -0400160 */
khenaidoo09771ef2019-10-11 14:25:02 -0400161func (c *KVTransaction) Acquired(minDuration int64, ownedByMe ...bool) (bool, error) {
khenaidoo89b0e942018-10-21 21:11:33 -0400162 var acquired bool
163 var currOwner string = ""
164 var res int
Richard Jankowski215a3e22018-10-04 13:56:11 -0400165
khenaidoo89b0e942018-10-21 21:11:33 -0400166 // Convert milliseconds to seconds, rounding up
167 // The reservation TTL is specified in seconds
khenaidoo09771ef2019-10-11 14:25:02 -0400168 durationInSecs := minDuration / 1000
169 if remainder := minDuration % 1000; remainder > 0 {
khenaidoo89b0e942018-10-21 21:11:33 -0400170 durationInSecs++
171 }
khenaidoo09771ef2019-10-11 14:25:02 -0400172 if durationInSecs < int64(MIN_TXN_RESERVATION_DURATION_IN_SEC) {
173 durationInSecs = int64(MIN_TXN_RESERVATION_DURATION_IN_SEC)
174 }
175 genericRequest := true
176 resourceOwned := false
177 if len(ownedByMe) > 0 {
178 genericRequest = false
179 resourceOwned = ownedByMe[0]
180 }
181 if resourceOwned || genericRequest {
182 // Keep the reservation longer that the minDuration (which is really the request timeout) to ensure the
183 // transaction key stays in the KV store until after the Core finalize a request timeout condition (which is
184 // a success from a request completion perspective).
185 if err := c.tryToReserveTxn(durationInSecs * 2); err == nil {
186 res = SEIZED_BY_SELF
187 } else {
188 log.Debugw("watch-other-server",
189 log.Fields{"transactionId": c.txnId, "owner": currOwner, "timeout": durationInSecs})
190 res = c.Watch(durationInSecs)
191 }
192 } else {
193 res = c.Watch(durationInSecs)
194 }
195 switch res {
196 case SEIZED_BY_SELF, ABANDONED_BY_OTHER:
197 acquired = true
198 default:
199 acquired = false
200 }
201 log.Debugw("acquire-transaction-status", log.Fields{"transactionId": c.txnId, "acquired": acquired, "result": txnState[res]})
202 return acquired, nil
203}
Richard Jankowski215a3e22018-10-04 13:56:11 -0400204
khenaidoo09771ef2019-10-11 14:25:02 -0400205func (c *KVTransaction) tryToReserveTxn(durationInSecs int64) error {
206 var currOwner string = ""
207 var res int
208 value, err := ctx.kvClient.Reserve(c.txnKey, ctx.owner, durationInSecs)
khenaidoo89b0e942018-10-21 21:11:33 -0400209 if value != nil {
khenaidoo09771ef2019-10-11 14:25:02 -0400210 if currOwner, err = kvstore.ToString(value); err != nil { // This should never happen
211 log.Errorw("unexpected-owner-type", log.Fields{"transactionId": c.txnId, "error": err})
212 return err
213 }
214 if currOwner == ctx.owner {
215 log.Debugw("acquired-transaction", log.Fields{"transactionId": c.txnId, "result": txnState[res]})
216 // Setup the monitoring channel
217 c.monitorCh = make(chan int)
218 go c.holdOnToTxnUntilProcessingCompleted(c.txnKey, ctx.owner, durationInSecs)
219 return nil
khenaidoo89b0e942018-10-21 21:11:33 -0400220 }
221 }
khenaidoo09771ef2019-10-11 14:25:02 -0400222 return status.Error(codes.PermissionDenied, "reservation-denied")
Richard Jankowski215a3e22018-10-04 13:56:11 -0400223}
224
khenaidoo09771ef2019-10-11 14:25:02 -0400225func (c *KVTransaction) Watch(durationInSecs int64) int {
Richard Jankowski199fd862019-03-18 14:49:51 -0400226 var res int
227
228 events := ctx.kvClient.Watch(c.txnKey)
A R Karthick43ba1fb2019-10-03 16:24:21 +0000229 defer ctx.kvClient.CloseWatch(c.txnKey, events)
Richard Jankowski199fd862019-03-18 14:49:51 -0400230
khenaidoo09771ef2019-10-11 14:25:02 -0400231 transactionWasAcquiredByOther := false
232
233 //Check whether the transaction was already completed by the other Core before we got here.
234 if kvp, _ := ctx.kvClient.Get(c.txnKey, ctx.kvOperationTimeout); kvp != nil {
235 transactionWasAcquiredByOther = true
236 if val, err := kvstore.ToString(kvp.Value); err == nil {
237 if val == TRANSACTION_COMPLETE {
238 res = COMPLETED_BY_OTHER
239 // Do an immediate delete of the transaction in the KV Store to free up KV Storage faster
240 c.Delete()
241 return res
242 }
243 } else {
244 // An unexpected value - let's get out of here as something did not go according to plan
245 res = ABANDONED_WATCH_BY_SELF
246 log.Debugw("cannot-read-transaction-value", log.Fields{"txn": c.txnId, "error": err})
247 return res
248 }
249 }
250
A R Karthick919f6db2019-08-29 18:14:56 +0000251 for {
252 select {
A R Karthick919f6db2019-08-29 18:14:56 +0000253 case event := <-events:
khenaidoo09771ef2019-10-11 14:25:02 -0400254 transactionWasAcquiredByOther = true
A R Karthick919f6db2019-08-29 18:14:56 +0000255 log.Debugw("received-event", log.Fields{"txn": c.txnId, "type": event.EventType})
256 if event.EventType == kvstore.DELETE {
257 // The other core failed to process the request
258 res = ABANDONED_BY_OTHER
259 } else if event.EventType == kvstore.PUT {
260 key, e1 := kvstore.ToString(event.Key)
261 val, e2 := kvstore.ToString(event.Value)
khenaidoo09771ef2019-10-11 14:25:02 -0400262 if e1 == nil && e2 == nil && key == c.txnKey {
A R Karthick919f6db2019-08-29 18:14:56 +0000263 if val == TRANSACTION_COMPLETE {
264 res = COMPLETED_BY_OTHER
khenaidoo09771ef2019-10-11 14:25:02 -0400265 // Successful request completion has been detected. Remove the transaction key
A R Karthick919f6db2019-08-29 18:14:56 +0000266 c.Delete()
267 } else {
khenaidoo09771ef2019-10-11 14:25:02 -0400268 log.Debugw("Ignoring-PUT-event", log.Fields{"val": val, "key": key})
A R Karthick919f6db2019-08-29 18:14:56 +0000269 continue
270 }
khenaidoo09771ef2019-10-11 14:25:02 -0400271 } else {
272 log.Warnw("received-unexpected-PUT-event", log.Fields{"txn": c.txnId, "key": key, "ctxKey": c.txnKey})
A R Karthick919f6db2019-08-29 18:14:56 +0000273 }
Richard Jankowski199fd862019-03-18 14:49:51 -0400274 }
khenaidoo09771ef2019-10-11 14:25:02 -0400275 case <-time.After(time.Duration(durationInSecs) * time.Second):
276 // Corner case: In the case where the Core owning the device dies and before this Core takes ownership of
277 // this device there is a window where new requests will end up being watched instead of being processed.
278 // Grab the request if the other Core did not create the transaction in the KV store.
279 // TODO: Use a peer-monitoring probe to switch over (still relies on the probe frequency). This will
280 // guarantee that the peer is actually gone instead of limiting the time the peer can get hold of a
281 // request.
282 if !transactionWasAcquiredByOther {
283 log.Debugw("timeout-no-peer", log.Fields{"txId": c.txnId})
284 res = ABANDONED_BY_OTHER
285 } else {
286 continue
287 }
Richard Jankowski199fd862019-03-18 14:49:51 -0400288 }
A R Karthick919f6db2019-08-29 18:14:56 +0000289 break
Richard Jankowski199fd862019-03-18 14:49:51 -0400290 }
291 return res
292}
293
Richard Jankowskie4d77662018-10-17 13:53:21 -0400294func (c *KVTransaction) Close() error {
khenaidoo1ce37ad2019-03-24 22:07:24 -0400295 log.Debugw("close", log.Fields{"txn": c.txnId})
khenaidoo09771ef2019-10-11 14:25:02 -0400296 // Stop monitoring the key (applies only when there has been no transaction switch over)
297 if c.monitorCh != nil {
298 close(c.monitorCh)
299 ctx.kvClient.Put(c.txnKey, TRANSACTION_COMPLETE, ctx.kvOperationTimeout, false)
300 }
301 return nil
Richard Jankowski215a3e22018-10-04 13:56:11 -0400302}
303
Richard Jankowskie4d77662018-10-17 13:53:21 -0400304func (c *KVTransaction) Delete() error {
khenaidoo1ce37ad2019-03-24 22:07:24 -0400305 log.Debugw("delete", log.Fields{"txn": c.txnId})
khenaidoo09771ef2019-10-11 14:25:02 -0400306 return ctx.kvClient.Delete(c.txnKey, ctx.kvOperationTimeout, false)
307}
308
309// holdOnToTxnUntilProcessingCompleted renews the transaction lease until the transaction is complete. durationInSecs
310// is used to calculate the frequency at which the Core processing the transaction renews the lease. This function
311// exits only when the transaction is Closed, i.e completed.
312func (c *KVTransaction) holdOnToTxnUntilProcessingCompleted(key string, owner string, durationInSecs int64) {
313 log.Debugw("holdOnToTxnUntilProcessingCompleted", log.Fields{"txn": c.txnId})
314 renewInterval := durationInSecs / NUM_TXN_RENEWAL_PER_REQUEST
315 if renewInterval < MIN_TXN_RENEWAL_INTERVAL_IN_SEC {
316 renewInterval = MIN_TXN_RENEWAL_INTERVAL_IN_SEC
317 }
318forLoop:
319 for {
320 select {
321 case <-c.monitorCh:
322 log.Debugw("transaction-renewal-exits", log.Fields{"txn": c.txnId})
323 break forLoop
324 case <-time.After(time.Duration(renewInterval) * time.Second):
325 if err := ctx.kvClient.RenewReservation(c.txnKey); err != nil {
326 // Log and continue.
327 log.Warnw("transaction-renewal-failed", log.Fields{"txnId": c.txnKey, "error": err})
328 }
329 }
330 }
Richard Jankowski215a3e22018-10-04 13:56:11 -0400331}