VOL-2101 readiness and liveliness check added to message-bus and etcd
updated vendor folder
Change-Id: I0c1b983bf47e2740e0a24ea0c3a05a803caff15c
updated testcases
diff --git a/vendor/github.com/opencord/voltha-lib-go/v2/pkg/kafka/sarama_client.go b/vendor/github.com/opencord/voltha-lib-go/v2/pkg/kafka/sarama_client.go
index fc75026..a251c56 100644
--- a/vendor/github.com/opencord/voltha-lib-go/v2/pkg/kafka/sarama_client.go
+++ b/vendor/github.com/opencord/voltha-lib-go/v2/pkg/kafka/sarama_client.go
@@ -74,6 +74,11 @@
 	topicLockMap                  map[string]*sync.RWMutex
 	lockOfTopicLockMap            sync.RWMutex
 	metadataMaxRetry              int
+	alive                         bool
+	liveness                      chan bool
+	livenessChannelInterval       time.Duration
+	lastLivenessTime              time.Time
+	started                       bool
 }
 
 type SaramaClientOption func(*SaramaClient)
@@ -186,6 +191,12 @@
 	}
 }
 
+func LivenessChannelInterval(opt time.Duration) SaramaClientOption {
+	return func(args *SaramaClient) {
+		args.livenessChannelInterval = opt
+	}
+}
+
 func NewSaramaClient(opts ...SaramaClientOption) *SaramaClient {
 	client := &SaramaClient{
 		KafkaHost: DefaultKafkaHost,
@@ -205,6 +216,7 @@
 	client.numReplicas = DefaultNumberReplicas
 	client.autoCreateTopic = DefaultAutoCreateTopic
 	client.metadataMaxRetry = DefaultMetadataMaxRetry
+	client.livenessChannelInterval = DefaultLivenessChannelInterval
 
 	for _, option := range opts {
 		option(client)
@@ -216,6 +228,10 @@
 	client.topicLockMap = make(map[string]*sync.RWMutex)
 	client.lockOfTopicLockMap = sync.RWMutex{}
 	client.lockOfGroupConsumers = sync.RWMutex{}
+
+	// alive until proven otherwise
+	client.alive = true
+
 	return client
 }
 
@@ -259,12 +275,16 @@
 
 	log.Info("kafka-sarama-client-started")
 
+	sc.started = true
+
 	return nil
 }
 
 func (sc *SaramaClient) Stop() {
 	log.Info("stopping-sarama-client")
 
+	sc.started = false
+
 	//Send a message over the done channel to close all long running routines
 	sc.doneCh <- 1
 
@@ -438,6 +458,30 @@
 	return err
 }
 
+func (sc *SaramaClient) updateLiveness(alive bool) {
+	// Post a consistent stream of liveness data to the channel,
+	// so that in a live state, the core does not timeout and
+	// send a forced liveness message. Production of liveness
+	// events to the channel is rate-limited by livenessChannelInterval.
+	if sc.liveness != nil {
+		if sc.alive != alive {
+			log.Info("update-liveness-channel-because-change")
+			sc.liveness <- alive
+			sc.lastLivenessTime = time.Now()
+		} else if time.Now().Sub(sc.lastLivenessTime) > sc.livenessChannelInterval {
+			log.Info("update-liveness-channel-because-interval")
+			sc.liveness <- alive
+			sc.lastLivenessTime = time.Now()
+		}
+	}
+
+	// Only emit a log message when the state changes
+	if sc.alive != alive {
+		log.Info("set-client-alive", log.Fields{"alive": alive})
+		sc.alive = alive
+	}
+}
+
 // send formats and sends the request onto the kafka messaging bus.
 func (sc *SaramaClient) Send(msg interface{}, topic *Topic, keys ...string) error {
 
@@ -474,8 +518,68 @@
 	select {
 	case ok := <-sc.producer.Successes():
 		log.Debugw("message-sent", log.Fields{"status": ok.Topic})
+		sc.updateLiveness(true)
 	case notOk := <-sc.producer.Errors():
 		log.Debugw("error-sending", log.Fields{"status": notOk})
+		if strings.Contains(notOk.Error(), "Failed to produce") {
+			sc.updateLiveness(false)
+		}
+		return notOk
+	}
+	return nil
+}
+
+// Enable the liveness monitor channel. This channel will report
+// a "true" or "false" on every publish, which indicates whether
+// or not the channel is still live. This channel is then picked up
+// by the service (i.e. rw_core / ro_core) to update readiness status
+// and/or take other actions.
+func (sc *SaramaClient) EnableLivenessChannel(enable bool) chan bool {
+	log.Infow("kafka-enable-liveness-channel", log.Fields{"enable": enable})
+	if enable {
+		if sc.liveness == nil {
+			log.Info("kafka-create-liveness-channel")
+			// At least 1, so we can immediately post to it without blocking
+			// Setting a bigger number (10) allows the monitor to fall behind
+			// without blocking others. The monitor shouldn't really fall
+			// behind...
+			sc.liveness = make(chan bool, 10)
+			// post intial state to the channel
+			sc.liveness <- sc.alive
+		}
+	} else {
+		// TODO: Think about whether we need the ability to turn off
+		// liveness monitoring
+		panic("Turning off liveness reporting is not supported")
+	}
+	return sc.liveness
+}
+
+// send an empty message on the liveness channel to check whether connectivity has
+// been restored.
+func (sc *SaramaClient) SendLiveness() error {
+	if !sc.started {
+		return fmt.Errorf("SendLiveness() called while not started")
+	}
+
+	kafkaMsg := &sarama.ProducerMessage{
+		Topic: "_liveness_test",
+		Value: sarama.StringEncoder(time.Now().Format(time.RFC3339)), // for debugging / informative use
+	}
+
+	// Send message to kafka
+	sc.producer.Input() <- kafkaMsg
+	// Wait for result
+	// TODO: Use a lock or a different mechanism to ensure the response received corresponds to the message sent.
+	select {
+	case ok := <-sc.producer.Successes():
+		log.Debugw("liveness-message-sent", log.Fields{"status": ok.Topic})
+		sc.updateLiveness(true)
+	case notOk := <-sc.producer.Errors():
+		log.Debugw("liveness-error-sending", log.Fields{"status": notOk})
+		if strings.Contains(notOk.Error(), "Failed to produce") {
+			sc.updateLiveness(false)
+		}
 		return notOk
 	}
 	return nil
@@ -713,7 +817,8 @@
 	config := scc.NewConfig()
 	config.ClientID = uuid.New().String()
 	config.Group.Mode = scc.ConsumerModeMultiplex
-	//config.Consumer.Return.Errors = true
+	config.Consumer.Group.Heartbeat.Interval, _ = time.ParseDuration("1s")
+	config.Consumer.Return.Errors = true
 	//config.Group.Return.Notifications = false
 	//config.Consumer.MaxWaitTime = time.Duration(DefaultConsumerMaxwait) * time.Millisecond
 	//config.Consumer.MaxProcessingTime = time.Duration(DefaultMaxProcessingTime) * time.Millisecond
@@ -791,16 +896,20 @@
 		select {
 		case err, ok := <-consumer.Errors():
 			if ok {
+				sc.updateLiveness(false)
 				log.Warnw("group-consumers-error", log.Fields{"topic": topic.Name, "error": err})
 			} else {
+				log.Warnw("group-consumers-closed-err", log.Fields{"topic": topic.Name})
 				// channel is closed
 				break startloop
 			}
 		case msg, ok := <-consumer.Messages():
 			if !ok {
+				log.Warnw("group-consumers-closed-msg", log.Fields{"topic": topic.Name})
 				// Channel closed
 				break startloop
 			}
+			sc.updateLiveness(true)
 			log.Debugw("message-received", log.Fields{"timestamp": msg.Timestamp, "receivedTopic": msg.Topic})
 			msgBody := msg.Value
 			icm := &ic.InterContainerMessage{}