[VOL-3197][VOL-3196] Enhanced Kafka RPC and gRPC interfaces to propagate Span context for log correlation

Also, made some corrections to helper method in log/utils based on testing

Change-Id: Ic0fec935dd8996b3c6c17116586c5bd307e7bebb
diff --git a/pkg/adapters/common/adapter_proxy.go b/pkg/adapters/common/adapter_proxy.go
index cb5d1bb..8588fe4 100644
--- a/pkg/adapters/common/adapter_proxy.go
+++ b/pkg/adapters/common/adapter_proxy.go
@@ -97,6 +97,8 @@
 	replyToTopic := kafka.Topic{Name: fromAdapter}
 	rpc := "process_inter_adapter_message"
 
+	// Add a indication in context to differentiate this Inter Adapter message during Span processing in Kafka IC proxy
+	ctx = context.WithValue(ctx, "inter-adapter-msg-type", msgType)
 	success, result := ap.kafkaICProxy.InvokeRPC(ctx, rpc, &topic, &replyToTopic, true, proxyDeviceId, args...)
 	logger.Debugw(ctx, "inter-adapter-msg-response", log.Fields{"replyTopic": replyToTopic, "success": success})
 	return unPackResponse(ctx, rpc, "", success, result)
diff --git a/pkg/grpc/server.go b/pkg/grpc/server.go
index 2bf7696..9f7c785 100644
--- a/pkg/grpc/server.go
+++ b/pkg/grpc/server.go
@@ -17,6 +17,9 @@
 
 import (
 	"context"
+	grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware"
+	grpc_opentracing "github.com/grpc-ecosystem/go-grpc-middleware/tracing/opentracing"
+	"github.com/opentracing/opentracing-go"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/credentials"
@@ -96,17 +99,27 @@
 		logger.Fatalf(ctx, "failed to listen: %v", err)
 	}
 
+	// Use Intercepters to automatically inject and publish Open Tracing Spans by this GRPC server
+	serverOptions := []grpc.ServerOption{
+		grpc.StreamInterceptor(grpc_middleware.ChainStreamServer(
+			grpc_opentracing.StreamServerInterceptor(grpc_opentracing.WithTracer(opentracing.GlobalTracer())),
+		)),
+		grpc.UnaryInterceptor(grpc_middleware.ChainUnaryServer(
+			grpc_opentracing.UnaryServerInterceptor(grpc_opentracing.WithTracer(opentracing.GlobalTracer())),
+			mkServerInterceptor(s),
+		))}
+
 	if s.secure && s.GrpcSecurity != nil {
 		creds, err := credentials.NewServerTLSFromFile(s.CertFile, s.KeyFile)
 		if err != nil {
 			logger.Fatalf(ctx, "could not load TLS keys: %s", err)
 		}
-		s.gs = grpc.NewServer(grpc.Creds(creds),
-			withServerUnaryInterceptor(s))
 
+		serverOptions = append(serverOptions, grpc.Creds(creds))
+		s.gs = grpc.NewServer(serverOptions...)
 	} else {
 		logger.Info(ctx, "starting-insecure-grpc-server")
-		s.gs = grpc.NewServer(withServerUnaryInterceptor(s))
+		s.gs = grpc.NewServer(serverOptions...)
 	}
 
 	// Register all required services
@@ -119,10 +132,6 @@
 	}
 }
 
-func withServerUnaryInterceptor(s *GrpcServer) grpc.ServerOption {
-	return grpc.UnaryInterceptor(mkServerInterceptor(s))
-}
-
 // Make a serverInterceptor for the given GrpcServer
 // This interceptor will check whether there is an attached probe,
 // and if that probe indicates NotReady, then an UNAVAILABLE
diff --git a/pkg/kafka/kafka_inter_container_library.go b/pkg/kafka/kafka_inter_container_library.go
index 368391e..92d2529 100644
--- a/pkg/kafka/kafka_inter_container_library.go
+++ b/pkg/kafka/kafka_inter_container_library.go
@@ -17,22 +17,23 @@
 
 import (
 	"context"
+	"encoding/json"
 	"errors"
 	"fmt"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
 	"reflect"
 	"strings"
 	"sync"
 	"time"
 
-	"google.golang.org/grpc/codes"
-	"google.golang.org/grpc/status"
-
 	"github.com/golang/protobuf/proto"
 	"github.com/golang/protobuf/ptypes"
 	"github.com/golang/protobuf/ptypes/any"
 	"github.com/google/uuid"
 	"github.com/opencord/voltha-lib-go/v3/pkg/log"
 	ic "github.com/opencord/voltha-protos/v3/go/inter_container"
+	"github.com/opentracing/opentracing-go"
 )
 
 const (
@@ -197,6 +198,13 @@
 	waitForResponse bool, key string, kvArgs ...*KVArg) chan *RpcResponse {
 
 	logger.Debugw(ctx, "InvokeAsyncRPC", log.Fields{"rpc": rpc, "key": key})
+
+	spanArg, span, ctx := kp.embedSpanAsArg(ctx, rpc, !waitForResponse)
+	if spanArg != nil {
+		kvArgs = append(kvArgs, &spanArg[0])
+	}
+	defer span.Finish()
+
 	//	If a replyToTopic is provided then we use it, otherwise just use the  default toTopic.  The replyToTopic is
 	// typically the device ID.
 	responseTopic := replyToTopic
@@ -219,6 +227,7 @@
 		protoRequest, err = encodeRequest(ctx, rpc, toTopic, responseTopic, key, kvArgs...)
 		if err != nil {
 			logger.Warnw(ctx, "cannot-format-request", log.Fields{"rpc": rpc, "error": err})
+			log.MarkSpanError(ctx, errors.New("cannot-format-request"))
 			chnl <- NewResponse(RpcFormattingError, err, nil)
 			return
 		}
@@ -227,6 +236,7 @@
 		var ch <-chan *ic.InterContainerMessage
 		if ch, err = kp.subscribeForResponse(ctx, *responseTopic, protoRequest.Header.Id); err != nil {
 			logger.Errorw(ctx, "failed-to-subscribe-for-response", log.Fields{"error": err, "toTopic": toTopic.Name})
+			log.MarkSpanError(ctx, errors.New("failed-to-subscribe-for-response"))
 			chnl <- NewResponse(RpcTransportError, err, nil)
 			return
 		}
@@ -260,6 +270,7 @@
 		case msg, ok := <-ch:
 			if !ok {
 				logger.Warnw(ctx, "channel-closed", log.Fields{"rpc": rpc, "replyTopic": replyToTopic.Name})
+				log.MarkSpanError(ctx, errors.New("channel-closed"))
 				chnl <- NewResponse(RpcTransportError, status.Error(codes.Aborted, "channel closed"), nil)
 			}
 			logger.Debugw(ctx, "received-response", log.Fields{"rpc": rpc, "msgHeader": msg.Header})
@@ -280,6 +291,7 @@
 			}
 		case <-ctx.Done():
 			logger.Errorw(ctx, "context-cancelled", log.Fields{"rpc": rpc, "ctx": ctx.Err()})
+			log.MarkSpanError(ctx, errors.New("context-cancelled"))
 			err := status.Error(codes.DeadlineExceeded, ctx.Err().Error())
 			chnl <- NewResponse(RpcTimeout, err, nil)
 		case <-kp.doneCh:
@@ -290,10 +302,72 @@
 	return chnl
 }
 
+// Method to extract Open-tracing Span from Context and serialize it for transport over Kafka embedded as a additional argument.
+// Additional argument is injected using key as "span" and value as Span marshalled into a byte slice
+//
+// The span name is automatically constructed using the RPC name with following convention (<rpc-name> represents name of invoked method):
+// - RPC invoked in Sync manner (WaitForResponse=true) : kafka-rpc-<rpc-name>
+// - RPC invoked in Async manner (WaitForResponse=false) : kafka-async-rpc-<rpc-name>
+// - Inter Adapter RPC invoked in Sync manner (WaitForResponse=true) : kafka-inter-adapter-rpc-<rpc-name>
+// - Inter Adapter RPC invoked in Async manner (WaitForResponse=false) : kafka-inter-adapter-async-rpc-<rpc-name>
+func (kp *interContainerProxy) embedSpanAsArg(ctx context.Context, rpc string, isAsync bool) ([]KVArg, opentracing.Span, context.Context) {
+	var err error
+	var newCtx context.Context
+	var spanToInject opentracing.Span
+
+	var spanName strings.Builder
+	spanName.WriteString("kafka-")
+
+	// In case of inter adapter message, use Msg Type for constructing RPC name
+	if rpc == "process_inter_adapter_message" {
+		if msgType, ok := ctx.Value("inter-adapter-msg-type").(ic.InterAdapterMessageType_Types); ok {
+			spanName.WriteString("inter-adapter-")
+			rpc = msgType.String()
+		}
+	}
+
+	if isAsync {
+		spanName.WriteString("async-rpc-")
+	} else {
+		spanName.WriteString("rpc-")
+	}
+	spanName.WriteString(rpc)
+
+	if isAsync {
+		spanToInject, newCtx = log.CreateAsyncSpan(ctx, spanName.String())
+	} else {
+		spanToInject, newCtx = log.CreateChildSpan(ctx, spanName.String())
+	}
+
+	spanToInject.SetBaggageItem("rpc-span-name", spanName.String())
+
+	textMapCarrier := opentracing.TextMapCarrier(make(map[string]string))
+	if err = opentracing.GlobalTracer().Inject(spanToInject.Context(), opentracing.TextMap, textMapCarrier); err != nil {
+		logger.Warnw(ctx, "unable-to-serialize-span-to-textmap", log.Fields{"span": spanToInject, "error": err})
+		return nil, spanToInject, newCtx
+	}
+
+	var textMapJson []byte
+	if textMapJson, err = json.Marshal(textMapCarrier); err != nil {
+		logger.Warnw(ctx, "unable-to-marshal-textmap-to-json-string", log.Fields{"textMap": textMapCarrier, "error": err})
+		return nil, spanToInject, newCtx
+	}
+
+	spanArg := make([]KVArg, 1)
+	spanArg[0] = KVArg{Key: "span", Value: &ic.StrType{Val: string(textMapJson)}}
+	return spanArg, spanToInject, newCtx
+}
+
 // InvokeRPC is used to send a request to a given topic
 func (kp *interContainerProxy) InvokeRPC(ctx context.Context, rpc string, toTopic *Topic, replyToTopic *Topic,
 	waitForResponse bool, key string, kvArgs ...*KVArg) (bool, *any.Any) {
 
+	spanArg, span, ctx := kp.embedSpanAsArg(ctx, rpc, false)
+	if spanArg != nil {
+		kvArgs = append(kvArgs, &spanArg[0])
+	}
+	defer span.Finish()
+
 	//	If a replyToTopic is provided then we use it, otherwise just use the  default toTopic.  The replyToTopic is
 	// typically the device ID.
 	responseTopic := replyToTopic
@@ -305,6 +379,7 @@
 	protoRequest, err := encodeRequest(ctx, rpc, toTopic, responseTopic, key, kvArgs...)
 	if err != nil {
 		logger.Warnw(ctx, "cannot-format-request", log.Fields{"rpc": rpc, "error": err})
+		log.MarkSpanError(ctx, errors.New("cannot-format-request"))
 		return false, nil
 	}
 
@@ -324,6 +399,7 @@
 	logger.Debugw(ctx, "sending-msg", log.Fields{"rpc": rpc, "toTopic": toTopic, "replyTopic": responseTopic, "key": key, "xId": protoRequest.Header.Id})
 	go func() {
 		if err := kp.kafkaClient.Send(ctx, protoRequest, toTopic, key); err != nil {
+			log.MarkSpanError(ctx, errors.New("send-failed"))
 			logger.Errorw(ctx, "send-failed", log.Fields{
 				"topic": toTopic,
 				"key":   key,
@@ -355,6 +431,7 @@
 		case msg, ok := <-ch:
 			if !ok {
 				logger.Warnw(ctx, "channel-closed", log.Fields{"rpc": rpc, "replyTopic": replyToTopic.Name})
+				log.MarkSpanError(ctx, errors.New("channel-closed"))
 				protoError := &ic.Error{Reason: "channel-closed"}
 				var marshalledArg *any.Any
 				if marshalledArg, err = ptypes.MarshalAny(protoError); err != nil {
@@ -372,6 +449,7 @@
 			return responseBody.Success, responseBody.Result
 		case <-ctx.Done():
 			logger.Debugw(ctx, "context-cancelled", log.Fields{"rpc": rpc, "ctx": ctx.Err()})
+			log.MarkSpanError(ctx, errors.New("context-cancelled"))
 			//	 pack the error as proto any type
 			protoError := &ic.Error{Reason: ctx.Err().Error(), Code: ic.ErrorCode_DEADLINE_EXCEEDED}
 
@@ -382,6 +460,7 @@
 			return false, marshalledArg
 		case <-childCtx.Done():
 			logger.Debugw(ctx, "context-cancelled", log.Fields{"rpc": rpc, "ctx": childCtx.Err()})
+			log.MarkSpanError(ctx, errors.New("context-cancelled"))
 			//	 pack the error as proto any type
 			protoError := &ic.Error{Reason: childCtx.Err().Error(), Code: ic.ErrorCode_DEADLINE_EXCEEDED}
 
@@ -720,6 +799,71 @@
 	return append(currentArgs, protoArg)
 }
 
+// Method to extract the Span embedded in Kafka RPC request on the receiver side. If span is found embedded in the KV args (with key as "span"),
+// it is de-serialized and injected into the Context to be carried forward by the RPC request processor thread.
+// If no span is found embedded, even then a span is created with name as "kafka-rpc-<rpc-name>" to enrich the Context for RPC calls coming
+// from components currently not sending the span (e.g. openonu adapter)
+func (kp *interContainerProxy) enrichContextWithSpan(ctx context.Context, rpcName string, args []*ic.Argument) (opentracing.Span, context.Context) {
+
+	for _, arg := range args {
+		if arg.Key == "span" {
+			var err error
+			var textMapString ic.StrType
+			if err = ptypes.UnmarshalAny(arg.Value, &textMapString); err != nil {
+				logger.Warnw(ctx, "unable-to-unmarshal-kvarg-to-textmap-string", log.Fields{"value": arg.Value})
+				break
+			}
+
+			spanTextMap := make(map[string]string)
+			if err = json.Unmarshal([]byte(textMapString.Val), &spanTextMap); err != nil {
+				logger.Warnw(ctx, "unable-to-unmarshal-textmap-from-json-string", log.Fields{"textMapString": textMapString, "error": err})
+				break
+			}
+
+			var spanContext opentracing.SpanContext
+			if spanContext, err = opentracing.GlobalTracer().Extract(opentracing.TextMap, opentracing.TextMapCarrier(spanTextMap)); err != nil {
+				logger.Warnw(ctx, "unable-to-deserialize-textmap-to-span", log.Fields{"textMap": spanTextMap, "error": err})
+				break
+			}
+
+			var receivedRpcName string
+			extractBaggage := func(k, v string) bool {
+				if k == "rpc-span-name" {
+					receivedRpcName = v
+					return false
+				}
+				return true
+			}
+
+			spanContext.ForeachBaggageItem(extractBaggage)
+
+			return opentracing.StartSpanFromContext(ctx, receivedRpcName, opentracing.FollowsFrom(spanContext))
+		}
+	}
+
+	// Create new Child Span with rpc as name if no span details were received in kafka arguments
+	var spanName strings.Builder
+	spanName.WriteString("kafka-")
+
+	// In case of inter adapter message, use Msg Type for constructing RPC name
+	if rpcName == "process_inter_adapter_message" {
+		for _, arg := range args {
+			if arg.Key == "msg" {
+				iamsg := ic.InterAdapterMessage{}
+				if err := ptypes.UnmarshalAny(arg.Value, &iamsg); err == nil {
+					spanName.WriteString("inter-adapter-")
+					rpcName = iamsg.Header.Type.String()
+				}
+			}
+		}
+	}
+
+	spanName.WriteString("rpc-")
+	spanName.WriteString(rpcName)
+
+	return opentracing.StartSpanFromContext(ctx, spanName.String())
+}
+
 func (kp *interContainerProxy) handleMessage(ctx context.Context, msg *ic.InterContainerMessage, targetInterface interface{}) {
 
 	// First extract the header to know whether this is a request - responses are handled by a different handler
@@ -732,6 +876,9 @@
 		if err = ptypes.UnmarshalAny(msg.Body, requestBody); err != nil {
 			logger.Warnw(ctx, "cannot-unmarshal-request", log.Fields{"error": err})
 		} else {
+			span, ctx := kp.enrichContextWithSpan(ctx, requestBody.Rpc, requestBody.Args)
+			defer span.Finish()
+
 			logger.Debugw(ctx, "received-request", log.Fields{"rpc": requestBody.Rpc, "header": msg.Header})
 			// let the callee unpack the arguments as its the only one that knows the real proto type
 			// Augment the requestBody with the message Id as it will be used in scenarios where cores
diff --git a/pkg/log/utils.go b/pkg/log/utils.go
index cdd0a4e..b22ca14 100644
--- a/pkg/log/utils.go
+++ b/pkg/log/utils.go
@@ -175,6 +175,15 @@
 
 					attrMap[k] = v
 				}
+
+				processBaggageItems := func(k, v string) bool {
+					if k != "rpc-span-name" {
+						attrMap[k] = v
+					}
+					return true
+				}
+
+				jspan.SpanContext().ForeachBaggageItem(processBaggageItems)
 			}
 		}
 	}
@@ -186,9 +195,23 @@
 func EnrichSpan(ctx context.Context, keyAndValues ...Fields) {
 	span := opentracing.SpanFromContext(ctx)
 	if span != nil {
-		for _, field := range keyAndValues {
-			for k, v := range field {
-				span.SetTag(k, v)
+		if jspan, ok := span.(*jtracing.Span); ok {
+			// Inject as a BaggageItem when the Span is the Root Span so that it propagates
+			// across the components along with Root Span (called as Trace)
+			// Else, inject as a Tag so that it is attached to the Child Task
+			isRootSpan := false
+			if jspan.SpanContext().TraceID().String() == jspan.SpanContext().SpanID().String() {
+				isRootSpan = true
+			}
+
+			for _, field := range keyAndValues {
+				for k, v := range field {
+					if isRootSpan {
+						span.SetBaggageItem(k, v.(string))
+					} else {
+						span.SetTag(k, v)
+					}
+				}
 			}
 		}
 	}