[VOL-3142] Enabled tracing for rw-core and added config parameters

Change-Id: I0c5faa104f35b2af694f32785ec031c5277e3f64
diff --git a/VERSION b/VERSION
index 9080d9c..73462a5 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.5.1-dev
+2.5.1
diff --git a/go.mod b/go.mod
index ee14fab..f0513f2 100644
--- a/go.mod
+++ b/go.mod
@@ -6,7 +6,7 @@
 	github.com/gogo/protobuf v1.3.0
 	github.com/golang/protobuf v1.3.2
 	github.com/google/uuid v1.1.1
-	github.com/opencord/voltha-lib-go/v3 v3.2.1
+	github.com/opencord/voltha-lib-go/v3 v3.2.2
 	github.com/opencord/voltha-protos/v3 v3.4.1
 	github.com/phayes/freeport v0.0.0-20180830031419-95f893ade6f2
 	github.com/stretchr/testify v1.4.0
diff --git a/go.sum b/go.sum
index 9c0256a..b8ad939 100644
--- a/go.sum
+++ b/go.sum
@@ -198,8 +198,8 @@
 github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
 github.com/onsi/gomega v1.4.2 h1:3mYCb7aPxS/RU7TI1y4rkEn1oKmPRjNJLNEXgw7MH2I=
 github.com/onsi/gomega v1.4.2/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
-github.com/opencord/voltha-lib-go/v3 v3.2.1 h1:s6thA7jLGkrx6S3fzyCBYkXS4c0lIlPgmI0nYCme6NA=
-github.com/opencord/voltha-lib-go/v3 v3.2.1/go.mod h1:RPWJ4EY9Q758uwYAAb2WqZYDSI2M0FCQqjIHJxxDiHY=
+github.com/opencord/voltha-lib-go/v3 v3.2.2 h1:MqVkzoEuAztV2SjexpTzDmuHTlDKK+gN+slcZRe7Wyc=
+github.com/opencord/voltha-lib-go/v3 v3.2.2/go.mod h1:7CnNtVE/O8y80Xw7JXtaWQ61HxL7yWXlB0/uH5R/X/Y=
 github.com/opencord/voltha-protos/v3 v3.4.1 h1:Nrr0jKy7WiMiFlXYiK8z73RvI4K4L5b/top7d3htQQI=
 github.com/opencord/voltha-protos/v3 v3.4.1/go.mod h1:nl1ETp5Iw3avxOaKD8BJlYY5wYI4KeV95aT1pL63nto=
 github.com/opentracing/opentracing-go v1.1.0 h1:pWlfV3Bxv7k65HYwkikxat0+s3pV4bsqf19k25Ur8rU=
diff --git a/rw_core/config/config.go b/rw_core/config/config.go
index 3d95f33..03abc67 100644
--- a/rw_core/config/config.go
+++ b/rw_core/config/config.go
@@ -50,6 +50,9 @@
 	defaultLiveProbeInterval         = 60 * time.Second
 	defaultNotLiveProbeInterval      = 5 * time.Second // Probe more frequently when not alive
 	defaultProbeAddress              = ":8080"
+	defaultTraceEnabled              = false
+	defaultTraceAgentAddress         = "127.0.0.1:6831"
+	defaultLogCorrelationEnabled     = true
 )
 
 // RWCoreFlags represents the set of configurations used by the read-write core service
@@ -79,6 +82,9 @@
 	LiveProbeInterval         time.Duration
 	NotLiveProbeInterval      time.Duration
 	ProbeAddress              string
+	TraceEnabled              bool
+	TraceAgentAddress         string
+	LogCorrelationEnabled     bool
 }
 
 // NewRWCoreFlags returns a new RWCore config
@@ -108,6 +114,9 @@
 		LiveProbeInterval:         defaultLiveProbeInterval,
 		NotLiveProbeInterval:      defaultNotLiveProbeInterval,
 		ProbeAddress:              defaultProbeAddress,
+		TraceEnabled:              defaultTraceEnabled,
+		TraceAgentAddress:         defaultTraceAgentAddress,
+		LogCorrelationEnabled:     defaultLogCorrelationEnabled,
 	}
 	return &rwCoreFlag
 }
@@ -180,5 +189,14 @@
 	help = fmt.Sprintf("The address on which to listen to answer liveness and readiness probe queries over HTTP.")
 	flag.StringVar(&(cf.ProbeAddress), "probe_address", defaultProbeAddress, help)
 
+	help = fmt.Sprintf("Whether to send logs to tracing agent?")
+	flag.BoolVar(&(cf.TraceEnabled), "trace_enabled", defaultTraceEnabled, help)
+
+	help = fmt.Sprintf("The address of tracing agent to which span info should be sent.")
+	flag.StringVar(&(cf.TraceAgentAddress), "trace_agent_address", defaultTraceAgentAddress, help)
+
+	help = fmt.Sprintf("Whether to enrich log statements with fields denoting operation being executed for achieving correlation?")
+	flag.BoolVar(&(cf.LogCorrelationEnabled), "log_correlation_enabled", defaultLogCorrelationEnabled, help)
+
 	flag.Parse()
 }
diff --git a/rw_core/main.go b/rw_core/main.go
index 19de6f0..98d3985 100644
--- a/rw_core/main.go
+++ b/rw_core/main.go
@@ -106,8 +106,6 @@
 	// Update all loggers to log level specified as input parameter
 	log.SetAllLogLevel(logLevel)
 
-	//log.SetPackageLogLevel("github.com/opencord/voltha-go/rw_core/core", log.DebugLevel)
-
 	defer func() {
 		err := log.CleanUp()
 		if err != nil {
@@ -143,6 +141,13 @@
 	// Add the probe to the context to pass to all the services started
 	probeCtx := context.WithValue(ctx, probe.ProbeContextKey, p)
 
+	closer, err := log.InitTracingAndLogCorrelation(cf.TraceEnabled, cf.TraceAgentAddress, cf.LogCorrelationEnabled)
+	if err != nil {
+		logger.Warnw(ctx, "unable-to-initialize-tracing-and-log-correlation-module", log.Fields{"error": err})
+	} else {
+		defer closer.Close()
+	}
+
 	// create and start the core
 	core := c.NewCore(probeCtx, instanceID, cf)
 
diff --git a/vendor/github.com/opencord/voltha-lib-go/v3/pkg/log/utils.go b/vendor/github.com/opencord/voltha-lib-go/v3/pkg/log/utils.go
index 1869b1a..a947185 100644
--- a/vendor/github.com/opencord/voltha-lib-go/v3/pkg/log/utils.go
+++ b/vendor/github.com/opencord/voltha-lib-go/v3/pkg/log/utils.go
@@ -25,15 +25,52 @@
 	"github.com/opentracing/opentracing-go"
 	jtracing "github.com/uber/jaeger-client-go"
 	jcfg "github.com/uber/jaeger-client-go/config"
-	jmetrics "github.com/uber/jaeger-lib/metrics"
 	"io"
+	"io/ioutil"
 	"os"
+	"strings"
 )
 
+const (
+	RootSpanNameKey = "op-name"
+)
+
+// Flag indicating whether to extract Log Fields from Span embedded in the received Context
+var extractLogFieldsFromContext bool = true
+
+// Flag indicating whether to process Span related operations; to save CPU cycles when disabled
+var processSpanOperations bool = true
+
+// Jaeger complaint Logger instance to redirect logs to Default Logger
+type traceLogger struct {
+	logger *clogger
+}
+
+func (tl traceLogger) Error(msg string) {
+	tl.logger.Error(context.Background(), msg)
+}
+
+func (tl traceLogger) Infof(msg string, args ...interface{}) {
+	// Tracing logs should be performed only at Debug Verbosity
+	tl.logger.Debugf(context.Background(), msg, args...)
+}
+
 // This method will start the Tracing for a component using Component name injected from the Chart
 // The close() method on returned Closer instance should be called in defer mode to gracefully
 // terminate tracing on component shutdown
-func StartTracing() (io.Closer, error) {
+func InitTracingAndLogCorrelation(tracePublishEnabled bool, traceAgentAddress string, logCorrelationEnabled bool) (io.Closer, error) {
+	if !tracePublishEnabled && !logCorrelationEnabled {
+		defaultLogger.Info(context.Background(), "Skipping Global Tracer initialization as both Trace publish and Log correlation are configured as disabled")
+		extractLogFieldsFromContext = false
+		processSpanOperations = false
+		return ioutil.NopCloser(strings.NewReader("")), nil
+	}
+
+	if !logCorrelationEnabled {
+		defaultLogger.Info(context.Background(), "Disabling Log Fields extraction from context as configured")
+		extractLogFieldsFromContext = false
+	}
+
 	componentName := os.Getenv("COMPONENT_NAME")
 	if componentName == "" {
 		return nil, errors.New("Unable to retrieve PoD Component Name from Runtime env")
@@ -42,10 +79,45 @@
 	// Use basic configuration to start with; will extend later to support dynamic config updates
 	cfg := jcfg.Configuration{}
 
-	jLoggerCfgOption := jcfg.Logger(jtracing.StdLogger)
-	jMetricsFactoryCfgOption := jcfg.Metrics(jmetrics.NullFactory)
+	var err error
+	var jReporterConfig jcfg.ReporterConfig
+	var jReporterCfgOption jtracing.Reporter
 
-	return cfg.InitGlobalTracer(componentName, jLoggerCfgOption, jMetricsFactoryCfgOption)
+	// Attempt Trace Agent Address only if Trace Publishing is enabled; else directly use Loopback IP
+	if tracePublishEnabled {
+		jReporterConfig = jcfg.ReporterConfig{LocalAgentHostPort: traceAgentAddress, LogSpans: true}
+		jReporterCfgOption, err = jReporterConfig.NewReporter(componentName, jtracing.NewNullMetrics(), traceLogger{logger: defaultLogger})
+
+		if err != nil {
+			defaultLogger.Errorw(context.Background(), "Unable to create Reporter with given Trace Agent address",
+				Fields{"error": err, "address": traceAgentAddress})
+			// The Reporter initialization may fail due to Invalid Agent address or non-existent Agent (DNS lookup failure).
+			// It is essential for Tracer Instance to still start for correct Span propagation needed for log correlation.
+			// Thus, falback to use loopback IP for Reporter initialization before throwing back any error
+			tracePublishEnabled = false
+		}
+	}
+
+	if !tracePublishEnabled {
+		jReporterConfig.LocalAgentHostPort = "127.0.0.1:6831"
+		jReporterCfgOption, err = jReporterConfig.NewReporter(componentName, jtracing.NewNullMetrics(), traceLogger{logger: defaultLogger})
+		if err != nil {
+			return nil, errors.New("Failed to initialize Jaeger Tracing due to Reporter creation error : " + err.Error())
+		}
+	}
+
+	// To start with, we are using Constant Sampling type
+	samplerParam := 0 // 0: Do not publish span, 1: Publish
+	if tracePublishEnabled {
+		samplerParam = 1
+	}
+	jSamplerConfig := jcfg.SamplerConfig{Type: "const", Param: float64(samplerParam)}
+	jSamplerCfgOption, err := jSamplerConfig.NewSampler(componentName, jtracing.NewNullMetrics())
+	if err != nil {
+		return nil, errors.New("Unable to create Sampler : " + err.Error())
+	}
+
+	return cfg.InitGlobalTracer(componentName, jcfg.Reporter(jReporterCfgOption), jcfg.Sampler(jSamplerCfgOption))
 }
 
 // Extracts details of Execution Context as log fields from the Tracing Span injected into the
@@ -57,18 +129,43 @@
 //
 // If no Span is found associated with context, blank slice is returned without any log fields
 func ExtractContextAttributes(ctx context.Context) []interface{} {
+	if !extractLogFieldsFromContext {
+		return make([]interface{}, 0)
+	}
+
 	attrMap := make(map[string]interface{})
 
 	if ctx != nil {
 		if span := opentracing.SpanFromContext(ctx); span != nil {
 			if jspan, ok := span.(*jtracing.Span); ok {
-				opname := jspan.OperationName()
-				spanid := jspan.SpanContext().SpanID().String()
+				// Add Log fields for operation identified by Root Level Span (Trace)
+				opId := jspan.SpanContext().TraceID().String()
+				opName := jspan.BaggageItem(RootSpanNameKey)
 
-				attrMap["op-id"] = spanid
-				attrMap["op-name"] = opname
+				taskId := jspan.SpanContext().SpanID().String()
+				taskName := jspan.OperationName()
+
+				if opName == "" {
+					span.SetBaggageItem(RootSpanNameKey, taskName)
+					opName = taskName
+				}
+
+				attrMap["op-id"] = opId
+				attrMap["op-name"] = opName
+
+				// Add Log fields for task identified by Current Span, if it is different
+				// than operation
+				if taskId != opId {
+					attrMap["task-id"] = taskId
+					attrMap["task-name"] = taskName
+				}
 
 				for k, v := range jspan.Tags() {
+					// Ignore the special tags added by Jaeger, middleware (sampler.type, span.*) present in the span
+					if strings.HasPrefix(k, "sampler.") || strings.HasPrefix(k, "span.") || k == "component" {
+						continue
+					}
+
 					attrMap[k] = v
 				}
 			}
@@ -78,6 +175,90 @@
 	return serializeMap(attrMap)
 }
 
+// Method to inject additional log fields into Span e.g. device-id
+func EnrichSpan(ctx context.Context, keyAndValues ...Fields) {
+	span := opentracing.SpanFromContext(ctx)
+	if span != nil {
+		for _, field := range keyAndValues {
+			for k, v := range field {
+				span.SetTag(k, v)
+			}
+		}
+	}
+}
+
+// Method to inject Error into the Span in event of any operation failure
+func MarkSpanError(ctx context.Context, err error) {
+	span := opentracing.SpanFromContext(ctx)
+	span.SetTag("error", true)
+	span.SetTag("err", err)
+}
+
+// Creates a Child Span from Parent Span embedded in passed context. Should be used before starting a new major
+// operation in Synchronous or Asynchronous mode (go routine), such as following:
+// 1. Start of all implemented External API methods unless using a interceptor for auto-injection of Span (Server side impl)
+// 2. Just before calling an Third-Party lib which is invoking a External API (etcd, kafka)
+// 3. In start of a Go Routine responsible for performing a major task involving significant duration
+// 4. Any method which is suspected to be time consuming...
+func CreateChildSpan(ctx context.Context, taskName string, keyAndValues ...Fields) (opentracing.Span, context.Context) {
+	if !processSpanOperations {
+		return opentracing.NoopTracer{}.StartSpan(taskName), ctx
+	}
+
+	parentSpan := opentracing.SpanFromContext(ctx)
+	childSpan, newCtx := opentracing.StartSpanFromContext(ctx, taskName)
+
+	if parentSpan == nil || parentSpan.BaggageItem(RootSpanNameKey) == "" {
+		childSpan.SetBaggageItem(RootSpanNameKey, taskName)
+	}
+
+	EnrichSpan(newCtx, keyAndValues...)
+	return childSpan, newCtx
+}
+
+// Creates a Async Child Span with Follows-From relationship from Parent Span embedded in passed context.
+// Should be used only in scenarios when
+// a) There is dis-continuation in execution and thus result of Child span does not affect the Parent flow at all
+// b) The execution of Child Span is guaranteed to start after the completion of Parent Span
+// In case of any confusion, use CreateChildSpan method
+// Some situations where this method would be suitable includes Kafka Async RPC call, Propagation of Event across
+// a channel etc.
+func CreateAsyncSpan(ctx context.Context, taskName string, keyAndValues ...Fields) (opentracing.Span, context.Context) {
+	if !processSpanOperations {
+		return opentracing.NoopTracer{}.StartSpan(taskName), ctx
+	}
+
+	var asyncSpan opentracing.Span
+	var newCtx context.Context
+
+	parentSpan := opentracing.SpanFromContext(ctx)
+
+	// We should always be creating Aysnc span from a Valid parent span. If not, create a Child span instead
+	if parentSpan == nil {
+		defaultLogger.Warn(context.Background(), "Async span must be created with a Valid parent span only")
+		asyncSpan, newCtx = opentracing.StartSpanFromContext(ctx, taskName)
+	} else {
+		// Use Background context as the base for Follows-from case; else new span is getting both Child and FollowsFrom relationship
+		asyncSpan, newCtx = opentracing.StartSpanFromContext(context.Background(), taskName, opentracing.FollowsFrom(parentSpan.Context()))
+	}
+
+	if parentSpan == nil || parentSpan.BaggageItem(RootSpanNameKey) == "" {
+		asyncSpan.SetBaggageItem(RootSpanNameKey, taskName)
+	}
+
+	EnrichSpan(newCtx, keyAndValues...)
+	return asyncSpan, newCtx
+}
+
+// Extracts the span from Source context and injects into the supplied Target context.
+// This should be used in situations wherein we are calling a time-sensitive operation (etcd update) and hence
+// had a context.Background() used earlier to avoid any cancellation/timeout of operation by passed context.
+// This will allow propagation of span with a different base context (and not the original context)
+func WithSpanFromContext(targetCtx, sourceCtx context.Context) context.Context {
+	span := opentracing.SpanFromContext(sourceCtx)
+	return opentracing.ContextWithSpan(targetCtx, span)
+}
+
 // Utility method to convert log Fields into array of interfaces expected by zap logger methods
 func serializeMap(fields Fields) []interface{} {
 	data := make([]interface{}, len(fields)*2)
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 526fc15..8646644 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -99,7 +99,7 @@
 github.com/modern-go/concurrent
 # github.com/modern-go/reflect2 v1.0.1
 github.com/modern-go/reflect2
-# github.com/opencord/voltha-lib-go/v3 v3.2.1
+# github.com/opencord/voltha-lib-go/v3 v3.2.2
 github.com/opencord/voltha-lib-go/v3/pkg/adapters
 github.com/opencord/voltha-lib-go/v3/pkg/adapters/adapterif
 github.com/opencord/voltha-lib-go/v3/pkg/adapters/common