blob: b22ca143dbd83796a1005cbb81fa4780923e6c4c [file] [log] [blame]
Girish Kumar46d7c3a2020-05-18 12:06:33 +00001/*
2 * Copyright 2018-present Open Networking Foundation
3
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7
8 * http://www.apache.org/licenses/LICENSE-2.0
9
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17// File contains utility functions to support Open Tracing in conjunction with
18// Enhanced Logging based on context propagation
19
20package log
21
22import (
23 "context"
24 "errors"
25 "github.com/opentracing/opentracing-go"
26 jtracing "github.com/uber/jaeger-client-go"
27 jcfg "github.com/uber/jaeger-client-go/config"
Girish Kumar46d7c3a2020-05-18 12:06:33 +000028 "io"
Girish Kumar131ef8f2020-06-10 14:50:42 +000029 "io/ioutil"
Girish Kumar46d7c3a2020-05-18 12:06:33 +000030 "os"
Girish Kumar131ef8f2020-06-10 14:50:42 +000031 "strings"
Girish Kumar46d7c3a2020-05-18 12:06:33 +000032)
33
Girish Kumar131ef8f2020-06-10 14:50:42 +000034const (
35 RootSpanNameKey = "op-name"
36)
37
38// Flag indicating whether to extract Log Fields from Span embedded in the received Context
39var extractLogFieldsFromContext bool = true
40
41// Flag indicating whether to process Span related operations; to save CPU cycles when disabled
42var processSpanOperations bool = true
43
44// Jaeger complaint Logger instance to redirect logs to Default Logger
45type traceLogger struct {
46 logger *clogger
47}
48
49func (tl traceLogger) Error(msg string) {
50 tl.logger.Error(context.Background(), msg)
51}
52
53func (tl traceLogger) Infof(msg string, args ...interface{}) {
54 // Tracing logs should be performed only at Debug Verbosity
55 tl.logger.Debugf(context.Background(), msg, args...)
56}
57
Girish Kumar46d7c3a2020-05-18 12:06:33 +000058// This method will start the Tracing for a component using Component name injected from the Chart
59// The close() method on returned Closer instance should be called in defer mode to gracefully
60// terminate tracing on component shutdown
Girish Kumar131ef8f2020-06-10 14:50:42 +000061func InitTracingAndLogCorrelation(tracePublishEnabled bool, traceAgentAddress string, logCorrelationEnabled bool) (io.Closer, error) {
62 if !tracePublishEnabled && !logCorrelationEnabled {
63 defaultLogger.Info(context.Background(), "Skipping Global Tracer initialization as both Trace publish and Log correlation are configured as disabled")
64 extractLogFieldsFromContext = false
65 processSpanOperations = false
66 return ioutil.NopCloser(strings.NewReader("")), nil
67 }
68
69 if !logCorrelationEnabled {
70 defaultLogger.Info(context.Background(), "Disabling Log Fields extraction from context as configured")
71 extractLogFieldsFromContext = false
72 }
73
Girish Kumar46d7c3a2020-05-18 12:06:33 +000074 componentName := os.Getenv("COMPONENT_NAME")
75 if componentName == "" {
76 return nil, errors.New("Unable to retrieve PoD Component Name from Runtime env")
77 }
78
79 // Use basic configuration to start with; will extend later to support dynamic config updates
80 cfg := jcfg.Configuration{}
81
Girish Kumar131ef8f2020-06-10 14:50:42 +000082 var err error
83 var jReporterConfig jcfg.ReporterConfig
84 var jReporterCfgOption jtracing.Reporter
Girish Kumar46d7c3a2020-05-18 12:06:33 +000085
Girish Kumar131ef8f2020-06-10 14:50:42 +000086 // Attempt Trace Agent Address only if Trace Publishing is enabled; else directly use Loopback IP
87 if tracePublishEnabled {
88 jReporterConfig = jcfg.ReporterConfig{LocalAgentHostPort: traceAgentAddress, LogSpans: true}
89 jReporterCfgOption, err = jReporterConfig.NewReporter(componentName, jtracing.NewNullMetrics(), traceLogger{logger: defaultLogger})
90
91 if err != nil {
92 defaultLogger.Errorw(context.Background(), "Unable to create Reporter with given Trace Agent address",
93 Fields{"error": err, "address": traceAgentAddress})
94 // The Reporter initialization may fail due to Invalid Agent address or non-existent Agent (DNS lookup failure).
95 // It is essential for Tracer Instance to still start for correct Span propagation needed for log correlation.
96 // Thus, falback to use loopback IP for Reporter initialization before throwing back any error
97 tracePublishEnabled = false
98 }
99 }
100
101 if !tracePublishEnabled {
102 jReporterConfig.LocalAgentHostPort = "127.0.0.1:6831"
103 jReporterCfgOption, err = jReporterConfig.NewReporter(componentName, jtracing.NewNullMetrics(), traceLogger{logger: defaultLogger})
104 if err != nil {
105 return nil, errors.New("Failed to initialize Jaeger Tracing due to Reporter creation error : " + err.Error())
106 }
107 }
108
109 // To start with, we are using Constant Sampling type
110 samplerParam := 0 // 0: Do not publish span, 1: Publish
111 if tracePublishEnabled {
112 samplerParam = 1
113 }
114 jSamplerConfig := jcfg.SamplerConfig{Type: "const", Param: float64(samplerParam)}
115 jSamplerCfgOption, err := jSamplerConfig.NewSampler(componentName, jtracing.NewNullMetrics())
116 if err != nil {
117 return nil, errors.New("Unable to create Sampler : " + err.Error())
118 }
119
120 return cfg.InitGlobalTracer(componentName, jcfg.Reporter(jReporterCfgOption), jcfg.Sampler(jSamplerCfgOption))
Girish Kumar46d7c3a2020-05-18 12:06:33 +0000121}
122
Girish Kumar446274f2020-07-22 03:17:28 +0000123func TerminateTracing(c io.Closer) {
124 err := c.Close()
125 if err != nil {
126 defaultLogger.Error(context.Background(), "error-while-closing-jaeger-tracer", Fields{"err": err})
127 }
128}
129
Girish Kumar46d7c3a2020-05-18 12:06:33 +0000130// Extracts details of Execution Context as log fields from the Tracing Span injected into the
131// context instance. Following log fields are extracted:
132// 1. Operation Name : key as 'op-name' and value as Span operation name
133// 2. Operation Id : key as 'op-id' and value as 64 bit Span Id in hex digits string
134//
135// Additionally, any tags present in Span are also extracted to use as log fields e.g. device-id.
136//
137// If no Span is found associated with context, blank slice is returned without any log fields
138func ExtractContextAttributes(ctx context.Context) []interface{} {
Girish Kumar131ef8f2020-06-10 14:50:42 +0000139 if !extractLogFieldsFromContext {
140 return make([]interface{}, 0)
141 }
142
Girish Kumar46d7c3a2020-05-18 12:06:33 +0000143 attrMap := make(map[string]interface{})
144
145 if ctx != nil {
146 if span := opentracing.SpanFromContext(ctx); span != nil {
147 if jspan, ok := span.(*jtracing.Span); ok {
Girish Kumar131ef8f2020-06-10 14:50:42 +0000148 // Add Log fields for operation identified by Root Level Span (Trace)
149 opId := jspan.SpanContext().TraceID().String()
150 opName := jspan.BaggageItem(RootSpanNameKey)
Girish Kumar46d7c3a2020-05-18 12:06:33 +0000151
Girish Kumar131ef8f2020-06-10 14:50:42 +0000152 taskId := jspan.SpanContext().SpanID().String()
153 taskName := jspan.OperationName()
154
155 if opName == "" {
156 span.SetBaggageItem(RootSpanNameKey, taskName)
157 opName = taskName
158 }
159
160 attrMap["op-id"] = opId
161 attrMap["op-name"] = opName
162
163 // Add Log fields for task identified by Current Span, if it is different
164 // than operation
165 if taskId != opId {
166 attrMap["task-id"] = taskId
167 attrMap["task-name"] = taskName
168 }
Girish Kumar46d7c3a2020-05-18 12:06:33 +0000169
170 for k, v := range jspan.Tags() {
Girish Kumar131ef8f2020-06-10 14:50:42 +0000171 // Ignore the special tags added by Jaeger, middleware (sampler.type, span.*) present in the span
172 if strings.HasPrefix(k, "sampler.") || strings.HasPrefix(k, "span.") || k == "component" {
173 continue
174 }
175
Girish Kumar46d7c3a2020-05-18 12:06:33 +0000176 attrMap[k] = v
177 }
Girish Kumar74240652020-07-10 11:54:28 +0000178
179 processBaggageItems := func(k, v string) bool {
180 if k != "rpc-span-name" {
181 attrMap[k] = v
182 }
183 return true
184 }
185
186 jspan.SpanContext().ForeachBaggageItem(processBaggageItems)
Girish Kumar46d7c3a2020-05-18 12:06:33 +0000187 }
188 }
189 }
190
191 return serializeMap(attrMap)
192}
193
Girish Kumar131ef8f2020-06-10 14:50:42 +0000194// Method to inject additional log fields into Span e.g. device-id
195func EnrichSpan(ctx context.Context, keyAndValues ...Fields) {
196 span := opentracing.SpanFromContext(ctx)
197 if span != nil {
Girish Kumar74240652020-07-10 11:54:28 +0000198 if jspan, ok := span.(*jtracing.Span); ok {
199 // Inject as a BaggageItem when the Span is the Root Span so that it propagates
200 // across the components along with Root Span (called as Trace)
201 // Else, inject as a Tag so that it is attached to the Child Task
202 isRootSpan := false
203 if jspan.SpanContext().TraceID().String() == jspan.SpanContext().SpanID().String() {
204 isRootSpan = true
205 }
206
207 for _, field := range keyAndValues {
208 for k, v := range field {
209 if isRootSpan {
210 span.SetBaggageItem(k, v.(string))
211 } else {
212 span.SetTag(k, v)
213 }
214 }
Girish Kumar131ef8f2020-06-10 14:50:42 +0000215 }
216 }
217 }
218}
219
220// Method to inject Error into the Span in event of any operation failure
221func MarkSpanError(ctx context.Context, err error) {
222 span := opentracing.SpanFromContext(ctx)
223 span.SetTag("error", true)
224 span.SetTag("err", err)
225}
226
227// Creates a Child Span from Parent Span embedded in passed context. Should be used before starting a new major
228// operation in Synchronous or Asynchronous mode (go routine), such as following:
229// 1. Start of all implemented External API methods unless using a interceptor for auto-injection of Span (Server side impl)
230// 2. Just before calling an Third-Party lib which is invoking a External API (etcd, kafka)
231// 3. In start of a Go Routine responsible for performing a major task involving significant duration
232// 4. Any method which is suspected to be time consuming...
233func CreateChildSpan(ctx context.Context, taskName string, keyAndValues ...Fields) (opentracing.Span, context.Context) {
234 if !processSpanOperations {
235 return opentracing.NoopTracer{}.StartSpan(taskName), ctx
236 }
237
238 parentSpan := opentracing.SpanFromContext(ctx)
239 childSpan, newCtx := opentracing.StartSpanFromContext(ctx, taskName)
240
241 if parentSpan == nil || parentSpan.BaggageItem(RootSpanNameKey) == "" {
242 childSpan.SetBaggageItem(RootSpanNameKey, taskName)
243 }
244
245 EnrichSpan(newCtx, keyAndValues...)
246 return childSpan, newCtx
247}
248
249// Creates a Async Child Span with Follows-From relationship from Parent Span embedded in passed context.
250// Should be used only in scenarios when
251// a) There is dis-continuation in execution and thus result of Child span does not affect the Parent flow at all
252// b) The execution of Child Span is guaranteed to start after the completion of Parent Span
253// In case of any confusion, use CreateChildSpan method
254// Some situations where this method would be suitable includes Kafka Async RPC call, Propagation of Event across
255// a channel etc.
256func CreateAsyncSpan(ctx context.Context, taskName string, keyAndValues ...Fields) (opentracing.Span, context.Context) {
257 if !processSpanOperations {
258 return opentracing.NoopTracer{}.StartSpan(taskName), ctx
259 }
260
261 var asyncSpan opentracing.Span
262 var newCtx context.Context
263
264 parentSpan := opentracing.SpanFromContext(ctx)
265
266 // We should always be creating Aysnc span from a Valid parent span. If not, create a Child span instead
267 if parentSpan == nil {
268 defaultLogger.Warn(context.Background(), "Async span must be created with a Valid parent span only")
269 asyncSpan, newCtx = opentracing.StartSpanFromContext(ctx, taskName)
270 } else {
271 // Use Background context as the base for Follows-from case; else new span is getting both Child and FollowsFrom relationship
272 asyncSpan, newCtx = opentracing.StartSpanFromContext(context.Background(), taskName, opentracing.FollowsFrom(parentSpan.Context()))
273 }
274
275 if parentSpan == nil || parentSpan.BaggageItem(RootSpanNameKey) == "" {
276 asyncSpan.SetBaggageItem(RootSpanNameKey, taskName)
277 }
278
279 EnrichSpan(newCtx, keyAndValues...)
280 return asyncSpan, newCtx
281}
282
283// Extracts the span from Source context and injects into the supplied Target context.
284// This should be used in situations wherein we are calling a time-sensitive operation (etcd update) and hence
285// had a context.Background() used earlier to avoid any cancellation/timeout of operation by passed context.
286// This will allow propagation of span with a different base context (and not the original context)
287func WithSpanFromContext(targetCtx, sourceCtx context.Context) context.Context {
288 span := opentracing.SpanFromContext(sourceCtx)
289 return opentracing.ContextWithSpan(targetCtx, span)
290}
291
Girish Kumar46d7c3a2020-05-18 12:06:33 +0000292// Utility method to convert log Fields into array of interfaces expected by zap logger methods
293func serializeMap(fields Fields) []interface{} {
294 data := make([]interface{}, len(fields)*2)
295 i := 0
296 for k, v := range fields {
297 data[i] = k
298 data[i+1] = v
299 i = i + 2
300 }
301 return data
302}