blob: cd6d27bad3fa6f5bdef97635ce97c933f000aca4 [file] [log] [blame]
Scott Baker2c1c4822019-10-16 11:02:41 -07001/*
2 * Copyright 2018-present Open Networking Foundation
3
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7
8 * http://www.apache.org/licenses/LICENSE-2.0
9
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package kafka
17
18import (
Scott Bakerfa2f6ee2019-11-19 14:53:14 -080019 "context"
Scott Baker2c1c4822019-10-16 11:02:41 -070020 "errors"
21 "fmt"
serkant.uluderyab38671c2019-11-01 09:35:38 -070022 "strings"
23 "sync"
24 "time"
25
Scott Baker2c1c4822019-10-16 11:02:41 -070026 "github.com/Shopify/sarama"
27 scc "github.com/bsm/sarama-cluster"
Scott Bakerfa2f6ee2019-11-19 14:53:14 -080028 "github.com/eapache/go-resiliency/breaker"
Scott Baker2c1c4822019-10-16 11:02:41 -070029 "github.com/golang/protobuf/proto"
Scott Baker84a55ce2020-04-17 10:11:30 -070030 "github.com/golang/protobuf/ptypes"
Scott Baker2c1c4822019-10-16 11:02:41 -070031 "github.com/google/uuid"
Girish Gowdra89c985b2020-10-14 15:02:09 -070032 "github.com/opencord/voltha-lib-go/v4/pkg/log"
33 ic "github.com/opencord/voltha-protos/v4/go/inter_container"
Scott Baker2c1c4822019-10-16 11:02:41 -070034)
35
Scott Baker2c1c4822019-10-16 11:02:41 -070036// consumerChannels represents one or more consumers listening on a kafka topic. Once a message is received on that
37// topic, the consumer(s) broadcasts the message to all the listening channels. The consumer can be a partition
38//consumer or a group consumer
39type consumerChannels struct {
40 consumers []interface{}
41 channels []chan *ic.InterContainerMessage
42}
43
Kent Hagermanccfa2132019-12-17 13:29:34 -050044// static check to ensure SaramaClient implements Client
45var _ Client = &SaramaClient{}
46
Scott Baker2c1c4822019-10-16 11:02:41 -070047// SaramaClient represents the messaging proxy
48type SaramaClient struct {
49 cAdmin sarama.ClusterAdmin
Neha Sharmadd9af392020-04-28 09:03:57 +000050 KafkaAddress string
Scott Baker2c1c4822019-10-16 11:02:41 -070051 producer sarama.AsyncProducer
52 consumer sarama.Consumer
53 groupConsumers map[string]*scc.Consumer
54 lockOfGroupConsumers sync.RWMutex
55 consumerGroupPrefix string
56 consumerType int
57 consumerGroupName string
58 producerFlushFrequency int
59 producerFlushMessages int
60 producerFlushMaxmessages int
61 producerRetryMax int
62 producerRetryBackOff time.Duration
63 producerReturnSuccess bool
64 producerReturnErrors bool
65 consumerMaxwait int
66 maxProcessingTime int
67 numPartitions int
68 numReplicas int
69 autoCreateTopic bool
70 doneCh chan int
Scott Baker84a55ce2020-04-17 10:11:30 -070071 metadataCallback func(fromTopic string, timestamp time.Time)
Scott Baker2c1c4822019-10-16 11:02:41 -070072 topicToConsumerChannelMap map[string]*consumerChannels
73 lockTopicToConsumerChannelMap sync.RWMutex
74 topicLockMap map[string]*sync.RWMutex
75 lockOfTopicLockMap sync.RWMutex
76 metadataMaxRetry int
Scott Baker104b67d2019-10-29 15:56:27 -070077 alive bool
David K. Bainbridge5edd7fb2020-07-29 19:30:48 -070078 livenessMutex sync.Mutex
Scott Baker104b67d2019-10-29 15:56:27 -070079 liveness chan bool
80 livenessChannelInterval time.Duration
81 lastLivenessTime time.Time
82 started bool
David K. Bainbridge5edd7fb2020-07-29 19:30:48 -070083 healthinessMutex sync.Mutex
Scott Baker0fef6982019-12-12 09:49:42 -080084 healthy bool
85 healthiness chan bool
Scott Baker2c1c4822019-10-16 11:02:41 -070086}
87
88type SaramaClientOption func(*SaramaClient)
89
Neha Sharmadd9af392020-04-28 09:03:57 +000090func Address(address string) SaramaClientOption {
Scott Baker2c1c4822019-10-16 11:02:41 -070091 return func(args *SaramaClient) {
Neha Sharmadd9af392020-04-28 09:03:57 +000092 args.KafkaAddress = address
Scott Baker2c1c4822019-10-16 11:02:41 -070093 }
94}
95
96func ConsumerGroupPrefix(prefix string) SaramaClientOption {
97 return func(args *SaramaClient) {
98 args.consumerGroupPrefix = prefix
99 }
100}
101
102func ConsumerGroupName(name string) SaramaClientOption {
103 return func(args *SaramaClient) {
104 args.consumerGroupName = name
105 }
106}
107
108func ConsumerType(consumer int) SaramaClientOption {
109 return func(args *SaramaClient) {
110 args.consumerType = consumer
111 }
112}
113
114func ProducerFlushFrequency(frequency int) SaramaClientOption {
115 return func(args *SaramaClient) {
116 args.producerFlushFrequency = frequency
117 }
118}
119
120func ProducerFlushMessages(num int) SaramaClientOption {
121 return func(args *SaramaClient) {
122 args.producerFlushMessages = num
123 }
124}
125
126func ProducerFlushMaxMessages(num int) SaramaClientOption {
127 return func(args *SaramaClient) {
128 args.producerFlushMaxmessages = num
129 }
130}
131
132func ProducerMaxRetries(num int) SaramaClientOption {
133 return func(args *SaramaClient) {
134 args.producerRetryMax = num
135 }
136}
137
138func ProducerRetryBackoff(duration time.Duration) SaramaClientOption {
139 return func(args *SaramaClient) {
140 args.producerRetryBackOff = duration
141 }
142}
143
144func ProducerReturnOnErrors(opt bool) SaramaClientOption {
145 return func(args *SaramaClient) {
146 args.producerReturnErrors = opt
147 }
148}
149
150func ProducerReturnOnSuccess(opt bool) SaramaClientOption {
151 return func(args *SaramaClient) {
152 args.producerReturnSuccess = opt
153 }
154}
155
156func ConsumerMaxWait(wait int) SaramaClientOption {
157 return func(args *SaramaClient) {
158 args.consumerMaxwait = wait
159 }
160}
161
162func MaxProcessingTime(pTime int) SaramaClientOption {
163 return func(args *SaramaClient) {
164 args.maxProcessingTime = pTime
165 }
166}
167
168func NumPartitions(number int) SaramaClientOption {
169 return func(args *SaramaClient) {
170 args.numPartitions = number
171 }
172}
173
174func NumReplicas(number int) SaramaClientOption {
175 return func(args *SaramaClient) {
176 args.numReplicas = number
177 }
178}
179
180func AutoCreateTopic(opt bool) SaramaClientOption {
181 return func(args *SaramaClient) {
182 args.autoCreateTopic = opt
183 }
184}
185
186func MetadatMaxRetries(retry int) SaramaClientOption {
187 return func(args *SaramaClient) {
188 args.metadataMaxRetry = retry
189 }
190}
191
Scott Baker104b67d2019-10-29 15:56:27 -0700192func LivenessChannelInterval(opt time.Duration) SaramaClientOption {
193 return func(args *SaramaClient) {
194 args.livenessChannelInterval = opt
195 }
196}
197
Scott Baker2c1c4822019-10-16 11:02:41 -0700198func NewSaramaClient(opts ...SaramaClientOption) *SaramaClient {
199 client := &SaramaClient{
Neha Sharmadd9af392020-04-28 09:03:57 +0000200 KafkaAddress: DefaultKafkaAddress,
Scott Baker2c1c4822019-10-16 11:02:41 -0700201 }
202 client.consumerType = DefaultConsumerType
203 client.producerFlushFrequency = DefaultProducerFlushFrequency
204 client.producerFlushMessages = DefaultProducerFlushMessages
205 client.producerFlushMaxmessages = DefaultProducerFlushMaxmessages
206 client.producerReturnErrors = DefaultProducerReturnErrors
207 client.producerReturnSuccess = DefaultProducerReturnSuccess
208 client.producerRetryMax = DefaultProducerRetryMax
209 client.producerRetryBackOff = DefaultProducerRetryBackoff
210 client.consumerMaxwait = DefaultConsumerMaxwait
211 client.maxProcessingTime = DefaultMaxProcessingTime
212 client.numPartitions = DefaultNumberPartitions
213 client.numReplicas = DefaultNumberReplicas
214 client.autoCreateTopic = DefaultAutoCreateTopic
215 client.metadataMaxRetry = DefaultMetadataMaxRetry
Scott Baker104b67d2019-10-29 15:56:27 -0700216 client.livenessChannelInterval = DefaultLivenessChannelInterval
Scott Baker2c1c4822019-10-16 11:02:41 -0700217
218 for _, option := range opts {
219 option(client)
220 }
221
222 client.groupConsumers = make(map[string]*scc.Consumer)
223
224 client.lockTopicToConsumerChannelMap = sync.RWMutex{}
225 client.topicLockMap = make(map[string]*sync.RWMutex)
226 client.lockOfTopicLockMap = sync.RWMutex{}
227 client.lockOfGroupConsumers = sync.RWMutex{}
Scott Baker104b67d2019-10-29 15:56:27 -0700228
Scott Baker0fef6982019-12-12 09:49:42 -0800229 // healthy and alive until proven otherwise
Scott Baker104b67d2019-10-29 15:56:27 -0700230 client.alive = true
Scott Baker0fef6982019-12-12 09:49:42 -0800231 client.healthy = true
Scott Baker104b67d2019-10-29 15:56:27 -0700232
Scott Baker2c1c4822019-10-16 11:02:41 -0700233 return client
234}
235
Neha Sharma94f16a92020-06-26 04:17:55 +0000236func (sc *SaramaClient) Start(ctx context.Context) error {
237 logger.Info(ctx, "Starting-kafka-sarama-client")
Scott Baker2c1c4822019-10-16 11:02:41 -0700238
239 // Create the Done channel
240 sc.doneCh = make(chan int, 1)
241
242 var err error
243
244 // Add a cleanup in case of failure to startup
245 defer func() {
246 if err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000247 sc.Stop(ctx)
Scott Baker2c1c4822019-10-16 11:02:41 -0700248 }
249 }()
250
251 // Create the Cluster Admin
Neha Sharma94f16a92020-06-26 04:17:55 +0000252 if err = sc.createClusterAdmin(ctx); err != nil {
253 logger.Errorw(ctx, "Cannot-create-cluster-admin", log.Fields{"error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700254 return err
255 }
256
257 // Create the Publisher
Neha Sharma94f16a92020-06-26 04:17:55 +0000258 if err := sc.createPublisher(ctx); err != nil {
259 logger.Errorw(ctx, "Cannot-create-kafka-publisher", log.Fields{"error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700260 return err
261 }
262
263 if sc.consumerType == DefaultConsumerType {
264 // Create the master consumers
Neha Sharma94f16a92020-06-26 04:17:55 +0000265 if err := sc.createConsumer(ctx); err != nil {
266 logger.Errorw(ctx, "Cannot-create-kafka-consumers", log.Fields{"error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700267 return err
268 }
269 }
270
271 // Create the topic to consumers/channel map
272 sc.topicToConsumerChannelMap = make(map[string]*consumerChannels)
273
Neha Sharma94f16a92020-06-26 04:17:55 +0000274 logger.Info(ctx, "kafka-sarama-client-started")
Scott Baker2c1c4822019-10-16 11:02:41 -0700275
Scott Baker104b67d2019-10-29 15:56:27 -0700276 sc.started = true
277
Scott Baker2c1c4822019-10-16 11:02:41 -0700278 return nil
279}
280
Neha Sharma94f16a92020-06-26 04:17:55 +0000281func (sc *SaramaClient) Stop(ctx context.Context) {
282 logger.Info(ctx, "stopping-sarama-client")
Scott Baker2c1c4822019-10-16 11:02:41 -0700283
Scott Baker104b67d2019-10-29 15:56:27 -0700284 sc.started = false
285
Scott Baker2c1c4822019-10-16 11:02:41 -0700286 //Send a message over the done channel to close all long running routines
287 sc.doneCh <- 1
288
289 if sc.producer != nil {
290 if err := sc.producer.Close(); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000291 logger.Errorw(ctx, "closing-producer-failed", log.Fields{"error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700292 }
293 }
294
295 if sc.consumer != nil {
296 if err := sc.consumer.Close(); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000297 logger.Errorw(ctx, "closing-partition-consumer-failed", log.Fields{"error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700298 }
299 }
300
301 for key, val := range sc.groupConsumers {
Neha Sharma94f16a92020-06-26 04:17:55 +0000302 logger.Debugw(ctx, "closing-group-consumer", log.Fields{"topic": key})
Scott Baker2c1c4822019-10-16 11:02:41 -0700303 if err := val.Close(); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000304 logger.Errorw(ctx, "closing-group-consumer-failed", log.Fields{"error": err, "topic": key})
Scott Baker2c1c4822019-10-16 11:02:41 -0700305 }
306 }
307
308 if sc.cAdmin != nil {
309 if err := sc.cAdmin.Close(); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000310 logger.Errorw(ctx, "closing-cluster-admin-failed", log.Fields{"error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700311 }
312 }
313
314 //TODO: Clear the consumers map
315 //sc.clearConsumerChannelMap()
316
Neha Sharma94f16a92020-06-26 04:17:55 +0000317 logger.Info(ctx, "sarama-client-stopped")
Scott Baker2c1c4822019-10-16 11:02:41 -0700318}
319
320//createTopic is an internal function to create a topic on the Kafka Broker. No locking is required as
321// the invoking function must hold the lock
Neha Sharma94f16a92020-06-26 04:17:55 +0000322func (sc *SaramaClient) createTopic(ctx context.Context, topic *Topic, numPartition int, repFactor int) error {
Scott Baker2c1c4822019-10-16 11:02:41 -0700323 // Set the topic details
324 topicDetail := &sarama.TopicDetail{}
325 topicDetail.NumPartitions = int32(numPartition)
326 topicDetail.ReplicationFactor = int16(repFactor)
327 topicDetail.ConfigEntries = make(map[string]*string)
328 topicDetails := make(map[string]*sarama.TopicDetail)
329 topicDetails[topic.Name] = topicDetail
330
331 if err := sc.cAdmin.CreateTopic(topic.Name, topicDetail, false); err != nil {
332 if err == sarama.ErrTopicAlreadyExists {
333 // Not an error
Neha Sharma94f16a92020-06-26 04:17:55 +0000334 logger.Debugw(ctx, "topic-already-exist", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700335 return nil
336 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000337 logger.Errorw(ctx, "create-topic-failure", log.Fields{"error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700338 return err
339 }
340 // TODO: Wait until the topic has been created. No API is available in the Sarama clusterAdmin to
341 // do so.
Neha Sharma94f16a92020-06-26 04:17:55 +0000342 logger.Debugw(ctx, "topic-created", log.Fields{"topic": topic, "numPartition": numPartition, "replicationFactor": repFactor})
Scott Baker2c1c4822019-10-16 11:02:41 -0700343 return nil
344}
345
346//CreateTopic is a public API to create a topic on the Kafka Broker. It uses a lock on a specific topic to
347// ensure no two go routines are performing operations on the same topic
Neha Sharma94f16a92020-06-26 04:17:55 +0000348func (sc *SaramaClient) CreateTopic(ctx context.Context, topic *Topic, numPartition int, repFactor int) error {
Scott Baker2c1c4822019-10-16 11:02:41 -0700349 sc.lockTopic(topic)
350 defer sc.unLockTopic(topic)
351
Neha Sharma94f16a92020-06-26 04:17:55 +0000352 return sc.createTopic(ctx, topic, numPartition, repFactor)
Scott Baker2c1c4822019-10-16 11:02:41 -0700353}
354
355//DeleteTopic removes a topic from the kafka Broker
Neha Sharma94f16a92020-06-26 04:17:55 +0000356func (sc *SaramaClient) DeleteTopic(ctx context.Context, topic *Topic) error {
Scott Baker2c1c4822019-10-16 11:02:41 -0700357 sc.lockTopic(topic)
358 defer sc.unLockTopic(topic)
359
360 // Remove the topic from the broker
361 if err := sc.cAdmin.DeleteTopic(topic.Name); err != nil {
362 if err == sarama.ErrUnknownTopicOrPartition {
363 // Not an error as does not exist
Neha Sharma94f16a92020-06-26 04:17:55 +0000364 logger.Debugw(ctx, "topic-not-exist", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700365 return nil
366 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000367 logger.Errorw(ctx, "delete-topic-failed", log.Fields{"topic": topic, "error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700368 return err
369 }
370
371 // Clear the topic from the consumer channel. This will also close any consumers listening on that topic.
Neha Sharma94f16a92020-06-26 04:17:55 +0000372 if err := sc.clearTopicFromConsumerChannelMap(ctx, *topic); err != nil {
373 logger.Errorw(ctx, "failure-clearing-channels", log.Fields{"topic": topic, "error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700374 return err
375 }
376 return nil
377}
378
379// Subscribe registers a caller to a topic. It returns a channel that the caller can use to receive
380// messages from that topic
Neha Sharma94f16a92020-06-26 04:17:55 +0000381func (sc *SaramaClient) Subscribe(ctx context.Context, topic *Topic, kvArgs ...*KVArg) (<-chan *ic.InterContainerMessage, error) {
Scott Baker2c1c4822019-10-16 11:02:41 -0700382 sc.lockTopic(topic)
383 defer sc.unLockTopic(topic)
384
Neha Sharma94f16a92020-06-26 04:17:55 +0000385 logger.Debugw(ctx, "subscribe", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700386
387 // If a consumers already exist for that topic then resuse it
388 if consumerCh := sc.getConsumerChannel(topic); consumerCh != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000389 logger.Debugw(ctx, "topic-already-subscribed", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700390 // Create a channel specific for that consumers and add it to the consumers channel map
391 ch := make(chan *ic.InterContainerMessage)
Neha Sharma94f16a92020-06-26 04:17:55 +0000392 sc.addChannelToConsumerChannelMap(ctx, topic, ch)
Scott Baker2c1c4822019-10-16 11:02:41 -0700393 return ch, nil
394 }
395
396 // Register for the topic and set it up
397 var consumerListeningChannel chan *ic.InterContainerMessage
398 var err error
399
400 // Use the consumerType option to figure out the type of consumer to launch
401 if sc.consumerType == PartitionConsumer {
402 if sc.autoCreateTopic {
Neha Sharma94f16a92020-06-26 04:17:55 +0000403 if err = sc.createTopic(ctx, topic, sc.numPartitions, sc.numReplicas); err != nil {
404 logger.Errorw(ctx, "create-topic-failure", log.Fields{"error": err, "topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700405 return nil, err
406 }
407 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000408 if consumerListeningChannel, err = sc.setupPartitionConsumerChannel(ctx, topic, getOffset(kvArgs...)); err != nil {
409 logger.Warnw(ctx, "create-consumers-channel-failure", log.Fields{"error": err, "topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700410 return nil, err
411 }
412 } else if sc.consumerType == GroupCustomer {
413 // TODO: create topic if auto create is on. There is an issue with the sarama cluster library that
414 // does not consume from a precreated topic in some scenarios
415 //if sc.autoCreateTopic {
416 // if err = sc.createTopic(topic, sc.numPartitions, sc.numReplicas); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000417 // logger.Errorw(ctx, "create-topic-failure", logger.Fields{"error": err, "topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700418 // return nil, err
419 // }
420 //}
421 //groupId := sc.consumerGroupName
422 groupId := getGroupId(kvArgs...)
423 // Include the group prefix
424 if groupId != "" {
425 groupId = sc.consumerGroupPrefix + groupId
426 } else {
427 // Need to use a unique group Id per topic
428 groupId = sc.consumerGroupPrefix + topic.Name
429 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000430 if consumerListeningChannel, err = sc.setupGroupConsumerChannel(ctx, topic, groupId, getOffset(kvArgs...)); err != nil {
431 logger.Warnw(ctx, "create-consumers-channel-failure", log.Fields{"error": err, "topic": topic.Name, "groupId": groupId})
Scott Baker2c1c4822019-10-16 11:02:41 -0700432 return nil, err
433 }
434
435 } else {
Neha Sharma94f16a92020-06-26 04:17:55 +0000436 logger.Warnw(ctx, "unknown-consumer-type", log.Fields{"consumer-type": sc.consumerType})
Scott Baker2c1c4822019-10-16 11:02:41 -0700437 return nil, errors.New("unknown-consumer-type")
438 }
439
440 return consumerListeningChannel, nil
441}
442
443//UnSubscribe unsubscribe a consumer from a given topic
Neha Sharma94f16a92020-06-26 04:17:55 +0000444func (sc *SaramaClient) UnSubscribe(ctx context.Context, topic *Topic, ch <-chan *ic.InterContainerMessage) error {
Scott Baker2c1c4822019-10-16 11:02:41 -0700445 sc.lockTopic(topic)
446 defer sc.unLockTopic(topic)
447
Neha Sharma94f16a92020-06-26 04:17:55 +0000448 logger.Debugw(ctx, "unsubscribing-channel-from-topic", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700449 var err error
Neha Sharma94f16a92020-06-26 04:17:55 +0000450 if err = sc.removeChannelFromConsumerChannelMap(ctx, *topic, ch); err != nil {
451 logger.Errorw(ctx, "failed-removing-channel", log.Fields{"error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700452 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000453 if err = sc.deleteFromGroupConsumers(ctx, topic.Name); err != nil {
454 logger.Errorw(ctx, "failed-deleting-group-consumer", log.Fields{"error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700455 }
456 return err
457}
458
Neha Sharma94f16a92020-06-26 04:17:55 +0000459func (sc *SaramaClient) SubscribeForMetadata(ctx context.Context, callback func(fromTopic string, timestamp time.Time)) {
Kent Hagermanccfa2132019-12-17 13:29:34 -0500460 sc.metadataCallback = callback
461}
462
Neha Sharma94f16a92020-06-26 04:17:55 +0000463func (sc *SaramaClient) updateLiveness(ctx context.Context, alive bool) {
Scott Baker104b67d2019-10-29 15:56:27 -0700464 // Post a consistent stream of liveness data to the channel,
465 // so that in a live state, the core does not timeout and
466 // send a forced liveness message. Production of liveness
467 // events to the channel is rate-limited by livenessChannelInterval.
David K. Bainbridge5edd7fb2020-07-29 19:30:48 -0700468 sc.livenessMutex.Lock()
469 defer sc.livenessMutex.Unlock()
Scott Baker104b67d2019-10-29 15:56:27 -0700470 if sc.liveness != nil {
471 if sc.alive != alive {
Neha Sharma94f16a92020-06-26 04:17:55 +0000472 logger.Info(ctx, "update-liveness-channel-because-change")
Scott Baker104b67d2019-10-29 15:56:27 -0700473 sc.liveness <- alive
474 sc.lastLivenessTime = time.Now()
David K. Bainbridge7c75cac2020-02-19 08:53:46 -0800475 } else if time.Since(sc.lastLivenessTime) > sc.livenessChannelInterval {
Neha Sharma94f16a92020-06-26 04:17:55 +0000476 logger.Info(ctx, "update-liveness-channel-because-interval")
Scott Baker104b67d2019-10-29 15:56:27 -0700477 sc.liveness <- alive
478 sc.lastLivenessTime = time.Now()
479 }
480 }
481
482 // Only emit a log message when the state changes
483 if sc.alive != alive {
Neha Sharma94f16a92020-06-26 04:17:55 +0000484 logger.Info(ctx, "set-client-alive", log.Fields{"alive": alive})
Scott Baker104b67d2019-10-29 15:56:27 -0700485 sc.alive = alive
486 }
487}
488
Scott Baker0fef6982019-12-12 09:49:42 -0800489// Once unhealthy, we never go back
Neha Sharma94f16a92020-06-26 04:17:55 +0000490func (sc *SaramaClient) setUnhealthy(ctx context.Context) {
Scott Baker0fef6982019-12-12 09:49:42 -0800491 sc.healthy = false
David K. Bainbridge5edd7fb2020-07-29 19:30:48 -0700492 sc.healthinessMutex.Lock()
493 defer sc.healthinessMutex.Unlock()
Scott Baker0fef6982019-12-12 09:49:42 -0800494 if sc.healthiness != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000495 logger.Infow(ctx, "set-client-unhealthy", log.Fields{"healthy": sc.healthy})
Scott Baker0fef6982019-12-12 09:49:42 -0800496 sc.healthiness <- sc.healthy
497 }
498}
499
Neha Sharma94f16a92020-06-26 04:17:55 +0000500func (sc *SaramaClient) isLivenessError(ctx context.Context, err error) bool {
Scott Bakerfa2f6ee2019-11-19 14:53:14 -0800501 // Sarama producers and consumers encapsulate the error inside
502 // a ProducerError or ConsumerError struct.
503 if prodError, ok := err.(*sarama.ProducerError); ok {
504 err = prodError.Err
505 } else if consumerError, ok := err.(*sarama.ConsumerError); ok {
506 err = consumerError.Err
507 }
508
509 // Sarama-Cluster will compose the error into a ClusterError struct,
510 // which we can't do a compare by reference. To handle that, we the
511 // best we can do is compare the error strings.
512
513 switch err.Error() {
514 case context.DeadlineExceeded.Error():
Neha Sharma94f16a92020-06-26 04:17:55 +0000515 logger.Info(ctx, "is-liveness-error-timeout")
Scott Bakerfa2f6ee2019-11-19 14:53:14 -0800516 return true
517 case sarama.ErrOutOfBrokers.Error(): // "Kafka: client has run out of available brokers"
Neha Sharma94f16a92020-06-26 04:17:55 +0000518 logger.Info(ctx, "is-liveness-error-no-brokers")
Scott Bakerfa2f6ee2019-11-19 14:53:14 -0800519 return true
520 case sarama.ErrShuttingDown.Error(): // "Kafka: message received by producer in process of shutting down"
Neha Sharma94f16a92020-06-26 04:17:55 +0000521 logger.Info(ctx, "is-liveness-error-shutting-down")
Scott Bakerfa2f6ee2019-11-19 14:53:14 -0800522 return true
523 case sarama.ErrControllerNotAvailable.Error(): // "Kafka: controller is not available"
Neha Sharma94f16a92020-06-26 04:17:55 +0000524 logger.Info(ctx, "is-liveness-error-not-available")
Scott Bakerfa2f6ee2019-11-19 14:53:14 -0800525 return true
526 case breaker.ErrBreakerOpen.Error(): // "circuit breaker is open"
Neha Sharma94f16a92020-06-26 04:17:55 +0000527 logger.Info(ctx, "is-liveness-error-circuit-breaker-open")
Scott Bakerfa2f6ee2019-11-19 14:53:14 -0800528 return true
529 }
530
531 if strings.HasSuffix(err.Error(), "connection refused") { // "dial tcp 10.244.1.176:9092: connect: connection refused"
Neha Sharma94f16a92020-06-26 04:17:55 +0000532 logger.Info(ctx, "is-liveness-error-connection-refused")
Scott Bakerfa2f6ee2019-11-19 14:53:14 -0800533 return true
534 }
535
Scott Baker718bee02020-01-07 09:52:02 -0800536 if strings.HasSuffix(err.Error(), "i/o timeout") { // "dial tcp 10.244.1.176:9092: i/o timeout"
Neha Sharma94f16a92020-06-26 04:17:55 +0000537 logger.Info(ctx, "is-liveness-error-io-timeout")
Scott Baker718bee02020-01-07 09:52:02 -0800538 return true
539 }
540
Scott Bakerfa2f6ee2019-11-19 14:53:14 -0800541 // Other errors shouldn't trigger a loss of liveness
542
Neha Sharma94f16a92020-06-26 04:17:55 +0000543 logger.Infow(ctx, "is-liveness-error-ignored", log.Fields{"err": err})
Scott Bakerfa2f6ee2019-11-19 14:53:14 -0800544
545 return false
546}
547
Scott Baker2c1c4822019-10-16 11:02:41 -0700548// send formats and sends the request onto the kafka messaging bus.
Neha Sharma94f16a92020-06-26 04:17:55 +0000549func (sc *SaramaClient) Send(ctx context.Context, msg interface{}, topic *Topic, keys ...string) error {
Scott Baker2c1c4822019-10-16 11:02:41 -0700550
551 // Assert message is a proto message
552 var protoMsg proto.Message
553 var ok bool
554 // ascertain the value interface type is a proto.Message
555 if protoMsg, ok = msg.(proto.Message); !ok {
Neha Sharma94f16a92020-06-26 04:17:55 +0000556 logger.Warnw(ctx, "message-not-proto-message", log.Fields{"msg": msg})
David K. Bainbridge7c75cac2020-02-19 08:53:46 -0800557 return fmt.Errorf("not-a-proto-msg-%s", msg)
Scott Baker2c1c4822019-10-16 11:02:41 -0700558 }
559
560 var marshalled []byte
561 var err error
562 // Create the Sarama producer message
563 if marshalled, err = proto.Marshal(protoMsg); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000564 logger.Errorw(ctx, "marshalling-failed", log.Fields{"msg": protoMsg, "error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700565 return err
566 }
567 key := ""
568 if len(keys) > 0 {
569 key = keys[0] // Only the first key is relevant
570 }
571 kafkaMsg := &sarama.ProducerMessage{
572 Topic: topic.Name,
573 Key: sarama.StringEncoder(key),
574 Value: sarama.ByteEncoder(marshalled),
575 }
576
577 // Send message to kafka
578 sc.producer.Input() <- kafkaMsg
579 // Wait for result
580 // TODO: Use a lock or a different mechanism to ensure the response received corresponds to the message sent.
581 select {
582 case ok := <-sc.producer.Successes():
Neha Sharma94f16a92020-06-26 04:17:55 +0000583 logger.Debugw(ctx, "message-sent", log.Fields{"status": ok.Topic})
584 sc.updateLiveness(ctx, true)
Scott Baker2c1c4822019-10-16 11:02:41 -0700585 case notOk := <-sc.producer.Errors():
Neha Sharma94f16a92020-06-26 04:17:55 +0000586 logger.Debugw(ctx, "error-sending", log.Fields{"status": notOk})
587 if sc.isLivenessError(ctx, notOk) {
588 sc.updateLiveness(ctx, false)
Scott Baker104b67d2019-10-29 15:56:27 -0700589 }
590 return notOk
591 }
592 return nil
593}
594
595// Enable the liveness monitor channel. This channel will report
596// a "true" or "false" on every publish, which indicates whether
597// or not the channel is still live. This channel is then picked up
598// by the service (i.e. rw_core / ro_core) to update readiness status
599// and/or take other actions.
Neha Sharma94f16a92020-06-26 04:17:55 +0000600func (sc *SaramaClient) EnableLivenessChannel(ctx context.Context, enable bool) chan bool {
601 logger.Infow(ctx, "kafka-enable-liveness-channel", log.Fields{"enable": enable})
Scott Baker104b67d2019-10-29 15:56:27 -0700602 if enable {
David K. Bainbridge5edd7fb2020-07-29 19:30:48 -0700603 sc.livenessMutex.Lock()
604 defer sc.livenessMutex.Unlock()
Scott Baker104b67d2019-10-29 15:56:27 -0700605 if sc.liveness == nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000606 logger.Info(ctx, "kafka-create-liveness-channel")
Scott Baker104b67d2019-10-29 15:56:27 -0700607 // At least 1, so we can immediately post to it without blocking
608 // Setting a bigger number (10) allows the monitor to fall behind
609 // without blocking others. The monitor shouldn't really fall
610 // behind...
611 sc.liveness = make(chan bool, 10)
612 // post intial state to the channel
613 sc.liveness <- sc.alive
614 }
615 } else {
616 // TODO: Think about whether we need the ability to turn off
617 // liveness monitoring
618 panic("Turning off liveness reporting is not supported")
619 }
620 return sc.liveness
621}
622
Scott Baker0fef6982019-12-12 09:49:42 -0800623// Enable the Healthiness monitor channel. This channel will report "false"
624// if the kafka consumers die, or some other problem occurs which is
625// catastrophic that would require re-creating the client.
Neha Sharma94f16a92020-06-26 04:17:55 +0000626func (sc *SaramaClient) EnableHealthinessChannel(ctx context.Context, enable bool) chan bool {
627 logger.Infow(ctx, "kafka-enable-healthiness-channel", log.Fields{"enable": enable})
Scott Baker0fef6982019-12-12 09:49:42 -0800628 if enable {
David K. Bainbridge5edd7fb2020-07-29 19:30:48 -0700629 sc.healthinessMutex.Lock()
630 defer sc.healthinessMutex.Unlock()
Scott Baker0fef6982019-12-12 09:49:42 -0800631 if sc.healthiness == nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000632 logger.Info(ctx, "kafka-create-healthiness-channel")
Scott Baker0fef6982019-12-12 09:49:42 -0800633 // At least 1, so we can immediately post to it without blocking
634 // Setting a bigger number (10) allows the monitor to fall behind
635 // without blocking others. The monitor shouldn't really fall
636 // behind...
637 sc.healthiness = make(chan bool, 10)
638 // post intial state to the channel
639 sc.healthiness <- sc.healthy
640 }
641 } else {
642 // TODO: Think about whether we need the ability to turn off
643 // liveness monitoring
644 panic("Turning off healthiness reporting is not supported")
645 }
646 return sc.healthiness
647}
648
Scott Baker104b67d2019-10-29 15:56:27 -0700649// send an empty message on the liveness channel to check whether connectivity has
650// been restored.
Neha Sharma94f16a92020-06-26 04:17:55 +0000651func (sc *SaramaClient) SendLiveness(ctx context.Context) error {
Scott Baker104b67d2019-10-29 15:56:27 -0700652 if !sc.started {
653 return fmt.Errorf("SendLiveness() called while not started")
654 }
655
656 kafkaMsg := &sarama.ProducerMessage{
657 Topic: "_liveness_test",
658 Value: sarama.StringEncoder(time.Now().Format(time.RFC3339)), // for debugging / informative use
659 }
660
661 // Send message to kafka
662 sc.producer.Input() <- kafkaMsg
663 // Wait for result
664 // TODO: Use a lock or a different mechanism to ensure the response received corresponds to the message sent.
665 select {
666 case ok := <-sc.producer.Successes():
Neha Sharma94f16a92020-06-26 04:17:55 +0000667 logger.Debugw(ctx, "liveness-message-sent", log.Fields{"status": ok.Topic})
668 sc.updateLiveness(ctx, true)
Scott Baker104b67d2019-10-29 15:56:27 -0700669 case notOk := <-sc.producer.Errors():
Neha Sharma94f16a92020-06-26 04:17:55 +0000670 logger.Debugw(ctx, "liveness-error-sending", log.Fields{"status": notOk})
671 if sc.isLivenessError(ctx, notOk) {
672 sc.updateLiveness(ctx, false)
Scott Baker104b67d2019-10-29 15:56:27 -0700673 }
Scott Baker2c1c4822019-10-16 11:02:41 -0700674 return notOk
675 }
676 return nil
677}
678
679// getGroupId returns the group id from the key-value args.
680func getGroupId(kvArgs ...*KVArg) string {
681 for _, arg := range kvArgs {
682 if arg.Key == GroupIdKey {
683 return arg.Value.(string)
684 }
685 }
686 return ""
687}
688
689// getOffset returns the offset from the key-value args.
690func getOffset(kvArgs ...*KVArg) int64 {
691 for _, arg := range kvArgs {
692 if arg.Key == Offset {
693 return arg.Value.(int64)
694 }
695 }
696 return sarama.OffsetNewest
697}
698
Neha Sharma94f16a92020-06-26 04:17:55 +0000699func (sc *SaramaClient) createClusterAdmin(ctx context.Context) error {
Scott Baker2c1c4822019-10-16 11:02:41 -0700700 config := sarama.NewConfig()
701 config.Version = sarama.V1_0_0_0
702
703 // Create a cluster Admin
704 var cAdmin sarama.ClusterAdmin
705 var err error
Neha Sharmadd9af392020-04-28 09:03:57 +0000706 if cAdmin, err = sarama.NewClusterAdmin([]string{sc.KafkaAddress}, config); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000707 logger.Errorw(ctx, "cluster-admin-failure", log.Fields{"error": err, "broker-address": sc.KafkaAddress})
Scott Baker2c1c4822019-10-16 11:02:41 -0700708 return err
709 }
710 sc.cAdmin = cAdmin
711 return nil
712}
713
714func (sc *SaramaClient) lockTopic(topic *Topic) {
715 sc.lockOfTopicLockMap.Lock()
716 if _, exist := sc.topicLockMap[topic.Name]; exist {
717 sc.lockOfTopicLockMap.Unlock()
718 sc.topicLockMap[topic.Name].Lock()
719 } else {
720 sc.topicLockMap[topic.Name] = &sync.RWMutex{}
721 sc.lockOfTopicLockMap.Unlock()
722 sc.topicLockMap[topic.Name].Lock()
723 }
724}
725
726func (sc *SaramaClient) unLockTopic(topic *Topic) {
727 sc.lockOfTopicLockMap.Lock()
728 defer sc.lockOfTopicLockMap.Unlock()
729 if _, exist := sc.topicLockMap[topic.Name]; exist {
730 sc.topicLockMap[topic.Name].Unlock()
731 }
732}
733
734func (sc *SaramaClient) addTopicToConsumerChannelMap(id string, arg *consumerChannels) {
735 sc.lockTopicToConsumerChannelMap.Lock()
736 defer sc.lockTopicToConsumerChannelMap.Unlock()
737 if _, exist := sc.topicToConsumerChannelMap[id]; !exist {
738 sc.topicToConsumerChannelMap[id] = arg
739 }
740}
741
Scott Baker2c1c4822019-10-16 11:02:41 -0700742func (sc *SaramaClient) getConsumerChannel(topic *Topic) *consumerChannels {
743 sc.lockTopicToConsumerChannelMap.RLock()
744 defer sc.lockTopicToConsumerChannelMap.RUnlock()
745
746 if consumerCh, exist := sc.topicToConsumerChannelMap[topic.Name]; exist {
747 return consumerCh
748 }
749 return nil
750}
751
Neha Sharma94f16a92020-06-26 04:17:55 +0000752func (sc *SaramaClient) addChannelToConsumerChannelMap(ctx context.Context, topic *Topic, ch chan *ic.InterContainerMessage) {
Scott Baker2c1c4822019-10-16 11:02:41 -0700753 sc.lockTopicToConsumerChannelMap.Lock()
754 defer sc.lockTopicToConsumerChannelMap.Unlock()
755 if consumerCh, exist := sc.topicToConsumerChannelMap[topic.Name]; exist {
756 consumerCh.channels = append(consumerCh.channels, ch)
757 return
758 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000759 logger.Warnw(ctx, "consumers-channel-not-exist", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700760}
761
762//closeConsumers closes a list of sarama consumers. The consumers can either be a partition consumers or a group consumers
Neha Sharma94f16a92020-06-26 04:17:55 +0000763func closeConsumers(ctx context.Context, consumers []interface{}) error {
Scott Baker2c1c4822019-10-16 11:02:41 -0700764 var err error
765 for _, consumer := range consumers {
766 // Is it a partition consumers?
767 if partionConsumer, ok := consumer.(sarama.PartitionConsumer); ok {
768 if errTemp := partionConsumer.Close(); errTemp != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000769 logger.Debugw(ctx, "partition!!!", log.Fields{"err": errTemp})
Scott Baker2c1c4822019-10-16 11:02:41 -0700770 if strings.Compare(errTemp.Error(), sarama.ErrUnknownTopicOrPartition.Error()) == 0 {
771 // This can occur on race condition
772 err = nil
773 } else {
774 err = errTemp
775 }
776 }
777 } else if groupConsumer, ok := consumer.(*scc.Consumer); ok {
778 if errTemp := groupConsumer.Close(); errTemp != nil {
779 if strings.Compare(errTemp.Error(), sarama.ErrUnknownTopicOrPartition.Error()) == 0 {
780 // This can occur on race condition
781 err = nil
782 } else {
783 err = errTemp
784 }
785 }
786 }
787 }
788 return err
789}
790
Neha Sharma94f16a92020-06-26 04:17:55 +0000791func (sc *SaramaClient) removeChannelFromConsumerChannelMap(ctx context.Context, topic Topic, ch <-chan *ic.InterContainerMessage) error {
Scott Baker2c1c4822019-10-16 11:02:41 -0700792 sc.lockTopicToConsumerChannelMap.Lock()
793 defer sc.lockTopicToConsumerChannelMap.Unlock()
794 if consumerCh, exist := sc.topicToConsumerChannelMap[topic.Name]; exist {
795 // Channel will be closed in the removeChannel method
Neha Sharma94f16a92020-06-26 04:17:55 +0000796 consumerCh.channels = removeChannel(ctx, consumerCh.channels, ch)
Scott Baker2c1c4822019-10-16 11:02:41 -0700797 // If there are no more channels then we can close the consumers itself
798 if len(consumerCh.channels) == 0 {
Neha Sharma94f16a92020-06-26 04:17:55 +0000799 logger.Debugw(ctx, "closing-consumers", log.Fields{"topic": topic})
800 err := closeConsumers(ctx, consumerCh.consumers)
Scott Baker2c1c4822019-10-16 11:02:41 -0700801 //err := consumerCh.consumers.Close()
802 delete(sc.topicToConsumerChannelMap, topic.Name)
803 return err
804 }
805 return nil
806 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000807 logger.Warnw(ctx, "topic-does-not-exist", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700808 return errors.New("topic-does-not-exist")
809}
810
Neha Sharma94f16a92020-06-26 04:17:55 +0000811func (sc *SaramaClient) clearTopicFromConsumerChannelMap(ctx context.Context, topic Topic) error {
Scott Baker2c1c4822019-10-16 11:02:41 -0700812 sc.lockTopicToConsumerChannelMap.Lock()
813 defer sc.lockTopicToConsumerChannelMap.Unlock()
814 if consumerCh, exist := sc.topicToConsumerChannelMap[topic.Name]; exist {
815 for _, ch := range consumerCh.channels {
816 // Channel will be closed in the removeChannel method
Neha Sharma94f16a92020-06-26 04:17:55 +0000817 removeChannel(ctx, consumerCh.channels, ch)
Scott Baker2c1c4822019-10-16 11:02:41 -0700818 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000819 err := closeConsumers(ctx, consumerCh.consumers)
Scott Baker2c1c4822019-10-16 11:02:41 -0700820 //if err == sarama.ErrUnknownTopicOrPartition {
821 // // Not an error
822 // err = nil
823 //}
824 //err := consumerCh.consumers.Close()
825 delete(sc.topicToConsumerChannelMap, topic.Name)
826 return err
827 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000828 logger.Debugw(ctx, "topic-does-not-exist", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700829 return nil
830}
831
Scott Baker2c1c4822019-10-16 11:02:41 -0700832//createPublisher creates the publisher which is used to send a message onto kafka
Neha Sharma94f16a92020-06-26 04:17:55 +0000833func (sc *SaramaClient) createPublisher(ctx context.Context) error {
Scott Baker2c1c4822019-10-16 11:02:41 -0700834 // This Creates the publisher
835 config := sarama.NewConfig()
Himani Chawlaf87a6a92021-04-01 17:44:16 +0530836 config.Version = sarama.V1_0_0_0
Scott Baker2c1c4822019-10-16 11:02:41 -0700837 config.Producer.Partitioner = sarama.NewRandomPartitioner
838 config.Producer.Flush.Frequency = time.Duration(sc.producerFlushFrequency)
839 config.Producer.Flush.Messages = sc.producerFlushMessages
840 config.Producer.Flush.MaxMessages = sc.producerFlushMaxmessages
841 config.Producer.Return.Errors = sc.producerReturnErrors
842 config.Producer.Return.Successes = sc.producerReturnSuccess
843 //config.Producer.RequiredAcks = sarama.WaitForAll
844 config.Producer.RequiredAcks = sarama.WaitForLocal
845
Neha Sharmadd9af392020-04-28 09:03:57 +0000846 brokers := []string{sc.KafkaAddress}
Scott Baker2c1c4822019-10-16 11:02:41 -0700847
848 if producer, err := sarama.NewAsyncProducer(brokers, config); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000849 logger.Errorw(ctx, "error-starting-publisher", log.Fields{"error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700850 return err
851 } else {
852 sc.producer = producer
853 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000854 logger.Info(ctx, "Kafka-publisher-created")
Scott Baker2c1c4822019-10-16 11:02:41 -0700855 return nil
856}
857
Neha Sharma94f16a92020-06-26 04:17:55 +0000858func (sc *SaramaClient) createConsumer(ctx context.Context) error {
Scott Baker2c1c4822019-10-16 11:02:41 -0700859 config := sarama.NewConfig()
Himani Chawlaf87a6a92021-04-01 17:44:16 +0530860 config.Version = sarama.V1_0_0_0
Scott Baker2c1c4822019-10-16 11:02:41 -0700861 config.Consumer.Return.Errors = true
862 config.Consumer.Fetch.Min = 1
863 config.Consumer.MaxWaitTime = time.Duration(sc.consumerMaxwait) * time.Millisecond
864 config.Consumer.MaxProcessingTime = time.Duration(sc.maxProcessingTime) * time.Millisecond
865 config.Consumer.Offsets.Initial = sarama.OffsetNewest
866 config.Metadata.Retry.Max = sc.metadataMaxRetry
Neha Sharmadd9af392020-04-28 09:03:57 +0000867 brokers := []string{sc.KafkaAddress}
Scott Baker2c1c4822019-10-16 11:02:41 -0700868
869 if consumer, err := sarama.NewConsumer(brokers, config); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000870 logger.Errorw(ctx, "error-starting-consumers", log.Fields{"error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700871 return err
872 } else {
873 sc.consumer = consumer
874 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000875 logger.Info(ctx, "Kafka-consumers-created")
Scott Baker2c1c4822019-10-16 11:02:41 -0700876 return nil
877}
878
879// createGroupConsumer creates a consumers group
Neha Sharma94f16a92020-06-26 04:17:55 +0000880func (sc *SaramaClient) createGroupConsumer(ctx context.Context, topic *Topic, groupId string, initialOffset int64, retries int) (*scc.Consumer, error) {
Scott Baker2c1c4822019-10-16 11:02:41 -0700881 config := scc.NewConfig()
Himani Chawlaf87a6a92021-04-01 17:44:16 +0530882 config.Version = sarama.V1_0_0_0
Scott Baker2c1c4822019-10-16 11:02:41 -0700883 config.ClientID = uuid.New().String()
884 config.Group.Mode = scc.ConsumerModeMultiplex
Scott Baker104b67d2019-10-29 15:56:27 -0700885 config.Consumer.Group.Heartbeat.Interval, _ = time.ParseDuration("1s")
886 config.Consumer.Return.Errors = true
Scott Baker2c1c4822019-10-16 11:02:41 -0700887 //config.Group.Return.Notifications = false
888 //config.Consumer.MaxWaitTime = time.Duration(DefaultConsumerMaxwait) * time.Millisecond
889 //config.Consumer.MaxProcessingTime = time.Duration(DefaultMaxProcessingTime) * time.Millisecond
890 config.Consumer.Offsets.Initial = initialOffset
891 //config.Consumer.Offsets.Initial = sarama.OffsetOldest
Neha Sharmadd9af392020-04-28 09:03:57 +0000892 brokers := []string{sc.KafkaAddress}
Scott Baker2c1c4822019-10-16 11:02:41 -0700893
894 topics := []string{topic.Name}
895 var consumer *scc.Consumer
896 var err error
897
898 if consumer, err = scc.NewConsumer(brokers, groupId, topics, config); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000899 logger.Errorw(ctx, "create-group-consumers-failure", log.Fields{"error": err, "topic": topic.Name, "groupId": groupId})
Scott Baker2c1c4822019-10-16 11:02:41 -0700900 return nil, err
901 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000902 logger.Debugw(ctx, "create-group-consumers-success", log.Fields{"topic": topic.Name, "groupId": groupId})
Scott Baker2c1c4822019-10-16 11:02:41 -0700903
904 //sc.groupConsumers[topic.Name] = consumer
905 sc.addToGroupConsumers(topic.Name, consumer)
906 return consumer, nil
907}
908
909// dispatchToConsumers sends the intercontainermessage received on a given topic to all subscribers for that
910// topic via the unique channel each subscriber received during subscription
911func (sc *SaramaClient) dispatchToConsumers(consumerCh *consumerChannels, protoMessage *ic.InterContainerMessage) {
912 // Need to go over all channels and publish messages to them - do we need to copy msg?
913 sc.lockTopicToConsumerChannelMap.RLock()
Scott Baker2c1c4822019-10-16 11:02:41 -0700914 for _, ch := range consumerCh.channels {
915 go func(c chan *ic.InterContainerMessage) {
916 c <- protoMessage
917 }(ch)
918 }
Kent Hagermanccfa2132019-12-17 13:29:34 -0500919 sc.lockTopicToConsumerChannelMap.RUnlock()
920
921 if callback := sc.metadataCallback; callback != nil {
Scott Baker84a55ce2020-04-17 10:11:30 -0700922 ts, _ := ptypes.Timestamp(protoMessage.Header.Timestamp)
923 callback(protoMessage.Header.FromTopic, ts)
Kent Hagermanccfa2132019-12-17 13:29:34 -0500924 }
Scott Baker2c1c4822019-10-16 11:02:41 -0700925}
926
Neha Sharma94f16a92020-06-26 04:17:55 +0000927func (sc *SaramaClient) consumeFromAPartition(ctx context.Context, topic *Topic, consumer sarama.PartitionConsumer, consumerChnls *consumerChannels) {
928 logger.Debugw(ctx, "starting-partition-consumption-loop", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700929startloop:
930 for {
931 select {
932 case err, ok := <-consumer.Errors():
933 if ok {
Neha Sharma94f16a92020-06-26 04:17:55 +0000934 if sc.isLivenessError(ctx, err) {
935 sc.updateLiveness(ctx, false)
936 logger.Warnw(ctx, "partition-consumers-error", log.Fields{"error": err})
cbabud4978652019-12-04 08:04:21 +0100937 }
Scott Baker2c1c4822019-10-16 11:02:41 -0700938 } else {
939 // Channel is closed
940 break startloop
941 }
942 case msg, ok := <-consumer.Messages():
Neha Sharma94f16a92020-06-26 04:17:55 +0000943 //logger.Debugw(ctx, "message-received", logger.Fields{"msg": msg, "receivedTopic": msg.Topic})
Scott Baker2c1c4822019-10-16 11:02:41 -0700944 if !ok {
945 // channel is closed
946 break startloop
947 }
948 msgBody := msg.Value
Neha Sharma94f16a92020-06-26 04:17:55 +0000949 sc.updateLiveness(ctx, true)
950 logger.Debugw(ctx, "message-received", log.Fields{"timestamp": msg.Timestamp, "receivedTopic": msg.Topic})
Scott Baker2c1c4822019-10-16 11:02:41 -0700951 icm := &ic.InterContainerMessage{}
952 if err := proto.Unmarshal(msgBody, icm); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000953 logger.Warnw(ctx, "partition-invalid-message", log.Fields{"error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700954 continue
955 }
956 go sc.dispatchToConsumers(consumerChnls, icm)
957 case <-sc.doneCh:
Neha Sharma94f16a92020-06-26 04:17:55 +0000958 logger.Infow(ctx, "partition-received-exit-signal", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700959 break startloop
960 }
961 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000962 logger.Infow(ctx, "partition-consumer-stopped", log.Fields{"topic": topic.Name})
963 sc.setUnhealthy(ctx)
Scott Baker2c1c4822019-10-16 11:02:41 -0700964}
965
Neha Sharma94f16a92020-06-26 04:17:55 +0000966func (sc *SaramaClient) consumeGroupMessages(ctx context.Context, topic *Topic, consumer *scc.Consumer, consumerChnls *consumerChannels) {
967 logger.Debugw(ctx, "starting-group-consumption-loop", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700968
969startloop:
970 for {
971 select {
972 case err, ok := <-consumer.Errors():
973 if ok {
Neha Sharma94f16a92020-06-26 04:17:55 +0000974 if sc.isLivenessError(ctx, err) {
975 sc.updateLiveness(ctx, false)
Scott Bakerfa2f6ee2019-11-19 14:53:14 -0800976 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000977 logger.Warnw(ctx, "group-consumers-error", log.Fields{"topic": topic.Name, "error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700978 } else {
Neha Sharma94f16a92020-06-26 04:17:55 +0000979 logger.Warnw(ctx, "group-consumers-closed-err", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700980 // channel is closed
981 break startloop
982 }
983 case msg, ok := <-consumer.Messages():
984 if !ok {
Neha Sharma94f16a92020-06-26 04:17:55 +0000985 logger.Warnw(ctx, "group-consumers-closed-msg", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700986 // Channel closed
987 break startloop
988 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000989 sc.updateLiveness(ctx, true)
990 logger.Debugw(ctx, "message-received", log.Fields{"timestamp": msg.Timestamp, "receivedTopic": msg.Topic})
Scott Baker2c1c4822019-10-16 11:02:41 -0700991 msgBody := msg.Value
992 icm := &ic.InterContainerMessage{}
993 if err := proto.Unmarshal(msgBody, icm); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000994 logger.Warnw(ctx, "invalid-message", log.Fields{"error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -0700995 continue
996 }
997 go sc.dispatchToConsumers(consumerChnls, icm)
998 consumer.MarkOffset(msg, "")
999 case ntf := <-consumer.Notifications():
Neha Sharma94f16a92020-06-26 04:17:55 +00001000 logger.Debugw(ctx, "group-received-notification", log.Fields{"notification": ntf})
Scott Baker2c1c4822019-10-16 11:02:41 -07001001 case <-sc.doneCh:
Neha Sharma94f16a92020-06-26 04:17:55 +00001002 logger.Infow(ctx, "group-received-exit-signal", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -07001003 break startloop
1004 }
1005 }
Neha Sharma94f16a92020-06-26 04:17:55 +00001006 logger.Infow(ctx, "group-consumer-stopped", log.Fields{"topic": topic.Name})
1007 sc.setUnhealthy(ctx)
Scott Baker2c1c4822019-10-16 11:02:41 -07001008}
1009
Neha Sharma94f16a92020-06-26 04:17:55 +00001010func (sc *SaramaClient) startConsumers(ctx context.Context, topic *Topic) error {
1011 logger.Debugw(ctx, "starting-consumers", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -07001012 var consumerCh *consumerChannels
1013 if consumerCh = sc.getConsumerChannel(topic); consumerCh == nil {
Neha Sharma94f16a92020-06-26 04:17:55 +00001014 logger.Errorw(ctx, "consumers-not-exist", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -07001015 return errors.New("consumers-not-exist")
1016 }
1017 // For each consumer listening for that topic, start a consumption loop
1018 for _, consumer := range consumerCh.consumers {
1019 if pConsumer, ok := consumer.(sarama.PartitionConsumer); ok {
Neha Sharma94f16a92020-06-26 04:17:55 +00001020 go sc.consumeFromAPartition(ctx, topic, pConsumer, consumerCh)
Scott Baker2c1c4822019-10-16 11:02:41 -07001021 } else if gConsumer, ok := consumer.(*scc.Consumer); ok {
Neha Sharma94f16a92020-06-26 04:17:55 +00001022 go sc.consumeGroupMessages(ctx, topic, gConsumer, consumerCh)
Scott Baker2c1c4822019-10-16 11:02:41 -07001023 } else {
Neha Sharma94f16a92020-06-26 04:17:55 +00001024 logger.Errorw(ctx, "invalid-consumer", log.Fields{"topic": topic})
Scott Baker2c1c4822019-10-16 11:02:41 -07001025 return errors.New("invalid-consumer")
1026 }
1027 }
1028 return nil
1029}
1030
1031//// setupConsumerChannel creates a consumerChannels object for that topic and add it to the consumerChannels map
1032//// for that topic. It also starts the routine that listens for messages on that topic.
Neha Sharma94f16a92020-06-26 04:17:55 +00001033func (sc *SaramaClient) setupPartitionConsumerChannel(ctx context.Context, topic *Topic, initialOffset int64) (chan *ic.InterContainerMessage, error) {
Scott Baker2c1c4822019-10-16 11:02:41 -07001034 var pConsumers []sarama.PartitionConsumer
1035 var err error
1036
Neha Sharma94f16a92020-06-26 04:17:55 +00001037 if pConsumers, err = sc.createPartitionConsumers(ctx, topic, initialOffset); err != nil {
1038 logger.Errorw(ctx, "creating-partition-consumers-failure", log.Fields{"error": err, "topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -07001039 return nil, err
1040 }
1041
1042 consumersIf := make([]interface{}, 0)
1043 for _, pConsumer := range pConsumers {
1044 consumersIf = append(consumersIf, pConsumer)
1045 }
1046
1047 // Create the consumers/channel structure and set the consumers and create a channel on that topic - for now
1048 // unbuffered to verify race conditions.
1049 consumerListeningChannel := make(chan *ic.InterContainerMessage)
1050 cc := &consumerChannels{
1051 consumers: consumersIf,
1052 channels: []chan *ic.InterContainerMessage{consumerListeningChannel},
1053 }
1054
1055 // Add the consumers channel to the map
1056 sc.addTopicToConsumerChannelMap(topic.Name, cc)
1057
1058 //Start a consumers to listen on that specific topic
David K. Bainbridge7c75cac2020-02-19 08:53:46 -08001059 go func() {
Neha Sharma94f16a92020-06-26 04:17:55 +00001060 if err := sc.startConsumers(ctx, topic); err != nil {
1061 logger.Errorw(ctx, "start-consumers-failed", log.Fields{
David K. Bainbridge7c75cac2020-02-19 08:53:46 -08001062 "topic": topic,
1063 "error": err})
1064 }
1065 }()
Scott Baker2c1c4822019-10-16 11:02:41 -07001066
1067 return consumerListeningChannel, nil
1068}
1069
1070// setupConsumerChannel creates a consumerChannels object for that topic and add it to the consumerChannels map
1071// for that topic. It also starts the routine that listens for messages on that topic.
Neha Sharma94f16a92020-06-26 04:17:55 +00001072func (sc *SaramaClient) setupGroupConsumerChannel(ctx context.Context, topic *Topic, groupId string, initialOffset int64) (chan *ic.InterContainerMessage, error) {
Scott Baker2c1c4822019-10-16 11:02:41 -07001073 // TODO: Replace this development partition consumers with a group consumers
1074 var pConsumer *scc.Consumer
1075 var err error
Neha Sharma94f16a92020-06-26 04:17:55 +00001076 if pConsumer, err = sc.createGroupConsumer(ctx, topic, groupId, initialOffset, DefaultMaxRetries); err != nil {
1077 logger.Errorw(ctx, "creating-partition-consumers-failure", log.Fields{"error": err, "topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -07001078 return nil, err
1079 }
1080 // Create the consumers/channel structure and set the consumers and create a channel on that topic - for now
1081 // unbuffered to verify race conditions.
1082 consumerListeningChannel := make(chan *ic.InterContainerMessage)
1083 cc := &consumerChannels{
1084 consumers: []interface{}{pConsumer},
1085 channels: []chan *ic.InterContainerMessage{consumerListeningChannel},
1086 }
1087
1088 // Add the consumers channel to the map
1089 sc.addTopicToConsumerChannelMap(topic.Name, cc)
1090
1091 //Start a consumers to listen on that specific topic
David K. Bainbridge7c75cac2020-02-19 08:53:46 -08001092 go func() {
Neha Sharma94f16a92020-06-26 04:17:55 +00001093 if err := sc.startConsumers(ctx, topic); err != nil {
1094 logger.Errorw(ctx, "start-consumers-failed", log.Fields{
David K. Bainbridge7c75cac2020-02-19 08:53:46 -08001095 "topic": topic,
1096 "error": err})
1097 }
1098 }()
Scott Baker2c1c4822019-10-16 11:02:41 -07001099
1100 return consumerListeningChannel, nil
1101}
1102
Neha Sharma94f16a92020-06-26 04:17:55 +00001103func (sc *SaramaClient) createPartitionConsumers(ctx context.Context, topic *Topic, initialOffset int64) ([]sarama.PartitionConsumer, error) {
1104 logger.Debugw(ctx, "creating-partition-consumers", log.Fields{"topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -07001105 partitionList, err := sc.consumer.Partitions(topic.Name)
1106 if err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +00001107 logger.Warnw(ctx, "get-partition-failure", log.Fields{"error": err, "topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -07001108 return nil, err
1109 }
1110
1111 pConsumers := make([]sarama.PartitionConsumer, 0)
1112 for _, partition := range partitionList {
1113 var pConsumer sarama.PartitionConsumer
1114 if pConsumer, err = sc.consumer.ConsumePartition(topic.Name, partition, initialOffset); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +00001115 logger.Warnw(ctx, "consumers-partition-failure", log.Fields{"error": err, "topic": topic.Name})
Scott Baker2c1c4822019-10-16 11:02:41 -07001116 return nil, err
1117 }
1118 pConsumers = append(pConsumers, pConsumer)
1119 }
1120 return pConsumers, nil
1121}
1122
Neha Sharma94f16a92020-06-26 04:17:55 +00001123func removeChannel(ctx context.Context, channels []chan *ic.InterContainerMessage, ch <-chan *ic.InterContainerMessage) []chan *ic.InterContainerMessage {
Scott Baker2c1c4822019-10-16 11:02:41 -07001124 var i int
1125 var channel chan *ic.InterContainerMessage
1126 for i, channel = range channels {
1127 if channel == ch {
1128 channels[len(channels)-1], channels[i] = channels[i], channels[len(channels)-1]
1129 close(channel)
Neha Sharma94f16a92020-06-26 04:17:55 +00001130 logger.Debug(ctx, "channel-closed")
Scott Baker2c1c4822019-10-16 11:02:41 -07001131 return channels[:len(channels)-1]
1132 }
1133 }
1134 return channels
1135}
1136
1137func (sc *SaramaClient) addToGroupConsumers(topic string, consumer *scc.Consumer) {
1138 sc.lockOfGroupConsumers.Lock()
1139 defer sc.lockOfGroupConsumers.Unlock()
1140 if _, exist := sc.groupConsumers[topic]; !exist {
1141 sc.groupConsumers[topic] = consumer
1142 }
1143}
1144
Neha Sharma94f16a92020-06-26 04:17:55 +00001145func (sc *SaramaClient) deleteFromGroupConsumers(ctx context.Context, topic string) error {
Scott Baker2c1c4822019-10-16 11:02:41 -07001146 sc.lockOfGroupConsumers.Lock()
1147 defer sc.lockOfGroupConsumers.Unlock()
1148 if _, exist := sc.groupConsumers[topic]; exist {
1149 consumer := sc.groupConsumers[topic]
1150 delete(sc.groupConsumers, topic)
1151 if err := consumer.Close(); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +00001152 logger.Errorw(ctx, "failure-closing-consumer", log.Fields{"error": err})
Scott Baker2c1c4822019-10-16 11:02:41 -07001153 return err
1154 }
1155 }
1156 return nil
1157}