/*
 * Copyright 2018-present Open Networking Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package main

import (
    "errors"
    "flag"
    "fmt"
    "math"
    "os"
    "path"
    "regexp"
    "strconv"
    "time"

    "github.com/golang/protobuf/ptypes"
    "github.com/golang/protobuf/ptypes/empty"
    "github.com/opencord/voltha-go/common/log"
    "github.com/opencord/voltha-go/common/version"
    "github.com/opencord/voltha-go/kafka"
    pb "github.com/opencord/voltha-protos/go/afrouter"
    cmn "github.com/opencord/voltha-protos/go/common"
    ic "github.com/opencord/voltha-protos/go/inter_container"
    vpb "github.com/opencord/voltha-protos/go/voltha"
    "golang.org/x/net/context"
    "google.golang.org/grpc"
    "google.golang.org/grpc/connectivity"
    "google.golang.org/grpc/keepalive"
    "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/rest"
    "k8s.io/client-go/tools/clientcmd"
)

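// volthaPod captures the identity of a single rw-core pod, the set of
// device ids it is known to own, and the afrouter backend it maps to.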
type volthaPod struct {
    name    string
    ipAddr  string
    node    string
    devIds  map[string]struct{}
    backend string
}

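// Configuration holds the parsed command-line options.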
type Configuration struct {
    DisplayVersionOnly *bool
}

var (
    // if k8s variables are undefined, will attempt to use in-cluster config
    k8sApiServer      = getStrEnv("K8S_API_SERVER", "")
    k8sKubeConfigPath = getStrEnv("K8S_KUBE_CONFIG_PATH", "")

    podNamespace          = getStrEnv("POD_NAMESPACE", "voltha")
    podLabelSelector      = getStrEnv("POD_LABEL_SELECTOR", "app=rw-core")
    podAffinityGroupLabel = getStrEnv("POD_AFFINITY_GROUP_LABEL", "affinity-group")

    podGrpcPort = uint64(getIntEnv("POD_GRPC_PORT", 0, math.MaxUint16, 50057))

    afrouterApiAddress = getStrEnv("AFROUTER_API_ADDRESS", "localhost:55554")

    afrouterRouterName    = getStrEnv("AFROUTER_ROUTER_NAME", "vcore")
    afrouterRouteName     = getStrEnv("AFROUTER_ROUTE_NAME", "dev_manager")
    afrouterRWClusterName = getStrEnv("AFROUTER_RW_CLUSTER_NAME", "vcore")

    kafkaTopic      = getStrEnv("KAFKA_TOPIC", "AffinityRouter")
    kafkaClientType = getStrEnv("KAFKA_CLIENT_TYPE", "sarama")
    kafkaHost       = getStrEnv("KAFKA_HOST", "kafka")
    kafkaPort       = getIntEnv("KAFKA_PORT", 0, math.MaxUint16, 9092)
    kafkaInstanceID = getStrEnv("KAFKA_INSTANCE_ID", "arouterd")

    instanceID = getStrEnv("HOSTNAME", "arouterd001")
)

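// getIntEnv returns the integer value of the environment variable key,
// constrained to the inclusive range [min, max]; it returns defaultValue
// if the variable is unset, and panics if the value is not a number in range.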
func getIntEnv(key string, min, max, defaultValue int) int {
    if val, have := os.LookupEnv(key); have {
        num, err := strconv.Atoi(val)
        if err != nil || !(min <= num && num <= max) {
            panic(fmt.Errorf("%s must be a number in the range [%d, %d]; default: %d", key, min, max, defaultValue))
        }
        return num
    }
    return defaultValue
}

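// getStrEnv returns the value of the environment variable key, or
// defaultValue if it is unset.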
func getStrEnv(key, defaultValue string) string {
    if val, have := os.LookupEnv(key); have {
        return val
    }
    return defaultValue
}

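// newKafkaClient constructs a kafka client of the requested type; "sarama"
// is the only client type currently supported.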
func newKafkaClient(clientType string, host string, port int, instanceID string) (kafka.Client, error) {
    log.Infow("kafka-client-type", log.Fields{"client": clientType})
    switch clientType {
    case "sarama":
        return kafka.NewSaramaClient(
            kafka.Host(host),
            kafka.Port(port),
            kafka.ConsumerType(kafka.GroupCustomer),
            kafka.ProducerReturnOnErrors(true),
            kafka.ProducerReturnOnSuccess(true),
            kafka.ProducerMaxRetries(6),
            kafka.NumPartitions(3),
            kafka.ConsumerGroupName(instanceID),
            kafka.ConsumerGroupPrefix(instanceID),
            kafka.AutoCreateTopic(false),
            kafka.ProducerFlushFrequency(5),
            kafka.ProducerRetryBackoff(time.Millisecond*30)), nil
    }
    return nil, errors.New("unsupported-client-type")
}

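// k8sClientSet builds a kubernetes clientset from K8S_API_SERVER and/or
// K8S_KUBE_CONFIG_PATH when either is set, falling back to the in-cluster
// config otherwise; it panics if no usable configuration can be loaded.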
func k8sClientSet() *kubernetes.Clientset {
    var config *rest.Config
    if k8sApiServer != "" || k8sKubeConfigPath != "" {
        // use combination of URL & local kube-config file
        c, err := clientcmd.BuildConfigFromFlags(k8sApiServer, k8sKubeConfigPath)
        if err != nil {
            panic(err)
        }
        config = c
    } else {
        // use in-cluster config
        c, err := rest.InClusterConfig()
        if err != nil {
            log.Errorf("Unable to load in-cluster config. Try setting K8S_API_SERVER and K8S_KUBE_CONFIG_PATH?")
            panic(err)
        }
        config = c
    }
    // creates the clientset
    clientset, err := kubernetes.NewForConfig(config)
    if err != nil {
        panic(err)
    }

    return clientset
}

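// connect dials a blocking, insecure gRPC connection to addr with client
// keepalives and a capped reconnect backoff; the supplied context bounds
// the dial attempt.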
func connect(ctx context.Context, addr string) (*grpc.ClientConn, error) {
    log.Debugf("Trying to connect to %s", addr)
    conn, err := grpc.DialContext(ctx, addr,
        grpc.WithInsecure(),
        grpc.WithBlock(),
        grpc.WithBackoffMaxDelay(time.Second*5),
        grpc.WithKeepaliveParams(keepalive.ClientParameters{Time: time.Second * 10, Timeout: time.Second * 5}))
    if err == nil {
        log.Debugf("Connection succeeded")
    }
    return conn, err
}

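// getVolthaPods returns the running pods matching podLabelSelector, skipping
// pods with any failing condition; pods missing the affinity-group label are
// logged and ignored.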
func getVolthaPods(cs *kubernetes.Clientset) ([]*volthaPod, error) {
    pods, err := cs.CoreV1().Pods(podNamespace).List(metav1.ListOptions{LabelSelector: podLabelSelector})
    if err != nil {
        return nil, err
    }

    var rwPods []*volthaPod
items:
    for _, v := range pods.Items {
        // only pods that are actually running should be considered
        if v.Status.Phase == v1.PodRunning {
            for _, condition := range v.Status.Conditions {
                if condition.Status != v1.ConditionTrue {
                    continue items
                }
            }

            if group, have := v.Labels[podAffinityGroupLabel]; have {
                log.Debugf("Namespace: %s, PodName: %s, PodIP: %s, Host: %s\n", v.Namespace, v.Name, v.Status.PodIP, v.Spec.NodeName)
                rwPods = append(rwPods, &volthaPod{
                    name:    v.Name,
                    ipAddr:  v.Status.PodIP,
                    node:    v.Spec.NodeName,
                    devIds:  make(map[string]struct{}),
                    backend: afrouterRWClusterName + group,
                })
            } else {
                log.Warnf("Pod %s found matching %s without label %s", v.Name, podLabelSelector, podAffinityGroupLabel)
            }
        }
    }
    return rwPods, nil
}

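// reconcilePodDeviceIds asks the given pod to reconcile a set of device ids,
// used to bring a core up to date with devices its backend already owns.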
func reconcilePodDeviceIds(ctx context.Context, pod *volthaPod, ids map[string]struct{}) {
    ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*5)
    defer cancel()
    conn, err := connect(ctxTimeout, fmt.Sprintf("%s:%d", pod.ipAddr, podGrpcPort))
    if err != nil {
        log.Debugf("Could not reconcile devices from %s, could not connect: %s", pod.name, err)
        return
    }
    defer conn.Close()

    var idList cmn.IDs
    for k := range ids {
        idList.Items = append(idList.Items, &cmn.ID{Id: k})
    }

    client := vpb.NewVolthaServiceClient(conn)
    _, err = client.ReconcileDevices(ctx, &idList)
    if err != nil {
        log.Errorf("Attempt to reconcile ids on pod %s failed: %s", pod.name, err)
        return
    }
}

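// queryPodDeviceIds returns the set of device ids currently known to the
// given pod, or nil if the pod cannot be reached or the query fails.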
func queryPodDeviceIds(ctx context.Context, pod *volthaPod) map[string]struct{} {
    ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*5)
    defer cancel()
    conn, err := connect(ctxTimeout, fmt.Sprintf("%s:%d", pod.ipAddr, podGrpcPort))
    if err != nil {
        log.Debugf("Could not query devices from %s, could not connect: %s", pod.name, err)
        return nil
    }
    defer conn.Close()

    client := vpb.NewVolthaServiceClient(conn)
    devs, err := client.ListDeviceIds(ctx, &empty.Empty{})
    if err != nil {
        log.Error(err)
        return nil
    }

    var ret = make(map[string]struct{})
    for _, dv := range devs.Items {
        ret[dv.Id] = struct{}{}
    }
    return ret
}

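// setAffinity instructs the affinity router to route requests for deviceId
// to the given backend.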
func setAffinity(ctx context.Context, client pb.ConfigurationClient, deviceId string, backend string) {
    log.Debugf("Configuring backend %s with device id %s \n", backend, deviceId)
    if res, err := client.SetAffinity(ctx, &pb.Affinity{
        Router:  afrouterRouterName,
        Route:   afrouterRouteName,
        Cluster: afrouterRWClusterName,
        Backend: backend,
        Id:      deviceId,
    }); err != nil {
        log.Debugf("failed affinity RPC call: %s\n", err)
    } else {
        log.Debugf("Result: %v\n", res)
    }
}

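// monitorDiscovery processes device-discovery notifications from kafka,
// setting the affinity of each discovered device; it runs until the context
// is cancelled, stopping the kafka client and closing doneCh on exit.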
func monitorDiscovery(kc kafka.Client, ctx context.Context, client pb.ConfigurationClient, ch <-chan *ic.InterContainerMessage, doneCh chan<- struct{}) {
    defer close(doneCh)
    defer kc.Stop()

monitorLoop:
    for {
        select {
        case <-ctx.Done():
            break monitorLoop
        case msg := <-ch:
            log.Debug("Received a device discovery notification")
            device := &ic.DeviceDiscovered{}
            if err := ptypes.UnmarshalAny(msg.Body, device); err != nil {
                log.Errorf("Could not unmarshal received notification %v", msg)
            } else {
                // somewhat hackish solution: the backend is inferred from the first digit found in the publisher name
                group := regexp.MustCompile(`\d`).FindString(device.Publisher)
                if group != "" {
                    // set the affinity of the discovered device
                    setAffinity(ctx, client, device.Id, afrouterRWClusterName+group)
                } else {
                    log.Error("backend is unknown")
                }
            }
        }
    }
}

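// startDiscoveryMonitor connects to kafka, subscribes to the discovery topic,
// and runs monitorDiscovery in the background; the returned channel is closed
// when monitoring ends.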
func startDiscoveryMonitor(ctx context.Context, client pb.ConfigurationClient) (<-chan struct{}, error) {
    doneCh := make(chan struct{})
    // Connect to kafka for discovery events
    kc, err := newKafkaClient(kafkaClientType, kafkaHost, kafkaPort, kafkaInstanceID)
    if err != nil {
        panic(err)
    }

    for {
        if err := kc.Start(); err != nil {
            log.Error("Could not connect to kafka")
        } else {
            break
        }
        select {
        case <-ctx.Done():
            close(doneCh)
            return doneCh, errors.New("GRPC context done")

        case <-time.After(5 * time.Second):
        }
    }
    ch, err := kc.Subscribe(&kafka.Topic{Name: kafkaTopic})
    if err != nil {
        log.Errorf("Could not subscribe to the '%s' channel, discovery disabled", kafkaTopic)
        close(doneCh)
        kc.Stop()
        return doneCh, err
    }

    go monitorDiscovery(kc, ctx, client, ch, doneCh)
    return doneCh, nil
}

// coreMonitor polls the list of devices from all RW cores, pushes these devices
// into the affinity router, and ensures that all cores in a backend have their devices synced
func coreMonitor(ctx context.Context, client pb.ConfigurationClient, clientset *kubernetes.Clientset) {
    // map[backend]map[deviceId]struct{}
    deviceOwnership := make(map[string]map[string]struct{})
loop:
    for {
        // get the rw core list from k8s
        rwPods, err := getVolthaPods(clientset)
        if err != nil {
            log.Error(err)
            continue
        }

        // for every pod
        for _, pod := range rwPods {
            // get the devices for this pod's backend
            devices, have := deviceOwnership[pod.backend]
            if !have {
                devices = make(map[string]struct{})
                deviceOwnership[pod.backend] = devices
            }

            coreDevices := queryPodDeviceIds(ctx, pod)

            // handle devices that exist in the core but that we have only just learned about
            for deviceId := range coreDevices {
                // if there's a new device
                if _, have := devices[deviceId]; !have {
                    // add the device to our local list
                    devices[deviceId] = struct{}{}
                    // push the device into the affinity router
                    setAffinity(ctx, client, deviceId, pod.backend)
                }
            }

            // ensure that the core knows about all devices in its backend
            toSync := make(map[string]struct{})
            for deviceId := range devices {
                // if the pod is missing any devices
                if _, have := coreDevices[deviceId]; !have {
                    // we will reconcile them
                    toSync[deviceId] = struct{}{}
                }
            }

            if len(toSync) != 0 {
                reconcilePodDeviceIds(ctx, pod, toSync)
            }
        }

        select {
        case <-ctx.Done():
            // if we're done, exit
            break loop
        case <-time.After(10 * time.Second): // wait a while
        }
    }
}

// connectionActiveContext returns a context that is cancelled when the given
// connection is lost or shut down
func connectionActiveContext(conn *grpc.ClientConn) context.Context {
    ctx, disconnected := context.WithCancel(context.Background())
    go func() {
        for state := conn.GetState(); state != connectivity.TransientFailure && state != connectivity.Shutdown; state = conn.GetState() {
            if !conn.WaitForStateChange(context.Background(), state) {
                break
            }
        }
        log.Infof("Connection to afrouter lost")
        disconnected()
    }()
    return ctx
}

func main() {
    config := &Configuration{}
    cmdParse := flag.NewFlagSet(path.Base(os.Args[0]), flag.ContinueOnError)
    config.DisplayVersionOnly = cmdParse.Bool("version", false, "Print version information and exit")

    if err := cmdParse.Parse(os.Args[1:]); err != nil {
        fmt.Printf("Error: %v\n", err)
        os.Exit(1)
    }

    if *config.DisplayVersionOnly {
        fmt.Println("VOLTHA API Server (afrouterd)")
        fmt.Println(version.VersionInfo.String(" "))
        return
    }

    // Set up logging
    if _, err := log.SetDefaultLogger(log.JSON, 0, log.Fields{"instanceId": instanceID}); err != nil {
        log.With(log.Fields{"error": err}).Fatal("Cannot setup logging")
    }

    // Set up the kubernetes api
    clientset := k8sClientSet()

    for {
        // Connect to the affinity router; this runs as a sidecar container, so communication is over localhost
        conn, err := connect(context.Background(), afrouterApiAddress)
        if err != nil {
            panic(err)
        }

        // monitor the connection status, end the context if the connection is lost
        ctx := connectionActiveContext(conn)

        // set up the client
        client := pb.NewConfigurationClient(conn)

        // start the discovery monitor and core monitor;
        // these two processes do the majority of the work

        log.Info("Starting discovery monitoring")
        doneCh, _ := startDiscoveryMonitor(ctx, client)

        log.Info("Starting core monitoring")
        coreMonitor(ctx, client, clientset)

        // wait for the discovery monitor to quit
        <-doneCh

        conn.Close()
    }
}