/*
 * Copyright 2018-present Open Networking Foundation

 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at

 * http://www.apache.org/licenses/LICENSE-2.0

 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package main

import (
	//"os"
	"fmt"
	"time"
	"regexp"
	"errors"
	"strconv"
	//"io/ioutil"
	//"encoding/json"

	"k8s.io/client-go/rest"
	"google.golang.org/grpc"
	"golang.org/x/net/context"
	"k8s.io/client-go/kubernetes"
	"github.com/golang/protobuf/ptypes"
	//"k8s.io/apimachinery/pkg/api/errors"
	"github.com/opencord/voltha-go/common/log"
	kafka "github.com/opencord/voltha-go/kafka"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	empty "github.com/golang/protobuf/ptypes/empty"
	vpb "github.com/opencord/voltha-go/protos/voltha"
	pb "github.com/opencord/voltha-go/protos/afrouter"
	ic "github.com/opencord/voltha-go/protos/inter_container"
)

type configConn struct {
	Server  string `json:"Server"`
	Cluster string `json:"Cluster"`
	Backend string `json:"Backend"`
	connections map[string]connection
}

type connection struct {
	Name string `json:"Connection"`
	Addr string `json:"Addr"`
	Port uint64 `json:"Port"`
}

type rwPod struct {
	name       string
	ipAddr     string
	node       string
	devIds     map[string]struct{}
	backend    string
	connection string
}

type podTrack struct {
	pod *rwPod
	dn  bool
}

// Topic is affinityRouter
// port: 9092

func newKafkaClient(clientType string, host string, port int, instanceID string) (kafka.Client, error) {

	log.Infow("kafka-client-type", log.Fields{"client": clientType})
	switch clientType {
	case "sarama":
		return kafka.NewSaramaClient(
			kafka.Host(host),
			kafka.Port(port),
			kafka.ConsumerType(kafka.GroupCustomer),
			kafka.ProducerReturnOnErrors(true),
			kafka.ProducerReturnOnSuccess(true),
			kafka.ProducerMaxRetries(6),
			kafka.NumPartitions(3),
			kafka.ConsumerGroupName(instanceID),
			kafka.ConsumerGroupPrefix(instanceID),
			kafka.AutoCreateTopic(false),
			kafka.ProducerFlushFrequency(5),
			kafka.ProducerRetryBackoff(time.Millisecond*30)), nil
	}
	return nil, errors.New("unsupported-client-type")
}


func k8sClientSet() *kubernetes.Clientset {
	// creates the in-cluster config
	config, err := rest.InClusterConfig()
	if err != nil {
		panic(err.Error())
	}
	// creates the clientset
	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		panic(err.Error())
	}

	return clientset
}


func connect(addr string) (*grpc.ClientConn, error) {
	for ctr := 0; ctr < 100; ctr++ {
		log.Debugf("Trying to connect to %s", addr)
		conn, err := grpc.Dial(addr, grpc.WithInsecure())
		if err != nil {
			log.Debugf("Attempt to connect failed, retrying: %v", err)
		} else {
			log.Debugf("Connection succeeded")
			return conn, err
		}
		time.Sleep(10 * time.Second)
	}
	log.Debugf("Too many connection attempts, giving up!")
	return nil, errors.New("Timeout attempting to connect")
}

func getRwPods(cs *kubernetes.Clientset, coreFilter *regexp.Regexp) []*rwPod {
	var rtrn []*rwPod

	pods, err := cs.CoreV1().Pods("").List(metav1.ListOptions{})
	if err != nil {
		panic(err.Error())
	}
	log.Debugf("There are a total of %d pods in the cluster\n", len(pods.Items))

	for k, v := range pods.Items {
		if v.Namespace == "voltha" && coreFilter.MatchString(v.Name) {
			fmt.Printf("Namespace: %s, PodName: %s, PodIP: %s, Host: %s\n", v.Namespace, v.Name,
				v.Status.PodIP, v.Spec.NodeName)
			//fmt.Printf("Pod %v,%v\n\n\n",k,v)
			_ = k
			// Add this pod to the core structure.
			rtrn = append(rtrn, &rwPod{name: v.Name, ipAddr: v.Status.PodIP, node: v.Spec.NodeName,
				devIds: make(map[string]struct{}), backend: "", connection: ""})
		}
	}
	return rtrn
}

func queryDevices(pods []*rwPod) {
	for pk, pv := range pods {
		// Open a connection to the pod
		// port 50057
		conn, err := connect(pv.ipAddr + ":50057")
		if err != nil {
			log.Debugf("Could not query devices from %s, could not connect", pv.name)
			continue
		}
		client := vpb.NewVolthaServiceClient(conn)
		devs, err := client.ListDevices(context.Background(), &empty.Empty{})
		if err != nil {
			log.Error(err)
			conn.Close()
			continue
		}
		for _, dv := range devs.Items {
			pods[pk].devIds[dv.Id] = struct{}{}
		}
		conn.Close()
	}
}

func allEmpty(pods []*rwPod) bool {
	for k, _ := range pods {
		if len(pods[k].devIds) != 0 {
			return false
		}
	}
	return true
}

//func groupEmptyCores(pods []*rwPod) [][]*rwPod {
//	return [][]*rwPod{}
//}

//func groupPods(pods []*rwPod) [][]*rwPod {

//	if allEmpty(pods) == true {
//		return groupEmptyCores(pods)
//	} else {
//		return groupPopulatedCores(pods)
//	}
//}

func rmPod(pods []*rwPod, idx int) []*rwPod {
	return append(pods[:idx], pods[idx+1:]...)
}

func groupIntersectingPods1(pods []*rwPod, podCt int) ([][]*rwPod, []*rwPod) {
	var rtrn [][]*rwPod
	var out []*rwPod

	for {
		if len(pods) == 0 {
			break
		}
		if len(pods[0].devIds) == 0 { // Ignore pods with no devices
			////log.Debugf("%s empty pod", pd[k].pod.name)
			out = append(out, pods[0])
			pods = rmPod(pods, 0)
			continue
		}
		// Start a pod group with this pod
		var grp []*rwPod
		grp = append(grp, pods[0])
		pods = rmPod(pods, 0)
		//log.Debugf("Creating new group %s", pd[k].pod.name)
		// Find the peer pod based on device overlap
		// It's ok if one isn't found, an empty one will be used instead
		for k, _ := range pods {
			if len(pods[k].devIds) == 0 { // Skip pods with no devices
				//log.Debugf("%s empty pod", pd[k1].pod.name)
				continue
			}
			if intersect(grp[0].devIds, pods[k].devIds) == true {
				//log.Debugf("intersection found %s:%s", pd[k].pod.name, pd[k1].pod.name)
				if grp[0].node == pods[k].node {
					// This should never happen
					log.Errorf("pods %s and %s intersect and are on the same server!! Not pairing",
						grp[0].name, pods[k].name)
					continue
				}
				grp = append(grp, pods[k])
				pods = rmPod(pods, k)
				break
			}
		}
		rtrn = append(rtrn, grp)
		//log.Debugf("Added group %s", grp[0].name)
		// Check if the number of groups = half the pods, if so all groups are started.
		if len(rtrn) == podCt>>1 {
			// Append any remaining pods to out
			out = append(out, pods[0:]...)
			break
		}
	}
	return rtrn, out
}

func groupIntersectingPods(pd []*podTrack) ([][]*rwPod, []*podTrack) {
	var rtrn [][]*rwPod

	for k, _ := range pd {
		if pd[k].dn == true { // Already processed?
			//log.Debugf("%s already processed", pd[k].pod.name)
			continue
		}
		if len(pd[k].pod.devIds) == 0 { // Ignore pods with no devices
			////log.Debugf("%s empty pod", pd[k].pod.name)
			continue
		}
		// Start a pod group with this pod
		var grp []*rwPod
		grp = append(grp, pd[k].pod)
		pd[k].dn = true
		//log.Debugf("Creating new group %s", pd[k].pod.name)
		// Find the peer pod based on device overlap
		// It's ok if one isn't found, an empty one will be used instead
		for k1, _ := range pd {
			if pd[k1].dn == true { // Skip over eliminated pods
				//log.Debugf("%s eliminated pod", pd[k1].pod.name)
				continue
			}
			if len(pd[k1].pod.devIds) == 0 { // Skip pods with no devices
				//log.Debugf("%s empty pod", pd[k1].pod.name)
				continue
			}
			if intersect(pd[k].pod.devIds, pd[k1].pod.devIds) == true {
				//log.Debugf("intersection found %s:%s", pd[k].pod.name, pd[k1].pod.name)
				if pd[k].pod.node == pd[k1].pod.node {
					// This should never happen
					log.Errorf("pods %s and %s intersect and are on the same server!! Not pairing",
						pd[k].pod.name, pd[k1].pod.name)
					continue
				}
				pd[k1].dn = true
				grp = append(grp, pd[k1].pod)
				break
			}
		}
		rtrn = append(rtrn, grp)
		//log.Debugf("Added group %s", grp[0].name)
		// Check if the number of groups = half the pods, if so all groups are started.
		if len(rtrn) == len(pd)>>1 {
			break
		}
	}
	return rtrn, pd
}

func unallocPodCount(pd []*podTrack) int {
	var rtrn int = 0
	for _, v := range pd {
		if v.dn == false {
			rtrn++
		}
	}
	return rtrn
}


func sameNode(pod *rwPod, grps [][]*rwPod) bool {
	for _, v := range grps {
		if v[0].node == pod.node {
			return true
		}
		if len(v) == 2 && v[1].node == pod.node {
			return true
		}
	}
	return false
}

func startRemainingGroups1(grps [][]*rwPod, pods []*rwPod, podCt int) ([][]*rwPod, []*rwPod) {
	var grp []*rwPod

	// Use an index-based loop: rmPod shortens the slice, so ranging over the
	// original slice header could index past the new end of the slice.
	for k := 0; k < len(pods); k++ {
		if sameNode(pods[k], grps) {
			continue
		}
		grp = []*rwPod{}
		grp = append(grp, pods[k])
		pods = rmPod(pods, k)
		k-- // The pod that shifted into index k has not been examined yet.
		grps = append(grps, grp)
		if len(grps) == podCt>>1 {
			break
		}
	}
	return grps, pods
}

func startRemainingGroups(grps [][]*rwPod, pd []*podTrack) ([][]*rwPod, []*podTrack) {
	var grp []*rwPod

	for k, _ := range pd {
		if sameNode(pd[k].pod, grps) == true {
			continue
		}
		grp = []*rwPod{} // Start a fresh group for this pod, as in startRemainingGroups1
		grp = append(grp, pd[k].pod)
		grps = append(grps, grp)
		pd[k].dn = true
		if len(grps) == len(pd)>>1 {
			break
		}
	}
	return grps, pd
}

func hasSingleSecondNode(grp []*rwPod) bool {
	var srvrs map[string]struct{} = make(map[string]struct{})
	for k, _ := range grp {
		if k == 0 {
			continue // Ignore the first item
		}
		srvrs[grp[k].node] = struct{}{}
	}
	if len(srvrs) == 1 {
		return true
	}
	return false
}

func addNode(grps [][]*rwPod, idx *rwPod, item *rwPod) [][]*rwPod {
	for k, _ := range grps {
		if grps[k][0].name == idx.name {
			grps[k] = append(grps[k], item)
			return grps
		}
	}
	// TODO: Error checking required here.
	return grps
}

func removeNode(grps [][]*rwPod, item *rwPod) [][]*rwPod {
	for k, _ := range grps {
		for k1, _ := range grps[k] {
			if grps[k][k1].name == item.name {
				grps[k] = append(grps[k][:k1], grps[k][k1+1:]...)
				break
			}
		}
	}
	return grps
}

func groupRemainingPods1(grps [][]*rwPod, pods []*rwPod) [][]*rwPod {
	var lgrps [][]*rwPod
	// All groups must be started when this function is called.
	// Copy incomplete groups
	for k, _ := range grps {
		if len(grps[k]) != 2 {
			lgrps = append(lgrps, grps[k])
		}
	}

	// Add all pairing candidates to each started group.
	for k, _ := range pods {
		for k2, _ := range lgrps {
			if lgrps[k2][0].node != pods[k].node {
				lgrps[k2] = append(lgrps[k2], pods[k])
			}
		}
	}

	//TODO: If any member of lgrps doesn't have at least 2
	// nodes something is wrong. Check for that here

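	// A minimal sketch of the check the TODO above asks for (an addition,
	// not part of the original logic): any tentative group that still has
	// only its leader at this point has no pairing candidate on a different
	// node. Flag it here, since the pairing loop below assumes each entry
	// in lgrps has at least two members.
	for k, _ := range lgrps {
		if len(lgrps[k]) < 2 {
			log.Errorf("Pod %s has no pairing candidate on another node", lgrps[k][0].name)
		}
	}
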
	for {
		for { // Address groups with only a single server choice
			var ssn bool = false

			for k, _ := range lgrps {
				// Now if any of the groups only have a single
				// node as the choice for the second member
				// address that one first.
				if hasSingleSecondNode(lgrps[k]) == true {
					ssn = true
					// Add this pairing to the groups
					grps = addNode(grps, lgrps[k][0], lgrps[k][1])
					// Since this node is now used, remove it from all
					// remaining tentative groups
					lgrps = removeNode(lgrps, lgrps[k][1])
					// Now remove this group completely since
					// it's been addressed
					lgrps = append(lgrps[:k], lgrps[k+1:]...)
					break
				}
			}
			if ssn == false {
				break
			}
		}
		// Now address one of the remaining groups
		if len(lgrps) == 0 {
			break // Nothing left to do, exit the loop
		}
		grps = addNode(grps, lgrps[0][0], lgrps[0][1])
		lgrps = removeNode(lgrps, lgrps[0][1])
		lgrps = append(lgrps[:0], lgrps[1:]...)
	}
	return grps
}

func groupRemainingPods(grps [][]*rwPod, pd []*podTrack) [][]*rwPod {
	var lgrps [][]*rwPod
	// All groups must be started when this function is called.
	// Copy incomplete groups
	for k, _ := range grps {
		if len(grps[k]) != 2 {
			lgrps = append(lgrps, grps[k])
		}
	}

	// Add all pairing candidates to each started group.
	for k, _ := range pd {
		if pd[k].dn == true {
			continue
		}
		for k2, _ := range lgrps {
			if lgrps[k2][0].node != pd[k].pod.node {
				lgrps[k2] = append(lgrps[k2], pd[k].pod)
			}
		}
	}

	//TODO: If any member of lgrps doesn't have at least 2
	// nodes something is wrong. Check for that here

	for {
		for { // Address groups with only a single server choice
			var ssn bool = false

			for k, _ := range lgrps {
				// Now if any of the groups only have a single
				// node as the choice for the second member
				// address that one first.
				if hasSingleSecondNode(lgrps[k]) == true {
					ssn = true
					// Add this pairing to the groups
					grps = addNode(grps, lgrps[k][0], lgrps[k][1])
					// Since this node is now used, remove it from all
					// remaining tentative groups
					lgrps = removeNode(lgrps, lgrps[k][1])
					// Now remove this group completely since
					// it's been addressed
					lgrps = append(lgrps[:k], lgrps[k+1:]...)
					break
				}
			}
			if ssn == false {
				break
			}
		}
		// Now address one of the remaining groups
		if len(lgrps) == 0 {
			break // Nothing left to do, exit the loop
		}
		grps = addNode(grps, lgrps[0][0], lgrps[0][1])
		lgrps = removeNode(lgrps, lgrps[0][1])
		lgrps = append(lgrps[:0], lgrps[1:]...)
	}
	return grps
}

func groupPods1(pods []*rwPod) [][]*rwPod {
	var rtrn [][]*rwPod
	var podCt int = len(pods)

	rtrn, pods = groupIntersectingPods1(pods, podCt)
	// There are several outcomes here
	// 1) All pods have been paired and we're done
	// 2) Some un-allocated pods remain
	// 2.a) All groups have been started
	// 2.b) Not all groups have been started
	if len(pods) == 0 {
		return rtrn
	} else if len(rtrn) == podCt>>1 { // All groupings started
		// Allocate the remaining (presumably empty) pods to the started groups
		return groupRemainingPods1(rtrn, pods)
	} else { // Some groupings started
		// Start empty groups with the remaining pods, one per unused server,
		// then allocate whatever is left over.
		rtrn, pods = startRemainingGroups1(rtrn, pods, podCt)
		return groupRemainingPods1(rtrn, pods)
	}
}
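
// Illustrative walk-through (not part of the original code): with the six
// rw-core pods spread over three nodes that main() assumes, groupIntersectingPods1
// first pairs cores that already share device IDs (always across two different
// nodes), startRemainingGroups1 then opens a group for each leftover pod whose
// node does not yet lead a group, and groupRemainingPods1 fills every group up
// to two members on distinct nodes, yielding three two-pod groups, one per
// vcore backend.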

func groupPods(pods []*rwPod) [][]*rwPod {
	var rtrn [][]*rwPod
	var pd []*podTrack

	// Tracking of the grouping process
	for k, _ := range pods {
		pd = append(pd, &podTrack{pods[k], false})
	}

	rtrn, pd = groupIntersectingPods(pd)
	// There are several outcomes here
	// 1) All pods have been paired and we're done
	// 2) Some un-allocated pods remain
	// 2.a) All groups have been started
	// 2.b) Not all groups have been started
	if unallocPodCount(pd) == 0 {
		return rtrn
	} else if len(rtrn) == len(pd)>>1 { // All groupings started
		// Allocate the remaining (presumably empty) pods to the started groups
		return groupRemainingPods(rtrn, pd)
	} else { // Some groupings started
		// Start empty groups with the remaining pods, one per unused server,
		// then allocate whatever is left over.
		rtrn, pd = startRemainingGroups(rtrn, pd)
		return groupRemainingPods(rtrn, pd)
	}

	// NOTE: Every branch above returns, so the remainder of this function is
	// unreachable.

	// Establish groupings of non-empty pods that have overlapping devices.
	for k, _ := range pd {
		if pd[k].dn == true { // Already processed?
			//log.Debugf("%s already processed", pd[k].pod.name)
			continue
		}
		if len(pd[k].pod.devIds) == 0 { // Ignore pods with no devices
			////log.Debugf("%s empty pod", pd[k].pod.name)
			continue
		}
		// Start a pod group with this pod
		var grp []*rwPod
		grp = append(grp, pd[k].pod)
		pd[k].dn = true
		//log.Debugf("Creating new group %s", pd[k].pod.name)
		// Find the peer pod based on device overlap
		// It's ok if one isn't found, an empty one will be used instead
		for k1, _ := range pd {
			if pd[k1].dn == true { // Skip over eliminated pods
				//log.Debugf("%s eliminated pod", pd[k1].pod.name)
				continue
			}
			if len(pd[k1].pod.devIds) == 0 { // Skip pods with no devices
				//log.Debugf("%s empty pod", pd[k1].pod.name)
				continue
			}
			if intersect(pd[k].pod.devIds, pd[k1].pod.devIds) == true {
				//log.Debugf("intersection found %s:%s", pd[k].pod.name, pd[k1].pod.name)
				pd[k1].dn = true
				grp = append(grp, pd[k1].pod)
				break
			}
		}
		rtrn = append(rtrn, grp)
		//log.Debugf("Added group %s", grp[0].name)
	}
	// Now find any grouping without 2 members and assign one of the
	// pods with no devices and on a different server to it.
	// If there are no pods with no devices left before all
	// groups are filled report an exception but leave one of the
	// groups with only one pod.
	for k, _ := range rtrn {
		if len(rtrn[k]) < 2 {
			for k2, _ := range pd {
				if pd[k2].dn == true {
					continue
				}
				// There should be only empty pods here
				if len(pd[k2].pod.devIds) != 0 {
					log.Error("Non empty pod found where empty pod was expected")
					continue
				}
				if pd[k2].pod.node == rtrn[k][0].node {
					//log.Error("Pods aren't on different servers, continuing")
					continue
				}
				// Add this empty and unused pod to the group
				//log.Debugf("Adding empty pod %s", pd[k2].pod.name)
				rtrn[k] = append(rtrn[k], pd[k2].pod)
				pd[k2].dn = true
				break
			}
		}
	}
	return rtrn
}

func intersect(d1 map[string]struct{}, d2 map[string]struct{}) bool {
	for k, _ := range d1 {
		if _, ok := d2[k]; ok == true {
			return true
		}
	}
	return false
}
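
// For example, intersect(map[string]struct{}{"dev1": {}, "dev2": {}},
// map[string]struct{}{"dev2": {}}) returns true, while two disjoint
// device-ID sets return false.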

func setConnection(client pb.ConfigurationClient, backend string, connection string, addr string, port uint64) {
	log.Debugf("Configuring backend %s : connection %s\n\n", backend, connection)
	cnf := &pb.Conn{Server: "grpc_command", Cluster: "vcore", Backend: backend,
		Connection: connection, Addr: addr,
		Port: port}
	if res, err := client.SetConnection(context.Background(), cnf); err != nil {
		log.Debugf("failed SetConnection RPC call: %s", err)
	} else {
		log.Debugf("Result: %v", res)
	}
}

func setAffinity(client pb.ConfigurationClient, ids map[string]struct{}, backend string) {
	log.Debugf("Configuring backend %s : affinities \n", backend)
	aff := &pb.Affinity{Router: "vcore", Route: "dev_manager", Cluster: "vcore", Backend: backend}
	for k, _ := range ids {
		log.Debugf("Setting affinity for id %s", k)
		aff.Id = k
		if res, err := client.SetAffinity(context.Background(), aff); err != nil {
			log.Debugf("failed affinity RPC call: %s", err)
		} else {
			log.Debugf("Result: %v", res)
		}
	}
}

func getBackendForCore(coreId string, coreGroups [][]*rwPod) string {
	for _, v := range coreGroups {
		for _, v2 := range v {
			if v2.name == coreId {
				return v2.backend
			}
		}
	}
	log.Errorf("No backend found for core %s\n", coreId)
	return ""
}

func monitorDiscovery(client pb.ConfigurationClient,
	ch <-chan *ic.InterContainerMessage,
	coreGroups [][]*rwPod) {
	var id map[string]struct{} = make(map[string]struct{})

	// Note: this waits for a single discovery notification; the goroutine
	// started by startDiscoveryMonitor returns after handling it.
	select {
	case msg := <-ch:
		log.Debugf("Received a device discovery notification")
		device := &ic.DeviceDiscovered{}
		if err := ptypes.UnmarshalAny(msg.Body, device); err != nil {
			log.Errorf("Could not unmarshal received notification %v", msg)
		} else {
			// Set the affinity of the discovered device.
			if be := getBackendForCore(device.Id, coreGroups); be != "" {
				id[device.Id] = struct{}{}
				setAffinity(client, id, be)
			} else {
				log.Error("Can't use an empty string as a backend name")
			}
		}
		break
	}
}

func startDiscoveryMonitor(client pb.ConfigurationClient,
	coreGroups [][]*rwPod) error {
	var ch <-chan *ic.InterContainerMessage
	// Connect to kafka for discovery events
	topic := &kafka.Topic{Name: "AffinityRouter"}
	kc, err := newKafkaClient("sarama", "kafka", 9092, "arouterd")
	if err != nil {
		log.Error("Could not create a kafka client, discovery disabled")
		return err
	}
	kc.Start()

	if ch, err = kc.Subscribe(topic); err != nil {
		log.Error("Could not subscribe to the 'AffinityRouter' channel, discovery disabled")
		return err
	}
	go monitorDiscovery(client, ch, coreGroups)
	return nil
}

func startCoreMonitor(client pb.ConfigurationClient,
	clientset *kubernetes.Clientset,
	coreFltr *regexp.Regexp,
	coreGroups [][]*rwPod) error {
	// Now that initial allocation has been completed, monitor the pods
	// for IP changes
	// The main loop needs to do the following:
	// 1) Periodically query the pods and filter out
	//    the vcore ones
	// 2) Validate that the pods running are the same
	//    as the previous check
	// 3) Validate that the IP addresses are the same
	//    as the last check.
	// If the pod name(s) have changed then remove
	// the unused pod names and add in the new pod names
	// maintaining the cluster/backend information.
	// If an IP address has changed (which shouldn't
	// happen unless a pod is re-started) it should get
	// caught by the pod name change.
	for {
		time.Sleep(10 * time.Second) // Wait a while
		// Get the rw core list from k8s
		rwPods := getRwPods(clientset, coreFltr)
		// If we didn't get 2n+1 pods then wait since
		// something is down and will hopefully come
		// back up at some point.
		// TODO: remove the 6 pod hardcoding
		if len(rwPods) != 6 {
			continue
		}
		// We have all pods, check if any IP addresses
		// have changed.
		for _, v := range rwPods {
			//if hasIpAddr(coreGroups, v.ipAddr) == false {
			//	log.Debug("Address has changed...")
			//	applyAddrDiffs(coreGroups, rwPods)
			//}
			_ = v
		}
	}

}
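
// The commented-out block in startCoreMonitor refers to hasIpAddr and
// applyAddrDiffs, neither of which is defined in this file. The function
// below is a minimal sketch of hasIpAddr (an assumption, not the original
// implementation): it reports whether any pod in the computed core groups
// already carries the given IP address. An applyAddrDiffs counterpart would
// then update the affected rwPod entries and push the new addresses to the
// router with setConnection.
func hasIpAddr(coreGroups [][]*rwPod, ipAddr string) bool {
	for _, grp := range coreGroups {
		for _, pod := range grp {
			if pod.ipAddr == ipAddr {
				return true
			}
		}
	}
	return false
}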

func main() {
	// This is currently hard coded to a cluster with 3 servers
	//var connections map[string]configConn = make(map[string]configConn)
	//var rwCorePodsPrev map[string]rwPod = make(map[string]rwPod)
	var rwCoreNodesPrev map[string][]rwPod = make(map[string][]rwPod)
	var firstTime bool = true
	var err error
	var conn *grpc.ClientConn

	// Set up the regular expression to identify the voltha cores
	coreFltr := regexp.MustCompile(`rw-core[0-9]-`)

	// Set up logging
	if _, err := log.SetDefaultLogger(log.JSON, 0, nil); err != nil {
		log.With(log.Fields{"error": err}).Fatal("Cannot setup logging")
	}

	// Set up kubernetes api
	clientset := k8sClientSet()

	// Connect to the affinity router and set up the client
	conn, err = connect("localhost:55554") // This is a sidecar container so communicating over localhost
	if err != nil {
		panic(err.Error())
	}
	client := pb.NewConfigurationClient(conn)

	// Get the voltha rw-core pods
	rwPods := getRwPods(clientset, coreFltr)

	// Fetch the devices held by each running core
	queryDevices(rwPods)

	// For debugging... comment out l8r
	for _, v := range rwPods {
		log.Debugf("Pod list %v", *v)
	}

	coreGroups := groupPods1(rwPods)

	// Assign the groupings to the backends and connections
	for k, _ := range coreGroups {
		for k1, _ := range coreGroups[k] {
			coreGroups[k][k1].backend = "vcore" + strconv.Itoa(k+1)
			coreGroups[k][k1].connection = "vcore" + strconv.Itoa(k+1) + strconv.Itoa(k1+1)
		}
	}
	log.Debug("Core grouping completed")

	// TODO: Debugging code, comment out for production
	for k, v := range coreGroups {
		for k2, v2 := range v {
			log.Debugf("Core group %d,%d: %v", k, k2, v2)
		}
	}
	log.Debug("Setting affinities")
	// Now set the affinities for existing devices in the cores
	for _, v := range coreGroups {
		setAffinity(client, v[0].devIds, v[0].backend)
		setAffinity(client, v[1].devIds, v[1].backend)
	}
	log.Debug("Setting connections")
	// Configure the backends based on the calculated core groups
	for _, v := range coreGroups {
		setConnection(client, v[0].backend, v[0].connection, v[0].ipAddr, 50057)
		setConnection(client, v[1].backend, v[1].connection, v[1].ipAddr, 50057)
	}

	log.Debug("Starting discovery monitoring")
	startDiscoveryMonitor(client, coreGroups)

	log.Debugf("Starting core monitoring")
	startCoreMonitor(client, clientset, coreFltr, coreGroups) // Never returns
	return


	// NOTE: main() returns above, so the legacy polling loop below is
	// unreachable.
	// The main loop needs to do the following:
	// 1) Periodically query the pods and filter out
	//    the vcore ones
	// 2) Validate that the pods running are the same
	//    as the previous check
	// 3) Validate that the IP addresses are the same
	//    as the last check.
	// If the pod name(s) have changed then remove
	// the unused pod names and add in the new pod names
	// maintaining the cluster/backend information.
	// If an IP address has changed (which shouldn't
	// happen unless a pod is re-started) it should get
	// caught by the pod name change.
	for {
		var rwCorePods map[string]rwPod = make(map[string]rwPod)
		var rwCoreNodes map[string][]rwPod = make(map[string][]rwPod)
		pods, err := clientset.CoreV1().Pods("").List(metav1.ListOptions{})
		if err != nil {
			panic(err.Error())
		}
		log.Debugf("There are %d pods in the cluster\n", len(pods.Items))

		/*
			for k,v := range pods.Items {
				if v.Namespace == "voltha" && coreFltr.MatchString(v.Name) {
					fmt.Printf("Namespace: %s, PodName: %s, PodIP: %s, Host: %s\n", v.Namespace, v.Name,
						v.Status.PodIP, v.Spec.NodeName)
					//fmt.Printf("Pod %v,%v\n\n\n",k,v)
					_ = k
					// Add this pod to the core structure.
					if firstTime == true {
						rwCorePodsPrev[v.Name] = rwPod{name:v.Name,node:v.Spec.NodeName}
						rwCoreNodesPrev[v.Spec.NodeName] =
							append(rwCoreNodesPrev[v.Spec.NodeName], rwPod{name:v.Name,node:v.Spec.NodeName})
					}
					rwCorePods[v.Name] = rwPod{v.Name,v.Status.PodIP,v.Spec.NodeName, "", ""}
					rwCoreNodes[v.Spec.NodeName] =
						append(rwCoreNodes[v.Spec.NodeName], rwPod{v.Name,v.Status.PodIP,v.Spec.NodeName,"",""})
				}
			}
		*/

		if len(rwCorePods) != 6 {
			continue
		}

		//fmt.Printf("Pod map: %v\n", rwCorePods)
		//fmt.Printf("Pod map2: %v\n", rwCoreNodes)

		// Examples for error handling:
		// - Use helper functions like e.g. errors.IsNotFound()
		// - And/or cast to StatusError and use its properties like e.g. ErrStatus.Message
		/*
			_, err = clientset.CoreV1().Pods("default").Get("example-xxxxx", metav1.GetOptions{})
			if errors.IsNotFound(err) {
				fmt.Printf("Pod not found\n")
			} else if statusError, isStatus := err.(*errors.StatusError); isStatus {
				fmt.Printf("Error getting pod %v\n", statusError.ErrStatus.Message)
			} else if err != nil {
				panic(err.Error())
			} else {
				fmt.Printf("Found pod\n")
			}
		*/
		// Set the association to backends and connections only once.
		// TODO: This needs to be reworked for when a pod crashes
		// and its name changes.
		if firstTime == true {
			be := 1
			for k, _ := range rwCoreNodesPrev { // Each node has 2 cores running on it
				// Use a pretty dumb distribution algorithm.
				log.Debugf("Processing core node %s:%d\n", k, be)
				rwCoreNodesPrev[k][0].backend = "vcore" + strconv.Itoa(be)
				rwCoreNodesPrev[k][0].connection = "vcore" + strconv.Itoa(be) + strconv.Itoa(1)
				rwCoreNodesPrev[k][1].backend = "vcore" + strconv.Itoa(be%3+1)
				rwCoreNodesPrev[k][1].connection = "vcore" + strconv.Itoa(be%3+1) + strconv.Itoa(2)
				be++
			}
		}

		log.Debugf("Backend Allocation: %v", rwCoreNodesPrev)
		// Compare the current node IPs with the previous node IPs and if they differ
		// then set the new one and send the command to configure the router with the
		// new backend connection.
		for k, v := range rwCoreNodesPrev {
			if rwCoreNodes[k][0].ipAddr != rwCoreNodesPrev[k][0].ipAddr {
				log.Debugf("Configuring backend %s : connection %s\n\n", v[0].backend, v[0].connection)
				cnf := &pb.Conn{Server: "grpc_command", Cluster: "vcore", Backend: rwCoreNodesPrev[k][0].backend,
					Connection: rwCoreNodesPrev[k][0].connection, Addr: rwCoreNodes[k][0].ipAddr,
					Port: 50057}
				if res, err := client.SetConnection(context.Background(), cnf); err != nil {
					log.Debugf("failed SetConnection RPC call: %s", err)
				} else {
					log.Debugf("Result: %v", res)
					rwCoreNodesPrev[k][0].ipAddr = rwCoreNodes[k][0].ipAddr
				}
			}
			if rwCoreNodes[k][1].ipAddr != rwCoreNodesPrev[k][1].ipAddr {
				log.Debugf("Configuring backend %s : connection %s\n\n", v[1].backend, v[1].connection)
				cnf := &pb.Conn{Server: "grpc_command", Cluster: "vcore", Backend: rwCoreNodesPrev[k][1].backend,
					Connection: rwCoreNodesPrev[k][1].connection, Addr: rwCoreNodes[k][1].ipAddr,
					Port: 50057}
				if res, err := client.SetConnection(context.Background(), cnf); err != nil {
					log.Debugf("failed SetConnection RPC call: %s", err)
				} else {
					log.Debugf("Result: %v", res)
					rwCoreNodesPrev[k][1].ipAddr = rwCoreNodes[k][1].ipAddr
				}
			}
		}

		fmt.Printf("The structure for setting the connections is: %v\n", rwCoreNodesPrev)
		firstTime = false

		// Now make the API calls
		time.Sleep(10 * time.Second)
	}
	conn.Close()

}