/*
 * Copyright 2018-present Open Networking Foundation

 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at

 * http://www.apache.org/licenses/LICENSE-2.0

 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package main

import (
	"errors"
	"flag"
	"fmt"
	"math"
	"os"
	"path"
	"regexp"
	"strconv"
	"time"

	"github.com/golang/protobuf/ptypes"
	"github.com/golang/protobuf/ptypes/empty"
	"github.com/opencord/voltha-go/common/log"
	"github.com/opencord/voltha-go/common/version"
	"github.com/opencord/voltha-go/kafka"
	pb "github.com/opencord/voltha-protos/go/afrouter"
	cmn "github.com/opencord/voltha-protos/go/common"
	ic "github.com/opencord/voltha-protos/go/inter_container"
	vpb "github.com/opencord/voltha-protos/go/voltha"
	"golang.org/x/net/context"
	"google.golang.org/grpc"
	"google.golang.org/grpc/connectivity"
	"google.golang.org/grpc/keepalive"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/clientcmd"
)

type volthaPod struct {
	name       string
	ipAddr     string
	node       string
	devIds     map[string]struct{}
	cluster    string
	backend    string
	connection string
}

type podTrack struct {
	pod *volthaPod
	dn  bool
}

type Configuration struct {
	DisplayVersionOnly *bool
}

var (
	// if k8s variables are undefined, will attempt to use in-cluster config
	k8sApiServer      = getStrEnv("K8S_API_SERVER", "")
	k8sKubeConfigPath = getStrEnv("K8S_KUBE_CONFIG_PATH", "")

	podNamespace = getStrEnv("POD_NAMESPACE", "voltha")
	podGrpcPort  = uint64(getIntEnv("POD_GRPC_PORT", 0, math.MaxUint16, 50057))

	numRWPods = getIntEnv("NUM_RW_PODS", 1, math.MaxInt32, 6)
	numROPods = getIntEnv("NUM_RO_PODS", 1, math.MaxInt32, 3)

	afrouterApiAddress = getStrEnv("AFROUTER_API_ADDRESS", "localhost:55554")

	afrouterRouterName    = getStrEnv("AFROUTER_ROUTER_NAME", "vcore")
	afrouterRWClusterName = getStrEnv("AFROUTER_RW_CLUSTER_NAME", "vcore")
	afrouterROClusterName = getStrEnv("AFROUTER_RO_CLUSTER_NAME", "ro_vcore")

	kafkaTopic      = getStrEnv("KAFKA_TOPIC", "AffinityRouter")
	kafkaClientType = getStrEnv("KAFKA_CLIENT_TYPE", "sarama")
	kafkaHost       = getStrEnv("KAFKA_HOST", "kafka")
	kafkaPort       = getIntEnv("KAFKA_PORT", 0, math.MaxUint16, 9092)
	kafkaInstanceID = getStrEnv("KAFKA_INSTANCE_ID", "arouterd")
)

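// getIntEnv returns the integer value of the environment variable 'key', or defaultValue
// if the variable is unset. A value that is not a number, or that falls outside
// [min, max], causes a panic.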
func getIntEnv(key string, min, max, defaultValue int) int {
	if val, have := os.LookupEnv(key); have {
		num, err := strconv.Atoi(val)
		if err != nil || !(min <= num && num <= max) {
			panic(fmt.Errorf("%s must be a number in the range [%d, %d]; default: %d", key, min, max, defaultValue))
		}
		return num
	}
	return defaultValue
}

func getStrEnv(key, defaultValue string) string {
	if val, have := os.LookupEnv(key); have {
		return val
	}
	return defaultValue
}

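// newKafkaClient creates a kafka client of the requested type; only the "sarama"
// client type is currently supported.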
func newKafkaClient(clientType string, host string, port int, instanceID string) (kafka.Client, error) {
	log.Infow("kafka-client-type", log.Fields{"client": clientType})
	switch clientType {
	case "sarama":
		return kafka.NewSaramaClient(
			kafka.Host(host),
			kafka.Port(port),
			kafka.ConsumerType(kafka.GroupCustomer),
			kafka.ProducerReturnOnErrors(true),
			kafka.ProducerReturnOnSuccess(true),
			kafka.ProducerMaxRetries(6),
			kafka.NumPartitions(3),
			kafka.ConsumerGroupName(instanceID),
			kafka.ConsumerGroupPrefix(instanceID),
			kafka.AutoCreateTopic(false),
			kafka.ProducerFlushFrequency(5),
			kafka.ProducerRetryBackoff(time.Millisecond*30)), nil
	}
	return nil, errors.New("unsupported-client-type")
}

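// k8sClientSet builds a kubernetes clientset, either from the K8S_API_SERVER /
// K8S_KUBE_CONFIG_PATH settings or, if those are unset, from the in-cluster config.
// It panics if no configuration can be loaded.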
func k8sClientSet() *kubernetes.Clientset {
	var config *rest.Config
	if k8sApiServer != "" || k8sKubeConfigPath != "" {
		// use combination of URL & local kube-config file
		c, err := clientcmd.BuildConfigFromFlags(k8sApiServer, k8sKubeConfigPath)
		if err != nil {
			panic(err)
		}
		config = c
	} else {
		// use in-cluster config
		c, err := rest.InClusterConfig()
		if err != nil {
			log.Errorf("Unable to load in-cluster config. Try setting K8S_API_SERVER and K8S_KUBE_CONFIG_PATH?")
			panic(err)
		}
		config = c
	}
	// creates the clientset
	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		panic(err)
	}

	return clientset
}

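// connect dials a gRPC connection to the given address, blocking until the
// connection is up or the context expires.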
func connect(ctx context.Context, addr string) (*grpc.ClientConn, error) {
	log.Debugf("Trying to connect to %s", addr)
	conn, err := grpc.DialContext(ctx, addr,
		grpc.WithInsecure(),
		grpc.WithBlock(),
		grpc.WithBackoffMaxDelay(time.Second*5),
		grpc.WithKeepaliveParams(keepalive.ClientParameters{Time: time.Second * 10, Timeout: time.Second * 5}))
	if err == nil {
		log.Debugf("Connection succeeded")
	}
	return conn, err
}

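// getVolthaPods returns the pods in the configured namespace whose names match
// coreFilter and that have been assigned an IP address.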
func getVolthaPods(cs *kubernetes.Clientset, coreFilter *regexp.Regexp) ([]*volthaPod, error) {
	pods, err := cs.CoreV1().Pods(podNamespace).List(metav1.ListOptions{})
	if err != nil {
		return nil, err
	}
	//log.Debugf("There are a total of %d pods in the cluster\n", len(pods.Items))

	var rtrn []*volthaPod
	for _, v := range pods.Items {
		if coreFilter.MatchString(v.Name) {
			log.Debugf("Namespace: %s, PodName: %s, PodIP: %s, Host: %s\n", v.Namespace, v.Name,
				v.Status.PodIP, v.Spec.NodeName)
			// Only add the pod if it has an IP address. If it doesn't then it likely crashed
			// and is still in the process of getting re-started.
			if v.Status.PodIP != "" {
				rtrn = append(rtrn, &volthaPod{name: v.Name, ipAddr: v.Status.PodIP, node: v.Spec.NodeName,
					devIds: make(map[string]struct{}), backend: "", connection: ""})
			}
		}
	}
	return rtrn, nil
}

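// reconcilePodDeviceIds asks the given pod to reconcile the provided set of device
// ids, returning false if the pod cannot be reached or the RPC fails.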
func reconcilePodDeviceIds(pod *volthaPod, ids map[string]struct{}) bool {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	defer cancel()
	conn, err := connect(ctx, fmt.Sprintf("%s:%d", pod.ipAddr, podGrpcPort))
	if err != nil {
		log.Debugf("Could not query devices from %s, could not connect", pod.name)
		return false
	}
	defer conn.Close()

	var idList cmn.IDs
	for k := range ids {
		idList.Items = append(idList.Items, &cmn.ID{Id: k})
	}

	client := vpb.NewVolthaServiceClient(conn)
	_, err = client.ReconcileDevices(context.Background(), &idList)
	if err != nil {
		log.Error(err)
		return false
	}

	return true
}

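// queryPodDeviceIds returns the set of device ids currently owned by the given pod,
// or an empty set if the pod cannot be queried.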
func queryPodDeviceIds(pod *volthaPod) map[string]struct{} {
	var rtrn = make(map[string]struct{})
	// Open a connection to the pod
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	defer cancel()
	conn, err := connect(ctx, fmt.Sprintf("%s:%d", pod.ipAddr, podGrpcPort))
	if err != nil {
		log.Debugf("Could not query devices from %s, could not connect", pod.name)
		return rtrn
	}
	defer conn.Close()
	client := vpb.NewVolthaServiceClient(conn)
	devs, err := client.ListDeviceIds(context.Background(), &empty.Empty{})
	if err != nil {
		log.Error(err)
		return rtrn
	}
	for _, dv := range devs.Items {
		rtrn[dv.Id] = struct{}{}
	}

	return rtrn
}

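// queryDeviceIds refreshes the device id set of each pod in the list, keeping a
// pod's previous set when the query returns nothing.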
func queryDeviceIds(pods []*volthaPod) {
	for pk := range pods {
		// Keep the old Id list if a new list is not returned
		if idList := queryPodDeviceIds(pods[pk]); len(idList) != 0 {
			pods[pk].devIds = idList
		}
	}
}

func allEmpty(pods []*volthaPod) bool {
	for k := range pods {
		if len(pods[k].devIds) != 0 {
			return false
		}
	}
	return true
}

func rmPod(pods []*volthaPod, idx int) []*volthaPod {
	return append(pods[:idx], pods[idx+1:]...)
}

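// groupIntersectingPods1 builds core groups by pairing pods whose device id sets
// intersect (and that run on different nodes). It returns the groups that were
// started along with the pods left unallocated.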
func groupIntersectingPods1(pods []*volthaPod, podCt int) ([][]*volthaPod, []*volthaPod) {
	var rtrn [][]*volthaPod
	var out []*volthaPod

	for {
		if len(pods) == 0 {
			break
		}
		if len(pods[0].devIds) == 0 { // Ignore pods with no devices
			//log.Debugf("%s empty pod", pd[k].pod.name)
			out = append(out, pods[0])
			pods = rmPod(pods, 0)
			continue
		}
		// Start a pod group with this pod
		var grp []*volthaPod
		grp = append(grp, pods[0])
		pods = rmPod(pods, 0)
		//log.Debugf("Creating new group %s", pd[k].pod.name)
		// Find the peer pod based on device overlap
		// It's ok if one isn't found, an empty one will be used instead
		for k := range pods {
			if len(pods[k].devIds) == 0 { // Skip pods with no devices
				//log.Debugf("%s empty pod", pd[k1].pod.name)
				continue
			}
			if intersect(grp[0].devIds, pods[k].devIds) {
				//log.Debugf("intersection found %s:%s", pd[k].pod.name, pd[k1].pod.name)
				if grp[0].node == pods[k].node {
					// This should never happen
					log.Errorf("pods %s and %s intersect and are on the same server!! Not pairing",
						grp[0].name, pods[k].name)
					continue
				}
				grp = append(grp, pods[k])
				pods = rmPod(pods, k)
				break
			}
		}
		rtrn = append(rtrn, grp)
		//log.Debugf("Added group %s", grp[0].name)
		// Check if the number of groups = half the pods, if so all groups are started.
		if len(rtrn) == podCt>>1 {
			// Append any remaining pods to out
			out = append(out, pods[0:]...)
			break
		}
	}
	return rtrn, out
}

func unallocPodCount(pd []*podTrack) int {
	var rtrn int = 0
	for _, v := range pd {
		if !v.dn {
			rtrn++
		}
	}
	return rtrn
}

func sameNode(pod *volthaPod, grps [][]*volthaPod) bool {
	for _, v := range grps {
		if v[0].node == pod.node {
			return true
		}
		if len(v) == 2 && v[1].node == pod.node {
			return true
		}
	}
	return false
}

func startRemainingGroups1(grps [][]*volthaPod, pods []*volthaPod, podCt int) ([][]*volthaPod, []*volthaPod) {
	var grp []*volthaPod

	for k := range pods {
		if sameNode(pods[k], grps) {
			continue
		}
		grp = []*volthaPod{}
		grp = append(grp, pods[k])
		pods = rmPod(pods, k)
		grps = append(grps, grp)
		if len(grps) == podCt>>1 {
			break
		}
	}
	return grps, pods
}

func hasSingleSecondNode(grp []*volthaPod) bool {
	var servers = make(map[string]struct{})
	for k := range grp {
		if k == 0 {
			continue // Ignore the first item
		}
		servers[grp[k].node] = struct{}{}
	}
	if len(servers) == 1 {
		return true
	}
	return false
}

func addNode(grps [][]*volthaPod, idx *volthaPod, item *volthaPod) [][]*volthaPod {
	for k := range grps {
		if grps[k][0].name == idx.name {
			grps[k] = append(grps[k], item)
			return grps
		}
	}
	// TODO: Error checking required here.
	return grps
}

func removeNode(grps [][]*volthaPod, item *volthaPod) [][]*volthaPod {
	for k := range grps {
		for k1 := range grps[k] {
			if grps[k][k1].name == item.name {
				grps[k] = append(grps[k][:k1], grps[k][k1+1:]...)
				break
			}
		}
	}
	return grps
}

func groupRemainingPods1(grps [][]*volthaPod, pods []*volthaPod) [][]*volthaPod {
	var lgrps [][]*volthaPod
	// All groups must be started when this function is called.
	// Copy incomplete groups
	for k := range grps {
		if len(grps[k]) != 2 {
			lgrps = append(lgrps, grps[k])
		}
	}

	// Add all pairing candidates to each started group.
	for k := range pods {
		for k2 := range lgrps {
			if lgrps[k2][0].node != pods[k].node {
				lgrps[k2] = append(lgrps[k2], pods[k])
			}
		}
	}

	// TODO: If any member of lgrps doesn't have at least 2
	// nodes something is wrong. Check for that here

	for {
		for { // Address groups with only a single server choice
			var ssn bool = false

			for k := range lgrps {
				// Now if any of the groups only have a single
				// node as the choice for the second member
				// address that one first.
				if hasSingleSecondNode(lgrps[k]) {
					ssn = true
					// Add this pairing to the groups
					grps = addNode(grps, lgrps[k][0], lgrps[k][1])
					// Since this node is now used, remove it from all
					// remaining tentative groups
					lgrps = removeNode(lgrps, lgrps[k][1])
					// Now remove this group completely since
					// it's been addressed
					lgrps = append(lgrps[:k], lgrps[k+1:]...)
					break
				}
			}
			if !ssn {
				break
			}
		}
		// Now address one of the remaining groups
		if len(lgrps) == 0 {
			break // Nothing left to do, exit the loop
		}
		grps = addNode(grps, lgrps[0][0], lgrps[0][1])
		lgrps = removeNode(lgrps, lgrps[0][1])
		lgrps = append(lgrps[:0], lgrps[1:]...)
	}
	return grps
}

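// groupPods1 organizes the rw-core pods into active-active pairs: pods sharing
// device ids are paired first, then any remaining (typically empty) pods are used
// to start and fill out the remaining groups on distinct nodes.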
func groupPods1(pods []*volthaPod) [][]*volthaPod {
	var rtrn [][]*volthaPod
	var podCt int = len(pods)

	rtrn, pods = groupIntersectingPods1(pods, podCt)
	// There are several outcomes here
	// 1) All pods have been paired and we're done
	// 2) Some un-allocated pods remain
	// 2.a) All groups have been started
	// 2.b) Not all groups have been started
	if len(pods) == 0 {
		return rtrn
	} else if len(rtrn) == podCt>>1 { // All groupings started
		// Allocate the remaining (presumably empty) pods to the started groups
		return groupRemainingPods1(rtrn, pods)
	} else { // Some groupings started
		// Start empty groups with the remaining pods, each
		// grouping on a different server, then allocate the
		// remaining pods.
		rtrn, pods = startRemainingGroups1(rtrn, pods, podCt)
		return groupRemainingPods1(rtrn, pods)
	}
}

func intersect(d1 map[string]struct{}, d2 map[string]struct{}) bool {
	for k := range d1 {
		if _, ok := d2[k]; ok {
			return true
		}
	}
	return false
}

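// setConnection pushes the backend/connection to address:port mapping for the named
// cluster to the affinity router.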
func setConnection(ctx context.Context, client pb.ConfigurationClient, cluster string, backend string, connection string, addr string, port uint64) {
	log.Debugf("Configuring backend %s : connection %s in cluster %s\n\n",
		backend, connection, cluster)
	cnf := &pb.Conn{Server: "grpc_command", Cluster: cluster, Backend: backend,
		Connection: connection, Addr: addr,
		Port: port}
	if res, err := client.SetConnection(ctx, cnf); err != nil {
		log.Debugf("failed SetConnection RPC call: %s", err)
	} else {
		log.Debugf("Result: %v", res)
	}
}

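// setAffinity binds each of the provided device ids to the given backend on the
// affinity router.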
func setAffinity(ctx context.Context, client pb.ConfigurationClient, ids map[string]struct{}, backend string) {
	log.Debugf("Configuring backend %s : affinities \n", backend)
	aff := &pb.Affinity{Router: afrouterRouterName, Route: "dev_manager", Cluster: afrouterRWClusterName, Backend: backend}
	for k := range ids {
		log.Debugf("Setting affinity for id %s", k)
		aff.Id = k
		if res, err := client.SetAffinity(ctx, aff); err != nil {
			log.Debugf("failed affinity RPC call: %s", err)
		} else {
			log.Debugf("Result: %v", res)
		}
	}
}

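// getBackendForCore returns the backend name of the core group member whose pod name
// matches coreId, or an empty string if no match is found.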
func getBackendForCore(coreId string, coreGroups [][]*volthaPod) string {
	for _, v := range coreGroups {
		for _, v2 := range v {
			if v2.name == coreId {
				return v2.backend
			}
		}
	}
	log.Errorf("No backend found for core %s\n", coreId)
	return ""
}

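// monitorDiscovery handles a single device discovery notification from kafka, setting
// the affinity of the discovered device to the backend of the core that owns it, and
// closes doneCh when it returns.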
func monitorDiscovery(ctx context.Context,
	client pb.ConfigurationClient,
	ch <-chan *ic.InterContainerMessage,
	coreGroups [][]*volthaPod,
	doneCh chan<- struct{}) {
	defer close(doneCh)

	var id = make(map[string]struct{})

	select {
	case <-ctx.Done():
	case msg := <-ch:
		log.Debugf("Received a device discovery notification")
		device := &ic.DeviceDiscovered{}
		if err := ptypes.UnmarshalAny(msg.Body, device); err != nil {
			log.Errorf("Could not unmarshal received notification %v", msg)
		} else {
			// Set the affinity of the discovered device.
			if be := getBackendForCore(device.Id, coreGroups); be != "" {
				id[device.Id] = struct{}{}
				setAffinity(ctx, client, id, be)
			} else {
				log.Error("Can't use an empty string as a backend name")
			}
		}
	}
}

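// startDiscoveryMonitor subscribes to the kafka discovery topic and launches
// monitorDiscovery in the background. The returned channel is closed once
// monitoring ends.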
func startDiscoveryMonitor(ctx context.Context,
	client pb.ConfigurationClient,
	coreGroups [][]*volthaPod) (<-chan struct{}, error) {
	doneCh := make(chan struct{})
	var ch <-chan *ic.InterContainerMessage
	// Connect to kafka for discovery events
	topic := &kafka.Topic{Name: kafkaTopic}
	kc, err := newKafkaClient(kafkaClientType, kafkaHost, kafkaPort, kafkaInstanceID)
	if err != nil {
		// don't attempt to start a client that could not be created
		log.Errorf("Could not create kafka client, discovery disabled: %s", err)
		close(doneCh)
		return doneCh, err
	}
	kc.Start()
	defer kc.Stop()

	if ch, err = kc.Subscribe(topic); err != nil {
		log.Errorf("Could not subscribe to the '%s' channel, discovery disabled", kafkaTopic)
		close(doneCh)
		return doneCh, err
	}

	go monitorDiscovery(ctx, client, ch, coreGroups, doneCh)
	return doneCh, nil
}

// Determines which items in the core groups
// have changed based on the list provided
// and returns a coreGroup with only the changed
// items and a pod list with the new items
func getAddrDiffs(coreGroups [][]*volthaPod, rwPods []*volthaPod) ([][]*volthaPod, []*volthaPod) {
	var nList []*volthaPod
	var rtrn = make([][]*volthaPod, numRWPods>>1)
	var ipAddrs = make(map[string]struct{})

	log.Debug("Get addr diffs")

	// Start with an empty array
	for k := range rtrn {
		rtrn[k] = make([]*volthaPod, 2)
	}

	// Build a list with only the new items
	for _, v := range rwPods {
		if !hasIpAddr(coreGroups, v.ipAddr) {
			nList = append(nList, v)
		}
		ipAddrs[v.ipAddr] = struct{}{} // for the search below
	}

	// Now build the coreGroups with only the changed items
	for k1, v1 := range coreGroups {
		for k2, v2 := range v1 {
			if _, ok := ipAddrs[v2.ipAddr]; !ok {
				rtrn[k1][k2] = v2
			}
		}
	}
	return rtrn, nList
}

// Figure out where best to put the new pods
// in the coreGroup array based on the old
// pods being replaced. The criterion is that
// the new pod be on the same server as the
// old pod was.
func reconcileAddrDiffs(coreGroupDiffs [][]*volthaPod, rwPodDiffs []*volthaPod) [][]*volthaPod {
	var srvrs map[string][]*volthaPod = make(map[string][]*volthaPod)

	log.Debug("Reconciling diffs")
	log.Debug("Building server list")
	for _, v := range rwPodDiffs {
		log.Debugf("Adding %v to the server list", *v)
		srvrs[v.node] = append(srvrs[v.node], v)
	}

	for k1, v1 := range coreGroupDiffs {
		log.Debugf("k1:%v, v1:%v", k1, v1)
		for k2, v2 := range v1 {
			log.Debugf("k2:%v, v2:%v", k2, v2)
			if v2 == nil { // Nothing to do here
				continue
			}
			if _, ok := srvrs[v2.node]; ok {
				coreGroupDiffs[k1][k2] = srvrs[v2.node][0]
				if len(srvrs[v2.node]) > 1 { // remove one entry from the list
					srvrs[v2.node] = append(srvrs[v2.node][:0], srvrs[v2.node][1:]...)
				} else { // Delete the entry from the map
					delete(srvrs, v2.node)
				}
			} else {
				log.Error("This should never happen, node appears to have changed names")
				// attempt to limp along by keeping this old entry
			}
		}
	}

	return coreGroupDiffs
}

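// applyAddrDiffs pushes new pod address information to the affinity router. coreList
// is either the rw-core groups ([][]*volthaPod) or the ro-core list ([]*volthaPod);
// nPods holds the freshly queried pods whose addresses may have changed.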
func applyAddrDiffs(ctx context.Context, client pb.ConfigurationClient, coreList interface{}, nPods []*volthaPod) {
	var newEntries [][]*volthaPod

	log.Debug("Applying diffs")
	switch cores := coreList.(type) {
	case [][]*volthaPod:
		newEntries = reconcileAddrDiffs(getAddrDiffs(cores, nPods))

		// Now replace the information in coreGroups with the new
		// entries and then reconcile the device ids on the core
		// that's in the new entry with the device ids of its
		// active-active peer.
		for k1, v1 := range cores {
			for k2, v2 := range v1 {
				if newEntries[k1][k2] != nil {
					// TODO: Missing is the case where both the primary
					// and the secondary core crash and come back.
					// Pull the device ids from the active-active peer
					ids := queryPodDeviceIds(cores[k1][k2^1])
					if len(ids) != 0 {
						if !reconcilePodDeviceIds(newEntries[k1][k2], ids) {
							log.Errorf("Attempt to reconcile ids on pod %v failed", newEntries[k1][k2])
						}
					}
					// Send the affinity router the new connection information
					setConnection(ctx, client, afrouterRWClusterName, v2.backend, v2.connection, newEntries[k1][k2].ipAddr, podGrpcPort)
					// Copy the new entry information over
					cores[k1][k2].ipAddr = newEntries[k1][k2].ipAddr
					cores[k1][k2].name = newEntries[k1][k2].name
					cores[k1][k2].devIds = ids
				}
			}
		}
	case []*volthaPod:
		var mia []*volthaPod
		var found bool
		// TODO: Break this up using functions to simplify
		// reading of the code.
		// Find the core(s) that have changed addresses
		for k1, v1 := range cores {
			found = false
			for _, v2 := range nPods {
				if v1.ipAddr == v2.ipAddr {
					found = true
					break
				}
			}
			if !found {
				mia = append(mia, cores[k1])
			}
		}
		// Now plug in the new addresses and set the connection
		for _, v1 := range nPods {
			found = false
			for _, v2 := range cores {
				if v1.ipAddr == v2.ipAddr {
					found = true
					break
				}
			}
			if found {
				continue
			}
			mia[0].ipAddr = v1.ipAddr
			mia[0].name = v1.name
			setConnection(ctx, client, afrouterROClusterName, mia[0].backend, mia[0].connection, v1.ipAddr, podGrpcPort)
			// Now get rid of the mia entry just processed
			mia = append(mia[:0], mia[1:]...)
		}
	default:
		log.Error("Internal: Unexpected type in call to applyAddrDiffs")
	}
}

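// updateDeviceIds copies the freshly queried device id sets from rwPods into the
// matching (by pod name) entries of coreGroups.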
func updateDeviceIds(coreGroups [][]*volthaPod, rwPods []*volthaPod) {
	var byName = make(map[string]*volthaPod)

	// Convenience
	for _, v := range rwPods {
		byName[v.name] = v
	}

	for k1, v1 := range coreGroups {
		for k2 := range v1 {
			coreGroups[k1][k2].devIds = byName[v1[k2].name].devIds
		}
	}
}

func startCoreMonitor(ctx context.Context,
	client pb.ConfigurationClient,
	clientset *kubernetes.Clientset,
	rwCoreFltr *regexp.Regexp,
	roCoreFltr *regexp.Regexp,
	coreGroups [][]*volthaPod,
	oRoPods []*volthaPod) {
	// Now that initial allocation has been completed, monitor the pods
	// for IP changes.
	// The main loop needs to do the following:
	// 1) Periodically query the pods and filter out
	//    the vcore ones
	// 2) Validate that the pods running are the same
	//    as the previous check
	// 3) Validate that the IP addresses are the same
	//    as the last check.
	// If the pod names have changed then remove
	// the unused pod names and add in the new pod names
	// maintaining the cluster/backend information.
	// If an IP address has changed (which shouldn't
	// happen unless a pod is re-started) it should get
	// caught by the pod name change.
loop:
	for {
		select {
		case <-ctx.Done():
			// if we're done, exit
			break loop
		case <-time.After(10 * time.Second): // wait a while
		}

		// Get the rw core list from k8s
		rwPods, err := getVolthaPods(clientset, rwCoreFltr)
		if err != nil {
			log.Error(err)
			continue
		}

		queryDeviceIds(rwPods)
		updateDeviceIds(coreGroups, rwPods)
		// If we didn't get 2n+1 pods then wait since
		// something is down and will hopefully come
		// back up at some point.
		if len(rwPods) != numRWPods {
			continue
		}
		// We have all pods, check if any IP addresses
		// have changed.
		for _, v := range rwPods {
			if !hasIpAddr(coreGroups, v.ipAddr) {
				log.Debug("Address has changed...")
				applyAddrDiffs(ctx, client, coreGroups, rwPods)
				break
			}
		}

		roPods, err := getVolthaPods(clientset, roCoreFltr)
		if err != nil {
			log.Error(err)
			continue
		}

		if len(roPods) != numROPods {
			continue
		}
		for _, v := range roPods {
			if !hasIpAddr(oRoPods, v.ipAddr) {
				applyAddrDiffs(ctx, client, oRoPods, roPods)
				break
			}
		}
	}
}

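// hasIpAddr reports whether ipAddr is already present in coreList, which may be
// either a pod list or a list of core groups.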
func hasIpAddr(coreList interface{}, ipAddr string) bool {
	switch cores := coreList.(type) {
	case []*volthaPod:
		for _, v := range cores {
			if v.ipAddr == ipAddr {
				return true
			}
		}
	case [][]*volthaPod:
		for _, v1 := range cores {
			for _, v2 := range v1 {
				if v2.ipAddr == ipAddr {
					return true
				}
			}
		}
	default:
		log.Error("Internal: Unexpected type in call to hasIpAddr")
	}
	return false
}

// connectionActiveContext returns a context that is cancelled when the connection to the afrouter is lost
func connectionActiveContext(conn *grpc.ClientConn) context.Context {
	ctx, disconnected := context.WithCancel(context.Background())
	go func() {
		for state := conn.GetState(); state != connectivity.TransientFailure && state != connectivity.Shutdown; state = conn.GetState() {
			if !conn.WaitForStateChange(context.Background(), state) {
				break
			}
		}
		log.Infof("Connection to afrouter lost")
		disconnected()
	}()
	return ctx
}

func main() {
	config := &Configuration{}
	cmdParse := flag.NewFlagSet(path.Base(os.Args[0]), flag.ContinueOnError)
	config.DisplayVersionOnly = cmdParse.Bool("version", false, "Print version information and exit")

	if err := cmdParse.Parse(os.Args[1:]); err != nil {
		fmt.Printf("Error: %v\n", err)
		os.Exit(1)
	}

	if *config.DisplayVersionOnly {
		fmt.Println("VOLTHA API Server (afrouterd)")
		fmt.Println(version.VersionInfo.String(" "))
		return
	}

	// Set up logging
	if _, err := log.SetDefaultLogger(log.JSON, 0, nil); err != nil {
		log.With(log.Fields{"error": err}).Fatal("Cannot setup logging")
	}

	// Set up kubernetes api
	clientset := k8sClientSet()

	for {
		// Connect to the affinity router
		conn, err := connect(context.Background(), afrouterApiAddress) // This is a sidecar container so communicating over localhost
		if err != nil {
			panic(err)
		}

		// monitor the connection status, end context if connection is lost
		ctx := connectionActiveContext(conn)

		// set up the client
		client := pb.NewConfigurationClient(conn)

		// determine config & repopulate the afrouter
		generateAndMaintainConfiguration(ctx, client, clientset)

		conn.Close()
	}
}

// generateAndMaintainConfiguration does the pod-reconciliation work;
// it only returns once all sub-processes have completed
func generateAndMaintainConfiguration(ctx context.Context, client pb.ConfigurationClient, clientset *kubernetes.Clientset) {
	// Set up the regular expressions to identify the voltha cores
	rwCoreFltr := regexp.MustCompile(`rw-core[0-9]-`)
	roCoreFltr := regexp.MustCompile(`ro-core-`)

	// Get the voltha rw-core pods
	rwPods, err := getVolthaPods(clientset, rwCoreFltr)
	if err != nil {
		panic(err)
	}

	// Fetch the devices held by each running core
	queryDeviceIds(rwPods)

	// For debugging... comment out later
	for _, v := range rwPods {
		log.Debugf("Pod list %v", *v)
	}

	coreGroups := groupPods1(rwPods)

	// Assign the groupings to the backends and connections
	for k := range coreGroups {
		for k1 := range coreGroups[k] {
			coreGroups[k][k1].cluster = afrouterRWClusterName
			coreGroups[k][k1].backend = afrouterRWClusterName + strconv.Itoa(k+1)
			coreGroups[k][k1].connection = afrouterRWClusterName + strconv.Itoa(k+1) + strconv.Itoa(k1+1)
		}
	}
	log.Info("Core grouping completed")

	// TODO: Debugging code, comment out for production
	for k, v := range coreGroups {
		for k2, v2 := range v {
			log.Debugf("Core group %d,%d: %v", k, k2, v2)
		}
	}
	log.Info("Setting affinities")
	// Now set the affinities for existing devices in the cores
	for _, v := range coreGroups {
		setAffinity(ctx, client, v[0].devIds, v[0].backend)
		setAffinity(ctx, client, v[1].devIds, v[1].backend)
	}
	log.Info("Setting connections")
	// Configure the backends based on the calculated core groups
	for _, v := range coreGroups {
		setConnection(ctx, client, afrouterRWClusterName, v[0].backend, v[0].connection, v[0].ipAddr, podGrpcPort)
		setConnection(ctx, client, afrouterRWClusterName, v[1].backend, v[1].connection, v[1].ipAddr, podGrpcPort)
	}

	// Process the read only pods
	roPods, err := getVolthaPods(clientset, roCoreFltr)
	if err != nil {
		panic(err)
	}
	for k, v := range roPods {
		log.Debugf("Processing ro_pod %v", v)
		vN := afrouterROClusterName + strconv.Itoa(k+1)
		log.Debugf("Setting connection %s, %s, %s", vN, vN+"1", v.ipAddr)
		roPods[k].cluster = afrouterROClusterName
		roPods[k].backend = vN
		roPods[k].connection = vN + "1"
		setConnection(ctx, client, afrouterROClusterName, v.backend, v.connection, v.ipAddr, podGrpcPort)
	}

	log.Info("Starting discovery monitoring")
	doneCh, _ := startDiscoveryMonitor(ctx, client, coreGroups)

	log.Info("Starting core monitoring")
	startCoreMonitor(ctx, client, clientset, rwCoreFltr, roCoreFltr, coreGroups, roPods)

	// wait for the discovery monitor to quit
	<-doneCh
}