sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2018-present Open Networking Foundation |
| 3 | |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | package main |
| 18 | |
| 19 | import ( |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 20 | "errors" |
David K. Bainbridge | f430cd5 | 2019-05-28 15:00:35 -0700 | [diff] [blame] | 21 | "flag" |
Kent Hagerman | 334a8ce | 2019-05-16 16:50:33 -0400 | [diff] [blame] | 22 | "fmt" |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 23 | "google.golang.org/grpc/connectivity" |
| 24 | "google.golang.org/grpc/keepalive" |
Kent Hagerman | 737b9e5 | 2019-06-18 16:29:33 -0400 | [diff] [blame] | 25 | "k8s.io/api/core/v1" |
Kent Hagerman | 334a8ce | 2019-05-16 16:50:33 -0400 | [diff] [blame] | 26 | "math" |
| 27 | "os" |
David K. Bainbridge | f430cd5 | 2019-05-28 15:00:35 -0700 | [diff] [blame] | 28 | "path" |
Kent Hagerman | 0ab4cb2 | 2019-04-24 13:13:35 -0400 | [diff] [blame] | 29 | "regexp" |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 30 | "strconv" |
Kent Hagerman | 0ab4cb2 | 2019-04-24 13:13:35 -0400 | [diff] [blame] | 31 | "time" |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 32 | |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 33 | "github.com/golang/protobuf/ptypes" |
Kent Hagerman | 334a8ce | 2019-05-16 16:50:33 -0400 | [diff] [blame] | 34 | "github.com/golang/protobuf/ptypes/empty" |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 35 | "github.com/opencord/voltha-go/common/log" |
David K. Bainbridge | f430cd5 | 2019-05-28 15:00:35 -0700 | [diff] [blame] | 36 | "github.com/opencord/voltha-go/common/version" |
Kent Hagerman | 334a8ce | 2019-05-16 16:50:33 -0400 | [diff] [blame] | 37 | "github.com/opencord/voltha-go/kafka" |
William Kurkian | daa6bb2 | 2019-03-07 12:26:28 -0500 | [diff] [blame] | 38 | pb "github.com/opencord/voltha-protos/go/afrouter" |
Kent Hagerman | 0ab4cb2 | 2019-04-24 13:13:35 -0400 | [diff] [blame] | 39 | cmn "github.com/opencord/voltha-protos/go/common" |
William Kurkian | daa6bb2 | 2019-03-07 12:26:28 -0500 | [diff] [blame] | 40 | ic "github.com/opencord/voltha-protos/go/inter_container" |
Kent Hagerman | 0ab4cb2 | 2019-04-24 13:13:35 -0400 | [diff] [blame] | 41 | vpb "github.com/opencord/voltha-protos/go/voltha" |
| 42 | "golang.org/x/net/context" |
| 43 | "google.golang.org/grpc" |
| 44 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" |
| 45 | "k8s.io/client-go/kubernetes" |
| 46 | "k8s.io/client-go/rest" |
Kent Hagerman | e566c2e | 2019-06-03 17:56:42 -0400 | [diff] [blame] | 47 | "k8s.io/client-go/tools/clientcmd" |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 48 | ) |
| 49 | |
// volthaPod describes one rw-core pod discovered through the kubernetes API,
// together with the affinity router backend it is served by.
type volthaPod struct {
	// name is the kubernetes pod name.
	name string
	// ipAddr is the pod IP; it is combined with podGrpcPort to dial the core.
	ipAddr string
	// node is the name of the kubernetes node the pod is scheduled on.
	node string
	// devIds is a set of device ids; getVolthaPods initialises it empty.
	devIds map[string]struct{}
	// backend is the afrouter backend name: the RW cluster name followed by
	// the pod's affinity group label value.
	backend string
	// connection is not populated by the code in this file section.
	// NOTE(review): presumably an afrouter connection name - confirm or remove.
	connection string
}
| 58 | |
// Configuration holds the values parsed from the command line.
type Configuration struct {
	// DisplayVersionOnly is bound to the -version flag; when it points to
	// true, main prints the version information and exits.
	DisplayVersionOnly *bool
}
| 62 | |
Kent Hagerman | 334a8ce | 2019-05-16 16:50:33 -0400 | [diff] [blame] | 63 | var ( |
Kent Hagerman | e566c2e | 2019-06-03 17:56:42 -0400 | [diff] [blame] | 64 | // if k8s variables are undefined, will attempt to use in-cluster config |
| 65 | k8sApiServer = getStrEnv("K8S_API_SERVER", "") |
| 66 | k8sKubeConfigPath = getStrEnv("K8S_KUBE_CONFIG_PATH", "") |
| 67 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 68 | podNamespace = getStrEnv("POD_NAMESPACE", "voltha") |
| 69 | podLabelSelector = getStrEnv("POD_LABEL_SELECTOR", "app=rw-core") |
| 70 | podAffinityGroupLabel = getStrEnv("POD_AFFINITY_GROUP_LABEL", "affinity-group") |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 71 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 72 | podGrpcPort = uint64(getIntEnv("POD_GRPC_PORT", 0, math.MaxUint16, 50057)) |
Kent Hagerman | 334a8ce | 2019-05-16 16:50:33 -0400 | [diff] [blame] | 73 | |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 74 | afrouterApiAddress = getStrEnv("AFROUTER_API_ADDRESS", "localhost:55554") |
| 75 | |
Kent Hagerman | 334a8ce | 2019-05-16 16:50:33 -0400 | [diff] [blame] | 76 | afrouterRouterName = getStrEnv("AFROUTER_ROUTER_NAME", "vcore") |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 77 | afrouterRouteName = getStrEnv("AFROUTER_ROUTE_NAME", "dev_manager") |
Kent Hagerman | 334a8ce | 2019-05-16 16:50:33 -0400 | [diff] [blame] | 78 | afrouterRWClusterName = getStrEnv("AFROUTER_RW_CLUSTER_NAME", "vcore") |
Kent Hagerman | 334a8ce | 2019-05-16 16:50:33 -0400 | [diff] [blame] | 79 | |
| 80 | kafkaTopic = getStrEnv("KAFKA_TOPIC", "AffinityRouter") |
| 81 | kafkaClientType = getStrEnv("KAFKA_CLIENT_TYPE", "sarama") |
| 82 | kafkaHost = getStrEnv("KAFKA_HOST", "kafka") |
| 83 | kafkaPort = getIntEnv("KAFKA_PORT", 0, math.MaxUint16, 9092) |
| 84 | kafkaInstanceID = getStrEnv("KAFKA_INSTANCE_ID", "arouterd") |
| 85 | ) |
| 86 | |
// getIntEnv returns the integer value of the environment variable key.
//
// When the variable is not set, defaultValue is returned as-is (it is not
// range checked). When the variable is set but is not a base-10 integer
// within the inclusive range [min, max], getIntEnv panics with an error
// describing the expected range.
func getIntEnv(key string, min, max, defaultValue int) int {
	raw, present := os.LookupEnv(key)
	if !present {
		return defaultValue
	}

	parsed, convErr := strconv.Atoi(raw)
	if convErr != nil || parsed < min || parsed > max {
		panic(fmt.Errorf("%s must be a number in the range [%d, %d]; default: %d", key, min, max, defaultValue))
	}
	return parsed
}
| 97 | |
// getStrEnv returns the value of the environment variable key, or
// defaultValue when the variable is not set. A variable that is set to the
// empty string is returned as the empty string.
func getStrEnv(key, defaultValue string) string {
	value, present := os.LookupEnv(key)
	if !present {
		return defaultValue
	}
	return value
}
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 104 | |
| 105 | func newKafkaClient(clientType string, host string, port int, instanceID string) (kafka.Client, error) { |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 106 | log.Infow("kafka-client-type", log.Fields{"client": clientType}) |
| 107 | switch clientType { |
| 108 | case "sarama": |
| 109 | return kafka.NewSaramaClient( |
| 110 | kafka.Host(host), |
| 111 | kafka.Port(port), |
| 112 | kafka.ConsumerType(kafka.GroupCustomer), |
| 113 | kafka.ProducerReturnOnErrors(true), |
| 114 | kafka.ProducerReturnOnSuccess(true), |
| 115 | kafka.ProducerMaxRetries(6), |
| 116 | kafka.NumPartitions(3), |
| 117 | kafka.ConsumerGroupName(instanceID), |
| 118 | kafka.ConsumerGroupPrefix(instanceID), |
| 119 | kafka.AutoCreateTopic(false), |
| 120 | kafka.ProducerFlushFrequency(5), |
| 121 | kafka.ProducerRetryBackoff(time.Millisecond*30)), nil |
| 122 | } |
| 123 | return nil, errors.New("unsupported-client-type") |
| 124 | } |
| 125 | |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 126 | func k8sClientSet() *kubernetes.Clientset { |
Kent Hagerman | e566c2e | 2019-06-03 17:56:42 -0400 | [diff] [blame] | 127 | var config *rest.Config |
| 128 | if k8sApiServer != "" || k8sKubeConfigPath != "" { |
| 129 | // use combination of URL & local kube-config file |
| 130 | c, err := clientcmd.BuildConfigFromFlags(k8sApiServer, k8sKubeConfigPath) |
| 131 | if err != nil { |
| 132 | panic(err) |
| 133 | } |
| 134 | config = c |
| 135 | } else { |
| 136 | // use in-cluster config |
| 137 | c, err := rest.InClusterConfig() |
| 138 | if err != nil { |
| 139 | log.Errorf("Unable to load in-cluster config. Try setting K8S_API_SERVER and K8S_KUBE_CONFIG_PATH?") |
| 140 | panic(err) |
| 141 | } |
| 142 | config = c |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 143 | } |
| 144 | // creates the clientset |
| 145 | clientset, err := kubernetes.NewForConfig(config) |
| 146 | if err != nil { |
Kent Hagerman | e566c2e | 2019-06-03 17:56:42 -0400 | [diff] [blame] | 147 | panic(err) |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 148 | } |
| 149 | |
| 150 | return clientset |
| 151 | } |
| 152 | |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 153 | func connect(ctx context.Context, addr string) (*grpc.ClientConn, error) { |
| 154 | log.Debugf("Trying to connect to %s", addr) |
| 155 | conn, err := grpc.DialContext(ctx, addr, |
| 156 | grpc.WithInsecure(), |
| 157 | grpc.WithBlock(), |
| 158 | grpc.WithBackoffMaxDelay(time.Second*5), |
| 159 | grpc.WithKeepaliveParams(keepalive.ClientParameters{Time: time.Second * 10, Timeout: time.Second * 5})) |
| 160 | if err == nil { |
| 161 | log.Debugf("Connection succeeded") |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 162 | } |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 163 | return conn, err |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 164 | } |
| 165 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 166 | func getVolthaPods(cs *kubernetes.Clientset) ([]*volthaPod, error) { |
| 167 | pods, err := cs.CoreV1().Pods(podNamespace).List(metav1.ListOptions{LabelSelector: podLabelSelector}) |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 168 | if err != nil { |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 169 | return nil, err |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 170 | } |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 171 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 172 | var rwPods []*volthaPod |
Kent Hagerman | 737b9e5 | 2019-06-18 16:29:33 -0400 | [diff] [blame] | 173 | items: |
Kent Hagerman | 0ab4cb2 | 2019-04-24 13:13:35 -0400 | [diff] [blame] | 174 | for _, v := range pods.Items { |
Kent Hagerman | 737b9e5 | 2019-06-18 16:29:33 -0400 | [diff] [blame] | 175 | // only pods that are actually running should be considered |
| 176 | if v.Status.Phase == v1.PodRunning { |
| 177 | for _, condition := range v.Status.Conditions { |
| 178 | if condition.Status != v1.ConditionTrue { |
| 179 | continue items |
| 180 | } |
| 181 | } |
| 182 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 183 | if group, have := v.Labels[podAffinityGroupLabel]; have { |
Kent Hagerman | 737b9e5 | 2019-06-18 16:29:33 -0400 | [diff] [blame] | 184 | log.Debugf("Namespace: %s, PodName: %s, PodIP: %s, Host: %s\n", v.Namespace, v.Name, v.Status.PodIP, v.Spec.NodeName) |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 185 | rwPods = append(rwPods, &volthaPod{ |
| 186 | name: v.Name, |
| 187 | ipAddr: v.Status.PodIP, |
| 188 | node: v.Spec.NodeName, |
| 189 | devIds: make(map[string]struct{}), |
| 190 | backend: afrouterRWClusterName + group, |
| 191 | }) |
| 192 | } else { |
| 193 | log.Warnf("Pod %s found matching % without label %", v.Name, podLabelSelector, podAffinityGroupLabel) |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 194 | } |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 195 | } |
| 196 | } |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 197 | return rwPods, nil |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 198 | } |
| 199 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 200 | func reconcilePodDeviceIds(ctx context.Context, pod *volthaPod, ids map[string]struct{}) { |
Kent Hagerman | 737b9e5 | 2019-06-18 16:29:33 -0400 | [diff] [blame] | 201 | ctxTimeout, _ := context.WithTimeout(ctx, time.Second*5) |
| 202 | conn, err := connect(ctxTimeout, fmt.Sprintf("%s:%d", pod.ipAddr, podGrpcPort)) |
sslobodr | 6c1689c | 2019-01-24 07:31:15 -0500 | [diff] [blame] | 203 | if err != nil { |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 204 | log.Debugf("Could not reconcile devices from %s, could not connect: %s", pod.name, err) |
| 205 | return |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 206 | } |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 207 | defer conn.Close() |
Kent Hagerman | 334a8ce | 2019-05-16 16:50:33 -0400 | [diff] [blame] | 208 | |
| 209 | var idList cmn.IDs |
| 210 | for k := range ids { |
| 211 | idList.Items = append(idList.Items, &cmn.ID{Id: k}) |
| 212 | } |
| 213 | |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 214 | client := vpb.NewVolthaServiceClient(conn) |
Kent Hagerman | 737b9e5 | 2019-06-18 16:29:33 -0400 | [diff] [blame] | 215 | _, err = client.ReconcileDevices(ctx, &idList) |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 216 | if err != nil { |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 217 | log.Errorf("Attempt to reconcile ids on pod %s failed: %s", pod.name, err) |
| 218 | return |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 219 | } |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 220 | } |
| 221 | |
Kent Hagerman | 737b9e5 | 2019-06-18 16:29:33 -0400 | [diff] [blame] | 222 | func queryPodDeviceIds(ctx context.Context, pod *volthaPod) map[string]struct{} { |
Kent Hagerman | 737b9e5 | 2019-06-18 16:29:33 -0400 | [diff] [blame] | 223 | ctxTimeout, _ := context.WithTimeout(ctx, time.Second*5) |
| 224 | conn, err := connect(ctxTimeout, fmt.Sprintf("%s:%d", pod.ipAddr, podGrpcPort)) |
sslobodr | 6c1689c | 2019-01-24 07:31:15 -0500 | [diff] [blame] | 225 | if err != nil { |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 226 | log.Debugf("Could not query devices from %s, could not connect: %s", pod.name, err) |
| 227 | return nil |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 228 | } |
| 229 | defer conn.Close() |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 230 | |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 231 | client := vpb.NewVolthaServiceClient(conn) |
Kent Hagerman | 737b9e5 | 2019-06-18 16:29:33 -0400 | [diff] [blame] | 232 | devs, err := client.ListDeviceIds(ctx, &empty.Empty{}) |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 233 | if err != nil { |
| 234 | log.Error(err) |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 235 | return nil |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 236 | } |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 237 | |
| 238 | var ret = make(map[string]struct{}) |
Kent Hagerman | 0ab4cb2 | 2019-04-24 13:13:35 -0400 | [diff] [blame] | 239 | for _, dv := range devs.Items { |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 240 | ret[dv.Id] = struct{}{} |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 241 | } |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 242 | return ret |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 243 | } |
| 244 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 245 | func setAffinity(ctx context.Context, client pb.ConfigurationClient, deviceId string, backend string) { |
| 246 | log.Debugf("Configuring backend %s with device id %s \n", backend, deviceId) |
| 247 | if res, err := client.SetAffinity(ctx, &pb.Affinity{ |
| 248 | Router: afrouterRouterName, |
| 249 | Route: afrouterRouteName, |
| 250 | Cluster: afrouterRWClusterName, |
| 251 | Backend: backend, |
| 252 | Id: deviceId, |
| 253 | }); err != nil { |
| 254 | log.Debugf("failed affinity RPC call: %s\n", err) |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 255 | } else { |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 256 | log.Debugf("Result: %v\n", res) |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 257 | } |
| 258 | } |
| 259 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 260 | func monitorDiscovery(ctx context.Context, client pb.ConfigurationClient, ch <-chan *ic.InterContainerMessage, doneCh chan<- struct{}) { |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 261 | defer close(doneCh) |
| 262 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 263 | monitorLoop: |
| 264 | for { |
| 265 | select { |
| 266 | case <-ctx.Done(): |
| 267 | case msg := <-ch: |
| 268 | log.Debug("Received a device discovery notification") |
| 269 | device := &ic.DeviceDiscovered{} |
| 270 | if err := ptypes.UnmarshalAny(msg.Body, device); err != nil { |
| 271 | log.Errorf("Could not unmarshal received notification %v", msg) |
sslobodr | 38afd0d | 2019-01-21 12:31:46 -0500 | [diff] [blame] | 272 | } else { |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 273 | // somewhat hackish solution, backend is known from the first digit found in the publisher name |
| 274 | group := regexp.MustCompile(`\d`).FindString(device.Publisher) |
| 275 | if group == "" { |
| 276 | // set the affinity of the discovered device |
| 277 | setAffinity(ctx, client, device.Id, afrouterRWClusterName+group) |
| 278 | } else { |
| 279 | log.Error("backend is unknown") |
| 280 | } |
sslobodr | 38afd0d | 2019-01-21 12:31:46 -0500 | [diff] [blame] | 281 | } |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 282 | break monitorLoop |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 283 | } |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 284 | } |
| 285 | } |
| 286 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 287 | func startDiscoveryMonitor(ctx context.Context, client pb.ConfigurationClient) (<-chan struct{}, error) { |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 288 | doneCh := make(chan struct{}) |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 289 | // Connect to kafka for discovery events |
Kent Hagerman | 334a8ce | 2019-05-16 16:50:33 -0400 | [diff] [blame] | 290 | kc, err := newKafkaClient(kafkaClientType, kafkaHost, kafkaPort, kafkaInstanceID) |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 291 | if err != nil { |
| 292 | panic(err) |
| 293 | } |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 294 | kc.Start() |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 295 | defer kc.Stop() |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 296 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 297 | ch, err := kc.Subscribe(&kafka.Topic{Name: kafkaTopic}) |
| 298 | if err != nil { |
Kent Hagerman | 334a8ce | 2019-05-16 16:50:33 -0400 | [diff] [blame] | 299 | log.Errorf("Could not subscribe to the '%s' channel, discovery disabled", kafkaTopic) |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 300 | close(doneCh) |
| 301 | return doneCh, err |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 302 | } |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 303 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 304 | go monitorDiscovery(ctx, client, ch, doneCh) |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 305 | return doneCh, nil |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 306 | } |
| 307 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 308 | // coreMonitor polls the list of devices from all RW cores, pushes these devices |
| 309 | // into the affinity router, and ensures that all cores in a backend have their devices synced |
| 310 | func coreMonitor(ctx context.Context, client pb.ConfigurationClient, clientset *kubernetes.Clientset) { |
| 311 | // map[backend]map[deviceId]struct{} |
| 312 | deviceOwnership := make(map[string]map[string]struct{}) |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 313 | loop: |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 314 | for { |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 315 | // get the rw core list from k8s |
| 316 | rwPods, err := getVolthaPods(clientset) |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 317 | if err != nil { |
| 318 | log.Error(err) |
| 319 | continue |
| 320 | } |
| 321 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 322 | // for every pod |
| 323 | for _, pod := range rwPods { |
| 324 | // get the devices for this pod's backend |
| 325 | devices, have := deviceOwnership[pod.backend] |
| 326 | if !have { |
| 327 | devices = make(map[string]struct{}) |
| 328 | deviceOwnership[pod.backend] = devices |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 329 | } |
sslobodr | 8e2ccb5 | 2019-02-05 09:21:47 -0500 | [diff] [blame] | 330 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 331 | coreDevices := queryPodDeviceIds(ctx, pod) |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 332 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 333 | // handle devices that exist in the core, but we have just learned about |
| 334 | for deviceId := range coreDevices { |
| 335 | // if there's a new device |
| 336 | if _, have := devices[deviceId]; !have { |
| 337 | // add the device to our local list |
| 338 | devices[deviceId] = struct{}{} |
| 339 | // push the device into the affinity router |
| 340 | setAffinity(ctx, client, deviceId, pod.backend) |
sslobodr | 8e2ccb5 | 2019-02-05 09:21:47 -0500 | [diff] [blame] | 341 | } |
| 342 | } |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 343 | |
| 344 | // ensure that the core knows about all devices in its backend |
| 345 | toSync := make(map[string]struct{}) |
| 346 | for deviceId := range devices { |
| 347 | // if the pod is missing any devices |
| 348 | if _, have := coreDevices[deviceId]; !have { |
| 349 | // we will reconcile them |
| 350 | toSync[deviceId] = struct{}{} |
| 351 | } |
| 352 | } |
| 353 | |
| 354 | if len(toSync) != 0 { |
| 355 | reconcilePodDeviceIds(ctx, pod, toSync) |
| 356 | } |
sslobodr | 8e2ccb5 | 2019-02-05 09:21:47 -0500 | [diff] [blame] | 357 | } |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 358 | |
| 359 | select { |
| 360 | case <-ctx.Done(): |
| 361 | // if we're done, exit |
| 362 | break loop |
| 363 | case <-time.After(10 * time.Second): // wait a while |
| 364 | } |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 365 | } |
sslobodr | e7ce71d | 2019-01-22 16:21:45 -0500 | [diff] [blame] | 366 | } |
| 367 | |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 368 | // endOnClose cancels the context when the connection closes |
| 369 | func connectionActiveContext(conn *grpc.ClientConn) context.Context { |
| 370 | ctx, disconnected := context.WithCancel(context.Background()) |
| 371 | go func() { |
| 372 | for state := conn.GetState(); state != connectivity.TransientFailure && state != connectivity.Shutdown; state = conn.GetState() { |
| 373 | if !conn.WaitForStateChange(context.Background(), state) { |
| 374 | break |
| 375 | } |
| 376 | } |
| 377 | log.Infof("Connection to afrouter lost") |
| 378 | disconnected() |
| 379 | }() |
| 380 | return ctx |
| 381 | } |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 382 | |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 383 | func main() { |
David K. Bainbridge | f430cd5 | 2019-05-28 15:00:35 -0700 | [diff] [blame] | 384 | config := &Configuration{} |
| 385 | cmdParse := flag.NewFlagSet(path.Base(os.Args[0]), flag.ContinueOnError) |
| 386 | config.DisplayVersionOnly = cmdParse.Bool("version", false, "Print version information and exit") |
| 387 | |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 388 | if err := cmdParse.Parse(os.Args[1:]); err != nil { |
David K. Bainbridge | f430cd5 | 2019-05-28 15:00:35 -0700 | [diff] [blame] | 389 | fmt.Printf("Error: %v\n", err) |
| 390 | os.Exit(1) |
| 391 | } |
| 392 | |
| 393 | if *config.DisplayVersionOnly { |
| 394 | fmt.Println("VOLTHA API Server (afrouterd)") |
| 395 | fmt.Println(version.VersionInfo.String(" ")) |
| 396 | return |
| 397 | } |
| 398 | |
sslobodr | 16e41bc | 2019-01-18 16:22:21 -0500 | [diff] [blame] | 399 | // Set up logging |
| 400 | if _, err := log.SetDefaultLogger(log.JSON, 0, nil); err != nil { |
| 401 | log.With(log.Fields{"error": err}).Fatal("Cannot setup logging") |
| 402 | } |
| 403 | |
| 404 | // Set up kubernetes api |
| 405 | clientset := k8sClientSet() |
| 406 | |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 407 | for { |
| 408 | // Connect to the affinity router |
| 409 | conn, err := connect(context.Background(), afrouterApiAddress) // This is a sidecar container so communicating over localhost |
| 410 | if err != nil { |
| 411 | panic(err) |
| 412 | } |
| 413 | |
| 414 | // monitor the connection status, end context if connection is lost |
| 415 | ctx := connectionActiveContext(conn) |
| 416 | |
| 417 | // set up the client |
| 418 | client := pb.NewConfigurationClient(conn) |
| 419 | |
Kent Hagerman | 9a879af | 2019-07-22 17:21:09 -0400 | [diff] [blame] | 420 | // start the discovery monitor and core monitor |
| 421 | // these two processes do the majority of the work |
| 422 | |
| 423 | log.Info("Starting discovery monitoring") |
| 424 | doneCh, _ := startDiscoveryMonitor(ctx, client) |
| 425 | |
| 426 | log.Info("Starting core monitoring") |
| 427 | coreMonitor(ctx, client, clientset) |
| 428 | |
| 429 | //ensure the discovery monitor to quit |
| 430 | <-doneCh |
Kent Hagerman | 9266146 | 2019-06-04 18:22:05 -0400 | [diff] [blame] | 431 | |
| 432 | conn.Close() |
| 433 | } |
| 434 | } |