VOL-2087 reorganize and unit test arouterd

Change-Id: Iadacd1829095e8ffde306e5ebf164f4d67196a68
diff --git a/cmd/arouterd/arouterd.go b/cmd/arouterd/arouterd.go
index de469b5..78c8af8 100644
--- a/cmd/arouterd/arouterd.go
+++ b/cmd/arouterd/arouterd.go
@@ -17,398 +17,41 @@
 package main
 
 import (
-	"errors"
 	"flag"
 	"fmt"
-	"google.golang.org/grpc/connectivity"
-	"google.golang.org/grpc/keepalive"
-	"k8s.io/api/core/v1"
-	"math"
+	"github.com/opencord/voltha-api-server/internal/pkg/afrouterd"
+	"github.com/opencord/voltha-go/common/version"
 	"os"
 	"path"
-	"regexp"
-	"strconv"
-	"time"
 
-	"github.com/golang/protobuf/ptypes"
-	"github.com/golang/protobuf/ptypes/empty"
 	"github.com/opencord/voltha-go/common/log"
-	"github.com/opencord/voltha-go/common/version"
-	"github.com/opencord/voltha-go/kafka"
 	pb "github.com/opencord/voltha-protos/go/afrouter"
-	cmn "github.com/opencord/voltha-protos/go/common"
-	ic "github.com/opencord/voltha-protos/go/inter_container"
-	vpb "github.com/opencord/voltha-protos/go/voltha"
 	"golang.org/x/net/context"
-	"google.golang.org/grpc"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/client-go/kubernetes"
-	"k8s.io/client-go/rest"
-	"k8s.io/client-go/tools/clientcmd"
 )
 
-type volthaPod struct {
-	name    string
-	ipAddr  string
-	node    string
-	devIds  map[string]struct{}
-	backend string
-}
+var (
+	instanceID         = afrouterd.GetStrEnv("HOSTNAME", "arouterd001")
+	afrouterApiAddress = afrouterd.GetStrEnv("AFROUTER_API_ADDRESS", "localhost:55554")
+)
 
 type Configuration struct {
 	DisplayVersionOnly *bool
 }
 
-var (
-	// if k8s variables are undefined, will attempt to use in-cluster config
-	k8sApiServer      = getStrEnv("K8S_API_SERVER", "")
-	k8sKubeConfigPath = getStrEnv("K8S_KUBE_CONFIG_PATH", "")
-
-	podNamespace          = getStrEnv("POD_NAMESPACE", "voltha")
-	podLabelSelector      = getStrEnv("POD_LABEL_SELECTOR", "app=rw-core")
-	podAffinityGroupLabel = getStrEnv("POD_AFFINITY_GROUP_LABEL", "affinity-group")
-
-	podGrpcPort = uint64(getIntEnv("POD_GRPC_PORT", 0, math.MaxUint16, 50057))
-
-	afrouterApiAddress = getStrEnv("AFROUTER_API_ADDRESS", "localhost:55554")
-
-	afrouterRouterName    = getStrEnv("AFROUTER_ROUTER_NAME", "vcore")
-	afrouterRouteName     = getStrEnv("AFROUTER_ROUTE_NAME", "dev_manager")
-	afrouterRWClusterName = getStrEnv("AFROUTER_RW_CLUSTER_NAME", "vcore")
-
-	kafkaTopic      = getStrEnv("KAFKA_TOPIC", "AffinityRouter")
-	kafkaClientType = getStrEnv("KAFKA_CLIENT_TYPE", "sarama")
-	kafkaHost       = getStrEnv("KAFKA_HOST", "kafka")
-	kafkaPort       = getIntEnv("KAFKA_PORT", 0, math.MaxUint16, 9092)
-	kafkaInstanceID = getStrEnv("KAFKA_INSTANCE_ID", "arouterd")
-
-	instanceID = getStrEnv("HOSTNAME", "arouterd001")
-)
-
-func getIntEnv(key string, min, max, defaultValue int) int {
-	if val, have := os.LookupEnv(key); have {
-		num, err := strconv.Atoi(val)
-		if err != nil || !(min <= num && num <= max) {
-			panic(fmt.Errorf("%s must be a number in the range [%d, %d]; default: %d", key, min, max, defaultValue))
-		}
-		return num
-	}
-	return defaultValue
-}
-
-func getStrEnv(key, defaultValue string) string {
-	if val, have := os.LookupEnv(key); have {
-		return val
-	}
-	return defaultValue
-}
-
-func newKafkaClient(clientType string, host string, port int, instanceID string) (kafka.Client, error) {
-	log.Infow("kafka-client-type", log.Fields{"client": clientType})
-	switch clientType {
-	case "sarama":
-		return kafka.NewSaramaClient(
-			kafka.Host(host),
-			kafka.Port(port),
-			kafka.ConsumerType(kafka.GroupCustomer),
-			kafka.ProducerReturnOnErrors(true),
-			kafka.ProducerReturnOnSuccess(true),
-			kafka.ProducerMaxRetries(6),
-			kafka.NumPartitions(3),
-			kafka.ConsumerGroupName(instanceID),
-			kafka.ConsumerGroupPrefix(instanceID),
-			kafka.AutoCreateTopic(false),
-			kafka.ProducerFlushFrequency(5),
-			kafka.ProducerRetryBackoff(time.Millisecond*30)), nil
-	}
-	return nil, errors.New("unsupported-client-type")
-}
-
-func k8sClientSet() *kubernetes.Clientset {
-	var config *rest.Config
-	if k8sApiServer != "" || k8sKubeConfigPath != "" {
-		// use combination of URL & local kube-config file
-		c, err := clientcmd.BuildConfigFromFlags(k8sApiServer, k8sKubeConfigPath)
-		if err != nil {
-			panic(err)
-		}
-		config = c
-	} else {
-		// use in-cluster config
-		c, err := rest.InClusterConfig()
-		if err != nil {
-			log.Errorf("Unable to load in-cluster config.  Try setting K8S_API_SERVER and K8S_KUBE_CONFIG_PATH?")
-			panic(err)
-		}
-		config = c
-	}
-	// creates the clientset
-	clientset, err := kubernetes.NewForConfig(config)
-	if err != nil {
-		panic(err)
-	}
-
-	return clientset
-}
-
-func connect(ctx context.Context, addr string) (*grpc.ClientConn, error) {
-	log.Debugf("Trying to connect to %s", addr)
-	conn, err := grpc.DialContext(ctx, addr,
-		grpc.WithInsecure(),
-		grpc.WithBlock(),
-		grpc.WithBackoffMaxDelay(time.Second*5),
-		grpc.WithKeepaliveParams(keepalive.ClientParameters{Time: time.Second * 10, Timeout: time.Second * 5}))
-	if err == nil {
-		log.Debugf("Connection succeeded")
-	}
-	return conn, err
-}
-
-func getVolthaPods(cs *kubernetes.Clientset) ([]*volthaPod, error) {
-	pods, err := cs.CoreV1().Pods(podNamespace).List(metav1.ListOptions{LabelSelector: podLabelSelector})
-	if err != nil {
-		return nil, err
-	}
-
-	var rwPods []*volthaPod
-items:
-	for _, v := range pods.Items {
-		// only pods that are actually running should be considered
-		if v.Status.Phase == v1.PodRunning {
-			for _, condition := range v.Status.Conditions {
-				if condition.Status != v1.ConditionTrue {
-					continue items
-				}
-			}
-
-			if group, have := v.Labels[podAffinityGroupLabel]; have {
-				log.Debugf("Namespace: %s, PodName: %s, PodIP: %s, Host: %s\n", v.Namespace, v.Name, v.Status.PodIP, v.Spec.NodeName)
-				rwPods = append(rwPods, &volthaPod{
-					name:    v.Name,
-					ipAddr:  v.Status.PodIP,
-					node:    v.Spec.NodeName,
-					devIds:  make(map[string]struct{}),
-					backend: afrouterRWClusterName + group,
-				})
-			} else {
-				log.Warnf("Pod %s found matching % without label %", v.Name, podLabelSelector, podAffinityGroupLabel)
-			}
-		}
-	}
-	return rwPods, nil
-}
-
-func reconcilePodDeviceIds(ctx context.Context, pod *volthaPod, ids map[string]struct{}) {
-	ctxTimeout, _ := context.WithTimeout(ctx, time.Second*5)
-	conn, err := connect(ctxTimeout, fmt.Sprintf("%s:%d", pod.ipAddr, podGrpcPort))
-	if err != nil {
-		log.Debugf("Could not reconcile devices from %s, could not connect: %s", pod.name, err)
-		return
-	}
-	defer conn.Close()
-
-	var idList cmn.IDs
-	for k := range ids {
-		idList.Items = append(idList.Items, &cmn.ID{Id: k})
-	}
-
-	client := vpb.NewVolthaServiceClient(conn)
-	_, err = client.ReconcileDevices(ctx, &idList)
-	if err != nil {
-		log.Errorf("Attempt to reconcile ids on pod %s failed: %s", pod.name, err)
-		return
-	}
-}
-
-func queryPodDeviceIds(ctx context.Context, pod *volthaPod) map[string]struct{} {
-	ctxTimeout, _ := context.WithTimeout(ctx, time.Second*5)
-	conn, err := connect(ctxTimeout, fmt.Sprintf("%s:%d", pod.ipAddr, podGrpcPort))
-	if err != nil {
-		log.Debugf("Could not query devices from %s, could not connect: %s", pod.name, err)
-		return nil
-	}
-	defer conn.Close()
-
-	client := vpb.NewVolthaServiceClient(conn)
-	devs, err := client.ListDeviceIds(ctx, &empty.Empty{})
-	if err != nil {
-		log.Error(err)
-		return nil
-	}
-
-	var ret = make(map[string]struct{})
-	for _, dv := range devs.Items {
-		ret[dv.Id] = struct{}{}
-	}
-	return ret
-}
-
-func setAffinity(ctx context.Context, client pb.ConfigurationClient, deviceId string, backend string) {
-	log.Debugf("Configuring backend %s with device id %s \n", backend, deviceId)
-	if res, err := client.SetAffinity(ctx, &pb.Affinity{
-		Router:  afrouterRouterName,
-		Route:   afrouterRouteName,
-		Cluster: afrouterRWClusterName,
-		Backend: backend,
-		Id:      deviceId,
-	}); err != nil {
-		log.Debugf("failed affinity RPC call: %s\n", err)
-	} else {
-		log.Debugf("Result: %v\n", res)
-	}
-}
-
-func monitorDiscovery(kc kafka.Client, ctx context.Context, client pb.ConfigurationClient, ch <-chan *ic.InterContainerMessage, doneCh chan<- struct{}) {
-	defer close(doneCh)
-	defer kc.Stop()
-
-monitorLoop:
-	for {
-		select {
-		case <-ctx.Done():
-			break monitorLoop
-		case msg := <-ch:
-			log.Debug("Received a device discovery notification")
-			device := &ic.DeviceDiscovered{}
-			if err := ptypes.UnmarshalAny(msg.Body, device); err != nil {
-				log.Errorf("Could not unmarshal received notification %v", msg)
-			} else {
-				// somewhat hackish solution, backend is known from the first digit found in the publisher name
-				group := regexp.MustCompile(`\d`).FindString(device.Publisher)
-				if group != "" {
-					// set the affinity of the discovered device
-					setAffinity(ctx, client, device.Id, afrouterRWClusterName+group)
-				} else {
-					log.Error("backend is unknown")
-				}
-			}
-		}
-	}
-}
-
-func startDiscoveryMonitor(ctx context.Context, client pb.ConfigurationClient) (<-chan struct{}, error) {
-	doneCh := make(chan struct{})
-	// Connect to kafka for discovery events
-	kc, err := newKafkaClient(kafkaClientType, kafkaHost, kafkaPort, kafkaInstanceID)
-	if err != nil {
-		panic(err)
-	}
-
-	for {
-		if err := kc.Start(); err != nil {
-			log.Error("Could not connect to kafka")
-		} else {
-			break
-		}
-		select {
-		case <-ctx.Done():
-			close(doneCh)
-			return doneCh, errors.New("GRPC context done")
-
-		case <-time.After(5 * time.Second):
-		}
-	}
-	ch, err := kc.Subscribe(&kafka.Topic{Name: kafkaTopic})
-	if err != nil {
-		log.Errorf("Could not subscribe to the '%s' channel, discovery disabled", kafkaTopic)
-		close(doneCh)
-		kc.Stop()
-		return doneCh, err
-	}
-
-	go monitorDiscovery(kc, ctx, client, ch, doneCh)
-	return doneCh, nil
-}
-
-// coreMonitor polls the list of devices from all RW cores, pushes these devices
-// into the affinity router, and ensures that all cores in a backend have their devices synced
-func coreMonitor(ctx context.Context, client pb.ConfigurationClient, clientset *kubernetes.Clientset) {
-	// map[backend]map[deviceId]struct{}
-	deviceOwnership := make(map[string]map[string]struct{})
-loop:
-	for {
-		// get the rw core list from k8s
-		rwPods, err := getVolthaPods(clientset)
-		if err != nil {
-			log.Error(err)
-			continue
-		}
-
-		// for every pod
-		for _, pod := range rwPods {
-			// get the devices for this pod's backend
-			devices, have := deviceOwnership[pod.backend]
-			if !have {
-				devices = make(map[string]struct{})
-				deviceOwnership[pod.backend] = devices
-			}
-
-			coreDevices := queryPodDeviceIds(ctx, pod)
-
-			// handle devices that exist in the core, but we have just learned about
-			for deviceId := range coreDevices {
-				// if there's a new device
-				if _, have := devices[deviceId]; !have {
-					// add the device to our local list
-					devices[deviceId] = struct{}{}
-					// push the device into the affinity router
-					setAffinity(ctx, client, deviceId, pod.backend)
-				}
-			}
-
-			// ensure that the core knows about all devices in its backend
-			toSync := make(map[string]struct{})
-			for deviceId := range devices {
-				// if the pod is missing any devices
-				if _, have := coreDevices[deviceId]; !have {
-					// we will reconcile them
-					toSync[deviceId] = struct{}{}
-				}
-			}
-
-			if len(toSync) != 0 {
-				reconcilePodDeviceIds(ctx, pod, toSync)
-			}
-		}
-
-		select {
-		case <-ctx.Done():
-			// if we're done, exit
-			break loop
-		case <-time.After(10 * time.Second): // wait a while
-		}
-	}
-}
-
-// endOnClose cancels the context when the connection closes
-func connectionActiveContext(conn *grpc.ClientConn) context.Context {
-	ctx, disconnected := context.WithCancel(context.Background())
-	go func() {
-		for state := conn.GetState(); state != connectivity.TransientFailure && state != connectivity.Shutdown; state = conn.GetState() {
-			if !conn.WaitForStateChange(context.Background(), state) {
-				break
-			}
-		}
-		log.Infof("Connection to afrouter lost")
-		disconnected()
-	}()
-	return ctx
-}
-
-func main() {
+func startup() int {
 	config := &Configuration{}
 	cmdParse := flag.NewFlagSet(path.Base(os.Args[0]), flag.ContinueOnError)
 	config.DisplayVersionOnly = cmdParse.Bool("version", false, "Print version information and exit")
 
 	if err := cmdParse.Parse(os.Args[1:]); err != nil {
 		fmt.Printf("Error: %v\n", err)
-		os.Exit(1)
+		return 1
 	}
 
 	if *config.DisplayVersionOnly {
 		fmt.Println("VOLTHA API Server (afrouterd)")
 		fmt.Println(version.VersionInfo.String("  "))
-		return
+		return 0
 	}
 
 	// Set up logging
@@ -417,17 +60,17 @@
 	}
 
 	// Set up kubernetes api
-	clientset := k8sClientSet()
+	clientset := afrouterd.K8sClientSet()
 
 	for {
 		// Connect to the affinity router
-		conn, err := connect(context.Background(), afrouterApiAddress) // This is a sidecar container so communicating over localhost
+		conn, err := afrouterd.Connect(context.Background(), afrouterApiAddress) // This is a sidecar container so communicating over localhost
 		if err != nil {
 			panic(err)
 		}
 
 		// monitor the connection status, end context if connection is lost
-		ctx := connectionActiveContext(conn)
+		ctx := afrouterd.ConnectionActiveContext(conn)
 
 		// set up the client
 		client := pb.NewConfigurationClient(conn)
@@ -436,10 +79,10 @@
 		// these two processes do the majority of the work
 
 		log.Info("Starting discovery monitoring")
-		doneCh, _ := startDiscoveryMonitor(ctx, client)
+		doneCh, _ := afrouterd.StartDiscoveryMonitor(ctx, client)
 
 		log.Info("Starting core monitoring")
-		coreMonitor(ctx, client, clientset)
+		afrouterd.CoreMonitor(ctx, client, clientset)
 
 		//ensure the discovery monitor to quit
 		<-doneCh
@@ -447,3 +90,10 @@
 		conn.Close()
 	}
 }
+
+func main() {
+	status := startup()
+	if status != 0 {
+		os.Exit(status)
+	}
+}
diff --git a/cmd/arouterd/arouterd_test.go b/cmd/arouterd/arouterd_test.go
new file mode 100644
index 0000000..e0f0ac4
--- /dev/null
+++ b/cmd/arouterd/arouterd_test.go
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2018-present Open Networking Foundation
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+
+ * http://www.apache.org/licenses/LICENSE-2.0
+
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file implements an exit handler that tries to shut down all the
+// running servers before finally exiting. There are 2 triggers to this
+// clean exit thread: signals and an exit channel.
+
+package main
+
+import (
+	"github.com/stretchr/testify/assert"
+	"io/ioutil"
+	"os"
+	"testing"
+)
+
+// run the function fp() and return its return value and stdout
+func CaptureStdout(fp func() int) (int, string, error) {
+	origStdout := os.Stdout
+
+	// log.Cleanup() will call Sync on sys.stdout, and that doesn't
+	// work on pipes. Instead of creating a pipe, write the output
+	// to a file, then read that file back in.
+	f, err := ioutil.TempFile("", "arouter.json")
+	if err != nil {
+		return 0, "", err
+	}
+
+	// Make sure the file is closed and deleted on exit
+	defer func() { f.Close(); os.Remove(f.Name()) }()
+
+	// reassign stdout to the file, ensure it will be restored on exit
+	os.Stdout = f
+	defer func() { os.Stdout = origStdout }()
+
+	status := fp()
+
+	// read back the contents of the tempfile
+	_, err = f.Seek(0, 0)
+	if err != nil {
+		return 0, "", err
+	}
+	out := make([]byte, 16384)
+	numRead, err := f.Read(out)
+	if err != nil {
+		return 0, "", err
+	}
+
+	return status, string(out[:numRead]), nil
+}
+
+// Test output of "--version" command
+func TestStartupVersionOnly(t *testing.T) {
+	oldArgs := os.Args
+	defer func() { os.Args = oldArgs }()
+
+	os.Args = []string{os.Args[0], "--version"}
+
+	status, s, err := CaptureStdout(startup)
+	assert.Nil(t, err)
+
+	assert.Equal(t, 0, status)
+
+	expected := `VOLTHA API Server (afrouterd)
+  Version:      unknown-version
+  GoVersion:    unknown-goversion
+  VCS Ref:      unknown-vcsref
+  VCS Dirty:    unknown-vcsdirty
+  Built:        unknown-buildtime
+  OS/Arch:      unknown-os/unknown-arch
+
+`
+	assert.Equal(t, expected, s)
+}
+
+// An unknown command-line option should produce an error
+func TestStartupBadCommandLine(t *testing.T) {
+	oldArgs := os.Args
+	defer func() { os.Args = oldArgs }()
+
+	os.Args = []string{os.Args[0], "--badoption"}
+
+	status, s, err := CaptureStdout(startup)
+	assert.Nil(t, err)
+
+	assert.Equal(t, 1, status)
+
+	assert.Contains(t, s, "Error: flag provided but not defined: -badoption")
+}
diff --git a/internal/pkg/afrouterd/coreMonitor.go b/internal/pkg/afrouterd/coreMonitor.go
new file mode 100644
index 0000000..6a86c64
--- /dev/null
+++ b/internal/pkg/afrouterd/coreMonitor.go
@@ -0,0 +1,170 @@
+/*
+ * Copyright 2018-present Open Networking Foundation
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+
+ * http://www.apache.org/licenses/LICENSE-2.0
+
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package afrouterd
+
+import (
+	"fmt"
+	"github.com/golang/protobuf/ptypes/empty"
+	"github.com/opencord/voltha-go/common/log"
+	pb "github.com/opencord/voltha-protos/go/afrouter"
+	cmn "github.com/opencord/voltha-protos/go/common"
+	vpb "github.com/opencord/voltha-protos/go/voltha"
+	"golang.org/x/net/context"
+	"k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+	"time"
+)
+
+func getVolthaPods(cs *kubernetes.Clientset) ([]*volthaPod, error) {
+	pods, err := cs.CoreV1().Pods(podNamespace).List(metav1.ListOptions{LabelSelector: podLabelSelector})
+	if err != nil {
+		return nil, err
+	}
+
+	var rwPods []*volthaPod
+items:
+	for _, v := range pods.Items {
+		// only pods that are actually running should be considered
+		if v.Status.Phase == v1.PodRunning {
+			for _, condition := range v.Status.Conditions {
+				if condition.Status != v1.ConditionTrue {
+					continue items
+				}
+			}
+
+			if group, have := v.Labels[podAffinityGroupLabel]; have {
+				log.Debugf("Namespace: %s, PodName: %s, PodIP: %s, Host: %s\n", v.Namespace, v.Name, v.Status.PodIP, v.Spec.NodeName)
+				rwPods = append(rwPods, &volthaPod{
+					name:    v.Name,
+					ipAddr:  v.Status.PodIP,
+					node:    v.Spec.NodeName,
+					devIds:  make(map[string]struct{}),
+					backend: afrouterRWClusterName + group,
+				})
+			} else {
+				log.Warnf("Pod %s found matching % without label %", v.Name, podLabelSelector, podAffinityGroupLabel)
+			}
+		}
+	}
+	return rwPods, nil
+}
+
+func reconcilePodDeviceIds(ctx context.Context, pod *volthaPod, ids map[string]struct{}) {
+	ctxTimeout, _ := context.WithTimeout(ctx, time.Second*5)
+	conn, err := Connect(ctxTimeout, fmt.Sprintf("%s:%d", pod.ipAddr, podGrpcPort))
+	if err != nil {
+		log.Debugf("Could not reconcile devices from %s, could not connect: %s", pod.name, err)
+		return
+	}
+	defer conn.Close()
+
+	var idList cmn.IDs
+	for k := range ids {
+		idList.Items = append(idList.Items, &cmn.ID{Id: k})
+	}
+
+	client := vpb.NewVolthaServiceClient(conn)
+	_, err = client.ReconcileDevices(ctx, &idList)
+	if err != nil {
+		log.Errorf("Attempt to reconcile ids on pod %s failed: %s", pod.name, err)
+		return
+	}
+}
+
+func queryPodDeviceIds(ctx context.Context, pod *volthaPod) map[string]struct{} {
+	ctxTimeout, _ := context.WithTimeout(ctx, time.Second*5)
+	conn, err := Connect(ctxTimeout, fmt.Sprintf("%s:%d", pod.ipAddr, podGrpcPort))
+	if err != nil {
+		log.Debugf("Could not query devices from %s, could not connect: %s", pod.name, err)
+		return nil
+	}
+	defer conn.Close()
+
+	client := vpb.NewVolthaServiceClient(conn)
+	devs, err := client.ListDeviceIds(ctx, &empty.Empty{})
+	if err != nil {
+		log.Error(err)
+		return nil
+	}
+
+	var ret = make(map[string]struct{})
+	for _, dv := range devs.Items {
+		ret[dv.Id] = struct{}{}
+	}
+	return ret
+}
+
+// coreMonitor polls the list of devices from all RW cores, pushes these devices
+// into the affinity router, and ensures that all cores in a backend have their devices synced
+func CoreMonitor(ctx context.Context, client pb.ConfigurationClient, clientset *kubernetes.Clientset) {
+	// map[backend]map[deviceId]struct{}
+	deviceOwnership := make(map[string]map[string]struct{})
+loop:
+	for {
+		// get the rw core list from k8s
+		rwPods, err := getVolthaPods(clientset)
+		if err != nil {
+			log.Error(err)
+			continue
+		}
+
+		// for every pod
+		for _, pod := range rwPods {
+			// get the devices for this pod's backend
+			devices, have := deviceOwnership[pod.backend]
+			if !have {
+				devices = make(map[string]struct{})
+				deviceOwnership[pod.backend] = devices
+			}
+
+			coreDevices := queryPodDeviceIds(ctx, pod)
+
+			// handle devices that exist in the core, but we have just learned about
+			for deviceId := range coreDevices {
+				// if there's a new device
+				if _, have := devices[deviceId]; !have {
+					// add the device to our local list
+					devices[deviceId] = struct{}{}
+					// push the device into the affinity router
+					setAffinity(ctx, client, deviceId, pod.backend)
+				}
+			}
+
+			// ensure that the core knows about all devices in its backend
+			toSync := make(map[string]struct{})
+			for deviceId := range devices {
+				// if the pod is missing any devices
+				if _, have := coreDevices[deviceId]; !have {
+					// we will reconcile them
+					toSync[deviceId] = struct{}{}
+				}
+			}
+
+			if len(toSync) != 0 {
+				reconcilePodDeviceIds(ctx, pod, toSync)
+			}
+		}
+
+		select {
+		case <-ctx.Done():
+			// if we're done, exit
+			break loop
+		case <-time.After(10 * time.Second): // wait a while
+		}
+	}
+}
diff --git a/internal/pkg/afrouterd/discoveryMonitor.go b/internal/pkg/afrouterd/discoveryMonitor.go
new file mode 100644
index 0000000..2fab6ed
--- /dev/null
+++ b/internal/pkg/afrouterd/discoveryMonitor.go
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2018-present Open Networking Foundation
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+
+ * http://www.apache.org/licenses/LICENSE-2.0
+
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package afrouterd
+
+import (
+	"errors"
+	"github.com/golang/protobuf/ptypes"
+	"github.com/opencord/voltha-go/common/log"
+	"github.com/opencord/voltha-go/kafka"
+	pb "github.com/opencord/voltha-protos/go/afrouter"
+	ic "github.com/opencord/voltha-protos/go/inter_container"
+	"golang.org/x/net/context"
+	"regexp"
+	"time"
+)
+
+func newKafkaClient(clientType string, host string, port int, instanceID string) (kafka.Client, error) {
+	log.Infow("kafka-client-type", log.Fields{"client": clientType})
+	switch clientType {
+	case "sarama":
+		return kafka.NewSaramaClient(
+			kafka.Host(host),
+			kafka.Port(port),
+			kafka.ConsumerType(kafka.GroupCustomer),
+			kafka.ProducerReturnOnErrors(true),
+			kafka.ProducerReturnOnSuccess(true),
+			kafka.ProducerMaxRetries(6),
+			kafka.NumPartitions(3),
+			kafka.ConsumerGroupName(instanceID),
+			kafka.ConsumerGroupPrefix(instanceID),
+			kafka.AutoCreateTopic(false),
+			kafka.ProducerFlushFrequency(5),
+			kafka.ProducerRetryBackoff(time.Millisecond*30)), nil
+	}
+	return nil, errors.New("unsupported-client-type")
+}
+
+func monitorDiscovery(kc kafka.Client, ctx context.Context, client pb.ConfigurationClient, ch <-chan *ic.InterContainerMessage, doneCh chan<- struct{}) {
+	defer close(doneCh)
+	defer kc.Stop()
+
+monitorLoop:
+	for {
+		select {
+		case <-ctx.Done():
+			break monitorLoop
+		case msg := <-ch:
+			log.Debug("Received a device discovery notification")
+			device := &ic.DeviceDiscovered{}
+			if err := ptypes.UnmarshalAny(msg.Body, device); err != nil {
+				log.Errorf("Could not unmarshal received notification %v", msg)
+			} else {
+				// somewhat hackish solution, backend is known from the first digit found in the publisher name
+				group := regexp.MustCompile(`\d`).FindString(device.Publisher)
+				if group != "" {
+					// set the affinity of the discovered device
+					setAffinity(ctx, client, device.Id, afrouterRWClusterName+group)
+				} else {
+					log.Error("backend is unknown")
+				}
+			}
+		}
+	}
+}
+
+func StartDiscoveryMonitor(ctx context.Context, client pb.ConfigurationClient) (<-chan struct{}, error) {
+	doneCh := make(chan struct{})
+	// Connect to kafka for discovery events
+	kc, err := newKafkaClient(kafkaClientType, kafkaHost, kafkaPort, kafkaInstanceID)
+	if err != nil {
+		panic(err)
+	}
+
+	for {
+		if err := kc.Start(); err != nil {
+			log.Error("Could not connect to kafka")
+		} else {
+			break
+		}
+		select {
+		case <-ctx.Done():
+			close(doneCh)
+			return doneCh, errors.New("GRPC context done")
+
+		case <-time.After(5 * time.Second):
+		}
+	}
+	ch, err := kc.Subscribe(&kafka.Topic{Name: kafkaTopic})
+	if err != nil {
+		log.Errorf("Could not subscribe to the '%s' channel, discovery disabled", kafkaTopic)
+		close(doneCh)
+		kc.Stop()
+		return doneCh, err
+	}
+
+	go monitorDiscovery(kc, ctx, client, ch, doneCh)
+	return doneCh, nil
+}
diff --git a/internal/pkg/afrouterd/k8s.go b/internal/pkg/afrouterd/k8s.go
new file mode 100644
index 0000000..16ed7f3
--- /dev/null
+++ b/internal/pkg/afrouterd/k8s.go
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2018-present Open Networking Foundation
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+
+ * http://www.apache.org/licenses/LICENSE-2.0
+
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package afrouterd
+
+import (
+	"github.com/opencord/voltha-go/common/log"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/rest"
+	"k8s.io/client-go/tools/clientcmd"
+)
+
+func K8sClientSet() *kubernetes.Clientset {
+	var config *rest.Config
+	if k8sApiServer != "" || k8sKubeConfigPath != "" {
+		// use combination of URL & local kube-config file
+		c, err := clientcmd.BuildConfigFromFlags(k8sApiServer, k8sKubeConfigPath)
+		if err != nil {
+			panic(err)
+		}
+		config = c
+	} else {
+		// use in-cluster config
+		c, err := rest.InClusterConfig()
+		if err != nil {
+			log.Errorf("Unable to load in-cluster config.  Try setting K8S_API_SERVER and K8S_KUBE_CONFIG_PATH?")
+			panic(err)
+		}
+		config = c
+	}
+	// creates the clientset
+	clientset, err := kubernetes.NewForConfig(config)
+	if err != nil {
+		panic(err)
+	}
+
+	return clientset
+}
diff --git a/internal/pkg/afrouterd/misc.go b/internal/pkg/afrouterd/misc.go
new file mode 100644
index 0000000..09f3a8f
--- /dev/null
+++ b/internal/pkg/afrouterd/misc.go
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2018-present Open Networking Foundation
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+
+ * http://www.apache.org/licenses/LICENSE-2.0
+
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package afrouterd
+
+import (
+	"fmt"
+	"github.com/opencord/voltha-go/common/log"
+	pb "github.com/opencord/voltha-protos/go/afrouter"
+	"golang.org/x/net/context"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/connectivity"
+	"google.golang.org/grpc/keepalive"
+	"math"
+	"os"
+	"strconv"
+	"time"
+)
+
+type volthaPod struct {
+	name    string
+	ipAddr  string
+	node    string
+	devIds  map[string]struct{}
+	backend string
+}
+
+// TODO: These variables should be passed in from main() rather than
+// declared here.
+
+var (
+	// if k8s variables are undefined, will attempt to use in-cluster config
+	k8sApiServer      = GetStrEnv("K8S_API_SERVER", "")
+	k8sKubeConfigPath = GetStrEnv("K8S_KUBE_CONFIG_PATH", "")
+
+	podNamespace          = GetStrEnv("POD_NAMESPACE", "voltha")
+	podLabelSelector      = GetStrEnv("POD_LABEL_SELECTOR", "app=rw-core")
+	podAffinityGroupLabel = GetStrEnv("POD_AFFINITY_GROUP_LABEL", "affinity-group")
+
+	podGrpcPort = uint64(GetIntEnv("POD_GRPC_PORT", 0, math.MaxUint16, 50057))
+
+	afrouterRouterName    = GetStrEnv("AFROUTER_ROUTER_NAME", "vcore")
+	afrouterRouteName     = GetStrEnv("AFROUTER_ROUTE_NAME", "dev_manager")
+	afrouterRWClusterName = GetStrEnv("AFROUTER_RW_CLUSTER_NAME", "vcore")
+
+	kafkaTopic      = GetStrEnv("KAFKA_TOPIC", "AffinityRouter")
+	kafkaClientType = GetStrEnv("KAFKA_CLIENT_TYPE", "sarama")
+	kafkaHost       = GetStrEnv("KAFKA_HOST", "kafka")
+	kafkaPort       = GetIntEnv("KAFKA_PORT", 0, math.MaxUint16, 9092)
+	kafkaInstanceID = GetStrEnv("KAFKA_INSTANCE_ID", "arouterd")
+)
+
+func GetIntEnv(key string, min, max, defaultValue int) int {
+	if val, have := os.LookupEnv(key); have {
+		num, err := strconv.Atoi(val)
+		if err != nil || !(min <= num && num <= max) {
+			panic(fmt.Errorf("%s must be a number in the range [%d, %d]; default: %d", key, min, max, defaultValue))
+		}
+		return num
+	}
+	return defaultValue
+}
+
+func GetStrEnv(key, defaultValue string) string {
+	if val, have := os.LookupEnv(key); have {
+		return val
+	}
+	return defaultValue
+}
+
+func Connect(ctx context.Context, addr string) (*grpc.ClientConn, error) {
+	log.Debugf("Trying to connect to %s", addr)
+	conn, err := grpc.DialContext(ctx, addr,
+		grpc.WithInsecure(),
+		grpc.WithBlock(),
+		grpc.WithBackoffMaxDelay(time.Second*5),
+		grpc.WithKeepaliveParams(keepalive.ClientParameters{Time: time.Second * 10, Timeout: time.Second * 5}))
+	if err == nil {
+		log.Debugf("Connection succeeded")
+	}
+	return conn, err
+}
+
+func setAffinity(ctx context.Context, client pb.ConfigurationClient, deviceId string, backend string) {
+	log.Debugf("Configuring backend %s with device id %s \n", backend, deviceId)
+	if res, err := client.SetAffinity(ctx, &pb.Affinity{
+		Router:  afrouterRouterName,
+		Route:   afrouterRouteName,
+		Cluster: afrouterRWClusterName,
+		Backend: backend,
+		Id:      deviceId,
+	}); err != nil {
+		log.Debugf("failed affinity RPC call: %s\n", err)
+	} else {
+		log.Debugf("Result: %v\n", res)
+	}
+}
+
+// endOnClose cancels the context when the connection closes
+func ConnectionActiveContext(conn *grpc.ClientConn) context.Context {
+	ctx, disconnected := context.WithCancel(context.Background())
+	go func() {
+		for state := conn.GetState(); state != connectivity.TransientFailure && state != connectivity.Shutdown; state = conn.GetState() {
+			if !conn.WaitForStateChange(context.Background(), state) {
+				break
+			}
+		}
+		log.Infof("Connection to afrouter lost")
+		disconnected()
+	}()
+	return ctx
+}
diff --git a/internal/pkg/afrouterd/misc_test.go b/internal/pkg/afrouterd/misc_test.go
new file mode 100644
index 0000000..a07b9aa
--- /dev/null
+++ b/internal/pkg/afrouterd/misc_test.go
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2018-present Open Networking Foundation
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+
+ * http://www.apache.org/licenses/LICENSE-2.0
+
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file implements an exit handler that tries to shut down all the
+// running servers before finally exiting. There are 2 triggers to this
+// clean exit thread: signals and an exit channel.
+
+package afrouterd
+
+import (
+	"github.com/stretchr/testify/assert"
+	"os"
+	"testing"
+)
+
+func TestGetIntEnv(t *testing.T) {
+
+	err := os.Setenv("testkey", "123")
+	assert.Nil(t, err)
+
+	defer func() { os.Unsetenv("testkey") }()
+
+	v := GetIntEnv("testkey", 0, 1000, 456)
+	assert.Equal(t, 123, v)
+
+	v = GetIntEnv("doesnotexist", 0, 1000, 456)
+	assert.Equal(t, 456, v)
+}
+
+func TestGetIntEnvTooLow(t *testing.T) {
+
+	err := os.Setenv("testkey", "-1")
+	assert.Nil(t, err)
+
+	defer func() { os.Unsetenv("testkey") }()
+
+	defer func() {
+		if r := recover(); r == nil {
+			t.Errorf("The code did not panic")
+		}
+	}()
+	_ = GetIntEnv("testkey", 0, 1000, 456)
+}
+
+func TestGetIntEnvTooHigh(t *testing.T) {
+
+	err := os.Setenv("testkey", "1001")
+	assert.Nil(t, err)
+
+	defer func() { os.Unsetenv("testkey") }()
+
+	defer func() {
+		if r := recover(); r == nil {
+			t.Errorf("The code did not panic")
+		}
+	}()
+	_ = GetIntEnv("testkey", 0, 1000, 456)
+}
+
+func TestGetIntEnvNotInteger(t *testing.T) {
+
+	err := os.Setenv("testkey", "stuff")
+	assert.Nil(t, err)
+
+	defer func() { os.Unsetenv("testkey") }()
+
+	defer func() {
+		if r := recover(); r == nil {
+			t.Errorf("The code did not panic")
+		}
+	}()
+	_ = GetIntEnv("testkey", 0, 1000, 456)
+}
+
+func TestGetStrEnv(t *testing.T) {
+
+	err := os.Setenv("testkey", "abc")
+	assert.Nil(t, err)
+
+	defer func() { os.Unsetenv("testkey") }()
+
+	v := GetStrEnv("testkey", "def")
+	assert.Equal(t, "abc", v)
+
+	v = GetStrEnv("doesnotexist", "def")
+	assert.Equal(t, "def", v)
+}