VOL-794: Assignment lists in etcd not updated correctly in presence of vcore failures

* Fixed coordinator method _sanitize_member_list (see the core-number
  extraction sketch after this list)
* Fixed keys-only GET operation for etcd KV store
* Fixed key DELETE operation for etcd KV store
* Modified vcore manifest to allow a failed pod to be re-spawned on a
  different host but with the same pod name
* Modified zookeeper manifest to allow a failed pod to be re-spawned
  on a different host. With the previous manifest, when all 3 zookeeper
  pods were scheduled on the same node and that node failed, none of
  the pods were re-spawned.
* Added NodePort for the Envoy service
* Removed anti-affinity rule from OFAgent and NetConf manifests to allow
  the deployment of multiple pods on the same node
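
The moving part that ties the vcore and coordinator changes together is
the core-number extraction: a StatefulSet pod keeps its name (e.g.
vcore-1) across re-spawns, so the new --core-number-extractor flag lets
the leader recover the core number from the member id instead of relying
on the Docker-swarm-style default regex. A minimal sketch of that
extraction, using the regexes introduced in this change (the sample
member ids below are illustrative, not taken from the code):

    import re

    # Regex passed via --core-number-extractor in the k8s manifests
    K8S_CORE_NUMBER_EXTRACTOR = r'^.*-([0-9]+)_.*$'
    # Default regex, matching Docker-swarm-style container names
    SWARM_CORE_NUMBER_EXTRACTOR = r'^.*\.([0-9]+)\..*$'
    # Start timestamp appended to the instance id in main.py
    START_TIMESTAMP_EXTRACTOR = r'^.*_([0-9]+)$'

    def core_and_timestamp(member_id, core_regex):
        # Pull the core number and start timestamp out of a member id
        core = re.match(core_regex, member_id)
        ts = re.match(START_TIMESTAMP_EXTRACTOR, member_id)
        return (core.group(1) if core else None,
                ts.group(1) if ts else None)

    # StatefulSet pod: stable name, new timestamp on every re-spawn
    print(core_and_timestamp('vcore-1_1519651234',
                             K8S_CORE_NUMBER_EXTRACTOR))   # ('1', '1519651234')

    # Hypothetical swarm-style container name, handled by the default regex
    print(core_and_timestamp('compose_vcore.2.abc123_1519651234',
                             SWARM_CORE_NUMBER_EXTRACTOR)) # ('2', '1519651234')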

Change-Id: I052d952d81a81cafb96acfc1d57a192596e2e9a1
diff --git a/k8s/envoy_for_consul.yml b/k8s/envoy_for_consul.yml
index 6ed7923..e80812e 100644
--- a/k8s/envoy_for_consul.yml
+++ b/k8s/envoy_for_consul.yml
@@ -6,7 +6,7 @@
   labels:
     name: voltha
 spec:
-  clusterIP: None
+  type: NodePort
   ports:
     - name: rest
       port: 8882
@@ -16,6 +16,7 @@
       targetPort: 8001
     - name: mystery2
       port: 8443
+      nodePort: 32443
       targetPort: 8443
     - name: grpc
       port: 50555
diff --git a/k8s/envoy_for_etcd.yml b/k8s/envoy_for_etcd.yml
index af5c1f4..d217eee 100644
--- a/k8s/envoy_for_etcd.yml
+++ b/k8s/envoy_for_etcd.yml
@@ -6,7 +6,7 @@
   labels:
     name: voltha
 spec:
-  clusterIP: None
+  type: NodePort
   ports:
     - name: rest
       port: 8882
@@ -16,6 +16,7 @@
       targetPort: 8001
     - name: mystery2
       port: 8443
+      nodePort: 32443
       targetPort: 8443
     - name: grpc
       port: 50555
diff --git a/k8s/netconf.yml b/k8s/netconf.yml
index 9bb7e67..b15bc5c 100644
--- a/k8s/netconf.yml
+++ b/k8s/netconf.yml
@@ -26,16 +26,6 @@
         cni: "calico"
     spec:
       terminationGracePeriodSeconds: 10
-      affinity:
-        podAntiAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            - labelSelector:
-                matchExpressions:
-                  - key: app
-                    operator: In
-                    values:
-                      - netconf
-              topologyKey: kubernetes.io/hostname
       containers:
       - name: netconf
         image: voltha-netconf
diff --git a/k8s/ofagent.yml b/k8s/ofagent.yml
index c2415ae..5449947 100644
--- a/k8s/ofagent.yml
+++ b/k8s/ofagent.yml
@@ -13,16 +13,6 @@
         cni: "calico"
     spec:
       terminationGracePeriodSeconds: 10
-      affinity:
-        podAntiAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-          - labelSelector:
-              matchExpressions:
-              - key: app
-                operator: In
-                values:
-                - ofagent
-            topologyKey: kubernetes.io/hostname
       containers:
       - name: ofagent
         image: voltha-ofagent
diff --git a/k8s/vcore_for_consul.yml b/k8s/vcore_for_consul.yml
index 2679451..b32f1c2 100644
--- a/k8s/vcore_for_consul.yml
+++ b/k8s/vcore_for_consul.yml
@@ -61,3 +61,4 @@
         - "--backend=consul"
         - "--pon-subnet=10.38.0.0/12"
         - "--ponsim-comm=grpc"
+        - "--core-number-extractor=^.*-([0-9]+)_.*$"
diff --git a/k8s/vcore_for_etcd.yml b/k8s/vcore_for_etcd.yml
index 37ceeea..b2d5d5d 100644
--- a/k8s/vcore_for_etcd.yml
+++ b/k8s/vcore_for_etcd.yml
@@ -20,13 +20,19 @@
   selector:
     app: vcore
 ---
-apiVersion: apps/v1beta1
-kind: Deployment
+apiVersion: apps/v1
+kind: StatefulSet
 metadata:
   name: vcore
   namespace: voltha
 spec:
+  serviceName: vcore
   replicas: 3
+  updateStrategy:
+    type: RollingUpdate
+  selector:
+    matchLabels:
+      app: vcore
   template:
     metadata:
       labels:
@@ -34,6 +40,7 @@
       annotations:
         cni: "calico"
     spec:
+      terminationGracePeriodSeconds: 0
       containers:
         - name: voltha
           image: voltha-voltha
@@ -53,6 +60,7 @@
             - "--backend=etcd"
             - "--pon-subnet=10.38.0.0/12"
             - "--ponsim-comm=grpc"
+            - "--core-number-extractor=^.*-([0-9]+)_.*$"
           ports:
             - containerPort: 8880
               name: rest-port
diff --git a/k8s/zookeeper.yml b/k8s/zookeeper.yml
index 1c7f02d..93e4385 100644
--- a/k8s/zookeeper.yml
+++ b/k8s/zookeeper.yml
@@ -78,6 +78,7 @@
       annotations:
         cni: "calico"
     spec:
+      terminationGracePeriodSeconds: 0
       containers:
       - name: zoo1
         image: zookeeper:3.4.11
@@ -118,6 +119,7 @@
       annotations:
         cni: "calico"
     spec:
+      terminationGracePeriodSeconds: 0
       containers:
       - name: zoo2
         image: zookeeper:3.4.11
@@ -158,6 +160,7 @@
       annotations:
         cni: "calico"
     spec:
+      terminationGracePeriodSeconds: 0
       containers:
       - name: zoo3
         image: zookeeper:3.4.11
diff --git a/voltha/coordinator.py b/voltha/coordinator.py
index 9bad7d0..5b6f68f 100644
--- a/voltha/coordinator.py
+++ b/voltha/coordinator.py
@@ -65,7 +65,8 @@
                  instance_id,
                  rest_port,
                  config,
-                 consul='localhost:8500'):
+                 consul='localhost:8500',
+                 container_name_regex='^.*\.([0-9]+)\..*$'):
 
         log.info('initializing-coordinator')
         self.config = config['coordinator']
@@ -120,6 +121,8 @@
         # TODO need to handle reconnect events properly
         self.consul = Consul(host=self.host, port=self.port)
 
+        self.container_name_regex = container_name_regex
+
         self.wait_for_leader_deferreds = []
 
         self.peers_mapping_queue = MessageQueue()
diff --git a/voltha/coordinator_etcd.py b/voltha/coordinator_etcd.py
index 238223f..8ed25e0 100644
--- a/voltha/coordinator_etcd.py
+++ b/voltha/coordinator_etcd.py
@@ -66,7 +66,8 @@
                  rest_port,
                  config,
                  consul='localhost:8500',
-                 etcd='localhost:2379'):
+                 etcd='localhost:2379',
+                 container_name_regex='^.*\.([0-9]+)\..*$'):
 
         log.info('initializing-coordinator')
         self.config = config['coordinator']
@@ -129,6 +130,8 @@
         self.etcd_url = u'http://' + kv_host + u':' + kv_port
         self.etcd = Client(reactor, self.etcd_url)
 
+        self.container_name_regex = container_name_regex
+
         self.wait_for_leader_deferreds = []
 
         self.peers_mapping_queue = MessageQueue()
@@ -499,16 +502,19 @@
     @inlineCallbacks
     def _retry(self, operation, *args, **kw):
         prefix = False
+        keys_only = False
         for name, value in kw.items():
             if name == 'acquire':
                 lease = value
                 kw['lease'] = lease
                 kw.pop('acquire')
             elif name == 'keys':
+                keys_only = True
+                prefix = True
+                keyset = KeySet(bytes(args[0]), prefix=True)
                 kw['keys_only'] = True
                 kw.pop('keys')
             elif name=='recurse':
-#               if value == 'True':
                 prefix = True
                 keyset = KeySet(bytes(args[0]), prefix=True)
                 kw.pop('recurse')
@@ -536,8 +542,10 @@
                                 result = (index, record)
                     else:
                         # Get values for all keys that match the prefix
+                        # If keys_only requested, get only the keys
                         index = 0
                         records = []
+                        keys = []
                         res = yield etcd.get(keyset, **kw)
                         if args[0] == 'service/voltha/assignments/':
                             log.info('assignments', result=res)
@@ -546,19 +554,22 @@
                                 # Which index should be returned? The max over all keys?
                                 if kv.mod_revision > index:
                                     index = kv.mod_revision
-                                rec = dict()
-                                rec['Key'] = kv.key
-                                rec['Value'] = kv.value
-                                rec['ModifyIndex'] = kv.mod_revision
-                                rec['Session'] = self.lease.lease_id if self.lease else ''
-                                records.append(rec)
-                        result = (index, records)
+                                if keys_only:
+                                    keys.append(kv.key)
+                                else:
+                                    rec = dict()
+                                    rec['Key'] = kv.key
+                                    rec['Value'] = kv.value
+                                    rec['ModifyIndex'] = kv.mod_revision
+                                    rec['Session'] = self.lease.lease_id if self.lease else ''
+                                    records.append(rec)
+                        result = (index, keys) if keys_only else (index, records)
                 elif operation == 'PUT':
                     key = bytes(args[0])
                     result = yield etcd.set(key, args[1], **kw)
                 elif operation == 'DELETE':
                     key = bytes(args[0])
-                    result = yield etcd.delete(key, **kw)
+                    result = yield etcd.delete(keyset)
                 else:
                     # Default case - consider operation as a function call
                     result = yield operation(*args, **kw)
diff --git a/voltha/leader.py b/voltha/leader.py
index 60d1e6c..54f1117 100644
--- a/voltha/leader.py
+++ b/voltha/leader.py
@@ -42,9 +42,8 @@
     """
 
     ID_EXTRACTOR = '^(%s)([^/]+)$'
-    CORE_STORE_KEY_EXTRACTOR = '^%s(?P<core_store_id>[^/]+)/root$'
-    CORE_NUMBER_EXTRACTOR = '^.*\.([0-9]+)\..*$'
-    START_TIMESTAMP_EXTRACTOR = '^.*\..*\..*_([0-9]+)$'
+    CORE_STORE_KEY_EXTRACTOR = '^%s/(?P<core_store_id>[^/]+)/root$'
+    START_TIMESTAMP_EXTRACTOR = '^.*_([0-9]+)$'
     ASSIGNMENT_ID_EXTRACTOR = '^(%s)([^/]+)/core_store$'
 
     # Public methods:
@@ -72,8 +71,8 @@
         self.core_data_id_match = re.compile(
             self.CORE_STORE_KEY_EXTRACTOR % self.coord.core_store_prefix).match
 
-        self.core_match = re.compile(self.CORE_NUMBER_EXTRACTOR).match
-        self.timestamp_match = re.compile(self.START_TIMESTAMP_EXTRACTOR ).match
+        self.core_match = re.compile(self.coord.container_name_regex).match
+        self.timestamp_match = re.compile(self.START_TIMESTAMP_EXTRACTOR).match
 
         self.assignment_id_match = re.compile(
             self.ASSIGNMENT_ID_EXTRACTOR % self.coord.assignment_prefix).match
@@ -168,9 +167,10 @@
         # This method removes any duplicates from the member list using the
         # voltha number from the member id and the time that voltha instance
         # started, again from the member id.  This method is meaningful only
-        # in the swarm cluster.  In a non-cluster environment the member id
-        # is formatted differently.  In such a case, the method below will
-        # create an exception and return the member list as is.
+        # in a clustered environment (e.g. Docker swarm or Kubernetes). In
+        # a non-cluster environment the member id is formatted differently.
+        # In such a case, the method below will create an exception and
+        # return the member list as is.
 
         try:
             unique_members = {}
@@ -202,7 +202,8 @@
                 return updated_members
             else:
                 return members
-        except:
+        except Exception as e:
+            log.exception('extraction-error', e=e)
             return members
 
     @inlineCallbacks
diff --git a/voltha/main.py b/voltha/main.py
index 29585b4..c19a018 100755
--- a/voltha/main.py
+++ b/voltha/main.py
@@ -52,6 +52,7 @@
 
 defs = dict(
     config=os.environ.get('CONFIG', './voltha.yml'),
+    container_name_regex=os.environ.get('CORE_NUMBER_EXTRACTOR', '^.*\.([0-9]+)\..*$'),
     consul=os.environ.get('CONSUL', 'localhost:8500'),
     etcd=os.environ.get('ETCD', 'localhost:2379'),
     inter_core_subnet=os.environ.get('INTER_CORE_SUBNET', None),
@@ -83,6 +84,12 @@
                         default=defs['config'],
                         help=_help)
 
+    _help = 'Regular expression for extracting core number from container name (default: %s)' % defs['container_name_regex']
+    parser.add_argument(
+        '-X', '--core-number-extractor', dest='container_name_regex', action='store',
+        default=defs['container_name_regex'],
+        help=_help)
+
     _help = '<hostname>:<port> to consul agent (default: %s)' % defs['consul']
     parser.add_argument(
         '-C', '--consul', dest='consul', action='store',
@@ -281,6 +288,7 @@
         self.log = setup_logging(self.config.get('logging', {}),
                                  args.instance_id,
                                  verbosity_adjust=verbosity_adjust)
+        self.log.info('core-number-extractor', regex=args.container_name_regex)
 
         # configurable variables from voltha.yml file
         #self.configurable_vars = self.config.get('Constants', {})
@@ -288,7 +296,7 @@
         if not args.no_banner:
             print_banner(self.log)
 
-        # Create a unique instnce id using the passed-in instanceid and
+        # Create a unique instance id using the passed-in instance id and
         # UTC timestamp
         current_time = arrow.utcnow().timestamp
         self.instance_id = self.args.instance_id + '_' + str(current_time)
@@ -340,7 +348,8 @@
                         rest_port=self.args.rest_port,
                         instance_id=self.instance_id,
                         config=self.config,
-                        consul=self.args.consul)
+                        consul=self.args.consul,
+                        container_name_regex=self.args.container_name_regex)
                 ).start()
             elif self.args.backend == 'etcd':
                 yield registry.register(
@@ -352,7 +361,8 @@
                         instance_id=self.instance_id,
                         config=self.config,
                         consul=self.args.consul,
-                        etcd=self.args.etcd)
+                        etcd=self.args.etcd,
+                        container_name_regex=self.args.container_name_regex)
                 ).start()
 
             self.log.info('waiting-for-config-assignment')