Run each cord instance in the cluster with its own data volume, shared with the host, for persistent storage.
Fix and update the cluster controller restart test case, which restarts each ONOS controller instance and verifies the cluster state.
Change-Id: I658ea2270ed6be6dead42663a510654452410568
diff --git a/src/test/cluster/clusterTest.py b/src/test/cluster/clusterTest.py
index b4c5f16..080f547 100644
--- a/src/test/cluster/clusterTest.py
+++ b/src/test/cluster/clusterTest.py
@@ -61,7 +61,7 @@
acl = cluster_acl()
dhcprelay = cluster_dhcprelay()
subscriber = cluster_subscriber()
- testcaseLoggers = ('test_cluster_controller_kills',)
+ testcaseLoggers = ('test_cluster_controller_restarts',)
def setUp(self):
if self._testMethodName not in self.testcaseLoggers:
@@ -115,6 +115,11 @@
result.append(leaders)
return result
+    def verify_leaders(self, controller = None):  # Return the entries of get_leaders() that came back as None.
+        leaders = self.get_leaders(controller = controller)
+        failed = [ leader for leader in leaders if leader is None ]  # a real list, so callers can rely on len() and truthiness
+        return failed
+
def verify_cluster_status(self,controller = None,onos_instances=ONOS_INSTANCES,verify=False):
tries = 0
try:
@@ -303,7 +308,7 @@
log.info('Cluster new master is %s'%new_master_ip)
return True
- def test_cluster_controller_kills(self):
+ def test_cluster_controller_restarts(self):
'''Test the cluster by repeatedly killing the controllers'''
controllers = self.get_controllers()
ctlr_len = len(controllers)
@@ -314,7 +319,7 @@
#this call would verify the cluster for once
onos_map = self.get_cluster_container_names_ips()
- def check_storage_exception(controller = None):
+ def check_exception(controller = None):
adjacent_controller = None
adjacent_controllers = None
if controller:
@@ -324,23 +329,27 @@
onosLog = OnosLog(host = node)
##check the logs for storage exception
_, output = onosLog.get_log(('ERROR', 'Exception',))
- if output and output.find('StorageException') >= 0:
- log.info('Storage Exception found on node: %s' %node)
+ if output and output.find('StorageException$Timeout') >= 0:
+ log.info('\nStorage Exception Timeout found on node: %s\n' %node)
+ log.info('Dumping the ERROR and Exception logs for node: %s\n' %node)
+ log.info('\n' + '-' * 50 + '\n')
log.info('%s' %output)
- assert_equal('Storage Exception on node {}'.format(node), False)
+ log.info('\n' + '-' * 50 + '\n')
+ failed = self.verify_leaders(controllers)
+ if failed:
+ log.info('Leaders command failed on node: %s' %node)
+ assert_equal(len(failed), 0)
return controller
try:
- ips = self.get_cluster_current_member_ips(controller = controller)
+ ips = self.get_cluster_current_member_ips(controller = adjacent_controller)
print('ONOS cluster formed with controllers: %s' %ips)
st = True
except:
st = False
- leaders = self.get_leaders(controllers)
- failed = filter(lambda l: l == None, leaders)
+ failed = self.verify_leaders(controllers)
assert_equal(len(failed), 0)
-
if st is False:
log.info('No storage exception and ONOS cluster was not formed successfully')
else:
@@ -361,7 +370,7 @@
except:
time.sleep(5)
continue
- next_controller = check_storage_exception(controller = controller)
+ next_controller = check_exception(controller = controller)
#pass
def test_cluster_formation_and_verification(self,onos_instances = ONOS_INSTANCES):
diff --git a/src/test/setup/cord-test.py b/src/test/setup/cord-test.py
index ca70ba5..5b21c2b 100755
--- a/src/test/setup/cord-test.py
+++ b/src/test/setup/cord-test.py
@@ -402,8 +402,9 @@
Onos.IMAGE = onos_cnt['image']
Onos.PREFIX = args.prefix
Onos.TAG = onos_cnt['tag']
+ data_volume = '{}-data'.format(Onos.NAME)
onos = Onos(image = Onos.IMAGE,
- tag = Onos.TAG, boot_delay = 60)
+ tag = Onos.TAG, boot_delay = 60, cluster = cluster_mode, data_volume = data_volume)
onos_ip = onos.ip()
onos_ips = [ onos_ip ]
num_onos_instances = args.onos_instances
@@ -412,7 +413,9 @@
onos_instances.append(onos)
for i in range(1, num_onos_instances):
name = '{}-{}'.format(Onos.NAME, i+1)
- onos = Onos(name = name, image = Onos.IMAGE, tag = Onos.TAG, boot_delay = 60, cluster = cluster_mode)
+ data_volume = '{}-data'.format(name)
+ onos = Onos(name = name, image = Onos.IMAGE, tag = Onos.TAG, boot_delay = 60, cluster = cluster_mode,
+ data_volume = data_volume)
onos_instances.append(onos)
onos_ips.append(onos.ipaddr)
try:
@@ -622,7 +625,9 @@
cluster_mode = True if args.onos_instances > 1 else False
onos = None
if onos_ip is None:
- onos = Onos(image = Onos.IMAGE, tag = Onos.TAG, boot_delay = 60, cluster = cluster_mode)
+ data_volume = '{}-data'.format(Onos.NAME)
+ onos = Onos(image = Onos.IMAGE, tag = Onos.TAG, boot_delay = 60, cluster = cluster_mode,
+ data_volume = data_volume)
onos_ip = onos.ip()
num_onos_instances = args.onos_instances
@@ -632,7 +637,9 @@
onos_instances.append(onos)
for i in range(1, num_onos_instances):
name = '{}-{}'.format(Onos.NAME, i+1)
- onos = Onos(name = name, image = Onos.IMAGE, tag = Onos.TAG, boot_delay = 60, cluster = cluster_mode)
+ data_volume = '{}-data'.format(name)
+ onos = Onos(name = name, image = Onos.IMAGE, tag = Onos.TAG, boot_delay = 60, cluster = cluster_mode,
+ data_volume = data_volume)
onos_instances.append(onos)
onos_ips.append(onos.ipaddr)
Onos.setup_cluster(onos_instances)
@@ -737,7 +744,11 @@
for onos in onos_list:
Container.dckr.kill(onos)
Container.dckr.remove_container(onos, force=True)
+ for index in range(len(onos_list)):
+ volume = '{}-data'.format(Onos.NAME) if index == 0 else '{}-{}-data'.format(Onos.NAME, index+1)
+ Onos.remove_data_map(volume, Onos.guest_data_dir)
Onos.cleanup_runtime()
+
if args.xos:
##cleanup XOS images
xos_images = ( '{}:{}'.format(XosServer.IMAGE,XosServer.TAG),
diff --git a/src/test/utils/CordContainer.py b/src/test/utils/CordContainer.py
index c4a73a0..a228819 100644
--- a/src/test/utils/CordContainer.py
+++ b/src/test/utils/CordContainer.py
@@ -21,7 +21,7 @@
from itertools import chain
from nsenter import Namespace
from docker import Client
-from shutil import copy
+from shutil import rmtree
from OnosCtrl import OnosCtrl
from OnosLog import OnosLog
@@ -314,6 +314,7 @@
setup_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'setup')
host_config_dir = os.path.join(setup_dir, 'onos-config')
guest_config_dir = '/root/onos/config'
+ guest_data_dir = '/root/onos/apache-karaf-3.0.5/data'
onos_gen_partitions = os.path.join(setup_dir, 'onos-gen-partitions')
onos_form_cluster = os.path.join(setup_dir, 'onos-form-cluster')
cord_apps_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'apps')
@@ -357,8 +358,26 @@
os.unlink(f)
except: pass
+ @classmethod
+ def get_data_map(cls, host_volume, guest_volume_dir):  # Build the (host dir, guest dir) mapping for a data volume.
+ host_volume_dir = os.path.join(cls.setup_dir, os.path.basename(host_volume))  # host dir sits under setup_dir, named by the volume's basename
+ if not os.path.exists(host_volume_dir):
+ os.mkdir(host_volume_dir)  # create the host directory if it does not exist yet
+ return ( (host_volume_dir, guest_volume_dir), )  # 1-tuple containing a single (host, guest) pair
+
+ @classmethod
+ def remove_data_map(cls, host_volume, guest_volume_dir):  # Delete the host directory backing a data volume; guest_volume_dir is not used here.
+ host_volume_dir = os.path.join(cls.setup_dir, os.path.basename(host_volume))  # same path derivation as get_data_map
+ if os.path.exists(host_volume_dir):
+ rmtree(host_volume_dir)  # recursively removes the directory and its contents
+
+    def remove_data_volume(self):  # Delete this instance's host-side data directory, if one was mapped.
+        if self.data_map is not None:
+            self.remove_data_map(*self.data_map[0])  # data_map is a 1-tuple holding one (host_dir, guest_dir) pair; unpack the pair, not the outer tuple
+
def __init__(self, name = NAME, image = IMAGE, prefix = PREFIX, tag = TAG,
- boot_delay = 20, restart = False, network_cfg = None, cluster = False):
+ boot_delay = 20, restart = False, network_cfg = None,
+ cluster = False, data_volume = None):
if restart is True:
##Find the right image to restart
running_image = filter(lambda c: c['Names'][0] == '/{}'.format(name), self.dckr.containers())
@@ -371,9 +390,13 @@
super(Onos, self).__init__(name, image, prefix = prefix, tag = tag, quagga_config = self.quagga_config)
self.boot_delay = boot_delay
+ self.data_map = None
if cluster is True:
self.ports = []
self.env['JAVA_OPTS'] = self.JAVA_OPTS_CLUSTER
+ if data_volume is not None:
+ self.data_map = self.get_data_map(data_volume, self.guest_data_dir)
+ self.host_guest_map = self.host_guest_map + self.data_map
if os.access(self.cluster_cfg, os.F_OK):
try:
os.unlink(self.cluster_cfg)
@@ -561,6 +584,8 @@
print('Restarting ONOS container %s' %onos.name)
onos.start(ports = onos.ports, environment = onos.env,
host_config = onos.host_config, volumes = onos.volumes, tty = True)
+ #onos.ipaddr = onos.ip()
+ #onos.wait_for_onos_start(onos.ipaddr)
print('Waiting %d seconds for ONOS %s to boot' %(onos.boot_delay, onos.name))
time.sleep(onos.boot_delay)
onos.ipaddr = onos.ip()