#
# Copyright 2017 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import re
from structlog import get_logger
from twisted.internet import reactor
from twisted.internet.base import DelayedCall
from twisted.internet.defer import inlineCallbacks, returnValue, Deferred
from simplejson import loads
from common.utils.asleep import asleep
log = get_logger()
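# Illustrative wiring sketch (comments only; the Coordinator class is
# defined elsewhere in this repo and its constructor arguments are elided
# here, so treat the names below as assumptions):
#
#   coordinator = Coordinator(...)  # hypothetical setup
#   worker = Worker(instance_id='voltha-1', coordinator=coordinator)
#   reactor.callLater(0, worker.start)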
class Worker(object):
"""
Worker side of the coordinator. An instance of this class runs in every
voltha instance. It monitors what work is assigned to this instance by
the leader. This is all done via consul.
"""
ASSIGNMENT_EXTRACTOR = '^%s(?P<member_id>[^/]+)/(?P<work_id>[^/]+)$'
# Public methods:
def __init__(self, instance_id, coordinator):
self.instance_id = instance_id
self.coord = coordinator
self.halted = False
self.soak_time = 0.5 # soak till assignment list settles
        self.my_workload = set()  # set of work_ids assigned to me
self.assignment_soak_timer = None
self.assignment_core_store_soak_timer = None
self.my_candidate_workload = set() # we stash here during soaking
self.assignment_match = re.compile(
self.ASSIGNMENT_EXTRACTOR % self.coord.assignment_prefix).match
self.mycore_store_id = None
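        # Fired with our store id by _process_config_assignment(); see
        # get_core_store_id()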
self.wait_for_core_store_assignment = Deferred()
self.peers_map = None
@inlineCallbacks
def start(self):
log.debug('starting')
yield self._start_tracking_my_assignments()
yield self._start_tracking_my_peers()
log.info('started')
returnValue(self)
def stop(self):
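        """
        Halt the worker and cancel any pending soak timers.
        """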
log.debug('stopping')
self.halted = True
if isinstance(self.assignment_soak_timer, DelayedCall):
if not self.assignment_soak_timer.called:
self.assignment_soak_timer.cancel()
if isinstance(self.assignment_core_store_soak_timer, DelayedCall):
if not self.assignment_core_store_soak_timer.called:
self.assignment_core_store_soak_timer.cancel()
log.info('stopped')
@inlineCallbacks
def get_core_store_id(self):
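        """
        Return the core store id assigned by the leader, waiting on the
        wait_for_core_store_assignment deferred if none is set yet.
        """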
if self.mycore_store_id:
returnValue(self.mycore_store_id)
else:
# Let's wait until we get assigned a store_id from the leader
val = yield self.wait_for_core_store_assignment
returnValue(val)
# Private methods:
def _start_tracking_my_assignments(self):
reactor.callLater(0, self._track_my_assignments, 0)
def _start_tracking_my_peers(self):
reactor.callLater(0, self._track_my_peers, 0)
@inlineCallbacks
def _track_my_assignments(self, index):
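        """
        Watch our assignment subtree in the KV store (a blocking query via
        the index parameter) and pick up the core store id once the leader
        assigns one. Reschedules itself until a store id is received.
        """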
try:
# if there is no leader yet, wait for a stable leader
d = self.coord.wait_for_a_leader()
if not d.called:
yield d
# additional time to let leader update
# assignments, to minimize potential churn
            yield asleep(self.coord.worker_config.get(
                'time_to_let_leader_update', 5))
(index, results) = yield self.coord.kv_get(
self.coord.assignment_prefix + self.instance_id,
index=index, recurse=True)
# 1. Check whether we have been assigned a full voltha instance
if results and not self.mycore_store_id:
# We have no store id set yet
core_stores = [c['Value'] for c in results if
c['Key'] == self.coord.assignment_prefix +
self.instance_id + '/' +
self.coord.core_storage_suffix and c['Value']]
if core_stores:
self.mycore_store_id = core_stores[0]
log.debug('store-assigned',
mycore_store_id=self.mycore_store_id)
self._stash_and_restart_core_store_soak_timer()
# 2. Check whether we have been assigned a work item
if results and self.mycore_store_id:
                # Check for differences between the current workload and
                # the new one
# TODO: Depending on how workload gets load balanced we may
# need to add workload distribution here
pass
        except Exception as e:
log.exception('assignments-track-error', e=e)
            yield asleep(
                self.coord.worker_config.get(
                    'assignments_track_error_to_avoid_flood', 1))
# to prevent flood
finally:
if not self.halted and not self.mycore_store_id:
reactor.callLater(0, self._track_my_assignments, index)
@inlineCallbacks
def _track_my_peers(self, index):
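        """
        Watch the core store assignment key and, when the mapping changes,
        publish a peers-map-change event with our own id removed.
        """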
try:
prev_index = index
if self.mycore_store_id:
                # Wait for updates to the store assignment key
is_timeout, (tmp_index, mappings) = yield \
self.coord.consul_get_with_timeout(
key=self.coord.core_store_assignment_key,
recurse=True,
index=index,
timeout=10)
if is_timeout:
return
                # After a timeout event the index returned by
                # consul_get_with_timeout is None. If we get here it was
                # not a timeout, so the index is valid.
                index = tmp_index
if mappings and index != prev_index:
new_map = loads(mappings[0]['Value'])
                    # Remove our own id so the map contains only peers
                    new_map.pop(self.mycore_store_id, None)
if self.peers_map is None or self.peers_map != new_map:
self.coord.publish_peers_map_change(new_map)
self.peers_map = new_map
log.info('peer-mapping-changed', mapping=new_map)
else:
log.debug('no-mapping-change', mappings=mappings,
index=index, prev_index=prev_index)
        except Exception as e:
log.exception('peer-track-error', e=e)
            yield asleep(
                self.coord.worker_config.get(
                    'assignments_track_error_to_avoid_flood', 1))
# to prevent flood
finally:
if not self.halted:
# Wait longer if we have not received a core id yet
reactor.callLater(1 if self.mycore_store_id else 5,
self._track_my_peers, index)
def _stash_and_restart_soak_timer(self, candidate_workload):
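        """
        Stash the candidate workload and (re)start the soak timer so the
        new assignments are applied only once the list has settled.
        """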
log.debug('re-start-assignment-soaking')
if self.assignment_soak_timer is not None:
if not self.assignment_soak_timer.called:
self.assignment_soak_timer.cancel()
self.my_candidate_workload = candidate_workload
self.assignment_soak_timer = reactor.callLater(
self.soak_time, self._update_assignments)
def _update_assignments(self):
"""
        Called when the dust has finally settled on our assignments.
:return: None
"""
log.debug('my-assignments-changed',
old_count=len(self.my_workload),
new_count=len(self.my_candidate_workload),
workload=self.my_workload)
self.my_workload, self.my_candidate_workload = \
self.my_candidate_workload, None
def _stash_and_restart_core_store_soak_timer(self):
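        """
        (Re)start the soak timer for the core store assignment; the
        assignment is processed only after it has been stable for
        soak_time seconds.
        """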
log.debug('re-start-assignment-config-soaking')
if self.assignment_core_store_soak_timer is not None:
if not self.assignment_core_store_soak_timer.called:
self.assignment_core_store_soak_timer.cancel()
self.assignment_core_store_soak_timer = reactor.callLater(
self.soak_time, self._process_config_assignment)
def _process_config_assignment(self):
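        """
        Fire wait_for_core_store_assignment with our store id so pending
        get_core_store_id() calls can complete.
        """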
log.debug('process-config-assignment',
mycore_store_id=self.mycore_store_id)
self.wait_for_core_store_assignment.callback(self.mycore_store_id)