blob: a29e8acb69dff0080e0a44bb6160bab593df79d1 [file] [log] [blame]
Chip Boling8e042f62019-02-12 16:14:34 -06001# Copyright 2017-present Adtran, Inc.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import structlog
16from twisted.internet import reactor
17from pyvoltha.protos.common_pb2 import OperStatus, ConnectStatus
18from pyvoltha.adapters.extensions.omci.omci_me import OntGFrame
19
20
class HeartBeat(object):
    """
    Wraps health-check support for ONU.

    When enabled, a heartbeat periodically sends an OMCI ONT-G request for
    ``check_item`` and compares the returned value with ``check_value``.
    Missed or invalid responses are counted and, once
    ``heartbeat_failed_limit`` is reached, the device is marked unreachable
    and a heartbeat alarm is raised. The alarm is cleared again when a later
    heartbeat succeeds.
    """
    INITIAL_DELAY = 60      # Delay after start until first check
    TICK_DELAY = 2          # Heartbeat interval

    def __init__(self, handler, device_id):
        """
        :param handler: device handler; this class uses its ``openomci``,
                        ``adapter_agent`` and ``alarms`` attributes
        :param device_id: ID of the device being monitored
        """
        self.log = structlog.get_logger(device_id=device_id)
        self._enabled = False
        self._handler = handler
        self._device_id = device_id
        # Pending work: either the delayed call for the next pulse or the
        # deferred of the OMCI request currently in flight
        self._defer = None
        self._alarm_active = False
        self._heartbeat_count = 0
        self._heartbeat_miss = 0
        self._alarms_raised_count = 0
        self.heartbeat_failed_limit = 5
        self.heartbeat_last_reason = ''
        self.heartbeat_interval = self.TICK_DELAY

    def __str__(self):
        return "HeartBeat: count:{}, miss: {}".format(self._heartbeat_count,
                                                      self._heartbeat_miss)

    @staticmethod
    def create(handler, device_id):
        """Factory helper, equivalent to ``HeartBeat(handler, device_id)``"""
        return HeartBeat(handler, device_id)

    def _start(self, delay=INITIAL_DELAY):
        """Schedule the first heartbeat check ``delay`` seconds from now"""
        self._defer = reactor.callLater(delay, self.check_pulse)

    def _stop(self):
        """Cancel the pending heartbeat work, if any is still outstanding"""
        d, self._defer = self._defer, None

        # 'called' is an attribute (not a method) on both delayed calls
        # and deferreds
        if d is not None and not d.called:
            d.cancel()

    @property
    def enabled(self):
        return self._enabled

    @enabled.setter
    def enabled(self, value):
        # NOTE: changing this flag does not start or stop the heartbeat
        #       timer; the _start()/_stop() calls were disabled here.
        if self._enabled != value:
            self._enabled = value

    @property
    def check_item(self):
        """Name of the ONT-G attribute requested on each heartbeat"""
        return 'vendor_id'

    @property
    def check_value(self):
        """Value that ``check_item`` must have for a heartbeat to pass"""
        # device = self._handler.adapter_agent.get_device(self._device_id)
        # return device.serial_number
        return 'ADTN'

    @property
    def alarm_active(self):
        return self._alarm_active

    @property
    def heartbeat_count(self):
        return self._heartbeat_count

    @property
    def heartbeat_miss(self):
        return self._heartbeat_miss

    @property
    def alarms_raised_count(self):
        return self._alarms_raised_count

    def check_pulse(self):
        """
        Send one heartbeat request (only when enabled).

        The response is handled by ``_heartbeat_success`` or
        ``_heartbeat_fail``. If the request cannot even be sent, the
        failure handler is scheduled 5 seconds later instead.
        """
        if self.enabled:
            try:
                self._defer = self._handler.openomci.omci_cc.send(OntGFrame(self.check_item).get())
                self._defer.addCallbacks(self._heartbeat_success, self._heartbeat_fail)

            except Exception as e:
                self._defer = reactor.callLater(5, self._heartbeat_fail, e)

    def _heartbeat_success(self, results):
        """
        Validate a heartbeat response and update the miss counter.

        A wrong value, or a response that cannot be decoded, immediately
        sets the miss counter to the failure limit.

        :param results: OMCI response frame for the ONT-G request
        """
        self.log.debug('heartbeat-success')

        try:
            omci_response = results.getfieldval("omci_message")
            data = omci_response.getfieldval("data")
            value = data[self.check_item]

            if value != self.check_value:
                self._heartbeat_miss = self.heartbeat_failed_limit
                self.heartbeat_last_reason = "Invalid {}, got '{}' but expected '{}'".\
                    format(self.check_item, value, self.check_value)
            else:
                self._heartbeat_miss = 0
                self.heartbeat_last_reason = ''

        except Exception as e:
            self._heartbeat_miss = self.heartbeat_failed_limit
            # str(e) rather than e.message: the latter is not available on
            # Python 3 exceptions
            self.heartbeat_last_reason = str(e)

        self.heartbeat_check_status(results)

    def _heartbeat_fail(self, failure):
        """
        Count one missed heartbeat and re-evaluate the device status.

        :param failure: the failure (or exception) describing the error
        """
        self._heartbeat_miss += 1
        self.log.info('heartbeat-miss', failure=failure,
                      count=self._heartbeat_count,
                      miss=self._heartbeat_miss)
        self.heartbeat_last_reason = 'OMCI connectivity error'
        self.heartbeat_check_status(None)

    def on_heartbeat_alarm(self, active):
        """Hook invoked when the heartbeat alarm is raised or cleared"""
        # TODO: Do something here ?
        #
        # TODO: If failed (active = true) due to bad serial-number shut off the UNI port?
        pass

    def heartbeat_check_status(self, results):
        """
        Check the number of heartbeat failures against the limit and emit an alarm if needed

        :param results: response of the last heartbeat, None on failure
                        (currently unused)
        """
        try:
            # Looked up inside the try so that a device lookup failure is
            # logged and does not prevent the next heartbeat from being
            # scheduled below
            device = self._handler.adapter_agent.get_device(self._device_id)

            # NOTE(review): the rest of this file imports from 'pyvoltha';
            #               confirm that this module path is still valid.
            from voltha.extensions.alarms.heartbeat_alarm import HeartbeatAlarm

            if self._heartbeat_miss >= self.heartbeat_failed_limit:
                if device.connect_status == ConnectStatus.REACHABLE:
                    self.log.warning('heartbeat-failed', count=self._heartbeat_miss)
                    device.connect_status = ConnectStatus.UNREACHABLE
                    device.oper_status = OperStatus.FAILED
                    device.reason = self.heartbeat_last_reason
                    self._handler.adapter_agent.update_device(device)
                    HeartbeatAlarm(self._handler.alarms, 'onu', self._heartbeat_miss).raise_alarm()
                    self._alarm_active = True
                    self.on_heartbeat_alarm(True)
            else:
                # Update device states
                if device.connect_status != ConnectStatus.REACHABLE and self._alarm_active:
                    device.connect_status = ConnectStatus.REACHABLE
                    device.oper_status = OperStatus.ACTIVE
                    device.reason = ''
                    self._handler.adapter_agent.update_device(device)
                    HeartbeatAlarm(self._handler.alarms, 'onu').clear_alarm()

                    self._alarm_active = False
                    # NOTE(review): counted when the alarm clears rather
                    #               than when it is raised; confirm intent.
                    self._alarms_raised_count += 1
                    self.on_heartbeat_alarm(False)

        except Exception as e:
            self.log.exception('heartbeat-check', e=e)

        # Reschedule next heartbeat
        if self.enabled:
            self._heartbeat_count += 1
            self._defer = reactor.callLater(self.heartbeat_interval, self.check_pulse)