AETHER-1259 Add signal quality report to the monitoring agent
In addition to the signal quality report,
- Fix the license
- Do not run user plane test when attach failed
- Replace all os.system calls with subprocess
- Add more exception handling
- Make log level and attach/detach timeout configurable in config.json
Change-Id: I774917586a5b66c5bc7eaf2feca943f692ece54a
diff --git a/edge-monitoring/agent_modem/config.json b/edge-monitoring/agent_modem/config.json
index efd0139..c8717b0 100644
--- a/edge-monitoring/agent_modem/config.json
+++ b/edge-monitoring/agent_modem/config.json
@@ -9,7 +9,10 @@
"user_plane_ping_test": "8.8.8.8",
"speedtest_ping_dns": "8.8.8.8"
},
+ "attach_timeout": 30,
+ "detach_timeout": 10,
"report_url": "https://monitoring.aetherproject.org/edges",
"report_interval": 180,
+ "log_level": "WARN",
"log_file": "/var/log/edge_monitoring_agent.log"
}
diff --git a/edge-monitoring/agent_modem/edge_monitoring_agent_modem.py b/edge-monitoring/agent_modem/edge_monitoring_agent_modem.py
index 757d622..cdfab63 100755
--- a/edge-monitoring/agent_modem/edge_monitoring_agent_modem.py
+++ b/edge-monitoring/agent_modem/edge_monitoring_agent_modem.py
@@ -2,17 +2,7 @@
# Copyright 2020-present Open Networking Foundation
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License
+# SPDX-License-Identifier: LicenseRef-ONF-Member-Only-1.0
import sys
import os
@@ -40,27 +30,9 @@
logging.basicConfig(
filename=CONF.log_file,
format='%(asctime)s [%(levelname)s] %(message)s',
- level=logging.ERROR
+ level=logging.getLevelName(CONF.log_level)
)
-report = {
- 'name': CONF.edge_name,
- 'status': {
- 'control_plane': None,
- 'user_plane': None
- },
- 'speedtest': {
- 'ping': {
- 'dns': {
- 'min': None,
- 'avg': None,
- 'max': None,
- 'stddev': None
- }
- }
- }
-}
-
class State(enum.Enum):
error = "-1"
@@ -114,7 +86,7 @@
return False, None
return True, response
- def is_connected(self):
+ def get_state(self):
success, result = self.write('AT+CGATT?')
if not success or 'CGATT:' not in result:
return State.error
@@ -126,49 +98,45 @@
def get_control_plane_state(modem):
- # Delete the existing session
+ # Disable radio function
# "echo" works more stable than serial for this action
- # success, result = modem.write('AT+CFUN=0')
- logging.debug("echo 'AT+CFUN=0' > " + CONF.modem.port)
- success = os.system("echo 'AT+CFUN=0' > " + CONF.modem.port)
- logging.debug("result: %s", success)
- if success is not 0:
- msg = "Write 'AT+CFUN=0' failed"
- logging.error(msg)
- return State.error, msg
+ try:
+ logging.debug("echo 'AT+CFUN=0' > " + CONF.modem.port)
+ subprocess.check_output(
+ "echo 'AT+CFUN=0' > " + CONF.modem.port, shell=True)
+ except subprocess.CalledProcessError as e:
+ logging.error("Write 'AT+CFUN=0' failed")
+ return State.error
# Wait until the modem is fully disconnected
retry = 0
state = None
- while retry < 5:
- state = modem.is_connected()
+ while retry < CONF.detach_timeout:
+ state = modem.get_state()
if state is State.disconnected:
break
time.sleep(1)
retry += 1
- # Consider the modem is not responding if disconnection failed
if state is not State.disconnected:
- msg = "Failed to disconnect."
- logging.error(msg)
- return State.error, msg
+ logging.error("Failed to disconnect")
+ return State.error
time.sleep(2)
- # Create a new session
+ # Enable radio function
# "echo" works more stable than serial for this action
- # success, result = modem.write('AT+CGATT=1')
- logging.debug("echo 'AT+CFUN=1' > " + CONF.modem.port)
- success = os.system("echo 'AT+CFUN=1' > " + CONF.modem.port)
- logging.debug("result: %s", success)
- if success is not 0:
- msg = "Write 'AT+CFUN=1' failed"
- logging.error(msg)
- return State.error, msg
+ try:
+ logging.debug("echo 'AT+CFUN=1' > " + CONF.modem.port)
+ subprocess.check_output(
+ "echo 'AT+CFUN=1' > " + CONF.modem.port, shell=True)
+ except subprocess.CalledProcessError as e:
+ logging.error("Write 'AT+CFUN=1' failed")
+ return State.error
- # Give 10 sec for the modem to be fully connected
+ # Wait attach_timeout sec for the modem to be fully connected
retry = 0
- while retry < 30:
- state = modem.is_connected()
+ while retry < CONF.attach_timeout:
+ state = modem.get_state()
if state is State.connected:
break
time.sleep(1)
@@ -177,13 +145,18 @@
if state is State.error:
state = State.disconnected
- time.sleep(2)
- return state, None
+ return state
def get_user_plane_state(modem):
- resp = os.system("ping -c 3 " + CONF.ips.user_plane_ping_test + ">/dev/null 2>&1")
- return State.connected if resp is 0 else State.disconnected, None
+ try:
+ subprocess.check_output(
+ "ping -c 3 " + CONF.ips.user_plane_ping_test + ">/dev/null 2>&1",
+ shell=True)
+ return State.connected
+ except subprocess.CalledProcessError as e:
+ logging.warning("User plane test failed")
+ return State.disconnected
def run_ping_test(ip, count):
@@ -221,10 +194,56 @@
return speedtest_ping
-def report_status(cp_state, up_state, speedtest_ping):
- report['status']['control_plane'] = cp_state.name
- report['status']['user_plane'] = up_state.name
- report['speedtest']['ping'] = speedtest_ping
+def get_signal_quality(modem):
+ success, result = modem.write('AT+CESQ')
+ if not success or 'CESQ: ' not in result:
+ logging.error("Failed to get signal quality")
+ return 0, 0
+
+ logging.debug("%s", result)
+ tmp_rsrq = result.split('CESQ:')[1].split(',')[4]
+ tmp_rsrp = result.split('CESQ:')[1].split(',')[5]
+
+ rsrq = int(tmp_rsrq.strip())
+ rsrp = int(tmp_rsrp.strip().split(' ')[0])
+ result = {
+ 'rsrq': 0 if rsrq is 255 else rsrq,
+ 'rsrp': 0 if rsrp is 255 else rsrp
+ }
+
+ return result
+
+
+def report_status(signal_quality, cp_state=None, up_state=None, speedtest_ping=None):
+ report = {
+ 'name': CONF.edge_name,
+ 'status': {
+ 'control_plane': "disconnected",
+ 'user_plane': "disconnected"
+ },
+ 'speedtest': {
+ 'ping': {
+ 'dns': {
+ 'min': 0.0,
+ 'avg': 0.0,
+ 'max': 0.0,
+ 'stddev': 0.0
+ }
+ }
+ },
+ 'signal_quality': {
+ 'rsrq': 0,
+ 'rsrp': 0
+ }
+ }
+
+ if cp_state is not None:
+ report['status']['control_plane'] = cp_state.name
+ if up_state is not None:
+ report['status']['user_plane'] = up_state.name
+ if speedtest_ping is not None:
+ report['speedtest']['ping'] = speedtest_ping
+ report['signal_quality'] = signal_quality
logging.info("Sending report %s", report)
try:
@@ -237,9 +256,18 @@
except requests.exceptions.HTTPError as e:
logging.error("Failed to report for %s", e)
pass
+ time.sleep(CONF.report_interval)
def main():
+ for ip in CONF.ips:
+ try:
+ subprocess.check_output("sudo ip route replace {}/32 via {}".format(
+ ip, CONF.modem.ip_addr), shell=True)
+ except subprocess.CalledProcessError as e:
+ logging.error("Failed to add routes", e.returncode, e.output)
+ sys.exit(1)
+
modem = Modem(CONF.modem.port, CONF.modem.baud)
try:
modem.connect()
@@ -247,24 +275,26 @@
logging.error("Failed to connect the modem for %s", e)
sys.exit(1)
- for ip in CONF.ips:
- success = os.system("sudo ip route replace {}/32 via {}".format(
- ip, CONF.modem.ip_addr))
- if success is not 0:
- logging.error("Failed to add test routing to " + ip)
- sys.exit(1)
-
while True:
- cp_state, cp_msg = get_control_plane_state(modem)
- up_state, up_msg = get_user_plane_state(modem)
- speedtest_ping = get_ping_test(modem)
+ signal_quality = get_signal_quality(modem)
+ cp_state = get_control_plane_state(modem)
if cp_state is State.error:
logging.error("Modem is in error state.")
sys.exit(1)
+ if cp_state is State.disconnected:
+ # Failed to attach, don't need to run other tests
+ report_status(signal_quality)
+ continue
- report_status(cp_state, up_state, speedtest_ping)
- time.sleep(CONF.report_interval)
+ up_state = get_user_plane_state(modem)
+ if up_state is State.disconnected:
+ # Basic user plane test failed, don't need to run the rest of tests
+ report_status(signal_quality, cp_state)
+ continue
+
+ speedtest_ping = get_ping_test(modem)
+ report_status(signal_quality, cp_state, up_state, speedtest_ping)
modem.close()