AETHER-1259 Add signal quality report to the monitoring agent

In addition to the signal quality report,
- Fix the license
- Do not run the user plane test when attach fails
- Replace all os.system calls with subprocess
- Add more exception handling
- Make log level and attach/detach timeout configurable in config.json

Change-Id: I774917586a5b66c5bc7eaf2feca943f692ece54a
diff --git a/edge-monitoring/agent_modem/config.json b/edge-monitoring/agent_modem/config.json
index efd0139..c8717b0 100644
--- a/edge-monitoring/agent_modem/config.json
+++ b/edge-monitoring/agent_modem/config.json
@@ -9,7 +9,10 @@
       "user_plane_ping_test": "8.8.8.8",
       "speedtest_ping_dns": "8.8.8.8"
     },
+    "attach_timeout": 30,
+    "detach_timeout": 10,
     "report_url": "https://monitoring.aetherproject.org/edges",
     "report_interval": 180,
+    "log_level": "WARN",
     "log_file": "/var/log/edge_monitoring_agent.log"
 }
diff --git a/edge-monitoring/agent_modem/edge_monitoring_agent_modem.py b/edge-monitoring/agent_modem/edge_monitoring_agent_modem.py
index 757d622..cdfab63 100755
--- a/edge-monitoring/agent_modem/edge_monitoring_agent_modem.py
+++ b/edge-monitoring/agent_modem/edge_monitoring_agent_modem.py
@@ -2,17 +2,7 @@
 
 # Copyright 2020-present Open Networking Foundation
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License
+# SPDX-License-Identifier: LicenseRef-ONF-Member-Only-1.0
 
 import sys
 import os
@@ -40,27 +30,9 @@
 logging.basicConfig(
     filename=CONF.log_file,
     format='%(asctime)s [%(levelname)s] %(message)s',
-    level=logging.ERROR
+    level=logging.getLevelName(CONF.log_level)
 )
 
-report = {
-    'name': CONF.edge_name,
-    'status': {
-        'control_plane': None,
-        'user_plane': None
-    },
-    'speedtest': {
-        'ping': {
-            'dns': {
-                'min': None,
-                'avg': None,
-                'max': None,
-                'stddev': None
-            }
-        }
-    }
-}
-
 
 class State(enum.Enum):
     error = "-1"
@@ -114,7 +86,7 @@
             return False, None
         return True, response
 
-    def is_connected(self):
+    def get_state(self):
         success, result = self.write('AT+CGATT?')
         if not success or 'CGATT:' not in result:
             return State.error
@@ -126,49 +98,45 @@
 
 
 def get_control_plane_state(modem):
-    # Delete the existing session
+    # Disable radio function
     # "echo" works more stable than serial for this action
-    # success, result = modem.write('AT+CFUN=0')
-    logging.debug("echo 'AT+CFUN=0' > " + CONF.modem.port)
-    success = os.system("echo 'AT+CFUN=0' > " + CONF.modem.port)
-    logging.debug("result: %s", success)
-    if success is not 0:
-        msg = "Write 'AT+CFUN=0' failed"
-        logging.error(msg)
-        return State.error, msg
+    try:
+        logging.debug("echo 'AT+CFUN=0' > " + CONF.modem.port)
+        subprocess.check_output(
+            "echo 'AT+CFUN=0' > " + CONF.modem.port, shell=True)
+    except subprocess.CalledProcessError as e:
+        logging.error("Write 'AT+CFUN=0' failed")
+        return State.error
 
     # Wait until the modem is fully disconnected
     retry = 0
     state = None
-    while retry < 5:
-        state = modem.is_connected()
+    while retry < CONF.detach_timeout:
+        state = modem.get_state()
         if state is State.disconnected:
             break
         time.sleep(1)
         retry += 1
 
-    # Consider the modem is not responding if disconnection failed
     if state is not State.disconnected:
-        msg = "Failed to disconnect."
-        logging.error(msg)
-        return State.error, msg
+        logging.error("Failed to disconnect")
+        return State.error
 
     time.sleep(2)
-    # Create a new session
+    # Enable radio function
     # "echo" works more stable than serial for this action
-    # success, result = modem.write('AT+CGATT=1')
-    logging.debug("echo 'AT+CFUN=1' > " + CONF.modem.port)
-    success = os.system("echo 'AT+CFUN=1' > " + CONF.modem.port)
-    logging.debug("result: %s", success)
-    if success is not 0:
-        msg = "Write 'AT+CFUN=1' failed"
-        logging.error(msg)
-        return State.error, msg
+    try:
+        logging.debug("echo 'AT+CFUN=1' > " + CONF.modem.port)
+        subprocess.check_output(
+            "echo 'AT+CFUN=1' > " + CONF.modem.port, shell=True)
+    except subprocess.CalledProcessError as e:
+        logging.error("Write 'AT+CFUN=1' failed")
+        return State.error
 
-    # Give 10 sec for the modem to be fully connected
+    # Wait attach_timeout sec for the modem to be fully connected
     retry = 0
-    while retry < 30:
-        state = modem.is_connected()
+    while retry < CONF.attach_timeout:
+        state = modem.get_state()
         if state is State.connected:
             break
         time.sleep(1)
@@ -177,13 +145,18 @@
     if state is State.error:
         state = State.disconnected
 
-    time.sleep(2)
-    return state, None
+    return state
 
 
 def get_user_plane_state(modem):
-    resp = os.system("ping -c 3 " + CONF.ips.user_plane_ping_test + ">/dev/null 2>&1")
-    return State.connected if resp is 0 else State.disconnected, None
+    try:
+        subprocess.check_output(
+            "ping -c 3 " + CONF.ips.user_plane_ping_test + ">/dev/null 2>&1",
+            shell=True)
+        return State.connected
+    except subprocess.CalledProcessError as e:
+        logging.warning("User plane test failed")
+        return State.disconnected
 
 
 def run_ping_test(ip, count):
@@ -221,10 +194,56 @@
     return speedtest_ping
 
 
-def report_status(cp_state, up_state, speedtest_ping):
-    report['status']['control_plane'] = cp_state.name
-    report['status']['user_plane'] = up_state.name
-    report['speedtest']['ping'] = speedtest_ping
+def get_signal_quality(modem):
+    success, result = modem.write('AT+CESQ')
+    if not success or 'CESQ: ' not in result:
+        logging.error("Failed to get signal quality")
+        return 0, 0
+
+    logging.debug("%s", result)
+    tmp_rsrq = result.split('CESQ:')[1].split(',')[4]
+    tmp_rsrp = result.split('CESQ:')[1].split(',')[5]
+
+    rsrq = int(tmp_rsrq.strip())
+    rsrp = int(tmp_rsrp.strip().split(' ')[0])
+    result = {
+        'rsrq': 0 if rsrq is 255 else rsrq,
+        'rsrp': 0 if rsrp is 255 else rsrp
+    }
+
+    return result
+
+
+def report_status(signal_quality, cp_state=None, up_state=None, speedtest_ping=None):
+    report = {
+        'name': CONF.edge_name,
+        'status': {
+            'control_plane': "disconnected",
+            'user_plane': "disconnected"
+        },
+        'speedtest': {
+            'ping': {
+                'dns': {
+                    'min': 0.0,
+                    'avg': 0.0,
+                    'max': 0.0,
+                    'stddev': 0.0
+                }
+            }
+        },
+        'signal_quality': {
+            'rsrq': 0,
+            'rsrp': 0
+        }
+    }
+
+    if cp_state is not None:
+        report['status']['control_plane'] = cp_state.name
+    if up_state is not None:
+        report['status']['user_plane'] = up_state.name
+    if speedtest_ping is not None:
+        report['speedtest']['ping'] = speedtest_ping
+    report['signal_quality'] = signal_quality
 
     logging.info("Sending report %s", report)
     try:
@@ -237,9 +256,18 @@
     except requests.exceptions.HTTPError as e:
         logging.error("Failed to report for %s", e)
         pass
+    time.sleep(CONF.report_interval)
 
 
 def main():
+    for ip in CONF.ips:
+        try:
+            subprocess.check_output("sudo ip route replace {}/32 via {}".format(
+                ip, CONF.modem.ip_addr), shell=True)
+        except subprocess.CalledProcessError as e:
+            logging.error("Failed to add routes", e.returncode, e.output)
+            sys.exit(1)
+
     modem = Modem(CONF.modem.port, CONF.modem.baud)
     try:
         modem.connect()
@@ -247,24 +275,26 @@
         logging.error("Failed to connect the modem for %s", e)
         sys.exit(1)
 
-    for ip in CONF.ips:
-        success = os.system("sudo ip route replace {}/32 via {}".format(
-            ip, CONF.modem.ip_addr))
-        if success is not 0:
-            logging.error("Failed to add test routing to " + ip)
-            sys.exit(1)
-
     while True:
-        cp_state, cp_msg = get_control_plane_state(modem)
-        up_state, up_msg = get_user_plane_state(modem)
-        speedtest_ping = get_ping_test(modem)
+        signal_quality = get_signal_quality(modem)
 
+        cp_state = get_control_plane_state(modem)
         if cp_state is State.error:
             logging.error("Modem is in error state.")
             sys.exit(1)
+        if cp_state is State.disconnected:
+            # Failed to attach, don't need to run other tests
+            report_status(signal_quality)
+            continue
 
-        report_status(cp_state, up_state, speedtest_ping)
-        time.sleep(CONF.report_interval)
+        up_state = get_user_plane_state(modem)
+        if up_state is State.disconnected:
+            # Basic user plane test failed, don't need to run the rest of tests
+            report_status(signal_quality, cp_state)
+            continue
+
+        speedtest_ping = get_ping_test(modem)
+        report_status(signal_quality, cp_state, up_state, speedtest_ping)
 
     modem.close()