Add counters
Change-Id: I7ccb042ba2759f07cbe693217ba37f987cb9fd6b
diff --git a/VERSION b/VERSION
index b4d6d12..88a7b22 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.7.11
+0.7.12
diff --git a/edge-monitoring/agent_modem/edge_monitoring_agent_modem.py b/edge-monitoring/agent_modem/edge_monitoring_agent_modem.py
index b769153..7b0739c 100755
--- a/edge-monitoring/agent_modem/edge_monitoring_agent_modem.py
+++ b/edge-monitoring/agent_modem/edge_monitoring_agent_modem.py
@@ -10,7 +10,6 @@
import logging
import enum
import pycurl
-import time
import serial
import subprocess
import time
@@ -32,7 +31,7 @@
config_file_contents = config_file_contents.replace("speedtest_iperf", "iperf_server")
config_file_contents = config_file_contents.replace("\"speedtest_ping_dns\": \"8.8.8.8\",", "")
# replace 1.1.1.1 with 8.8.8.8
-config_file_contents = config_file_contents.replace("\"1.1.1.1\": \"8.8.8.8\",", "")
+config_file_contents = config_file_contents.replace('1.1.1.1', '8.8.8.8')
CONF = json.loads(
config_file_contents, object_hook=lambda d: namedtuple('X', d.keys())(*d.values())
)
@@ -68,7 +67,7 @@
cmd = "ls " + CONF.modem.port
sp = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE,
stderr=subprocess.PIPE, universal_newlines=True)
- rc = sp.wait()
+ sp.wait()
ret,err = sp.communicate()
if err != "" :
self.log.error("unable to find serial port " + err)
@@ -113,9 +112,11 @@
return False, None
return True, response
- def get_state(self):
+ def get_state(self, counters):
success, result = self.write('AT+CGATT?')
if not success or 'CGATT:' not in result:
+ logging.error("AT+CGATT modem cmd failed")
+ counters['modem_cgatt_error'] += 1
return State.error
state = result.split('CGATT:')[1].split(' ')[0]
return State(state)
@@ -124,11 +125,13 @@
self.serial.close()
-def get_control_plane_state(modem, dongle_stats=None):
+def get_control_plane_state(modem, counters, dongle_stats=None):
if not modem and dongle_stats:
- if dongle_stats and dongle_stats['Connection'] == 'Connected':
+ if dongle_stats['Connection'] == 'Connected':
return State.connected
else:
+ logging.error("Dongle not connected: {}".format(dongle_stats['Connection']))
+ counters['dongle_connect_error'] += 1
return State.disconnected
# Disable radio fuction
@@ -138,14 +141,15 @@
subprocess.check_output(
"echo 'AT+CFUN=0' > " + modem.port, shell=True)
except subprocess.CalledProcessError as e:
- logging.error("Write 'AT+CFUN=0' failed")
+ logging.error("Write 'AT+CFUN=0' failed: {}".format(e))
+ counters['modem_cfun0_error'] += 1
return State.error
# Wait until the modem is fully disconnected
retry = 0
state = None
while retry < CONF.detach_timeout:
- state = modem.get_state()
+ state = modem.get_state(counters)
if state is State.disconnected:
break
time.sleep(1)
@@ -163,13 +167,14 @@
subprocess.check_output(
"echo 'AT+CFUN=1' > " + modem.port, shell=True)
except subprocess.CalledProcessError as e:
- logging.error("Write 'AT+CFUN=1' failed")
+ logging.error("Write 'AT+CFUN=1' failed: {}".format(e))
+ counters['modem_cfun1_error'] += 1
return State.error
# Wait attach_timeout sec for the modem to be fully connected
retry = 0
while retry < CONF.attach_timeout:
- state = modem.get_state()
+ state = modem.get_state(counters)
if state is State.connected:
break
time.sleep(1)
@@ -181,26 +186,21 @@
return state
-def get_user_plane_state():
+def dry_run_ping_test(counters):
if "dry_run" in CONF.ips._fields and CONF.ips.dry_run: # run dry_run latency test as user plane test
- dry_run_latency, dry_run_passed = run_ping_test(CONF.ips.dry_run, 10)
- if dry_run_passed:
- return State.connected, dry_run_latency
- else:
- logging.warning("Ping failed: {}".format(CONF.ips.dry_run))
- return State.disconnected, dry_run_latency
+ return do_ping(CONF.ips.dry_run, 10)
else: # run default user plane test
try:
subprocess.check_output(
"ping -c 3 " + CONF.ips.dns + ">/dev/null 2>&1",
shell=True)
- return State.connected, None
+ return None, True
except subprocess.CalledProcessError as e:
- logging.warning("Ping failed: {}".format(CONF.ips.dns))
- return State.disconnected, None
+ logging.warning("Ping failed for {}: {}".format(CONF.ips.dns, e))
+ return None, False
-def run_ping_test(ip, count):
+def do_ping(ip, count):
'''
Runs the ping test
Input: IP to ping, # times to ping
@@ -222,6 +222,7 @@
if not ip:
return result, True
try:
+ logging.debug("Pinging {}".format(ip))
pingOutput = subprocess.check_output(
"ping -c " + str(count) + " " + \
ip, shell=True).decode("UTF-8").split()
@@ -250,7 +251,7 @@
return result, True
-def get_ping_test(modem, dry_run_latency=None):
+def ping_test(modem, dry_run_latency=None):
'''
Prepares the ping test.
Runs ping tests from 'ips' entry in config.json in order.
@@ -267,13 +268,13 @@
if CONF.ips._fields[i] == "dry_run":
continue
count = 10
- speedtest_ping[CONF.ips._fields[i]], status = run_ping_test(CONF.ips[i], count)
+ speedtest_ping[CONF.ips._fields[i]], status = do_ping(CONF.ips[i], count)
if not status:
ping_test_passed = False
- logging.error("Ping test failed. Not running further tests.")
+ logging.error("Ping failed: {}".format(CONF.ips[i]))
return speedtest_ping, ping_test_passed
-def run_iperf_test(ip, port, time_duration, is_downlink):
+def run_iperf_test(ip, port, time_duration, is_downlink, counters):
'''
Runs iperf test to specified IP in the config file.
- Runs for 10 seconds (10 iterations)
@@ -298,13 +299,14 @@
return result
except Exception as e:
err = e
+ counters['iperf_error'] += 1
time.sleep(5)
pass
logging.error("After " + str(maxRetries) + " retries, iperf test failed for " + ip + ": %s", err)
return result
-def get_iperf_test(modem):
+def iperf_test(counters):
'''
Prepares the iperf test.
'''
@@ -320,19 +322,20 @@
return None
hour_iperf_scheduled_time_last_ran = int(time.strftime("%H"))
- speedtest_iperf['cluster']['downlink'] = run_iperf_test(CONF.ips.iperf_server, CONF.iperf_port, 10, True)
- speedtest_iperf['cluster']['uplink'] = run_iperf_test(CONF.ips.iperf_server, CONF.iperf_port, 10, False)
+ speedtest_iperf['cluster']['downlink'] = run_iperf_test(CONF.ips.iperf_server, CONF.iperf_port, 10, True, counters)
+ speedtest_iperf['cluster']['uplink'] = run_iperf_test(CONF.ips.iperf_server, CONF.iperf_port, 10, False, counters)
return speedtest_iperf
-def get_signal_quality(modem, dongle_stats=None):
+def get_signal_quality(modem, counters, dongle_stats=None):
if not modem and dongle_stats:
if dongle_stats['RSRQ'] != '' and dongle_stats['RSRP'] != '':
rsrq = int((float(dongle_stats['RSRQ']) + 19.5) * 2)
rsrp = int(float(dongle_stats['RSRP']) + 140)
return {'rsrq': rsrq, 'rsrp': rsrp}
else:
+ counters['dongle_rsrp_rsrq_error'] += 1
return {'rsrq': 0, 'rsrp': 0}
# Fall back to modem cmds
@@ -341,9 +344,9 @@
logging.debug("get_signal_quality success %i result %s",success,result)
if not success or 'CESQ: ' not in result:
logging.error("Failed to get signal quality")
+ counters['modem_cesq_error'] += 1
return {'rsrq':0, 'rsrp':0}
- logging.debug("%s", result)
tmp_rsrq = result.split('CESQ:')[1].split(',')[4]
tmp_rsrp = result.split('CESQ:')[1].split(',')[5]
@@ -357,7 +360,7 @@
return result
-def get_dongle_stats():
+def get_dongle_stats(counters):
result = {'SuccessfulFetch' : False}
if "report_in_band" in CONF._fields:
result['inBandReporting'] = CONF.report_in_band
@@ -386,22 +389,25 @@
dongleStatsXML = ET.fromstring(subprocess.check_output("curl -u admin:admin -s 'http://192.168.0.1:8080/cgi-bin/ltestatus.cgi?Command=Status'", shell=True).decode("UTF-8"))
except Exception as e:
logging.error("Failed to fetch dongle stats from URL: " + str(e))
+ counters['dongle_read_error'] += 1
return result
try:
for key in XMLkeys:
try:
result[key] = dongleStatsXML.find(key).text
- except AttributeError as e:
- logging.warn("Failed to find " + key + " in XML.")
+ except AttributeError:
+ logging.error("Failed to find " + key + " in XML")
+ counters['dongle_read_error'] += 1
result[key] = ""
result["SuccessfulFetch"] = True
except Exception as e:
logging.error("Failed to fetch dongle stats from XML: " + str(e))
+ counters['dongle_read_error'] += 1
return result
return result
-def report_status(signal_quality, dongle_stats, cp_state=None, up_state=None, speedtest_ping=None, speedtest_iperf=None):
+def report_status(counters, signal_quality, dongle_stats, cp_state=None, up_state=None, speedtest_ping=None, speedtest_iperf=None):
report = {
'name': CONF.edge_name,
'status': {
@@ -446,6 +452,7 @@
report['speedtest']['iperf'] = speedtest_iperf
report['signal_quality'] = signal_quality
report['dongle_stats'] = dongle_stats
+ report['counters'] = counters
logging.info("Sending report %s", report)
global cycles
@@ -516,6 +523,20 @@
time.sleep(delay)
subprocess.check_output("sudo shutdown -r now", shell=True)
+def init_counters():
+ return {
+ 'dongle_read_error': 0,
+ 'dongle_connect_error': 0,
+ 'dongle_rsrp_rsrq_error': 0,
+ 'modem_cfun0_error': 0,
+ 'modem_cfun1_error': 0,
+ 'modem_cgatt_error': 0,
+ 'modem_cesq_error': 0,
+ 'dry_run_ping_error': 0,
+ 'ping_error': 0,
+ 'iperf_error': 0
+ }
+
def main():
global cycles
global hour_iperf_scheduled_time_last_ran
@@ -554,39 +575,47 @@
modem = None
connect_retries = 0
+ counters = init_counters()
while True:
dongle_retries = 0
- dongle_stats = get_dongle_stats()
+ dongle_stats = get_dongle_stats(counters)
while not dongle_stats['SuccessfulFetch']:
- log.error("Dongle error")
dongle_retries += 1
if dongle_retries > 10:
- logging.warn("Rebooting Pi")
+ logging.warning("Rebooting Pi: dongle not readable")
os.system("shutdown /r /t 0")
sys.exit(1)
- dongle_stats = get_dongle_stats()
+ dongle_stats = get_dongle_stats(counters)
- cp_state = get_control_plane_state(modem, dongle_stats)
+ cp_state = get_control_plane_state(modem, counters, dongle_stats)
if cp_state != State.connected:
logging.error("Dongle not connected")
connect_retries += 1
if connect_retries > 10:
+ logging.warning("Rebooting Pi: dongle not connected")
os.system("shutdown /r /t 0")
sys.exit(1)
- signal_quality = get_signal_quality(modem, dongle_stats)
- up_state, dry_run_latency = get_user_plane_state()
- if up_state is State.disconnected:
+ signal_quality = get_signal_quality(modem, counters, dongle_stats)
+
+ dry_run_ping_latency, dry_run_ping_result = dry_run_ping_test(counters)
+ if not dry_run_ping_result:
logging.error("Dry run ping failed")
+ counters['dry_run_ping_error'] += 1
- speedtest_ping, speedtest_status = get_ping_test(modem, dry_run_latency)
- if not speedtest_status:
- logging.error("Ping tests failed")
+ ping_latency, ping_result = ping_test(modem, dry_run_ping_latency)
+ if not ping_result:
+ logging.error("Ping test failed")
+ counters['ping_error'] += 1
- speedtest_iperf = get_iperf_test(modem)
+ # If either of the ping tests pass, then declare user plane connected
+ if dry_run_ping_result or ping_result:
+ up_state = State.connected
- report_status(signal_quality, dongle_stats, cp_state, up_state, speedtest_ping, speedtest_iperf)
+ speedtest_iperf = iperf_test(counters)
+
+ report_status(counters, signal_quality, dongle_stats, cp_state, up_state, ping_latency, speedtest_iperf)
time.sleep(CONF.report_interval)
modem.close()