Init commit for standalone enodebd

Change-Id: I88eeef5135dd7ba8551ddd9fb6a0695f5325337b
diff --git a/common/health/health_service.py b/common/health/health_service.py
new file mode 100644
index 0000000..4228330
--- /dev/null
+++ b/common/health/health_service.py
@@ -0,0 +1,229 @@
+#!/usr/bin/env python3
+
+"""
+Copyright 2020 The Magma Authors.
+
+This source code is licensed under the BSD-style license found in the
+LICENSE file in the root directory of this source tree.
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import asyncio
+import os
+import subprocess
+from datetime import datetime
+
+import apt
+from dateutil import tz
+from common.health.entities import (
+    ActiveState,
+    Errors,
+    HealthStatus,
+    HealthSummary,
+    RestartFrequency,
+    ServiceHealth,
+    Version,
+)
+from common.service import MagmaService
+from common.service_registry import ServiceRegistry
+from configuration.mconfig_managers import load_service_mconfig_as_json
+from magmad.metrics import UNEXPECTED_SERVICE_RESTARTS
+from magmad.service_poller import ServicePoller
+from orc8r.protos import common_pb2, magmad_pb2
+from orc8r.protos.magmad_pb2_grpc import MagmadStub
+from orc8r.protos.mconfig import mconfigs_pb2
+from pystemd.systemd1 import Unit
+
+
+class GenericHealthChecker:
+
+    def ping(self, host, num_packets=4):
+        """Run a ping test against `host` via the local magmad service.
+
+        `host` is sent as `host_or_ip` together with `num_packets`; the
+        `pings` field of the RunNetworkTests response is returned.
+        """
+        magmad_channel = ServiceRegistry.get_rpc_channel(
+            'magmad', ServiceRegistry.LOCAL,
+        )
+        ping_params = magmad_pb2.PingParams(
+            host_or_ip=host,
+            num_packets=num_packets,
+        )
+        request = magmad_pb2.NetworkTestRequest(pings=[ping_params])
+        return MagmadStub(magmad_channel).RunNetworkTests(request).pings
+
+    def ping_status(self, host):
+        """Map a 4-packet ping of `host` to a HealthStatus value."""
+        first = self.ping(host=host, num_packets=4)[0]
+        if first.error:
+            return HealthStatus.DOWN
+        reachable = bool(first.avg_response_ms)
+        return HealthStatus.UP if reachable else HealthStatus.UNKNOWN
+
+    def get_error_summary(self, service_names):
+        """Get the list of services with the error count.
+
+        Counts the syslog lines that mention a service and contain 'ERROR';
+        a 'Starting <service>...' line resets that service's count to zero.
+
+        Args:
+            service_names: List of service names.
+
+        Returns:
+            A dictionary with service name as a key and the Errors object
+            as a value.
+
+        Raises:
+            PermissionError: User has no permission to read the syslog
+        """
+        res = {
+            service_name: Errors(
+                log_level=load_service_mconfig_as_json(service_name)
+                .get('logLevel', 'INFO'),
+                error_count=0,
+            )
+            for service_name in service_names
+        }
+        syslog_path = '/var/log/syslog'
+        if not os.access(syslog_path, os.R_OK):
+            raise PermissionError(
+                'syslog is not readable. '
+                'Try `sudo chmod a+r {}`. '
+                'Or execute the command with sudo '
+                'permissions: `venvsudo`'.format(syslog_path),
+            )
+        # errors='replace': undecodable bytes in syslog must not abort the scan
+        with open(syslog_path, 'r', encoding='utf-8', errors='replace') as f:
+            for line in f:
+                for service_name in service_names:
+                    if service_name not in line:
+                        continue
+                    # Reset the counter for restart/start
+                    if 'Starting {}...'.format(service_name) in line:
+                        res[service_name].error_count = 0
+                    elif 'ERROR' in line:
+                        res[service_name].error_count += 1
+        return res
+
+    def get_magma_services_summary(self):
+        """Return a ServiceHealth for each service in magmad's configs.
+
+        Raises:
+            ValueError: `ps` wrote to stderr for an active service's PID
+        """
+        chan = ServiceRegistry.get_rpc_channel('magmad', ServiceRegistry.LOCAL)
+        client = MagmadStub(chan)
+        configs = client.GetConfigs(common_pb2.Void())
+
+        service_names = [str(name) for name in configs.configs_by_key]
+        services_errors = self.get_error_summary(service_names=service_names)
+
+        services_health_summary = []
+        # DBus objects: https://www.freedesktop.org/wiki/Software/systemd/dbus/
+        for service_name in service_names:
+            unit = Unit(
+                'magma@{}.service'.format(service_name),
+                _autoload=True,
+            )
+            active_state = ActiveState.dbus2state[unit.Unit.ActiveState]
+            sub_state = str(unit.Unit.SubState, 'utf-8')
+            time_running = b'00'
+            if active_state == ActiveState.ACTIVE:
+                pid = unit.Service.MainPID
+                # stderr must be piped too, otherwise `error` is always None
+                time_running, error = subprocess.Popen(
+                    ['ps', '-o', 'etime=', '-p', str(pid)],
+                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                ).communicate()
+                if error:
+                    raise ValueError(
+                        'Cannot get time running for the service '
+                        '{} `ps -o etime= -p {}`'
+                        .format(service_name, pid),
+                    )
+
+            services_health_summary.append(
+                ServiceHealth(
+                    service_name=service_name,
+                    active_state=active_state, sub_state=sub_state,
+                    time_running=str(time_running, 'utf-8').strip(),
+                    errors=services_errors[service_name],
+                ),
+            )
+        return services_health_summary
+
+    def get_unexpected_restart_summary(self):
+        """Collect the unexpected-restart count of each service in the poller.
+
+        Returns a dict mapping each service name to a RestartFrequency whose
+        count is read from the UNEXPECTED_SERVICE_RESTARTS metric.
+        """
+        magmad = MagmaService('magmad', mconfigs_pb2.MagmaD())
+        poller = ServicePoller(magmad.loop, magmad.config)
+        poller.start()
+        # Make the service's loop the current one before running on it.
+        asyncio.set_event_loop(magmad.loop)
+
+        # noinspection PyProtectedMember
+        # pylint: disable=protected-access
+        async def fetch_info():
+            frequencies = {}
+            await poller._get_service_info()
+            for name in poller.service_info.keys():
+                metric = UNEXPECTED_SERVICE_RESTARTS.labels(service_name=name)
+                frequencies[name] = RestartFrequency(
+                    count=int(metric._value.get()),
+                    time_interval='',
+                )
+            return frequencies
+
+        return magmad.loop.run_until_complete(fetch_info())
+
+    def get_kernel_version(self):
+        """Return `uname -a` output; raise ValueError if it wrote to stderr."""
+        # stderr is piped as well, otherwise `error` would always be None
+        info, error = subprocess.Popen(
+            ['uname', '-a'], stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+        ).communicate()
+        if error:
+            raise ValueError('Cannot get the kernel version')
+        return str(info, 'utf-8')
+
+    def get_magma_version(self):
+        """Return the magma package version and update time from apt.
+
+        Falls back to the python3 package version when magma is absent.
+        """
+        cache = apt.Cache()
+        if 'magma' not in cache:
+            return Version(
+                version_code=cache['python3'].versions[0],
+                last_update_time='-',
+            )
+        pkg = str(cache['magma'].versions[0])
+        version = pkg.split('-')[0].split('=')[-1]
+        timestamp = int(pkg.split('-')[1])
+        # Aware UTC time directly; utcfromtimestamp() is deprecated (3.12+)
+        stamp = datetime.fromtimestamp(timestamp, tz=tz.tzutc())
+        return Version(
+            version_code=version,
+            last_update_time=stamp.astimezone(tz=tz.tzlocal())
+            .strftime('%Y-%m-%d %H:%M:%S'),
+        )
+
+    def get_health_summary(self):
+        """Collect all the individual checks into one HealthSummary."""
+        return HealthSummary(
+            version=self.get_magma_version(),
+            platform=self.get_kernel_version(),
+            services_health=self.get_magma_services_summary(),
+            internet_health=self.ping_status(host='8.8.8.8'),
+            dns_health=self.ping_status(host='google.com'),
+            unexpected_restarts=self.get_unexpected_restart_summary(),
+        )