CORD-2365 Ensure core comes back up if modeling fails

Change-Id: Ife53cc53d813b19b76db1db5d7d8a5a7dcc40f72
diff --git a/xos/coreapi/Makefile b/xos/coreapi/Makefile
index c07f041..2482d1c 100644
--- a/xos/coreapi/Makefile
+++ b/xos/coreapi/Makefile
@@ -14,10 +14,22 @@
 # limitations under the License.
 #
 
-all: prep start
+# Allow MODEL_STATUS and MODEL_OUTPUT to be passed on the command line when
+# using "make start"; each one that is defined is forwarded to core_main.py
+# as --model_status / --model_output through START_OPTIONS.
+START_OPTIONS ?=
+ifdef MODEL_STATUS
+  START_OPTIONS += --model_status $(MODEL_STATUS)
+endif
+ifdef MODEL_OUTPUT
+  START_OPTIONS += --model_output $(MODEL_OUTPUT)
+endif
+# "all" builds nothing; it only prints a hint ("@" keeps make from echoing the command itself).
+all:
+	@echo "please use \"make start\" or \"make prep\""
 
 start:
-	bash -c "source env.sh && python ./core_main.py"
+	bash -c "source env.sh && python ./core_main.py $(START_OPTIONS)"
 
 prep: unload_unwanted app_lists rebuild_protos compile_protos try_models makemigrations migrate
 
diff --git a/xos/coreapi/core_main.py b/xos/coreapi/core_main.py
index 8c3a3fe..2138e75 100644
--- a/xos/coreapi/core_main.py
+++ b/xos/coreapi/core_main.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 
+import argparse
 import os
 import sys
 import time
@@ -25,6 +26,19 @@
 
 log = create_logger(Config().get('logging'))
 
+def parse_args():
+    """Parse core options; --model_output names a file and is replaced by that file's text (empty string if unset)."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model_status", dest="model_status", type=int, default=0, help="status of model prep")
+    parser.add_argument("--model_output", dest="model_output", default=None, help="file containing output of model prep step")
+    args = parser.parse_args()
+
+    output_fn, args.model_output = args.model_output, ""
+    if output_fn:
+        with open(output_fn) as output_file:  # closes the handle that type=file used to leak
+            args.model_output = output_file.read()
+    return args
+
 def init_reaper():
     reaper = None
     try:
@@ -32,16 +46,22 @@
         reaper = ReaperThread()
         reaper.start()
     except:
-        logger.log_exception("Failed to initialize reaper")
+        log.exception("Failed to initialize reaper")
 
     return reaper
 
 if __name__ == '__main__':
-    server = XOSGrpcServer()
-    server.init_django()
+    args = parse_args()
+
+    server = XOSGrpcServer(model_status = args.model_status,
+                           model_output = args.model_output)
     server.start()
 
-    reaper = init_reaper()
+    if server.django_initialized:
+        reaper = init_reaper()
+    else:
+        log.warning("Skipping reaper as django is not initialized")
+        reaper = None
 
     restart_related_containers()
 
@@ -55,4 +75,6 @@
         log.info("XOS core terminated by keyboard interrupt")
 
     server.stop()
-    reaper.stop()
+
+    if reaper:
+        reaper.stop()
diff --git a/xos/coreapi/dynamicbuild.py b/xos/coreapi/dynamicbuild.py
index da59b37..c21ca86 100644
--- a/xos/coreapi/dynamicbuild.py
+++ b/xos/coreapi/dynamicbuild.py
@@ -53,6 +53,18 @@
         for item in request.attics:
             self.pre_validate_file(item)
 
+    def get_manifests(self):
+        """Return the parsed content of every readable *.json file in self.manifest_dir (bad files are logged and skipped)."""
+        manifests = []
+        for fn in [f for f in os.listdir(self.manifest_dir) if f.endswith(".json")]:
+            manifest_fn = os.path.join(self.manifest_dir, fn)
+            try:
+                with open(manifest_fn) as manifest_file:  # close the handle even if parsing fails
+                    manifests.append(json.load(manifest_file))
+            except Exception:
+                log.exception("Error loading manifest", filename=manifest_fn)
+        return manifests
+
     def load_manifest_from_request(self, request):
         manifest_fn = os.path.join(self.manifest_dir, request.name + ".json")
         if os.path.exists(manifest_fn):
diff --git a/xos/coreapi/grpc_server.py b/xos/coreapi/grpc_server.py
index 56811b3..01315e1 100644
--- a/xos/coreapi/grpc_server.py
+++ b/xos/coreapi/grpc_server.py
@@ -35,8 +35,6 @@
 log = create_logger(Config().get('logging'))
 
 from protos import schema_pb2, dynamicload_pb2
-#from xos_modeldefs_api import ModelDefsService
-#from xos_utility_api import UtilityService
 from xos_dynamicload_api import DynamicLoadService
 from dynamicbuild import DynamicBuilder
 from google.protobuf.empty_pb2 import Empty
@@ -96,11 +94,14 @@
 
 class XOSGrpcServer(object):
 
-    def __init__(self, port=50055):
+    def __init__(self, port=50055, model_status=0, model_output=""):
         self.port = port
+        self.model_status = model_status
+        self.model_output = model_output
         log.info('Initializing GRPC Server', port = port)
         self.thread_pool = futures.ThreadPoolExecutor(max_workers=1)
         self.server = grpc.server(self.thread_pool)
+        self.django_initialized = False
 
         server_key = open(SERVER_KEY,"r").read()
         server_cert = open(SERVER_CERT,"r").read()
@@ -117,6 +118,7 @@
         import django
         os.environ.setdefault("DJANGO_SETTINGS_MODULE", "xos.settings")
         django.setup()
+        self.django_initialized = True
 
     def register_core(self):
         from xos_grpc_api import XosService
@@ -147,9 +149,13 @@
                       dynamicload_pb2.add_dynamicloadServicer_to_server,
                       DynamicLoadService(self.thread_pool, self))
 
-        self.register_core()
-        self.register_utility()
-        self.register_modeldefs()
+        if (self.model_status == 0):
+            self.init_django()
+
+        if (self.django_initialized):
+            self.register_core()
+            self.register_utility()
+            self.register_modeldefs()
 
         # open port
         self.server.add_insecure_port('[::]:%s' % self.port)
@@ -217,24 +223,4 @@
     restart_docker_container("xos_tosca_1")
 
 
-# This is to allow running the GRPC server in stand-alone mode
-
-if __name__ == '__main__':
-    server = XOSGrpcServer()
-    server.init_django()
-    server.start()
-
-    restart_related_containers()
-
-    log.info("XOS core entering wait loop")
-    _ONE_DAY_IN_SECONDS = 60 * 60 * 24
-    try:
-        while True:
-            if server.exit_event.wait(_ONE_DAY_IN_SECONDS):
-                break
-    except KeyboardInterrupt:
-        log.info("XOS core terminated by keyboard interrupt")
-
-    server.stop()
-
 
diff --git a/xos/coreapi/protos/dynamicload.proto b/xos/coreapi/protos/dynamicload.proto
index 4504580..7827adb 100644
--- a/xos/coreapi/protos/dynamicload.proto
+++ b/xos/coreapi/protos/dynamicload.proto
@@ -2,7 +2,7 @@
 
 package xos;
 
-// import "google/protobuf/empty.proto";
+import "google/protobuf/empty.proto";
 import "google/api/annotations.proto";
 // import "common.proto";
 
@@ -42,6 +42,18 @@
     string version = 2;
 };
 
+message ServiceModelStatus {
+    string name = 1;     // "name" entry of the service's manifest
+    string version = 2;  // "version" entry of the service's manifest
+    string state = 3;    // manifest "state" entry, or "unspecified" when the manifest has none
+};
+
+message LoadStatusReply {
+    int32 model_status = 1;   // value the core was started with via --model_status (default 0)
+    string model_output = 2;  // text of the file given via --model_output, empty when none was given
+    repeated ServiceModelStatus services = 3;  // one entry per manifest returned by DynamicBuilder.get_manifests()
+}
+
 service dynamicload {
   rpc LoadModels(LoadModelsRequest) returns (LoadModelsReply) {
         option (google.api.http) = {
@@ -55,4 +67,10 @@
             body: "*"
         };
   }
-};
\ No newline at end of file
+  rpc GetLoadStatus(google.protobuf.Empty) returns (LoadStatusReply) {
+        // GET binding: no "body" mapping, since a GET request carries no body to map.
+        option (google.api.http) = {
+            get: "/xosapi/v1/dynamicload/load_status"
+        };
+  }
+};
diff --git a/xos/coreapi/start_coreapi.sh b/xos/coreapi/start_coreapi.sh
index 9a2b7c7..4da995c 100755
--- a/xos/coreapi/start_coreapi.sh
+++ b/xos/coreapi/start_coreapi.sh
@@ -15,6 +15,9 @@
 # limitations under the License.
 
 while true; do
-    make
+    make prep 2>&1 | tee coreapi_output.txt
+    MODEL_STATUS=${PIPESTATUS[0]}  # exit status of "make prep", not of tee
+    # Start the core whatever the prep result, passing it the status and the captured output file.
+    make start MODEL_STATUS=$MODEL_STATUS MODEL_OUTPUT=coreapi_output.txt
     sleep 1
 done
diff --git a/xos/coreapi/start_grpc_server.sh b/xos/coreapi/start_grpc_server.sh
deleted file mode 100755
index cad1f6b..0000000
--- a/xos/coreapi/start_grpc_server.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright 2017-present Open Networking Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-cd protos
-make rebuild-protos
-make
-cd ..
-source env.sh
-python ./grpc_server.py
diff --git a/xos/coreapi/xos_dynamicload_api.py b/xos/coreapi/xos_dynamicload_api.py
index c7327f8..e794283 100644
--- a/xos/coreapi/xos_dynamicload_api.py
+++ b/xos/coreapi/xos_dynamicload_api.py
@@ -67,4 +67,23 @@
             import traceback; traceback.print_exc()
             raise e
 
+    def GetLoadStatus(self, request, context):
+        """Return a LoadStatusReply with the server's model-prep status/output and one entry per manifest.
+
+        Any failure is printed via traceback and then re-raised unchanged.
+        """
+        try:
+            response = dynamicload_pb2.LoadStatusReply()
+            response.model_status = self.server.model_status
+            response.model_output = self.server.model_output
+            for manifest in DynamicBuilder().get_manifests():
+                item = response.services.add()
+                item.name = manifest["name"]
+                item.version = manifest["version"]
+                item.state = manifest.get("state", "unspecified")
+            return response
+        except Exception:
+            import traceback; traceback.print_exc()
+            raise  # bare raise keeps the original traceback ("raise e" restarts it on Python 2)
+
 
diff --git a/xos/xos_client/xossh b/xos/xos_client/xossh
index 3edc606..374a267 100644
--- a/xos/xos_client/xossh
+++ b/xos/xos_client/xossh
@@ -132,12 +132,17 @@
     request = current_client.dynamicload_pb2.UnloadModelsRequest(name=name, version=version)
     return current_client.dynamicload.UnloadModels(request)
 
+def getLoadStatus():
+    """Call the dynamicload GetLoadStatus RPC with an Empty request and return its reply."""
+    return current_client.dynamicload.GetLoadStatus(Empty())
+
 def listUtility():
     print 'setDirtyModels(class_name=None)'
     print 'listDirtyModels(class_name=None)'
     print 'listModelDefs()'
     print 'loadModels(name, version, xproto_filenames, decl_filenames, attic_filenames)'
     print 'unloadModels(name, version)'
+    print 'getLoadStatus()'
 
 def examples():
     print 'Slice.objects.all() # list all slices'