Streamline protobuf descriptor parsing by making it data-driven
diff --git a/voltha/core/protos/voltha.proto b/voltha/core/protos/voltha.proto
index a0d65db..a639002 100644
--- a/voltha/core/protos/voltha.proto
+++ b/voltha/core/protos/voltha.proto
@@ -10,15 +10,17 @@
 option java_outer_classname = "VolthaProtos";
 option csharp_namespace = "Opencord.Voltha.Voltha";
 
+// Empty message
 message NullMessage {}
 
+// Encode health status of a Voltha instance
 message HealthStatus {
 
   // Health states
   enum HealthState {
-    HEALTHY = 0;
-    OVERLOADED = 1;
-    DYING = 2;
+    HEALTHY = 0;  // The instance is healthy
+    OVERLOADED = 1;  // The instance is overloaded, decrease query rate
+    DYING = 2;  // The instance is in a critical condition, do not use it
   }
 
   // Current state of health of this Voltha instance
diff --git a/voltha/northbound/grpc/grpc_introspect.py b/voltha/northbound/grpc/grpc_introspect.py
index 0641f32..554a39b 100755
--- a/voltha/northbound/grpc/grpc_introspect.py
+++ b/voltha/northbound/grpc/grpc_introspect.py
@@ -23,57 +23,20 @@
 from collections import OrderedDict
 
 from enum import Enum
-from google.protobuf.descriptor import FieldDescriptor
+from google.protobuf.descriptor import FieldDescriptor, Descriptor
+from google.protobuf.message import Message
 from simplejson import dumps
 
 from google.protobuf import descriptor_pb2
 
 # TODO this hack needs to go
-# don't worry if the below too line is flagged by your IDE as unused and
-# unresolvable; they are fine
+# don't worry if the two lines below are flagged by your IDE as unused and
+# unresolvable; they are fine.
 import voltha.northbound.grpc.pb2_loader
 from google.api import http_pb2
 
 
-class Type(Enum):
-    # 0 is reserved for errors.
-    # Order is weird for historical reasons.
-    TYPE_DOUBLE = 1
-    TYPE_FLOAT = 2
-    # Not ZigZag encoded.  Negative numbers take 10 bytes.  Use TYPE_SINT64 if
-    # negative values are likely.
-    TYPE_INT64 = 3
-    TYPE_UINT64 = 4
-    # Not ZigZag encoded.  Negative numbers take 10 bytes.  Use TYPE_SINT32 if
-    # negative values are likely.
-    TYPE_INT32 = 5
-    TYPE_FIXED64= 6
-    TYPE_FIXED32 = 7
-    TYPE_BOOL = 8
-    TYPE_STRING = 9
-    TYPE_GROUP = 10  # Tag-delimited aggregate.
-    TYPE_MESSAGE = 11  # Length-delimited aggregate.
-
-    # New in version 2.
-    TYPE_BYTES = 12
-    TYPE_UINT32 = 13
-    TYPE_ENUM = 14
-    TYPE_SFIXED32 = 15
-    TYPE_SFIXED64 = 16
-    TYPE_SINT32 = 17  # Uses ZigZag encoding.
-    TYPE_SINT64 = 18  # Uses ZigZag encoding.
-
-
-class Label(Enum):
-    LABEL_OPTIONAL = 1
-    LABEL_REQUIRED = 2
-    LABEL_REPEATED = 3
-
-
-class OptimizeMode(Enum):
-    SPEED = 1
-    CODE_SIZE = 2
-    LITE_RUNTIME = 3
+class InvalidDescriptorError(Exception): pass
 
 
 class DescriptorParser(object):
@@ -81,432 +44,116 @@
     def __init__(self, ignore_empty_source_code_info=True):
         self.ignore_empty_source_code_info = ignore_empty_source_code_info
         self.catalog = {}
+        self.meta, blob = self.load_root_descriptor()
+        self.load_descriptor(blob)
+
+    def load_root_descriptor(self):
+        """Load descriptor.desc to make things more data driven"""
+        with open('descriptor.desc', 'r') as f:
+            blob = f.read()
+        proto = descriptor_pb2.FileDescriptorSet()
+        proto.ParseFromString(blob)
+        assert len(proto.file) == 1
+        fdp = proto.file[0]
+
+        # for i, (fd, v) in enumerate(fdp.ListFields()):
+        #     assert isinstance(fd, FieldDescriptor)
+        #     print fd.name, fd.full_name, fd.number, fd.type, fd.label, fd.message_type, type(v)
+
+        return fdp, blob
 
     def get_catalog(self):
         return self.catalog
 
-    def load_descriptor(self, descriptor_blob):
+    def load_descriptor(self, descriptor_blob, fold_comments=True):
 
         # decode desciription
         file_descriptor_set = descriptor_pb2.FileDescriptorSet()
         file_descriptor_set.ParseFromString(descriptor_blob)
 
-        # walk the proto files and parse them and add to catalog (by .name)
-        for file_descriptor_proto in file_descriptor_set.file:
-            d = self.parse_file_descriptor_proto(file_descriptor_proto)
-            self.catalog[d['name']] = d
+        d = self.parse(file_descriptor_set)
+        for _file in d['file']:
+            if fold_comments:
+                self.fold_comments_in(_file)
+            self.catalog[_file['package']] = _file
 
-    def parse_descriptor_proto(self, o):
-        assert isinstance(o, descriptor_pb2.DescriptorProto)
+    def parse_message(self, m):
+        assert isinstance(m, Message)
         d = OrderedDict()
-        d['name'] = o.name
-        d['field'] = [
-            self.parse_field_description_proto(x) for x in o.field]
-        d['extension'] = [
-            self.parse_field_description_proto(x) for x in o.extension]
-        d['nested_type'] = [
-            self.parse_descriptor_proto(x) for x in o.nested_type]
-        d['enum_type'] = [
-            self.parse_enum_description_proto(x) for x in o.enum_type]
-        d['extension_range'] = [
-            self.parse_extension_range(x) for x in o.extension_range]
-        d['oneof_decl'] = [
-            self.parse_oneof_description_proto(x) for x in o.oneof_decl]
-        if hasattr(o, 'options'):
-            d['options'] = self.parse_message_options(o.options)
-        d['reserved_range'] = [
-            self.parse_reserved_range(x) for x in o.reserved_range]
-        d['reserved_name'] = [x for x in o.reserved_name]
+        for fd, v in m.ListFields():
+            assert isinstance(fd, FieldDescriptor)
+            if fd.label in (1, 2):
+                d[fd.name] = self.parse(v)
+            elif fd.label == 3:
+                d[fd.name] = [self.parse(x) for x in v]
+            else:
+                raise InvalidDescriptorError()
+
         return d
 
-    def parse_enum_description_proto(self, o):
-        assert isinstance(o, descriptor_pb2.EnumDescriptorProto)
-        d = OrderedDict()
-        if hasattr(o, 'name'):
-            d['name'] = o.name
-        d['value'] = [self.parse_enum_value_descriptor_proto(x) for x in
-                      o.value]
-        if hasattr(o, 'options'):
-            d['options'] = self.parse_enum_options(o.options)
-        return d
+    parser_table = {
+        unicode: lambda x: x,
+        int: lambda x: x,
+        bool: lambda x: x,
+    }
 
-    def parse_enum_options(self, o):
-        assert isinstance(o, descriptor_pb2.EnumOptions)
-        d = OrderedDict()
-        if hasattr(o, 'allow_alias'):
-            d['allow_alias'] = o.allow_alias
-        d['deprecated'] = getattr(o, 'deprecated', False)
-        d['uninterpreted_option'] = [self.parse_uninterpreted_option(x) for x
-                                     in o.uninterpreted_option]
-        return d
-
-    def parse_enum_value_descriptor_proto(self, o):
-        assert isinstance(o, descriptor_pb2.EnumValueDescriptorProto)
-        d = OrderedDict()
-        if hasattr(o, 'name'):
-            d['name'] = o.name
-        if hasattr(o, 'number'):
-            d['number'] = o.number
-        if hasattr(o, 'options'):
-            d['options'] = self.parse_enum_value_options(o.options)
-        return d
-
-    def parse_enum_value_options(self, o):
-        assert isinstance(o, descriptor_pb2.EnumValueOptions)
-        d = OrderedDict()
-        d['deprecated'] = getattr(o, 'deprecated', False)
-        d['uninterpreted_option'] = [self.parse_uninterpreted_option(x) for x
-                                     in o.uninterpreted_option]
-        return d
-
-    def parse_extension(self, o):
-        assert isinstance(o, descriptor_pb2.FieldDescriptorProto)
-        print [f for f in dir(o) if f[0].lower() == f[0] and f[0] != '_']
-        raise NotImplementedError()
-
-    def parse_extension_range(self, o):
-        print type(o)
-        print [f for f in dir(o) if f[0].lower() == f[0] and f[0] != '_']
-        raise NotImplementedError()
-
-    def parse_field_description_proto(self, o):
-        assert isinstance(o, descriptor_pb2.FieldDescriptorProto)
-        d = OrderedDict()
-        if hasattr(o, 'name'):
-            d['name'] = o.name
-        if hasattr(o, 'number'):
-            d['number'] = o.number
-        if hasattr(o, 'label'):
-            d['label'] = self.parse_label(o.label)
-        if hasattr(o, 'type'):
-            d['type'] = self.parse_type(o.type)
-        if hasattr(o, 'type_name'):
-            d['type_name'] = o.type_name
-        if hasattr(o, 'extendee'):
-            d['extendee'] = o.extendee
-        if hasattr(o, 'default_value'):
-            d['default_value'] = o.default_value
-        if hasattr(o, 'oneof_index'):
-            d['oneof_index'] = o.oneof_index
-        if hasattr(o, 'json_name'):
-            d['json_name'] = o.json_name
-        if hasattr(o, 'field_options'):
-            d['field_options'] = self.parse_field_options(o.field_options)
-        return d
-
-    def parse_field_options(self, o):
-        assert isinstance(o, descriptor_pb2.FieldOptions)
-        print [f for f in dir(o) if f[0].lower() == f[0] and f[0] != '_']
-        raise NotImplementedError()
-
-    def parse_file_descriptor_proto(self, o):
-        assert isinstance(o, descriptor_pb2.FileDescriptorProto)
-        d = OrderedDict()
-        d['name'] = o.name
-        d['package'] = o.package
-        d['dependency'] =[x for x in o.dependency]
-        d['public_dependency'] = [x for x in o.public_dependency]
-        d['weakdependency'] = [x for x in o.weak_dependency]
-        d['message_type'] = [
-            self.parse_descriptor_proto(x) for x in o.message_type]
-        d['enum_type'] = [
-            self.parse_enum_description_proto(x) for x in o.enum_type]
-        d['service'] = [
-            self.parse_service(x) for x in o.service]
-        d['extension'] = [
-            self.parse_extension(x) for x in o.extension]
-        if hasattr(o, 'options'):
-            d['options'] = self.parse_options(o.options)
-        if hasattr(o, 'source_code_info'):
-            d['source_code_info'] = self.parse_source_code_info(
-                o.source_code_info)
-        if hasattr(o, 'syntax'):
-            d['syntax'] = o.syntax
-        return d
-
-    def parse_label(self, o):
-        isinstance(o, int)
-        return Label(o).name
-
-    def parse_location(self, o):
-        assert isinstance(o, descriptor_pb2.SourceCodeInfo.Location)
-        d = OrderedDict()
-        d['path'] = [x for x in o.path]
-        d['span'] = [x for x in o.span]
-        if hasattr(o, 'leading_comments'):
-            d['leading_comments'] = o.leading_comments
-        if hasattr(o, 'trailing_comments'):
-            d['trailing_comments'] = o.trailing_comments
-        d['leading_detached_comments'] = [
-            x for x in o.leading_detached_comments]
-        return d
-
-    def parse_message_options(self, o):
-        assert isinstance(o, descriptor_pb2.MessageOptions)
-        d = OrderedDict()
-        d['message_set_wire_format'] = getattr(
-            o, 'message_set_wire_format', False)
-        d['no_standard_descriptor_accessor'] = getattr(
-            o, 'no_standard_descriptor_accessor', False)
-        d['deprecated'] = getattr(o, 'deprecated', False)
-        if hasattr(o, 'map_entry'):
-            d['map_entry'] = o.map_entry
-        d['uninterpreted_option'] = [
-            self.parse_uninterpreted_option(x) for x in
-            o.uninterpreted_option]
-        return d
-
-    def parse_method_descriptor_proto(self, o):
-        assert isinstance(o, descriptor_pb2.MethodDescriptorProto)
-        d = OrderedDict()
-        if hasattr(o, 'name'):
-            d['name'] = o.name
-        if hasattr(o, 'input_type'):
-            d['input_type'] = o.input_type
-        if hasattr(o, 'output_type'):
-            d['output_type'] = o.output_type
-        if hasattr(o, 'options'):
-            d['options'] = self.parse_method_options(o.options)
-        d['client_streaming'] = getattr(o, 'client_streamin', False)
-        d['server_streaming'] = getattr(o, 'server_streamin', False)
-        return d
-
-    def parse_method_options(self, o):
-        assert isinstance(o, descriptor_pb2.MethodOptions)
-        d = OrderedDict()
-        d['deprecated'] = getattr(o, 'deprecated', False)
-        d['uninterpreted_option'] = [self.parse_uninterpreted_option(x) for x
-                                     in o.uninterpreted_option]
-        extensions = dict(
-            (k.full_name, self.parse_method_extension(k.full_name, v))
-             for k, v
-             in o.Extensions._extended_message._fields.items()
-            if k.full_name !=
-            'google.protobuf.MethodOptions.uninterpreted_option' )
-        if extensions:
-            d['extensions'] = extensions
-        return d
-
-    def parse_method_extension(self, full_name, o):
-        if full_name == 'google.api.http':
-            d = self.parse_http_rule(o)
+    def parse(self, o):
+        if isinstance(o, Message):
+            return self.parse_message(o)
         else:
-            pass  # ignore unrecognized extensions
-        return d
-
-    def parse_http_rule(self, o):
-        assert isinstance(o, http_pb2.HttpRule)
-        d = OrderedDict()
-        if o.get:
-            method, path = 'get', o.get
-        elif o.put:
-            method, path = 'put', o.put
-        elif o.post:
-            method, path = 'post', o.post
-        elif o.delete:
-            method, path = 'delete', o.delete
-        elif o.patch:
-            method, path = 'patch', o.patch
-        else:
-            custom = self.parse_custom_http_pattern(o.custom)
-            method, path = custom['kind'], custom['path']
-        d['method'] = method
-        d['path'] = path
-        d['body'] = o.body
-        return d
-
-    def parse_custom_http_pattern(self, o):
-        assert isinstance(o, http_pb2.CustomHttpPattern)
-        d = OrderedDict()
-        d['kind'] = o.kind
-        d['path'] = o.path
-        return d
-
-    def parse_oneof_description_proto(self, o):
-        raise NotImplementedError()
-
-    def parse_optimize_mode(self, o):
-        assert isinstance(o, int)
-        return OptimizeMode(o).name
-
-    def parse_options(self, o):
-        assert isinstance(o, descriptor_pb2.FileOptions)
-        d = OrderedDict()
-        if hasattr(o, 'java_package'):
-            d['java+package'] = o.java_package
-        if hasattr(o, 'java_outer_classname'):
-            d['java_outer_classname'] = o.java_outer_classname
-        d['java_multiple_files'] = getattr(o, 'java_multiple_files', False)
-        d['java_generate_equals_and_hash'] = getattr(
-            o, 'java_generate_equals_and_hash', False)
-        d['java_string_check_utf8'] = getattr(
-            o, 'java_string_check_utf8', False)
-        d['optimize_for'] = self.parse_optimize_mode(
-            getattr(o, 'optimize_for', OptimizeMode.SPEED))
-        if hasattr(o, 'go_package'):
-            d['go_package'] = o.go_package
-        d['cc_generic_services'] = getattr(o, 'cc_generic_services', False)
-        d['java_generic_services'] = getattr(o, 'java_generic_services', False)
-        d['py_generic_services'] = getattr(o, 'py_generic_services', False)
-        d['deprecated'] = getattr(o, 'deprecated', False)
-        d['cc_enable_arenas'] = getattr(o, 'cc_enable_arenas', False)
-        if hasattr(o, 'objc_class_prefix'):
-            d['objc_class_prefix'] = o.objc_class_prefix
-        if hasattr(o, 'csharp_namespace'):
-            d['csharp_namespace'] = o.csharp_namespace
-        d['uninterpreted_option'] = [self.parse_uninterpreted_option(x) for x
-                                     in o.uninterpreted_option]
-        return d
-
-    def parse_reserved_range(self, o):
-        print type(o)
-        print [f for f in dir(o) if f[0].lower() == f[0] and f[0] != '_']
-        raise NotImplementedError()
-
-    def parse_service(self, o):
-        assert isinstance(o, descriptor_pb2.ServiceDescriptorProto)
-        d = OrderedDict()
-        if hasattr(o, 'name'):
-            d['name'] = o.name
-        d['method'] = [self.parse_method_descriptor_proto(x) for x in
-                       o.method]
-        if hasattr(o, 'options'):
-            d['options'] = self.parse_service_options(o.options)
-        return d
-
-    def parse_service_options(self, o):
-        assert isinstance(o, descriptor_pb2.ServiceOptions)
-        d = OrderedDict()
-        d['deprecated'] = getattr(o, 'deprecated', False)
-        d['uninterpreted_option'] = [self.parse_uninterpreted_option(x) for x
-                                     in o.uninterpreted_option]
-        return d
-
-    def parse_source_code_info(self, o):
-        assert isinstance(o, descriptor_pb2.SourceCodeInfo)
-
-        def is_location_empty(l):
-            return not (
-                l['leading_comments'] or
-                l['trailing_comments'] or
-                l['leading_detached_comments'])
-
-        d = OrderedDict()
-        locations = (self.parse_location(x) for x in o.location)
-        if self.ignore_empty_source_code_info:
-            locations = [l for l in locations if not is_location_empty(l)]
-        d['location'] = locations
-        return d
-
-    def parse_type(self, o):
-        isinstance(o, int)
-        return Type(o).name
-
-    def parse_uninterpreted_option(self, o):
-        print (type(o))
-        print [f for f in dir(o) if f[0].lower() == f[0] and f[0] != '_']
-        raise NotImplementedError()
-
-    def fold_all_comments(self):
-        """ For each catalog entry, update appropriate nodes (dicts) with a
-            '_description' node with any comments found in
-            the source_code_info. Also, drop
-        """
-        for descriptor in self.catalog.values():
-            self.fold_comments_in(descriptor)
+            return self.parser_table[type(o)](o)
 
     def fold_comments_in(self, descriptor):
         assert isinstance(descriptor, dict)
 
         locations = descriptor.get('source_code_info', {}).get('location', [])
         for location in locations:
-            path = location['path']
+            path = location.get('path', [])
             comments = ''.join([
-                location['leading_comments'].strip(' '),
-                location['trailing_comments'].strip(' '),
+                location.get('leading_comments', '').strip(' '),
+                location.get('trailing_comments', '').strip(' '),
                 ''.join(block.strip(' ') for block
-                          in location['leading_detached_comments'])
+                          in location.get('leading_detached_comments', ''))
             ]).strip()
-            print path, '->', comments
 
-            root_path_map = {
-                4: (self.find_in_message_type, 'message_type'),
-                6: (self.find_in_service, 'service')
-            }
+            # ignore locations with no comments
+            if not comments:
+                continue
 
-            index = path.pop(0)
-            finder, key = root_path_map.get(index, (None, None))
-            if finder is not None:
-                node = finder(descriptor[key], path)
+            # we ignore paths with an odd number of entries, since these do
+            # not address our schema nodes, but rather the meta schema
+            if (len(path) % 2 == 0):
+                node = self.find_node_by_path(
+                    path, self.meta.DESCRIPTOR, descriptor)
+                assert isinstance(node, dict)
                 node['_description'] = comments
 
         # remove source_code_info
         del descriptor['source_code_info']
 
-    def find_in_message_type(self, message_types, path):
-        index = path.pop(0)
-        message_type = message_types[index]
-        if not path:
-            return message_type
+    def find_node_by_path(self, path, meta, o):
 
-        path_map = {
-            2: (self.find_in_field, 'field'),
-            4: (self.find_in_enum_type, 'enum_type')
-        }
-        index = path.pop(0)
-        finder, key = path_map.get(index, (None, None))
-        if finder is not None:
-            return finder(message_type[key], path)
-        raise NotImplementedError()
-
-    def find_in_methods(self, methods, path):
-        index = path.pop(0)
-        method = methods[index]
+        # stop recursion when path is empty
         if not path:
-            return method
-        raise NotImplementedError()
+            return o
 
-    def find_in_service(self, services, path):
-        index = path.pop(0)
-        service = services[index]
-        if not path:
-            return service
-        path_map = {
-            2: (self.find_in_methods, 'method')
-        }
-        index = path.pop(0)
-        finder, key = path_map.get(index, (None, None))
-        if finder is not None:
-            return finder(service[key], path)
-        raise NotImplementedError()
+        # sanity check
+        assert len(path) >= 2
+        assert isinstance(meta, Descriptor)
+        assert isinstance(o, dict)
 
-    def find_in_field(self, fields, path):
-        index = path.pop(0)
-        field = fields[index]
-        if not path:
-            return field
-        raise NotImplementedError()
+        # find field name, then actual field
+        field_number = path.pop(0)
+        field_def = meta.fields_by_number[field_number]
+        field = o[field_def.name]
 
-    def find_in_enum_type(self, enum_types, path):
+        # field must be a list, extract entry with given index
+        assert isinstance(field, list)  # expected to be a list field
         index = path.pop(0)
-        enum_type = enum_types[index]
-        if not path:
-            return enum_type
-        path_map = {
-            2: (self.find_in_enum_values, 'value')
-        }
-        index = path.pop(0)
-        finder, key = path_map.get(index, (None, None))
-        if finder is not None:
-            return finder(enum_type[key], path)
-        raise NotImplementedError()
+        child_o = field[index]
 
-    def find_in_enum_values(self, enum_values, path):
-        index = path.pop(0)
-        enum_value = enum_values[index]
-        if not path:
-            return enum_value
-        raise NotImplementedError()
+        child_meta = field_def.message_type
+        return self.find_node_by_path(path, child_meta, child_o)
 
 
 if __name__ == '__main__':
@@ -517,11 +164,9 @@
     desc_file = os.path.join(desc_dir, 'voltha.desc')
     with open(desc_file, 'rb') as f:
         descriptor_blob = f.read()
-    print 'read desc blob of {} bytes'.format(len(descriptor_blob))
 
     parser = DescriptorParser()
     parser.load_descriptor(descriptor_blob)
-    parser.fold_all_comments()
     print dumps(parser.get_catalog(), indent=4)