This commit is the initial attempt in converting a protobuf schema
into a yang schema. Intructions on how to execute this convertion is
in the proto2yang.py header.  Test cases to follow.

Change-Id: I7a9273138339bbaf521a2b5eab3e5f1205f79bd0
diff --git a/experiments/proto2Yang/descriptor_parser.py b/experiments/proto2Yang/descriptor_parser.py
new file mode 100644
index 0000000..c23f497
--- /dev/null
+++ b/experiments/proto2Yang/descriptor_parser.py
@@ -0,0 +1,164 @@
+#
+# Copyright 2016 the original author or authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+from collections import OrderedDict
+
+from google.protobuf import descriptor_pb2
+from google.protobuf.descriptor import FieldDescriptor, Descriptor
+from google.protobuf.message import Message
+
+
+class InvalidDescriptorError(Exception): pass
+
+
+class DescriptorParser(object):
+    """
+    Used to parse protobuf FileDescriptor objects into native Python
+    data structures (nested dict/list/intrinsic values. Two of the typical
+    sources of FileDescriptor objects are:
+    1. CodeGeneratorRequest, used as binary input to any protoc plugin,
+       contains a list of these FileDescriptor objects (under the
+       proto_file attribute)
+    2. FileDescriptorSet, as saved by protoc when using the -o option.
+
+    An important feature of the parser is that it can process the source
+    code annotations and can fold comments into the relevant defintions
+    present in the proto file.
+
+    Usage (in a protoc plugin):
+    >>> request = plugin.CodeGeneratorRequest()
+    >>> request.ParseFromString(sys.stdin.read())
+    >>> parser = DescriptorParser()
+    >>> for proto_file in request.proto_file:
+    >>>     parsed_data = parser.parse_file_descriptor()
+    >>>     print json.dumps(parsed_data, indent=4)
+    """
+
+    meta = None
+
+    def __init__(self):
+        if DescriptorParser.meta is None:
+            DescriptorParser.meta = self.load_meta_descriptor()
+
+    def load_meta_descriptor(self):
+        """
+        Load the protobuf version of descriptor.proto to use it in
+        decoding protobuf paths.
+        """
+        fpath = os.path.abspath(os.path.join(os.path.dirname(__file__),
+                                             'descriptor.desc'))
+        with open(fpath, 'r') as f:
+            blob = f.read()
+        proto = descriptor_pb2.FileDescriptorSet()
+        proto.ParseFromString(blob)
+        assert len(proto.file) == 1
+        return proto.file[0]
+
+    parser_table = {
+        unicode: lambda x: x,
+        int: lambda x: x,
+        bool: lambda x: x,
+    }
+
+    def parse(self, o, type_tag_name=None):
+        if isinstance(o, Message):
+            return self.parse_message(o, type_tag_name)
+        else:
+            return self.parser_table[type(o)](o)
+
+    def parse_message(self, m, type_tag_name=None):
+        assert isinstance(m, Message)
+        d = OrderedDict()
+        for field, value in m.ListFields():
+            assert isinstance(field, FieldDescriptor)
+            if field.label in (1, 2):
+                d[field.name] = self.parse(value, type_tag_name)
+            elif field.label == 3:
+                d[field.name] = [self.parse(x, type_tag_name) for x in
+                                 value]
+            else:
+                raise InvalidDescriptorError()
+
+        if type_tag_name is not None:
+            d[type_tag_name] = m.DESCRIPTOR.full_name.strip('.')
+
+        return d
+
+    def parse_file_descriptor(self, descriptor,
+                              type_tag_name=None,
+                              fold_comments=False):
+
+        d = self.parse(descriptor, type_tag_name=type_tag_name)
+
+        if fold_comments:
+            locations = d.get('source_code_info', {}).get('location', [])
+            for location in locations:
+                path = location.get('path', [])
+                comments = ''.join([
+                    location.get('leading_comments', '').strip(' '),
+                    location.get('trailing_comments', '').strip(' '),
+                    ''.join(block.strip(' ') for block
+                            in
+                            location.get('leading_detached_comments', ''))
+                ]).strip()
+
+                # ignore locations with no comments
+                if not comments:
+                    continue
+
+                # we ignore path with odd number of entries, since these do
+                # not address our schema nodes, but rather the meta schema
+                if (len(path) % 2 == 0):
+                    node = self.find_node_by_path(
+                        path, self.meta.DESCRIPTOR, d)
+                    assert isinstance(node, dict)
+                    node['_description'] = comments
+
+            # remove source_code_info
+            del d['source_code_info']
+
+        return d
+
+    def parse_file_descriptors(self, descriptors,
+                              type_tag_name=None,
+                              fold_comments=False):
+        return [self.parse_file_descriptor(descriptor,
+                                           type_tag_name=type_tag_name,
+                                           fold_comments=fold_comments)
+                for descriptor in descriptors]
+
+    def find_node_by_path(self, path, meta, o):
+        # stop recursion when path is empty
+        if not path:
+            return o
+
+        # sanity check
+        assert len(path) >= 2
+        assert isinstance(meta, Descriptor)
+        assert isinstance(o, dict)
+
+        # find field name, then actual field
+        field_number = path.pop(0)
+        field_def = meta.fields_by_number[field_number]
+        field = o[field_def.name]
+
+        # field must be a list, extract entry with given index
+        assert isinstance(field, list)  # expected to be a list field
+        index = path.pop(0)
+        child_o = field[index]
+
+        child_meta = field_def.message_type
+        return self.find_node_by_path(path, child_meta, child_o)