This commit is the initial attempt at converting a protobuf schema
into a yang schema. Instructions on how to execute this conversion are
in the header.  Test cases to follow.

Change-Id: I7a9273138339bbaf521a2b5eab3e5f1205f79bd0
diff --git a/experiments/proto2Yang/addressbook.proto b/experiments/proto2Yang/addressbook.proto
+++ b/experiments/proto2Yang/addressbook.proto
@@ -0,0 +1,33 @@
+// See README.txt for information and build instructions.
+syntax = "proto3";
+package tutorial;
+option java_package = "com.example.tutorial";
+option java_outer_classname = "AddressBookProtos";
+option csharp_namespace = "Google.Protobuf.Examples.AddressBook";
+// A person entry in the address book.
+message Person {
+  string name = 1;
+  int32 id = 2;        // Unique ID number for this person.
+  string email = 3;
+  // Kind of phone line a PhoneNumber refers to.
+  enum PhoneType {
+    MOBILE = 0;
+    HOME = 1;
+    WORK = 2;
+  }
+  // A phone number together with its PhoneType.
+  message PhoneNumber {
+    string number = 1;
+    PhoneType type = 2;
+  }
+  // Phone numbers recorded for this person (repeated: zero or more).
+  repeated PhoneNumber phones = 4;
+// Our address book file is just one of these: a list of Person entries.
+message AddressBook {
+  // Every Person stored in this address book.
+  repeated Person people = 1;
diff --git a/experiments/proto2Yang/descriptor.desc b/experiments/proto2Yang/descriptor.desc
+++ b/experiments/proto2Yang/descriptor.desc
diff --git a/experiments/proto2Yang/ b/experiments/proto2Yang/
+++ b/experiments/proto2Yang/
@@ -0,0 +1,164 @@
+# Copyright 2016 the original author or authors.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from collections import OrderedDict
+from google.protobuf import descriptor_pb2
+from google.protobuf.descriptor import FieldDescriptor, Descriptor
+from google.protobuf.message import Message
+class InvalidDescriptorError(Exception):
+    """Raised by the parser when a field carries an unrecognised label."""
+    pass
+class DescriptorParser(object):
+    """
+    Used to parse protobuf FileDescriptor objects into native Python
+    data structures (nested dict/list/intrinsic values). Two of the typical
+    sources of FileDescriptor objects are:
+    1. CodeGeneratorRequest, used as binary input to any protoc plugin,
+       contains a list of these FileDescriptor objects (under the
+       proto_file attribute)
+    2. FileDescriptorSet, as saved by protoc when using the -o option.
+    An important feature of the parser is that it can process the source
+    code annotations and can fold comments into the relevant definitions
+    present in the proto file.
+    Usage (in a protoc plugin):
+    >>> request = plugin.CodeGeneratorRequest()
+    >>> request.ParseFromString(data)  # data: serialized request bytes
+    >>> parser = DescriptorParser()
+    >>> for proto_file in request.proto_file:
+    >>>     parsed_data = parser.parse_file_descriptor(proto_file)
+    >>>     print json.dumps(parsed_data, indent=4)
+    """
+    # Class-level cache for the descriptor returned by
+    # load_meta_descriptor(); filled in by the first instance created.
+    meta = None
+    def __init__(self):
+        """Load the meta descriptor once and keep it on the class."""
+        if DescriptorParser.meta is None:
+            DescriptorParser.meta = self.load_meta_descriptor()
+    def load_meta_descriptor(self):
+        """
+        Load the protobuf version of descriptor.proto to use it in
+        decoding protobuf paths.
+        The data comes from 'descriptor.desc' in the directory of this
+        module; it is parsed as a FileDescriptorSet and the single file
+        entry it contains is returned.
+        """
+        fpath = os.path.abspath(os.path.join(os.path.dirname(__file__),
+                                             'descriptor.desc'))
+        # NOTE(review): the file is opened in text mode ('r') although its
+        # contents are handed to ParseFromString below; confirm whether
+        # binary mode ('rb') is required on the target platforms.
+        with open(fpath, 'r') as f:
+            # NOTE(review): the right-hand side of this assignment is not
+            # visible here; presumably the whole file is read -- confirm.
+            blob =
+        proto = descriptor_pb2.FileDescriptorSet()
+        proto.ParseFromString(blob)
+        # the descriptor set is expected to hold exactly one file
+        assert len(proto.file) == 1
+        return proto.file[0]
+    # Converters for non-message values, keyed by the value's Python type;
+    # each returns its argument unchanged.  parse() indexes this table
+    # directly, so a value of any other type raises KeyError there.
+    parser_table = {
+        unicode: lambda x: x,
+        int: lambda x: x,
+        bool: lambda x: x,
+    }
+    def parse(self, o, type_tag_name=None):
+        """
+        Convert one value: Message instances are handled by
+        parse_message(), everything else by the parser_table entry for
+        the value's exact type.
+        """
+        if isinstance(o, Message):
+            return self.parse_message(o, type_tag_name)
+        else:
+            return self.parser_table[type(o)](o)
+    def parse_message(self, m, type_tag_name=None):
+        """
+        Convert the fields reported by m.ListFields() into an OrderedDict.
+        When type_tag_name is given, the full name of the message type is
+        stored under that key as well (and passed down to nested values).
+        Raises InvalidDescriptorError for a field label other than 1, 2
+        or 3.
+        """
+        assert isinstance(m, Message)
+        d = OrderedDict()
+        for field, value in m.ListFields():
+            assert isinstance(field, FieldDescriptor)
+            # labels 1 and 2 are converted as a single value (presumably
+            # FieldDescriptor.LABEL_OPTIONAL / LABEL_REQUIRED -- confirm)
+            if field.label in (1, 2):
+                # NOTE(review): the dict key is missing on this line and in
+                # the branch below; presumably the field's name -- confirm.
+                d[] = self.parse(value, type_tag_name)
+            # label 3 is converted element by element into a list
+            # (presumably FieldDescriptor.LABEL_REPEATED -- confirm)
+            elif field.label == 3:
+                d[] = [self.parse(x, type_tag_name) for x in
+                                 value]
+            else:
+                raise InvalidDescriptorError()
+        if type_tag_name is not None:
+            # tag the dict with the message type name, without any
+            # leading or trailing dots
+            d[type_tag_name] = m.DESCRIPTOR.full_name.strip('.')
+        return d
+    def parse_file_descriptor(self, descriptor,
+                              type_tag_name=None,
+                              fold_comments=False):
+        """
+        Convert a file descriptor message into nested native data.
+        With fold_comments set, each 'source_code_info' location that has
+        comment text and an even-length path gets that text stored under
+        '_description' on the node the path addresses, and the
+        'source_code_info' entry is then removed from the result.
+        """
+        d = self.parse(descriptor, type_tag_name=type_tag_name)
+        if fold_comments:
+            locations = d.get('source_code_info', {}).get('location', [])
+            for location in locations:
+                path = location.get('path', [])
+                # concatenate leading, trailing and detached comments,
+                # trimming spaces from each piece and surrounding
+                # whitespace from the combined text
+                comments = ''.join([
+                    location.get('leading_comments', '').strip(' '),
+                    location.get('trailing_comments', '').strip(' '),
+                    ''.join(block.strip(' ') for block
+                            in
+                            location.get('leading_detached_comments', ''))
+                ]).strip()
+                # ignore locations with no comments
+                if not comments:
+                    continue
+                # we ignore path with odd number of entries, since these do
+                # not address our schema nodes, but rather the meta schema
+                if (len(path) % 2 == 0):
+                    # note: find_node_by_path consumes 'path' in place
+                    node = self.find_node_by_path(
+                        path, self.meta.DESCRIPTOR, d)
+                    assert isinstance(node, dict)
+                    node['_description'] = comments
+            # remove source_code_info
+            # NOTE(review): this raises KeyError when the descriptor has no
+            # 'source_code_info', although the lookup above tolerates its
+            # absence -- confirm that callers always provide it.
+            del d['source_code_info']
+        return d
+    def parse_file_descriptors(self, descriptors,
+                              type_tag_name=None,
+                              fold_comments=False):
+        """
+        Apply parse_file_descriptor() with the same options to every item
+        of descriptors and return the results as a list.
+        """
+        return [self.parse_file_descriptor(descriptor,
+                                           type_tag_name=type_tag_name,
+                                           fold_comments=fold_comments)
+                for descriptor in descriptors]
+    def find_node_by_path(self, path, meta, o):
+        """
+        Follow path through the parsed data o and return the node reached.
+        The path is consumed two entries at a time: a field number, looked
+        up in meta.fields_by_number, followed by an index into that list
+        field.  The list passed as path is emptied in place (pop(0)).
+        """
+        # stop recursion when path is empty
+        if not path:
+            return o
+        # sanity check
+        assert len(path) >= 2
+        assert isinstance(meta, Descriptor)
+        assert isinstance(o, dict)
+        # find field name, then actual field
+        field_number = path.pop(0)
+        field_def = meta.fields_by_number[field_number]
+        # NOTE(review): the lookup key is missing on the next line;
+        # presumably the name taken from field_def -- confirm.
+        field = o[]
+        # field must be a list, extract entry with given index
+        assert isinstance(field, list)  # expected to be a list field
+        index = path.pop(0)
+        child_o = field[index]
+        # descend using the message type of the field just followed
+        child_meta = field_def.message_type
+        return self.find_node_by_path(path, child_meta, child_o)
diff --git a/experiments/proto2Yang/ b/experiments/proto2Yang/
+++ b/experiments/proto2Yang/
@@ -0,0 +1,376 @@
+#!/usr/bin/env python
+# Copyright 2016 the original author or authors.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""protoc plugin to convert a protobuf schema to a yang schema
+   - basic support for messages, fields, enumerations, services, methods
+   - yang semantic rules still need to be implemented
+   - to run this plugin :
+   $ python -m -I.
+   --plugin=protoc-gen-custom=./ --custom_out=. <proto file>.proto
+   - the above will produce a <proto file>.yang file formatted for yang
+   - two examples of proto that can be used in the same directory are
+   yang.proto and addressbook.proto
+import sys
+from jinja2 import Template
+from google.protobuf.compiler import plugin_pb2 as plugin
+from descriptor_parser import DescriptorParser
+from google.protobuf.descriptor import FieldDescriptor
+# Jinja2 template rendered by generate_code() with a single variable,
+# 'module' (the dict built by _traverse_desc).  It emits one YANG module
+# with a grouping per message (recursing into nested messages), a typedef
+# per enum declared inside a message, and one rpc per service method.
+# Fixes relative to the previous text: the module description is rendered
+# from module.description (it referenced the not-yet-defined 'message'),
+# the stray ';' after the enum description comment is gone, and the stray
+# '"' inside the service description comment is gone.
+# NOTE(review): several '{{ }}' placeholders and the '{% for service in %}'
+# loop source are empty in this revision and must be filled in before the
+# template can compile.
+template_yang = Template("""
+module {{ }} {
+    namespace "{{ module.package }}";
+    yang-version 1.1;
+    prefix "voltha";
+    revision 2016-11-15 {{ module.revision }} {
+    {% if module.description %}
+        /* {{ module.description }} */
+    {% else %}
+        description "Initial revision.";
+    {% endif %}
+    }
+    {% for message in module.messages recursive %}
+    grouping {{ }} {
+        {% if message.description %}
+        /* {{ message.description }} */
+        {% endif %}
+        {% if message.key %}
+        key {{ message.key_name }} ;
+        {% endif %}
+        {% for field in message.fields %}
+        {% if field.type_ref %}
+        {% if field.description %}
+        /* {{ field.description }} */
+        {% endif %}
+        uses {{ field.type }} ;
+        {% else %}
+        leaf {{ }} {
+            {% if field.type == "decimal64" %}
+            type {{ field.type }} {
+               fraction-digits 5;
+            }
+            {% else %}
+            type {{ field.type }} ;
+            {% endif %}
+            {% if field.description %}
+            description
+                "{{ field.description }}" ;
+            {% endif %}
+        }
+        {% endif %}
+        {% endfor %}
+        {% for enum_type in message.enums %}
+        {% if enum_type.description %}
+        /* {{ enum_type.description }} */
+        {% endif %}
+        typedef {{ }} {
+            type enumeration {
+            {% for v in enum_type.value %}
+            {% if v.description %}
+                enum {{ }} {
+                    description "{{ v.description }}";
+                }
+            {% else %}
+                enum {{ }} ;
+            {% endif %}
+            {% endfor %}
+            }
+        }
+        {% endfor %}
+        {% for oneof in message.oneof %}
+        choice {{ }} {
+        }
+        {% endfor %}
+    {% if message.messages %}
+    {{ loop (message.messages)|indent(4, false) }}
+    {% endif %}
+    }
+    {% endfor %}
+    {% for service in %}
+    {% if service.description %}
+    /* {{ service.description }} */
+    {% endif %}
+    {% for method in service.methods %}
+    {% if method.description %}
+    /* {{ method.description }} */
+    {% endif %}
+    rpc {{ service.service }}-{{ method.method }} {
+        {% if method.input %}
+        input {
+            {% if method.input_ref %}
+            uses {{ method.input }} ;
+            {% else %}
+            leaf {{ method.input }} {
+                type {{ method.input }} ;
+            }
+            {% endif %}
+        }
+        {% endif %}
+        {% if method.output %}
+        output {
+            {% if method.output_ref %}
+            uses {{ method.output }} ;
+            {% else %}
+            leaf {{ method.output }} {
+                type {{ method.output }} ;
+            }
+            {% endif %}
+        }
+        {% endif %}
+    }
+    {% endfor %}
+    {% endfor %}
+""", trim_blocks=True, lstrip_blocks=True)
+def _traverse_messages(message_types):
+    """
+    Turn a list of parsed DescriptorProto dicts into the structure the
+    template consumes: name, description, converted fields and enums,
+    and (recursively) the nested messages of each entry.
+    """
+    def _convert(desc):
+        assert desc['_type'] == 'google.protobuf.DescriptorProto'
+        return {
+            'name': desc.get('name', ''),
+            'fields': _traverse_fields(desc.get('field', [])),
+            'enums': _traverse_enums(desc.get('enum_type', [])),
+            # nested message types are converted with the same routine
+            'messages': _traverse_messages(desc.get('nested_type', [])),
+            'description': desc.get('_description', ''),
+        }
+    return [_convert(desc) for desc in message_types]
+def _traverse_fields(fields_desc):
+    """
+    Turn a list of parsed FieldDescriptorProto dicts into template
+    entries.  Plain attributes are copied (defaulting to ''), the YANG
+    type is resolved through get_yang_type(), 'type_ref' is set when the
+    field type is not a base type, and the description is cleaned with
+    remove_unsupported_characters().
+    """
+    copied_keys = ('name', 'label', 'number', 'options', 'type_name')
+    converted = []
+    for desc in fields_desc:
+        assert desc['_type'] == 'google.protobuf.FieldDescriptorProto'
+        entry = dict((key, desc.get(key, '')) for key in copied_keys)
+        entry['type'] = get_yang_type(desc)
+        entry['type_ref'] = not is_base_type(desc['type'])
+        entry['description'] = remove_unsupported_characters(
+            desc.get('_description', ''))
+        converted.append(entry)
+    return converted
+def _traverse_enums(enums_desc):
+    """
+    Turn a list of parsed EnumDescriptorProto dicts into template
+    entries holding the enum's name, its value list and its description
+    (each defaulting to '' when absent).
+    """
+    def _convert(desc):
+        assert desc['_type'] == 'google.protobuf.EnumDescriptorProto'
+        return {
+            'name': desc.get('name', ''),
+            'value': desc.get('value', ''),
+            'description': desc.get('_description', ''),
+        }
+    return [_convert(desc) for desc in enums_desc]
+def _traverse_services(service_desc):
+    """
+    Turn a list of parsed service dicts into template entries.  Each
+    entry carries the service name and description plus one dict per
+    method with its name, description, streaming flag and the input and
+    output types; types that are not base types are reduced to the last
+    dotted component and flagged as references.
+    """
+    def _resolve(type_name):
+        # returns (name to render, whether it refers to a grouping)
+        if is_base_type(type_name):
+            return type_name, False
+        return type_name.split('.')[-1], True
+    def _convert_method(desc):
+        assert desc['_type'] == 'google.protobuf.MethodDescriptorProto'
+        input_name, input_ref = _resolve(desc.get('input_type'))
+        output_name, output_ref = _resolve(desc.get('output_type'))
+        return {
+            'method': desc.get('name', ''),
+            'input': input_name,
+            'input_ref': input_ref,
+            'output': output_name,
+            'output_ref': output_ref,
+            'description': desc.get('_description', ''),
+            'server_streaming': desc.get('server_streaming',
+                                         False) == True
+        }
+    return [
+        {
+            'service': svc.get('name', ''),
+            'methods': [_convert_method(m)
+                        for m in svc.get('method', [])],
+            'description': svc.get('_description', ''),
+        }
+        for svc in service_desc
+    ]
+def _rchop(thestring, ending):
+    """
+    Return thestring with one trailing occurrence of ending removed.
+    The string is returned unchanged when it does not end with ending
+    or when ending is empty.
+    """
+    # every string "ends with" '', and slicing with [:-0] would then
+    # return '' instead of the original text, hence the explicit guard
+    if ending and thestring.endswith(ending):
+        return thestring[:-len(ending)]
+    return thestring
+def _traverse_desc(descriptor):
+    """
+    Build the 'module' dict handed to the template from one parsed file
+    descriptor: file name without its '.proto' suffix, package,
+    description, and the converted messages, enums and services.
+    """
+    get = descriptor.get
+    return {
+        'name': _rchop(get('name', ''), '.proto'),
+        'package': get('package', ''),
+        'description': get('_description', ''),
+        'messages': _traverse_messages(get('message_type', [])),
+        'enums': _traverse_enums(get('enum_type', [])),
+        'services': _traverse_services(get('service', [])),
+    }
+def generate_code(request, response):
+    """
+    Render one YANG document per proto file found in request.
+    Each entry of request.proto_file is parsed with comment folding
+    enabled, reshaped by _traverse_desc() and rendered through
+    template_yang; the text is stored as the content of a new file
+    entry added to response.
+    """
+    assert isinstance(request, plugin.CodeGeneratorRequest)
+    parser = DescriptorParser()
+    # idx = 1
+    for proto_file in request.proto_file:
+        # '_type' is the tag the _traverse_* helpers assert on
+        native_data = parser.parse_file_descriptor(proto_file,
+                                                   type_tag_name='_type',
+                                                   fold_comments=True)
+        # print native_data
+        yang_data = _traverse_desc(native_data)
+        f = response.file.add()
+        #TODO: We should have a separate file for each output. There is an
+        # issue reusing the same filename with an incremental suffix.  Using
+        # a different file name works but not the actual proto file name
+        # NOTE(review): the statement below is truncated in this revision;
+        # presumably it derives the output file name by replacing '.proto'
+        # with '.yang' -- confirm.
+ ='.proto', '.yang')
+        # = '{}_{}{}'.format(_rchop(, '.proto'), idx,
+        #                            '.yang')
+        # idx += 1
+        f.content = template_yang.render(module=yang_data)
+def get_yang_type(field):
+    """
+    Return the YANG type to render for a parsed field.  Type codes
+    missing from YANG_TYPE_MAP are returned as they are; enum, message
+    and group fields yield the last dotted component of the field's
+    'type_name'; every other code yields its mapped YANG type.
+    """
+    proto_type = field['type']
+    if proto_type not in YANG_TYPE_MAP:
+        return proto_type
+    yang_type = YANG_TYPE_MAP[proto_type][0]
+    if yang_type in ('enumeration', 'message', 'group'):
+        # named types are referred to by their unqualified name
+        return field['type_name'].split('.')[-1]
+    return yang_type
+def is_base_type(type):
+    """
+    Tell whether type denotes something other than a message or group.
+    The argument may be a key of YANG_TYPE_MAP (a numeric type code) or
+    a proto type name, which is compared with the second element of the
+    map's values.
+    """
+    composite = ('message', 'group')
+    # check numeric value of the type first
+    if type in YANG_TYPE_MAP:
+        return YANG_TYPE_MAP[type][0] not in composite
+    # otherwise treat the argument as the proto name of the type
+    return any(proto_name == type and proto_name not in composite
+               for _, proto_name in YANG_TYPE_MAP.values())
+def remove_unsupported_characters(text):
+    """
+    Return text with every brace, square bracket, double quote, slash
+    and backslash replaced by a single space.
+    """
+    blocked = set('{}[]"/\\')
+    cleaned = []
+    for ch in text:
+        cleaned.append(' ' if ch in blocked else ch)
+    return ''.join(cleaned)
+    # Each entry maps a protobuf field type code to a pair:
+    #   [0] the YANG type used by get_yang_type() / is_base_type()
+    #   [1] the proto type name is_base_type() matches when it is given
+    #       a name instead of a numeric code
+    FieldDescriptor.TYPE_BOOL: ('boolean', 'boolean'),
+    FieldDescriptor.TYPE_BYTES: ('binary', 'byte'),
+    FieldDescriptor.TYPE_DOUBLE: ('decimal64', 'double'),
+    FieldDescriptor.TYPE_ENUM: ('enumeration', 'enum'),
+    # fixed32 / fixed64 are unsigned in protobuf, so they map to the
+    # unsigned YANG integer types rather than int32 / int64
+    FieldDescriptor.TYPE_FIXED32: ('uint32', 'int64'),
+    FieldDescriptor.TYPE_FIXED64: ('uint64', 'uint64'),
+    FieldDescriptor.TYPE_FLOAT: ('decimal64', 'float'),
+    FieldDescriptor.TYPE_INT32: ('int32', 'int32'),
+    FieldDescriptor.TYPE_INT64: ('int64', 'int64'),
+    FieldDescriptor.TYPE_SFIXED32: ('int32', 'int32'),
+    FieldDescriptor.TYPE_SFIXED64: ('int64', 'int64'),
+    FieldDescriptor.TYPE_STRING: ('string', 'string'),
+    FieldDescriptor.TYPE_SINT32: ('int32', 'int32'),
+    FieldDescriptor.TYPE_SINT64: ('int64', 'int64'),
+    FieldDescriptor.TYPE_UINT32: ('uint32', 'int64'),
+    FieldDescriptor.TYPE_UINT64: ('uint64', 'uint64'),
+    FieldDescriptor.TYPE_MESSAGE: ('message', 'message'),
+    FieldDescriptor.TYPE_GROUP: ('group', 'group')
+# Entry point when run as a protoc plugin: a serialized
+# CodeGeneratorRequest is parsed, generate_code() fills in the response,
+# and the serialized CodeGeneratorResponse is written to stdout.
+if __name__ == '__main__':
+    # Read request message from stdin
+    # NOTE(review): the right-hand side of this assignment is not visible
+    # in this revision; presumably it reads all of stdin -- confirm.
+    data =
+    # Parse request
+    request = plugin.CodeGeneratorRequest()
+    request.ParseFromString(data)
+    # Create response
+    response = plugin.CodeGeneratorResponse()
+    # Generate code
+    generate_code(request, response)
+    # Serialise response message
+    output = response.SerializeToString()
+    # Write to stdout
+    sys.stdout.write(output)
+    # print is_base_type(9)
\ No newline at end of file
diff --git a/experiments/proto2Yang/yang.proto b/experiments/proto2Yang/yang.proto
+++ b/experiments/proto2Yang/yang.proto
@@ -0,0 +1,67 @@
+syntax = "proto3";
+package experiment;
+//import "google/protobuf/empty.proto";
+// Sample event message: a sequence number, an event type and a
+// details string.
+message AsyncEvent {
+    int32 seq = 1;
+    // Kinds of event an AsyncEvent can carry.
+    enum EventType {
+        BIG_BANG = 0;  // just a big bang
+        SMALL_BANG = 1;  // so small bang
+        NO_BANG = 2;
+    }
+    EventType type = 2;
+    string details = 3;
+// Sample enum declared at file level rather than inside a message.
+enum khenType {
+    BIG_KHEN = 0;
+    SMALL_KHEN = 1;
+    NO_KHEN = 2;
+// Sample message with two levels of nested message types: Result, and
+// Success declared inside Result.
+message Packet {
+    int32 source = 1;
+    bytes content = 2;
+    // Nested message; itself contains the nested Success message.
+    message Result {
+        string url = 1;
+        string title = 2;
+        repeated string snippets = 3;
+        // Message nested two levels below Packet.
+        message Success {
+            string input = 1;
+            string desc = 2;
+        }
+        repeated Success success = 4;
+    }
+    repeated Result results = 3;
+// Sample message with one string field and one float field.
+message Echo {
+    string msg = 1;
+    float delay = 2;
+// Sample message whose two fields are members of a single oneof.
+message testMessage{
+    // Only one of the fields below can be set at a time.
+    oneof oneOfTest {
+        string test2 = 1;
+        int32 test3 = 2;
+    }
+// Sample service with one unary RPC, two server-streaming RPCs and one
+// client-streaming RPC.
+service ExperimentalService {
+    // Unary call: takes an Echo and returns an Echo.
+    rpc GetEcho(Echo) returns(Echo);
+    // For server to send async stream to client
+    rpc ReceiveStreamedEvents(Packet)
+        returns(stream AsyncEvent);
+    // For server to send async packets to client
+    rpc ReceivePackets(Echo) returns(stream Packet);
+    // For client to send async packets to server
+    rpc SendPackets(stream Packet) returns(Echo);
\ No newline at end of file