# protoc_plugins/descriptor_parser.py - chameleon - Gitiles

 #
 # Copyright 2017 the original author or authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 import os
 from collections import OrderedDict

 from google.protobuf import descriptor_pb2
 from google.protobuf.descriptor import FieldDescriptor, Descriptor
 from google.protobuf.message import Message


 class InvalidDescriptorError(Exception):
     """Raised when a descriptor field has a label the parser cannot handle."""


 class DescriptorParser(object):
     """
     Used to parse protobuf FileDescriptor objects into native Python
     data structures (nested dict/list/intrinsic values). Two of the typical
     sources of FileDescriptor objects are:
     1. CodeGeneratorRequest, used as binary input to any protoc plugin,
        contains a list of these FileDescriptor objects (under the
        proto_file attribute)
     2. FileDescriptorSet, as saved by protoc when using the -o option.

     An important feature of the parser is that it can process the source
     code annotations and can fold comments into the relevant definitions
     present in the proto file.

     Usage (in a protoc plugin):
     >>> request = plugin.CodeGeneratorRequest()
     >>> request.ParseFromString(sys.stdin.read())
     >>> parser = DescriptorParser()
     >>> for proto_file in request.proto_file:
     ...     parsed_data = parser.parse_file_descriptor(proto_file)
     ...     print(json.dumps(parsed_data, indent=4))
     """

     # Parsed descriptor.proto; loaded by the first instance and then shared
     # through the class attribute.
     meta = None

     def __init__(self):
         """Load the shared meta descriptor if no instance has done so yet."""
         if DescriptorParser.meta is None:
             DescriptorParser.meta = self.load_meta_descriptor()

     def load_meta_descriptor(self):
         """
         Load the protobuf version of descriptor.proto to use it in
         decoding protobuf paths.

         Reads 'descriptor.desc' from the directory of this module, parses
         it as a FileDescriptorSet and returns its single file entry.
         """
         fpath = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                              'descriptor.desc'))
         # The blob is serialized protobuf, so it must be read as bytes;
         # text mode may translate or decode it.
         with open(fpath, 'rb') as f:
             blob = f.read()
         proto = descriptor_pb2.FileDescriptorSet()
         proto.ParseFromString(blob)
         assert len(proto.file) == 1
         return proto.file[0]

     # Python 2 calls its text type ``unicode``; Python 3 only has ``str``.
     try:
         _text_type = unicode  # noqa: F821
     except NameError:
         _text_type = str

     # Scalar values are already native Python objects, so every handler
     # simply passes the value through. Lookup is by exact type.
     parser_table = {
         _text_type: lambda x: x,
         int: lambda x: x,
         bool: lambda x: x,
         float: lambda x: x,
     }

     def parse(self, o, type_tag_name=None):
         """
         Convert a protobuf value into native Python data.

         Message instances are handed to parse_message(); any other value
         is looked up by its exact type in parser_table, so a type without
         an entry raises KeyError.
         """
         if isinstance(o, Message):
             return self.parse_message(o, type_tag_name)
         return self.parser_table[type(o)](o)

     def parse_message(self, m, type_tag_name=None):
         """
         Convert message `m` into an OrderedDict keyed by field name.

         Only the fields reported by ListFields() are included. Repeated
         fields become lists, all other fields a single parsed value. When
         `type_tag_name` is given, the message type's full name (without
         surrounding dots) is stored under that key in every parsed
         message.

         Raises InvalidDescriptorError for a field whose label is neither
         optional, required nor repeated.
         """
         assert isinstance(m, Message)
         singular_labels = (FieldDescriptor.LABEL_OPTIONAL,
                            FieldDescriptor.LABEL_REQUIRED)
         d = OrderedDict()
         for field, value in m.ListFields():
             assert isinstance(field, FieldDescriptor)
             if field.label in singular_labels:
                 d[field.name] = self.parse(value, type_tag_name)
             elif field.label == FieldDescriptor.LABEL_REPEATED:
                 d[field.name] = [self.parse(x, type_tag_name)
                                  for x in value]
             else:
                 raise InvalidDescriptorError(
                     'unsupported label %r on field %s'
                     % (field.label, field.name))

         if type_tag_name is not None:
             d[type_tag_name] = m.DESCRIPTOR.full_name.strip('.')

         return d

     @staticmethod
     def _location_comments(location):
         """Join the leading, trailing and detached comments of a location."""
         detached = location.get('leading_detached_comments', ())
         return ''.join([
             location.get('leading_comments', '').strip(' '),
             location.get('trailing_comments', '').strip(' '),
             ''.join(block.strip(' ') for block in detached),
         ]).strip()

     def parse_file_descriptor(self, descriptor,
                               type_tag_name=None,
                               fold_comments=False):
         """
         Parse one file descriptor into nested native Python data.

         With `fold_comments` set, the comments of every source location
         are stored under the '_description' key of the node the location
         path addresses, and 'source_code_info' is removed from the result.
         `type_tag_name` is passed on to parse().
         """
         d = self.parse(descriptor, type_tag_name=type_tag_name)

         if fold_comments:
             # Detach the source info up front: it must not be part of the
             # result, and a descriptor without it is simply left untouched.
             source_code_info = d.pop('source_code_info', {})
             for location in source_code_info.get('location', []):
                 comments = self._location_comments(location)

                 # ignore locations with no comments
                 if not comments:
                     continue

                 # we ignore path with odd number of entries, since these do
                 # not address our schema nodes, but rather the meta schema
                 path = location.get('path', [])
                 if len(path) % 2 == 0:
                     # NOTE(review): an even-length path that runs through a
                     # non-repeated field trips the assertions in
                     # find_node_by_path - confirm whether such locations
                     # should be skipped instead.
                     node = self.find_node_by_path(
                         path, self.meta.DESCRIPTOR, d)
                     assert isinstance(node, dict)
                     node['_description'] = comments

         return d

     def parse_file_descriptors(self, descriptors,
                                type_tag_name=None,
                                fold_comments=False):
         """Apply parse_file_descriptor() to each descriptor; return a list."""
         return [self.parse_file_descriptor(descriptor,
                                            type_tag_name=type_tag_name,
                                            fold_comments=fold_comments)
                 for descriptor in descriptors]

     def find_node_by_path(self, path, meta, o):
         """
         Follow `path` through the parsed tree `o` and return the node found.

         `path` is consumed in (field number, list index) pairs: the field
         number is resolved to a field name through `meta`, the index
         selects an entry of that list field, and the search continues in
         that entry with the field's message type. An empty path returns
         `o` itself. The caller's `path` is not modified.

         Raises AssertionError when fewer than two path entries remain,
         when `meta` is not a Descriptor, when `o` is not a dict or when
         the addressed field is not a list.
         """
         # stop recursion when path is empty
         if not path:
             return o

         # sanity check
         assert len(path) >= 2
         assert isinstance(meta, Descriptor)
         assert isinstance(o, dict)

         # find field name, then actual field
         field_number, index = path[0], path[1]
         field_def = meta.fields_by_number[field_number]
         field = o[field_def.name]

         # field must be a list, extract entry with given index
         assert isinstance(field, list)  # expected to be a list field
         child_o = field[index]

         # slicing keeps the caller's list intact
         return self.find_node_by_path(path[2:], field_def.message_type,
                                       child_o)
	#
	# Copyright 2017 the original author or authors.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#
	import os
	from collections import OrderedDict

	from google.protobuf import descriptor_pb2
	from google.protobuf.descriptor import FieldDescriptor, Descriptor
	from google.protobuf.message import Message


	class InvalidDescriptorError(Exception):
	    """Raised when a descriptor field has a label the parser cannot handle."""


	class DescriptorParser(object):
	    """
	    Used to parse protobuf FileDescriptor objects into native Python
	    data structures (nested dict/list/intrinsic values). Two of the typical
	    sources of FileDescriptor objects are:
	    1. CodeGeneratorRequest, used as binary input to any protoc plugin,
	       contains a list of these FileDescriptor objects (under the
	       proto_file attribute)
	    2. FileDescriptorSet, as saved by protoc when using the -o option.

	    An important feature of the parser is that it can process the source
	    code annotations and can fold comments into the relevant definitions
	    present in the proto file.

	    Usage (in a protoc plugin):
	    >>> request = plugin.CodeGeneratorRequest()
	    >>> request.ParseFromString(sys.stdin.read())
	    >>> parser = DescriptorParser()
	    >>> for proto_file in request.proto_file:
	    ...     parsed_data = parser.parse_file_descriptor(proto_file)
	    ...     print(json.dumps(parsed_data, indent=4))
	    """

	    # Parsed descriptor.proto; loaded by the first instance and then shared
	    # through the class attribute.
	    meta = None

	    def __init__(self):
	        """Load the shared meta descriptor if no instance has done so yet."""
	        if DescriptorParser.meta is None:
	            DescriptorParser.meta = self.load_meta_descriptor()

	    def load_meta_descriptor(self):
	        """
	        Load the protobuf version of descriptor.proto to use it in
	        decoding protobuf paths.

	        Reads 'descriptor.desc' from the directory of this module, parses
	        it as a FileDescriptorSet and returns its single file entry.
	        """
	        fpath = os.path.abspath(os.path.join(os.path.dirname(__file__),
	                                             'descriptor.desc'))
	        # The blob is serialized protobuf, so it must be read as bytes;
	        # text mode may translate or decode it.
	        with open(fpath, 'rb') as f:
	            blob = f.read()
	        proto = descriptor_pb2.FileDescriptorSet()
	        proto.ParseFromString(blob)
	        assert len(proto.file) == 1
	        return proto.file[0]

	    # Python 2 calls its text type ``unicode``; Python 3 only has ``str``.
	    try:
	        _text_type = unicode  # noqa: F821
	    except NameError:
	        _text_type = str

	    # Scalar values are already native Python objects, so every handler
	    # simply passes the value through. Lookup is by exact type.
	    parser_table = {
	        _text_type: lambda x: x,
	        int: lambda x: x,
	        bool: lambda x: x,
	        float: lambda x: x,
	    }

	    def parse(self, o, type_tag_name=None):
	        """
	        Convert a protobuf value into native Python data.

	        Message instances are handed to parse_message(); any other value
	        is looked up by its exact type in parser_table, so a type without
	        an entry raises KeyError.
	        """
	        if isinstance(o, Message):
	            return self.parse_message(o, type_tag_name)
	        return self.parser_table[type(o)](o)

	    def parse_message(self, m, type_tag_name=None):
	        """
	        Convert message `m` into an OrderedDict keyed by field name.

	        Only the fields reported by ListFields() are included. Repeated
	        fields become lists, all other fields a single parsed value. When
	        `type_tag_name` is given, the message type's full name (without
	        surrounding dots) is stored under that key in every parsed
	        message.

	        Raises InvalidDescriptorError for a field whose label is neither
	        optional, required nor repeated.
	        """
	        assert isinstance(m, Message)
	        singular_labels = (FieldDescriptor.LABEL_OPTIONAL,
	                           FieldDescriptor.LABEL_REQUIRED)
	        d = OrderedDict()
	        for field, value in m.ListFields():
	            assert isinstance(field, FieldDescriptor)
	            if field.label in singular_labels:
	                d[field.name] = self.parse(value, type_tag_name)
	            elif field.label == FieldDescriptor.LABEL_REPEATED:
	                d[field.name] = [self.parse(x, type_tag_name)
	                                 for x in value]
	            else:
	                raise InvalidDescriptorError(
	                    'unsupported label %r on field %s'
	                    % (field.label, field.name))

	        if type_tag_name is not None:
	            d[type_tag_name] = m.DESCRIPTOR.full_name.strip('.')

	        return d

	    @staticmethod
	    def _location_comments(location):
	        """Join the leading, trailing and detached comments of a location."""
	        detached = location.get('leading_detached_comments', ())
	        return ''.join([
	            location.get('leading_comments', '').strip(' '),
	            location.get('trailing_comments', '').strip(' '),
	            ''.join(block.strip(' ') for block in detached),
	        ]).strip()

	    def parse_file_descriptor(self, descriptor,
	                              type_tag_name=None,
	                              fold_comments=False):
	        """
	        Parse one file descriptor into nested native Python data.

	        With `fold_comments` set, the comments of every source location
	        are stored under the '_description' key of the node the location
	        path addresses, and 'source_code_info' is removed from the result.
	        `type_tag_name` is passed on to parse().
	        """
	        d = self.parse(descriptor, type_tag_name=type_tag_name)

	        if fold_comments:
	            # Detach the source info up front: it must not be part of the
	            # result, and a descriptor without it is simply left untouched.
	            source_code_info = d.pop('source_code_info', {})
	            for location in source_code_info.get('location', []):
	                comments = self._location_comments(location)

	                # ignore locations with no comments
	                if not comments:
	                    continue

	                # we ignore path with odd number of entries, since these do
	                # not address our schema nodes, but rather the meta schema
	                path = location.get('path', [])
	                if len(path) % 2 == 0:
	                    # NOTE(review): an even-length path that runs through a
	                    # non-repeated field trips the assertions in
	                    # find_node_by_path - confirm whether such locations
	                    # should be skipped instead.
	                    node = self.find_node_by_path(
	                        path, self.meta.DESCRIPTOR, d)
	                    assert isinstance(node, dict)
	                    node['_description'] = comments

	        return d

	    def parse_file_descriptors(self, descriptors,
	                               type_tag_name=None,
	                               fold_comments=False):
	        """Apply parse_file_descriptor() to each descriptor; return a list."""
	        return [self.parse_file_descriptor(descriptor,
	                                           type_tag_name=type_tag_name,
	                                           fold_comments=fold_comments)
	                for descriptor in descriptors]

	    def find_node_by_path(self, path, meta, o):
	        """
	        Follow `path` through the parsed tree `o` and return the node found.

	        `path` is consumed in (field number, list index) pairs: the field
	        number is resolved to a field name through `meta`, the index
	        selects an entry of that list field, and the search continues in
	        that entry with the field's message type. An empty path returns
	        `o` itself. The caller's `path` is not modified.

	        Raises AssertionError when fewer than two path entries remain,
	        when `meta` is not a Descriptor, when `o` is not a dict or when
	        the addressed field is not a list.
	        """
	        # stop recursion when path is empty
	        if not path:
	            return o

	        # sanity check
	        assert len(path) >= 2
	        assert isinstance(meta, Descriptor)
	        assert isinstance(o, dict)

	        # find field name, then actual field
	        field_number, index = path[0], path[1]
	        field_def = meta.fields_by_number[field_number]
	        field = o[field_def.name]

	        # field must be a list, extract entry with given index
	        assert isinstance(field, list)  # expected to be a list field
	        child_o = field[index]

	        # slicing keeps the caller's list intact
	        return self.find_node_by_path(path[2:], field_def.message_type,
	                                      child_o)