Zsolt Haraszti | 46c7200 | 2016-10-10 09:55:30 -0700 | [diff] [blame] | 1 | # |
| 2 | # Copyright 2016 the original author or authors. |
| 3 | # |
| 4 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | # you may not use this file except in compliance with the License. |
| 6 | # You may obtain a copy of the License at |
| 7 | # |
| 8 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | # |
| 10 | # Unless required by applicable law or agreed to in writing, software |
| 11 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | # See the License for the specific language governing permissions and |
| 14 | # limitations under the License. |
| 15 | # |
| 16 | import os |
| 17 | from collections import OrderedDict |
| 18 | |
| 19 | from google.protobuf import descriptor_pb2 |
| 20 | from google.protobuf.descriptor import FieldDescriptor, Descriptor |
| 21 | from google.protobuf.message import Message |
| 22 | |
| 23 | |
class InvalidDescriptorError(Exception):
    """Raised when a protobuf descriptor message does not have the expected structure."""
| 25 | |
| 26 | |
class DescriptorParser(object):
    """
    Used to parse protobuf FileDescriptor objects into native Python
    data structures (nested dict/list/intrinsic values). Two of the
    typical sources of FileDescriptor objects are:
    1. CodeGeneratorRequest, used as binary input to any protoc plugin,
       contains a list of these FileDescriptor objects (under the
       proto_file attribute)
    2. FileDescriptorSet, as saved by protoc when using the -o option.

    An important feature of the parser is that it can process the source
    code annotations and can fold comments into the relevant definitions
    present in the proto file.

    Usage (in a protoc plugin):
    >>> request = plugin.CodeGeneratorRequest()
    >>> request.ParseFromString(sys.stdin.read())
    >>> parser = DescriptorParser()
    >>> for proto_file in request.proto_file:
    >>>     parsed_data = parser.parse_file_descriptor(proto_file)
    >>>     print json.dumps(parsed_data, indent=4)
    """

    # Cached FileDescriptorProto for descriptor.proto itself, shared by
    # all instances and loaded lazily on first instantiation. It is
    # needed to decode SourceCodeInfo paths in find_node_by_path().
    meta = None

    def __init__(self):
        if DescriptorParser.meta is None:
            DescriptorParser.meta = self.load_meta_descriptor()

    def load_meta_descriptor(self):
        """
        Load the protobuf version of descriptor.proto to use it in
        decoding protobuf paths.

        :return: the single FileDescriptorProto contained in the bundled
            'descriptor.desc' FileDescriptorSet
        """
        fpath = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                             'descriptor.desc'))
        # binary mode: the file holds serialized protobuf bytes, and text
        # mode would corrupt them on platforms with newline translation
        with open(fpath, 'rb') as f:
            blob = f.read()
        proto = descriptor_pb2.FileDescriptorSet()
        proto.ParseFromString(blob)
        assert len(proto.file) == 1
        return proto.file[0]

    # Scalar protobuf field values pass through unchanged. The text type
    # differs between Python 2 (unicode) and Python 3 (str); register
    # whichever exists so the class can be defined under either runtime.
    parser_table = {
        int: lambda x: x,
        bool: lambda x: x,
    }
    try:
        parser_table[unicode] = lambda x: x
    except NameError:  # Python 3: no 'unicode' builtin
        parser_table[str] = lambda x: x

    def parse(self, o, type_tag_name=None):
        """
        Convert o — a protobuf Message or a scalar field value — into
        native Python data (nested OrderedDict/list/scalars).

        :param o: protobuf Message or scalar value
        :param type_tag_name: optional key name under which each parsed
            message dict records its protobuf type name
        :return: native Python representation of o
        """
        if isinstance(o, Message):
            return self.parse_message(o, type_tag_name)
        else:
            return self.parser_table[type(o)](o)

    def parse_message(self, m, type_tag_name=None):
        """
        Convert a protobuf Message into an OrderedDict, recursing into
        nested messages and repeated fields.

        :raises InvalidDescriptorError: if a field carries an unknown label
        """
        assert isinstance(m, Message)
        d = OrderedDict()
        for field, value in m.ListFields():
            assert isinstance(field, FieldDescriptor)
            # use the named label constants instead of bare 1/2/3
            if field.label in (FieldDescriptor.LABEL_OPTIONAL,
                               FieldDescriptor.LABEL_REQUIRED):
                d[field.name] = self.parse(value, type_tag_name)
            elif field.label == FieldDescriptor.LABEL_REPEATED:
                d[field.name] = [self.parse(x, type_tag_name)
                                 for x in value]
            else:
                raise InvalidDescriptorError(
                    'unknown label %s for field %s' % (field.label,
                                                       field.name))

        if type_tag_name is not None:
            d[type_tag_name] = m.DESCRIPTOR.full_name.strip('.')

        return d

    def parse_file_descriptor(self, descriptor,
                              type_tag_name=None,
                              fold_comments=False):
        """
        Parse one FileDescriptorProto into a nested dict.

        :param descriptor: a FileDescriptorProto message
        :param type_tag_name: if not None, each parsed message dict gains
            a key of this name holding the message's protobuf type name
        :param fold_comments: if True, fold source-code comments into the
            schema nodes they describe (under '_description') and drop
            the raw source_code_info from the result
        :return: OrderedDict representation of the descriptor
        """
        d = self.parse(descriptor, type_tag_name=type_tag_name)

        if fold_comments:
            locations = d.get('source_code_info', {}).get('location', [])
            for location in locations:
                path = location.get('path', [])
                comments = ''.join([
                    location.get('leading_comments', '').strip(' '),
                    location.get('trailing_comments', '').strip(' '),
                    ''.join(block.strip(' ') for block in
                            location.get('leading_detached_comments', ''))
                ]).strip()

                # ignore locations with no comments
                if not comments:
                    continue

                # we ignore paths with an odd number of entries, since
                # these do not address our schema nodes, but rather the
                # meta schema
                if len(path) % 2 == 0:
                    node = self.find_node_by_path(
                        path, self.meta.DESCRIPTOR, d)
                    assert isinstance(node, dict)
                    node['_description'] = comments

            # remove source_code_info; pop() so a descriptor compiled
            # without source info does not raise KeyError
            d.pop('source_code_info', None)

        return d

    def parse_file_descriptors(self, descriptors,
                               type_tag_name=None,
                               fold_comments=False):
        """Parse an iterable of FileDescriptorProto messages; see
        parse_file_descriptor for the meaning of the keyword args."""
        return [self.parse_file_descriptor(descriptor,
                                           type_tag_name=type_tag_name,
                                           fold_comments=fold_comments)
                for descriptor in descriptors]

    def find_node_by_path(self, path, meta, o):
        """
        Resolve a protobuf SourceCodeInfo location path against the
        parsed dict representation.

        :param path: flat list of alternating (field number, list index)
            entries; must have an even number of elements
        :param meta: protobuf Descriptor describing the current level
        :param o: parsed dict at the current level
        :return: the nested dict addressed by path
        """
        # stop recursion when path is empty
        if not path:
            return o

        # sanity check: entries come in (field number, index) pairs
        assert len(path) >= 2
        assert isinstance(meta, Descriptor)
        assert isinstance(o, dict)

        # find field name, then actual field
        field_number, index = path[0], path[1]
        field_def = meta.fields_by_number[field_number]
        field = o[field_def.name]

        # field must be a list, extract entry with given index
        assert isinstance(field, list)  # expected to be a list field
        child_o = field[index]

        child_meta = field_def.message_type
        # recurse on a slice so the caller's path list is not mutated
        return self.find_node_by_path(path[2:], child_meta, child_o)