blob: 13bdc5e2e65429a94963811694de60dd9c662d99 [file] [log] [blame]
Zsolt Haraszti46c72002016-10-10 09:55:30 -07001#
Zsolt Harasztiaccad4a2017-01-03 21:56:48 -08002# Copyright 2017 the original author or authors.
Zsolt Haraszti46c72002016-10-10 09:55:30 -07003#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16import os
17from collections import OrderedDict
18
19from google.protobuf import descriptor_pb2
20from google.protobuf.descriptor import FieldDescriptor, Descriptor
21from google.protobuf.message import Message
22
23
class InvalidDescriptorError(Exception):
    """Raised when a descriptor message contains a field whose label is
    not one of optional, required, or repeated."""
Zsolt Haraszti46c72002016-10-10 09:55:30 -070026
27
class DescriptorParser(object):
    """
    Used to parse protobuf FileDescriptor objects into native Python
    data structures (nested dict/list/intrinsic values). Two of the typical
    sources of FileDescriptor objects are:
    1. CodeGeneratorRequest, used as binary input to any protoc plugin,
       contains a list of these FileDescriptor objects (under the
       proto_file attribute)
    2. FileDescriptorSet, as saved by protoc when using the -o option.

    An important feature of the parser is that it can process the source
    code annotations and can fold comments into the relevant definitions
    present in the proto file.

    Usage (in a protoc plugin):
    >>> request = plugin.CodeGeneratorRequest()
    >>> request.ParseFromString(sys.stdin.read())
    >>> parser = DescriptorParser()
    >>> for proto_file in request.proto_file:
    >>>     parsed_data = parser.parse_file_descriptor(proto_file)
    >>>     print(json.dumps(parsed_data, indent=4))
    """

    # Class-level cache of the FileDescriptorProto describing
    # descriptor.proto itself; loaded lazily on first construction and
    # shared by all instances.
    meta = None

    def __init__(self):
        if DescriptorParser.meta is None:
            DescriptorParser.meta = self.load_meta_descriptor()

    def load_meta_descriptor(self):
        """
        Load the protobuf version of descriptor.proto to use it in
        decoding protobuf paths.

        :return: the single FileDescriptorProto contained in the bundled
            'descriptor.desc' FileDescriptorSet
        """
        fpath = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                             'descriptor.desc'))
        # The file is a serialized protobuf blob, so it must be read in
        # binary mode: text mode would corrupt it on Windows and would
        # yield str instead of the bytes ParseFromString needs on Python 3.
        with open(fpath, 'rb') as f:
            blob = f.read()
        proto = descriptor_pb2.FileDescriptorSet()
        proto.ParseFromString(blob)
        assert len(proto.file) == 1
        return proto.file[0]

    # Dispatch table for intrinsic (non-Message) leaf values found in
    # descriptors; strings, ints and bools pass through unchanged.
    parser_table = {
        str: lambda x: x,
        int: lambda x: x,
        bool: lambda x: x,
    }
    # On Python 2, text fields arrive as unicode objects; register them
    # too. On Python 3 the name is gone and str already covers text.
    try:
        parser_table[unicode] = lambda x: x
    except NameError:
        pass

    def parse(self, o, type_tag_name=None):
        """
        Convert o into a native Python data structure.

        :param o: either a protobuf Message or an intrinsic leaf value
        :param type_tag_name: if not None, every dict generated from a
            Message gets an extra entry under this key holding the
            message's fully qualified protobuf type name
        :return: nested OrderedDict / list / intrinsic representation
        """
        if isinstance(o, Message):
            return self.parse_message(o, type_tag_name)
        else:
            return self.parser_table[type(o)](o)

    def parse_message(self, m, type_tag_name=None):
        """
        Convert the protobuf Message m into an OrderedDict, recursing
        into nested messages and repeated fields.

        :raises InvalidDescriptorError: if a field carries a label other
            than optional, required or repeated
        """
        assert isinstance(m, Message)
        d = OrderedDict()
        for field, value in m.ListFields():
            assert isinstance(field, FieldDescriptor)
            if field.label in (FieldDescriptor.LABEL_OPTIONAL,
                               FieldDescriptor.LABEL_REQUIRED):
                d[field.name] = self.parse(value, type_tag_name)
            elif field.label == FieldDescriptor.LABEL_REPEATED:
                d[field.name] = [self.parse(x, type_tag_name)
                                 for x in value]
            else:
                raise InvalidDescriptorError(
                    'unsupported label %s on field %s' % (
                        field.label, field.name))

        if type_tag_name is not None:
            d[type_tag_name] = m.DESCRIPTOR.full_name.strip('.')

        return d

    def parse_file_descriptor(self, descriptor,
                              type_tag_name=None,
                              fold_comments=False):
        """
        Parse a single FileDescriptorProto into a nested dict.

        :param descriptor: a FileDescriptorProto message
        :param type_tag_name: optional key under which each message dict
            records its protobuf type name (see parse())
        :param fold_comments: if True, fold source code comments into a
            '_description' entry on the node they document and drop the
            raw source_code_info from the result
        :return: nested OrderedDict representation of the descriptor
        """

        d = self.parse(descriptor, type_tag_name=type_tag_name)

        if fold_comments:
            locations = d.get('source_code_info', {}).get('location', [])
            for location in locations:
                path = location.get('path', [])
                comments = ''.join([
                    location.get('leading_comments', '').strip(' '),
                    location.get('trailing_comments', '').strip(' '),
                    ''.join(block.strip(' ') for block
                            in
                            location.get('leading_detached_comments', ''))
                ]).strip()

                # ignore locations with no comments
                if not comments:
                    continue

                # we ignore paths with an odd number of entries, since
                # these do not address our schema nodes, but rather the
                # meta schema
                if (len(path) % 2 == 0):
                    node = self.find_node_by_path(
                        path, self.meta.DESCRIPTOR, d)
                    assert isinstance(node, dict)
                    node['_description'] = comments

            # remove the (now folded) raw source code info; it may be
            # absent altogether, hence pop() rather than del to avoid a
            # KeyError on descriptors compiled without source info
            d.pop('source_code_info', None)

        return d

    def parse_file_descriptors(self, descriptors,
                               type_tag_name=None,
                               fold_comments=False):
        """
        Parse an iterable of FileDescriptorProto objects; see
        parse_file_descriptor() for the meaning of the arguments.

        :return: list of parsed descriptors, one per input descriptor
        """
        return [self.parse_file_descriptor(descriptor,
                                           type_tag_name=type_tag_name,
                                           fold_comments=fold_comments)
                for descriptor in descriptors]

    def find_node_by_path(self, path, meta, o):
        """
        Resolve a source_code_info path (alternating field-number/index
        pairs) against the parsed dict o, using meta (the Descriptor of
        the corresponding descriptor.proto message) to map field numbers
        to field names.

        NOTE: path is consumed (mutated) as the recursion walks it.

        :return: the dict node addressed by path
        """
        # stop recursion when path is empty
        if not path:
            return o

        # sanity check: paths come in (field_number, index) pairs
        assert len(path) >= 2
        assert isinstance(meta, Descriptor)
        assert isinstance(o, dict)

        # find field name, then actual field
        field_number = path.pop(0)
        field_def = meta.fields_by_number[field_number]
        field = o[field_def.name]

        # field must be a list, extract entry with given index
        assert isinstance(field, list)  # expected to be a list field
        index = path.pop(0)
        child_o = field[index]

        child_meta = field_def.message_type
        return self.find_node_by_path(path, child_meta, child_o)