blob: 00343791da22b638998543fdfbb19e602c938402 [file] [log] [blame]
#
# Copyright 2017 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
from collections import OrderedDict
from google.protobuf import descriptor_pb2
from google.protobuf.descriptor import FieldDescriptor, Descriptor
from google.protobuf.message import Message
class InvalidDescriptorError(Exception):
    """
    Signals that a descriptor could not be converted; raised by
    DescriptorParser.parse_message when a field carries a label it does
    not recognize.
    """
class DescriptorParser(object):
    """
    Used to parse protobuf FileDescriptor objects into native Python
    data structures (nested dict/list/intrinsic values). Two of the typical
    sources of FileDescriptor objects are:
    1. CodeGeneratorRequest, used as binary input to any protoc plugin,
       contains a list of these FileDescriptor objects (under the
       proto_file attribute)
    2. FileDescriptorSet, as saved by protoc when using the -o option.

    An important feature of the parser is that it can process the source
    code annotations and can fold comments into the relevant definitions
    present in the proto file.

    Usage (in a protoc plugin):
    >>> request = plugin.CodeGeneratorRequest()
    >>> request.ParseFromString(sys.stdin.read())
    >>> parser = DescriptorParser()
    >>> for proto_file in request.proto_file:
    ...     parsed_data = parser.parse_file_descriptor(proto_file)
    ...     print(json.dumps(parsed_data, indent=4))
    """

    # Parsed descriptor.proto, loaded once by the first instance and then
    # shared by every instance through the class attribute.
    meta = None

    def __init__(self):
        if DescriptorParser.meta is None:
            DescriptorParser.meta = self.load_meta_descriptor()

    def load_meta_descriptor(self):
        """
        Load the protobuf version of descriptor.proto to use it in
        decoding protobuf paths.

        Reads 'descriptor.desc' from the directory holding this module,
        parses it as a FileDescriptorSet and returns its single file entry.
        """
        fpath = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                             'descriptor.desc'))
        # The file holds a serialized protobuf message, i.e. binary data, so
        # it must be read in binary mode to reach ParseFromString unaltered.
        with open(fpath, 'rb') as f:
            blob = f.read()
        proto = descriptor_pb2.FileDescriptorSet()
        proto.ParseFromString(blob)
        assert len(proto.file) == 1
        return proto.file[0]

    # Converters for non-message values, keyed by the exact Python type of
    # the value. All scalar values are passed through unchanged.
    parser_table = {
        int: lambda x: x,
        bool: lambda x: x,
        float: lambda x: x,
        str: lambda x: x,
        bytes: lambda x: x,
    }
    try:
        # Python 2 has separate text and long-integer types; on Python 3
        # these names do not exist and str/int above already cover them.
        parser_table[unicode] = lambda x: x  # noqa: F821
        parser_table[long] = lambda x: x  # noqa: F821
    except NameError:
        pass

    def parse(self, o, type_tag_name=None):
        """
        Convert a protobuf message or a scalar value to native Python data.

        :param o: a protobuf Message or a value whose exact type is a key
            of parser_table
        :param type_tag_name: forwarded to parse_message for messages
        :return: an OrderedDict for messages, otherwise the converted value
        :raises KeyError: if o is not a Message and type(o) is not in
            parser_table
        """
        if isinstance(o, Message):
            return self.parse_message(o, type_tag_name)
        else:
            return self.parser_table[type(o)](o)

    def parse_message(self, m, type_tag_name=None):
        """
        Convert the fields reported by m.ListFields() into an OrderedDict
        keyed by field name, in the order ListFields() yields them.

        :param m: the protobuf Message to convert
        :param type_tag_name: if not None, the message's full type name is
            stored in the result under this key (also applied to every
            nested message)
        :return: OrderedDict of field name to parsed value; repeated fields
            become lists
        :raises InvalidDescriptorError: if a field has an unknown label
        """
        assert isinstance(m, Message)
        d = OrderedDict()
        for field, value in m.ListFields():
            assert isinstance(field, FieldDescriptor)
            if field.label in (FieldDescriptor.LABEL_OPTIONAL,
                               FieldDescriptor.LABEL_REQUIRED):
                d[field.name] = self.parse(value, type_tag_name)
            elif field.label == FieldDescriptor.LABEL_REPEATED:
                d[field.name] = [self.parse(x, type_tag_name) for x in
                                 value]
            else:
                raise InvalidDescriptorError()

        if type_tag_name is not None:
            d[type_tag_name] = m.DESCRIPTOR.full_name.strip('.')

        return d

    def parse_file_descriptor(self, descriptor,
                              type_tag_name=None,
                              fold_comments=False):
        """
        Parse one file descriptor into nested native Python data.

        :param descriptor: the file descriptor message to parse
        :param type_tag_name: see parse_message
        :param fold_comments: if True, the comments recorded under
            'source_code_info' are stored as '_description' on the nodes
            their paths address, and 'source_code_info' is then removed
            from the result
        :return: the parsed OrderedDict
        """
        d = self.parse(descriptor, type_tag_name=type_tag_name)

        if fold_comments:
            locations = d.get('source_code_info', {}).get('location', [])
            for location in locations:
                path = location.get('path', [])
                comments = ''.join([
                    location.get('leading_comments', '').strip(' '),
                    location.get('trailing_comments', '').strip(' '),
                    ''.join(block.strip(' ') for block
                            in
                            location.get('leading_detached_comments', ''))
                ]).strip()

                # ignore locations with no comments
                if not comments:
                    continue

                # we ignore path with odd number of entries, since these do
                # not address our schema nodes, but rather the meta schema
                if len(path) % 2 == 0:
                    # NOTE(review): a commented location whose even-length
                    # path addresses something other than an entry of a
                    # repeated message field (possibly an import or option
                    # statement) would trip the assertions here - confirm
                    # whether such locations should be skipped instead.
                    node = self.find_node_by_path(
                        path, self.meta.DESCRIPTOR, d)
                    assert isinstance(node, dict)
                    node['_description'] = comments

            # remove source_code_info; it is read above with a default, so
            # it may be absent and must not raise KeyError here
            d.pop('source_code_info', None)

        return d

    def parse_file_descriptors(self, descriptors,
                               type_tag_name=None,
                               fold_comments=False):
        """
        Apply parse_file_descriptor to each descriptor and return the
        results as a list, in the same order.
        """
        return [self.parse_file_descriptor(descriptor,
                                           type_tag_name=type_tag_name,
                                           fold_comments=fold_comments)
                for descriptor in descriptors]

    def find_node_by_path(self, path, meta, o):
        """
        Follow a path of (field number, list index) pairs down the parsed
        structure and return the node it addresses.

        :param path: sequence of integers of even length; it is not
            modified
        :param meta: the Descriptor describing o
        :param o: the parsed dict to start from
        :return: the addressed node; o itself when path is empty
        :raises AssertionError: if fewer than two entries remain at some
            step, or meta is not a Descriptor, o is not a dict, or the
            addressed field is not a list
        :raises KeyError: if a field number is unknown to meta or the field
            is not present in o
        :raises IndexError: if a list index is out of range
        """
        pos = 0
        # Walk by index rather than popping, so the caller's path survives.
        while pos < len(path):
            # sanity check
            assert len(path) - pos >= 2
            assert isinstance(meta, Descriptor)
            assert isinstance(o, dict)

            # find field name, then actual field
            field_number = path[pos]
            field_def = meta.fields_by_number[field_number]
            field = o[field_def.name]

            # field must be a list, extract entry with given index
            assert isinstance(field, list)  # expected to be a list field
            index = path[pos + 1]
            o = field[index]
            meta = field_def.message_type
            pos += 2

        return o