#
# Copyright 2017 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
from collections import OrderedDict

from google.protobuf import descriptor_pb2
from google.protobuf.descriptor import FieldDescriptor, Descriptor
from google.protobuf.message import Message


class InvalidDescriptorError(Exception):
    pass


class DescriptorParser(object):
    """
    Used to parse protobuf FileDescriptor objects into native Python
    data structures (nested dicts, lists, and intrinsic values). Two of
    the typical sources of FileDescriptor objects are:
    1. CodeGeneratorRequest, used as binary input to any protoc plugin,
       contains a list of these FileDescriptor objects (under the
       proto_file attribute)
    2. FileDescriptorSet, as saved by protoc when using the -o option
       (see the example at the bottom of this module).

    An important feature of the parser is that it can process the
    source code annotations and fold comments into the relevant
    definitions present in the proto file.

    Usage (in a protoc plugin):
    >>> request = plugin.CodeGeneratorRequest()
    >>> request.ParseFromString(sys.stdin.read())
    >>> parser = DescriptorParser()
    >>> for proto_file in request.proto_file:
    ...     parsed_data = parser.parse_file_descriptor(proto_file)
    ...     print json.dumps(parsed_data, indent=4)
    """

    # cached FileDescriptorProto of descriptor.proto itself, shared by
    # all parser instances and loaded lazily on first instantiation
    meta = None

    def __init__(self):
        if DescriptorParser.meta is None:
            DescriptorParser.meta = self.load_meta_descriptor()

    def load_meta_descriptor(self):
        """
        Load the protobuf version of descriptor.proto (a serialized
        FileDescriptorSet, as saved by protoc with the -o option) to
        use it in decoding protobuf paths.
        """
        fpath = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                             'descriptor.desc'))
        with open(fpath, 'rb') as f:
            blob = f.read()
        proto = descriptor_pb2.FileDescriptorSet()
        proto.ParseFromString(blob)
        assert len(proto.file) == 1
        return proto.file[0]

    # leaf (non-message) values are passed through as-is; only these
    # scalar types are expected to occur in descriptor messages
    parser_table = {
        unicode: lambda x: x,
        int: lambda x: x,
        bool: lambda x: x,
    }

    def parse(self, o, type_tag_name=None):
        if isinstance(o, Message):
            return self.parse_message(o, type_tag_name)
        else:
            return self.parser_table[type(o)](o)

    def parse_message(self, m, type_tag_name=None):
        assert isinstance(m, Message)
        d = OrderedDict()
        for field, value in m.ListFields():
            assert isinstance(field, FieldDescriptor)
            if field.label in (FieldDescriptor.LABEL_OPTIONAL,
                               FieldDescriptor.LABEL_REQUIRED):
                d[field.name] = self.parse(value, type_tag_name)
            elif field.label == FieldDescriptor.LABEL_REPEATED:
                d[field.name] = [self.parse(x, type_tag_name)
                                 for x in value]
            else:
                raise InvalidDescriptorError()

        if type_tag_name is not None:
            d[type_tag_name] = m.DESCRIPTOR.full_name.strip('.')

        return d

    def parse_file_descriptor(self, descriptor,
                              type_tag_name=None,
                              fold_comments=False):

        d = self.parse(descriptor, type_tag_name=type_tag_name)

        if fold_comments:
            locations = d.get('source_code_info', {}).get('location', [])
            for location in locations:
                path = location.get('path', [])
                comments = ''.join([
                    location.get('leading_comments', '').strip(' '),
                    location.get('trailing_comments', '').strip(' '),
                    ''.join(block.strip(' ') for block in
                            location.get('leading_detached_comments', []))
                ]).strip()

                # ignore locations with no comments
                if not comments:
                    continue

                # we ignore paths with an odd number of entries, since
                # these do not address our schema nodes, but rather the
                # meta schema
                if len(path) % 2 == 0:
                    node = self.find_node_by_path(
                        path, self.meta.DESCRIPTOR, d)
                    assert isinstance(node, dict)
                    node['_description'] = comments

            # remove source_code_info (it may be absent altogether)
            d.pop('source_code_info', None)

        return d

    def parse_file_descriptors(self, descriptors,
                               type_tag_name=None,
                               fold_comments=False):
        return [self.parse_file_descriptor(descriptor,
                                           type_tag_name=type_tag_name,
                                           fold_comments=fold_comments)
                for descriptor in descriptors]

    def find_node_by_path(self, path, meta, o):
        # stop recursion when path is empty
        if not path:
            return o

        # sanity check: path entries come in (field number, index) pairs
        assert len(path) >= 2
        assert isinstance(meta, Descriptor)
        assert isinstance(o, dict)

        # find field name, then actual field
        field_number = path.pop(0)
        field_def = meta.fields_by_number[field_number]
        field = o[field_def.name]

        # field is expected to be a list; extract entry at given index
        assert isinstance(field, list)
        index = path.pop(0)
        child_o = field[index]

        child_meta = field_def.message_type
        return self.find_node_by_path(path, child_meta, child_o)
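

# Minimal usage sketch for the FileDescriptorSet case described in the
# class docstring. The 'myproto.desc' filename is hypothetical; such a
# file can be produced with:
#   protoc --include_source_info -o myproto.desc myproto.proto
if __name__ == '__main__':
    import json

    file_set = descriptor_pb2.FileDescriptorSet()
    with open('myproto.desc', 'rb') as f:
        file_set.ParseFromString(f.read())

    parser = DescriptorParser()
    for parsed in parser.parse_file_descriptors(file_set.file,
                                                type_tag_name='_type',
                                                fold_comments=True):
        print json.dumps(parsed, indent=4)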