blob: 13bdc5e2e65429a94963811694de60dd9c662d99 [file] [log] [blame]
Zsolt Haraszti46c72002016-10-10 09:55:30 -07001#
Zsolt Harasztiaccad4a2017-01-03 21:56:48 -08002# Copyright 2017 the original author or authors.
Zsolt Haraszti46c72002016-10-10 09:55:30 -07003#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16import os
17from collections import OrderedDict
18
19from google.protobuf import descriptor_pb2
20from google.protobuf.descriptor import FieldDescriptor, Descriptor
21from google.protobuf.message import Message
22
23
class InvalidDescriptorError(Exception):
    """Raised when a descriptor message contains a field whose label is
    not one of optional, required, or repeated."""
Zsolt Haraszti46c72002016-10-10 09:55:30 -070026
27
class DescriptorParser(object):
    """
    Used to parse protobuf FileDescriptor objects into native Python
    data structures (nested dict/list/intrinsic values). Two of the typical
    sources of FileDescriptor objects are:
    1. CodeGeneratorRequest, used as binary input to any protoc plugin,
       contains a list of these FileDescriptor objects (under the
       proto_file attribute)
    2. FileDescriptorSet, as saved by protoc when using the -o option.

    An important feature of the parser is that it can process the source
    code annotations and can fold comments into the relevant definitions
    present in the proto file.

    Usage (in a protoc plugin):
    >>> request = plugin.CodeGeneratorRequest()
    >>> request.ParseFromString(sys.stdin.read())
    >>> parser = DescriptorParser()
    >>> for proto_file in request.proto_file:
    >>>     parsed_data = parser.parse_file_descriptor(proto_file)
    >>>     print(json.dumps(parsed_data, indent=4))
    """

    # Class-level cache of the FileDescriptorProto describing
    # descriptor.proto itself; loaded lazily on first construction and
    # shared by all instances.
    meta = None

    def __init__(self):
        if DescriptorParser.meta is None:
            DescriptorParser.meta = self.load_meta_descriptor()

    def load_meta_descriptor(self):
        """
        Load the protobuf version of descriptor.proto to use it in
        decoding protobuf paths.

        :return: the single FileDescriptorProto contained in the bundled
            'descriptor.desc' FileDescriptorSet
        """
        fpath = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                             'descriptor.desc'))
        # The file is a serialized protobuf blob, so it must be read in
        # binary mode: text mode would corrupt it on Windows and would
        # yield str instead of the bytes ParseFromString needs on Python 3.
        with open(fpath, 'rb') as f:
            blob = f.read()
        proto = descriptor_pb2.FileDescriptorSet()
        proto.ParseFromString(blob)
        assert len(proto.file) == 1
        return proto.file[0]

    # Dispatch table for intrinsic (non-Message) leaf values found in
    # descriptors; strings, ints and bools pass through unchanged.
    parser_table = {
        str: lambda x: x,
        int: lambda x: x,
        bool: lambda x: x,
    }
    # On Python 2, text fields arrive as unicode objects; register them
    # too. On Python 3 the name is gone and str already covers text.
    try:
        parser_table[unicode] = lambda x: x
    except NameError:
        pass

    def parse(self, o, type_tag_name=None):
        """
        Convert o into a native Python data structure.

        :param o: either a protobuf Message or an intrinsic leaf value
        :param type_tag_name: if not None, every dict generated from a
            Message gets an extra entry under this key holding the
            message's fully qualified protobuf type name
        :return: nested OrderedDict / list / intrinsic representation
        """
        if isinstance(o, Message):
            return self.parse_message(o, type_tag_name)
        else:
            return self.parser_table[type(o)](o)

    def parse_message(self, m, type_tag_name=None):
        """
        Convert the protobuf Message m into an OrderedDict, recursing
        into nested messages and repeated fields.

        :raises InvalidDescriptorError: if a field carries a label other
            than optional, required or repeated
        """
        assert isinstance(m, Message)
        d = OrderedDict()
        for field, value in m.ListFields():
            assert isinstance(field, FieldDescriptor)
            if field.label in (FieldDescriptor.LABEL_OPTIONAL,
                               FieldDescriptor.LABEL_REQUIRED):
                d[field.name] = self.parse(value, type_tag_name)
            elif field.label == FieldDescriptor.LABEL_REPEATED:
                d[field.name] = [self.parse(x, type_tag_name)
                                 for x in value]
            else:
                raise InvalidDescriptorError(
                    'unsupported label %s on field %s' % (
                        field.label, field.name))

        if type_tag_name is not None:
            d[type_tag_name] = m.DESCRIPTOR.full_name.strip('.')

        return d

    def parse_file_descriptor(self, descriptor,
                              type_tag_name=None,
                              fold_comments=False):
        """
        Parse a single FileDescriptorProto into a nested dict.

        :param descriptor: a FileDescriptorProto message
        :param type_tag_name: optional key under which each message dict
            records its protobuf type name (see parse())
        :param fold_comments: if True, fold source code comments into a
            '_description' entry on the node they document and drop the
            raw source_code_info from the result
        :return: nested OrderedDict representation of the descriptor
        """

        d = self.parse(descriptor, type_tag_name=type_tag_name)

        if fold_comments:
            locations = d.get('source_code_info', {}).get('location', [])
            for location in locations:
                path = location.get('path', [])
                comments = ''.join([
                    location.get('leading_comments', '').strip(' '),
                    location.get('trailing_comments', '').strip(' '),
                    ''.join(block.strip(' ') for block
                            in
                            location.get('leading_detached_comments', ''))
                ]).strip()

                # ignore locations with no comments
                if not comments:
                    continue

                # we ignore paths with an odd number of entries, since
                # these do not address our schema nodes, but rather the
                # meta schema
                if (len(path) % 2 == 0):
                    node = self.find_node_by_path(
                        path, self.meta.DESCRIPTOR, d)
                    assert isinstance(node, dict)
                    node['_description'] = comments

            # remove the (now folded) raw source code info; it may be
            # absent altogether, hence pop() rather than del to avoid a
            # KeyError on descriptors compiled without source info
            d.pop('source_code_info', None)

        return d

    def parse_file_descriptors(self, descriptors,
                               type_tag_name=None,
                               fold_comments=False):
        """
        Parse an iterable of FileDescriptorProto objects; see
        parse_file_descriptor() for the meaning of the arguments.

        :return: list of parsed descriptors, one per input descriptor
        """
        return [self.parse_file_descriptor(descriptor,
                                           type_tag_name=type_tag_name,
                                           fold_comments=fold_comments)
                for descriptor in descriptors]

    def find_node_by_path(self, path, meta, o):
        """
        Resolve a source_code_info path (alternating field-number/index
        pairs) against the parsed dict o, using meta (the Descriptor of
        the corresponding descriptor.proto message) to map field numbers
        to field names.

        NOTE: path is consumed (mutated) as the recursion walks it.

        :return: the dict node addressed by path
        """
        # stop recursion when path is empty
        if not path:
            return o

        # sanity check: paths come in (field_number, index) pairs
        assert len(path) >= 2
        assert isinstance(meta, Descriptor)
        assert isinstance(o, dict)

        # find field name, then actual field
        field_number = path.pop(0)
        field_def = meta.fields_by_number[field_number]
        field = o[field_def.name]

        # field must be a list, extract entry with given index
        assert isinstance(field, list)  # expected to be a list field
        index = path.pop(0)
        child_o = field[index]

        child_meta = field_def.message_type
        return self.find_node_by_path(path, child_meta, child_o)