Zsolt Haraszti | 46c7200 | 2016-10-10 09:55:30 -0700 | [diff] [blame] | 1 | # |
| 2 | # Copyright 2016 the original author or authors. |
| 3 | # |
| 4 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | # you may not use this file except in compliance with the License. |
| 6 | # You may obtain a copy of the License at |
| 7 | # |
| 8 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | # |
| 10 | # Unless required by applicable law or agreed to in writing, software |
| 11 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | # See the License for the specific language governing permissions and |
| 14 | # limitations under the License. |
| 15 | # |
| 16 | import os |
| 17 | from collections import OrderedDict |
| 18 | |
| 19 | from google.protobuf import descriptor_pb2 |
| 20 | from google.protobuf.descriptor import FieldDescriptor, Descriptor |
| 21 | from google.protobuf.message import Message |
| 22 | |
| 23 | |
class InvalidDescriptorError(Exception):
    """Raised when a protobuf descriptor message does not have the expected structure."""
| 25 | |
| 26 | |
class DescriptorParser(object):
    """
    Used to parse protobuf FileDescriptor objects into native Python
    data structures (nested dict/list/intrinsic values). Two of the
    typical sources of FileDescriptor objects are:
    1. CodeGeneratorRequest, used as binary input to any protoc plugin,
       contains a list of these FileDescriptor objects (under the
       proto_file attribute)
    2. FileDescriptorSet, as saved by protoc when using the -o option.

    An important feature of the parser is that it can process the source
    code annotations and can fold comments into the relevant definitions
    present in the proto file.

    Usage (in a protoc plugin):
    >>> request = plugin.CodeGeneratorRequest()
    >>> request.ParseFromString(sys.stdin.read())
    >>> parser = DescriptorParser()
    >>> for proto_file in request.proto_file:
    >>>     parsed_data = parser.parse_file_descriptor(proto_file)
    >>>     print json.dumps(parsed_data, indent=4)
    """

    # Cached FileDescriptorProto for descriptor.proto itself, shared by
    # all instances and loaded lazily on first instantiation. It is
    # needed to decode SourceCodeInfo paths in find_node_by_path().
    meta = None

    def __init__(self):
        if DescriptorParser.meta is None:
            DescriptorParser.meta = self.load_meta_descriptor()

    def load_meta_descriptor(self):
        """
        Load the protobuf version of descriptor.proto to use it in
        decoding protobuf paths.

        :return: the single FileDescriptorProto contained in the bundled
            'descriptor.desc' FileDescriptorSet
        """
        fpath = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                             'descriptor.desc'))
        # binary mode: the file holds serialized protobuf bytes, and text
        # mode would corrupt them on platforms with newline translation
        with open(fpath, 'rb') as f:
            blob = f.read()
        proto = descriptor_pb2.FileDescriptorSet()
        proto.ParseFromString(blob)
        assert len(proto.file) == 1
        return proto.file[0]

    # Scalar protobuf field values pass through unchanged. The text type
    # differs between Python 2 (unicode) and Python 3 (str); register
    # whichever exists so the class can be defined under either runtime.
    parser_table = {
        int: lambda x: x,
        bool: lambda x: x,
    }
    try:
        parser_table[unicode] = lambda x: x
    except NameError:  # Python 3: no 'unicode' builtin
        parser_table[str] = lambda x: x

    def parse(self, o, type_tag_name=None):
        """
        Convert o — a protobuf Message or a scalar field value — into
        native Python data (nested OrderedDict/list/scalars).

        :param o: protobuf Message or scalar value
        :param type_tag_name: optional key name under which each parsed
            message dict records its protobuf type name
        :return: native Python representation of o
        """
        if isinstance(o, Message):
            return self.parse_message(o, type_tag_name)
        else:
            return self.parser_table[type(o)](o)

    def parse_message(self, m, type_tag_name=None):
        """
        Convert a protobuf Message into an OrderedDict, recursing into
        nested messages and repeated fields.

        :raises InvalidDescriptorError: if a field carries an unknown label
        """
        assert isinstance(m, Message)
        d = OrderedDict()
        for field, value in m.ListFields():
            assert isinstance(field, FieldDescriptor)
            # use the named label constants instead of bare 1/2/3
            if field.label in (FieldDescriptor.LABEL_OPTIONAL,
                               FieldDescriptor.LABEL_REQUIRED):
                d[field.name] = self.parse(value, type_tag_name)
            elif field.label == FieldDescriptor.LABEL_REPEATED:
                d[field.name] = [self.parse(x, type_tag_name)
                                 for x in value]
            else:
                raise InvalidDescriptorError(
                    'unknown label %s for field %s' % (field.label,
                                                       field.name))

        if type_tag_name is not None:
            d[type_tag_name] = m.DESCRIPTOR.full_name.strip('.')

        return d

    def parse_file_descriptor(self, descriptor,
                              type_tag_name=None,
                              fold_comments=False):
        """
        Parse one FileDescriptorProto into a nested dict.

        :param descriptor: a FileDescriptorProto message
        :param type_tag_name: if not None, each parsed message dict gains
            a key of this name holding the message's protobuf type name
        :param fold_comments: if True, fold source-code comments into the
            schema nodes they describe (under '_description') and drop
            the raw source_code_info from the result
        :return: OrderedDict representation of the descriptor
        """
        d = self.parse(descriptor, type_tag_name=type_tag_name)

        if fold_comments:
            locations = d.get('source_code_info', {}).get('location', [])
            for location in locations:
                path = location.get('path', [])
                comments = ''.join([
                    location.get('leading_comments', '').strip(' '),
                    location.get('trailing_comments', '').strip(' '),
                    ''.join(block.strip(' ') for block in
                            location.get('leading_detached_comments', ''))
                ]).strip()

                # ignore locations with no comments
                if not comments:
                    continue

                # we ignore paths with an odd number of entries, since
                # these do not address our schema nodes, but rather the
                # meta schema
                if len(path) % 2 == 0:
                    node = self.find_node_by_path(
                        path, self.meta.DESCRIPTOR, d)
                    assert isinstance(node, dict)
                    node['_description'] = comments

            # remove source_code_info; pop() so a descriptor compiled
            # without source info does not raise KeyError
            d.pop('source_code_info', None)

        return d

    def parse_file_descriptors(self, descriptors,
                               type_tag_name=None,
                               fold_comments=False):
        """Parse an iterable of FileDescriptorProto messages; see
        parse_file_descriptor for the meaning of the keyword args."""
        return [self.parse_file_descriptor(descriptor,
                                           type_tag_name=type_tag_name,
                                           fold_comments=fold_comments)
                for descriptor in descriptors]

    def find_node_by_path(self, path, meta, o):
        """
        Resolve a protobuf SourceCodeInfo location path against the
        parsed dict representation.

        :param path: flat list of alternating (field number, list index)
            entries; must have an even number of elements
        :param meta: protobuf Descriptor describing the current level
        :param o: parsed dict at the current level
        :return: the nested dict addressed by path
        """
        # stop recursion when path is empty
        if not path:
            return o

        # sanity check: entries come in (field number, index) pairs
        assert len(path) >= 2
        assert isinstance(meta, Descriptor)
        assert isinstance(o, dict)

        # find field name, then actual field
        field_number, index = path[0], path[1]
        field_def = meta.fields_by_number[field_number]
        field = o[field_def.name]

        # field must be a list, extract entry with given index
        assert isinstance(field, list)  # expected to be a list field
        child_o = field[index]

        child_meta = field_def.message_type
        # recurse on a slice so the caller's path list is not mutated
        return self.find_node_by_path(path[2:], child_meta, child_o)