blob: 275a2ca2e569598bf66f86fa18a36e8dfcfb3f3c [file] [log] [blame]
Dusan Klinecccaa0d92014-11-09 03:21:31 +01001__author__ = "Dusan (Ph4r05) Klinec"
2__copyright__ = "Copyright (C) 2014 Dusan (ph4r05) Klinec"
3__license__ = "Apache License, Version 2.0"
4__version__ = "1.0"
5
6import ply.lex as lex
7import ply.yacc as yacc
8from .model import *
9
class ProtobufLexer(object):
    """Token rules for protobuf source, consumed by ``lex.lex(module=...)``.

    Every ``t_*`` attribute is a lexer rule.  For the ``t_*`` methods the
    docstring *is* the regular expression, so those docstrings are behaviour
    and must not be reworded.  Methods that return nothing discard the text
    they matched.
    """

    # Reserved words.  Each one is also published as an upper-cased token type.
    keywords = ('double', 'float', 'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
                'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'bool', 'string', 'bytes',
                'message', 'required', 'optional', 'repeated', 'enum', 'extensions', 'max', 'extends', 'extend',
                'to', 'package', 'service', 'rpc', 'returns', 'true', 'false', 'option', 'import')

    tokens = [
        'NAME',
        'NUM',
        'STRING_LITERAL',
        'LINE_COMMENT', 'BLOCK_COMMENT',

        'LBRACE', 'RBRACE', 'LBRACK', 'RBRACK',
        'LPAR', 'RPAR', 'EQ', 'SEMI', 'DOT',
        'PLUSPLUSPLUS'

    ] + [k.upper() for k in keywords]
    literals = '()+-*/=?:,.^|&~!=[]{};<>@%'

    # Optionally signed integer.
    t_NUM = r'[+-]?\d+'
    # Double-quoted string; a backslash escapes the following character.
    t_STRING_LITERAL = r'\"([^\\\n]|(\\.))*?\"'

    # '//' comments run to the end of the line and produce no token.
    t_ignore_LINE_COMMENT = '//.*'

    def t_BLOCK_COMMENT(self, t):
        r'/\*(.|\n)*?\*/'
        # The comment is dropped (nothing is returned), but the newlines it
        # spans still have to be counted to keep line numbers accurate.
        t.lexer.lineno += t.value.count('\n')

    t_LBRACE = '{'
    t_RBRACE = '}'
    t_LBRACK = '\\['
    t_RBRACK = '\\]'
    t_LPAR = '\\('
    t_RPAR = '\\)'
    t_EQ = '='
    t_SEMI = ';'
    t_DOT = '\\.'
    # Characters skipped between tokens: space, tab, form feed.
    t_ignore = ' \t\f'
    # Marker token; the parser's 'goal' rule expects it as the first token.
    t_PLUSPLUSPLUS = '\\+\\+\\+'

    def t_NAME(self, t):
        '[A-Za-z_$][A-Za-z0-9_$]*'
        # Identifiers that spell a reserved word are retyped to that keyword's
        # token (e.g. 'message' -> MESSAGE); everything else stays NAME.
        if t.value in ProtobufLexer.keywords:
            t.type = t.value.upper()
        return t

    def t_newline(self, t):
        r'\n+'
        # One line per '\n' matched.
        t.lexer.lineno += len(t.value)

    def t_newline2(self, t):
        r'(\r\n)+'
        # Each line break is two characters here.  Floor division keeps
        # lineno an int; '/' would turn it into a float on Python 3.
        t.lexer.lineno += len(t.value) // 2

    def t_error(self, t):
        # Report the offending character, then skip it so lexing can continue.
        print("Illegal character '{}' ({}) in line {}".format(t.value[0], hex(ord(t.value[0])), t.lexer.lineno))
        t.lexer.skip(1)
67
class LexHelper:
    """Static helpers that copy source-position data from a yacc production
    onto a model object."""

    @staticmethod
    def _widest_span(count, span_at):
        """Return ``(lowest start, highest end)`` over ``span_at(0..count-1)``.

        Spans equal to ``(0, 0)`` are skipped.  When nothing else is present
        the initial sentinels ``(1e60, -1)`` are returned unchanged, which is
        what the two public wrappers have always produced in that case.
        """
        lows = [1e60]
        highs = [-1]
        for idx in range(count):
            span = span_at(idx)
            if span[0] != 0 or span[1] != 0:
                lows.append(span[0])
                highs.append(span[1])
        # min/max return the first of equal candidates, so the sentinels win
        # ties exactly as the strict '<' / '>' comparisons used to.
        return (min(lows), max(highs))

    @staticmethod
    def get_max_linespan(p):
        """Return the widest ``(first, last)`` line span over all symbols of ``p``.

        ``p`` must support ``len(p)`` and ``p.linespan(i)``.
        """
        return LexHelper._widest_span(len(p), p.linespan)

    @staticmethod
    def get_max_lexspan(p):
        """Return the widest ``(start, end)`` lex span over all symbols of ``p``.

        ``p`` must support ``len(p)`` and ``p.lexspan(i)``.
        """
        return LexHelper._widest_span(len(p), p.lexspan)

    @staticmethod
    def set_parse_object(dst, p):
        """Store the spans of ``p`` and ``p`` itself on ``dst``.

        Calls ``dst.setLexData(linespan=..., lexspan=...)`` followed by
        ``dst.setLexObj(p)``.
        """
        line_span = LexHelper.get_max_linespan(p)
        lex_span = LexHelper.get_max_lexspan(p)
        dst.setLexData(linespan=line_span, lexspan=lex_span)
        dst.setLexObj(p)
93
class ProtobufParser(object):
    """Grammar for protobuf source, consumed by ``yacc.yacc(module=...)``.

    The docstring of every ``p_*`` method is its grammar production, so those
    docstrings are behaviour and must not be reworded.  Inside a rule,
    ``p[1:]`` are the values of the right-hand-side symbols and ``p[0]``
    receives the value of the rule.  Model classes (``Name``, ``FieldType``,
    ...) come from the ``model`` star import at the top of the file.
    """

    tokens = ProtobufLexer.tokens

    @staticmethod
    def _append_item(p):
        # Shared body of the left-recursive list rules: 'x' starts a new list,
        # 'xs x' yields a new list with x appended.
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_empty(self, p):
        '''empty :'''
        pass

    def p_field_modifier(self, p):
        '''field_modifier : REQUIRED
                          | OPTIONAL
                          | REPEATED'''
        p[0] = p[1]

    def p_primitive_type(self, p):
        '''primitive_type : DOUBLE
                          | FLOAT
                          | INT32
                          | INT64
                          | UINT32
                          | UINT64
                          | SINT32
                          | SINT64
                          | FIXED32
                          | FIXED64
                          | SFIXED32
                          | SFIXED64
                          | BOOL
                          | STRING
                          | BYTES'''
        p[0] = p[1]

    def p_field_id(self, p):
        '''field_id : NUM'''
        p[0] = p[1]

    # Right-hand side of a field directive: literal tokens pass through as-is.
    def p_rvalue(self, p):
        '''rvalue : NUM
                  | TRUE
                  | FALSE'''
        p[0] = p[1]

    # ... while an identifier is wrapped in a Name.
    def p_rvalue2(self, p):
        '''rvalue : NAME'''
        p[0] = Name(p[1])

    def p_field_directive(self, p):
        '''field_directive : LBRACK NAME EQ rvalue RBRACK'''
        p[0] = FieldDirective(Name(p[2]), p[4])
        LexHelper.set_parse_object(p[0], p)

    def p_field_directive_times(self, p):
        '''field_directive_times : field_directive_plus'''
        p[0] = p[1]

    def p_field_directive_times2(self, p):
        '''field_directive_times : empty'''
        p[0] = []

    def p_field_directive_plus(self, p):
        '''field_directive_plus : field_directive
                                | field_directive_plus field_directive'''
        self._append_item(p)

    def p_dotname(self, p):
        '''dotname : NAME
                   | dotname DOT NAME'''
        if len(p) == 2:
            p[0] = p[1]
        else:
            # Keep every component: 'a.b.c' must stay 'a.b.c', not collapse
            # to its first segment.  p[2] is the matched '.' itself.
            p[0] = p[1] + p[2] + p[3]

    # Hack for cases when there is a field named 'message' or 'max'
    def p_fieldName(self, p):
        '''field_name : NAME
                      | MESSAGE
                      | MAX'''
        p[0] = p[1]

    def p_field_type(self, p):
        '''field_type : primitive_type'''
        p[0] = FieldType(p[1])
        LexHelper.set_parse_object(p[0], p)

    def p_field_type2(self, p):
        '''field_type : dotname'''
        p[0] = Name(p[1])
        LexHelper.set_parse_object(p[0], p)

    # Root of the field declaration.
    def p_field_definition(self, p):
        '''field_definition : field_modifier field_type field_name EQ field_id field_directive_times SEMI'''
        p[0] = FieldDefinition(p[1], p[2], Name(p[3]), p[5], p[6])
        LexHelper.set_parse_object(p[0], p)

    # Root of the enum field declaration.
    def p_enum_field(self, p):
        '''enum_field : field_name EQ NUM SEMI'''
        p[0] = EnumFieldDefinition(Name(p[1]), p[3])
        LexHelper.set_parse_object(p[0], p)

    def p_enum_body_part(self, p):
        '''enum_body_part : enum_field
                          | option_directive'''
        p[0] = p[1]

    def p_enum_body(self, p):
        '''enum_body : enum_body_part
                     | enum_body enum_body_part'''
        self._append_item(p)

    def p_enum_body_opt(self, p):
        '''enum_body_opt : empty'''
        p[0] = []

    def p_enum_body_opt2(self, p):
        '''enum_body_opt : enum_body'''
        p[0] = p[1]

    # Root of the enum declaration.
    # enum_definition ::= 'enum' ident '{' { ident '=' integer ';' }* '}'
    def p_enum_definition(self, p):
        '''enum_definition : ENUM NAME LBRACE enum_body_opt RBRACE'''
        p[0] = EnumDefinition(Name(p[2]), p[4])
        LexHelper.set_parse_object(p[0], p)

    # Upper bound of an extensions range: the keyword 'max' ...
    def p_extensions_to(self, p):
        '''extensions_to : MAX'''
        p[0] = ExtensionsMax()

    # ... or an explicit number.
    def p_extensions_to2(self, p):
        '''extensions_to : NUM'''
        p[0] = p[1]

    # extensions_definition ::= 'extensions' integer 'to' integer ';'
    def p_extensions_definition(self, p):
        '''extensions_definition : EXTENSIONS NUM TO extensions_to SEMI'''
        p[0] = ExtensionsDirective(p[2], p[4])
        LexHelper.set_parse_object(p[0], p)

    # message_extension ::= 'extend' ident '{' message_body '}'
    def p_message_extension(self, p):
        '''message_extension : EXTEND NAME LBRACE message_body RBRACE'''
        p[0] = MessageExtension(Name(p[2]), p[4])
        LexHelper.set_parse_object(p[0], p)

    def p_message_body_part(self, p):
        '''message_body_part : field_definition
                             | enum_definition
                             | message_definition
                             | extensions_definition
                             | message_extension'''
        p[0] = p[1]

    # message_body ::= { field_definition | enum_definition | message_definition | extensions_definition | message_extension }*
    def p_message_body(self, p):
        '''message_body : empty'''
        p[0] = []

    # message_body ::= { field_definition | enum_definition | message_definition | extensions_definition | message_extension }*
    def p_message_body2(self, p):
        '''message_body : message_body_part
                        | message_body message_body_part'''
        self._append_item(p)

    # Root of the message declaration.
    # message_definition = MESSAGE_ - ident("messageId") + LBRACE + message_body("body") + RBRACE
    def p_message_definition(self, p):
        '''message_definition : MESSAGE NAME LBRACE message_body RBRACE'''
        p[0] = MessageDefinition(Name(p[2]), p[4])
        LexHelper.set_parse_object(p[0], p)

    # method_definition ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';'
    # The trailing ';' is accepted but not required, so input written without
    # it keeps parsing.  Symbol positions 1-9 are identical in both
    # alternatives, so the indices used below hold for either.
    # NOTE(review): the parser is built with optimize=1; a previously
    # generated parse table may need to be regenerated before this extra
    # alternative takes effect - confirm against the PLY version in use.
    def p_method_definition(self, p):
        '''method_definition : RPC NAME LPAR NAME RPAR RETURNS LPAR NAME RPAR
                             | RPC NAME LPAR NAME RPAR RETURNS LPAR NAME RPAR SEMI'''
        p[0] = MethodDefinition(Name(p[2]), Name(p[4]), Name(p[8]))
        LexHelper.set_parse_object(p[0], p)

    def p_method_definition_opt(self, p):
        '''method_definition_opt : empty'''
        p[0] = []

    def p_method_definition_opt2(self, p):
        '''method_definition_opt : method_definition
                                 | method_definition_opt method_definition'''
        self._append_item(p)

    # service_definition ::= 'service' ident '{' method_definition* '}'
    # service_definition = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(method_definition)) + RBRACE
    def p_service_definition(self, p):
        '''service_definition : SERVICE NAME LBRACE method_definition_opt RBRACE'''
        p[0] = ServiceDefinition(Name(p[2]), p[4])
        LexHelper.set_parse_object(p[0], p)

    # package_directive ::= 'package' ident [ '.' ident]* ';'
    def p_package_directive(self, p):
        '''package_directive : PACKAGE dotname SEMI'''
        p[0] = PackageStatement(Name(p[2]))
        LexHelper.set_parse_object(p[0], p)

    # import_directive = IMPORT_ - quotedString("importFileSpec") + SEMI
    def p_import_directive(self, p):
        '''import_directive : IMPORT STRING_LITERAL SEMI'''
        p[0] = ImportStatement(Literal(p[2]))
        LexHelper.set_parse_object(p[0], p)

    def p_option_rvalue(self, p):
        '''option_rvalue : NUM
                         | TRUE
                         | FALSE'''
        p[0] = p[1]

    def p_option_rvalue2(self, p):
        '''option_rvalue : STRING_LITERAL'''
        p[0] = Literal(p[1])

    # option_directive = OPTION_ - ident("optionName") + EQ + quotedString("optionValue") + SEMI
    def p_option_directive(self, p):
        '''option_directive : OPTION NAME EQ option_rvalue SEMI'''
        p[0] = OptionStatement(Name(p[2]), p[4])
        LexHelper.set_parse_object(p[0], p)

    # topLevelStatement = Group(message_definition | message_extension | enum_definition | service_definition | import_directive | option_directive)
    def p_topLevel(self, p):
        '''topLevel : message_definition
                    | message_extension
                    | enum_definition
                    | service_definition
                    | import_directive
                    | option_directive'''
        p[0] = p[1]

    def p_package_definition(self, p):
        '''package_definition : package_directive'''
        p[0] = p[1]

    # A file without a package statement yields an empty list here.
    def p_packages2(self, p):
        '''package_definition : empty'''
        p[0] = []

    def p_statements2(self, p):
        '''statements : topLevel
                      | statements topLevel'''
        self._append_item(p)

    def p_statements(self, p):
        '''statements : empty'''
        p[0] = []

    # parser = Optional(package_directive) + ZeroOrMore(topLevelStatement)
    def p_protofile(self, p):
        '''protofile : package_definition statements'''
        p[0] = ProtoFile(p[1], p[2])
        LexHelper.set_parse_object(p[0], p)

    # Parsing starting point
    def p_goal(self, p):
        '''goal : PLUSPLUSPLUS protofile'''
        # The leading marker token carries no information; the result is the
        # protofile value.
        p[0] = p[2]

    def p_error(self, p):
        # Only reports the problem; no recovery is attempted here.
        print('error: {}'.format(p))
367
class ProtobufAnalyzer(object):
    """Front end that builds the PLY lexer and parser from this module's rule
    classes and runs them over strings or files."""

    def __init__(self):
        self.lexer = lex.lex(module=ProtobufLexer(), optimize=1)
        self.parser = yacc.yacc(module=ProtobufParser(), start='goal', optimize=1)

    @staticmethod
    def _read_source(_file):
        """Return the full text of ``_file``.

        ``_file`` is either a path given as ``str``, which is opened and
        closed here, or an already-open iterable of lines, which is consumed
        but left for the caller to close.
        """
        if isinstance(_file, str):
            # open() exists on both Python 2 and 3 (the old file() builtin
            # does not), and the with-block guarantees the handle is closed.
            with open(_file) as handle:
                return handle.read()
        return ''.join(_file)

    def tokenize_string(self, code):
        """Lex ``code`` and print every token produced; returns ``None``."""
        self.lexer.input(code)
        for token in self.lexer:
            print(token)

    def tokenize_file(self, _file):
        """Print the tokens of ``_file`` (a path or an iterable of lines)."""
        return self.tokenize_string(self._read_source(_file))

    def parse_string(self, code, debug=0, lineno=1, prefix='+++'):
        """Parse ``code`` and return whatever the parser produces.

        ``lineno`` is the line number assigned to the lexer before parsing.
        ``prefix`` is prepended to ``code``; the default ``'+++'`` is the
        marker the grammar's ``goal`` rule expects as the first token.
        ``debug`` is forwarded to the parser.
        """
        self.lexer.lineno = lineno
        return self.parser.parse(prefix + code, lexer=self.lexer, debug=debug)

    def parse_file(self, _file, debug=0):
        """Parse ``_file`` (a path or an iterable of lines); see ``parse_string``."""
        return self.parse_string(self._read_source(_file), debug=debug)