# Module metadata.
__author__ = "Dusan (Ph4r05) Klinec"
__copyright__ = "Copyright (C) 2014 Dusan (ph4r05) Klinec"
__license__ = "Apache License, Version 2.0"
__version__ = "1.0"

import ply.lex as lex
import ply.yacc as yacc
# Wildcard import kept as in the original; presumably supplies the AST node
# classes used by the grammar actions (Name, FieldDefinition, ...) -- verify
# against the sibling ``model`` module.
from .model import *
9
class ProtobufLexer(object):
    """PLY lexer specification for Protocol Buffers (.proto) source text.

    PLY discovers the rules through the ``tokens`` list and the ``t_*``
    attributes.  For function rules the docstring *is* the regular
    expression, so those docstrings must not be edited as documentation.
    """

    # Reserved words; each one becomes its own token type (upper-cased).
    keywords = ('double', 'float', 'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
                'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'bool', 'string', 'bytes',
                'message', 'required', 'optional', 'repeated', 'enum', 'extensions', 'max', 'extends', 'extend',
                'to', 'package', 'service', 'rpc', 'returns', 'true', 'false', 'option', 'import')

    tokens = [
        'NAME',
        'NUM',
        'STRING_LITERAL',
        'LINE_COMMENT', 'BLOCK_COMMENT',

        'LBRACE', 'RBRACE', 'LBRACK', 'RBRACK',
        'LPAR', 'RPAR', 'EQ', 'SEMI', 'DOT'

    ] + [k.upper() for k in keywords]
    literals = '()+-*/=?:,.^|&~!=[]{};<>@%'

    t_NUM = r'[+-]?\d+'
    t_STRING_LITERAL = r'\"([^\\\n]|(\\.))*?\"'

    # Line comments are matched and dropped.
    t_ignore_LINE_COMMENT = '//.*'

    def t_BLOCK_COMMENT(self, t):
        r'/\*(.|\n)*?\*/'
        # Nothing is returned, so the comment yields no token; only the line
        # counter is advanced by the newlines it spans.
        t.lexer.lineno += t.value.count('\n')

    t_LBRACE = '{'
    t_RBRACE = '}'
    t_LBRACK = '\\['
    t_RBRACK = '\\]'
    t_LPAR = '\\('
    t_RPAR = '\\)'
    t_EQ = '='
    t_SEMI = ';'
    t_DOT = '\\.'
    # Spaces, tabs and form feeds are skipped between tokens.
    t_ignore = ' \t\f'

    def t_NAME(self, t):
        '[A-Za-z_$][A-Za-z0-9_$]*'
        # Identifiers that are reserved words are re-typed to the matching
        # keyword token (e.g. 'message' -> MESSAGE); others stay NAME.
        if t.value in ProtobufLexer.keywords:
            t.type = t.value.upper()
        return t

    def t_newline(self, t):
        r'\n+'
        # One line per '\n' character; no token is emitted.
        t.lexer.lineno += len(t.value)

    def t_newline2(self, t):
        r'(\r\n)+'
        # Each CRLF pair is two characters but one line.  Floor division
        # keeps lineno an int; '/' would turn it into a float on Python 3.
        t.lexer.lineno += len(t.value) // 2

    def t_error(self, t):
        # Report the offending character and resume one character later.
        print("Illegal character '{}' ({}) in line {}".format(t.value[0], hex(ord(t.value[0])), t.lexer.lineno))
        t.lexer.skip(1)
65
class ProtobufParser(object):
    """PLY yacc grammar for Protocol Buffers (.proto) files.

    Each ``p_*`` method's docstring is the grammar production it handles, so
    those docstrings are part of the program and must not be reworded.  In
    every action ``p[0]`` is the value of the production and ``p[1..n]`` are
    the values of its right-hand-side symbols.  The analyzer builds the
    parser with ``start='goal'``.
    """

    tokens = ProtobufLexer.tokens

    def p_empty(self, p):
        '''empty :'''
        pass

    def p_field_modifier(self, p):
        '''field_modifier : REQUIRED
                          | OPTIONAL
                          | REPEATED'''
        p[0] = p[1]

    def p_primitive_type(self, p):
        '''primitive_type : DOUBLE
                          | FLOAT
                          | INT32
                          | INT64
                          | UINT32
                          | UINT64
                          | SINT32
                          | SINT64
                          | FIXED32
                          | FIXED64
                          | SFIXED32
                          | SFIXED64
                          | BOOL
                          | STRING
                          | BYTES'''
        p[0] = p[1]

    def p_field_id(self, p):
        '''field_id : NUM'''
        p[0] = p[1]

    def p_rvalue(self, p):
        '''rvalue : NUM
                  | TRUE
                  | FALSE'''
        p[0] = p[1]

    def p_rvalue2(self, p):
        '''rvalue : NAME'''
        p[0] = Name(p[1])

    def p_field_directive(self, p):
        '''field_directive : LBRACK NAME EQ rvalue RBRACK'''
        p[0] = FieldDirective(Name(p[2]), p[4])

    def p_field_directive_times(self, p):
        '''field_directive_times : field_directive_plus'''
        p[0] = p[1]

    def p_field_directive_times2(self, p):
        '''field_directive_times : empty'''
        p[0] = []

    def p_field_directive_plus(self, p):
        '''field_directive_plus : field_directive
                                | field_directive_plus field_directive'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_dotname(self, p):
        '''dotname : NAME
                   | dotname DOT NAME'''
        if len(p) == 2:
            p[0] = p[1]
        else:
            # Re-join the components; returning only p[1] here would reduce
            # 'a.b.c' to 'a' and lose the rest of the qualified name.
            p[0] = p[1] + '.' + p[3]

    # Hack for cases when there is a field named 'message' or 'max'
    def p_fieldName(self, p):
        '''field_name : NAME
                      | MESSAGE
                      | MAX'''
        p[0] = p[1]

    def p_field_type(self, p):
        '''field_type : primitive_type'''
        p[0] = FieldType(p[1])

    def p_field_type2(self, p):
        '''field_type : dotname'''
        p[0] = Name(p[1])

    # Root of the field declaration.
    def p_field_definition(self, p):
        '''field_definition : field_modifier field_type field_name EQ field_id field_directive_times SEMI'''
        p[0] = FieldDefinition(p[1], p[2], Name(p[3]), p[5], p[6])

    # Root of the enum field declaration.
    def p_enum_field(self, p):
        '''enum_field : field_name EQ NUM SEMI'''
        p[0] = EnumFieldDefinition(Name(p[1]), p[3])

    def p_enum_body_part(self, p):
        '''enum_body_part : enum_field
                          | option_directive'''
        p[0] = p[1]

    def p_enum_body(self, p):
        '''enum_body : enum_body_part
                     | enum_body enum_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_enum_body_opt(self, p):
        '''enum_body_opt : empty'''
        p[0] = []

    def p_enum_body_opt2(self, p):
        '''enum_body_opt : enum_body'''
        p[0] = p[1]

    # Root of the enum declaration.
    # enum_definition ::= 'enum' ident '{' { ident '=' integer ';' }* '}'
    def p_enum_definition(self, p):
        '''enum_definition : ENUM NAME LBRACE enum_body_opt RBRACE'''
        p[0] = EnumDefinition(Name(p[2]), p[4])

    def p_extensions_to(self, p):
        '''extensions_to : MAX'''
        p[0] = ExtensionsMax()

    def p_extensions_to2(self, p):
        '''extensions_to : NUM'''
        p[0] = p[1]

    # extensions_definition ::= 'extensions' integer 'to' integer ';'
    def p_extensions_definition(self, p):
        '''extensions_definition : EXTENSIONS NUM TO extensions_to SEMI'''
        p[0] = ExtensionsDirective(p[2], p[4])

    # message_extension ::= 'extend' ident '{' message_body '}'
    def p_message_extension(self, p):
        '''message_extension : EXTEND NAME LBRACE message_body RBRACE'''
        p[0] = MessageExtension(Name(p[2]), p[4])

    def p_message_body_part(self, p):
        '''message_body_part : field_definition
                             | enum_definition
                             | message_definition
                             | extensions_definition
                             | message_extension'''
        p[0] = p[1]

    # message_body ::= { field_definition | enum_definition | message_definition | extensions_definition | message_extension }*
    def p_message_body(self, p):
        '''message_body : empty'''
        p[0] = []

    # Non-empty message body: a list of parts accumulated left to right.
    def p_message_body2(self, p):
        '''message_body : message_body_part
                        | message_body message_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # Root of the message declaration.
    # message_definition = MESSAGE_ - ident("messageId") + LBRACE + message_body("body") + RBRACE
    def p_message_definition(self, p):
        '''message_definition : MESSAGE NAME LBRACE message_body RBRACE'''
        p[0] = MessageDefinition(Name(p[2]), p[4])

    # method_definition ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';'
    # NOTE(review): the sketch above ends with ';' and marks the type names
    # as optional, but the production below has no trailing SEMI and requires
    # both NAMEs -- confirm which form is intended before changing it.
    def p_method_definition(self, p):
        '''method_definition : RPC NAME LPAR NAME RPAR RETURNS LPAR NAME RPAR'''
        p[0] = MethodDefinition(Name(p[2]), Name(p[4]), Name(p[8]))

    def p_method_definition_opt(self, p):
        '''method_definition_opt : empty'''
        p[0] = []

    def p_method_definition_opt2(self, p):
        '''method_definition_opt : method_definition
                                 | method_definition_opt method_definition'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # service_definition ::= 'service' ident '{' method_definition* '}'
    # service_definition = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(method_definition)) + RBRACE
    def p_service_definition(self, p):
        '''service_definition : SERVICE NAME LBRACE method_definition_opt RBRACE'''
        p[0] = ServiceDefinition(Name(p[2]), p[4])

    # package_directive ::= 'package' ident [ '.' ident]* ';'
    def p_package_directive(self, p):
        '''package_directive : PACKAGE dotname SEMI'''
        p[0] = PackageStatement(Name(p[2]))

    # import_directive = IMPORT_ - quotedString("importFileSpec") + SEMI
    def p_import_directive(self, p):
        '''import_directive : IMPORT STRING_LITERAL SEMI'''
        p[0] = ImportStatement(Literal(p[2]))

    def p_option_rvalue(self, p):
        '''option_rvalue : NUM
                         | TRUE
                         | FALSE'''
        p[0] = p[1]

    def p_option_rvalue2(self, p):
        '''option_rvalue : STRING_LITERAL'''
        p[0] = Literal(p[1])

    # option_directive = OPTION_ - ident("optionName") + EQ + quotedString("optionValue") + SEMI
    def p_option_directive(self, p):
        '''option_directive : OPTION NAME EQ option_rvalue SEMI'''
        p[0] = OptionStatement(Name(p[2]), p[4])

    # topLevelStatement = Group(message_definition | message_extension | enum_definition | service_definition | import_directive | option_directive)
    def p_topLevel(self, p):
        '''topLevel : message_definition
                    | message_extension
                    | enum_definition
                    | service_definition
                    | import_directive
                    | option_directive'''
        p[0] = p[1]

    def p_package_definition(self, p):
        '''package_definition : package_directive'''
        p[0] = p[1]

    # A file without a package directive yields an empty list in its place.
    def p_packages2(self, p):
        '''package_definition : empty'''
        p[0] = []

    def p_statements2(self, p):
        '''statements : topLevel
                      | statements topLevel'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_statements(self, p):
        '''statements : empty'''
        p[0] = []

    # parser = Optional(package_directive) + ZeroOrMore(topLevelStatement)
    # The parse result is a two-element list: [package_definition, statements].
    def p_goal2(self, p):
        '''goal : package_definition statements'''
        p[0] = [p[1], p[2]]

    def p_error(self, p):
        # Syntax errors are only reported on stdout; no exception is raised here.
        print('error: {}'.format(p))
319
class ProtobufAnalyzer(object):
    """Front end that pairs the PLY lexer and parser for .proto input.

    The lexer is built from ``ProtobufLexer`` and the parser from
    ``ProtobufParser`` with ``goal`` as the start symbol.
    """

    def __init__(self):
        self.lexer = lex.lex(module=ProtobufLexer(), optimize=1)
        self.parser = yacc.yacc(module=ProtobufParser(), start='goal', optimize=1)

    @staticmethod
    def _read_source(_file):
        """Return the complete text behind ``_file``.

        ``_file`` is either a path (``str``), which is opened, read and
        closed here, or an already-open iterable of lines (e.g. a file
        object), whose remaining lines are concatenated.  The caller keeps
        ownership of an object it passed in; it is not closed.
        """
        if isinstance(_file, str):
            # open() works on both Python 2 and 3 (the old file() builtin
            # does not exist on Python 3); 'with' closes the handle.
            with open(_file) as handle:
                return handle.read()
        return ''.join(_file)

    def tokenize_string(self, code):
        """Lex ``code`` and print every token produced; returns None."""
        self.lexer.input(code)
        for token in self.lexer:
            print(token)

    def tokenize_file(self, _file):
        """Print the tokens of a file given as a path or an open file object."""
        return self.tokenize_string(self._read_source(_file))

    def parse_string(self, code, debug=0, lineno=1, prefix=''):
        """Parse ``prefix + code`` and return the parser's result.

        ``lineno`` sets the lexer's line counter before parsing and
        ``debug`` is forwarded to the PLY parser.
        """
        self.lexer.lineno = lineno
        return self.parser.parse(prefix + code, lexer=self.lexer, debug=debug)

    def parse_file(self, _file, debug=0):
        """Parse a file given as a path or an open file object."""
        return self.parse_string(self._read_source(_file), debug=debug)
349 return self.parse_string(content, debug=debug)