# Source blob: 545faee6faea1529fe41a0eeb78a6698143edfa7
# Module metadata: author, copyright, license and version strings.
__author__ = "Dusan (Ph4r05) Klinec"
__copyright__ = "Copyright (C) 2014 Dusan (ph4r05) Klinec"
__license__ = "Apache License, Version 2.0"
__version__ = "1.0"
5
6import ply.lex as lex
7import ply.yacc as yacc
8from .model import *
9
class ProtobufLexer(object):
    """Token rules for Protocol Buffers (``.proto``) source, for ``ply.lex``.

    Attributes named ``t_<TOKEN>`` are token rules: string attributes hold
    the regular expression directly, while for function rules the function
    docstring *is* the regular expression (PLY convention), so those
    docstrings must not be replaced by prose.
    """

    # Reserved words; a NAME whose text equals one of these is re-typed to
    # the upper-cased keyword token in t_NAME.
    keywords = ('double', 'float', 'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
                'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'bool', 'string', 'bytes',
                'message', 'required', 'optional', 'repeated', 'enum', 'extensions', 'max', 'extends', 'extend',
                'to', 'package', 'service', 'rpc', 'returns', 'true', 'false', 'option', 'import')

    # Every token type the lexer may emit: the generic tokens followed by one
    # upper-cased token per keyword.
    tokens = [
        'NAME',
        'NUM',
        'STRING_LITERAL',
        'LINE_COMMENT', 'BLOCK_COMMENT',

        'LBRACE', 'RBRACE', 'LBRACK', 'RBRACK',
        'LPAR', 'RPAR', 'EQ', 'SEMI'

    ] + [k.upper() for k in keywords]
    literals = '()+-*/=?:,.^|&~!=[]{};<>@%'

    # Optionally signed decimal integer.
    t_NUM = r'[+-]?\d+'
    # Double-quoted string; backslash escapes are allowed, raw newlines are not.
    t_STRING_LITERAL = r'\"([^\\\n]|(\\.))*?\"'

    # The "ignore_" prefix makes PLY discard '//' comments.
    t_ignore_LINE_COMMENT = '//.*'

    # Block comments produce no token (nothing is returned); only the line
    # counter is advanced by the number of newlines they span.
    def t_BLOCK_COMMENT(self, t):
        r'/\*(.|\n)*?\*/'
        t.lexer.lineno += t.value.count('\n')

    t_LBRACE = '{'
    t_RBRACE = '}'
    t_LBRACK = '\\['
    t_RBRACK = '\\]'
    t_LPAR = '\\('
    t_RPAR = '\\)'
    t_EQ = '='
    t_SEMI = ';'
    # Characters skipped between tokens: space, tab and form feed.
    t_ignore = ' \t\f'

    # Identifier; promoted to the matching keyword token when reserved.
    def t_NAME(self, t):
        '[A-Za-z_$][A-Za-z0-9_$]*'
        if t.value in ProtobufLexer.keywords:
            t.type = t.value.upper()
        return t

    # Unix newlines: one line per '\n'; no token is emitted.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # Windows newlines: each line break is the two characters '\r\n', hence
    # the halving. Floor division keeps lineno an int on Python 3, where '/'
    # would silently turn it into a float.
    def t_newline2(self, t):
        r'(\r\n)+'
        t.lexer.lineno += len(t.value) // 2

    # Report an unrecognised character on stdout and skip over it so that
    # lexing continues with the next character.
    def t_error(self, t):
        print("Illegal character '{}' ({}) in line {}".format(t.value[0], hex(ord(t.value[0])), t.lexer.lineno))
        t.lexer.skip(1)
63
class ProtobufParser(object):
    """Grammar rules for Protocol Buffers (``.proto``) source, for ``ply.yacc``.

    Each ``p_*`` method is one grammar production: its docstring *is* the
    production (PLY convention, so it must not be replaced by prose) and its
    body stores the semantic value of the rule in ``p[0]``, built from the
    right-hand-side values ``p[1]``, ``p[2]``, ...
    """

    tokens = ProtobufLexer.tokens

    def p_empty(self, p):
        '''empty :'''
        pass

    def p_field_modifier(self, p):
        '''field_modifier : REQUIRED
                          | OPTIONAL
                          | REPEATED'''
        p[0] = p[1]

    def p_primitive_type(self, p):
        '''primitive_type : DOUBLE
                          | FLOAT
                          | INT32
                          | INT64
                          | UINT32
                          | UINT64
                          | SINT32
                          | SINT64
                          | FIXED32
                          | FIXED64
                          | SFIXED32
                          | SFIXED64
                          | BOOL
                          | STRING
                          | BYTES'''
        p[0] = p[1]

    def p_field_id(self, p):
        '''field_id : NUM'''
        p[0] = p[1]

    def p_rvalue(self, p):
        '''rvalue : NUM
                  | TRUE
                  | FALSE'''
        p[0] = p[1]

    def p_rvalue2(self, p):
        '''rvalue : NAME'''
        p[0] = Name(p[1])

    def p_field_directive(self, p):
        '''field_directive : LBRACK NAME EQ rvalue RBRACK'''
        p[0] = FieldDirectiveDeclaration(Name(p[2]), p[4])

    def p_field_directive_times(self, p):
        '''field_directive_times : field_directive_plus'''
        p[0] = p[1]

    def p_field_directive_times2(self, p):
        '''field_directive_times : empty'''
        p[0] = []

    # One or more field directives, collected into a list.
    def p_field_directive_plus(self, p):
        '''field_directive_plus : field_directive
                                | field_directive_plus field_directive'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_field_type(self, p):
        '''field_type : primitive_type'''
        p[0] = FieldPrimitiveType(p[1])

    def p_field_type2(self, p):
        '''field_type : NAME'''
        p[0] = Name(p[1])

    # Root of the field declaration.
    def p_field_definition(self, p):
        '''field_definition : field_modifier field_type NAME EQ field_id field_directive_times SEMI'''
        p[0] = FieldDeclaration(p[1], p[2], Name(p[3]), p[5], p[6])

    # Root of the enum field declaration.
    def p_enum_field(self, p):
        '''enum_field : NAME EQ NUM SEMI'''
        p[0] = EnumFieldDeclaration(Name(p[1]), p[3])

    def p_enum_body_part(self, p):
        '''enum_body_part : enum_field
                          | option_directive'''
        p[0] = p[1]

    # One or more enum body parts, collected into a list.
    def p_enum_body(self, p):
        '''enum_body : enum_body_part
                     | enum_body enum_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_enum_body_opt(self, p):
        '''enum_body_opt : empty'''
        p[0] = []

    def p_enum_body_opt2(self, p):
        '''enum_body_opt : enum_body'''
        p[0] = p[1]

    # Root of the enum declaration.
    # enum_definition ::= 'enum' ident '{' { ident '=' integer ';' }* '}'
    def p_enum_definition(self, p):
        '''enum_definition : ENUM NAME LBRACE enum_body_opt RBRACE'''
        p[0] = EnumDeclaration(Name(p[2]), p[4])

    def p_extensions_to(self, p):
        '''extensions_to : MAX'''
        p[0] = ExtensionsMax()

    def p_extensions_to2(self, p):
        '''extensions_to : NUM'''
        p[0] = p[1]

    # extensions_definition ::= 'extensions' integer 'to' integer ';'
    def p_extensions_definition(self, p):
        '''extensions_definition : EXTENSIONS NUM TO extensions_to SEMI'''
        p[0] = ExtensionsDirective(p[2], p[4])

    # message_extension ::= 'extend' ident '{' message_body '}'
    def p_message_extension(self, p):
        '''message_extension : EXTEND NAME LBRACE message_body RBRACE'''
        p[0] = MessageExtension(Name(p[2]), p[4])

    def p_message_body_part(self, p):
        '''message_body_part : field_definition
                             | enum_definition
                             | message_definition
                             | extensions_definition
                             | message_extension'''
        p[0] = p[1]

    # message_body ::= { field_definition | enum_definition | message_definition | extensions_definition | message_extension }*
    def p_message_body(self, p):
        '''message_body : empty'''
        p[0] = []

    # message_body ::= { field_definition | enum_definition | message_definition | extensions_definition | message_extension }*
    def p_message_body2(self, p):
        '''message_body : message_body_part
                        | message_body message_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # Root of the message declaration.
    # message_definition = MESSAGE_ - ident("messageId") + LBRACE + message_body("body") + RBRACE
    def p_message_definition(self, p):
        '''message_definition : MESSAGE NAME LBRACE message_body RBRACE'''
        p[0] = MessageDeclaration(Name(p[2]), p[4])

    # method_definition ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';'
    # The terminating ';' is accepted (as the production above documents) but
    # remains optional, so input without it still parses as before. The
    # operand positions are the same in both alternatives.
    def p_method_definition(self, p):
        '''method_definition : RPC NAME LPAR NAME RPAR RETURNS LPAR NAME RPAR
                             | RPC NAME LPAR NAME RPAR RETURNS LPAR NAME RPAR SEMI'''
        p[0] = MethodDefinition(Name(p[2]), Name(p[4]), Name(p[8]))

    def p_method_definition_opt(self, p):
        '''method_definition_opt : empty'''
        p[0] = []

    def p_method_definition_opt2(self, p):
        '''method_definition_opt : method_definition
                                 | method_definition_opt method_definition'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # service_definition ::= 'service' ident '{' method_definition* '}'
    # service_definition = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(method_definition)) + RBRACE
    def p_service_definition(self, p):
        '''service_definition : SERVICE NAME LBRACE method_definition_opt RBRACE'''
        p[0] = ServiceDeclaration(Name(p[2]), p[4])

    # package_directive ::= 'package' ident [ '.' ident]* ';'
    # NOTE(review): only a single NAME is accepted here; the dotted form
    # described above is not implemented by this production.
    def p_package_directive(self, p):
        '''package_directive : PACKAGE NAME SEMI'''
        p[0] = PackageDeclaration(Name(p[2]))

    # import_directive = IMPORT_ - quotedString("importFileSpec") + SEMI
    def p_import_directive(self, p):
        '''import_directive : IMPORT STRING_LITERAL SEMI'''
        p[0] = ImportStatement(Literal(p[2]))

    def p_option_rvalue(self, p):
        '''option_rvalue : NUM
                         | TRUE
                         | FALSE'''
        p[0] = p[1]

    def p_option_rvalue2(self, p):
        '''option_rvalue : STRING_LITERAL'''
        p[0] = Literal(p[1])

    # option_directive = OPTION_ - ident("optionName") + EQ + quotedString("optionValue") + SEMI
    def p_option_directive(self, p):
        '''option_directive : OPTION NAME EQ option_rvalue SEMI'''
        p[0] = OptionDefinition(Name(p[2]), p[4])

    # topLevelStatement = Group(message_definition | message_extension | enum_definition | service_definition | import_directive | option_directive)
    def p_topLevel(self, p):
        '''topLevel : message_definition
                    | message_extension
                    | enum_definition
                    | service_definition
                    | import_directive
                    | option_directive'''
        p[0] = p[1]

    def p_package_definition(self, p):
        '''package_definition : package_directive'''
        p[0] = p[1]

    # No package directive present: the package slot becomes an empty list.
    def p_packages2(self, p):
        '''package_definition : empty'''
        p[0] = []

    def p_statements2(self, p):
        '''statements : topLevel
                      | statements topLevel'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_statements(self, p):
        '''statements : empty'''
        p[0] = []

    # parser = Optional(package_directive) + ZeroOrMore(topLevelStatement)
    # The parse result is the two-element list [package, statements].
    def p_goal2(self, p):
        '''goal : package_definition statements'''
        p[0] = [p[1], p[2]]

    # Syntax errors are only reported on stdout; no exception is raised here.
    def p_error(self, p):
        print('error: {}'.format(p))
305
class ProtobufAnalyzer(object):
    """Convenience front end that wires ProtobufLexer and ProtobufParser together."""

    def __init__(self):
        """Build the PLY lexer and the parser (start symbol ``goal``)."""
        self.lexer = lex.lex(module=ProtobufLexer(), optimize=1)
        self.parser = yacc.yacc(module=ProtobufParser(), start='goal', optimize=1)

    @staticmethod
    def _read_source(_file):
        """Return the full text of ``_file``.

        ``_file`` is either a path (``str``), which is opened and closed
        here, or an already-open file-like object / iterable of lines, which
        is consumed but left for the caller to close.
        """
        if isinstance(_file, str):
            # open() works on both Python 2 and 3; the former file() builtin
            # no longer exists on Python 3.
            with open(_file) as handle:
                return handle.read()
        return ''.join(_file)

    def tokenize_string(self, code):
        """Lex ``code`` and print every token to stdout (debugging aid)."""
        self.lexer.input(code)
        for token in self.lexer:
            print(token)

    def tokenize_file(self, _file):
        """Lex the content of ``_file`` (path or file-like) and print its tokens."""
        return self.tokenize_string(self._read_source(_file))

    def parse_string(self, code, debug=0, lineno=1, prefix=''):
        """Parse ``prefix + code`` and return the parser's result.

        ``lineno`` is the line number assigned to the lexer before parsing
        starts; ``debug`` is passed through to the parser's ``parse`` call.
        """
        self.lexer.lineno = lineno
        return self.parser.parse(prefix + code, lexer=self.lexer, debug=debug)

    def parse_file(self, _file, debug=0):
        """Parse the content of ``_file`` (path or file-like) and return the result."""
        return self.parse_string(self._read_source(_file), debug=debug)
335 return self.parse_string(content, debug=debug)