blob: 585d0b9fbe0bd2c6722b2c57d0a6c18d2f6bc606 [file] [log] [blame]
Dusan Klinecccaa0d92014-11-09 03:21:31 +01001__author__ = "Dusan (Ph4r05) Klinec"
Sapan Bhatiab1225872017-03-29 20:47:47 +02002
Dusan Klinecccaa0d92014-11-09 03:21:31 +01003__copyright__ = "Copyright (C) 2014 Dusan (ph4r05) Klinec"
4__license__ = "Apache License, Version 2.0"
5__version__ = "1.0"
6
7import ply.lex as lex
8import ply.yacc as yacc
9from .model import *
10
Sapan Bhatiab1225872017-03-29 20:47:47 +020011import pdb
12
class ProtobufLexer(object):
    """PLY lexer rule set for the protobuf dialect parsed by this module.

    An instance is handed to ``lex.lex(module=...)``.  PLY reads the
    ``tokens``/``literals`` attributes, every ``t_*`` string as a token
    regex, and the docstring of every ``t_*`` method as that rule's regex,
    so the docstrings of the ``t_*`` methods below are patterns, not prose.
    """

    # Reserved words; t_NAME re-types a matching identifier to the
    # upper-cased keyword (e.g. 'message' -> MESSAGE).
    keywords = ('double', 'float', 'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
                'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'bool', 'string', 'bytes',
                'message', 'required', 'optional', 'repeated', 'enum', 'extensions', 'max', 'extends', 'extend',
                'to', 'package', '_service', 'rpc', 'returns', 'true', 'false', 'option', 'import', 'manytoone', 'manytomany', 'onetoone')

    tokens = [
        'NAME',
        'PLUS',
        'NUM',
        'STRING_LITERAL',
        'LINE_COMMENT', 'BLOCK_COMMENT',

        'LBRACE', 'RBRACE', 'LBRACK', 'RBRACK',
        'LPAR', 'RPAR', 'EQ', 'SEMI', 'DOT',
        'ARROW', 'COLON', 'COMMA',
        'STARTTOKEN'

    ] + [k.upper() for k in keywords]
    literals = '()+-*/=?:,.^|&~!=[]{};<>@%'

    t_NUM = r'[+-]?\d+(\.\d+)?'
    t_STRING_LITERAL = r'\"([^\\\n]|(\\.))*?\"'

    t_ignore_LINE_COMMENT = '//.*'

    # Block comments produce no token (nothing is returned); only the line
    # counter is advanced by the number of newlines inside the comment.
    def t_BLOCK_COMMENT(self, t):
        r'/\*(.|\n)*?\*/'
        t.lexer.lineno += t.value.count('\n')

    t_LBRACE = '{'
    t_RBRACE = '}'
    t_LBRACK = '\\['
    t_RBRACK = '\\]'
    t_LPAR = '\\('
    t_RPAR = '\\)'
    t_PLUS = '\\+'
    t_EQ = '='
    t_SEMI = ';'
    t_ARROW = '\\-\\>'
    t_COLON = '\\:'
    t_COMMA = '\\,'
    t_DOT = '\\.'
    t_ignore = ' \t\f'
    # NOTE(review): same pattern as t_PLUS above; which of the two token
    # types a '+' receives depends on how PLY orders equal-length string
    # rules -- confirm this is the intended behaviour.
    t_STARTTOKEN = '\\+'

    # Identifiers; reserved words are re-typed to their keyword token.
    def t_NAME(self, t):
        '[A-Za-z_$][A-Za-z0-9_$]*'
        if t.value in ProtobufLexer.keywords:
            t.type = t.value.upper()
        return t

    # Runs of LF: one line per character.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # Runs of CRLF: one line per two characters.  Floor division keeps
    # lineno an int on Python 3 ('/' would turn it into a float).
    def t_newline2(self, t):
        r'(\r\n)+'
        t.lexer.lineno += len(t.value) // 2

    # Report the offending character and resume one character later.
    def t_error(self, t):
        print("Illegal character '{}' ({}) in line {}".format(t.value[0], hex(ord(t.value[0])), t.lexer.lineno))
        t.lexer.skip(1)
76
class LexHelper:
    """Derives the overall line/lex span of a yacc production.

    ``offset`` is subtracted from both ends of every span that is returned.
    """
    offset = 0

    def _max_span(self, p, kind):
        """Return the widest ``kind`` span over all symbols of ``p``.

        ``kind`` is ``"linespan"`` or ``"lexspan"``: it names both the
        method called on the production (``p.<kind>(index)``) and the
        attribute consulted on a symbol's value when the production reports
        ``(0, 0)`` for that symbol.

        Returns ``(0, 0)`` when no symbol contributes a usable span,
        otherwise ``(min_start - offset, max_end - offset)``.
        """
        lowest, highest = 1e60, -1  # sentinels meaning "nothing seen yet"
        for idx in range(len(p)):
            span = getattr(p, kind)(idx)
            if span[0] == 0 and span[1] == 0:
                # The production has no span for this symbol; fall back to
                # one stored on the symbol's value, if there is one.
                if not hasattr(p[idx], kind):
                    continue
                span = getattr(p[idx], kind)
            if span is None or len(span) != 2:
                continue
            if span[0] == 0 and span[1] == 0:
                continue
            if span[0] < lowest:
                lowest = span[0]
            if span[1] > highest:
                highest = span[1]
        if lowest == 1e60 and highest == -1:
            return (0, 0)
        return (lowest - self.offset, highest - self.offset)

    def get_max_linespan(self, p):
        """Return the (first, last) line span covered by production ``p``."""
        return self._max_span(p, "linespan")

    def get_max_lexspan(self, p):
        """Return the (start, end) lex span covered by production ``p``."""
        return self._max_span(p, "lexspan")

    def set_parse_object(self, dst, p):
        """Store the spans computed from ``p``, and ``p`` itself, on ``dst``.

        ``dst`` must provide ``setLexData(linespan=..., lexspan=...)`` and
        ``setLexObj(p)``.
        """
        dst.setLexData(linespan=self.get_max_linespan(p), lexspan=self.get_max_lexspan(p))
        dst.setLexObj(p)
116
def srcPort(x):
    """Wrap ``x`` in a single 'port' FieldDirective list; [] when ``x`` is falsy."""
    return [FieldDirective(Name('port'), x)] if x else []
122
123
class ProtobufParser(object):
    """PLY yacc grammar for the protobuf dialect tokenised by ProtobufLexer.

    An instance is handed to ``yacc.yacc(module=...)``.  The docstring of
    every ``p_*`` method is its grammar production, so those docstrings must
    contain nothing but grammar; explanatory notes are kept in ``#``
    comments.  The helpers ``LU``, ``Name``, ``FieldDirective`` and the other
    model classes are not defined in this class (presumably they come from
    the module's ``from .model import *`` -- confirm).
    """
    tokens = ProtobufLexer.tokens
    offset = 0
    lh = LexHelper()  # class-level: shared by every ProtobufParser instance

    def setOffset(self, of):
        """Record ``of`` on this parser and on its LexHelper."""
        self.offset = of
        self.lh.offset = of

    def p_empty(self, p):
        '''empty :'''
        pass

    def p_field_modifier(self, p):
        '''field_modifier : REQUIRED
                          | OPTIONAL
                          | REPEATED'''
        p[0] = LU.i(p, 1)

    def p_primitive_type(self, p):
        '''primitive_type : DOUBLE
                          | FLOAT
                          | INT32
                          | INT64
                          | UINT32
                          | UINT64
                          | SINT32
                          | SINT64
                          | FIXED32
                          | FIXED64
                          | SFIXED32
                          | SFIXED64
                          | BOOL
                          | STRING
                          | BYTES'''
        p[0] = LU.i(p, 1)

    def p_link_type(self, p):
        '''link_type : ONETOONE
                     | MANYTOONE
                     | MANYTOMANY'''
        p[0] = LU.i(p, 1)

    def p_field_id(self, p):
        '''field_id : NUM'''
        p[0] = LU.i(p, 1)

    def p_rvalue(self, p):
        '''rvalue : NUM
                  | TRUE
                  | FALSE'''
        p[0] = LU.i(p, 1)

    def p_rvalue3(self, p):
        '''rvalue : STRING_LITERAL'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_rvalue2(self, p):
        '''rvalue : NAME'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_field_directives2(self, p):
        '''field_directives : empty'''
        p[0] = []

    def p_field_directives(self, p):
        '''field_directives : LBRACK field_directive_times RBRACK'''
        p[0] = p[2]

    def p_field_directive(self, p):
        '''field_directive : NAME EQ rvalue'''
        p[0] = FieldDirective(Name(LU.i(p, 1)), LU.i(p, 3))
        self.lh.set_parse_object(p[0], p)

    def p_csv_expr(self, p):
        '''csv_expr : LPAR csv RPAR'''
        p[0] = p[2]

    def p_csv_expr2(self, p):
        '''csv_expr : empty'''
        p[0] = []

    # An empty csv must yield a list: p_csv concatenates onto p[1] for
    # `csv COMMA NAME`, and p_csv_expr forwards the value unchanged, so
    # leaving p[0] unset (None) would raise TypeError or leak None.
    def p_csv2(self, p):
        '''csv : empty'''
        p[0] = []

    def p_csv(self, p):
        '''csv : NAME
               | csv COMMA NAME'''
        if len(p) == 2:
            p[0] = [LU(p, 1)]
        else:
            p[0] = p[1] + [LU(p, 3)]

    def p_field_directive_times(self, p):
        '''field_directive_times : field_directive_plus'''
        p[0] = p[1]

    def p_field_directive_times2(self, p):
        '''field_directive_times : empty'''
        p[0] = []

    def p_field_directive_plus(self, p):
        '''field_directive_plus : field_directive
                                | field_directive_plus COMMA field_directive'''
        if len(p) == 2:
            p[0] = [LU(p, 1)]
        else:
            p[0] = p[1] + [LU(p, 3)]

    def p_dotname(self, p):
        '''dotname : NAME
                   | dotname DOT NAME'''
        if len(p) == 2:
            p[0] = [LU(p, 1)]
        else:
            p[0] = p[1] + [LU(p, 3)]

    # Hack for cases when there is a field named 'message' or 'max'
    def p_fieldName(self, p):
        '''field_name : NAME
                      | MESSAGE
                      | PLUS
                      | MAX'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_field_type(self, p):
        '''field_type : primitive_type'''
        p[0] = FieldType(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)

    def p_field_type2(self, p):
        '''field_type : dotname'''
        p[0] = DotName(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_colon_fieldname(self, p):
        '''colon_fieldname : COLON field_name'''
        p[0] = p[2]
        self.lh.set_parse_object(p[0], p)

    def p_colon_fieldname2(self, p):
        '''colon_fieldname : empty'''
        p[0] = None

    # TODO: Add directives to link definition
    # A link yields a LinkSpec pairing an int32 FieldDefinition (carrying
    # 'type = link', 'model = <target>', an optional 'port' directive and
    # any user-supplied directives) with the LinkDefinition itself.
    def p_link_definition(self, p):
        '''link_definition : field_modifier link_type field_name ARROW NAME colon_fieldname EQ field_id field_directives SEMI'''
        p[0] = LinkSpec(
            FieldDefinition(
                LU.i(p, 1), Name('int32'), LU.i(p, 3), LU.i(p, 8),
                [FieldDirective(Name('type'), Name('link')),
                 FieldDirective(Name('model'), LU.i(p, 5))]
                + srcPort(LU.i(p, 6)) + LU.i(p, 9)),
            LinkDefinition(LU.i(p, 2), LU.i(p, 3), LU.i(p, 5), LU.i(p, 6)))

        self.lh.set_parse_object(p[0], p)

    # Root of the field declaration.
    def p_field_definition(self, p):
        '''field_definition : field_modifier field_type field_name EQ field_id field_directives SEMI'''
        p[0] = FieldDefinition(LU.i(p, 1), LU.i(p, 2), LU.i(p, 3), LU.i(p, 5), LU.i(p, 6))
        self.lh.set_parse_object(p[0], p)

    # Root of the enum field declaration.
    def p_enum_field(self, p):
        '''enum_field : field_name EQ NUM SEMI'''
        p[0] = EnumFieldDefinition(LU.i(p, 1), LU.i(p, 3))
        self.lh.set_parse_object(p[0], p)

    def p_enum_body_part(self, p):
        '''enum_body_part : enum_field
                          | option_directive'''
        p[0] = p[1]

    def p_enum_body(self, p):
        '''enum_body : enum_body_part
                     | enum_body enum_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_enum_body_opt(self, p):
        '''enum_body_opt : empty'''
        p[0] = []

    def p_enum_body_opt2(self, p):
        '''enum_body_opt : enum_body'''
        p[0] = p[1]

    # Root of the enum declaration.
    # enum_definition ::= 'enum' ident '{' { ident '=' integer ';' }* '}'
    def p_enum_definition(self, p):
        '''enum_definition : ENUM NAME LBRACE enum_body_opt RBRACE'''
        p[0] = EnumDefinition(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    def p_extensions_to(self, p):
        '''extensions_to : MAX'''
        p[0] = ExtensionsMax()
        self.lh.set_parse_object(p[0], p)

    def p_extensions_to2(self, p):
        '''extensions_to : NUM'''
        p[0] = LU.i(p, 1)

    # extensions_definition ::= 'extensions' integer 'to' integer ';'
    def p_extensions_definition(self, p):
        '''extensions_definition : EXTENSIONS NUM TO extensions_to SEMI'''
        p[0] = ExtensionsDirective(LU.i(p, 2), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # message_extension ::= 'extend' ident '{' message_body '}'
    def p_message_extension(self, p):
        '''message_extension : EXTEND NAME LBRACE message_body RBRACE'''
        p[0] = MessageExtension(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    def p_message_body_part(self, p):
        '''message_body_part : field_definition
                             | link_definition
                             | enum_definition
                             | message_definition
                             | extensions_definition
                             | message_extension'''
        p[0] = p[1]

    # message_body ::= { field_definition | enum_definition | message_definition | extensions_definition | message_extension }*
    def p_message_body(self, p):
        '''message_body : empty'''
        p[0] = []

    # message_body ::= { field_definition | enum_definition | message_definition | extensions_definition | message_extension }*
    def p_message_body2(self, p):
        '''message_body : message_body_part
                        | message_body message_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # Root of the message declaration.
    # message_definition = MESSAGE_ - ident("messageId") + LBRACE + message_body("body") + RBRACE
    def p_message_definition(self, p):
        '''message_definition : MESSAGE NAME csv_expr LBRACE message_body RBRACE'''
        p[0] = MessageDefinition(Name(LU.i(p, 2)), LU.i(p, 3), LU.i(p, 5))
        self.lh.set_parse_object(p[0], p)

    # method_definition ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';'
    def p_method_definition(self, p):
        '''method_definition : RPC NAME LPAR NAME RPAR RETURNS LPAR NAME RPAR'''
        p[0] = MethodDefinition(Name(LU.i(p, 2)), Name(LU.i(p, 4)), Name(LU.i(p, 8)))
        self.lh.set_parse_object(p[0], p)

    def p_method_definition_opt(self, p):
        '''method_definition_opt : empty'''
        p[0] = []

    def p_method_definition_opt2(self, p):
        '''method_definition_opt : method_definition
                                 | method_definition_opt method_definition'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # service_definition ::= 'service' ident '{' method_definition* '}'
    # service_definition = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(method_definition)) + RBRACE
    def p_service_definition(self, p):
        '''service_definition : _SERVICE NAME LBRACE method_definition_opt RBRACE'''
        p[0] = ServiceDefinition(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # package_directive ::= 'package' ident [ '.' ident]* ';'
    def p_package_directive(self, p):
        '''package_directive : PACKAGE dotname SEMI'''
        p[0] = PackageStatement(Name(LU.i(p, 2)))
        self.lh.set_parse_object(p[0], p)

    # import_directive = IMPORT_ - quotedString("importFileSpec") + SEMI
    def p_import_directive(self, p):
        '''import_directive : IMPORT STRING_LITERAL SEMI'''
        p[0] = ImportStatement(Literal(LU.i(p, 2)))
        self.lh.set_parse_object(p[0], p)

    def p_option_rvalue(self, p):
        '''option_rvalue : NUM
                         | TRUE
                         | FALSE'''
        p[0] = LU(p, 1)

    def p_option_rvalue2(self, p):
        '''option_rvalue : STRING_LITERAL'''
        p[0] = Literal(LU(p, 1))

    def p_option_rvalue3(self, p):
        '''option_rvalue : NAME'''
        p[0] = Name(LU.i(p, 1))

    # option_directive = OPTION_ - ident("optionName") + EQ + quotedString("optionValue") + SEMI
    def p_option_directive(self, p):
        '''option_directive : OPTION NAME EQ option_rvalue SEMI'''
        p[0] = OptionStatement(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # topLevelStatement = Group(message_definition | message_extension | enum_definition | service_definition | import_directive | option_directive)
    def p_topLevel(self, p):
        '''topLevel : message_definition
                    | message_extension
                    | enum_definition
                    | service_definition
                    | import_directive
                    | option_directive'''
        p[0] = p[1]

    def p_package_definition(self, p):
        '''package_definition : package_directive'''
        p[0] = p[1]

    def p_packages2(self, p):
        '''package_definition : empty'''
        p[0] = []

    def p_statements2(self, p):
        '''statements : topLevel
                      | statements topLevel'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_statements(self, p):
        '''statements : empty'''
        p[0] = []

    # parser = Optional(package_directive) + ZeroOrMore(topLevelStatement)
    def p_protofile(self, p):
        '''protofile : package_definition statements'''
        p[0] = ProtoFile(LU.i(p, 1), LU.i(p, 2))
        self.lh.set_parse_object(p[0], p)

    # Parsing starting point
    def p_goal(self, p):
        '''goal : STARTTOKEN protofile'''
        p[0] = p[2]

    # Syntax errors are only reported on stdout; nothing is raised here.
    def p_error(self, p):
        print('error: {}'.format(p))
478
class ProtobufAnalyzer(object):
    """Front end that builds the PLY lexer/parser and feeds source to them."""

    def __init__(self):
        self.lexer = lex.lex(module=ProtobufLexer())
        self.parser = yacc.yacc(module=ProtobufParser(), start='goal', debug=0)

    @staticmethod
    def _read_source(_file):
        """Return the full text of ``_file``.

        ``_file`` is either a path (``str``), which is opened and closed
        here, or an iterable of text lines (e.g. an already open file),
        which is consumed but left open for the caller to manage.
        """
        if isinstance(_file, str):
            # open() instead of the Python 2-only file() builtin; the
            # with-block guarantees the handle is closed.
            with open(_file) as handle:
                return handle.read()
        return ''.join(_file)

    def tokenize_string(self, code):
        """Lex ``code`` and print every token produced."""
        self.lexer.input(code)
        for token in self.lexer:
            print(token)

    def tokenize_file(self, _file):
        """Lex the contents of ``_file`` (path or iterable of lines)."""
        return self.tokenize_string(self._read_source(_file))

    def parse_string(self, code, debug=0, lineno=1, prefix='+'):
        """Parse ``code`` and return whatever the yacc parser returns.

        ``prefix`` is prepended to ``code`` before parsing, and the lexer's
        line counter is reset to ``lineno`` first.
        """
        self.lexer.lineno = lineno
        # NOTE(review): this sets `offset` on the object built by
        # yacc.yacc(), not through ProtobufParser.setOffset -- confirm
        # which one is intended.
        self.parser.offset = len(prefix)
        return self.parser.parse(prefix + code, lexer=self.lexer, debug=debug)

    def parse_file(self, _file, debug=0):
        """Parse the contents of ``_file`` (path or iterable of lines)."""
        return self.parse_string(self._read_source(_file), debug=debug)