blob: eb2d776d7f90e04cfd274097547add68a677cc6e [file] [log] [blame]
Dusan Klinecccaa0d92014-11-09 03:21:31 +01001__author__ = "Dusan (Ph4r05) Klinec"
Sapan Bhatiab1225872017-03-29 20:47:47 +02002
Dusan Klinecccaa0d92014-11-09 03:21:31 +01003__copyright__ = "Copyright (C) 2014 Dusan (ph4r05) Klinec"
4__license__ = "Apache License, Version 2.0"
5__version__ = "1.0"
6
7import ply.lex as lex
8import ply.yacc as yacc
9from .model import *
10
Sapan Bhatiab1225872017-03-29 20:47:47 +020011import pdb
12
class ProtobufLexer(object):
    """PLY lexer rule set for the protobuf dialect handled by this module.

    An instance is handed to ``lex.lex(module=...)``.  PLY picks up the
    ``tokens`` / ``literals`` lists and every ``t_*`` attribute; for the
    function rules the docstring *is* the regular expression, so those
    docstrings must not be reworded.
    """

    # Reserved words.  Each one becomes a token whose type is the upper-cased
    # word (see the tail of ``tokens`` and ``t_NAME``).
    keywords = ('double', 'float', 'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
                'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'bool', 'string', 'bytes',
                'message', 'required', 'optional', 'repeated', 'enum', 'extensions', 'max', 'extends', 'extend',
                'to', 'package', '_service', 'rpc', 'returns', 'true', 'false', 'option', 'import', 'manytoone', 'manytomany', 'onetoone')

    tokens = [
        'NAME',
        'NUM',
        'STRING_LITERAL',
        'LINE_COMMENT', 'BLOCK_COMMENT',

        'LBRACE', 'RBRACE', 'LBRACK', 'RBRACK',
        'LPAR', 'RPAR', 'EQ', 'SEMI', 'DOT',
        'ARROW', 'COLON', 'COMMA',
        'STARTTOKEN'

    ] + [k.upper() for k in keywords]
    literals = '()+-*/=?:,.^|&~!=[]{};<>@%'

    t_NUM = r'[+-]?\d+'
    t_STRING_LITERAL = r'\"([^\\\n]|(\\.))*?\"'

    # Line comments are matched and discarded.
    t_ignore_LINE_COMMENT = '//.*'

    def t_BLOCK_COMMENT(self, t):
        r'/\*(.|\n)*?\*/'
        # Nothing is returned, so the comment produces no token; only the
        # line counter is advanced past the newlines it spans.
        t.lexer.lineno += t.value.count('\n')

    t_LBRACE = '{'
    t_RBRACE = '}'
    t_LBRACK = '\\['
    t_RBRACK = '\\]'
    t_LPAR = '\\('
    t_RPAR = '\\)'
    t_EQ = '='
    t_SEMI = ';'
    t_ARROW = '\\-\\>'
    t_COLON = '\\:'
    t_COMMA = '\\,'
    t_DOT = '\\.'
    t_ignore = ' \t\f'
    # A single '+' is prepended to the input by ProtobufAnalyzer.parse_string
    # and consumed by the 'goal' grammar rule.
    t_STARTTOKEN = '\\+'

    def t_NAME(self, t):
        r'[A-Za-z_$][A-Za-z0-9_$]*'
        # Identifiers that are reserved words are re-typed to the keyword
        # token (e.g. 'message' -> MESSAGE); everything else stays NAME.
        if t.value in ProtobufLexer.keywords:
            t.type = t.value.upper()
        return t

    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    def t_newline2(self, t):
        r'(\r\n)+'
        # Each CRLF pair is two characters.  Floor division keeps lineno an
        # int; '/' would turn it into a float under Python 3.
        t.lexer.lineno += len(t.value) // 2

    def t_error(self, t):
        # Report the offending character and resume lexing after it.
        print("Illegal character '{}' ({}) in line {}".format(t.value[0], hex(ord(t.value[0])), t.lexer.lineno))
        t.lexer.skip(1)
74
class LexHelper:
    """Computes the overall line/lex span of a yacc production.

    ``offset`` is subtracted from both ends of every non-empty span that is
    returned (ProtobufParser.setOffset assigns it).
    """

    offset = 0

    def _max_span(self, p, span_name):
        """Return the widest span over all symbols of production ``p``.

        ``span_name`` is ``'linespan'`` or ``'lexspan'``.  For each index the
        span is taken from ``p.<span_name>(index)``; when that is (0, 0) the
        value object ``p[index]`` is consulted for an attribute of the same
        name instead.  Returns ``(0, 0)`` when no usable span was found,
        otherwise ``(min_start - offset, max_end - offset)``.
        """
        span_of = getattr(p, span_name)
        widest = [1e60, -1]  # sentinel: "nothing seen yet"
        for idx in range(len(p)):
            span = span_of(idx)
            if span[0] == 0 and span[1] == 0:
                # The parser has no position for this symbol; fall back to a
                # span stored on the value object, if it carries one.
                if not hasattr(p[idx], span_name):
                    continue
                span = getattr(p[idx], span_name)
            if span is None or len(span) != 2:
                continue
            if span[0] == 0 and span[1] == 0:
                continue
            if span[0] < widest[0]:
                widest[0] = span[0]
            if span[1] > widest[1]:
                widest[1] = span[1]
        if widest == [1e60, -1]:
            return (0, 0)
        return (widest[0] - self.offset, widest[1] - self.offset)

    def get_max_linespan(self, p):
        """Return the (first, last) line span covered by production ``p``."""
        return self._max_span(p, "linespan")

    def get_max_lexspan(self, p):
        """Return the (start, end) lexer-position span covered by ``p``."""
        return self._max_span(p, "lexspan")

    def set_parse_object(self, dst, p):
        """Store the spans of ``p`` and ``p`` itself on ``dst``.

        ``dst`` must provide ``setLexData(linespan=..., lexspan=...)`` and
        ``setLexObj(p)``.
        """
        dst.setLexData(linespan=self.get_max_linespan(p), lexspan=self.get_max_lexspan(p))
        dst.setLexObj(p)
114
def srcPort(x):
    """Wrap ``x`` in a one-element list holding a 'port' FieldDirective.

    Returns an empty list when ``x`` is falsy, so the result can always be
    concatenated onto another directive list.
    """
    return [FieldDirective(Name('port'), x)] if x else []
120
121
class ProtobufParser(object):
    """PLY yacc grammar for the protobuf dialect tokenized by ProtobufLexer.

    An instance is passed to ``yacc.yacc(module=..., start='goal')``.  The
    docstring of every ``p_*`` method is the grammar production it handles,
    so those docstrings are part of the program and must not be reworded.

    ``LU`` and the node classes used in the actions (Name, FieldDefinition,
    ...) are not defined in this file; presumably they come from the
    star-import of ``.model`` -- verify there.
    """
    tokens = ProtobufLexer.tokens
    offset = 0
    # Class attribute: this single LexHelper is shared by every
    # ProtobufParser instance.
    lh = LexHelper()

    def setOffset(self, of):
        # Keep the parser's and the helper's offsets in sync.
        self.offset = of
        self.lh.offset = of

    def p_empty(self, p):
        '''empty :'''
        pass

    def p_field_modifier(self,p):
        '''field_modifier : REQUIRED
                          | OPTIONAL
                          | REPEATED'''
        p[0] = LU.i(p,1)

    def p_primitive_type(self, p):
        '''primitive_type : DOUBLE
                          | FLOAT
                          | INT32
                          | INT64
                          | UINT32
                          | UINT64
                          | SINT32
                          | SINT64
                          | FIXED32
                          | FIXED64
                          | SFIXED32
                          | SFIXED64
                          | BOOL
                          | STRING
                          | BYTES'''
        p[0] = LU.i(p,1)

    def p_link_type(self, p):
        '''link_type : ONETOONE
                     | MANYTOONE
                     | MANYTOMANY'''
        p[0] = LU.i(p,1)

    def p_field_id(self, p):
        '''field_id : NUM'''
        p[0] = LU.i(p,1)

    def p_rvalue(self, p):
        '''rvalue : NUM
                  | TRUE
                  | FALSE'''
        p[0] = LU.i(p,1)

    # String literals on the right-hand side are wrapped in a Name node,
    # exactly like bare identifiers in p_rvalue2 below.
    def p_rvalue3(self, p):
        '''rvalue : STRING_LITERAL'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_rvalue2(self, p):
        '''rvalue : NAME'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    # A field without a '[...]' suffix yields an empty directive list.
    def p_field_directives2(self, p):
        '''field_directives : empty'''
        p[0] = []

    def p_field_directives(self, p):
        '''field_directives : LBRACK field_directive_times RBRACK'''
        p[0] = p[2]
        #self.lh.set_parse_object(p[0], p)

    def p_field_directive(self, p):
        '''field_directive : NAME EQ rvalue'''
        p[0] = FieldDirective(Name(LU.i(p, 1)), LU.i(p, 3))
        self.lh.set_parse_object(p[0], p)

    def p_field_directive_times(self, p):
        '''field_directive_times : field_directive_plus'''
        p[0] = p[1]

    def p_field_directive_times2(self, p):
        '''field_directive_times : empty'''
        p[0] = []

    # Comma-separated, left-recursive list of directives.
    def p_field_directive_plus(self, p):
        '''field_directive_plus : field_directive
                                | field_directive_plus COMMA field_directive'''
        if len(p) == 2:
            p[0] = [LU(p,1)]
        else:
            p[0] = p[1] + [LU(p,3)]

    # Dotted identifier, collected as a list of its NAME parts.
    def p_dotname(self, p):
        '''dotname : NAME
                   | dotname DOT NAME'''
        if len(p) == 2:
            p[0] = [LU(p,1)]
        else:
            p[0] = p[1] + [LU(p,3)]

    # Hack for cases when there is a field named 'message' or 'max'
    def p_fieldName(self, p):
        '''field_name : NAME
                      | MESSAGE
                      | MAX'''
        p[0] = Name(LU.i(p,1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_field_type(self, p):
        '''field_type : primitive_type'''
        p[0] = FieldType(LU.i(p,1))
        self.lh.set_parse_object(p[0], p)

    def p_field_type2(self, p):
        '''field_type : dotname'''
        p[0] = DotName(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    # Optional ':<field_name>' suffix of a link target; None when absent.
    def p_colon_fieldname(self, p):
        '''colon_fieldname : COLON field_name'''
        p[0] = p[2]
        self.lh.set_parse_object(p[0], p)

    def p_colon_fieldname2(self, p):
        '''colon_fieldname : empty'''
        p[0] = None

    # TODO: Add directives to link definition
    # A link produces a LinkSpec pairing two nodes:
    #   * a FieldDefinition with fixed type Name('int32') whose directives are
    #     type=link, model=<target NAME>, an optional port directive built by
    #     srcPort() from colon_fieldname, followed by the written directives;
    #   * a LinkDefinition(link_type, field_name, target NAME, colon_fieldname).
    def p_link_definition(self, p):
        '''link_definition : field_modifier link_type field_name ARROW NAME colon_fieldname EQ field_id field_directives SEMI'''
        p[0] = LinkSpec(
                FieldDefinition(LU.i(p,1), Name('int32'), LU.i(p, 3), LU.i(p, 8), [FieldDirective(Name('type'), Name('link')), FieldDirective(Name('model'),LU.i(p, 5))] + srcPort(LU.i(p,6)) + LU.i(p,9)),
                LinkDefinition(LU.i(p,2), LU.i(p,3), LU.i(p,5), LU.i(p,6)))

        self.lh.set_parse_object(p[0], p)

    # Root of the field declaration.
    def p_field_definition(self, p):
        '''field_definition : field_modifier field_type field_name EQ field_id field_directives SEMI'''
        p[0] = FieldDefinition(LU.i(p,1), LU.i(p,2), LU.i(p, 3), LU.i(p,5), LU.i(p,6))
        self.lh.set_parse_object(p[0], p)

    # Root of the enum field declaration.
    def p_enum_field(self, p):
        '''enum_field : field_name EQ NUM SEMI'''
        p[0] = EnumFieldDefinition(LU.i(p, 1), LU.i(p,3))
        self.lh.set_parse_object(p[0], p)

    def p_enum_body_part(self, p):
        '''enum_body_part : enum_field
                          | option_directive'''
        p[0] = p[1]

    def p_enum_body(self, p):
        '''enum_body : enum_body_part
                     | enum_body enum_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_enum_body_opt(self, p):
        '''enum_body_opt : empty'''
        p[0] = []

    def p_enum_body_opt2(self, p):
        '''enum_body_opt : enum_body'''
        p[0] = p[1]

    # Root of the enum declaration.
    # enum_definition ::= 'enum' ident '{' { ident '=' integer ';' }* '}'
    def p_enum_definition(self, p):
        '''enum_definition : ENUM NAME LBRACE enum_body_opt RBRACE'''
        p[0] = EnumDefinition(Name(LU.i(p, 2)), LU.i(p,4))
        self.lh.set_parse_object(p[0], p)

    # Upper bound of an extensions range: the keyword 'max' or a number.
    def p_extensions_to(self, p):
        '''extensions_to : MAX'''
        p[0] = ExtensionsMax()
        self.lh.set_parse_object(p[0], p)

    def p_extensions_to2(self, p):
        '''extensions_to : NUM'''
        p[0] = LU.i(p, 1)

    # extensions_definition ::= 'extensions' integer 'to' integer ';'
    def p_extensions_definition(self, p):
        '''extensions_definition : EXTENSIONS NUM TO extensions_to SEMI'''
        p[0] = ExtensionsDirective(LU.i(p,2), LU.i(p,4))
        self.lh.set_parse_object(p[0], p)

    # message_extension ::= 'extend' ident '{' message_body '}'
    def p_message_extension(self, p):
        '''message_extension : EXTEND NAME LBRACE message_body RBRACE'''
        p[0] = MessageExtension(Name(LU.i(p, 2)), LU.i(p,4))
        self.lh.set_parse_object(p[0], p)

    def p_message_body_part(self, p):
        '''message_body_part : field_definition
                             | link_definition
                             | enum_definition
                             | message_definition
                             | extensions_definition
                             | message_extension'''
        p[0] = p[1]

    # message_body ::= { field_definition | enum_definition | message_definition | extensions_definition | message_extension }*
    def p_message_body(self, p):
        '''message_body : empty'''
        p[0] = []

    # message_body ::= { field_definition | enum_definition | message_definition | extensions_definition | message_extension }*
    def p_message_body2(self, p):
        '''message_body : message_body_part
                        | message_body message_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # Optional '(NAME)' after the message name; None when absent.
    def p_base_definition(self, p):
        '''base_definition : LPAR NAME RPAR'''
        p[0] = p[2]

    def p_base_definition2(self, p):
        '''base_definition : empty'''
        p[0] = None

    # Root of the message declaration.
    # message_definition = MESSAGE_ - ident("messageId") + LBRACE + message_body("body") + RBRACE
    def p_message_definition(self, p):
        '''message_definition : MESSAGE NAME base_definition LBRACE message_body RBRACE'''
        p[0] = MessageDefinition(Name(LU.i(p, 2)), LU.i(p, 3), LU.i(p,5))
        self.lh.set_parse_object(p[0], p)

    # method_definition ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';'
    # NOTE(review): the production below has no trailing SEMI and requires
    # both NAMEs, unlike the EBNF comment above -- confirm which is intended.
    def p_method_definition(self, p):
        '''method_definition : RPC NAME LPAR NAME RPAR RETURNS LPAR NAME RPAR'''
        p[0] = MethodDefinition(Name(LU.i(p, 2)), Name(LU.i(p, 4)), Name(LU.i(p, 8)))
        self.lh.set_parse_object(p[0], p)

    def p_method_definition_opt(self, p):
        '''method_definition_opt : empty'''
        p[0] = []

    def p_method_definition_opt2(self, p):
        '''method_definition_opt : method_definition
                                 | method_definition_opt method_definition'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # service_definition ::= 'service' ident '{' method_definition* '}'
    # service_definition = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(method_definition)) + RBRACE
    # The keyword token is _SERVICE, i.e. the lexer keyword '_service'.
    def p_service_definition(self, p):
        '''service_definition : _SERVICE NAME LBRACE method_definition_opt RBRACE'''
        p[0] = ServiceDefinition(Name(LU.i(p, 2)), LU.i(p,4))
        self.lh.set_parse_object(p[0], p)

    # package_directive ::= 'package' ident [ '.' ident]* ';'
    def p_package_directive(self,p):
        '''package_directive : PACKAGE dotname SEMI'''
        p[0] = PackageStatement(Name(LU.i(p, 2)))
        self.lh.set_parse_object(p[0], p)

    # import_directive = IMPORT_ - quotedString("importFileSpec") + SEMI
    def p_import_directive(self, p):
        '''import_directive : IMPORT STRING_LITERAL SEMI'''
        p[0] = ImportStatement(Literal(LU.i(p,2)))
        self.lh.set_parse_object(p[0], p)

    def p_option_rvalue(self, p):
        '''option_rvalue : NUM
                         | TRUE
                         | FALSE'''
        p[0] = LU(p, 1)

    def p_option_rvalue2(self, p):
        '''option_rvalue : STRING_LITERAL'''
        p[0] = Literal(LU(p,1))

    def p_option_rvalue3(self, p):
        '''option_rvalue : NAME'''
        p[0] = Name(LU.i(p,1))

    # option_directive = OPTION_ - ident("optionName") + EQ + quotedString("optionValue") + SEMI
    def p_option_directive(self, p):
        '''option_directive : OPTION NAME EQ option_rvalue SEMI'''
        p[0] = OptionStatement(Name(LU.i(p, 2)), LU.i(p,4))
        self.lh.set_parse_object(p[0], p)

    # topLevelStatement = Group(message_definition | message_extension | enum_definition | service_definition | import_directive | option_directive)
    def p_topLevel(self,p):
        '''topLevel : message_definition
                    | message_extension
                    | enum_definition
                    | service_definition
                    | import_directive
                    | option_directive'''
        p[0] = p[1]

    def p_package_definition(self, p):
        '''package_definition : package_directive'''
        p[0] = p[1]

    # A file without a package statement yields an empty list here.
    def p_packages2(self, p):
        '''package_definition : empty'''
        p[0] = []

    def p_statements2(self, p):
        '''statements : topLevel
                      | statements topLevel'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_statements(self, p):
        '''statements : empty'''
        p[0] = []

    # parser = Optional(package_directive) + ZeroOrMore(topLevelStatement)
    def p_protofile(self, p):
        '''protofile : package_definition statements'''
        p[0] = ProtoFile(LU.i(p,1), LU.i(p,2))
        self.lh.set_parse_object(p[0], p)

    # Parsing starting point
    # The leading STARTTOKEN is dropped; the result is the protofile node.
    def p_goal(self, p):
        '''goal : STARTTOKEN protofile'''
        p[0] = p[2]

    # Syntax errors are only printed; no exception is raised here.
    def p_error(self, p):
        print('error: {}'.format(p))
462
class ProtobufAnalyzer(object):
    """Front end that builds the PLY lexer/parser and feeds them input."""

    def __init__(self):
        self.lexer = lex.lex(module=ProtobufLexer())#, optimize=1)
        self.parser = yacc.yacc(module=ProtobufParser(), start='goal', debug=0)#optimize=1)

    @staticmethod
    def _read_source(_file):
        """Return the full text of ``_file``.

        ``_file`` is either a path (``str``), which is opened and closed
        here, or an iterable of text lines such as an already open file
        object, which is consumed but left for the caller to close.
        """
        if isinstance(_file, str):
            # open() works on Python 2 and 3; the old file() builtin does not
            # exist on Python 3.  The with-block releases the handle.
            with open(_file) as handle:
                return handle.read()
        return ''.join(_file)

    def tokenize_string(self, code):
        """Lex ``code`` and print every token produced."""
        self.lexer.input(code)
        for token in self.lexer:
            print(token)

    def tokenize_file(self, _file):
        """Lex a file given as a path or an open file object; print tokens."""
        return self.tokenize_string(self._read_source(_file))

    def parse_string(self, code, debug=0, lineno=1, prefix='+'):
        """Parse ``code`` and return whatever the 'goal' rule produced.

        ``prefix`` is prepended to the input; the default '+' is what the
        lexer turns into the STARTTOKEN the grammar's start rule expects.
        ``lineno`` seeds the lexer's line counter and ``debug`` is passed
        through to the PLY parser.
        """
        self.lexer.lineno = lineno
        # NOTE(review): this sets ``offset`` on the object returned by
        # yacc.yacc(), not on the ProtobufParser instance; the LexHelper
        # offset is only changed by ProtobufParser.setOffset, which is not
        # called anywhere in this file -- confirm whether that is intended.
        self.parser.offset = len(prefix)
        return self.parser.parse(prefix + code, lexer=self.lexer, debug=debug)

    def parse_file(self, _file, debug=0):
        """Parse a file given as a path or an open file object."""
        return self.parse_string(self._read_source(_file), debug=debug)