blob: 2051c8f11e61a9f80004cf73cca04fe192c032d7 [file] [log] [blame]
# Module metadata (author, copyright, license and version strings).
__author__ = "Dusan (Ph4r05) Klinec"

__copyright__ = "Copyright (C) 2014 Dusan (ph4r05) Klinec"
__license__ = "Apache License, Version 2.0"
__version__ = "1.0"
6
7import ply.lex as lex
8import ply.yacc as yacc
9from .model import *
10
Sapan Bhatiab1225872017-03-29 20:47:47 +020011import pdb
12
class ProtobufLexer(object):
    """Token rules for the protobuf-style input, in the form PLY's lex expects.

    An instance of this class is handed to ``lex.lex(module=...)``.  For the
    ``t_*`` functions the docstring IS the token's regular expression, so
    those functions are documented with ``#`` comments instead of docstrings.
    """

    # Reserved words.  A NAME whose text equals one of these is re-typed to
    # the upper-cased keyword (see t_NAME).
    keywords = (
        'double', 'float', 'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
        'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'bool', 'string', 'bytes',
        'message', 'required', 'optional', 'repeated', 'enum', 'extensions', 'max', 'extend',
        'to', 'package', '_service', 'rpc', 'returns', 'true', 'false', 'option', 'import',
        'manytoone', 'manytomany', 'onetoone',
    )

    # Token names: the fixed tokens below plus one token per keyword.
    # Comments are discarded by the rules further down, so there are no
    # LINE_COMMENT / BLOCK_COMMENT token types.
    tokens = [
        'NAME',
        'NUM',
        'STRING_LITERAL',

        'LBRACE', 'RBRACE', 'LBRACK', 'RBRACK',
        'LPAR', 'RPAR', 'EQ', 'SEMI', 'DOT',
        'ARROW', 'COLON', 'COMMA', 'SLASH',
        'STARTTOKEN'

    ] + [k.upper() for k in keywords]
    literals = '()+-*/=?:,.^|&~!=[]{};<>@%'

    # Optionally signed integer or decimal number.
    t_NUM = r'[+-]?\d+(\.\d+)?'
    # Double-quoted string; backslash escapes allowed, raw newlines are not.
    t_STRING_LITERAL = r'\"([^\\\n]|(\\.))*?\"'

    # "//" comments run to the end of the line and produce no token.
    t_ignore_LINE_COMMENT = '//.*'

    # "/* ... */" comments produce no token (nothing is returned); the line
    # counter is advanced by the newlines they contain.
    def t_BLOCK_COMMENT(self, t):
        r'/\*(.|\n)*?\*/'
        t.lexer.lineno += t.value.count('\n')

    t_LBRACE = '{'
    t_RBRACE = '}'
    t_LBRACK = '\\['
    t_RBRACK = '\\]'
    t_LPAR = '\\('
    t_RPAR = '\\)'
    t_EQ = '='
    t_SEMI = ';'
    t_ARROW = '\\-\\>'
    t_COLON = '\\:'
    t_SLASH = '\\/'
    t_COMMA = '\\,'
    t_DOT = '\\.'
    # Characters skipped between tokens: space, tab and form feed.
    t_ignore = ' \t\f'
    # A literal '+'; the parser's 'goal' rule expects it as the first token.
    t_STARTTOKEN = '\\+'

    # Identifiers.  Note that '+' and '$' are accepted inside a name.
    # Keywords are recognised here by re-typing the token.
    def t_NAME(self, t):
        '[A-Za-z_$][A-Za-z0-9_+$]*'
        if t.value in ProtobufLexer.keywords:
            t.type = t.value.upper()
        return t

    # Runs of "\n": count one line per character, emit no token.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # Runs of "\r\n": count one line per two-character pair, emit no token.
    # Floor division keeps lineno an int on Python 3 (true division would
    # turn it into a float).
    def t_newline2(self, t):
        r'(\r\n)+'
        t.lexer.lineno += len(t.value) // 2

    # Report an unmatched character and skip over it so lexing continues.
    def t_error(self, t):
        print("Illegal character '{}' ({}) in line {}".format(t.value[0], hex(ord(t.value[0])), t.lexer.lineno))
        t.lexer.skip(1)
75
class LexHelper:
    """Computes the overall line / character span of a yacc production.

    ``p`` in the methods below is the production object passed to a ``p_*``
    grammar function: it must support ``len(p)``, ``p[i]``, ``p.linespan(i)``
    and ``p.lexspan(i)``.
    """

    # Subtracted from both ends of every non-empty span that is returned.
    offset = 0

    # Starting (low, high) bounds; still seeing these after the scan means
    # no usable span was found.
    _NO_SPAN = (1e60, -1)

    def _max_span(self, p, kind):
        """Return the widest span over all symbols of ``p``.

        ``kind`` is ``"linespan"`` or ``"lexspan"``; it names both the method
        queried on ``p`` and the fallback attribute read from a symbol's
        value when ``p`` reports ``(0, 0)`` for that symbol.

        Returns ``(0, 0)`` when no symbol contributes a span, otherwise
        ``(lowest_start - offset, highest_end - offset)``.
        """
        low, high = self._NO_SPAN
        for index in range(len(p)):
            span = getattr(p, kind)(index)
            if span[0] == 0 and span[1] == 0:
                # No position recorded for this symbol; fall back to a span
                # stored on the symbol's value, if it has one.
                symbol = p[index]
                if not hasattr(symbol, kind):
                    continue
                span = getattr(symbol, kind)
            if span is None or len(span) != 2:
                continue
            if span[0] == 0 and span[1] == 0:
                continue
            if span[0] < low:
                low = span[0]
            if span[1] > high:
                high = span[1]
        if (low, high) == self._NO_SPAN:
            return (0, 0)
        return (low - self.offset, high - self.offset)

    def get_max_linespan(self, p):
        """Return the (first, last) line span covering all symbols of ``p``."""
        return self._max_span(p, "linespan")

    def get_max_lexspan(self, p):
        """Return the (start, end) lexer-position span covering all symbols of ``p``."""
        return self._max_span(p, "lexspan")

    def set_parse_object(self, dst, p):
        """Store the spans computed from ``p``, and ``p`` itself, on ``dst``.

        ``dst`` must provide ``setLexData(linespan=..., lexspan=...)`` and
        ``setLexObj(p)``.
        """
        dst.setLexData(linespan=self.get_max_linespan(p), lexspan=self.get_max_lexspan(p))
        dst.setLexObj(p)
115
def srcPort(x):
    """Wrap ``x`` in a one-element list holding a 'port' FieldDirective.

    Returns an empty list when ``x`` is falsy (e.g. ``None``), so the result
    can always be concatenated onto another list of directives.
    """
    if not x:
        return []
    return [FieldDirective(Name('port'), x)]
121
122
class ProtobufParser(object):
    """Grammar rules for the protobuf-style input, in the form PLY's yacc expects.

    An instance of this class is handed to ``yacc.yacc(module=...)``.  For
    every ``p_*`` method the docstring IS the grammar production, so the
    rules are documented with ``#`` comments instead of docstrings.

    The model classes (``Name``, ``FieldDefinition``, ...) and ``LU`` are not
    defined in this file; presumably they come from ``from .model import *``.
    """

    tokens = ProtobufLexer.tokens
    offset = 0
    # Class-level attribute: one LexHelper is shared by every instance.
    lh = LexHelper()

    def setOffset(self, of):
        """Record ``of`` on this parser and on the shared LexHelper."""
        self.offset = of
        self.lh.offset = of

    # Empty production, used wherever a construct is optional.
    def p_empty(self, p):
        '''empty :'''
        pass

    def p_field_modifier(self, p):
        '''field_modifier : REQUIRED
                          | OPTIONAL
                          | REPEATED'''
        p[0] = LU.i(p, 1)

    def p_primitive_type(self, p):
        '''primitive_type : DOUBLE
                          | FLOAT
                          | INT32
                          | INT64
                          | UINT32
                          | UINT64
                          | SINT32
                          | SINT64
                          | FIXED32
                          | FIXED64
                          | SFIXED32
                          | SFIXED64
                          | BOOL
                          | STRING
                          | BYTES'''
        p[0] = LU.i(p, 1)

    # Relation kind used by link definitions.
    def p_link_type(self, p):
        '''link_type : ONETOONE
                     | MANYTOONE
                     | MANYTOMANY'''
        p[0] = LU.i(p, 1)

    def p_field_id(self, p):
        '''field_id : NUM'''
        p[0] = LU.i(p, 1)

    # Right-hand side of a field directive: number or boolean keyword ...
    def p_rvalue(self, p):
        '''rvalue : NUM
                  | TRUE
                  | FALSE'''
        p[0] = LU.i(p, 1)

    # ... or a string literal (wrapped in Name) ...
    def p_rvalue3(self, p):
        '''rvalue : STRING_LITERAL'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    # ... or a bare identifier (wrapped in Name).
    def p_rvalue2(self, p):
        '''rvalue : NAME'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    # A field may carry no directives at all ...
    def p_field_directives2(self, p):
        '''field_directives : empty'''
        p[0] = []

    # ... or a bracketed, possibly empty, list of them.
    def p_field_directives(self, p):
        '''field_directives : LBRACK field_directive_times RBRACK'''
        p[0] = p[2]

    def p_field_directive(self, p):
        '''field_directive : NAME EQ rvalue'''
        p[0] = FieldDirective(Name(LU.i(p, 1)), LU.i(p, 3))
        self.lh.set_parse_object(p[0], p)

    # Optional parenthesised name list following a message name.
    def p_csv_expr(self, p):
        '''csv_expr : LPAR csv RPAR'''
        p[0] = p[2]

    def p_csv_expr2(self, p):
        '''csv_expr : empty'''
        p[0] = []

    # An empty list must still be a list: the 'csv COMMA NAME' alternative
    # below concatenates onto p[1], and csv_expr forwards this value as is.
    def p_csv2(self, p):
        '''csv : empty'''
        p[0] = []

    def p_csv(self, p):
        '''csv : NAME
               | csv COMMA NAME'''
        if len(p) == 2:
            p[0] = [LU(p, 1)]
        else:
            p[0] = p[1] + [LU(p, 3)]

    def p_field_directive_times(self, p):
        '''field_directive_times : field_directive_plus'''
        p[0] = p[1]

    def p_field_directive_times2(self, p):
        '''field_directive_times : empty'''
        p[0] = []

    # One or more comma-separated directives, collected into a list.
    def p_field_directive_plus(self, p):
        '''field_directive_plus : field_directive
                                | field_directive_plus COMMA field_directive'''
        if len(p) == 2:
            p[0] = [LU(p, 1)]
        else:
            p[0] = p[1] + [LU(p, 3)]

    # Dotted name, collected into a list of its components.
    def p_dotname(self, p):
        '''dotname : NAME
                   | dotname DOT NAME'''
        if len(p) == 2:
            p[0] = [LU(p, 1)]
        else:
            p[0] = p[1] + [LU(p, 3)]

    # Hack for cases when there is a field named 'message' or 'max'
    def p_fieldName(self, p):
        '''field_name : STARTTOKEN
                      | NAME
                      | MESSAGE
                      | MAX'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_field_type(self, p):
        '''field_type : primitive_type'''
        p[0] = FieldType(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)

    def p_field_type2(self, p):
        '''field_type : dotname'''
        p[0] = DotName(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    # Optional "/ NAME" suffix of a link target; None when absent.
    def p_slash_name(self, p):
        '''slash_name : SLASH NAME'''
        p[0] = p[2]

    def p_slash_name2(self, p):
        '''slash_name : empty'''
        p[0] = None

    # Optional ": field_name" suffix of a link target; None when absent.
    def p_colon_fieldname(self, p):
        '''colon_fieldname : COLON field_name'''
        p[0] = p[2]
        self.lh.set_parse_object(p[0], p)

    def p_colon_fieldname2(self, p):
        '''colon_fieldname : empty'''
        p[0] = None

    # A link is turned into a LinkSpec pairing an int32 FieldDefinition
    # (tagged with 'type = link' and 'model = <target>' directives, plus a
    # 'port' directive when a colon field name is given) with a
    # LinkDefinition.
    # TODO: Add directives to link definition
    def p_link_definition(self, p):
        '''link_definition : field_modifier link_type field_name ARROW NAME slash_name colon_fieldname EQ field_id field_directives SEMI'''
        p[0] = LinkSpec(
            FieldDefinition(
                LU.i(p, 1), Name('int32'), LU.i(p, 3), LU.i(p, 9),
                [FieldDirective(Name('type'), Name('link')),
                 FieldDirective(Name('model'), LU.i(p, 5))]
                + srcPort(LU.i(p, 7))
                + LU.i(p, 10)),
            LinkDefinition(LU.i(p, 2), LU.i(p, 3), LU.i(p, 5), LU.i(p, 6), LU.i(p, 7)))

        self.lh.set_parse_object(p[0], p)

    # Root of the field declaration.
    def p_field_definition(self, p):
        '''field_definition : field_modifier field_type field_name EQ field_id field_directives SEMI'''
        p[0] = FieldDefinition(LU.i(p, 1), LU.i(p, 2), LU.i(p, 3), LU.i(p, 5), LU.i(p, 6))
        self.lh.set_parse_object(p[0], p)

    # Root of the enum field declaration.
    def p_enum_field(self, p):
        '''enum_field : field_name EQ NUM SEMI'''
        p[0] = EnumFieldDefinition(LU.i(p, 1), LU.i(p, 3))
        self.lh.set_parse_object(p[0], p)

    def p_enum_body_part(self, p):
        '''enum_body_part : enum_field
                          | option_directive'''
        p[0] = p[1]

    def p_enum_body(self, p):
        '''enum_body : enum_body_part
                     | enum_body enum_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_enum_body_opt(self, p):
        '''enum_body_opt : empty'''
        p[0] = []

    def p_enum_body_opt2(self, p):
        '''enum_body_opt : enum_body'''
        p[0] = p[1]

    # Root of the enum declaration.
    # enum_definition ::= 'enum' ident '{' { ident '=' integer ';' }* '}'
    def p_enum_definition(self, p):
        '''enum_definition : ENUM NAME LBRACE enum_body_opt RBRACE'''
        p[0] = EnumDefinition(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # Upper bound of an extensions range: the keyword 'max' ...
    def p_extensions_to(self, p):
        '''extensions_to : MAX'''
        p[0] = ExtensionsMax()
        self.lh.set_parse_object(p[0], p)

    # ... or an explicit number.
    def p_extensions_to2(self, p):
        '''extensions_to : NUM'''
        p[0] = LU.i(p, 1)

    # extensions_definition ::= 'extensions' integer 'to' integer ';'
    def p_extensions_definition(self, p):
        '''extensions_definition : EXTENSIONS NUM TO extensions_to SEMI'''
        p[0] = ExtensionsDirective(LU.i(p, 2), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # message_extension ::= 'extend' ident '{' message_body '}'
    def p_message_extension(self, p):
        '''message_extension : EXTEND NAME LBRACE message_body RBRACE'''
        p[0] = MessageExtension(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    def p_message_body_part(self, p):
        '''message_body_part : field_definition
                             | link_definition
                             | enum_definition
                             | message_definition
                             | extensions_definition
                             | message_extension'''
        p[0] = p[1]

    # message_body ::= { field_definition | enum_definition | message_definition | extensions_definition | message_extension }*
    def p_message_body(self, p):
        '''message_body : empty'''
        p[0] = []

    # message_body ::= { field_definition | enum_definition | message_definition | extensions_definition | message_extension }*
    def p_message_body2(self, p):
        '''message_body : message_body_part
                        | message_body message_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # Root of the message declaration.
    # message_definition = MESSAGE_ - ident("messageId") + LBRACE + message_body("body") + RBRACE
    # The optional csv_expr is the parenthesised name list after the message name.
    def p_message_definition(self, p):
        '''message_definition : MESSAGE NAME csv_expr LBRACE message_body RBRACE'''
        p[0] = MessageDefinition(Name(LU.i(p, 2)), LU.i(p, 3), LU.i(p, 5))
        self.lh.set_parse_object(p[0], p)

    # method_definition ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';'
    # NOTE: the production below requires both identifiers and has no trailing SEMI.
    def p_method_definition(self, p):
        '''method_definition : RPC NAME LPAR NAME RPAR RETURNS LPAR NAME RPAR'''
        p[0] = MethodDefinition(Name(LU.i(p, 2)), Name(LU.i(p, 4)), Name(LU.i(p, 8)))
        self.lh.set_parse_object(p[0], p)

    def p_method_definition_opt(self, p):
        '''method_definition_opt : empty'''
        p[0] = []

    def p_method_definition_opt2(self, p):
        '''method_definition_opt : method_definition
                                 | method_definition_opt method_definition'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # service_definition ::= 'service' ident '{' method_definition* '}'
    # service_definition = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(method_definition)) + RBRACE
    # NOTE: the keyword token is _SERVICE, i.e. the source text '_service'.
    def p_service_definition(self, p):
        '''service_definition : _SERVICE NAME LBRACE method_definition_opt RBRACE'''
        p[0] = ServiceDefinition(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # package_directive ::= 'package' ident [ '.' ident]* ';'
    def p_package_directive(self, p):
        '''package_directive : PACKAGE dotname SEMI'''
        p[0] = PackageStatement(Name(LU.i(p, 2)))
        self.lh.set_parse_object(p[0], p)

    # import_directive = IMPORT_ - quotedString("importFileSpec") + SEMI
    def p_import_directive(self, p):
        '''import_directive : IMPORT STRING_LITERAL SEMI'''
        p[0] = ImportStatement(Literal(LU.i(p, 2)))
        self.lh.set_parse_object(p[0], p)

    def p_option_rvalue(self, p):
        '''option_rvalue : NUM
                         | TRUE
                         | FALSE'''
        p[0] = LU(p, 1)

    def p_option_rvalue2(self, p):
        '''option_rvalue : STRING_LITERAL'''
        p[0] = Literal(LU(p, 1))

    def p_option_rvalue3(self, p):
        '''option_rvalue : NAME'''
        p[0] = Name(LU.i(p, 1))

    # option_directive = OPTION_ - ident("optionName") + EQ + quotedString("optionValue") + SEMI
    def p_option_directive(self, p):
        '''option_directive : OPTION NAME EQ option_rvalue SEMI'''
        p[0] = OptionStatement(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # topLevelStatement = Group(message_definition | message_extension | enum_definition | service_definition | import_directive | option_directive)
    def p_topLevel(self, p):
        '''topLevel : message_definition
                    | message_extension
                    | enum_definition
                    | service_definition
                    | import_directive
                    | option_directive'''
        p[0] = p[1]

    def p_package_definition(self, p):
        '''package_definition : package_directive'''
        p[0] = p[1]

    # No package statement: the package slot is an empty list.
    def p_packages2(self, p):
        '''package_definition : empty'''
        p[0] = []

    def p_statements2(self, p):
        '''statements : topLevel
                      | statements topLevel'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_statements(self, p):
        '''statements : empty'''
        p[0] = []

    # parser = Optional(package_directive) + ZeroOrMore(topLevelStatement)
    def p_protofile(self, p):
        '''protofile : package_definition statements'''
        p[0] = ProtoFile(LU.i(p, 1), LU.i(p, 2))
        self.lh.set_parse_object(p[0], p)

    # Parsing starting point
    # The input must begin with STARTTOKEN ('+'), which is dropped here.
    def p_goal(self, p):
        '''goal : STARTTOKEN protofile'''
        p[0] = p[2]

    # Syntax errors are only printed; no exception is raised here.
    def p_error(self, p):
        print('error: {}'.format(p))
486
class ProtobufAnalyzer(object):
    """Front end that builds the PLY lexer and parser and runs them on text or files."""

    def __init__(self):
        """Build the lexer from ProtobufLexer and the parser from ProtobufParser."""
        self.lexer = lex.lex(module=ProtobufLexer())
        self.parser = yacc.yacc(module=ProtobufParser(), start='goal', debug=0)

    @staticmethod
    def _read_source(_file):
        """Return the full text of ``_file``.

        ``_file`` is either a path (``str``), which is opened, read and
        closed here, or an already-open file / any iterable of text lines,
        which is consumed but not closed.
        """
        if isinstance(_file, str):
            with open(_file) as handle:
                return handle.read()
        return ''.join(_file)

    def tokenize_string(self, code):
        """Lex ``code`` and print every token; returns None."""
        self.lexer.input(code)
        for token in self.lexer:
            print(token)

    def tokenize_file(self, _file):
        """Lex the contents of ``_file`` (path or file-like) and print every token."""
        return self.tokenize_string(self._read_source(_file))

    def parse_string(self, code, debug=0, lineno=1, prefix='+'):
        """Parse ``code`` and return whatever the parser's start rule produces.

        ``prefix`` is prepended to ``code`` before parsing (the default '+'
        is the text of the lexer's STARTTOKEN).  ``lineno`` is the line
        number assigned to the lexer before parsing starts, and ``debug`` is
        passed through to the parser.
        """
        self.lexer.lineno = lineno
        # NOTE(review): this sets 'offset' on the object returned by
        # yacc.yacc(), not via ProtobufParser.setOffset; whether anything
        # reads it there is not visible in this file.
        self.parser.offset = len(prefix)
        return self.parser.parse(prefix + code, lexer=self.lexer, debug=debug)

    def parse_file(self, _file, debug=0):
        """Parse the contents of ``_file`` (path or file-like); see parse_string."""
        return self.parse_string(self._read_source(_file), debug=debug)