blob: 4c80f6f7b23b6355dcf056a3405912d28bae1c09 [file] [log] [blame]
Dusan Klinecccaa0d92014-11-09 03:21:31 +01001__author__ = "Dusan (Ph4r05) Klinec"
Sapan Bhatiab1225872017-03-29 20:47:47 +02002
Dusan Klinecccaa0d92014-11-09 03:21:31 +01003__copyright__ = "Copyright (C) 2014 Dusan (ph4r05) Klinec"
4__license__ = "Apache License, Version 2.0"
5__version__ = "1.0"
6
7import ply.lex as lex
8import ply.yacc as yacc
9from .model import *
10
Sapan Bhatiab1225872017-03-29 20:47:47 +020011import pdb
12
class ProtobufLexer(object):
    """PLY lexer rules for the protobuf dialect understood by this module.

    PLY reads the ``t_*`` attributes of an instance of this class: string
    attributes are regular expressions, and for ``t_*`` methods the docstring
    is the regular expression.  Explanatory text therefore lives in ``#``
    comments, never in the rule docstrings.
    """

    # Reserved words.  Each one becomes its own token type, named after the
    # upper-cased keyword (see ``tokens`` and ``t_NAME``).
    keywords = ('double', 'float', 'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
                'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'bool', 'string', 'bytes',
                'message', 'required', 'optional', 'repeated', 'enum', 'extensions', 'max', 'extend',
                'to', 'package', '_service', 'rpc', 'returns', 'true', 'false', 'option', 'import',
                'manytoone', 'manytomany', 'onetoone')

    # Comments have no token names here: line comments use an ``ignore_`` rule
    # and the block-comment rule returns nothing, so neither reaches the parser.
    tokens = [
        'NAME',
        'NUM',
        'STRING_LITERAL',

        'LBRACE', 'RBRACE', 'LBRACK', 'RBRACK',
        'LPAR', 'RPAR', 'EQ', 'SEMI', 'DOT',
        'ARROW', 'COLON', 'COMMA', 'SLASH',
        'STARTTOKEN'

    ] + [k.upper() for k in keywords]
    literals = '()+-*/=?:,.^|&~!=[]{};<>@%'

    t_NUM = r'[+-]?\d+(\.\d+)?'
    t_STRING_LITERAL = r'\"([^\\\n]|(\\.))*?\"'

    t_ignore_LINE_COMMENT = '//.*'

    # Block comments may span lines, so keep the line counter in sync.
    def t_BLOCK_COMMENT(self, t):
        r'/\*(.|\n)*?\*/'
        t.lexer.lineno += t.value.count('\n')

    t_LBRACE = '{'
    t_RBRACE = '}'
    t_LBRACK = '\\['
    t_RBRACK = '\\]'
    t_LPAR = '\\('
    t_RPAR = '\\)'
    t_EQ = '='
    t_SEMI = ';'
    t_ARROW = '\\-\\>'
    t_COLON = '\\:'
    t_SLASH = '\\/'
    t_COMMA = '\\,'
    t_DOT = '\\.'
    t_ignore = ' \t\f'
    t_STARTTOKEN = '\\+'

    # Identifiers.  A match that is one of ``keywords`` is re-typed to the
    # keyword's own token (e.g. 'message' -> MESSAGE).
    def t_NAME(self, t):
        '[A-Za-z_$][A-Za-z0-9_+$]*'
        if t.value in ProtobufLexer.keywords:
            t.type = t.value.upper()
        return t

    # Unix line endings: one line per matched character.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # Windows line endings: each line break is two characters long.  Floor
    # division keeps ``lineno`` an int; true division made it a float on
    # Python 3.
    def t_newline2(self, t):
        r'(\r\n)+'
        t.lexer.lineno += len(t.value) // 2

    # Report the offending character and resume lexing right after it.
    def t_error(self, t):
        print("Illegal character '{}' ({}) in line {}".format(t.value[0], hex(ord(t.value[0])), t.lexer.lineno))
        t.lexer.skip(1)
75
class LexHelper:
    """Compute the source span covered by a grammar production.

    ``offset`` is subtracted from both ends of every non-empty span returned
    by ``get_max_linespan`` and ``get_max_lexspan``.
    """

    offset = 0

    def _max_span(self, p, span_of, attr):
        """Return the smallest span enclosing the spans of all symbols in ``p``.

        :param p: production object; must support ``len(p)`` and ``p[i]``.
        :param span_of: callable mapping a symbol index to a ``(start, end)``
            pair (``p.linespan`` or ``p.lexspan``).
        :param attr: attribute name read from the symbol's value when
            ``span_of`` reports ``(0, 0)`` for it.
        :return: ``(start - offset, end - offset)``, or ``(0, 0)`` when no
            symbol contributed a usable span.
        """
        # Sentinels: any real span start is below 1e60 and any end is above -1.
        low, high = 1e60, -1
        for idx in range(len(p)):
            span = span_of(idx)
            if span[0] == 0 and span[1] == 0:
                # Nothing recorded for this symbol; fall back to a span stored
                # on the symbol's value, if it carries one.
                if not hasattr(p[idx], attr):
                    continue
                span = getattr(p[idx], attr)
            if span is None or len(span) != 2:
                continue
            if span[0] == 0 and span[1] == 0:
                continue
            if span[0] < low:
                low = span[0]
            if span[1] > high:
                high = span[1]
        if low == 1e60 and high == -1:
            return (0, 0)
        return (low - self.offset, high - self.offset)

    def get_max_linespan(self, p):
        """Return the ``(first_line, last_line)`` span covered by ``p``."""
        return self._max_span(p, p.linespan, "linespan")

    def get_max_lexspan(self, p):
        """Return the ``(start_pos, end_pos)`` lexer-position span covered by ``p``."""
        return self._max_span(p, p.lexspan, "lexspan")

    def set_parse_object(self, dst, p):
        """Store the spans of ``p`` and ``p`` itself on ``dst``.

        ``dst`` must provide ``setLexData(linespan=..., lexspan=...)`` and
        ``setLexObj(p)``.
        """
        dst.setLexData(linespan=self.get_max_linespan(p), lexspan=self.get_max_lexspan(p))
        dst.setLexObj(p)
115
def srcPort(x):
    """Return the extra field directives describing a link's source port.

    :param x: value for the ``port`` directive; any falsy value means
        "no port given".
    :return: a one-element list holding ``FieldDirective(Name('port'), x)``
        when ``x`` is truthy, otherwise an empty list.
    """
    if not x:
        return []
    return [FieldDirective(Name('port'), x)]
121
122
class ProtobufParser(object):
    """PLY yacc grammar for the protobuf dialect tokenized by ``ProtobufLexer``.

    Every ``p_*`` method is a grammar action whose docstring is the production
    PLY reads, so explanatory notes are ``#`` comments rather than docstrings.
    The methods are kept in their original order because PLY derives the order
    of the grammar rules from the order of definition.
    """

    tokens = ProtobufLexer.tokens
    offset = 0
    # NOTE: defined on the class, so this helper is shared by all instances.
    lh = LexHelper()

    def setOffset(self, of):
        """Record ``of`` on this parser and on its ``LexHelper``."""
        self.offset = of
        self.lh.offset = of

    def p_empty(self, p):
        '''empty :'''
        pass

    def p_field_modifier(self, p):
        '''field_modifier : REQUIRED
                          | OPTIONAL
                          | REPEATED'''
        p[0] = LU.i(p, 1)

    def p_primitive_type(self, p):
        '''primitive_type : DOUBLE
                          | FLOAT
                          | INT32
                          | INT64
                          | UINT32
                          | UINT64
                          | SINT32
                          | SINT64
                          | FIXED32
                          | FIXED64
                          | SFIXED32
                          | SFIXED64
                          | BOOL
                          | STRING
                          | BYTES'''
        p[0] = LU.i(p, 1)

    def p_link_type(self, p):
        '''link_type : ONETOONE
                     | MANYTOONE
                     | MANYTOMANY'''
        p[0] = LU.i(p, 1)

    def p_field_id(self, p):
        '''field_id : NUM'''
        p[0] = LU.i(p, 1)

    def p_rvalue(self, p):
        '''rvalue : NUM
                  | TRUE
                  | FALSE'''
        p[0] = LU.i(p, 1)

    def p_rvalue3(self, p):
        '''rvalue : STRING_LITERAL'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_rvalue2(self, p):
        '''rvalue : NAME'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_field_directives2(self, p):
        '''field_directives : empty'''
        p[0] = []

    def p_field_directives(self, p):
        '''field_directives : LBRACK field_directive_times RBRACK'''
        p[0] = p[2]

    def p_field_directive(self, p):
        '''field_directive : NAME EQ rvalue'''
        p[0] = FieldDirective(Name(LU.i(p, 1)), LU.i(p, 3))
        self.lh.set_parse_object(p[0], p)

    def p_csv_expr(self, p):
        '''csv_expr : LPAR csv RPAR'''
        p[0] = p[2]

    def p_csv_expr2(self, p):
        '''csv_expr : empty'''
        p[0] = []

    # An empty list must yield [] (not the implicit None): p_csv concatenates
    # onto it, and it keeps "()" consistent with an absent csv_expr.
    def p_csv2(self, p):
        '''csv : empty'''
        p[0] = []

    def p_csv(self, p):
        '''csv : dotname
               | csv COMMA dotname'''
        if len(p) == 2:
            p[0] = [LU(p, 1)]
        else:
            p[0] = p[1] + [LU(p, 3)]

    def p_field_directive_times(self, p):
        '''field_directive_times : field_directive_plus'''
        p[0] = p[1]

    def p_field_directive_times2(self, p):
        '''field_directive_times : empty'''
        p[0] = []

    def p_field_directive_plus(self, p):
        '''field_directive_plus : field_directive
                                | field_directive_plus COMMA field_directive'''
        if len(p) == 2:
            p[0] = [LU(p, 1)]
        else:
            p[0] = p[1] + [LU(p, 3)]

    def p_dotname(self, p):
        '''dotname : NAME
                   | dotname DOT NAME'''
        if len(p) == 2:
            p[0] = [LU(p, 1)]
        else:
            p[0] = p[1] + [LU(p, 3)]

    # Hack for cases when there is a field named 'message' or 'max'
    def p_fieldName(self, p):
        '''field_name : STARTTOKEN
                      | NAME
                      | MESSAGE
                      | MAX'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_field_type(self, p):
        '''field_type : primitive_type'''
        p[0] = FieldType(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)

    def p_field_type2(self, p):
        '''field_type : dotname'''
        p[0] = DotName(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_slash_name(self, p):
        '''slash_name : SLASH dotname'''
        p[0] = p[2]

    def p_slash_name2(self, p):
        '''slash_name : empty'''
        p[0] = None

    def p_colon_fieldname(self, p):
        '''colon_fieldname : COLON field_name'''
        p[0] = p[2]
        self.lh.set_parse_object(p[0], p)

    def p_colon_fieldname2(self, p):
        '''colon_fieldname : empty'''
        p[0] = None

    # TODO: Add directives to link definition
    # A link yields a LinkSpec pairing an int32 FieldDefinition (tagged with
    # 'type' = link and 'model' directives, plus a 'port' directive when a
    # colon_fieldname is present) with the LinkDefinition itself.
    def p_link_definition(self, p):
        '''link_definition : field_modifier link_type field_name ARROW dotname slash_name colon_fieldname EQ field_id field_directives SEMI'''
        p[0] = LinkSpec(
            FieldDefinition(
                LU.i(p, 1), Name('int32'), LU.i(p, 3), LU.i(p, 9),
                [FieldDirective(Name('type'), Name('link')),
                 FieldDirective(Name('model'), LU.i(p, 5))]
                + srcPort(LU.i(p, 7)) + LU.i(p, 10)),
            LinkDefinition(LU.i(p, 2), LU.i(p, 3), LU.i(p, 5), LU.i(p, 6), LU.i(p, 7)))

        self.lh.set_parse_object(p[0], p)

    # Root of the field declaration.
    def p_field_definition(self, p):
        '''field_definition : field_modifier field_type field_name EQ field_id field_directives SEMI'''
        p[0] = FieldDefinition(LU.i(p, 1), LU.i(p, 2), LU.i(p, 3), LU.i(p, 5), LU.i(p, 6))
        self.lh.set_parse_object(p[0], p)

    # Root of the enum field declaration.
    def p_enum_field(self, p):
        '''enum_field : field_name EQ NUM SEMI'''
        p[0] = EnumFieldDefinition(LU.i(p, 1), LU.i(p, 3))
        self.lh.set_parse_object(p[0], p)

    def p_enum_body_part(self, p):
        '''enum_body_part : enum_field
                          | option_directive'''
        p[0] = p[1]

    def p_enum_body(self, p):
        '''enum_body : enum_body_part
                     | enum_body enum_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_enum_body_opt(self, p):
        '''enum_body_opt : empty'''
        p[0] = []

    def p_enum_body_opt2(self, p):
        '''enum_body_opt : enum_body'''
        p[0] = p[1]

    # Root of the enum declaration.
    # enum_definition ::= 'enum' ident '{' { ident '=' integer ';' }* '}'
    def p_enum_definition(self, p):
        '''enum_definition : ENUM NAME LBRACE enum_body_opt RBRACE'''
        p[0] = EnumDefinition(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    def p_extensions_to(self, p):
        '''extensions_to : MAX'''
        p[0] = ExtensionsMax()
        self.lh.set_parse_object(p[0], p)

    def p_extensions_to2(self, p):
        '''extensions_to : NUM'''
        p[0] = LU.i(p, 1)

    # extensions_definition ::= 'extensions' integer 'to' integer ';'
    def p_extensions_definition(self, p):
        '''extensions_definition : EXTENSIONS NUM TO extensions_to SEMI'''
        p[0] = ExtensionsDirective(LU.i(p, 2), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # message_extension ::= 'extend' ident '{' message_body '}'
    def p_message_extension(self, p):
        '''message_extension : EXTEND NAME LBRACE message_body RBRACE'''
        p[0] = MessageExtension(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    def p_message_body_part(self, p):
        '''message_body_part : field_definition
                             | link_definition
                             | enum_definition
                             | option_directive
                             | message_definition
                             | extensions_definition
                             | message_extension'''
        p[0] = p[1]

    # message_body ::= { field_definition | enum_definition | message_definition | extensions_definition | message_extension }*
    def p_message_body(self, p):
        '''message_body : empty'''
        p[0] = []

    # message_body ::= { field_definition | enum_definition | message_definition | extensions_definition | message_extension }*
    def p_message_body2(self, p):
        '''message_body : message_body_part
                        | message_body message_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # Root of the message declaration.
    # message_definition = MESSAGE_ - ident("messageId") + LBRACE + message_body("body") + RBRACE
    def p_message_definition(self, p):
        '''message_definition : MESSAGE NAME csv_expr LBRACE message_body RBRACE'''
        p[0] = MessageDefinition(Name(LU.i(p, 2)), LU.i(p, 3), LU.i(p, 5))
        self.lh.set_parse_object(p[0], p)

    # method_definition ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';'
    def p_method_definition(self, p):
        '''method_definition : RPC NAME LPAR NAME RPAR RETURNS LPAR NAME RPAR'''
        p[0] = MethodDefinition(Name(LU.i(p, 2)), Name(LU.i(p, 4)), Name(LU.i(p, 8)))
        self.lh.set_parse_object(p[0], p)

    def p_method_definition_opt(self, p):
        '''method_definition_opt : empty'''
        p[0] = []

    def p_method_definition_opt2(self, p):
        '''method_definition_opt : method_definition
                                 | method_definition_opt method_definition'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # service_definition ::= 'service' ident '{' method_definition* '}'
    # service_definition = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(method_definition)) + RBRACE
    def p_service_definition(self, p):
        '''service_definition : _SERVICE NAME LBRACE method_definition_opt RBRACE'''
        p[0] = ServiceDefinition(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # package_directive ::= 'package' ident [ '.' ident]* ';'
    def p_package_directive(self, p):
        '''package_directive : PACKAGE dotname SEMI'''
        p[0] = PackageStatement(Name(LU.i(p, 2)))
        self.lh.set_parse_object(p[0], p)

    # import_directive = IMPORT_ - quotedString("importFileSpec") + SEMI
    def p_import_directive(self, p):
        '''import_directive : IMPORT STRING_LITERAL SEMI'''
        p[0] = ImportStatement(Literal(LU.i(p, 2)))
        self.lh.set_parse_object(p[0], p)

    def p_option_rvalue(self, p):
        '''option_rvalue : NUM
                         | TRUE
                         | FALSE'''
        p[0] = LU(p, 1)

    def p_option_rvalue2(self, p):
        '''option_rvalue : STRING_LITERAL'''
        p[0] = Literal(LU(p, 1))

    def p_option_rvalue3(self, p):
        '''option_rvalue : NAME'''
        p[0] = Name(LU.i(p, 1))

    # option_directive = OPTION_ - ident("optionName") + EQ + quotedString("optionValue") + SEMI
    def p_option_directive(self, p):
        '''option_directive : OPTION NAME EQ option_rvalue SEMI'''
        p[0] = OptionStatement(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # topLevelStatement = Group(message_definition | message_extension | enum_definition | service_definition | import_directive | option_directive)
    def p_topLevel(self, p):
        '''topLevel : message_definition
                    | message_extension
                    | enum_definition
                    | service_definition
                    | import_directive
                    | option_directive'''
        p[0] = p[1]

    def p_package_definition(self, p):
        '''package_definition : package_directive'''
        p[0] = p[1]

    def p_packages2(self, p):
        '''package_definition : empty'''
        p[0] = []

    def p_statements2(self, p):
        '''statements : topLevel
                      | statements topLevel'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_statements(self, p):
        '''statements : empty'''
        p[0] = []

    # parser = Optional(package_directive) + ZeroOrMore(topLevelStatement)
    def p_protofile(self, p):
        '''protofile : package_definition statements'''
        p[0] = ProtoFile(LU.i(p, 1), LU.i(p, 2))
        self.lh.set_parse_object(p[0], p)

    # Parsing starting point
    def p_goal(self, p):
        '''goal : STARTTOKEN protofile'''
        p[0] = p[2]

    # Syntax errors are only reported on stdout; no exception is raised here.
    def p_error(self, p):
        print('error: {}'.format(p))
487
class ProtobufAnalyzer(object):
    """Convenience front end that builds the PLY lexer and parser once and
    offers helpers to tokenize or parse strings and files."""

    def __init__(self):
        self.lexer = lex.lex(module=ProtobufLexer())
        # The grammar starts at 'goal'; outputdir is passed to yacc as '/tmp'.
        self.parser = yacc.yacc(module=ProtobufParser(), start='goal', debug=0, outputdir='/tmp')

    @staticmethod
    def _read_source(_file):
        """Return the full text of ``_file``.

        :param _file: a path (``str``), or an iterable of text chunks such as
            an already-open text file.
        :return: the concatenated content as one string.
        """
        if isinstance(_file, str):
            # ``open`` replaces the Python-2-only ``file`` builtin, and the
            # ``with`` block closes the handle that used to be leaked.
            with open(_file) as handle:
                return handle.read()
        return ''.join(_file)

    def tokenize_string(self, code):
        """Lex ``code`` and print every token produced."""
        self.lexer.input(code)
        for token in self.lexer:
            print(token)

    def tokenize_file(self, _file):
        """Lex the content of ``_file`` (path or open file) and print its tokens."""
        return self.tokenize_string(self._read_source(_file))

    def parse_string(self, code, debug=0, lineno=1, prefix='+'):
        """Parse ``code`` and return whatever the parser produces for it.

        :param code: source text to parse.
        :param debug: forwarded to the parser's ``parse`` call.
        :param lineno: line number the lexer starts counting from.
        :param prefix: text prepended to ``code`` before parsing; the default
            '+' is what the lexer's STARTTOKEN rule matches, which the
            grammar's ``goal`` rule expects first.
        """
        self.lexer.lineno = lineno
        # NOTE(review): this sets ``offset`` on the object returned by
        # yacc.yacc(); it does not call ProtobufParser.setOffset, so it is
        # unclear whether LexHelper ever sees the prefix length -- confirm.
        self.parser.offset = len(prefix)
        return self.parser.parse(prefix + code, lexer=self.lexer, debug=debug)

    def parse_file(self, _file, debug=0):
        """Parse the content of ``_file`` (path or open file); see ``parse_string``."""
        return self.parse_string(self._read_source(_file), debug=debug)