# Module metadata and imports for the PLY-based protobuf/xproto parser.
__author__ = "Dusan (Ph4r05) Klinec"

__copyright__ = "Copyright (C) 2014 Dusan (ph4r05) Klinec"
__license__ = "Apache License, Version 2.0"
__version__ = "1.0"

import ply.lex as lex
import ply.yacc as yacc
# The star import supplies the AST node classes used by the grammar actions
# below (Name, FieldDirective, FieldDefinition, ...).
from .model import *

import pdb
from helpers import LexHelper, LU
from logicparser import FOLParser, FOLLexer
Sapan Bhatiab1225872017-03-29 20:47:47 +020014
class ProtobufLexer(object):
    """Token definitions (PLY ``lex`` module object) for the protobuf dialect.

    The class is meant to be handed to ``lex.lex(module=ProtobufLexer())``.
    Attributes named ``t_<TOKEN>`` are token rules: a string attribute is the
    regular expression itself, and for a method the *docstring* is the regular
    expression.  For that reason rule methods carry ``#`` comments instead of
    prose docstrings.
    """

    # Reserved words.  ``t_NAME`` re-tags an identifier whose text is listed
    # here with the upper-cased keyword as its token type.
    keywords = ('double', 'float', 'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
                'fixed32', 'fixed64', 'sfixed32', 'sfixed64', 'bool', 'string', 'bytes',
                'message', 'required', 'optional', 'repeated', 'enum', 'extensions', 'max', 'extend',
                'to', 'package', '_service', 'rpc', 'returns', 'true', 'false', 'option', 'import',
                'manytoone', 'manytomany', 'onetoone', 'policy')

    # Every token type the grammar may see: the structural tokens plus one
    # token type per keyword.
    tokens = [
        'POLICYBODY',
        'NAME',
        'NUM',
        'STRING_LITERAL',
        #'LINE_COMMENT', 'BLOCK_COMMENT',
        'LBRACE', 'RBRACE', 'LBRACK', 'RBRACK',
        'LPAR', 'RPAR', 'EQ', 'SEMI', 'DOT',
        'ARROW', 'COLON', 'COMMA', 'SLASH',
        'DOUBLECOLON',
        'STARTTOKEN'
    ] + [k.upper() for k in keywords]

    # Body of a policy definition: everything between '<' and the first '>'.
    # NOTE(review): PLY's default regex flags include re.VERBOSE, in which
    # case the blanks in this pattern are not significant - confirm.
    t_POLICYBODY = r'< (.|\n)*? >'

    literals = '()+-*/=?:,.^|&~!=[]{};<>@%'

    t_NUM = r'[+-]?\d+(\.\d+)?'
    t_STRING_LITERAL = r'\"([^\\\n]|(\\.))*?\"'

    # The ``t_ignore_`` prefix makes PLY discard the matched text.
    t_ignore_LINE_COMMENT = '//.*'

    def t_BLOCK_COMMENT(self, t):
        r'/\*(.|\n)*?\*/'
        # Nothing is returned, so the comment is dropped; only the line
        # counter is advanced by the newlines the comment spans.
        t.lexer.lineno += t.value.count('\n')

    t_LBRACE = '{'
    t_RBRACE = '}'
    t_LBRACK = '\\['
    t_RBRACK = '\\]'

    t_LPAR = '\\('
    t_RPAR = '\\)'
    t_EQ = '='
    t_SEMI = ';'
    t_ARROW = '\\-\\>'
    t_COLON = '\\:'
    t_SLASH = '\\/'
    t_COMMA = '\\,'
    t_DOT = '\\.'
    t_ignore = ' \t\f'
    # '+' is the synthetic start marker consumed by the grammar's ``goal``
    # rule (see ProtobufAnalyzer.parse_string, which prepends it).
    t_STARTTOKEN = '\\+'
    t_DOUBLECOLON = '\\:\\:'

    def t_NAME(self, t):
        '[A-Za-z_$][A-Za-z0-9_+$]*'
        # Identifiers that are reserved words become keyword tokens.
        if t.value in ProtobufLexer.keywords:
            #print "type: %s val %s t %s" % (t.type, t.value, t)
            t.type = t.value.upper()
        return t

    def t_newline(self, t):
        r'\n+'
        # One line per '\n' character; no token is produced.
        t.lexer.lineno += len(t.value)

    def t_newline2(self, t):
        r'(\r\n)+'
        # Each CRLF pair is two characters but one line.  Floor division
        # keeps ``lineno`` an int on Python 3 (``/`` would yield a float).
        t.lexer.lineno += len(t.value) // 2

    def t_error(self, t):
        # Report the offending character and resume one character further.
        print("Illegal character '{}' ({}) in line {}".format(t.value[0], hex(ord(t.value[0])), t.lexer.lineno))
        t.lexer.skip(1)
Dusan Klinecc9b031a2014-11-10 13:21:08 +010085
def srcPort(x):
    """Return the optional ``port`` directive of a link as a list.

    A truthy ``x`` is wrapped in a one-element list holding
    ``FieldDirective(Name('port'), x)``; any falsy ``x`` gives an empty list,
    so the result can always be concatenated onto other directive lists.
    """
    return [FieldDirective(Name('port'), x)] if x else []
91
92
class ProtobufParser(object):
    """Grammar actions (PLY ``yacc`` module object) for the protobuf dialect.

    The class is meant to be handed to ``yacc.yacc(module=ProtobufParser(),
    start='goal')``.  The docstring of every ``p_*`` method IS the grammar
    production, so explanatory text lives in ``#`` comments only.  The methods
    are kept in their original order.

    ``LU`` and ``LexHelper`` come from the project's ``helpers`` module and the
    AST node classes from ``.model``; their exact behaviour is not visible
    here.  ``LU.i(p, n)`` is presumably "the semantic value at position n" and
    ``lh.set_parse_object`` presumably records source positions on the node -
    confirm against ``helpers``.
    """
    tokens = ProtobufLexer.tokens
    offset = 0
    # Class-level attributes: one LexHelper and one first-order-logic
    # lexer/parser pair are shared by all ProtobufParser instances.
    lh = LexHelper()
    fol_lexer = lex.lex(module=FOLLexer())#, optimize=1)
    fol_parser = yacc.yacc(module=FOLParser(), start='goal')

    def setOffset(self, of):
        # Keep the parser's own offset and the LexHelper's offset in sync.
        self.offset = of
        self.lh.offset = of

    def p_empty(self, p):
        '''empty :'''
        pass

    def p_field_modifier(self,p):
        '''field_modifier : REQUIRED
                          | OPTIONAL
                          | REPEATED'''
        p[0] = LU.i(p,1)

    def p_primitive_type(self, p):
        '''primitive_type : DOUBLE
                          | FLOAT
                          | INT32
                          | INT64
                          | UINT32
                          | UINT64
                          | SINT32
                          | SINT64
                          | FIXED32
                          | FIXED64
                          | SFIXED32
                          | SFIXED64
                          | BOOL
                          | STRING
                          | BYTES'''
        p[0] = LU.i(p,1)

    def p_link_type(self, p):
        '''link_type      : ONETOONE
                          | MANYTOONE
                          | MANYTOMANY'''
        p[0] = LU.i(p,1)

    def p_field_id(self, p):
        '''field_id : NUM'''
        p[0] = LU.i(p,1)

    def p_rvalue(self, p):
        '''rvalue : NUM
                  | TRUE
                  | FALSE'''
        p[0] = LU.i(p,1)

    def p_rvalue3(self, p):
        '''rvalue : STRING_LITERAL'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_rvalue2(self, p):
        '''rvalue : NAME'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_field_directives2(self, p):
        '''field_directives : empty'''
        p[0] = []

    def p_field_directives(self, p):
        '''field_directives : LBRACK field_directive_times RBRACK'''
        p[0] = p[2]
        #self.lh.set_parse_object(p[0], p)

    def p_field_directive(self, p):
        '''field_directive : NAME EQ rvalue'''
        p[0] = FieldDirective(Name(LU.i(p, 1)), LU.i(p, 3))
        self.lh.set_parse_object(p[0], p)

    def p_policy_opt_explicit(self, p):
        '''policy_opt : DOUBLECOLON NAME'''
        p[0] = p[2]

    def p_policy_opt_empty(self, p):
        '''policy_opt : empty'''
        p[0] = None

    def p_csv_expr(self, p):
        '''csv_expr : LPAR csv RPAR'''
        p[0] = p[2]

    def p_csv_expr2(self, p):
        '''csv_expr : empty'''
        p[0] = []

    def p_csv2(self, p):
        '''csv : empty'''
        # An empty list must be produced explicitly: leaving p[0] unset gives
        # None, which made "()" differ from an omitted csv_expr and made
        # "csv COMMA dotname" fail on ``None + [...]`` after an empty csv.
        p[0] = []

    def p_csv(self, p):
        '''csv : dotname
               | csv COMMA dotname'''

        if len(p) == 2:
            p[0] = [LU(p,1)]
        else:
            p[0] = p[1] + [LU(p,3)]

    def p_field_directive_times(self, p):
        '''field_directive_times : field_directive_plus'''
        p[0] = p[1]

    def p_field_directive_times2(self, p):
        '''field_directive_times : empty'''
        p[0] = []

    def p_field_directive_plus(self, p):
        '''field_directive_plus : field_directive
                               | field_directive_plus COMMA field_directive'''
        if len(p) == 2:
            p[0] = [LU(p,1)]
        else:
            p[0] = p[1] + [LU(p,3)]

    def p_dotname(self, p):
        '''dotname : NAME
                   | dotname DOT NAME'''
        # A dotted name is accumulated as a list of its components.
        if len(p) == 2:
            p[0] = [LU(p,1)]
        else:
            p[0] = p[1] + [LU(p,3)]

    # Hack for cases when there is a field named 'message' or 'max'
    def p_fieldName(self, p):
        '''field_name : STARTTOKEN
                      | NAME
                      | MESSAGE
                      | MAX'''
        p[0] = Name(LU.i(p,1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_field_type(self, p):
        '''field_type : primitive_type'''
        p[0] = FieldType(LU.i(p,1))
        self.lh.set_parse_object(p[0], p)

    def p_field_type2(self, p):
        '''field_type : dotname'''
        p[0] = DotName(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_slash_name(self, p):
        '''slash_name : SLASH dotname'''
        p[0] = p[2]
        #self.lh.set_parse_object(p[0], p)

    def p_slash_name2(self, p):
        '''slash_name : empty'''
        p[0] = None

    def p_colon_fieldname(self, p):
        '''colon_fieldname : COLON field_name'''
        p[0] = p[2]
        self.lh.set_parse_object(p[0], p)

    def p_colon_fieldname2(self, p):
        '''colon_fieldname : empty'''
        p[0] = None

    # TODO: Add directives to link definition
    def p_link_definition(self, p):
        '''link_definition : field_modifier link_type field_name ARROW dotname slash_name colon_fieldname EQ field_id field_directives SEMI'''
        # A link yields two nodes: a synthetic int32 field carrying
        # type/model (and optional port) directives, and the link itself.
        # NOTE(review): FieldDefinition receives 5 arguments here but 6 in
        # p_field_definition (which also passes the policy) - confirm the
        # constructor signature in the model module.
        p[0] = LinkSpec(
            FieldDefinition(
                LU.i(p,1), Name('int32'), LU.i(p, 3), LU.i(p, 9),
                [FieldDirective(Name('type'), Name('link')),
                 FieldDirective(Name('model'),LU.i(p, 5))] + srcPort(LU.i(p,7)) + LU.i(p,10)),
            LinkDefinition(LU.i(p,2), LU.i(p,3), LU.i(p,5), LU.i(p,6), LU.i(p,7)))

        self.lh.set_parse_object(p[0], p)

    # Root of the field declaration.
    def p_field_definition(self, p):
        '''field_definition : field_modifier field_type field_name policy_opt EQ field_id field_directives SEMI'''
        p[0] = FieldDefinition(LU.i(p,1), LU.i(p,2), LU.i(p, 3), LU.i(p,4), LU.i(p,6), LU.i(p,7))
        self.lh.set_parse_object(p[0], p)

    # Root of the enum field declaration.
    def p_enum_field(self, p):
        '''enum_field : field_name EQ NUM SEMI'''
        p[0] = EnumFieldDefinition(LU.i(p, 1), LU.i(p,3))
        self.lh.set_parse_object(p[0], p)

    def p_enum_body_part(self, p):
        '''enum_body_part : enum_field
                          | option_directive'''
        p[0] = p[1]

    def p_enum_body(self, p):
        '''enum_body : enum_body_part
                    | enum_body enum_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_enum_body_opt(self, p):
        '''enum_body_opt : empty'''
        p[0] = []

    def p_enum_body_opt2(self, p):
        '''enum_body_opt : enum_body'''
        p[0] = p[1]

    def p_policy_definition(self, p):
        '''policy_definition : POLICY NAME POLICYBODY'''
        # The raw POLICYBODY text is handed to the separate first-order-logic
        # parser; its result becomes the policy's body.
        fol = self.fol_parser.parse(p[3], lexer = self.fol_lexer)
        p[0] = PolicyDefinition(Name(LU.i(p, 2)), fol)
        self.lh.set_parse_object(p[0], p)

    # Root of the enum declaration.
    # enum_definition ::= 'enum' ident '{' { ident '=' integer ';' }* '}'
    def p_enum_definition(self, p):
        '''enum_definition : ENUM NAME LBRACE enum_body_opt RBRACE'''
        p[0] = EnumDefinition(Name(LU.i(p, 2)), LU.i(p,4))
        self.lh.set_parse_object(p[0], p)

    def p_extensions_to(self, p):
        '''extensions_to : MAX'''
        p[0] = ExtensionsMax()
        self.lh.set_parse_object(p[0], p)

    def p_extensions_to2(self, p):
        '''extensions_to : NUM'''
        p[0] = LU.i(p, 1)

    # extensions_definition ::= 'extensions' integer 'to' integer ';'
    def p_extensions_definition(self, p):
        '''extensions_definition : EXTENSIONS NUM TO extensions_to SEMI'''
        p[0] = ExtensionsDirective(LU.i(p,2), LU.i(p,4))
        self.lh.set_parse_object(p[0], p)

    # message_extension ::= 'extend' ident '{' message_body '}'
    def p_message_extension(self, p):
        '''message_extension : EXTEND NAME LBRACE message_body RBRACE'''
        p[0] = MessageExtension(Name(LU.i(p, 2)), LU.i(p,4))
        self.lh.set_parse_object(p[0], p)

    def p_message_body_part(self, p):
        '''message_body_part : field_definition
                           | link_definition
                           | enum_definition
                           | option_directive
                           | message_definition
                           | extensions_definition
                           | message_extension'''
        p[0] = p[1]

    # message_body ::= { field_definition | enum_definition | message_definition | extensions_definition | message_extension }*
    def p_message_body(self, p):
        '''message_body : empty'''
        p[0] = []

    # message_body ::= { field_definition | enum_definition | message_definition | extensions_definition | message_extension }*
    def p_message_body2(self, p):
        '''message_body : message_body_part
                      | message_body message_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # Root of the message declaration.
    # message_definition = MESSAGE_ - ident("messageId") + LBRACE + message_body("body") + RBRACE
    def p_message_definition(self, p):
        '''message_definition : MESSAGE NAME policy_opt csv_expr LBRACE message_body RBRACE'''
        p[0] = MessageDefinition(Name(LU.i(p, 2)), LU.i(p,3), LU.i(p, 4), LU.i(p,6))
        self.lh.set_parse_object(p[0], p)

    # method_definition ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';'
    def p_method_definition(self, p):
        '''method_definition : RPC NAME LPAR NAME RPAR RETURNS LPAR NAME RPAR'''
        p[0] = MethodDefinition(Name(LU.i(p, 2)), Name(LU.i(p, 4)), Name(LU.i(p, 8)))
        self.lh.set_parse_object(p[0], p)

    def p_method_definition_opt(self, p):
        '''method_definition_opt : empty'''
        p[0] = []

    def p_method_definition_opt2(self, p):
        '''method_definition_opt : method_definition
                          | method_definition_opt method_definition'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # service_definition ::= 'service' ident '{' method_definition* '}'
    # service_definition = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(method_definition)) + RBRACE
    def p_service_definition(self, p):
        '''service_definition : _SERVICE NAME LBRACE method_definition_opt RBRACE'''
        p[0] = ServiceDefinition(Name(LU.i(p, 2)), LU.i(p,4))
        self.lh.set_parse_object(p[0], p)

    # package_directive ::= 'package' ident [ '.' ident]* ';'
    def p_package_directive(self,p):
        '''package_directive : PACKAGE dotname SEMI'''
        p[0] = PackageStatement(Name(LU.i(p, 2)))
        self.lh.set_parse_object(p[0], p)

    # import_directive = IMPORT_ - quotedString("importFileSpec") + SEMI
    def p_import_directive(self, p):
        '''import_directive : IMPORT STRING_LITERAL SEMI'''
        p[0] = ImportStatement(Literal(LU.i(p,2)))
        self.lh.set_parse_object(p[0], p)

    def p_option_rvalue(self, p):
        '''option_rvalue : NUM
                         | TRUE
                         | FALSE'''
        p[0] = LU(p, 1)

    def p_option_rvalue2(self, p):
        '''option_rvalue : STRING_LITERAL'''
        p[0] = Literal(LU(p,1))

    def p_option_rvalue3(self, p):
        '''option_rvalue : NAME'''
        p[0] = Name(LU.i(p,1))

    # option_directive = OPTION_ - ident("optionName") + EQ + quotedString("optionValue") + SEMI
    def p_option_directive(self, p):
        '''option_directive : OPTION NAME EQ option_rvalue SEMI'''
        p[0] = OptionStatement(Name(LU.i(p, 2)), LU.i(p,4))
        self.lh.set_parse_object(p[0], p)

    # topLevelStatement = Group(message_definition | message_extension | enum_definition | service_definition | import_directive | option_directive | package_definition)
    def p_topLevel(self,p):
        '''topLevel : message_definition
                    | message_extension
                    | enum_definition
                    | policy_definition
                    | service_definition
                    | import_directive
                    | package_directive
                    | option_directive'''
        p[0] = p[1]

    def p_statements2(self, p):
        '''statements : topLevel
                      | statements topLevel'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_statements(self, p):
        '''statements : empty'''
        p[0] = []

    # parser = Optional(package_directive) + ZeroOrMore(topLevelStatement)
    def p_protofile(self, p):
        '''protofile : statements'''
        p[0] = ProtoFile(LU.i(p,1))
        self.lh.set_parse_object(p[0], p)

    # Parsing starting point
    def p_goal(self, p):
        '''goal : STARTTOKEN protofile'''
        # The leading STARTTOKEN ('+') is discarded; the file node is the result.
        p[0] = p[2]

    def p_error(self, p):
        # Syntax errors are only reported on stdout; no exception is raised here.
        print('error: {}'.format(p))
467
class ProtobufAnalyzer(object):
    """Convenience front end that owns one lexer and one parser.

    Offers tokenizing (tokens are printed) and parsing of either a string or
    a file.  A "file" argument may be a path (``str``), which is opened and
    closed here, or an already-open iterable of lines, which is consumed but
    left for the caller to close.
    """

    def __init__(self):
        self.lexer = lex.lex(module=ProtobufLexer())#, optimize=1)
        self.parser = yacc.yacc(module=ProtobufParser(), start='goal', debug=0, outputdir='/tmp')#optimize=1)

    @staticmethod
    def _read_source(_file):
        """Return the full text of ``_file`` (a path or an iterable of lines)."""
        if isinstance(_file, str):
            # ``open`` exists on both Python 2 and 3 (``file`` is Python 2
            # only); the ``with`` block guarantees the handle is closed.
            with open(_file) as handle:
                return handle.read()
        return ''.join(_file)

    def tokenize_string(self, code):
        """Lex ``code`` and print every token; returns ``None``."""
        self.lexer.input(code)
        for token in self.lexer:
            print(token)

    def tokenize_file(self, _file):
        """Lex the contents of ``_file`` (path or open file) and print the tokens."""
        return self.tokenize_string(self._read_source(_file))

    def parse_string(self, code, debug=0, lineno=1, prefix='+'):
        """Parse ``code`` and return whatever the parser's ``parse`` returns.

        ``lineno`` seeds the lexer's line counter.  ``prefix`` is prepended to
        the text before parsing; the default '+' is the STARTTOKEN that the
        grammar's ``goal`` rule expects first.  The prefix length is stored
        on the parser object as ``offset``.
        """
        self.lexer.lineno = lineno
        # NOTE(review): this sets ``offset`` on the object returned by
        # yacc.yacc(), not via ProtobufParser.setOffset - confirm that the
        # position helper actually sees this value.
        self.parser.offset = len(prefix)
        return self.parser.parse(prefix + code, lexer=self.lexer, debug=debug)

    def parse_file(self, _file, debug=0):
        """Parse the contents of ``_file`` (path or open file); see parse_string."""
        return self.parse_string(self._read_source(_file), debug=debug)