blob: e976727183d03f19d1f3b5c02b10c90f46cf0eba [file] [log] [blame]
Dusan Klinecccaa0d92014-11-09 03:21:31 +01001__author__ = "Dusan (Ph4r05) Klinec"
Sapan Bhatiab1225872017-03-29 20:47:47 +02002
Dusan Klinecccaa0d92014-11-09 03:21:31 +01003__copyright__ = "Copyright (C) 2014 Dusan (ph4r05) Klinec"
4__license__ = "Apache License, Version 2.0"
5__version__ = "1.0"
6
7import ply.lex as lex
8import ply.yacc as yacc
Dusan Klinecccaa0d92014-11-09 03:21:31 +01009
Zack Williamsbe7f36d2018-02-02 11:37:11 -070010from .model import (
11 DotName,
12 EnumDefinition,
13 EnumFieldDefinition,
14 ExtensionsDirective,
15 ExtensionsMax,
16 FieldDefinition,
17 FieldDirective,
18 FieldType,
19 ImportStatement,
20 LinkDefinition,
21 LinkSpec,
22 Literal,
23 MapDefinition,
24 MessageDefinition,
25 MessageExtension,
26 MethodDefinition,
27 Name,
28 OptionStatement,
29 PackageStatement,
30 PolicyDefinition,
31 ProtoFile,
32 ReduceDefinition,
33 ServiceDefinition,
34)
35
Sapan Bhatia64c72512017-06-23 02:32:45 -070036from helpers import LexHelper, LU
Sapan Bhatia9c579722018-01-12 13:45:09 -050037from logicparser import FOLParser, FOLLexer, FOLParsingError
Sapan Bhatiaad79fee2017-06-26 23:35:57 -070038import ast
39
Zack Williamsbe7f36d2018-02-02 11:37:11 -070040
class PythonError(Exception):
    """Raised when an embedded map/reduce operator is not a lambda expression."""
Sapan Bhatiab1225872017-03-29 20:47:47 +020043
Zack Williamsbe7f36d2018-02-02 11:37:11 -070044
class ParsingError(Exception):
    """Parse failure that also records where in the input it happened.

    ``error_range`` is stored exactly as given; within this file it is always
    a ``(lineno, lexpos, length)`` tuple.
    """

    def __init__(self, message, error_range):
        # Only the message goes to the base class, so ``args`` and ``str()``
        # contain the message alone.
        Exception.__init__(self, message)
        self.error_range = error_range
50
51
class ProtobufLexer(object):
    """PLY lexer rules for the protobuf-like input language.

    PLY reads this class by introspection: ``tokens`` names the token types,
    each ``t_<NAME>`` string attribute is a regular expression, and for each
    ``t_<NAME>`` method the *docstring* is the regular expression.  Those
    docstrings are therefore code and must not be rewritten as prose.  The
    definition order of the ``t_`` methods is also significant to PLY and is
    kept as-is.
    """

    # Reserved words.  A NAME whose text is listed here is re-typed to the
    # upper-cased keyword (see t_NAME).
    keywords = (
        'double',
        'float',
        'int32',
        'int64',
        'uint32',
        'uint64',
        'sint32',
        'sint64',
        'fixed32',
        'fixed64',
        'sfixed32',
        'sfixed64',
        'bool',
        'string',
        'bytes',
        'message',
        'required',
        'optional',
        'repeated',
        'enum',
        'extensions',
        'max',
        'extend',
        'to',
        'package',
        '_service',
        'rpc',
        'returns',
        'true',
        'false',
        'option',
        'import',
        'manytoone',
        'manytomany',
        'onetoone',
        'policy',
        'map',
        'reduce')

    # All token types: the explicit ones plus one per keyword.
    tokens = [
        'POLICYBODY',
        'NAME',
        'NUM',
        'STRING_LITERAL',
        # 'LINE_COMMENT', 'BLOCK_COMMENT',
        'LBRACE', 'RBRACE', 'LBRACK', 'RBRACK',
        'LPAR', 'RPAR', 'EQ', 'SEMI', 'DOT',
        'ARROW', 'COLON', 'COMMA', 'SLASH',
        'DOUBLECOLON',
        'STARTTOKEN'
    ] + [k.upper() for k in keywords]

    def t_POLICYBODY(self, t):
        r'< (.|\n)*? [^-]>'
        # NOTE(review): the blanks inside the pattern are only insignificant
        # if the lexer is built with PLY's default re.VERBOSE flag - confirm
        # before changing how lex.lex() is called.
        # The body may span several lines; keep the line counter in sync.
        t.lexer.lineno += t.value.count('\n')
        return t

    literals = '()+-*/=?:,.^|&~!=[]{};<>@%'

    t_NUM = r'[+-]?\d+(\.\d+)?'
    t_STRING_LITERAL = r'\"([^\\\n]|(\\.))*?\"'

    t_ignore_LINE_COMMENT = '//.*'

    def t_BLOCK_COMMENT(self, t):
        r'/\*(.|\n)*?\*/'
        # Nothing is returned, so the comment yields no token; only the line
        # counter is advanced.
        t.lexer.lineno += t.value.count('\n')

    t_LBRACE = '{'
    t_RBRACE = '}'
    t_LBRACK = '\\['
    t_RBRACK = '\\]'

    t_LPAR = '\\('
    t_RPAR = '\\)'
    t_EQ = '='
    t_SEMI = ';'
    t_ARROW = '\\-\\>'
    t_COLON = '\\:'
    t_SLASH = '\\/'
    t_COMMA = '\\,'
    t_DOT = '\\.'
    t_ignore = ' \t\f'
    t_STARTTOKEN = '\\+'
    t_DOUBLECOLON = '\\:\\:'

    def t_NAME(self, t):
        '[A-Za-z_$][A-Za-z0-9_+$]*'
        # Identifiers that are reserved words get the keyword's token type.
        if t.value in ProtobufLexer.keywords:
            t.type = t.value.upper()
        return t

    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    def t_newline2(self, t):
        r'(\r\n)+'
        # Each line break is two characters here.  Floor division keeps
        # lineno an int; plain "/" would make it a float on Python 3.
        t.lexer.lineno += len(t.value) // 2

    def t_error(self, t):
        # Report the offending character and resume after it.
        print("Illegal character '{}' ({}) in line {}".format(
            t.value[0], hex(ord(t.value[0])), t.lexer.lineno))
        t.lexer.skip(1)
159
Dusan Klinecc9b031a2014-11-10 13:21:08 +0100160
def srcPort(x):
    """Return ``x`` wrapped as a one-element list holding a 'port' directive.

    A falsy ``x`` produces an empty list, so the result can always be
    concatenated onto another list of directives.
    """
    return [FieldDirective(Name('port'), x)] if x else []
166
167
class ProtobufParser(object):
    """PLY (yacc) grammar for the protobuf-like input language.

    Every ``p_<name>`` method is one grammar rule: its *docstring* is the
    production (so it is code, not documentation) and its body builds the
    value of the left-hand side in ``p[0]``.  The definition order of the
    ``p_`` methods is kept unchanged because PLY numbers productions by it.
    """

    tokens = ProtobufLexer.tokens
    offset = 0
    # Class-level objects: shared by all instances and built once, when the
    # class body is executed.
    lh = LexHelper()
    fol_lexer = lex.lex(module=FOLLexer())
    fol_parser = yacc.yacc(
        module=FOLParser(),
        start='goal',
        outputdir='/tmp',
        debug=0)

    def setOffset(self, of):
        # Keep the parser's offset and the LexHelper's offset in step.
        self.offset = of
        self.lh.offset = of

    @staticmethod
    def _lambda_source(body_token, kind):
        # Strip the <...> delimiters from a POLICYBODY token and check that
        # what remains is Python source whose first statement is a lambda.
        # ``kind`` ('map' or 'reduce') is only used in the error messages.
        # Returns the stripped source text.
        source = body_token.lstrip('<').rstrip('>')
        statements = ast.parse(source).body
        # An empty body (e.g. only whitespace between the delimiters) is
        # reported as PythonError rather than surfacing as an IndexError.
        if not statements or not isinstance(statements[0], ast.Expr):
            raise PythonError(
                "%s operator needs to be an expression" % kind)
        if not isinstance(statements[0].value, ast.Lambda):
            raise PythonError("%s operator needs to be a lambda" % kind)
        return source

    def p_empty(self, p):
        '''empty :'''
        pass

    def p_field_modifier(self, p):
        '''field_modifier : REQUIRED
                          | OPTIONAL
                          | REPEATED'''
        p[0] = LU.i(p, 1)

    def p_primitive_type(self, p):
        '''primitive_type : DOUBLE
                          | FLOAT
                          | INT32
                          | INT64
                          | UINT32
                          | UINT64
                          | SINT32
                          | SINT64
                          | FIXED32
                          | FIXED64
                          | SFIXED32
                          | SFIXED64
                          | BOOL
                          | STRING
                          | BYTES'''
        p[0] = LU.i(p, 1)

    def p_link_type(self, p):
        '''link_type : ONETOONE
                     | MANYTOONE
                     | MANYTOMANY'''
        p[0] = LU.i(p, 1)

    def p_field_id(self, p):
        '''field_id : NUM'''
        p[0] = LU.i(p, 1)

    def p_rvalue(self, p):
        '''rvalue : NUM
                  | TRUE
                  | FALSE'''
        p[0] = LU.i(p, 1)

    def p_rvalue3(self, p):
        '''rvalue : STRING_LITERAL'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_rvalue2(self, p):
        '''rvalue : NAME'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_field_directives2(self, p):
        '''field_directives : empty'''
        p[0] = []

    def p_field_directives(self, p):
        '''field_directives : LBRACK field_directive_times RBRACK'''
        p[0] = p[2]

    def p_field_directive(self, p):
        '''field_directive : NAME EQ rvalue'''
        p[0] = FieldDirective(Name(LU.i(p, 1)), LU.i(p, 3))
        self.lh.set_parse_object(p[0], p)

    def p_policy_opt_explicit(self, p):
        '''policy_opt : DOUBLECOLON NAME'''
        p[0] = p[2]

    def p_policy_opt_empty(self, p):
        '''policy_opt : empty'''
        p[0] = None

    def p_csv_expr(self, p):
        '''csv_expr : LPAR csv RPAR'''
        p[0] = p[2]

    def p_csv_expr2(self, p):
        '''csv_expr : empty'''
        p[0] = []

    def p_csv2(self, p):
        '''csv : empty'''
        # An empty list must be a real list: the recursive csv rule below
        # concatenates onto it, and csv_expr passes it through unchanged.
        p[0] = []

    def p_csv(self, p):
        '''csv : dotname
               | csv COMMA dotname'''
        if len(p) == 2:
            p[0] = [LU(p, 1)]
        else:
            p[0] = p[1] + [LU(p, 3)]

    def p_field_directive_times(self, p):
        '''field_directive_times : field_directive_plus'''
        p[0] = p[1]

    def p_field_directive_times2(self, p):
        '''field_directive_times : empty'''
        p[0] = []

    def p_field_directive_plus(self, p):
        '''field_directive_plus : field_directive
                                | field_directive_plus COMMA field_directive'''
        if len(p) == 2:
            p[0] = [LU(p, 1)]
        else:
            p[0] = p[1] + [LU(p, 3)]

    def p_dotname(self, p):
        '''dotname : NAME
                   | dotname DOT NAME'''
        if len(p) == 2:
            p[0] = [LU(p, 1)]
        else:
            p[0] = p[1] + [LU(p, 3)]

    # Hack for cases when there is a field named 'message' or 'max'
    def p_fieldName(self, p):
        '''field_name : STARTTOKEN
                      | NAME
                      | MESSAGE
                      | MAX'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_field_type(self, p):
        '''field_type : primitive_type'''
        p[0] = FieldType(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)

    def p_field_type2(self, p):
        '''field_type : dotname'''
        p[0] = DotName(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_slash_name(self, p):
        '''slash_name : SLASH dotname'''
        p[0] = p[2]

    def p_slash_name2(self, p):
        '''slash_name : empty'''
        p[0] = None

    def p_colon_fieldname(self, p):
        '''colon_fieldname : COLON field_name'''
        p[0] = p[2]
        self.lh.set_parse_object(p[0], p)

    def p_colon_fieldname2(self, p):
        '''colon_fieldname : empty'''
        p[0] = None

    # TODO: Add directives to link definition
    def p_link_definition(self, p):
        '''link_definition : field_modifier link_type field_name policy_opt ARROW dotname slash_name colon_fieldname EQ field_id field_directives SEMI'''
        # A link is represented twice: as an int32 field carrying
        # 'type'/'model' (and optionally 'port') directives, and as a
        # LinkDefinition describing the relation itself.  The LU.i calls are
        # deliberately not shared between the two objects.
        directives = [
            FieldDirective(Name('type'), Name('link')),
            FieldDirective(Name('model'), LU.i(p, 6)),
        ] + srcPort(LU.i(p, 8)) + LU.i(p, 11)
        field = FieldDefinition(
            LU.i(p, 1),
            Name('int32'),
            LU.i(p, 3),
            LU.i(p, 4),
            LU.i(p, 10),
            directives)
        link = LinkDefinition(
            LU.i(p, 2),
            LU.i(p, 3),
            LU.i(p, 6),
            LU.i(p, 7),
            LU.i(p, 8))
        p[0] = LinkSpec(field, link)

        self.lh.set_parse_object(p[0], p)

    # Root of the field declaration.
    def p_field_definition(self, p):
        '''field_definition : field_modifier field_type field_name policy_opt EQ field_id field_directives SEMI'''
        p[0] = FieldDefinition(
            LU.i(p, 1),
            LU.i(p, 2),
            LU.i(p, 3),
            LU.i(p, 4),
            LU.i(p, 6),
            LU.i(p, 7))
        self.lh.set_parse_object(p[0], p)

    # Root of the enum field declaration.
    def p_enum_field(self, p):
        '''enum_field : field_name EQ NUM SEMI'''
        p[0] = EnumFieldDefinition(LU.i(p, 1), LU.i(p, 3))
        self.lh.set_parse_object(p[0], p)

    def p_enum_body_part(self, p):
        '''enum_body_part : enum_field
                          | option_directive'''
        p[0] = p[1]

    def p_enum_body(self, p):
        '''enum_body : enum_body_part
                     | enum_body enum_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_enum_body_opt(self, p):
        '''enum_body_opt : empty'''
        p[0] = []

    def p_enum_body_opt2(self, p):
        '''enum_body_opt : enum_body'''
        p[0] = p[1]

    def p_reduce_definition(self, p):
        '''reduce_definition : REDUCE NAME POLICYBODY'''
        ltxt = self._lambda_source(p[3], 'reduce')
        p[0] = ReduceDefinition(Name(LU.i(p, 2)), ltxt)
        self.lh.set_parse_object(p[0], p)

    def p_map_definition(self, p):
        '''map_definition : MAP NAME POLICYBODY'''
        ltxt = self._lambda_source(p[3], 'map')
        p[0] = MapDefinition(Name(LU.i(p, 2)), ltxt)
        self.lh.set_parse_object(p[0], p)

    def p_policy_definition(self, p):
        '''policy_definition : POLICY NAME POLICYBODY'''
        try:
            fol = self.fol_parser.parse(p[3], lexer=self.fol_lexer)
        except FOLParsingError as e:
            # Translate the position reported by the nested parser into a
            # position in the enclosing input by adding the body token's
            # own line number and lex position.
            lineno, lexpos, length = e.error_range
            raise ParsingError(
                "Policy parsing error in policy %s" % p[2],
                (p.lineno(3) + lineno, lexpos + p.lexpos(3), length))
        p[0] = PolicyDefinition(Name(LU.i(p, 2)), fol)
        self.lh.set_parse_object(p[0], p)

    # Root of the enum declaration.
    # enum_definition ::= 'enum' ident '{' { ident '=' integer ';' }* '}'
    def p_enum_definition(self, p):
        '''enum_definition : ENUM NAME LBRACE enum_body_opt RBRACE'''
        p[0] = EnumDefinition(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    def p_extensions_to(self, p):
        '''extensions_to : MAX'''
        p[0] = ExtensionsMax()
        self.lh.set_parse_object(p[0], p)

    def p_extensions_to2(self, p):
        '''extensions_to : NUM'''
        p[0] = LU.i(p, 1)

    # extensions_definition ::= 'extensions' integer 'to' integer ';'
    def p_extensions_definition(self, p):
        '''extensions_definition : EXTENSIONS NUM TO extensions_to SEMI'''
        p[0] = ExtensionsDirective(LU.i(p, 2), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # message_extension ::= 'extend' ident '{' message_body '}'
    def p_message_extension(self, p):
        '''message_extension : EXTEND NAME LBRACE message_body RBRACE'''
        p[0] = MessageExtension(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    def p_message_body_part(self, p):
        '''message_body_part : field_definition
                             | link_definition
                             | enum_definition
                             | option_directive
                             | message_definition
                             | extensions_definition
                             | message_extension'''
        p[0] = p[1]

    # message_body ::= { field_definition | enum_definition |
    # message_definition | extensions_definition | message_extension }*
    def p_message_body(self, p):
        '''message_body : empty'''
        p[0] = []

    # message_body ::= { field_definition | enum_definition |
    # message_definition | extensions_definition | message_extension }*
    def p_message_body2(self, p):
        '''message_body : message_body_part
                        | message_body message_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # Root of the message declaration.
    # message_definition = MESSAGE_ - ident("messageId") + LBRACE + message_body("body") + RBRACE
    def p_message_definition(self, p):
        '''message_definition : MESSAGE NAME policy_opt csv_expr LBRACE message_body RBRACE'''
        p[0] = MessageDefinition(
            Name(LU.i(p, 2)),
            LU.i(p, 3),
            LU.i(p, 4),
            LU.i(p, 6))
        self.lh.set_parse_object(p[0], p)

    # method_definition ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [
    # ident ] ')' ';'
    def p_method_definition(self, p):
        '''method_definition : RPC NAME LPAR NAME RPAR RETURNS LPAR NAME RPAR'''
        p[0] = MethodDefinition(
            Name(LU.i(p, 2)),
            Name(LU.i(p, 4)),
            Name(LU.i(p, 8)))
        self.lh.set_parse_object(p[0], p)

    def p_method_definition_opt(self, p):
        '''method_definition_opt : empty'''
        p[0] = []

    def p_method_definition_opt2(self, p):
        '''method_definition_opt : method_definition
                                 | method_definition_opt method_definition'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # service_definition ::= 'service' ident '{' method_definition* '}'
    # service_definition = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(method_definition)) + RBRACE
    def p_service_definition(self, p):
        '''service_definition : _SERVICE NAME LBRACE method_definition_opt RBRACE'''
        p[0] = ServiceDefinition(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # package_directive ::= 'package' ident [ '.' ident]* ';'
    def p_package_directive(self, p):
        '''package_directive : PACKAGE dotname SEMI'''
        p[0] = PackageStatement(Name(LU.i(p, 2)))
        self.lh.set_parse_object(p[0], p)

    # import_directive = IMPORT_ - quotedString("importFileSpec") + SEMI
    def p_import_directive(self, p):
        '''import_directive : IMPORT STRING_LITERAL SEMI'''
        p[0] = ImportStatement(Literal(LU.i(p, 2)))
        self.lh.set_parse_object(p[0], p)

    def p_option_rvalue(self, p):
        '''option_rvalue : NUM
                         | TRUE
                         | FALSE'''
        p[0] = LU(p, 1)

    def p_option_rvalue2(self, p):
        '''option_rvalue : STRING_LITERAL'''
        p[0] = Literal(LU(p, 1))

    def p_option_rvalue3(self, p):
        '''option_rvalue : NAME'''
        p[0] = Name(LU.i(p, 1))

    # option_directive = OPTION_ - ident("optionName") + EQ + quotedString("optionValue") + SEMI
    def p_option_directive(self, p):
        '''option_directive : OPTION NAME EQ option_rvalue SEMI'''
        p[0] = OptionStatement(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # topLevelStatement = Group(message_definition | message_extension | enum_definition | service_definition | import_directive | option_directive | package_definition)
    def p_topLevel(self, p):
        '''topLevel : message_definition
                    | message_extension
                    | enum_definition
                    | policy_definition
                    | map_definition
                    | reduce_definition
                    | service_definition
                    | import_directive
                    | package_directive
                    | option_directive'''
        p[0] = p[1]

    def p_statements2(self, p):
        '''statements : topLevel
                      | statements topLevel'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_statements(self, p):
        '''statements : empty'''
        p[0] = []

    # parser = Optional(package_directive) + ZeroOrMore(topLevelStatement)
    def p_protofile(self, p):
        '''protofile : statements'''
        p[0] = ProtoFile(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)

    # Parsing starting point
    def p_goal(self, p):
        '''goal : STARTTOKEN protofile'''
        p[0] = p[2]

    def p_error(self, p):
        # PLY calls p_error(None) when the input ends unexpectedly; there is
        # no token to take a position from, so a (0, 0, 0) placeholder keeps
        # error_range a 3-tuple for callers that unpack it.
        if p is None:
            raise ParsingError(
                "Parsing Error: unexpected end of input", (0, 0, 0))
        raise ParsingError("Parsing Error", (p.lineno, p.lexpos, len(p.value)))
617
Dusan Klinecccaa0d92014-11-09 03:21:31 +0100618
class ProtobufAnalyzer(object):
    """Convenience front end bundling a ProtobufLexer and a ProtobufParser.

    Offers tokenizing (tokens are printed) and parsing of either a string or
    a file.  File arguments may be a path (``str``) or any iterable of text
    lines such as an already opened file object.
    """

    def __init__(self):
        self.lexer = lex.lex(module=ProtobufLexer())
        self.parser = yacc.yacc(
            module=ProtobufParser(),
            start='goal',
            debug=0,
            outputdir='/tmp')

    @staticmethod
    def _read_source(_file):
        # Return the full text of ``_file``.  A str is treated as a path and
        # the file is opened and closed here; anything else is iterated as
        # lines and left open, since the caller owns it.
        if isinstance(_file, str):
            # open() exists on both Python 2 and 3; the former file()
            # builtin is Python 2 only.
            with open(_file) as handle:
                return handle.read()
        return ''.join(_file)

    def tokenize_string(self, code):
        """Feed ``code`` to the lexer and print every token it produces."""
        self.lexer.input(code)
        for token in self.lexer:
            print(token)

    def tokenize_file(self, _file):
        """Tokenize the contents of ``_file`` (path or iterable of lines)."""
        return self.tokenize_string(self._read_source(_file))

    def parse_string(self, code, debug=0, lineno=1, prefix='+'):
        """Parse ``code`` and return whatever the parser's goal rule yields.

        ``prefix`` is prepended to the input before parsing (the grammar's
        goal rule starts with STARTTOKEN, whose pattern matches '+'), and the
        lexer's line counter is reset to ``lineno`` first.
        """
        self.lexer.lineno = lineno
        # NOTE(review): this sets ``offset`` on the object returned by
        # yacc.yacc(), not via ProtobufParser.setOffset - confirm that the
        # LexHelper offset is meant to stay untouched here.
        self.parser.offset = len(prefix)
        return self.parser.parse(prefix + code, lexer=self.lexer, debug=debug)

    def parse_file(self, _file, debug=0):
        """Parse the contents of ``_file`` (path or iterable of lines)."""
        return self.parse_string(self._read_source(_file), debug=debug)