blob: aabf2ebe6999788555aab15753e915d48d214228 [file] [log] [blame]
Zack Williams28f1e492019-02-01 10:02:56 -07001from __future__ import absolute_import
2from __future__ import print_function
Sapan Bhatiab1225872017-03-29 20:47:47 +02003
Zack Williams28f1e492019-02-01 10:02:56 -07004__author__ = "Dusan (Ph4r05) Klinec, Sapan Bhatia, ONF"
5__copyright__ = "Copyright (C) 2014 Dusan (ph4r05) Klinec, 2017-2019 ONF"
Dusan Klinecccaa0d92014-11-09 03:21:31 +01006__license__ = "Apache License, Version 2.0"
Dusan Klinecccaa0d92014-11-09 03:21:31 +01007
8import ply.lex as lex
9import ply.yacc as yacc
Dusan Klinecccaa0d92014-11-09 03:21:31 +010010
Zack Williamsbe7f36d2018-02-02 11:37:11 -070011from .model import (
12 DotName,
13 EnumDefinition,
14 EnumFieldDefinition,
15 ExtensionsDirective,
16 ExtensionsMax,
17 FieldDefinition,
18 FieldDirective,
19 FieldType,
20 ImportStatement,
21 LinkDefinition,
22 LinkSpec,
23 Literal,
24 MapDefinition,
25 MessageDefinition,
26 MessageExtension,
27 MethodDefinition,
28 Name,
29 OptionStatement,
30 PackageStatement,
31 PolicyDefinition,
32 ProtoFile,
33 ReduceDefinition,
34 ServiceDefinition,
35)
36
Zack Williams28f1e492019-02-01 10:02:56 -070037from .helpers import LexHelper, LU
38from .logicparser import FOLParser, FOLLexer, FOLParsingError
Sapan Bhatiaad79fee2017-06-26 23:35:57 -070039import ast
40
Zack Williamsbe7f36d2018-02-02 11:37:11 -070041
class PythonError(Exception):
    """Raised when a map/reduce body is not a Python lambda expression."""
Sapan Bhatiab1225872017-03-29 20:47:47 +020044
Zack Williamsbe7f36d2018-02-02 11:37:11 -070045
class ParsingError(Exception):
    """Parse failure that also carries the location of the offending text.

    ``error_range`` is stored exactly as given; within this file it is always
    a ``(lineno, lexpos, length)`` tuple.
    """

    def __init__(self, message, error_range):
        # The attribute is independent of the base-class initialisation, so
        # it can be recorded first.
        self.error_range = error_range
        super(ParsingError, self).__init__(message)
51
52
class ProtobufLexer(object):
    """PLY lexer rules for the extended protobuf dialect parsed in this file.

    PLY reads the ``t_*`` attributes of an instance of this class.  For a
    function rule the docstring *is* the regular expression, so explanatory
    text for those rules is kept in ``#`` comments rather than docstrings.
    Function rules are kept in their original order because PLY tries them
    in definition order.
    """

    # Reserved words.  t_NAME re-tags an identifier found in this tuple with
    # the upper-cased word as its token type.
    keywords = (
        'double',
        'float',
        'int32',
        'int64',
        'uint32',
        'uint64',
        'sint32',
        'sint64',
        'fixed32',
        'fixed64',
        'sfixed32',
        'sfixed64',
        'bool',
        'string',
        'bytes',
        'message',
        'required',
        'optional',
        'repeated',
        'enum',
        'extensions',
        'max',
        'extend',
        'to',
        'package',
        '_service',
        'rpc',
        'returns',
        'true',
        'false',
        'option',
        'import',
        'manytoone',
        'manytomany',
        'onetoone',
        'policy',
        'map',
        'reduce')

    # Line and block comments are deliberately absent: the comment rules
    # below discard their matches instead of emitting tokens.
    tokens = [
        'POLICYBODY',
        'NAME',
        'NUM',
        'STRING_LITERAL',
        'LBRACE', 'RBRACE', 'LBRACK', 'RBRACK',
        'LPAR', 'RPAR', 'EQ', 'SEMI', 'DOT',
        'ARROW', 'COLON', 'COMMA', 'SLASH',
        'DOUBLECOLON',
        'STARTTOKEN'
    ] + [k.upper() for k in keywords]

    # A '<' ... '>' delimited body, matched lazily.  The closing '>' must be
    # preceded by a character other than '-', so an embedded '->' does not
    # end the body.  PLY compiles rules with re.VERBOSE by default, so the
    # blanks inside the pattern are not significant.
    def t_POLICYBODY(self, t):
        r'< (.|\n)*? [^-]>'
        t.lexer.lineno += t.value.count('\n')
        return t

    literals = '()+-*/=?:,.^|&~!=[]{};<>@%'

    t_NUM = r'[+-]?\d+(\.\d+)?'
    t_STRING_LITERAL = r'\"([^\\\n]|(\\.))*?\"'

    t_ignore_LINE_COMMENT = '//.*'

    # Block comments may span lines; keep the line counter in step and
    # return nothing so that no token is produced.
    def t_BLOCK_COMMENT(self, t):
        r'/\*(.|\n)*?\*/'
        t.lexer.lineno += t.value.count('\n')

    t_LBRACE = '{'
    t_RBRACE = '}'
    t_LBRACK = '\\['
    t_RBRACK = '\\]'

    t_LPAR = '\\('
    t_RPAR = '\\)'
    t_EQ = '='
    t_SEMI = ';'
    t_ARROW = '\\-\\>'
    t_COLON = '\\:'
    t_SLASH = '\\/'
    t_COMMA = '\\,'
    t_DOT = '\\.'
    t_ignore = ' \t\f'
    t_STARTTOKEN = '\\+'
    t_DOUBLECOLON = '\\:\\:'

    # Identifiers; reserved words are re-tagged with their own token type.
    def t_NAME(self, t):
        '[A-Za-z_$][A-Za-z0-9_+$]*'
        if t.value in ProtobufLexer.keywords:
            t.type = t.value.upper()
        return t

    # One line per '\n' matched.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # One line per '\r\n' pair.  Floor division keeps lineno an int on
    # Python 3, where '/' would turn it into a float.
    def t_newline2(self, t):
        r'(\r\n)+'
        t.lexer.lineno += len(t.value) // 2

    # Report the offending character and resume scanning after it.
    def t_error(self, t):
        bad_char = t.value[0]
        print("Illegal character '{}' ({}) in line {}".format(
            bad_char, hex(ord(bad_char)), t.lexer.lineno))
        t.lexer.skip(1)
160
Dusan Klinecc9b031a2014-11-10 13:21:08 +0100161
def srcPort(x):
    """Return ``[FieldDirective(Name('port'), x)]``, or ``[]`` if x is falsy."""
    return [FieldDirective(Name('port'), x)] if x else []
167
168
class ProtobufParser(object):
    """PLY yacc rules that build the model objects imported by this file.

    The docstring of every ``p_*`` method is the grammar production PLY
    reads, so notes on those methods are written as ``#`` comments.  The
    methods are kept in their original order because PLY orders productions
    by where they are defined.
    """

    tokens = ProtobufLexer.tokens
    offset = 0
    # Shared helper used to attach lexical information to produced objects.
    lh = LexHelper()
    # Lexer/parser pair for the logic expressions found in policy bodies.
    fol_lexer = lex.lex(module=FOLLexer())  # , optimize=1)
    fol_parser = yacc.yacc(
        module=FOLParser(),
        start='goal',
        outputdir='/tmp',
        debug=0)

    def setOffset(self, of):
        # Record the offset here and on the LexHelper.
        self.offset = of
        self.lh.offset = of

    def p_empty(self, p):
        '''empty :'''
        pass

    def p_field_modifier(self, p):
        '''field_modifier : REQUIRED
                          | OPTIONAL
                          | REPEATED'''
        p[0] = LU.i(p, 1)

    def p_primitive_type(self, p):
        '''primitive_type : DOUBLE
                          | FLOAT
                          | INT32
                          | INT64
                          | UINT32
                          | UINT64
                          | SINT32
                          | SINT64
                          | FIXED32
                          | FIXED64
                          | SFIXED32
                          | SFIXED64
                          | BOOL
                          | STRING
                          | BYTES'''
        p[0] = LU.i(p, 1)

    def p_link_type(self, p):
        '''link_type : ONETOONE
                     | MANYTOONE
                     | MANYTOMANY'''
        p[0] = LU.i(p, 1)

    def p_field_id(self, p):
        '''field_id : NUM'''
        p[0] = LU.i(p, 1)

    def p_reverse_id(self, p):
        '''reverse_id : NUM'''
        p[0] = LU.i(p, 1)

    def p_rvalue(self, p):
        '''rvalue : NUM
                  | TRUE
                  | FALSE'''
        p[0] = LU.i(p, 1)

    def p_rvalue3(self, p):
        '''rvalue : STRING_LITERAL'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_rvalue2(self, p):
        '''rvalue : NAME'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_field_directives2(self, p):
        '''field_directives : empty'''
        p[0] = []

    def p_field_directives(self, p):
        '''field_directives : LBRACK field_directive_times RBRACK'''
        # The bracketed list is passed through as-is.
        p[0] = p[2]

    def p_field_directive(self, p):
        '''field_directive : NAME EQ rvalue'''
        p[0] = FieldDirective(Name(LU.i(p, 1)), LU.i(p, 3))
        self.lh.set_parse_object(p[0], p)

    def p_policy_opt_explicit(self, p):
        '''policy_opt : DOUBLECOLON NAME'''
        p[0] = p[2]

    def p_policy_opt_empty(self, p):
        '''policy_opt : empty'''
        p[0] = None

    def p_csv_expr(self, p):
        '''csv_expr : LPAR csv RPAR'''
        p[0] = p[2]

    def p_csv_expr2(self, p):
        '''csv_expr : empty'''
        p[0] = []

    def p_csv2(self, p):
        '''csv : empty'''
        # An empty list keeps "()" consistent with the empty csv_expr rule
        # and lets the recursive csv rule concatenate onto it.
        p[0] = []

    def p_csv(self, p):
        '''csv : dotname
               | csv COMMA dotname'''
        if len(p) == 2:
            p[0] = [LU(p, 1)]
        else:
            p[0] = p[1] + [LU(p, 3)]

    def p_field_directive_times(self, p):
        '''field_directive_times : field_directive_plus'''
        p[0] = p[1]

    def p_field_directive_times2(self, p):
        '''field_directive_times : empty'''
        p[0] = []

    def p_field_directive_plus(self, p):
        '''field_directive_plus : field_directive
                                | field_directive_plus COMMA field_directive'''
        if len(p) == 2:
            p[0] = [LU(p, 1)]
        else:
            p[0] = p[1] + [LU(p, 3)]

    def p_dotname(self, p):
        '''dotname : NAME
                   | dotname DOT NAME'''
        if len(p) == 2:
            p[0] = [LU(p, 1)]
        else:
            p[0] = p[1] + [LU(p, 3)]

    # Hack for cases when there is a field named 'message' or 'max'
    def p_fieldName(self, p):
        '''field_name : STARTTOKEN
                      | NAME
                      | MESSAGE
                      | MAX'''
        p[0] = Name(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_field_type(self, p):
        '''field_type : primitive_type'''
        p[0] = FieldType(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)

    def p_field_type2(self, p):
        '''field_type : dotname'''
        p[0] = DotName(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)
        p[0].deriveLex()

    def p_slash_name(self, p):
        '''slash_name : SLASH dotname'''
        p[0] = p[2]

    def p_slash_name2(self, p):
        '''slash_name : empty'''
        p[0] = None

    def p_colon_fieldname(self, p):
        '''colon_fieldname : COLON field_name'''
        p[0] = p[2]
        self.lh.set_parse_object(p[0], p)

    def p_colon_fieldname2(self, p):
        '''colon_fieldname : empty'''
        p[0] = None

    # TODO: Add directives to link definition
    def p_link_definition(self, p):
        '''
        link_definition : field_modifier link_type field_name policy_opt ARROW dotname slash_name colon_fieldname EQ field_id field_directives SEMI
        '''
        # A link yields an int32 field tagged with 'type'/'model' (and
        # optionally 'port') directives, paired with the LinkDefinition.
        p[0] = LinkSpec(
            FieldDefinition(
                LU.i(p, 1),
                Name('int32'),
                LU.i(p, 3),
                LU.i(p, 4),
                LU.i(p, 10),
                [
                    FieldDirective(Name('type'), Name('link')),
                    FieldDirective(Name('model'), LU.i(p, 6)),
                ] + srcPort(LU.i(p, 8)) + LU.i(p, 11)),
            LinkDefinition(
                LU.i(p, 2),
                LU.i(p, 3),
                LU.i(p, 6),
                LU.i(p, 7),
                LU.i(p, 8)))

        self.lh.set_parse_object(p[0], p)

    # TODO: Add directives to link definition
    def p_link_definition_with_reverse(self, p):
        '''
        link_definition_with_reverse : field_modifier link_type field_name policy_opt ARROW dotname slash_name colon_fieldname EQ field_id COLON reverse_id field_directives SEMI
        '''
        # Same shape as link_definition; the extra "COLON reverse_id" moves
        # the directives to slot 13 and supplies reverse_id from slot 12.
        p[0] = LinkSpec(
            FieldDefinition(
                LU.i(p, 1),
                Name('int32'),
                LU.i(p, 3),
                LU.i(p, 4),
                LU.i(p, 10),
                [
                    FieldDirective(Name('type'), Name('link')),
                    FieldDirective(Name('model'), LU.i(p, 6)),
                ] + srcPort(LU.i(p, 8)) + LU.i(p, 13)),
            LinkDefinition(
                LU.i(p, 2),
                LU.i(p, 3),
                LU.i(p, 6),
                LU.i(p, 7),
                LU.i(p, 8),
                reverse_id=LU.i(p, 12)))

        self.lh.set_parse_object(p[0], p)

    # Root of the field declaration.
    def p_field_definition(self, p):
        '''field_definition : field_modifier field_type field_name policy_opt EQ field_id field_directives SEMI'''
        p[0] = FieldDefinition(
            LU.i(p, 1),
            LU.i(p, 2),
            LU.i(p, 3),
            LU.i(p, 4),
            LU.i(p, 6),
            LU.i(p, 7))
        self.lh.set_parse_object(p[0], p)

    # Root of the enum field declaration.
    def p_enum_field(self, p):
        '''enum_field : field_name EQ NUM SEMI'''
        p[0] = EnumFieldDefinition(LU.i(p, 1), LU.i(p, 3))
        self.lh.set_parse_object(p[0], p)

    def p_enum_body_part(self, p):
        '''enum_body_part : enum_field
                          | option_directive'''
        p[0] = p[1]

    def p_enum_body(self, p):
        '''enum_body : enum_body_part
                     | enum_body enum_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_enum_body_opt(self, p):
        '''enum_body_opt : empty'''
        p[0] = []

    def p_enum_body_opt2(self, p):
        '''enum_body_opt : enum_body'''
        p[0] = p[1]

    def p_reduce_definition(self, p):
        '''reduce_definition : REDUCE NAME POLICYBODY'''
        # Drop the '<' / '>' delimiters, then require the remaining text to
        # parse as a single Python lambda expression.
        ltxt = p[3].lstrip('<').rstrip('>')
        al = ast.parse(ltxt).body[0]
        if not isinstance(al, ast.Expr):
            raise PythonError("reduce operator needs to be an expression")
        elif not isinstance(al.value, ast.Lambda):
            raise PythonError("reduce operator needs to be a lambda")

        p[0] = ReduceDefinition(Name(LU.i(p, 2)), ltxt)
        self.lh.set_parse_object(p[0], p)

    def p_map_definition(self, p):
        '''map_definition : MAP NAME POLICYBODY'''
        # Same validation as reduce_definition, with map-specific messages.
        ltxt = p[3].lstrip('<').rstrip('>')
        al = ast.parse(ltxt).body[0]
        if not isinstance(al, ast.Expr):
            raise PythonError("map operator needs to be an expression")
        elif not isinstance(al.value, ast.Lambda):
            raise PythonError("map operator needs to be a lambda")

        p[0] = MapDefinition(Name(LU.i(p, 2)), ltxt)
        self.lh.set_parse_object(p[0], p)

    def p_policy_definition(self, p):
        '''policy_definition : POLICY NAME POLICYBODY'''
        try:
            fol = self.fol_parser.parse(p[3], lexer=self.fol_lexer)
        except FOLParsingError as e:
            # Shift the nested parser's error position by this token's own
            # line and lexpos before re-raising as a ParsingError.
            lineno, lexpos, length = e.error_range
            raise ParsingError(
                "Policy parsing error in policy %s" %
                p[2], (p.lineno(3) + lineno, lexpos + p.lexpos(3), length))
        p[0] = PolicyDefinition(Name(LU.i(p, 2)), fol)
        self.lh.set_parse_object(p[0], p)

    # Root of the enum declaration.
    # enum_definition ::= 'enum' ident '{' { ident '=' integer ';' }* '}'
    def p_enum_definition(self, p):
        '''enum_definition : ENUM NAME LBRACE enum_body_opt RBRACE'''
        p[0] = EnumDefinition(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    def p_extensions_to(self, p):
        '''extensions_to : MAX'''
        p[0] = ExtensionsMax()
        self.lh.set_parse_object(p[0], p)

    def p_extensions_to2(self, p):
        '''extensions_to : NUM'''
        p[0] = LU.i(p, 1)

    # extensions_definition ::= 'extensions' integer 'to' integer ';'
    def p_extensions_definition(self, p):
        '''extensions_definition : EXTENSIONS NUM TO extensions_to SEMI'''
        p[0] = ExtensionsDirective(LU.i(p, 2), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # message_extension ::= 'extend' ident '{' message_body '}'
    def p_message_extension(self, p):
        '''message_extension : EXTEND NAME LBRACE message_body RBRACE'''
        p[0] = MessageExtension(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    def p_message_body_part(self, p):
        '''message_body_part : field_definition
                             | link_definition
                             | link_definition_with_reverse
                             | enum_definition
                             | option_directive
                             | message_definition
                             | extensions_definition
                             | message_extension'''
        p[0] = p[1]

    # message_body ::= { field_definition | enum_definition |
    # message_definition | extensions_definition | message_extension }*
    def p_message_body(self, p):
        '''message_body : empty'''
        p[0] = []

    # message_body ::= { field_definition | enum_definition |
    # message_definition | extensions_definition | message_extension }*
    def p_message_body2(self, p):
        '''message_body : message_body_part
                        | message_body message_body_part'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # Root of the message declaration.
    # message_definition = MESSAGE_ - ident("messageId") + LBRACE + message_body("body") + RBRACE
    def p_message_definition(self, p):
        '''message_definition : MESSAGE NAME policy_opt csv_expr LBRACE message_body RBRACE'''
        p[0] = MessageDefinition(
            Name(LU.i(p, 2)),
            LU.i(p, 3),
            LU.i(p, 4),
            LU.i(p, 6))
        self.lh.set_parse_object(p[0], p)

    # method_definition ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [
    # ident ] ')' ';'
    def p_method_definition(self, p):
        '''method_definition : RPC NAME LPAR NAME RPAR RETURNS LPAR NAME RPAR'''
        p[0] = MethodDefinition(
            Name(LU.i(p, 2)),
            Name(LU.i(p, 4)),
            Name(LU.i(p, 8)))
        self.lh.set_parse_object(p[0], p)

    def p_method_definition_opt(self, p):
        '''method_definition_opt : empty'''
        p[0] = []

    def p_method_definition_opt2(self, p):
        '''method_definition_opt : method_definition
                                 | method_definition_opt method_definition'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    # service_definition ::= 'service' ident '{' method_definition* '}'
    # service_definition = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(method_definition)) + RBRACE
    def p_service_definition(self, p):
        '''service_definition : _SERVICE NAME LBRACE method_definition_opt RBRACE'''
        p[0] = ServiceDefinition(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # package_directive ::= 'package' ident [ '.' ident]* ';'
    def p_package_directive(self, p):
        '''package_directive : PACKAGE dotname SEMI'''
        p[0] = PackageStatement(Name(LU.i(p, 2)))
        self.lh.set_parse_object(p[0], p)

    # import_directive = IMPORT_ - quotedString("importFileSpec") + SEMI
    def p_import_directive(self, p):
        '''import_directive : IMPORT STRING_LITERAL SEMI'''
        p[0] = ImportStatement(Literal(LU.i(p, 2)))
        self.lh.set_parse_object(p[0], p)

    def p_option_rvalue(self, p):
        '''option_rvalue : NUM
                         | TRUE
                         | FALSE'''
        p[0] = LU(p, 1)

    def p_option_rvalue2(self, p):
        '''option_rvalue : STRING_LITERAL'''
        p[0] = Literal(LU(p, 1))

    def p_option_rvalue3(self, p):
        '''option_rvalue : NAME'''
        p[0] = Name(LU.i(p, 1))

    # option_directive = OPTION_ - ident("optionName") + EQ + quotedString("optionValue") + SEMI
    def p_option_directive(self, p):
        '''option_directive : OPTION NAME EQ option_rvalue SEMI'''
        p[0] = OptionStatement(Name(LU.i(p, 2)), LU.i(p, 4))
        self.lh.set_parse_object(p[0], p)

    # topLevelStatement = Group(message_definition | message_extension | enum_definition | service_definition |
    # import_directive | option_directive | package_definition)
    def p_topLevel(self, p):
        '''topLevel : message_definition
                    | message_extension
                    | enum_definition
                    | policy_definition
                    | map_definition
                    | reduce_definition
                    | service_definition
                    | import_directive
                    | package_directive
                    | option_directive'''
        p[0] = p[1]

    def p_statements2(self, p):
        '''statements : topLevel
                      | statements topLevel'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_statements(self, p):
        '''statements : empty'''
        p[0] = []

    # parser = Optional(package_directive) + ZeroOrMore(topLevelStatement)
    def p_protofile(self, p):
        '''protofile : statements'''
        p[0] = ProtoFile(LU.i(p, 1))
        self.lh.set_parse_object(p[0], p)

    # Parsing starting point
    def p_goal(self, p):
        '''goal : STARTTOKEN protofile'''
        p[0] = p[2]

    def p_error(self, p):
        """Turn a PLY syntax error into a ParsingError.

        PLY passes ``None`` when the input ends unexpectedly; there is then
        no token to take a position from, so a zeroed range is reported.

        Raises:
            ParsingError: always; ``error_range`` is
                ``(lineno, lexpos, length)`` of the offending token, or
                ``(0, 0, 0)`` at end of input.
        """
        if p is None:
            raise ParsingError(
                "Parsing Error: unexpected end of input", (0, 0, 0))
        raise ParsingError("Parsing Error", (p.lineno, p.lexpos, len(p.value)))
655
Dusan Klinecccaa0d92014-11-09 03:21:31 +0100656
class ProtobufAnalyzer(object):
    """Pairs a ProtobufLexer-based lexer with a ProtobufParser-based parser.

    Offers helpers to tokenize or parse either a string or a file.
    """

    def __init__(self):
        self.lexer = lex.lex(module=ProtobufLexer())
        self.parser = yacc.yacc(
            module=ProtobufParser(),
            start='goal',
            debug=0,
            outputdir='/tmp')

    @staticmethod
    def _read_source(_file):
        """Return the full text of ``_file``.

        Args:
            _file: a path (``str``), which is opened and closed here, or an
                iterable of text lines such as an already-open file, which
                is left open for the caller to manage.
        """
        if isinstance(_file, str):
            with open(_file) as handle:
                return handle.read()
        return ''.join(_file)

    def tokenize_string(self, code):
        """Print every token the lexer produces for ``code``."""
        self.lexer.input(code)
        for token in self.lexer:
            print(token)

    def tokenize_file(self, _file):
        """Print every token of a file given as a path or an open file."""
        return self.tokenize_string(self._read_source(_file))

    def parse_string(self, code, debug=0, lineno=1, prefix='+'):
        """Parse ``code`` and return whatever the parser's 'goal' rule yields.

        Args:
            code: source text to parse.
            debug: passed through to the parser's ``parse`` call.
            lineno: line number the lexer starts counting from.
            prefix: text prepended to ``code`` before parsing; the default
                '+' is what the lexer turns into the STARTTOKEN that the
                'goal' rule expects first.
        """
        self.lexer.lineno = lineno
        # NOTE(review): this sets ``offset`` on the object returned by
        # yacc.yacc(), not through ProtobufParser.setOffset -- confirm that
        # this is the attribute consumers actually read.
        self.parser.offset = len(prefix)
        return self.parser.parse(prefix + code, lexer=self.lexer, debug=debug)

    def parse_file(self, _file, debug=0):
        """Parse a file given as a path or an open file; see parse_string."""
        return self.parse_string(self._read_source(_file), debug=debug)