# -----------------------------------------------------------------------------
# cpp.py
#
# Author: David Beazley (http://www.dabeaz.com)
# Copyright (C) 2007
# All rights reserved
#
# This module implements an ANSI-C style lexical preprocessor for PLY.
# -----------------------------------------------------------------------------
from __future__ import generators

# -----------------------------------------------------------------------------
# Default preprocessor lexer definitions. These tokens are enough to get
# a basic preprocessor working. Other modules may import these if they want
# -----------------------------------------------------------------------------

tokens = (
    'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND','CPP_DPOUND'
)

literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""

# Whitespace
def t_CPP_WS(t):
    r'\s+'
    t.lexer.lineno += t.value.count("\n")
    return t

t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'

# Identifier
t_CPP_ID = r'[A-Za-z_][\w_]*'

# Integer literal
def CPP_INTEGER(t):
    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)'
    return t

t_CPP_INTEGER = CPP_INTEGER

# Floating literal
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal
def t_CPP_STRING(t):
    r'\"([^\\\n]|(\\(.|\n)))*?\"'
    t.lexer.lineno += t.value.count("\n")
    return t

# Character constant 'c' or L'c'
def t_CPP_CHAR(t):
    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
    t.lexer.lineno += t.value.count("\n")
    return t

# Comment
def t_CPP_COMMENT1(t):
    r'(/\*(.|\n)*?\*/)'
    ncr = t.value.count("\n")
    t.lexer.lineno += ncr
    # replace with one space or a number of '\n'
    t.type = 'CPP_WS'; t.value = '\n' * ncr if ncr else ' '
    return t

# Line comment
def t_CPP_COMMENT2(t):
    r'(//.*?(\n|$))'
    # replace with '\n' so the line break is not lost
    t.type = 'CPP_WS'; t.value = '\n'
    return t

def t_error(t):
    t.type = t.value[0]
    t.value = t.value[0]
    t.lexer.skip(1)
    return t

import re
import copy
import time
import os.path

# -----------------------------------------------------------------------------
# trigraph()
#
# Given an input string, this function replaces all trigraph sequences.
# The following mapping is used:
#
#     ??=    #
#     ??/    \
#     ??'    ^
#     ??(    [
#     ??)    ]
#     ??!    |
#     ??<    {
#     ??>    }
#     ??-    ~
# -----------------------------------------------------------------------------

_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')
_trigraph_rep = {
    '=':'#',
    '/':'\\',
    "'":'^',
    '(':'[',
    ')':']',
    '!':'|',
    '<':'{',
    '>':'}',
    '-':'~'
}

def trigraph(input):
    return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input)

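# Illustrative example (not part of the original file): trigraph() only
# rewrites the nine "??x" sequences listed above and leaves everything
# else untouched, e.g.
#
#     trigraph("??=define ARR(x) x??(0??)")  ->  "#define ARR(x) x[0]"
#     trigraph("a ??< b ??> c")              ->  "a { b } c"
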
# ------------------------------------------------------------------
# Macro object
#
# This object holds information about preprocessor macros
#
#    .name      - Macro name (string)
#    .value     - Macro value (a list of tokens)
#    .arglist   - List of argument names
#    .variadic  - Boolean indicating whether or not the macro is variadic
#    .vararg    - Name of the variadic parameter
#
# When a macro is created, the macro replacement token sequence is
# pre-scanned and used to create patch lists that are later used
# during macro expansion
# ------------------------------------------------------------------

class Macro(object):
    def __init__(self,name,value,arglist=None,variadic=False):
        self.name = name
        self.value = value
        self.arglist = arglist
        self.variadic = variadic
        if variadic:
            self.vararg = arglist[-1]
        self.source = None

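# Illustrative example (not part of the original file): after
# Preprocessor.define("SQUARE(x) ((x)*(x))") the stored Macro has
# name 'SQUARE', arglist ['x'], variadic False, and value holding the
# token sequence for ((x)*(x)); macro_prescan() then records the patch
# points for the two uses of 'x'.
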
# ------------------------------------------------------------------
# Preprocessor object
#
# Object representing a preprocessor.  Contains macro definitions,
# include directories, and other information
# ------------------------------------------------------------------

class Preprocessor(object):
    def __init__(self,lexer=None):
        if lexer is None:
            lexer = lex.lexer
        self.lexer = lexer
        self.macros = { }
        self.path = []
        self.temp_path = []

        # Probe the lexer for selected tokens
        self.lexprobe()

        tm = time.localtime()
        self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm))
        self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm))
        self.parser = None

    # -----------------------------------------------------------------------------
    # tokenize()
    #
    # Utility function. Given a string of text, tokenize into a list of tokens
    # -----------------------------------------------------------------------------

    def tokenize(self,text):
        tokens = []
        self.lexer.input(text)
        while True:
            tok = self.lexer.token()
            if not tok: break
            tokens.append(tok)
        return tokens

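    # Illustrative example (not part of the original file): with the default
    # lexer defined at the top of this module, a Preprocessor instance p gives
    #
    #     p.tokenize("a + 1")
    #
    # five tokens: CPP_ID 'a', CPP_WS ' ', literal '+', CPP_WS ' ',
    # and CPP_INTEGER '1'.
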
    # ---------------------------------------------------------------------
    # error()
    #
    # Report a preprocessor error/warning of some kind
    # ----------------------------------------------------------------------

    def error(self,file,line,msg):
        print("%s:%d %s" % (file,line,msg))

    # ----------------------------------------------------------------------
    # lexprobe()
    #
    # This method probes the preprocessor lexer object to discover
    # the token types of symbols that are important to the preprocessor.
    # If this works right, the preprocessor will simply "work"
    # with any suitable lexer regardless of how tokens have been named.
    # ----------------------------------------------------------------------

    def lexprobe(self):

        # Determine the token type for identifiers
        self.lexer.input("identifier")
        tok = self.lexer.token()
        if not tok or tok.value != "identifier":
            print("Couldn't determine identifier type")
        else:
            self.t_ID = tok.type

        # Determine the token type for integers
        self.lexer.input("12345")
        tok = self.lexer.token()
        if not tok or int(tok.value) != 12345:
            print("Couldn't determine integer type")
        else:
            self.t_INTEGER = tok.type
            self.t_INTEGER_TYPE = type(tok.value)

        # Determine the token type for strings enclosed in double quotes
        self.lexer.input("\"filename\"")
        tok = self.lexer.token()
        if not tok or tok.value != "\"filename\"":
            print("Couldn't determine string type")
        else:
            self.t_STRING = tok.type

        # Determine the token type for whitespace--if any
        self.lexer.input(" ")
        tok = self.lexer.token()
        if not tok or tok.value != " ":
            self.t_SPACE = None
        else:
            self.t_SPACE = tok.type

        # Determine the token type for newlines
        self.lexer.input("\n")
        tok = self.lexer.token()
        if not tok or tok.value != "\n":
            self.t_NEWLINE = None
            print("Couldn't determine token for newlines")
        else:
            self.t_NEWLINE = tok.type

        self.t_WS = (self.t_SPACE, self.t_NEWLINE)

        # Check for other characters used by the preprocessor
        chars = [ '<','>','#','##','\\','(',')',',','.']
        for c in chars:
            self.lexer.input(c)
            tok = self.lexer.token()
            if not tok or tok.value != c:
                print("Unable to lex '%s' required for preprocessor" % c)

    # ----------------------------------------------------------------------
    # add_path()
    #
    # Adds a search path to the preprocessor.
    # ----------------------------------------------------------------------

    def add_path(self,path):
        self.path.append(path)

    # ----------------------------------------------------------------------
    # group_lines()
    #
    # Given an input string, this function splits it into lines.  Trailing whitespace
    # is removed.  Any line ending with \ is grouped with the next line.  This
    # function forms the lowest level of the preprocessor---grouping text into
    # a line-by-line format.
    # ----------------------------------------------------------------------

    def group_lines(self,input):
        lex = self.lexer.clone()
        lines = [x.rstrip() for x in input.splitlines()]
        for i in xrange(len(lines)):
            j = i+1
            while lines[i].endswith('\\') and (j < len(lines)):
                lines[i] = lines[i][:-1]+lines[j]
                lines[j] = ""
                j += 1

        input = "\n".join(lines)
        lex.input(input)
        lex.lineno = 1

        current_line = []
        while True:
            tok = lex.token()
            if not tok:
                break
            current_line.append(tok)
            if tok.type in self.t_WS and '\n' in tok.value:
                yield current_line
                current_line = []

        if current_line:
            yield current_line

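    # Illustrative example (not part of the original file): given the input
    #
    #     #define A \
    #             1
    #     int x;
    #
    # the trailing backslash splices the first two physical lines into a
    # single logical line, so group_lines() yields two token lists: one for
    # the spliced "#define" line and one for "int x;".
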
    # ----------------------------------------------------------------------
    # tokenstrip()
    #
    # Remove leading/trailing whitespace tokens from a token list
    # ----------------------------------------------------------------------

    def tokenstrip(self,tokens):
        i = 0
        while i < len(tokens) and tokens[i].type in self.t_WS:
            i += 1
        del tokens[:i]
        i = len(tokens)-1
        while i >= 0 and tokens[i].type in self.t_WS:
            i -= 1
        del tokens[i+1:]
        return tokens


    # ----------------------------------------------------------------------
    # collect_args()
    #
    # Collects comma separated arguments from a list of tokens.  The arguments
    # must be enclosed in parentheses.  Returns a tuple (tokencount,args,positions)
    # where tokencount is the number of tokens consumed, args is a list of arguments,
    # and positions is a list of integers containing the starting index of each
    # argument.  Each argument is represented by a list of tokens.
    #
    # When collecting arguments, leading and trailing whitespace is removed
    # from each argument.
    #
    # This function properly handles nested parentheses and commas---these do not
    # define new arguments.
    # ----------------------------------------------------------------------

    def collect_args(self,tokenlist):
        args = []
        positions = []
        current_arg = []
        nesting = 1
        tokenlen = len(tokenlist)

        # Search for the opening '('.
        i = 0
        while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
            i += 1

        if (i < tokenlen) and (tokenlist[i].value == '('):
            positions.append(i+1)
        else:
            self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
            return 0, [], []

        i += 1

        while i < tokenlen:
            t = tokenlist[i]
            if t.value == '(':
                current_arg.append(t)
                nesting += 1
            elif t.value == ')':
                nesting -= 1
                if nesting == 0:
                    if current_arg:
                        args.append(self.tokenstrip(current_arg))
                        positions.append(i)
                    return i+1,args,positions
                current_arg.append(t)
            elif t.value == ',' and nesting == 1:
                args.append(self.tokenstrip(current_arg))
                positions.append(i+1)
                current_arg = []
            else:
                current_arg.append(t)
            i += 1

        # Missing end argument
        self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
        return 0, [],[]

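    # Illustrative example (not part of the original file): for the token
    # list of "(a, f(b, c), d)", collect_args() returns three arguments,
    # [a], [f(b, c)] and [d]; the comma inside the nested parentheses does
    # not start a new argument, and tokencount covers everything up to and
    # including the closing ')'.
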
    # ----------------------------------------------------------------------
    # macro_prescan()
    #
    # Examine the macro value (token sequence) and identify patch points
    # This is used to speed up macro expansion later on---we'll know
    # right away where to apply patches to the value to form the expansion
    # ----------------------------------------------------------------------

    def macro_prescan(self,macro):
        macro.patch     = []             # Standard macro arguments
        macro.str_patch = []             # String conversion expansion
        macro.var_comma_patch = []       # Variadic macro comma patch
        i = 0
        while i < len(macro.value):
            if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
                argnum = macro.arglist.index(macro.value[i].value)
                # Conversion of argument to a string
                if i > 0 and macro.value[i-1].value == '#':
                    macro.value[i] = copy.copy(macro.value[i])
                    macro.value[i].type = self.t_STRING
                    del macro.value[i-1]
                    macro.str_patch.append((argnum,i-1))
                    continue
                # Concatenation
                elif (i > 0 and macro.value[i-1].value == '##'):
                    macro.patch.append(('c',argnum,i-1))
                    del macro.value[i-1]
                    continue
                elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
                    macro.patch.append(('c',argnum,i))
                    i += 1
                    continue
                # Standard expansion
                else:
                    macro.patch.append(('e',argnum,i))
            elif macro.value[i].value == '##':
                if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
                        ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
                        (macro.value[i+1].value == macro.vararg):
                    macro.var_comma_patch.append(i-1)
            i += 1
        macro.patch.sort(key=lambda x: x[2],reverse=True)

    # ----------------------------------------------------------------------
    # macro_expand_args()
    #
    # Given a Macro and list of arguments (each a token list), this method
    # returns an expanded version of a macro.  The return value is a token sequence
    # representing the replacement macro tokens
    # ----------------------------------------------------------------------

    def macro_expand_args(self,macro,args):
        # Make a copy of the macro token sequence
        rep = [copy.copy(_x) for _x in macro.value]

        # Make string expansion patches.  These do not alter the length of the replacement sequence

        str_expansion = {}
        for argnum, i in macro.str_patch:
            if argnum not in str_expansion:
                str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\")
            rep[i] = copy.copy(rep[i])
            rep[i].value = str_expansion[argnum]

        # Make the variadic macro comma patch.  If the variadic macro argument is empty,
        # we get rid of the preceding comma
        comma_patch = False
        if macro.variadic and not args[-1]:
            for i in macro.var_comma_patch:
                rep[i] = None
            comma_patch = True

        # Make all other patches.  The order of these matters.  It is assumed that the patch list
        # has been sorted in reverse order of patch location since replacements will cause the
        # size of the replacement sequence to expand from the patch point.

        expanded = { }
        for ptype, argnum, i in macro.patch:
            # Concatenation.  Argument is left unexpanded
            if ptype == 'c':
                rep[i:i+1] = args[argnum]
            # Normal expansion.  Argument is macro expanded first
            elif ptype == 'e':
                if argnum not in expanded:
                    expanded[argnum] = self.expand_macros(args[argnum])
                rep[i:i+1] = expanded[argnum]

        # Get rid of removed comma if necessary
        if comma_patch:
            rep = [_i for _i in rep if _i]

        return rep


    # ----------------------------------------------------------------------
    # expand_macros()
    #
    # Given a list of tokens, this function performs macro expansion.
    # The expanded argument is a dictionary that contains macros already
    # expanded.  This is used to prevent infinite recursion.
    # ----------------------------------------------------------------------

    def expand_macros(self,tokens,expanded=None):
        if expanded is None:
            expanded = {}
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.type == self.t_ID:
                if t.value in self.macros and t.value not in expanded:
                    # Yes, we found a macro match
                    expanded[t.value] = True

                    m = self.macros[t.value]
                    if not m.arglist:
                        # A simple macro
                        ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
                        for e in ex:
                            e.lineno = t.lineno
                        tokens[i:i+1] = ex
                        i += len(ex)
                    else:
                        # A macro with arguments
                        j = i + 1
                        while j < len(tokens) and tokens[j].type in self.t_WS:
                            j += 1
                        if tokens[j].value == '(':
                            tokcount,args,positions = self.collect_args(tokens[j:])
                            if not m.variadic and len(args) != len(m.arglist):
                                self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
                                i = j + tokcount
                            elif m.variadic and len(args) < len(m.arglist)-1:
                                if len(m.arglist) > 2:
                                    self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
                                else:
                                    self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
                                i = j + tokcount
                            else:
                                if m.variadic:
                                    if len(args) == len(m.arglist)-1:
                                        args.append([])
                                    else:
                                        args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
                                        del args[len(m.arglist):]

                                # Get macro replacement text
                                rep = self.macro_expand_args(m,args)
                                rep = self.expand_macros(rep,expanded)
                                for r in rep:
                                    r.lineno = t.lineno
                                tokens[i:j+tokcount] = rep
                                i += len(rep)
                    del expanded[t.value]
                    continue
                elif t.value == '__LINE__':
                    t.type = self.t_INTEGER
                    t.value = self.t_INTEGER_TYPE(t.lineno)

            i += 1
        return tokens

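    # Illustrative example (not part of the original file): with SQUARE(x)
    # defined as ((x)*(x)), expand_macros() rewrites the tokens of
    # "SQUARE(1+2)" into the tokens of "((1+2)*(1+2))"; the 'expanded'
    # dictionary keeps a self-referential macro from expanding forever.
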
    # ----------------------------------------------------------------------
    # evalexpr()
    #
    # Evaluate an expression token sequence for the purposes of evaluating
    # integral expressions.
    # ----------------------------------------------------------------------

    def evalexpr(self,tokens):
        # tokens = tokenize(line)
        # Search for defined macros
        i = 0
        while i < len(tokens):
            if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
                j = i + 1
                needparen = False
                result = "0L"
                while j < len(tokens):
                    if tokens[j].type in self.t_WS:
                        j += 1
                        continue
                    elif tokens[j].type == self.t_ID:
                        if tokens[j].value in self.macros:
                            result = "1L"
                        else:
                            result = "0L"
                        if not needparen: break
                    elif tokens[j].value == '(':
                        needparen = True
                    elif tokens[j].value == ')':
                        break
                    else:
                        self.error(self.source,tokens[i].lineno,"Malformed defined()")
                    j += 1
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE(result)
                del tokens[i+1:j+1]
            i += 1
        tokens = self.expand_macros(tokens)
        for i,t in enumerate(tokens):
            if t.type == self.t_ID:
                tokens[i] = copy.copy(t)
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE("0L")
            elif t.type == self.t_INTEGER:
                tokens[i] = copy.copy(t)
                # Strip off any trailing suffixes
                tokens[i].value = str(tokens[i].value)
                while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
                    tokens[i].value = tokens[i].value[:-1]

        expr = "".join([str(x.value) for x in tokens])
        expr = expr.replace("&&"," and ")
        expr = expr.replace("||"," or ")
        expr = expr.replace("!"," not ")
        try:
            result = eval(expr)
        except StandardError:
            self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression")
            result = 0
        return result

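    # Illustrative example (not part of the original file): for the
    # expression tokens of "defined(FOO) && FOO > 1" with FOO undefined,
    # the defined(FOO) test and the remaining FOO identifier both reduce
    # to integer zero tokens, "&&" becomes "and", and the resulting Python
    # expression evaluates to a false value, so a surrounding #if block
    # would be skipped.
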
    # ----------------------------------------------------------------------
    # parsegen()
    #
    # Parse an input string.
    # ----------------------------------------------------------------------
    def parsegen(self,input,source=None):

        # Replace trigraph sequences
        t = trigraph(input)
        lines = self.group_lines(t)

        if not source:
            source = ""

        self.define("__FILE__ \"%s\"" % source)

        self.source = source
        chunk = []
        enable = True
        iftrigger = False
        ifstack = []

        for x in lines:
            for i,tok in enumerate(x):
                if tok.type not in self.t_WS: break
            if tok.value == '#':
                # Preprocessor directive

                # insert necessary whitespace instead of eaten tokens
                for tok in x:
                    if tok.type in self.t_WS and '\n' in tok.value:
                        chunk.append(tok)

                dirtokens = self.tokenstrip(x[i+1:])
                if dirtokens:
                    name = dirtokens[0].value
                    args = self.tokenstrip(dirtokens[1:])
                else:
                    name = ""
                    args = []

                if name == 'define':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.define(args)
                elif name == 'include':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        oldfile = self.macros['__FILE__']
                        for tok in self.include(args):
                            yield tok
                        self.macros['__FILE__'] = oldfile
                        self.source = source
                elif name == 'undef':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.undef(args)
                elif name == 'ifdef':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        if not args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'ifndef':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        if args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'if':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        result = self.evalexpr(args)
                        if not result:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'elif':
                    if ifstack:
                        if ifstack[-1][0]:        # We only pay attention if outer "if" allows this
                            if enable:            # If already true, we flip enable False
                                enable = False
                            elif not iftrigger:   # If False, but not triggered yet, we'll check expression
                                result = self.evalexpr(args)
                                if result:
                                    enable = True
                                    iftrigger = True
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")

                elif name == 'else':
                    if ifstack:
                        if ifstack[-1][0]:
                            if enable:
                                enable = False
                            elif not iftrigger:
                                enable = True
                                iftrigger = True
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #else")

                elif name == 'endif':
                    if ifstack:
                        enable,iftrigger = ifstack.pop()
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #endif")
                else:
                    # Unknown preprocessor directive
                    pass

            else:
                # Normal text
                if enable:
                    chunk.extend(x)

        for tok in self.expand_macros(chunk):
            yield tok
        chunk = []

    # ----------------------------------------------------------------------
    # include()
    #
    # Implementation of file-inclusion
    # ----------------------------------------------------------------------

    def include(self,tokens):
        # Try to extract the filename and then process an include file
        if not tokens:
            return
        if tokens:
            if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
                tokens = self.expand_macros(tokens)

            if tokens[0].value == '<':
                # Include <...>
                i = 1
                while i < len(tokens):
                    if tokens[i].value == '>':
                        break
                    i += 1
                else:
                    print("Malformed #include <...>")
                    return
                filename = "".join([x.value for x in tokens[1:i]])
                path = self.path + [""] + self.temp_path
            elif tokens[0].type == self.t_STRING:
                filename = tokens[0].value[1:-1]
                path = self.temp_path + [""] + self.path
            else:
                print("Malformed #include statement")
                return
        for p in path:
            iname = os.path.join(p,filename)
            try:
                data = open(iname,"r").read()
                dname = os.path.dirname(iname)
                if dname:
                    self.temp_path.insert(0,dname)
                for tok in self.parsegen(data,filename):
                    yield tok
                if dname:
                    del self.temp_path[0]
                break
            except IOError:
                pass
        else:
            print("Couldn't find '%s'" % filename)

    # ----------------------------------------------------------------------
    # define()
    #
    # Define a new macro
    # ----------------------------------------------------------------------

    def define(self,tokens):
        if isinstance(tokens,(str,unicode)):
            tokens = self.tokenize(tokens)

        linetok = tokens
        try:
            name = linetok[0]
            if len(linetok) > 1:
                mtype = linetok[1]
            else:
                mtype = None
            if not mtype:
                m = Macro(name.value,[])
                self.macros[name.value] = m
            elif mtype.type in self.t_WS:
                # A normal macro
                m = Macro(name.value,self.tokenstrip(linetok[2:]))
                self.macros[name.value] = m
            elif mtype.value == '(':
                # A macro with arguments
                tokcount, args, positions = self.collect_args(linetok[1:])
                variadic = False
                for a in args:
                    if variadic:
                        print("No more arguments may follow a variadic argument")
                        break
                    astr = "".join([str(_i.value) for _i in a])
                    if astr == "...":
                        variadic = True
                        a[0].type = self.t_ID
                        a[0].value = '__VA_ARGS__'
                        variadic = True
                        del a[1:]
                        continue
                    elif astr[-3:] == "..." and a[0].type == self.t_ID:
                        variadic = True
                        del a[1:]
                        # If, for some reason, "." is part of the identifier, strip off the name for the purposes
                        # of macro expansion
                        if a[0].value[-3:] == '...':
                            a[0].value = a[0].value[:-3]
                        continue
                    if len(a) > 1 or a[0].type != self.t_ID:
                        print("Invalid macro argument")
                        break
                else:
                    mvalue = self.tokenstrip(linetok[1+tokcount:])
                    i = 0
                    while i < len(mvalue):
                        if i+1 < len(mvalue):
                            if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
                                del mvalue[i]
                                continue
                            elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
                                del mvalue[i+1]
                        i += 1
                    m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
                    self.macro_prescan(m)
                    self.macros[name.value] = m
            else:
                print("Bad macro definition")
        except LookupError:
            print("Bad macro definition")

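    # Illustrative examples (not part of the original file):
    #
    #     p.define("PI 3.14159")                        # object-like macro
    #     p.define("MIN(a,b) ((a) < (b) ? (a) : (b))")  # macro with arguments
    #
    # A string argument is tokenized first; a token list (as produced for a
    # #define directive by parsegen()) is used directly.
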
    # ----------------------------------------------------------------------
    # undef()
    #
    # Undefine a macro
    # ----------------------------------------------------------------------

    def undef(self,tokens):
        id = tokens[0].value
        try:
            del self.macros[id]
        except LookupError:
            pass

    # ----------------------------------------------------------------------
    # parse()
    #
    # Parse input text.
    # ----------------------------------------------------------------------
    def parse(self,input,source=None,ignore={}):
        self.ignore = ignore
        self.parser = self.parsegen(input,source)

    # ----------------------------------------------------------------------
    # token()
    #
    # Method to return individual tokens
    # ----------------------------------------------------------------------
    def token(self):
        try:
            while True:
                tok = next(self.parser)
                if tok.type not in self.ignore: return tok
        except StopIteration:
            self.parser = None
            return None

if __name__ == '__main__':
    import ply.lex as lex
    lexer = lex.lex()

    # Run a preprocessor
    import sys
    f = open(sys.argv[1])
    input = f.read()

    p = Preprocessor(lexer)
    p.parse(input,sys.argv[1])
    while True:
        tok = p.token()
        if not tok: break
        print(p.source, tok)