| # ----------------------------------------------------------------------------- |
| # cpp.py |
| # |
| # Author: David Beazley (http://www.dabeaz.com) |
| # Copyright (C) 2007 |
| # All rights reserved |
| # |
| # This module implements an ANSI-C style lexical preprocessor for PLY. |
| # ----------------------------------------------------------------------------- |
| |
| # ----------------------------------------------------------------------------- |
| # Default preprocessor lexer definitions. These tokens are enough to get |
# a basic preprocessor working.  Other modules may import these if they want.
| # ----------------------------------------------------------------------------- |
| |
tokens = (
   'CPP_ID', 'CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS',
   'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND', 'CPP_DPOUND'
)
| |
| literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\"" |
| |
| # Whitespace |
| def t_CPP_WS(t): |
| r'\s+' |
| t.lexer.lineno += t.value.count("\n") |
| return t |
| |
| t_CPP_POUND = r'\#' |
| t_CPP_DPOUND = r'\#\#' |
| |
| # Identifier |
| t_CPP_ID = r'[A-Za-z_][\w_]*' |
| |
| # Integer literal |
| def CPP_INTEGER(t): |
| r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)' |
| return t |
| |
| t_CPP_INTEGER = CPP_INTEGER |
| |
| # Floating literal |
| t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' |
| |
| # String literal |
| def t_CPP_STRING(t): |
| r'\"([^\\\n]|(\\(.|\n)))*?\"' |
| t.lexer.lineno += t.value.count("\n") |
| return t |
| |
| # Character constant 'c' or L'c' |
| def t_CPP_CHAR(t): |
| r'(L)?\'([^\\\n]|(\\(.|\n)))*?\'' |
| t.lexer.lineno += t.value.count("\n") |
| return t |
| |
| # Comment |
| def t_CPP_COMMENT1(t): |
| r'(/\*(.|\n)*?\*/)' |
| ncr = t.value.count("\n") |
| t.lexer.lineno += ncr |
| # replace with one space or a number of '\n' |
| t.type = 'CPP_WS'; t.value = '\n' * ncr if ncr else ' ' |
| return t |
| |
| # Line comment |
| def t_CPP_COMMENT2(t): |
    r'(//.*?(\n|$))'
    # replace with '\n'
    t.type = 'CPP_WS'; t.value = '\n'
    return t
| |
| def t_error(t): |
| t.type = t.value[0] |
| t.value = t.value[0] |
| t.lexer.skip(1) |
| return t |
| |
import re
import copy
import time
import os.path

import ply.lex as lex   # needed for the default-lexer fallback in Preprocessor.__init__
| |
| # ----------------------------------------------------------------------------- |
| # trigraph() |
| # |
| # Given an input string, this function replaces all trigraph sequences. |
| # The following mapping is used: |
| # |
| # ??= # |
| # ??/ \ |
| # ??' ^ |
| # ??( [ |
| # ??) ] |
| # ??! | |
| # ??< { |
| # ??> } |
| # ??- ~ |
| # ----------------------------------------------------------------------------- |
| |
| _trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''') |
| _trigraph_rep = { |
| '=':'#', |
| '/':'\\', |
| "'":'^', |
| '(':'[', |
| ')':']', |
| '!':'|', |
| '<':'{', |
| '>':'}', |
| '-':'~' |
| } |
| |
| def trigraph(input): |
| return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input) |
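
# For example (illustrative):
#
#     trigraph("??=define ARR(x) x??(0??)")   # -> '#define ARR(x) x[0]'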
| |
| # ------------------------------------------------------------------ |
| # Macro object |
| # |
| # This object holds information about preprocessor macros |
| # |
| # .name - Macro name (string) |
| # .value - Macro value (a list of tokens) |
| # .arglist - List of argument names |
| # .variadic - Boolean indicating whether or not variadic macro |
| # .vararg - Name of the variadic parameter |
| # |
| # When a macro is created, the macro replacement token sequence is |
| # pre-scanned and used to create patch lists that are later used |
| # during macro expansion |
| # ------------------------------------------------------------------ |
| |
| class Macro(object): |
| def __init__(self,name,value,arglist=None,variadic=False): |
| self.name = name |
| self.value = value |
| self.arglist = arglist |
| self.variadic = variadic |
| if variadic: |
| self.vararg = arglist[-1] |
| self.source = None |
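
# For example (illustrative), a simple object-like macro can be built
# directly from a token list:
#
#     m = Macro('ONE', p.tokenize('1'))
#     p.macros['ONE'] = m
#
# although macros are normally created through Preprocessor.define() below.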
| |
| # ------------------------------------------------------------------ |
| # Preprocessor object |
| # |
| # Object representing a preprocessor. Contains macro definitions, |
| # include directories, and other information |
| # ------------------------------------------------------------------ |
| |
| class Preprocessor(object): |
| def __init__(self,lexer=None): |
| if lexer is None: |
| lexer = lex.lexer |
| self.lexer = lexer |
| self.macros = { } |
| self.path = [] |
| self.temp_path = [] |
| |
| # Probe the lexer for selected tokens |
| self.lexprobe() |
| |
| tm = time.localtime() |
| self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm)) |
| self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm)) |
| self.parser = None |
| |
| # ----------------------------------------------------------------------------- |
| # tokenize() |
| # |
| # Utility function. Given a string of text, tokenize into a list of tokens |
| # ----------------------------------------------------------------------------- |
| |
| def tokenize(self,text): |
| tokens = [] |
| self.lexer.input(text) |
| while True: |
| tok = self.lexer.token() |
| if not tok: break |
| tokens.append(tok) |
| return tokens |
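
    # A minimal usage sketch (illustrative; assumes a PLY lexer built from the
    # token rules in this module):
    #
    #     p = Preprocessor(lexer)
    #     [tok.value for tok in p.tokenize("x + 1")]
    #     # -> ['x', ' ', '+', ' ', '1']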
| |
| # --------------------------------------------------------------------- |
| # error() |
| # |
| # Report a preprocessor error/warning of some kind |
| # ---------------------------------------------------------------------- |
| |
| def error(self,file,line,msg): |
| print("%s:%d %s" % (file,line,msg)) |
| |
| # ---------------------------------------------------------------------- |
| # lexprobe() |
| # |
| # This method probes the preprocessor lexer object to discover |
| # the token types of symbols that are important to the preprocessor. |
| # If this works right, the preprocessor will simply "work" |
| # with any suitable lexer regardless of how tokens have been named. |
| # ---------------------------------------------------------------------- |
| |
| def lexprobe(self): |
| |
| # Determine the token type for identifiers |
| self.lexer.input("identifier") |
| tok = self.lexer.token() |
| if not tok or tok.value != "identifier": |
| print("Couldn't determine identifier type") |
| else: |
| self.t_ID = tok.type |
| |
| # Determine the token type for integers |
| self.lexer.input("12345") |
| tok = self.lexer.token() |
| if not tok or int(tok.value) != 12345: |
| print("Couldn't determine integer type") |
| else: |
| self.t_INTEGER = tok.type |
| self.t_INTEGER_TYPE = type(tok.value) |
| |
| # Determine the token type for strings enclosed in double quotes |
| self.lexer.input("\"filename\"") |
| tok = self.lexer.token() |
| if not tok or tok.value != "\"filename\"": |
| print("Couldn't determine string type") |
| else: |
| self.t_STRING = tok.type |
| |
| # Determine the token type for whitespace--if any |
| self.lexer.input(" ") |
| tok = self.lexer.token() |
| if not tok or tok.value != " ": |
| self.t_SPACE = None |
| else: |
| self.t_SPACE = tok.type |
| |
| # Determine the token type for newlines |
| self.lexer.input("\n") |
| tok = self.lexer.token() |
| if not tok or tok.value != "\n": |
| self.t_NEWLINE = None |
| print("Couldn't determine token for newlines") |
| else: |
| self.t_NEWLINE = tok.type |
| |
| self.t_WS = (self.t_SPACE, self.t_NEWLINE) |
| |
| # Check for other characters used by the preprocessor |
| chars = [ '<','>','#','##','\\','(',')',',','.'] |
| for c in chars: |
| self.lexer.input(c) |
| tok = self.lexer.token() |
| if not tok or tok.value != c: |
| print("Unable to lex '%s' required for preprocessor" % c) |
| |
| # ---------------------------------------------------------------------- |
| # add_path() |
| # |
| # Adds a search path to the preprocessor. |
| # ---------------------------------------------------------------------- |
| |
| def add_path(self,path): |
| self.path.append(path) |
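
    # For example (illustrative paths):
    #
    #     p.add_path(".")
    #     p.add_path("/usr/local/include")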
| |
| # ---------------------------------------------------------------------- |
| # group_lines() |
| # |
    # Given an input string, this function splits it into lines.  Trailing whitespace
    # is removed.  Any line ending with \ is grouped with the next line.  This
    # function forms the lowest level of the preprocessor---grouping text into
    # a line-by-line format.
| # ---------------------------------------------------------------------- |
| |
| def group_lines(self,input): |
| lex = self.lexer.clone() |
| lines = [x.rstrip() for x in input.splitlines()] |
        for i in range(len(lines)):
| j = i+1 |
| while lines[i].endswith('\\') and (j < len(lines)): |
| lines[i] = lines[i][:-1]+lines[j] |
| lines[j] = "" |
| j += 1 |
| |
| input = "\n".join(lines) |
| lex.input(input) |
| lex.lineno = 1 |
| |
| current_line = [] |
| while True: |
| tok = lex.token() |
| if not tok: |
| break |
| current_line.append(tok) |
| if tok.type in self.t_WS and '\n' in tok.value: |
| yield current_line |
| current_line = [] |
| |
| if current_line: |
| yield current_line |
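
    # For example, the two physical lines
    #
    #     #define A 1 \
    #     + 2
    #
    # are delivered as the single logical line "#define A 1 + 2".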
| |
| # ---------------------------------------------------------------------- |
| # tokenstrip() |
| # |
| # Remove leading/trailing whitespace tokens from a token list |
| # ---------------------------------------------------------------------- |
| |
| def tokenstrip(self,tokens): |
| i = 0 |
| while i < len(tokens) and tokens[i].type in self.t_WS: |
| i += 1 |
| del tokens[:i] |
| i = len(tokens)-1 |
| while i >= 0 and tokens[i].type in self.t_WS: |
| i -= 1 |
| del tokens[i+1:] |
| return tokens |
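
    # For example (illustrative):
    #
    #     tokenstrip(tokens of "  a + b  ")   ->   tokens of "a + b"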
| |
| |
| # ---------------------------------------------------------------------- |
| # collect_args() |
| # |
    # Collects comma-separated arguments from a list of tokens.  The arguments
    # must be enclosed in parentheses.  Returns a tuple (tokencount,args,positions)
| # where tokencount is the number of tokens consumed, args is a list of arguments, |
| # and positions is a list of integers containing the starting index of each |
| # argument. Each argument is represented by a list of tokens. |
| # |
| # When collecting arguments, leading and trailing whitespace is removed |
| # from each argument. |
| # |
    # This function properly handles nested parentheses; commas inside a
    # nested group do not delimit new arguments.
| # ---------------------------------------------------------------------- |
| |
| def collect_args(self,tokenlist): |
| args = [] |
| positions = [] |
| current_arg = [] |
| nesting = 1 |
| tokenlen = len(tokenlist) |
| |
| # Search for the opening '('. |
| i = 0 |
| while (i < tokenlen) and (tokenlist[i].type in self.t_WS): |
| i += 1 |
| |
| if (i < tokenlen) and (tokenlist[i].value == '('): |
| positions.append(i+1) |
| else: |
| self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments") |
| return 0, [], [] |
| |
| i += 1 |
| |
| while i < tokenlen: |
| t = tokenlist[i] |
| if t.value == '(': |
| current_arg.append(t) |
| nesting += 1 |
| elif t.value == ')': |
| nesting -= 1 |
| if nesting == 0: |
| if current_arg: |
| args.append(self.tokenstrip(current_arg)) |
| positions.append(i) |
| return i+1,args,positions |
| current_arg.append(t) |
| elif t.value == ',' and nesting == 1: |
| args.append(self.tokenstrip(current_arg)) |
| positions.append(i+1) |
| current_arg = [] |
| else: |
| current_arg.append(t) |
| i += 1 |
| |
| # Missing end argument |
| self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments") |
| return 0, [],[] |
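
    # For example (illustrative), given the token list of "(a, (b,c), d) tail",
    # this consumes everything through the final ')' and produces the three
    # arguments "a", "(b,c)" and "d"; the nested parentheses and the comma
    # inside them are kept intact.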
| |
| # ---------------------------------------------------------------------- |
| # macro_prescan() |
| # |
| # Examine the macro value (token sequence) and identify patch points |
| # This is used to speed up macro expansion later on---we'll know |
| # right away where to apply patches to the value to form the expansion |
| # ---------------------------------------------------------------------- |
| |
| def macro_prescan(self,macro): |
| macro.patch = [] # Standard macro arguments |
| macro.str_patch = [] # String conversion expansion |
| macro.var_comma_patch = [] # Variadic macro comma patch |
| i = 0 |
| while i < len(macro.value): |
| if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist: |
| argnum = macro.arglist.index(macro.value[i].value) |
| # Conversion of argument to a string |
| if i > 0 and macro.value[i-1].value == '#': |
| macro.value[i] = copy.copy(macro.value[i]) |
| macro.value[i].type = self.t_STRING |
| del macro.value[i-1] |
| macro.str_patch.append((argnum,i-1)) |
| continue |
| # Concatenation |
| elif (i > 0 and macro.value[i-1].value == '##'): |
| macro.patch.append(('c',argnum,i-1)) |
| del macro.value[i-1] |
| continue |
| elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'): |
| macro.patch.append(('c',argnum,i)) |
| i += 1 |
| continue |
| # Standard expansion |
| else: |
| macro.patch.append(('e',argnum,i)) |
| elif macro.value[i].value == '##': |
| if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \ |
| ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \ |
| (macro.value[i+1].value == macro.vararg): |
| macro.var_comma_patch.append(i-1) |
| i += 1 |
| macro.patch.sort(key=lambda x: x[2],reverse=True) |
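
    # For example (illustrative), after prescanning STR(x) -> #x :
    #
    #     macro.str_patch == [(0, 0)]    # argument 0 stringified at index 0
    #
    # and after prescanning CAT(a,b) -> a##b :
    #
    #     macro.patch == [('c',1,1), ('c',0,0)]   # concatenation patches,
    #                                             # reverse order of position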
| |
| # ---------------------------------------------------------------------- |
| # macro_expand_args() |
| # |
    # Given a Macro and list of arguments (each a token list), this method
    # returns an expanded version of the macro.  The return value is a token
    # sequence representing the replacement macro tokens.
| # ---------------------------------------------------------------------- |
| |
| def macro_expand_args(self,macro,args): |
| # Make a copy of the macro token sequence |
| rep = [copy.copy(_x) for _x in macro.value] |
| |
| # Make string expansion patches. These do not alter the length of the replacement sequence |
| |
| str_expansion = {} |
| for argnum, i in macro.str_patch: |
| if argnum not in str_expansion: |
| str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\") |
| rep[i] = copy.copy(rep[i]) |
| rep[i].value = str_expansion[argnum] |
| |
        # Make the variadic macro comma patch.  If the variadic macro argument
        # is empty, we get rid of the preceding comma.
| comma_patch = False |
| if macro.variadic and not args[-1]: |
| for i in macro.var_comma_patch: |
| rep[i] = None |
| comma_patch = True |
| |
| # Make all other patches. The order of these matters. It is assumed that the patch list |
| # has been sorted in reverse order of patch location since replacements will cause the |
| # size of the replacement sequence to expand from the patch point. |
| |
| expanded = { } |
| for ptype, argnum, i in macro.patch: |
| # Concatenation. Argument is left unexpanded |
| if ptype == 'c': |
| rep[i:i+1] = args[argnum] |
| # Normal expansion. Argument is macro expanded first |
| elif ptype == 'e': |
| if argnum not in expanded: |
| expanded[argnum] = self.expand_macros(args[argnum]) |
| rep[i:i+1] = expanded[argnum] |
| |
| # Get rid of removed comma if necessary |
| if comma_patch: |
| rep = [_i for _i in rep if _i] |
| |
| return rep |
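
    # For example (illustrative), expanding SQUARE(x) -> ((x)*(x)) with the
    # argument tokens of "a+1" yields the token sequence of "((a+1)*(a+1))".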
| |
| |
| # ---------------------------------------------------------------------- |
| # expand_macros() |
| # |
| # Given a list of tokens, this function performs macro expansion. |
| # The expanded argument is a dictionary that contains macros already |
| # expanded. This is used to prevent infinite recursion. |
| # ---------------------------------------------------------------------- |
| |
| def expand_macros(self,tokens,expanded=None): |
| if expanded is None: |
| expanded = {} |
| i = 0 |
| while i < len(tokens): |
| t = tokens[i] |
| if t.type == self.t_ID: |
| if t.value in self.macros and t.value not in expanded: |
| # Yes, we found a macro match |
| expanded[t.value] = True |
| |
| m = self.macros[t.value] |
| if not m.arglist: |
| # A simple macro |
| ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded) |
| for e in ex: |
| e.lineno = t.lineno |
| tokens[i:i+1] = ex |
| i += len(ex) |
| else: |
| # A macro with arguments |
| j = i + 1 |
| while j < len(tokens) and tokens[j].type in self.t_WS: |
| j += 1 |
                        if j < len(tokens) and tokens[j].value == '(':
| tokcount,args,positions = self.collect_args(tokens[j:]) |
| if not m.variadic and len(args) != len(m.arglist): |
| self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist))) |
| i = j + tokcount |
| elif m.variadic and len(args) < len(m.arglist)-1: |
| if len(m.arglist) > 2: |
| self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1)) |
| else: |
| self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1)) |
| i = j + tokcount |
| else: |
| if m.variadic: |
| if len(args) == len(m.arglist)-1: |
| args.append([]) |
| else: |
| args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1] |
| del args[len(m.arglist):] |
| |
| # Get macro replacement text |
| rep = self.macro_expand_args(m,args) |
| rep = self.expand_macros(rep,expanded) |
| for r in rep: |
| r.lineno = t.lineno |
| tokens[i:j+tokcount] = rep |
| i += len(rep) |
                        else:
                            # Macro name not followed by '(' -- not a macro
                            # call.  Skip ahead so we don't rescan it forever.
                            i = j
                    del expanded[t.value]
| continue |
| elif t.value == '__LINE__': |
| t.type = self.t_INTEGER |
| t.value = self.t_INTEGER_TYPE(t.lineno) |
| |
| i += 1 |
| return tokens |
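
    # A minimal sketch (illustrative):
    #
    #     p.define("TWO 2")
    #     p.define("DOUBLE(x) ((x)*TWO)")
    #     "".join(t.value for t in p.expand_macros(p.tokenize("DOUBLE(n)")))
    #     # -> '((n)*2)'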
| |
| # ---------------------------------------------------------------------- |
| # evalexpr() |
| # |
    # Evaluate a token sequence as an integral expression, as used by the
    # #if and #elif directives.
| # ---------------------------------------------------------------------- |
| |
| def evalexpr(self,tokens): |
| # Search for defined macros |
| i = 0 |
| while i < len(tokens): |
| if tokens[i].type == self.t_ID and tokens[i].value == 'defined': |
| j = i + 1 |
| needparen = False |
| result = "0L" |
| while j < len(tokens): |
| if tokens[j].type in self.t_WS: |
| j += 1 |
| continue |
| elif tokens[j].type == self.t_ID: |
| if tokens[j].value in self.macros: |
| result = "1L" |
| else: |
| result = "0L" |
| if not needparen: break |
| elif tokens[j].value == '(': |
| needparen = True |
| elif tokens[j].value == ')': |
| break |
| else: |
| self.error(self.source,tokens[i].lineno,"Malformed defined()") |
| j += 1 |
| tokens[i].type = self.t_INTEGER |
| tokens[i].value = self.t_INTEGER_TYPE(result) |
| del tokens[i+1:j+1] |
| i += 1 |
| tokens = self.expand_macros(tokens) |
| for i,t in enumerate(tokens): |
| if t.type == self.t_ID: |
| tokens[i] = copy.copy(t) |
| tokens[i].type = self.t_INTEGER |
| tokens[i].value = self.t_INTEGER_TYPE("0L") |
| elif t.type == self.t_INTEGER: |
| tokens[i] = copy.copy(t) |
| # Strip off any trailing suffixes |
| tokens[i].value = str(tokens[i].value) |
| while tokens[i].value[-1] not in "0123456789abcdefABCDEF": |
| tokens[i].value = tokens[i].value[:-1] |
| |
| expr = "".join([str(x.value) for x in tokens]) |
| expr = expr.replace("&&"," and ") |
| expr = expr.replace("||"," or ") |
| expr = expr.replace("!"," not ") |
| try: |
| result = eval(expr) |
        except Exception:
| self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression") |
| result = 0 |
| return result |
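
    # For example (illustrative), with FOO defined and BAR undefined:
    #
    #     p.evalexpr(p.tokenize("defined(FOO) && !defined(BAR)"))
    #     # -> a true value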
| |
| # ---------------------------------------------------------------------- |
| # parsegen() |
| # |
    # Parse an input string.
| # ---------------------------------------------------------------------- |
| def parsegen(self,input,source=None): |
| |
| # Replace trigraph sequences |
| t = trigraph(input) |
| lines = self.group_lines(t) |
| |
| if not source: |
| source = "" |
| |
| self.define("__FILE__ \"%s\"" % source) |
| |
| self.source = source |
| chunk = [] |
| enable = True |
| iftrigger = False |
| ifstack = [] |
| |
| for x in lines: |
| for i,tok in enumerate(x): |
| if tok.type not in self.t_WS: break |
| if tok.value == '#': |
| # Preprocessor directive |
| |
| # insert necessary whitespace instead of eaten tokens |
| for tok in x: |
| if tok.type in self.t_WS and '\n' in tok.value: |
| chunk.append(tok) |
| |
| dirtokens = self.tokenstrip(x[i+1:]) |
| if dirtokens: |
| name = dirtokens[0].value |
| args = self.tokenstrip(dirtokens[1:]) |
| else: |
| name = "" |
| args = [] |
| |
| if name == 'define': |
| if enable: |
| for tok in self.expand_macros(chunk): |
| yield tok |
| chunk = [] |
| self.define(args) |
| elif name == 'include': |
| if enable: |
| for tok in self.expand_macros(chunk): |
| yield tok |
| chunk = [] |
| oldfile = self.macros['__FILE__'] |
| for tok in self.include(args): |
| yield tok |
| self.macros['__FILE__'] = oldfile |
| self.source = source |
| elif name == 'undef': |
| if enable: |
| for tok in self.expand_macros(chunk): |
| yield tok |
| chunk = [] |
| self.undef(args) |
| elif name == 'ifdef': |
| ifstack.append((enable,iftrigger)) |
| if enable: |
                        if args[0].value not in self.macros:
| enable = False |
| iftrigger = False |
| else: |
| iftrigger = True |
| elif name == 'ifndef': |
| ifstack.append((enable,iftrigger)) |
| if enable: |
| if args[0].value in self.macros: |
| enable = False |
| iftrigger = False |
| else: |
| iftrigger = True |
| elif name == 'if': |
| ifstack.append((enable,iftrigger)) |
| if enable: |
| result = self.evalexpr(args) |
| if not result: |
| enable = False |
| iftrigger = False |
| else: |
| iftrigger = True |
| elif name == 'elif': |
| if ifstack: |
| if ifstack[-1][0]: # We only pay attention if outer "if" allows this |
| if enable: # If already true, we flip enable False |
| enable = False |
| elif not iftrigger: # If False, but not triggered yet, we'll check expression |
| result = self.evalexpr(args) |
| if result: |
| enable = True |
| iftrigger = True |
| else: |
| self.error(self.source,dirtokens[0].lineno,"Misplaced #elif") |
| |
| elif name == 'else': |
| if ifstack: |
| if ifstack[-1][0]: |
| if enable: |
| enable = False |
| elif not iftrigger: |
| enable = True |
| iftrigger = True |
| else: |
| self.error(self.source,dirtokens[0].lineno,"Misplaced #else") |
| |
| elif name == 'endif': |
| if ifstack: |
| enable,iftrigger = ifstack.pop() |
| else: |
| self.error(self.source,dirtokens[0].lineno,"Misplaced #endif") |
| else: |
| # Unknown preprocessor directive |
| pass |
| |
| else: |
| # Normal text |
| if enable: |
| chunk.extend(x) |
| |
| for tok in self.expand_macros(chunk): |
| yield tok |
| chunk = [] |
| |
| # ---------------------------------------------------------------------- |
| # include() |
| # |
| # Implementation of file-inclusion |
| # ---------------------------------------------------------------------- |
| |
| def include(self,tokens): |
| # Try to extract the filename and then process an include file |
        if not tokens:
            return
        if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
            tokens = self.expand_macros(tokens)
| |
| if tokens[0].value == '<': |
| # Include <...> |
| i = 1 |
| while i < len(tokens): |
| if tokens[i].value == '>': |
| break |
| i += 1 |
| else: |
| print("Malformed #include <...>") |
| return |
| filename = "".join([x.value for x in tokens[1:i]]) |
| path = self.path + [""] + self.temp_path |
| elif tokens[0].type == self.t_STRING: |
| filename = tokens[0].value[1:-1] |
| path = self.temp_path + [""] + self.path |
| else: |
| print("Malformed #include statement") |
| return |
| for p in path: |
| iname = os.path.join(p,filename) |
| try: |
                with open(iname,"r") as f:
                    data = f.read()
| dname = os.path.dirname(iname) |
| if dname: |
| self.temp_path.insert(0,dname) |
| for tok in self.parsegen(data,filename): |
| yield tok |
| if dname: |
| del self.temp_path[0] |
| break |
| except IOError: |
| pass |
| else: |
| print("Couldn't find '%s'" % filename) |
| |
| # ---------------------------------------------------------------------- |
| # define() |
| # |
| # Define a new macro |
| # ---------------------------------------------------------------------- |
| |
| def define(self,tokens): |
        if isinstance(tokens,str):
| tokens = self.tokenize(tokens) |
| |
| linetok = tokens |
| try: |
| name = linetok[0] |
| if len(linetok) > 1: |
| mtype = linetok[1] |
| else: |
| mtype = None |
| if not mtype: |
| m = Macro(name.value,[]) |
| self.macros[name.value] = m |
| elif mtype.type in self.t_WS: |
| # A normal macro |
| m = Macro(name.value,self.tokenstrip(linetok[2:])) |
| self.macros[name.value] = m |
| elif mtype.value == '(': |
| # A macro with arguments |
| tokcount, args, positions = self.collect_args(linetok[1:]) |
| variadic = False |
| for a in args: |
| if variadic: |
| print("No more arguments may follow a variadic argument") |
| break |
| astr = "".join([str(_i.value) for _i in a]) |
| if astr == "...": |
| variadic = True |
| a[0].type = self.t_ID |
| a[0].value = '__VA_ARGS__' |
| variadic = True |
| del a[1:] |
| continue |
| elif astr[-3:] == "..." and a[0].type == self.t_ID: |
| variadic = True |
| del a[1:] |
| # If, for some reason, "." is part of the identifier, strip off the name for the purposes |
| # of macro expansion |
| if a[0].value[-3:] == '...': |
| a[0].value = a[0].value[:-3] |
| continue |
| if len(a) > 1 or a[0].type != self.t_ID: |
| print("Invalid macro argument") |
| break |
| else: |
| mvalue = self.tokenstrip(linetok[1+tokcount:]) |
| i = 0 |
| while i < len(mvalue): |
| if i+1 < len(mvalue): |
| if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##': |
| del mvalue[i] |
| continue |
| elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS: |
| del mvalue[i+1] |
| i += 1 |
| m = Macro(name.value,mvalue,[x[0].value for x in args],variadic) |
| self.macro_prescan(m) |
| self.macros[name.value] = m |
| else: |
| print("Bad macro definition") |
| except LookupError: |
| print("Bad macro definition") |
| |
| # ---------------------------------------------------------------------- |
| # undef() |
| # |
| # Undefine a macro |
| # ---------------------------------------------------------------------- |
| |
| def undef(self,tokens): |
| id = tokens[0].value |
| try: |
| del self.macros[id] |
| except LookupError: |
| pass |
| |
| # ---------------------------------------------------------------------- |
| # parse() |
| # |
| # Parse input text. |
| # ---------------------------------------------------------------------- |
| def parse(self,input,source=None,ignore={}): |
| self.ignore = ignore |
| self.parser = self.parsegen(input,source) |
| |
| # ---------------------------------------------------------------------- |
| # token() |
| # |
| # Method to return individual tokens |
| # ---------------------------------------------------------------------- |
| def token(self): |
| try: |
| while True: |
| tok = next(self.parser) |
| if tok.type not in self.ignore: return tok |
| except StopIteration: |
| self.parser = None |
| return None |
| |
| if __name__ == '__main__': |
| import ply.lex as lex |
| lexer = lex.lex() |
| |
| # Run a preprocessor |
| import sys |
    with open(sys.argv[1]) as f:
        input = f.read()
| |
| p = Preprocessor(lexer) |
| p.parse(input,sys.argv[1]) |
| while True: |
| tok = p.token() |
| if not tok: break |
| print(p.source, tok) |
| |