Dusan Klinec | ccaa0d9 | 2014-11-09 03:21:31 +0100 | [diff] [blame] | 1 | # ---------------------------------------------------------------------- |
| 2 | # ctokens.py |
| 3 | # |
| 4 | # Token specifications for symbols in ANSI C and C++. This file is |
| 5 | # meant to be used as a library in other tokenizers. |
| 6 | # ---------------------------------------------------------------------- |
| 7 | |
# Token names (literals, operators and delimiters; no reserved words are defined here)
| 9 | |
# Names of every token kind described by this module, in declaration order.
# NOTE(review): no t_TYPEID pattern is visible in this module -- presumably
# the importing tokenizer decides when an identifier is a TYPEID; confirm.
tokens = [
    # Literals (identifier, type name, integer constant, float constant,
    # string constant, character constant)
    'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'INCREMENT', 'DECREMENT',

    # Structure dereference (->)
    'ARROW',

    # Ternary operator (?)
    'TERNARY',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
]
| 42 | |
# Operators
# Each t_NAME string is the regular expression for token NAME.
# NOTE(review): several one-character patterns ('<', '|', '&', '!') are
# prefixes of longer ones ('<=', '<<', '||', '&&', '!='). This presumably
# relies on the consuming lexer trying longer patterns first (PLY orders
# string rules by decreasing regex length) -- confirm for other consumers.
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MODULO = r'%'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='
| 64 | |
# Assignment operators (=, *=, /=, %=, +=, -=, <<=, >>=, &=, |=, ^=)
# Regex metacharacters (*, +, |, ^) are backslash-escaped so that each
# pattern matches the operator text literally.
t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
t_XOREQUAL = r'\^='
| 78 | |
# Increment/decrement (++, --)
t_INCREMENT = r'\+\+'
t_DECREMENT = r'--'

# Structure dereference (->)
t_ARROW = r'->'

# Ternary operator (?)
t_TERNARY = r'\?'

# Delimiters ( ) [ ] { } , . ; : and the ellipsis (...)
# NOTE(review): PERIOD is a prefix of ELLIPSIS; this presumably relies on the
# consuming lexer trying the longer pattern first -- confirm.
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'
| 101 | |
# Identifiers: a letter or underscore followed by letters, digits, underscores
t_ID = r'[A-Za-z_][A-Za-z0-9_]*'

# Integer literal: decimal digits with an optional u/l/ul/lu suffix (any case)
t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'

# Floating literal: digits with a fractional part and optional exponent, or
# digits with a mandatory exponent; optional l/L/f/F suffix.
# NOTE(review): the pattern contains literal spaces around '|', so it only
# matches as intended when compiled with re.VERBOSE (PLY's lex default) --
# confirm for any other consumer. It is also longer than t_INTEGER, which
# matters if the consuming lexer orders string rules by regex length.
t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal: double-quoted, backslash escapes allowed, no raw newline
t_STRING = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''
# Comment (C-Style)
# The raw string that opens the function body is the token's regular
# expression (a docstring-as-pattern rule), so it must remain the first
# statement and must not be replaced by a prose docstring.
def t_COMMENT(t):
    r'/\*(.|\n)*?\*/'
    # A block comment may span several lines: advance the lexer's line
    # counter by the number of newlines inside the matched text.
    t.lexer.lineno += t.value.count('\n')
    # The comment is returned as a token rather than discarded.
    return t
| 122 | |
# Comment (C++-Style)
# The raw string that opens the function body is the token's regular
# expression (a docstring-as-pattern rule), so it must remain the first
# statement and must not be replaced by a prose docstring.
def t_CPPCOMMENT(t):
    r'//.*\n?'
    # The trailing newline is optional so that a '//' comment on the last
    # line of input (no final newline) is still recognised as a comment.
    # When the newline is present it remains part of the token value.
    # Advance the line counter only by the newlines actually consumed.
    t.lexer.lineno += t.value.count('\n')
    return t
| 128 | |
| 129 | |
| 130 | |
| 131 | |
| 132 | |
| 133 | |