# ----------------------------------------------------------------------
# ctokens.py
#
# Token specifications for symbols in ANSI C and C++. This file is
# meant to be used as a library in other tokenizers.
# ----------------------------------------------------------------------
7
# Token names
#
# Names of the token types this library describes. No reserved words are
# defined here. Every name except 'TYPEID' has a matching t_<NAME> pattern
# in the visible part of this file; 'TYPEID' is left for the importing
# tokenizer to produce.
tokens = [
    # Literals (identifier, integer constant, float constant, string constant, char const)
    'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'INCREMENT', 'DECREMENT',

    # Structure dereference (->)
    'ARROW',

    # Ternary operator (?)
    'TERNARY',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
]
42
# Operators
#
# Each t_<NAME> string is the regular expression for the token <NAME>.
# NOTE: some patterns are prefixes of others ('<' / '<=' / '<<', '|' / '||',
# '&' / '&&', '!' / '!='). PLY's lex tries string-defined rules in order of
# decreasing pattern length, so the longer operator wins; a consumer that
# combines these patterns some other way must order them itself.
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MODULO = r'%'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='
64
# Assignment operators
#
# Plain '=' plus the compound forms. Several start with a character that is
# itself a shorter operator token ('*', '/', '<<', ...), so the consumer must
# prefer the longer match.

t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
t_XOREQUAL = r'\^='
78
# Increment/decrement (++, --)
t_INCREMENT = r'\+\+'
t_DECREMENT = r'--'

# Structure dereference (->)
t_ARROW = r'->'

# Ternary operator (?); '?' is a regex metacharacter, hence the escape
t_TERNARY = r'\?'
88
# Delimiters ( ) [ ] { } , . ; : and the ellipsis (...)
#
# Brackets, braces, parentheses and '.' are regex metacharacters and are
# therefore escaped. t_PERIOD is a prefix of t_ELLIPSIS, so the longer
# pattern must be preferred by whatever combines these rules.
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'
101
# Identifiers: a letter or underscore followed by letters, digits, underscores
t_ID = r'[A-Za-z_][A-Za-z0-9_]*'

# Integer literal: decimal digits with an optional u/l/ul/lu suffix
# (either case)
t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'

# Floating literal: digits '.' digits with an optional exponent, or digits
# with a mandatory exponent; optional l/L/f/F suffix. Only a lowercase 'e'
# exponent marker is accepted.
# NOTE: the spaces around '|' are unescaped whitespace. They are ignored only
# when the pattern is compiled with re.VERBOSE (PLY's lex does this); under a
# plain re.compile they would be matched literally.
t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal: double-quoted, backslash escapes allowed, no raw newline
t_STRING = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''
116
# Comment (C-Style)
#
# The function's docstring IS the token pattern (PLY convention), so all
# documentation lives in comments and the docstring must not be edited.
# Matches '/*' up to the first '*/', including newlines. The comment is
# returned as a token; note that 'COMMENT' is not in the `tokens` list of
# this file, so an importing tokenizer presumably has to declare it or
# discard the token.
def t_COMMENT(t):
    r'/\*(.|\n)*?\*/'
    # Keep the lexer's line counter in step with newlines inside the comment.
    t.lexer.lineno += t.value.count('\n')
    return t
122
# Comment (C++-Style)
#
# The function's docstring IS the token pattern (PLY convention), so all
# documentation lives in comments. Matches '//' through the end of the line.
# The comment is returned as a token; 'CPPCOMMENT' is not in the `tokens`
# list of this file, so an importing tokenizer presumably has to declare it
# or discard the token.
def t_CPPCOMMENT(t):
    r'//.*\n?'
    # The trailing newline is optional so that a comment on the final line of
    # an input without a terminating newline is still recognised. Count the
    # newlines actually consumed instead of assuming exactly one.
    t.lexer.lineno += t.value.count('\n')
    return t
128
129
130
131
132
133