# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2011,
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions.  The grammar is specified by supplying the BNF inside
# Python documentation strings.  The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system.  PLY might be viewed as a cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist).  However, most of the variables used during table
# construction are defined in terms of global variables.  Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what one might
# consider to be good Python "coding style."   Modify the code at your
# own risk!
# ----------------------------------------------------------------------------
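
# -----------------------------------------------------------------------------
# Illustrative sketch (not part of the implementation): with PLY, a grammar
# rule is an ordinary Python function whose docstring holds the BNF.  The
# names below (p_expr_plus, expr, PLUS, term) are hypothetical:
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]
#
# yacc.yacc() gathers every p_* function from the calling module and builds
# the LR tables from their docstrings.
# -----------------------------------------------------------------------------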

__version__    = "3.5"
__tabversion__ = "3.2"       # Table version

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = 1                # Debugging mode.  If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = 0                # Set to True if developing yacc.  This turns off optimized
                               # implementations of certain functions.

resultlimit = 40               # Size limit of results when running in debug mode.

pickle_protocol = 0            # Protocol to use when writing pickle files

import re, types, sys, os.path, inspect

# Compatibility function for python 2.6/3.0
if sys.version_info[0] < 3:
    def func_code(f):
        return f.func_code
else:
    def func_code(f):
        return f.__code__

# String type-checking compatibility
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

# Compatibility
try:
    MAXINT = sys.maxint
except AttributeError:
    MAXINT = sys.maxsize

# Python 2.x/3.0 compatibility.
def load_ply_lex():
    if sys.version_info[0] < 3:
        import lex
    else:
        import ply.lex as lex
    return lex

# This object is a stand-in for a logging object created by the
# logging module.  PLY will use this by default to create things
# such as the parser.out file.  If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    def __init__(self,f):
        self.f = f
    def debug(self,msg,*args,**kwargs):
        self.f.write((msg % args) + "\n")
    info = debug

    def warning(self,msg,*args,**kwargs):
        self.f.write("WARNING: "+ (msg % args) + "\n")

    def error(self,msg,*args,**kwargs):
        self.f.write("ERROR: " + (msg % args) + "\n")

    critical = debug

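# Illustrative sketch (hypothetical usage): any object providing debug/info/
# warning/error/critical methods can stand in for a PlyLogger.  For example,
# routing PLY's output through the standard logging module:
#
#     import logging
#     logging.basicConfig(level=logging.DEBUG, filename='parser.log')
#     log = logging.getLogger()
#     parser = yacc.yacc(debuglog=log, errorlog=log)
#     result = parser.parse(data, debug=log)
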
# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self,name):
        return self
    def __call__(self,*args,**kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception): pass

# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str: repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit]+" ..."
    result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
    return result


# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str: repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return "<%s @ 0x%x>" % (type(r).__name__,id(r))

# Panic mode error recovery support.  This feature is being reworked--much of the
# code here is to offer a deprecation/backwards-compatible transition

_errok = None
_token = None
_restart = None
_warnmsg = """PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
"""
import warnings
def errok():
    warnings.warn(_warnmsg)
    return _errok()

def restart():
    warnings.warn(_warnmsg)
    return _restart()

def token():
    warnings.warn(_warnmsg)
    return _token()

# Utility function to call the p_error() function with some deprecation hacks
def call_errorfunc(errorfunc,token,parser):
    global _errok, _token, _restart
    _errok = parser.errok
    _token = parser.token
    _restart = parser.restart
    r = errorfunc(token)
    del _errok, _token, _restart
    # Return the result so callers can use a token handed back by p_error()
    return r

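# Illustrative sketch (hypothetical token type SEMI): a p_error() that
# resynchronizes by discarding tokens up to the next ';' using the parser
# instance methods recommended above:
#
#     def p_error(p):
#         while True:
#             tok = parser.token()              # Get the next token
#             if not tok or tok.type == 'SEMI': break
#         parser.errok()                        # Leave error-recovery mode
#         return tok                            # Use ';' as the next lookahead
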
#-----------------------------------------------------------------------------
#                        === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self): return self.type
    def __repr__(self): return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule.  Index lookup and assignment actually read and write the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).  The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
# representing the range of positional information for a symbol.

class YaccProduction:
    def __init__(self,s,stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None
    def __getitem__(self,n):
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self,n,v):
        self.slice[n].value = v

    def __getslice__(self,i,j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self,n):
        return getattr(self.slice[n],"lineno",0)

    def set_lineno(self,n,lineno):
        self.slice[n].lineno = lineno

    def linespan(self,n):
        startline = getattr(self.slice[n],"lineno",0)
        endline = getattr(self.slice[n],"endlineno",startline)
        return startline,endline

    def lexpos(self,n):
        return getattr(self.slice[n],"lexpos",0)

    def lexspan(self,n):
        startpos = getattr(self.slice[n],"lexpos",0)
        endpos = getattr(self.slice[n],"endlexpos",startpos)
        return startpos,endpos

    def error(self):
        raise SyntaxError

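# Illustrative sketch (hypothetical rule): inside a grammar rule, p is a
# YaccProduction.  Indexing reads and writes symbol values; the accessors
# above expose position information (end positions require tracking=True):
#
#     def p_assign(p):
#         'assign : NAME EQUALS expr'
#         p[0] = ('assign', p[1], p[3])     # .value of NAME and expr
#         line = p.lineno(1)                # line number of NAME
#         start,end = p.lexspan(3)          # lexpos range covered by expr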

# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    def __init__(self,lrtab,errorf):
        self.productions = lrtab.lr_productions
        self.action      = lrtab.lr_action
        self.goto        = lrtab.lr_goto
        self.errorfunc   = errorf

    def errok(self):
        self.errorok = 1

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug,int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
        elif tracking:
            return self.parseopt(input,lexer,debug,tracking,tokenfunc)
        else:
            return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)

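    # Illustrative sketch (hypothetical data/handles): how the dispatch in
    # parse() above selects an engine.  Any true debug value other than a
    # logger object is replaced by a PlyLogger writing to stderr:
    #
    #     result = parser.parse(data)                    # -> parseopt_notrack()
    #     result = parser.parse(data, tracking=1)        # -> parseopt()
    #     result = parser.parse(data, debug=1)           # -> parsedebug()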

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse().  All changes made to the
    # parsing engine should be made here.  For the non-debugging version,
    # copy this code to a method parseopt() and delete all of the sections
    # enclosed in:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # --! DEBUG
        debug.info("PLY: PARSE DEBUG START")
        # --! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            # --! DEBUG
            debug.debug('')
            debug.debug('State  : %s', state)
            # --! DEBUG

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = "$end"

            # --! DEBUG
            debug.debug('Stack  : %s',
                        ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            # --! DEBUG

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    # --! DEBUG
                    debug.debug("Action : Shift and goto state %s", t)
                    # --! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    # --! DEBUG
                    if plen:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
                    else:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
                    # --! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                            continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                            continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n,"value",None)
                    # --! DEBUG
                    debug.info("Done   : Returning %s", format_result(result))
                    debug.info("PLY: PARSE DEBUG END")
                    # --! DEBUG
                    return result

            if t is None:

                # --! DEBUG
                debug.error('Error  : %s',
                            ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                # --! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user-defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None                # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = errtoken.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        if tracking:
                            sym.endlineno = getattr(lookahead,"lineno", sym.lineno)
                            sym.endlexpos = getattr(lookahead,"lexpos", sym.lexpos)
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    if hasattr(lookahead,"lexpos"):
                        t.lexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY.
    # Edit the debug version above, then copy any modifications to the method
    # below while removing #--! DEBUG sections.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


    def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                            continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                            continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user-defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None                # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = errtoken.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        if tracking:
                            sym.endlineno = getattr(lookahead,"lineno", sym.lineno)
                            sym.endlexpos = getattr(lookahead,"lexpos", sym.lexpos)
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    if hasattr(lookahead,"lexpos"):
                        t.lexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY.  Copy the optimized version and remove
    # code in the #--! TRACKING sections
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                            continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                            continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user-defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None                # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = errtoken.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    if hasattr(lookahead,"lexpos"):
                        t.lexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")

# -----------------------------------------------------------------------------
#                          === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

import re

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production.  For example 'expr'
#       prod     - A list of symbols on the right side ['expr','PLUS','term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are computed automatically during construction:
#
#       len      - Length of the production (number of symbols on right hand side)
#       usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    reduced = 0
    def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
        self.name     = name
        self.prod     = tuple(prod)
        self.number   = number
        self.func     = func
        self.callable = None
        self.file     = file
        self.line     = line
        self.prec     = precedence

        # Internal settings used during table construction

        self.len = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = [ ]
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production("+str(self)+")"

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self,index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self,n):
        if n > len(self.prod): return None
        p = LRItem(self,n)

        # Precompute the list of productions immediately following.  Hack. Remove later
        try:
            p.lr_after = Prodnames[p.prod[n+1]]
        except (IndexError,KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None

        return p

    # Bind the production function name to a callable
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]

# This class serves as a minimal stand-in for Production objects when
# reading table data from files.  It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self,str,name,len,func,file,line):
        self.name     = name
        self.len      = len
        self.func     = func
        self.callable = None
        self.file     = file
        self.line     = line
        self.str      = str
    def __str__(self):
        return self.str
    def __repr__(self):
        return "MiniProduction(%s)" % self.str

    # Bind the production function name to a callable
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]


# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule.  For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse.  Here are
# its basic attributes:
#
#       name       - Name of the production.  For example 'expr'
#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
#       number     - Production number.
#
#       lr_next    - Next LR item.  For example, if we are 'expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
# -----------------------------------------------------------------------------

class LRItem(object):
    def __init__(self,p,n):
        self.name       = p.name
        self.prod       = list(p.prod)
        self.number     = p.number
        self.lr_index   = n
        self.lookaheads = { }
        self.prod.insert(n,".")
        self.prod       = tuple(self.prod)
        self.len        = len(self.prod)
        self.usyms      = p.usyms

    def __str__(self):
        if self.prod:
            s = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            s = "%s -> <empty>" % self.name
        return s

    def __repr__(self):
        return "LRItem("+str(self)+")"

# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols.  Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None
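
# For example (hypothetical symbols), given terminals {'PLUS','NUM'}:
#
#     rightmost_terminal(['expr','PLUS','term'], {'PLUS','NUM'})  -> 'PLUS'
#     rightmost_terminal(['term'], {'PLUS','NUM'})                -> None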

# -----------------------------------------------------------------------------
#                           === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError): pass

class Grammar(object):
    def __init__(self,terminals):
        self.Productions  = [None]  # A list of all of the productions.  The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames    = { }    # A dictionary mapping the names of nonterminals to a list of all
                                   # productions of that nonterminal.

        self.Prodmap      = { }    # A dictionary that is only used to detect duplicate
                                   # productions.

        self.Terminals    = { }    # A dictionary mapping the names of terminal symbols to a
                                   # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = { }    # A dictionary mapping names of nonterminals to a list
                                   # of rule numbers where they are used.

        self.First        = { }    # A dictionary of precomputed FIRST(x) symbols

        self.Follow       = { }    # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence   = { }    # Precedence rules for each terminal. Contains tuples of the
                                   # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = { }  # Precedence rules that were actually used by the grammar.
                                   # This is only used to provide error checking and to generate
                                   # a warning about unused precedence rules.

        self.Start = None          # Starting symbol for the grammar


    def __len__(self):
        return len(self.Productions)

    def __getitem__(self,index):
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left','right', or 'nonassoc'.  level is a numeric level.
    #
    # -----------------------------------------------------------------------------

    def set_precedence(self,term,assoc,level):
        assert self.Productions == [None],"Must call set_precedence() before add_production()"
        if term in self.Precedence:
            raise GrammarError("Precedence already specified for terminal %r" % term)
        if assoc not in ['left','right','nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc,level)

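    # Illustrative sketch (hypothetical terminals): declaring the usual
    # arithmetic precedence levels, lowest binding first:
    #
    #     g.set_precedence('PLUS',  'left',  1)
    #     g.set_precedence('MINUS', 'left',  1)
    #     g.set_precedence('TIMES', 'left',  2)
    #     g.set_precedence('UMINUS','right', 3)
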
    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols.  For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the rightmost terminal symbol
    # in the production, or by the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------

    def add_production(self,prodname,syms,func=None,file='',line=0):

        if prodname in self.Terminals:
            raise GrammarError("%s:%d: Illegal rule name %r. Already defined as a token" % (file,line,prodname))
        if prodname == 'error':
            raise GrammarError("%s:%d: Illegal rule name %r. error is a reserved word" % (file,line,prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError("%s:%d: Illegal rule name %r" % (file,line,prodname))

        # Look for literal tokens
        for n,s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError("%s:%d: Literal token %s in rule %r may only be a single character" % (file,line,s, prodname))
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError("%s:%d: Illegal name %r in rule %r" % (file,line,s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
            if syms[-2] != '%prec':
                raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if not prodprec:
                raise GrammarError("%s:%d: Nothing known about the precedence of %r" % (file,line,precname))
            else:
                self.UsedPrecedence[precname] = 1
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms,self.Terminals)
            prodprec = self.Precedence.get(precname,('right',0))

        # See if the rule is already in the rulemap
        map = "%s -> %s" % (prodname,syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
                               "Previous definition at %s:%d" % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = [ ]

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = [ ]
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber,prodname,syms,prodprec,func,file,line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [ p ]
        return 0

    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar.  Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self,start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError("start symbol %s undefined" % start)
        self.Productions[0] = Production(0,"S'",[start])
        self.Nonterminals[start].append(0)
        self.Start = start

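    # Illustrative sketch (hypothetical grammar): building an augmented
    # grammar by hand with the methods above.  yacc.yacc() normally drives
    # these calls from the p_* functions of the calling module:
    #
    #     g = Grammar(['NUM','PLUS'])
    #     g.add_production('expr', ['expr','PLUS','term'])
    #     g.add_production('expr', ['term'])
    #     g.add_production('term', ['NUM'])
    #     g.set_start('expr')              # Production 0 becomes S' -> expr
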
    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol.  Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if reachable[s]:
                # We've already reached symbol s.
                return
            reachable[s] = 1
            for p in self.Prodnames.get(s,[]):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = { }
        for s in list(self.Terminals) + list(self.Nonterminals):
            reachable[s] = 0

        mark_reachable_from( self.Productions[0].prod[0] )

        return [s for s in list(self.Nonterminals)
                if not reachable[s]]

    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
    # -----------------------------------------------------------------------------

    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = 1

        terminates['$end'] = 1

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = 0

        # Then propagate termination until no change:
        while 1:
            some_change = 0
            for (n,pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = 0
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = 1

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = 1
                            some_change = 1
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s,term) in terminates.items():
            if not term:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite

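    # For example (hypothetical rule), a nonterminal whose every production
    # refers back to itself can never derive a string of only terminals and
    # would be reported by infinite_cycles():
    #
    #     a : a B          <- no terminating alternative; 'a' never terminates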

    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules.  Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
        result = []
        for p in self.Productions:
            if not p: continue

            for s in p.prod:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    result.append((s,p))
        return result

    # -----------------------------------------------------------------------------
    # unused_terminals()
    #
    # Find all terminals that were defined, but not used by the grammar.  Returns
    # a list of the unused terminal names.
    # -----------------------------------------------------------------------------
    def unused_terminals(self):
        unused_tok = []
        for s,v in self.Terminals.items():
            if s != 'error' and not v:
                unused_tok.append(s)

        return unused_tok

    # ------------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined, but not used (possibly because
    # they are unreachable).  Returns a list of productions.
    # ------------------------------------------------------------------------------

    def unused_rules(self):
        unused_prod = []
        for s,v in self.Nonterminals.items():
            if not v:
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod

    # -----------------------------------------------------------------------------
    # unused_precedence()
    #
    # Returns a list of tuples (term,precedence) corresponding to precedence
    # rules that were never used by the grammar.  term is the name of the terminal
    # on which precedence was applied and precedence is a string such as 'left' or
    # 'right' corresponding to the type of precedence.
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
        unused = []
        for termname in self.Precedence:
            if not (termname in self.Terminals or termname in self.UsedPrecedence):
                unused.append((termname,self.Precedence[termname][0]))

        return unused

    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    #
    # During execution of compute_first(), the result may be incomplete.
    # Afterward (e.g., when called from compute_follow()), it will be complete.
    # -------------------------------------------------------------------------
    def _first(self,beta):

        # We are computing First(x1,x2,x3,...,xn)
        result = [ ]
        for x in beta:
            x_produces_empty = 0

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = 1
                else:
                    if f not in result: result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result

    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols
    # -------------------------------------------------------------------------
    def compute_first(self):
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while 1:
            some_change = 0
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append( f )
                            some_change = 1
            if not some_change:
                break

        return self.First

1770 # ---------------------------------------------------------------------
1771 # compute_follow()
1772 #
1773 # Computes all of the follow sets for every non-terminal symbol. The
1774 # follow set is the set of all symbols that might follow a given
1775 # non-terminal. See the Dragon book, 2nd Ed. p. 189.
1776 # ---------------------------------------------------------------------
1777 def compute_follow(self,start=None):
1778 # If already computed, return the result
1779 if self.Follow:
1780 return self.Follow
1781
1782 # If first sets not computed yet, do that first.
1783 if not self.First:
1784 self.compute_first()
1785
        # Initialize all follow sets to empty; then add '$end' to the
        # follow set of the start symbol
1787 for k in self.Nonterminals:
1788 self.Follow[k] = [ ]
1789
1790 if not start:
1791 start = self.Productions[1].name
1792
1793 self.Follow[start] = [ '$end' ]
1794
1795 while 1:
1796 didadd = 0
1797 for p in self.Productions[1:]:
                # Scan the body of this production
1799 for i in range(len(p.prod)):
1800 B = p.prod[i]
1801 if B in self.Nonterminals:
1802 # Okay. We got a non-terminal in a production
1803 fst = self._first(p.prod[i+1:])
1804 hasempty = 0
1805 for f in fst:
1806 if f != '<empty>' and f not in self.Follow[B]:
1807 self.Follow[B].append(f)
1808 didadd = 1
1809 if f == '<empty>':
1810 hasempty = 1
1811 if hasempty or i == (len(p.prod)-1):
                            # Add elements of Follow(A) to Follow(B) for A -> alpha B beta
                            # where beta is empty or derives <empty>
1813 for f in self.Follow[p.name]:
1814 if f not in self.Follow[B]:
1815 self.Follow[B].append(f)
1816 didadd = 1
1817 if not didadd: break
1818 return self.Follow
1819
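    # Example (an illustrative sketch; the grammar and token names are
    # hypothetical). For a Grammar built roughly as
    #
    #     g = Grammar(['PLUS','NUMBER'])
    #     g.add_production('expr', ['expr','PLUS','term'])
    #     g.add_production('expr', ['term'])
    #     g.add_production('term', ['NUMBER'])
    #     g.set_start('expr')
    #
    # compute_first() yields First(expr) == First(term) == ['NUMBER'], and
    # compute_follow() yields Follow(expr) == Follow(term) == ['$end','PLUS'].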
1820
1821 # -----------------------------------------------------------------------------
1822 # build_lritems()
1823 #
1824 # This function walks the list of productions and builds a complete set of the
# LR items.  The LR items are stored in two ways: first, each production
# keeps its own list of items in its lr_items attribute; second, the items
# of a production are chained together through their lr_next attributes.
# For example:
1828 #
1829 # E -> E PLUS E
1830 #
1831 # Creates the list
1832 #
1833 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1834 # -----------------------------------------------------------------------------
1835
1836 def build_lritems(self):
1837 for p in self.Productions:
1838 lastlri = p
1839 i = 0
1840 lr_items = []
1841 while 1:
1842 if i > len(p):
1843 lri = None
1844 else:
1845 lri = LRItem(p,i)
1846 # Precompute the list of productions immediately following
1847 try:
1848 lri.lr_after = self.Prodnames[lri.prod[i+1]]
1849 except (IndexError,KeyError):
1850 lri.lr_after = []
1851 try:
1852 lri.lr_before = lri.prod[i-1]
1853 except IndexError:
1854 lri.lr_before = None
1855
1856 lastlri.lr_next = lri
1857 if not lri: break
1858 lr_items.append(lri)
1859 lastlri = lri
1860 i += 1
1861 p.lr_items = lr_items
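
    # For the example above, the item "E -> E . PLUS E" carries (a sketch):
    #
    #     item.lr_before == 'E'   # the symbol immediately left of the dot
    #     item.lr_after  == []    # productions for the symbol after the dot
    #                             # ('PLUS' is a terminal, so there are none)
    #     item.lr_next            # the next item, "E -> E PLUS . E"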
1862
1863# -----------------------------------------------------------------------------
1864# == Class LRTable ==
1865#
# This class represents a basic table of LR parsing information.
1867# Methods for generating the tables are not defined here. They are defined
1868# in the derived class LRGeneratedTable.
1869# -----------------------------------------------------------------------------
1870
1871class VersionError(YaccError): pass
1872
1873class LRTable(object):
1874 def __init__(self):
1875 self.lr_action = None
1876 self.lr_goto = None
1877 self.lr_productions = None
1878 self.lr_method = None
1879
1880 def read_table(self,module):
1881 if isinstance(module,types.ModuleType):
1882 parsetab = module
1883 else:
1884 if sys.version_info[0] < 3:
1885 exec("import %s as parsetab" % module)
1886 else:
1887 env = { }
1888 exec("import %s as parsetab" % module, env, env)
1889 parsetab = env['parsetab']
1890
1891 if parsetab._tabversion != __tabversion__:
1892 raise VersionError("yacc table file version is out of date")
1893
1894 self.lr_action = parsetab._lr_action
1895 self.lr_goto = parsetab._lr_goto
1896
1897 self.lr_productions = []
1898 for p in parsetab._lr_productions:
1899 self.lr_productions.append(MiniProduction(*p))
1900
1901 self.lr_method = parsetab._lr_method
1902 return parsetab._lr_signature
1903
    def read_pickle(self,filename):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        # Close the file even if the version check below raises
        in_f = open(filename,"rb")
        try:
            tabversion = pickle.load(in_f)
            if tabversion != __tabversion__:
                raise VersionError("yacc table file version is out of date")
            self.lr_method = pickle.load(in_f)
            signature = pickle.load(in_f)
            self.lr_action = pickle.load(in_f)
            self.lr_goto = pickle.load(in_f)
            productions = pickle.load(in_f)

            self.lr_productions = []
            for p in productions:
                self.lr_productions.append(MiniProduction(*p))
        finally:
            in_f.close()
        return signature
1927
1928 # Bind all production function names to callable objects in pdict
1929 def bind_callables(self,pdict):
1930 for p in self.lr_productions:
1931 p.bind(pdict)
1932
1933# -----------------------------------------------------------------------------
1934# === LR Generator ===
1935#
1936# The following classes and functions are used to generate LR parsing tables on
1937# a grammar.
1938# -----------------------------------------------------------------------------
1939
1940# -----------------------------------------------------------------------------
1941# digraph()
1942# traverse()
1943#
1944# The following two functions are used to compute set valued functions
1945# of the form:
1946#
1947# F(x) = F'(x) U U{F(y) | x R y}
1948#
1949# This is used to compute the values of Read() sets as well as FOLLOW sets
1950# in LALR(1) generation.
1951#
1952# Inputs: X - An input set
1953# R - A relation
1954# FP - Set-valued function
1955# ------------------------------------------------------------------------------
1956
1957def digraph(X,R,FP):
1958 N = { }
1959 for x in X:
1960 N[x] = 0
1961 stack = []
1962 F = { }
1963 for x in X:
1964 if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
1965 return F
1966
1967def traverse(x,N,stack,F,X,R,FP):
1968 stack.append(x)
1969 d = len(stack)
1970 N[x] = d
    F[x] = FP(x)             # F(x) <- F'(x)
1972
1973 rel = R(x) # Get y's related to x
1974 for y in rel:
1975 if N[y] == 0:
1976 traverse(y,N,stack,F,X,R,FP)
1977 N[x] = min(N[x],N[y])
1978 for a in F.get(y,[]):
1979 if a not in F[x]: F[x].append(a)
1980 if N[x] == d:
1981 N[stack[-1]] = MAXINT
1982 F[stack[-1]] = F[x]
1983 element = stack.pop()
1984 while element != x:
1985 N[stack[-1]] = MAXINT
1986 F[stack[-1]] = F[x]
1987 element = stack.pop()
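
# Example (a minimal runnable sketch with made-up inputs):
#
#     X  = ['a', 'b', 'c']
#     R  = lambda x: {'a': ['b'], 'b': ['c'], 'c': []}[x]
#     FP = lambda x: {'a': [1], 'b': [2], 'c': [3]}[x]
#     digraph(X, R, FP)   # -> {'a': [1,2,3], 'b': [2,3], 'c': [3]}
#
# Each F(x) is F'(x) plus everything reachable through R; members of a
# strongly connected component of R end up sharing a single F value.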
1988
1989class LALRError(YaccError): pass
1990
1991# -----------------------------------------------------------------------------
1992# == LRGeneratedTable ==
1993#
1994# This class implements the LR table generation algorithm. There are no
1995# public methods except for write()
1996# -----------------------------------------------------------------------------
1997
1998class LRGeneratedTable(LRTable):
1999 def __init__(self,grammar,method='LALR',log=None):
2000 if method not in ['SLR','LALR']:
2001 raise LALRError("Unsupported method %s" % method)
2002
2003 self.grammar = grammar
2004 self.lr_method = method
2005
2006 # Set up the logger
2007 if not log:
2008 log = NullLogger()
2009 self.log = log
2010
2011 # Internal attributes
2012 self.lr_action = {} # Action table
2013 self.lr_goto = {} # Goto table
2014 self.lr_productions = grammar.Productions # Copy of grammar Production array
2015 self.lr_goto_cache = {} # Cache of computed gotos
2016 self.lr0_cidhash = {} # Cache of closures
2017
2018 self._add_count = 0 # Internal counter used to detect cycles
2019
        # Diagnostic information filled in by the table generator
2021 self.sr_conflict = 0
2022 self.rr_conflict = 0
2023 self.conflicts = [] # List of conflicts
2024
2025 self.sr_conflicts = []
2026 self.rr_conflicts = []
2027
2028 # Build the tables
2029 self.grammar.build_lritems()
2030 self.grammar.compute_first()
2031 self.grammar.compute_follow()
2032 self.lr_parse_table()
2033
2034 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
2035
2036 def lr0_closure(self,I):
2037 self._add_count += 1
2038
2039 # Add everything in I to J
2040 J = I[:]
2041 didadd = 1
2042 while didadd:
2043 didadd = 0
2044 for j in J:
2045 for x in j.lr_after:
2046 if getattr(x,"lr0_added",0) == self._add_count: continue
2047 # Add B --> .G to J
2048 J.append(x.lr_next)
2049 x.lr0_added = self._add_count
2050 didadd = 1
2051
2052 return J
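
    # Example (sketch): for a grammar with the productions
    #
    #     S'   -> expr
    #     expr -> expr PLUS term | term
    #     term -> NUMBER
    #
    # lr0_closure([ S' -> . expr ]) also pulls in the items
    # expr -> . expr PLUS term, expr -> . term, and term -> . NUMBER.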
2053
2054 # Compute the LR(0) goto function goto(I,X) where I is a set
2055 # of LR(0) items and X is a grammar symbol. This function is written
2056 # in a way that guarantees uniqueness of the generated goto sets
2057 # (i.e. the same goto set will never be returned as two different Python
2058 # objects). With uniqueness, we can later do fast set comparisons using
2059 # id(obj) instead of element-wise comparison.
2060
2061 def lr0_goto(self,I,x):
2062 # First we look for a previously cached entry
2063 g = self.lr_goto_cache.get((id(I),x))
2064 if g: return g
2065
2066 # Now we generate the goto set in a way that guarantees uniqueness
2067 # of the result
2068
2069 s = self.lr_goto_cache.get(x)
2070 if not s:
2071 s = { }
2072 self.lr_goto_cache[x] = s
2073
2074 gs = [ ]
2075 for p in I:
2076 n = p.lr_next
2077 if n and n.lr_before == x:
2078 s1 = s.get(id(n))
2079 if not s1:
2080 s1 = { }
2081 s[id(n)] = s1
2082 gs.append(n)
2083 s = s1
2084 g = s.get('$end')
2085 if not g:
2086 if gs:
2087 g = self.lr0_closure(gs)
2088 s['$end'] = g
2089 else:
2090 s['$end'] = gs
2091 self.lr_goto_cache[(id(I),x)] = g
2092 return g
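
    # Example (sketch): continuing the grammar above, the goto of a set
    # containing "expr -> expr . PLUS term" on the symbol PLUS is the
    # closure of "expr -> expr PLUS . term", which adds "term -> . NUMBER".
    # Repeated calls with the same (I,x) return the identical cached object,
    # which is what makes the later id()-based comparisons safe.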
2093
2094 # Compute the LR(0) sets of item function
2095 def lr0_items(self):
2096
2097 C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
2098 i = 0
2099 for I in C:
2100 self.lr0_cidhash[id(I)] = i
2101 i += 1
2102
        # Loop over the items in C and each grammar symbol
2104 i = 0
2105 while i < len(C):
2106 I = C[i]
2107 i += 1
2108
2109 # Collect all of the symbols that could possibly be in the goto(I,X) sets
2110 asyms = { }
2111 for ii in I:
2112 for s in ii.usyms:
2113 asyms[s] = None
2114
2115 for x in asyms:
2116 g = self.lr0_goto(I,x)
2117 if not g: continue
2118 if id(g) in self.lr0_cidhash: continue
2119 self.lr0_cidhash[id(g)] = len(C)
2120 C.append(g)
2121
2122 return C
2123
2124 # -----------------------------------------------------------------------------
2125 # ==== LALR(1) Parsing ====
2126 #
2127 # LALR(1) parsing is almost exactly the same as SLR except that instead of
2128 # relying upon Follow() sets when performing reductions, a more selective
2129 # lookahead set that incorporates the state of the LR(0) machine is utilized.
2130 # Thus, we mainly just have to focus on calculating the lookahead sets.
2131 #
    # The method used here is due to DeRemer and Pennello (1982).
    #
    # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
    # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
    # Vol. 4, No. 4, Oct. 1982, pp. 615-649
2137 #
2138 # Further details can also be found in:
2139 #
2140 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2141 # McGraw-Hill Book Company, (1985).
2142 #
2143 # -----------------------------------------------------------------------------
2144
2145 # -----------------------------------------------------------------------------
2146 # compute_nullable_nonterminals()
2147 #
2148 # Creates a dictionary containing all of the non-terminals that might produce
2149 # an empty production.
2150 # -----------------------------------------------------------------------------
2151
2152 def compute_nullable_nonterminals(self):
2153 nullable = {}
2154 num_nullable = 0
2155 while 1:
2156 for p in self.grammar.Productions[1:]:
2157 if p.len == 0:
2158 nullable[p.name] = 1
2159 continue
2160 for t in p.prod:
2161 if not t in nullable: break
2162 else:
2163 nullable[p.name] = 1
2164 if len(nullable) == num_nullable: break
2165 num_nullable = len(nullable)
2166 return nullable
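
    # Example (sketch): with the hypothetical productions
    #
    #     stmt : item stmt
    #     stmt : <empty>
    #     item : NUMBER
    #
    # the empty alternative makes stmt nullable but item is not, so the
    # result is {'stmt': 1}.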
2167
2168 # -----------------------------------------------------------------------------
2169 # find_nonterminal_trans(C)
2170 #
    # Given a set of LR(0) items, this function finds all of the non-terminal
2172 # transitions. These are transitions in which a dot appears immediately before
2173 # a non-terminal. Returns a list of tuples of the form (state,N) where state
2174 # is the state number and N is the nonterminal symbol.
2175 #
2176 # The input C is the set of LR(0) items.
2177 # -----------------------------------------------------------------------------
2178
2179 def find_nonterminal_transitions(self,C):
2180 trans = []
2181 for state in range(len(C)):
2182 for p in C[state]:
2183 if p.lr_index < p.len - 1:
2184 t = (state,p.prod[p.lr_index+1])
2185 if t[1] in self.grammar.Nonterminals:
2186 if t not in trans: trans.append(t)
2188 return trans
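
    # Example (sketch): if state 0 contains the items "S' -> . expr" and
    # "expr -> . expr PLUS term", the returned list includes the transition
    # (0,'expr') exactly once.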
2189
2190 # -----------------------------------------------------------------------------
2191 # dr_relation()
2192 #
2193 # Computes the DR(p,A) relationships for non-terminal transitions. The input
2194 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2195 #
2196 # Returns a list of terminals.
2197 # -----------------------------------------------------------------------------
2198
2199 def dr_relation(self,C,trans,nullable):
2201 state,N = trans
2202 terms = []
2203
2204 g = self.lr0_goto(C[state],N)
2205 for p in g:
2206 if p.lr_index < p.len - 1:
2207 a = p.prod[p.lr_index+1]
2208 if a in self.grammar.Terminals:
2209 if a not in terms: terms.append(a)
2210
2211 # This extra bit is to handle the start state
2212 if state == 0 and N == self.grammar.Productions[0].prod[0]:
2213 terms.append('$end')
2214
2215 return terms
2216
2217 # -----------------------------------------------------------------------------
2218 # reads_relation()
2219 #
2220 # Computes the READS() relation (p,A) READS (t,C).
2221 # -----------------------------------------------------------------------------
2222
2223 def reads_relation(self,C, trans, empty):
2224 # Look for empty transitions
2225 rel = []
2226 state, N = trans
2227
2228 g = self.lr0_goto(C[state],N)
2229 j = self.lr0_cidhash.get(id(g),-1)
2230 for p in g:
2231 if p.lr_index < p.len - 1:
2232 a = p.prod[p.lr_index + 1]
2233 if a in empty:
2234 rel.append((j,a))
2235
2236 return rel
2237
2238 # -----------------------------------------------------------------------------
2239 # compute_lookback_includes()
2240 #
2241 # Determines the lookback and includes relations
2242 #
2243 # LOOKBACK:
2244 #
2245 # This relation is determined by running the LR(0) state machine forward.
2246 # For example, starting with a production "N : . A B C", we run it forward
2247 # to obtain "N : A B C ." We then build a relationship between this final
2248 # state and the starting state. These relationships are stored in a dictionary
2249 # lookdict.
2250 #
2251 # INCLUDES:
2252 #
2253 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2254 #
2255 # This relation is used to determine non-terminal transitions that occur
2256 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2257 # if the following holds:
2258 #
2259 # B -> LAT, where T -> epsilon and p' -L-> p
2260 #
2261 # L is essentially a prefix (which may be empty), T is a suffix that must be
2262 # able to derive an empty string. State p' must lead to state p with the string L.
2263 #
2264 # -----------------------------------------------------------------------------
2265
2266 def compute_lookback_includes(self,C,trans,nullable):
2267
2268 lookdict = {} # Dictionary of lookback relations
2269 includedict = {} # Dictionary of include relations
2270
2271 # Make a dictionary of non-terminal transitions
2272 dtrans = {}
2273 for t in trans:
2274 dtrans[t] = 1
2275
2276 # Loop over all transitions and compute lookbacks and includes
2277 for state,N in trans:
2278 lookb = []
2279 includes = []
2280 for p in C[state]:
2281 if p.name != N: continue
2282
2283 # Okay, we have a name match. We now follow the production all the way
2284 # through the state machine until we get the . on the right hand side
2285
2286 lr_index = p.lr_index
2287 j = state
2288 while lr_index < p.len - 1:
2289 lr_index = lr_index + 1
2290 t = p.prod[lr_index]
2291
2292 # Check to see if this symbol and state are a non-terminal transition
2293 if (j,t) in dtrans:
2294 # Yes. Okay, there is some chance that this is an includes relation
2295 # the only way to know for certain is whether the rest of the
2296 # production derives empty
2297
2298 li = lr_index + 1
2299 while li < p.len:
                            if p.prod[li] in self.grammar.Terminals: break    # No, forget it
2301 if not p.prod[li] in nullable: break
2302 li = li + 1
2303 else:
2304 # Appears to be a relation between (j,t) and (state,N)
2305 includes.append((j,t))
2306
2307 g = self.lr0_goto(C[j],t) # Go to next set
2308 j = self.lr0_cidhash.get(id(g),-1) # Go to next state
2309
2310 # When we get here, j is the final state, now we have to locate the production
2311 for r in C[j]:
2312 if r.name != p.name: continue
2313 if r.len != p.len: continue
2314 i = 0
                    # This loop is comparing the production ". A B C" with "A B C ."
2316 while i < r.lr_index:
2317 if r.prod[i] != p.prod[i+1]: break
2318 i = i + 1
2319 else:
2320 lookb.append((j,r))
2321 for i in includes:
2322 if not i in includedict: includedict[i] = []
2323 includedict[i].append((state,N))
2324 lookdict[(state,N)] = lookb
2325
2326 return lookdict,includedict
2327
2328 # -----------------------------------------------------------------------------
2329 # compute_read_sets()
2330 #
2331 # Given a set of LR(0) items, this function computes the read sets.
2332 #
2333 # Inputs: C = Set of LR(0) items
2334 # ntrans = Set of nonterminal transitions
2335 # nullable = Set of empty transitions
    #            nullable = Set of nullable non-terminals
2337 # Returns a set containing the read sets
2338 # -----------------------------------------------------------------------------
2339
2340 def compute_read_sets(self,C, ntrans, nullable):
2341 FP = lambda x: self.dr_relation(C,x,nullable)
2342 R = lambda x: self.reads_relation(C,x,nullable)
2343 F = digraph(ntrans,R,FP)
2344 return F
2345
2346 # -----------------------------------------------------------------------------
2347 # compute_follow_sets()
2348 #
    # Given a set of non-terminal transitions, a previously computed readset,
    # and an include set, this function computes the follow sets
2351 #
2352 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2353 #
2354 # Inputs:
2355 # ntrans = Set of nonterminal transitions
2356 # readsets = Readset (previously computed)
2357 # inclsets = Include sets (previously computed)
2358 #
2359 # Returns a set containing the follow sets
2360 # -----------------------------------------------------------------------------
2361
2362 def compute_follow_sets(self,ntrans,readsets,inclsets):
2363 FP = lambda x: readsets[x]
2364 R = lambda x: inclsets.get(x,[])
2365 F = digraph(ntrans,R,FP)
2366 return F
2367
2368 # -----------------------------------------------------------------------------
2369 # add_lookaheads()
2370 #
2371 # Attaches the lookahead symbols to grammar rules.
2372 #
2373 # Inputs: lookbacks - Set of lookback relations
2374 # followset - Computed follow set
2375 #
2376 # This function directly attaches the lookaheads to productions contained
2377 # in the lookbacks set
2378 # -----------------------------------------------------------------------------
2379
2380 def add_lookaheads(self,lookbacks,followset):
2381 for trans,lb in lookbacks.items():
2382 # Loop over productions in lookback
2383 for state,p in lb:
2384 if not state in p.lookaheads:
2385 p.lookaheads[state] = []
2386 f = followset.get(trans,[])
2387 for a in f:
2388 if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
2389
2390 # -----------------------------------------------------------------------------
2391 # add_lalr_lookaheads()
2392 #
2393 # This function does all of the work of adding lookahead information for use
2394 # with LALR parsing
2395 # -----------------------------------------------------------------------------
2396
2397 def add_lalr_lookaheads(self,C):
2398 # Determine all of the nullable nonterminals
2399 nullable = self.compute_nullable_nonterminals()
2400
2401 # Find all non-terminal transitions
2402 trans = self.find_nonterminal_transitions(C)
2403
2404 # Compute read sets
2405 readsets = self.compute_read_sets(C,trans,nullable)
2406
2407 # Compute lookback/includes relations
2408 lookd, included = self.compute_lookback_includes(C,trans,nullable)
2409
2410 # Compute LALR FOLLOW sets
2411 followsets = self.compute_follow_sets(trans,readsets,included)
2412
2413 # Add all of the lookaheads
2414 self.add_lookaheads(lookd,followsets)
2415
2416 # -----------------------------------------------------------------------------
2417 # lr_parse_table()
2418 #
2419 # This function constructs the parse tables for SLR or LALR
2420 # -----------------------------------------------------------------------------
2421 def lr_parse_table(self):
2422 Productions = self.grammar.Productions
2423 Precedence = self.grammar.Precedence
2424 goto = self.lr_goto # Goto array
2425 action = self.lr_action # Action array
2426 log = self.log # Logger for output
2427
2428 actionp = { } # Action production array (temporary)
2429
2430 log.info("Parsing method: %s", self.lr_method)
2431
2432 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2433 # This determines the number of states
2434
2435 C = self.lr0_items()
2436
2437 if self.lr_method == 'LALR':
2438 self.add_lalr_lookaheads(C)
2439
2440 # Build the parser table, state by state
2441 st = 0
2442 for I in C:
2443 # Loop over each production in I
2444 actlist = [ ] # List of actions
2445 st_action = { }
2446 st_actionp = { }
2447 st_goto = { }
2448 log.info("")
2449 log.info("state %d", st)
2450 log.info("")
2451 for p in I:
2452 log.info(" (%d) %s", p.number, str(p))
2453 log.info("")
2454
2455 for p in I:
2456 if p.len == p.lr_index + 1:
2457 if p.name == "S'":
2458 # Start symbol. Accept!
2459 st_action["$end"] = 0
2460 st_actionp["$end"] = p
2461 else:
2462 # We are at the end of a production. Reduce!
2463 if self.lr_method == 'LALR':
2464 laheads = p.lookaheads[st]
2465 else:
2466 laheads = self.grammar.Follow[p.name]
2467 for a in laheads:
2468 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
2469 r = st_action.get(a)
2470 if r is not None:
2471 # Whoa. Have a shift/reduce or reduce/reduce conflict
2472 if r > 0:
                                    # Need to decide on shift or reduce here
                                    # By default we favor shifting. Precedence rules
                                    # (if defined for the symbols involved) override that.
2476 sprec,slevel = Productions[st_actionp[a].number].prec
2477 rprec,rlevel = Precedence.get(a,('right',0))
2478 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2479 # We really need to reduce here.
2480 st_action[a] = -p.number
2481 st_actionp[a] = p
2482 if not slevel and not rlevel:
2483 log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
2484 self.sr_conflicts.append((st,a,'reduce'))
2485 Productions[p.number].reduced += 1
2486 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2487 st_action[a] = None
2488 else:
2489 # Hmmm. Guess we'll keep the shift
2490 if not rlevel:
2491 log.info(" ! shift/reduce conflict for %s resolved as shift",a)
2492 self.sr_conflicts.append((st,a,'shift'))
2493 elif r < 0:
2494 # Reduce/reduce conflict. In this case, we favor the rule
2495 # that was defined first in the grammar file
2496 oldp = Productions[-r]
2497 pp = Productions[p.number]
2498 if oldp.line > pp.line:
2499 st_action[a] = -p.number
2500 st_actionp[a] = p
2501 chosenp,rejectp = pp,oldp
2502 Productions[p.number].reduced += 1
2503 Productions[oldp.number].reduced -= 1
2504 else:
2505 chosenp,rejectp = oldp,pp
2506 self.rr_conflicts.append((st,chosenp,rejectp))
2507 log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
2508 else:
2509 raise LALRError("Unknown conflict in state %d" % st)
2510 else:
2511 st_action[a] = -p.number
2512 st_actionp[a] = p
2513 Productions[p.number].reduced += 1
2514 else:
2515 i = p.lr_index
2516 a = p.prod[i+1] # Get symbol right after the "."
2517 if a in self.grammar.Terminals:
2518 g = self.lr0_goto(I,a)
2519 j = self.lr0_cidhash.get(id(g),-1)
2520 if j >= 0:
2521 # We are in a shift state
2522 actlist.append((a,p,"shift and go to state %d" % j))
2523 r = st_action.get(a)
2524 if r is not None:
                                # Whoa. Have a shift/reduce or shift/shift conflict
2526 if r > 0:
2527 if r != j:
2528 raise LALRError("Shift/shift conflict in state %d" % st)
2529 elif r < 0:
2530 # Do a precedence check.
2531 # - if precedence of reduce rule is higher, we reduce.
2532 # - if precedence of reduce is same and left assoc, we reduce.
2533 # - otherwise we shift
2534 rprec,rlevel = Productions[st_actionp[a].number].prec
2535 sprec,slevel = Precedence.get(a,('right',0))
2536 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2537 # We decide to shift here... highest precedence to shift
2538 Productions[st_actionp[a].number].reduced -= 1
2539 st_action[a] = j
2540 st_actionp[a] = p
2541 if not rlevel:
2542 log.info(" ! shift/reduce conflict for %s resolved as shift",a)
2543 self.sr_conflicts.append((st,a,'shift'))
2544 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2545 st_action[a] = None
2546 else:
2547 # Hmmm. Guess we'll keep the reduce
2548 if not slevel and not rlevel:
2549 log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
2550 self.sr_conflicts.append((st,a,'reduce'))
2551
2552 else:
2553 raise LALRError("Unknown conflict in state %d" % st)
2554 else:
2555 st_action[a] = j
2556 st_actionp[a] = p
2557
2558 # Print the actions associated with each terminal
2559 _actprint = { }
2560 for a,p,m in actlist:
2561 if a in st_action:
2562 if p is st_actionp[a]:
2563 log.info(" %-15s %s",a,m)
2564 _actprint[(a,m)] = 1
2565 log.info("")
2566 # Print the actions that were not used. (debugging)
2567 not_used = 0
2568 for a,p,m in actlist:
2569 if a in st_action:
2570 if p is not st_actionp[a]:
2571 if not (a,m) in _actprint:
2572 log.debug(" ! %-15s [ %s ]",a,m)
2573 not_used = 1
2574 _actprint[(a,m)] = 1
2575 if not_used:
2576 log.debug("")
2577
2578 # Construct the goto table for this state
2579
2580 nkeys = { }
2581 for ii in I:
2582 for s in ii.usyms:
2583 if s in self.grammar.Nonterminals:
2584 nkeys[s] = None
2585 for n in nkeys:
2586 g = self.lr0_goto(I,n)
2587 j = self.lr0_cidhash.get(id(g),-1)
2588 if j >= 0:
2589 st_goto[n] = j
2590 log.info(" %-30s shift and go to state %d",n,j)
2591
2592 action[st] = st_action
2593 actionp[st] = st_actionp
2594 goto[st] = st_goto
2595 st += 1
2596
2597
2598 # -----------------------------------------------------------------------------
2599 # write()
2600 #
2601 # This function writes the LR parsing tables to a file
2602 # -----------------------------------------------------------------------------
2603
2604 def write_table(self,modulename,outputdir='',signature=""):
2605 basemodulename = modulename.split(".")[-1]
2606 filename = os.path.join(outputdir,basemodulename) + ".py"
2607 try:
2608 f = open(filename,"w")
2609
2610 f.write("""
2611# %s
2612# This file is automatically generated. Do not edit.
2613_tabversion = %r
2614
2615_lr_method = %r
2616
2617_lr_signature = %r
2618 """ % (filename, __tabversion__, self.lr_method, signature))
2619
2620 # Change smaller to 0 to go back to original tables
2621 smaller = 1
2622
2623 # Factor out names to try and make smaller
2624 if smaller:
2625 items = { }
2626
2627 for s,nd in self.lr_action.items():
2628 for name,v in nd.items():
2629 i = items.get(name)
2630 if not i:
2631 i = ([],[])
2632 items[name] = i
2633 i[0].append(s)
2634 i[1].append(v)
2635
2636 f.write("\n_lr_action_items = {")
2637 for k,v in items.items():
2638 f.write("%r:([" % k)
2639 for i in v[0]:
2640 f.write("%r," % i)
2641 f.write("],[")
2642 for i in v[1]:
2643 f.write("%r," % i)
2644
2645 f.write("]),")
2646 f.write("}\n")
2647
2648 f.write("""
2649_lr_action = { }
2650for _k, _v in _lr_action_items.items():
2651 for _x,_y in zip(_v[0],_v[1]):
2652 if not _x in _lr_action: _lr_action[_x] = { }
2653 _lr_action[_x][_k] = _y
2654del _lr_action_items
2655""")
2656
2657 else:
                f.write("\n_lr_action = { ")
                for k,v in self.lr_action.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n")
2662
2663 if smaller:
2664 # Factor out names to try and make smaller
2665 items = { }
2666
2667 for s,nd in self.lr_goto.items():
2668 for name,v in nd.items():
2669 i = items.get(name)
2670 if not i:
2671 i = ([],[])
2672 items[name] = i
2673 i[0].append(s)
2674 i[1].append(v)
2675
2676 f.write("\n_lr_goto_items = {")
2677 for k,v in items.items():
2678 f.write("%r:([" % k)
2679 for i in v[0]:
2680 f.write("%r," % i)
2681 f.write("],[")
2682 for i in v[1]:
2683 f.write("%r," % i)
2684
2685 f.write("]),")
2686 f.write("}\n")
2687
2688 f.write("""
2689_lr_goto = { }
2690for _k, _v in _lr_goto_items.items():
2691 for _x,_y in zip(_v[0],_v[1]):
2692 if not _x in _lr_goto: _lr_goto[_x] = { }
2693 _lr_goto[_x][_k] = _y
2694del _lr_goto_items
2695""")
2696 else:
                f.write("\n_lr_goto = { ")
                for k,v in self.lr_goto.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n")
2701
2702 # Write production table
2703 f.write("_lr_productions = [\n")
2704 for p in self.lr_productions:
2705 if p.func:
2706 f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
2707 else:
2708 f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
2709 f.write("]\n")
2710 f.close()
2711
2712 except IOError:
2713 e = sys.exc_info()[1]
2714 sys.stderr.write("Unable to create %r\n" % filename)
2715 sys.stderr.write(str(e)+"\n")
2716 return
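
    # The generated module looks roughly like this (an illustrative sketch;
    # the actual keys and state numbers depend entirely on the grammar):
    #
    #     _tabversion = '3.2'
    #     _lr_method = 'LALR'
    #     _lr_signature = '...'
    #     _lr_action_items = {'NUMBER':([0,3],[4,4]), ...}
    #     ...unpacking loops as written above...
    #     _lr_productions = [
    #         ("S' -> expr", "S'", 1, None, None, None),
    #         ...
    #     ]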
2717
2718
2719 # -----------------------------------------------------------------------------
2720 # pickle_table()
2721 #
2722 # This function pickles the LR parsing tables to a supplied file object
2723 # -----------------------------------------------------------------------------
2724
2725 def pickle_table(self,filename,signature=""):
2726 try:
2727 import cPickle as pickle
2728 except ImportError:
2729 import pickle
2730 outf = open(filename,"wb")
2731 pickle.dump(__tabversion__,outf,pickle_protocol)
2732 pickle.dump(self.lr_method,outf,pickle_protocol)
2733 pickle.dump(signature,outf,pickle_protocol)
2734 pickle.dump(self.lr_action,outf,pickle_protocol)
2735 pickle.dump(self.lr_goto,outf,pickle_protocol)
2736
2737 outp = []
2738 for p in self.lr_productions:
2739 if p.func:
2740 outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
2741 else:
2742 outp.append((str(p),p.name,p.len,None,None,None))
2743 pickle.dump(outp,outf,pickle_protocol)
2744 outf.close()
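
    # Usage sketch: a table pickled here can later be reloaded without
    # regenerating it, mirroring what yacc() does when picklefile is given
    # ('parser.p', pdict, and p_error_func are hypothetical names):
    #
    #     lr = LRTable()
    #     signature = lr.read_pickle('parser.p')
    #     lr.bind_callables(pdict)
    #     parser = LRParser(lr, p_error_func)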
2745
2746# -----------------------------------------------------------------------------
2747# === INTROSPECTION ===
2748#
2749# The following functions and classes are used to implement the PLY
2750# introspection features followed by the yacc() function itself.
2751# -----------------------------------------------------------------------------
2752
2753# -----------------------------------------------------------------------------
2754# get_caller_module_dict()
2755#
2756# This function returns a dictionary containing all of the symbols defined within
2757# a caller further down the call stack. This is used to get the environment
2758# associated with the yacc() call if none was provided.
2759# -----------------------------------------------------------------------------
2760
def get_caller_module_dict(levels):
    # Raise and catch a throwaway exception to obtain a traceback, whose
    # frame is then walked back the requested number of stack levels.
    try:
        raise RuntimeError
    except RuntimeError:
        e,b,t = sys.exc_info()
        f = t.tb_frame
2767 while levels > 0:
2768 f = f.f_back
2769 levels -= 1
2770 ldict = f.f_globals.copy()
2771 if f.f_globals != f.f_locals:
2772 ldict.update(f.f_locals)
2773
2774 return ldict
2775
2776# -----------------------------------------------------------------------------
2777# parse_grammar()
2778#
2779# This takes a raw grammar rule string and parses it into production data
2780# -----------------------------------------------------------------------------
2781def parse_grammar(doc,file,line):
2782 grammar = []
2783 # Split the doc string into lines
2784 pstrings = doc.splitlines()
2785 lastp = None
2786 dline = line
2787 for ps in pstrings:
2788 dline += 1
2789 p = ps.split()
2790 if not p: continue
2791 try:
2792 if p[0] == '|':
2793 # This is a continuation of a previous rule
2794 if not lastp:
2795 raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline))
2796 prodname = lastp
2797 syms = p[1:]
2798 else:
2799 prodname = p[0]
2800 lastp = prodname
2801 syms = p[2:]
2802 assign = p[1]
2803 if assign != ':' and assign != '::=':
2804 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))
2805
2806 grammar.append((file,dline,prodname,syms))
2807 except SyntaxError:
2808 raise
2809 except Exception:
2810 raise SyntaxError("%s:%d: Syntax error in rule %r" % (file,dline,ps.strip()))
2811
2812 return grammar
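
# Example (sketch): given the doc string
#
#     expr : expr PLUS term
#          | term
#
# with file='calc.py' (hypothetical) and line=10, the result is
#
#     [('calc.py', 11, 'expr', ['expr','PLUS','term']),
#      ('calc.py', 12, 'expr', ['term'])]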
2813
2814# -----------------------------------------------------------------------------
2815# ParserReflect()
2816#
2817# This class represents information extracted for building a parser including
2818# start symbol, error function, tokens, precedence list, action functions,
2819# etc.
2820# -----------------------------------------------------------------------------
2821class ParserReflect(object):
2822 def __init__(self,pdict,log=None):
2823 self.pdict = pdict
2824 self.start = None
2825 self.error_func = None
2826 self.tokens = None
2827 self.modules = {}
2828 self.grammar = []
2829 self.error = 0
2830
2831 if log is None:
2832 self.log = PlyLogger(sys.stderr)
2833 else:
2834 self.log = log
2835
2836 # Get all of the basic information
2837 def get_all(self):
2838 self.get_start()
2839 self.get_error_func()
2840 self.get_tokens()
2841 self.get_precedence()
2842 self.get_pfunctions()
2843
2844 # Validate all of the information
2845 def validate_all(self):
2846 self.validate_start()
2847 self.validate_error_func()
2848 self.validate_tokens()
2849 self.validate_precedence()
2850 self.validate_pfunctions()
2851 self.validate_modules()
2852 return self.error
2853
2854 # Compute a signature over the grammar
2855 def signature(self):
2856 try:
2857 from hashlib import md5
2858 except ImportError:
2859 from md5 import md5
2860 try:
2861 sig = md5()
2862 if self.start:
2863 sig.update(self.start.encode('latin-1'))
2864 if self.prec:
2865 sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
2866 if self.tokens:
2867 sig.update(" ".join(self.tokens).encode('latin-1'))
2868 for f in self.pfuncs:
2869 if f[3]:
2870 sig.update(f[3].encode('latin-1'))
2871 except (TypeError,ValueError):
2872 pass
2873 return sig.digest()
2874
2875 # -----------------------------------------------------------------------------
2876 # validate_modules()
2877 #
2878 # This method checks to see if there are duplicated p_rulename() functions
2879 # in the parser module file. Without this function, it is really easy for
2880 # users to make mistakes by cutting and pasting code fragments (and it's a real
2881 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
2882 # we just do a little regular expression pattern matching of def statements
2883 # to try and detect duplicates.
2884 # -----------------------------------------------------------------------------
2885
2886 def validate_modules(self):
2887 # Match def p_funcname(
2888 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2889
2890 for module in self.modules.keys():
2891 lines, linen = inspect.getsourcelines(module)
2892
2893 counthash = { }
2894 for linen,l in enumerate(lines):
2895 linen += 1
2896 m = fre.match(l)
2897 if m:
2898 name = m.group(1)
2899 prev = counthash.get(name)
2900 if not prev:
2901 counthash[name] = linen
2902 else:
2903 filename = inspect.getsourcefile(module)
2904 self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
2905
2906 # Get the start symbol
2907 def get_start(self):
2908 self.start = self.pdict.get('start')
2909
2910 # Validate the start symbol
2911 def validate_start(self):
2912 if self.start is not None:
2913 if not isinstance(self.start, string_types):
2914 self.log.error("'start' must be a string")
2915
2916 # Look for error handler
2917 def get_error_func(self):
2918 self.error_func = self.pdict.get('p_error')
2919
2920 # Validate the error function
2921 def validate_error_func(self):
2922 if self.error_func:
2923 if isinstance(self.error_func,types.FunctionType):
2924 ismethod = 0
2925 elif isinstance(self.error_func, types.MethodType):
2926 ismethod = 1
2927 else:
2928 self.log.error("'p_error' defined, but is not a function or method")
2929 self.error = 1
2930 return
2931
2932 eline = func_code(self.error_func).co_firstlineno
2933 efile = func_code(self.error_func).co_filename
2934 module = inspect.getmodule(self.error_func)
2935 self.modules[module] = 1
2936
2937 argcount = func_code(self.error_func).co_argcount - ismethod
2938 if argcount != 1:
2939 self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
2940 self.error = 1
2941
2942 # Get the tokens map
2943 def get_tokens(self):
2944 tokens = self.pdict.get("tokens")
2945 if not tokens:
2946 self.log.error("No token list is defined")
2947 self.error = 1
2948 return
2949
2950 if not isinstance(tokens,(list, tuple)):
2951 self.log.error("tokens must be a list or tuple")
2952 self.error = 1
2953 return
2954
2955 if not tokens:
2956 self.log.error("tokens is empty")
2957 self.error = 1
2958 return
2959
2960 self.tokens = tokens
2961
2962 # Validate the tokens
2963 def validate_tokens(self):
2965 if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. It is a reserved word")
2967 self.error = 1
2968 return
2969
2970 terminals = {}
2971 for n in self.tokens:
2972 if n in terminals:
2973 self.log.warning("Token %r multiply defined", n)
2974 terminals[n] = 1
2975
2976 # Get the precedence map (if any)
2977 def get_precedence(self):
2978 self.prec = self.pdict.get("precedence")
2979
2980 # Validate and parse the precedence map
2981 def validate_precedence(self):
2982 preclist = []
2983 if self.prec:
2984 if not isinstance(self.prec,(list,tuple)):
2985 self.log.error("precedence must be a list or tuple")
2986 self.error = 1
2987 return
2988 for level,p in enumerate(self.prec):
2989 if not isinstance(p,(list,tuple)):
2990 self.log.error("Bad precedence table")
2991 self.error = 1
2992 return
2993
2994 if len(p) < 2:
2995 self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
2996 self.error = 1
2997 return
2998 assoc = p[0]
2999 if not isinstance(assoc, string_types):
3000 self.log.error("precedence associativity must be a string")
3001 self.error = 1
3002 return
3003 for term in p[1:]:
3004 if not isinstance(term, string_types):
3005 self.log.error("precedence items must be strings")
3006 self.error = 1
3007 return
3008 preclist.append((term, assoc, level+1))
3009 self.preclist = preclist
3010
3011 # Get all p_functions from the grammar
3012 def get_pfunctions(self):
3013 p_functions = []
3014 for name, item in self.pdict.items():
3015 if not name.startswith('p_'): continue
3016 if name == 'p_error': continue
3017 if isinstance(item,(types.FunctionType,types.MethodType)):
3018 line = func_code(item).co_firstlineno
3019 module = inspect.getmodule(item)
3020 p_functions.append((line,module,name,item.__doc__))
3021
3022 # Sort all of the actions by line number
3023 p_functions.sort()
3024 self.pfuncs = p_functions
3025
3026
3027 # Validate all of the p_functions
3028 def validate_pfunctions(self):
3029 grammar = []
        # Check that at least one grammar rule was defined
3031 if len(self.pfuncs) == 0:
3032 self.log.error("no rules of the form p_rulename are defined")
3033 self.error = 1
3034 return
3035
3036 for line, module, name, doc in self.pfuncs:
3037 file = inspect.getsourcefile(module)
3038 func = self.pdict[name]
3039 if isinstance(func, types.MethodType):
3040 reqargs = 2
3041 else:
3042 reqargs = 1
3043 if func_code(func).co_argcount > reqargs:
3044 self.log.error("%s:%d: Rule %r has too many arguments",file,line,func.__name__)
3045 self.error = 1
3046 elif func_code(func).co_argcount < reqargs:
3047 self.log.error("%s:%d: Rule %r requires an argument",file,line,func.__name__)
3048 self.error = 1
3049 elif not func.__doc__:
3050 self.log.warning("%s:%d: No documentation string specified in function %r (ignored)",file,line,func.__name__)
3051 else:
3052 try:
3053 parsed_g = parse_grammar(doc,file,line)
3054 for g in parsed_g:
3055 grammar.append((name, g))
3056 except SyntaxError:
3057 e = sys.exc_info()[1]
3058 self.log.error(str(e))
3059 self.error = 1
3060
3061 # Looks like a valid grammar rule
                # Mark the module in which it was defined
3063 self.modules[module] = 1
3064
3065 # Secondary validation step that looks for p_ definitions that are not functions
3066 # or functions that look like they might be grammar rules.
3067
3068 for n,v in self.pdict.items():
3069 if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): continue
3070 if n.startswith('t_'): continue
3071 if n.startswith('p_') and n != 'p_error':
3072 self.log.warning("%r not defined as a function", n)
3073 if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
3074 (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
3075 try:
3076 doc = v.__doc__.split(" ")
3077 if doc[1] == ':':
3078 self.log.warning("%s:%d: Possible grammar rule %r defined without p_ prefix",
3079 func_code(v).co_filename, func_code(v).co_firstlineno,n)
3080 except Exception:
3081 pass
3082
3083 self.grammar = grammar
3084
3085# -----------------------------------------------------------------------------
3086# yacc(module)
3087#
3088# Build a parser
3089# -----------------------------------------------------------------------------
3090
3091def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3092 check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='',
3093 debuglog=None, errorlog = None, picklefile=None):
3094
3095 global parse # Reference to the parsing method of the last built parser
3096
3097 # If pickling is enabled, table files are not created
3098
3099 if picklefile:
3100 write_tables = 0
3101
3102 if errorlog is None:
3103 errorlog = PlyLogger(sys.stderr)
3104
3105 # Get the module dictionary used for the parser
3106 if module:
3107 _items = [(k,getattr(module,k)) for k in dir(module)]
3108 pdict = dict(_items)
3109 else:
3110 pdict = get_caller_module_dict(2)
3111
3112 # Collect parser information from the dictionary
3113 pinfo = ParserReflect(pdict,log=errorlog)
3114 pinfo.get_all()
3115
3116 if pinfo.error:
3117 raise YaccError("Unable to build parser")
3118
3119 # Check signature against table files (if any)
3120 signature = pinfo.signature()
3121
3122 # Read the tables
3123 try:
3124 lr = LRTable()
3125 if picklefile:
3126 read_signature = lr.read_pickle(picklefile)
3127 else:
3128 read_signature = lr.read_table(tabmodule)
3129 if optimize or (read_signature == signature):
3130 try:
3131 lr.bind_callables(pinfo.pdict)
3132 parser = LRParser(lr,pinfo.error_func)
3133 parse = parser.parse
3134 return parser
3135 except Exception:
3136 e = sys.exc_info()[1]
3137 errorlog.warning("There was a problem loading the table file: %s", repr(e))
3138 except VersionError:
        e = sys.exc_info()[1]
3140 errorlog.warning(str(e))
3141 except Exception:
3142 pass
3143
3144 if debuglog is None:
3145 if debug:
3146 debuglog = PlyLogger(open(debugfile,"w"))
3147 else:
3148 debuglog = NullLogger()
3149
3150 debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)
3151
3152
3153 errors = 0
3154
3155 # Validate the parser information
3156 if pinfo.validate_all():
3157 raise YaccError("Unable to build parser")
3158
3159 if not pinfo.error_func:
3160 errorlog.warning("no p_error() function is defined")
3161
3162 # Create a grammar object
3163 grammar = Grammar(pinfo.tokens)
3164
3165 # Set precedence level for terminals
3166 for term, assoc, level in pinfo.preclist:
3167 try:
3168 grammar.set_precedence(term,assoc,level)
3169 except GrammarError:
3170 e = sys.exc_info()[1]
3171 errorlog.warning("%s",str(e))
3172
3173 # Add productions to the grammar
3174 for funcname, gram in pinfo.grammar:
3175 file, line, prodname, syms = gram
3176 try:
3177 grammar.add_production(prodname,syms,funcname,file,line)
3178 except GrammarError:
3179 e = sys.exc_info()[1]
3180 errorlog.error("%s",str(e))
3181 errors = 1
3182
3183 # Set the grammar start symbols
3184 try:
3185 if start is None:
3186 grammar.set_start(pinfo.start)
3187 else:
3188 grammar.set_start(start)
3189 except GrammarError:
3190 e = sys.exc_info()[1]
3191 errorlog.error(str(e))
3192 errors = 1
3193
3194 if errors:
3195 raise YaccError("Unable to build parser")
3196
3197 # Verify the grammar structure
3198 undefined_symbols = grammar.undefined_symbols()
3199 for sym, prod in undefined_symbols:
3200 errorlog.error("%s:%d: Symbol %r used, but not defined as a token or a rule",prod.file,prod.line,sym)
3201 errors = 1
3202
3203 unused_terminals = grammar.unused_terminals()
3204 if unused_terminals:
3205 debuglog.info("")
3206 debuglog.info("Unused terminals:")
3207 debuglog.info("")
3208 for term in unused_terminals:
3209 errorlog.warning("Token %r defined, but not used", term)
3210 debuglog.info(" %s", term)
3211
3212 # Print out all productions to the debug log
3213 if debug:
3214 debuglog.info("")
3215 debuglog.info("Grammar")
3216 debuglog.info("")
3217 for n,p in enumerate(grammar.Productions):
3218 debuglog.info("Rule %-5d %s", n, p)
3219
3220 # Find unused non-terminals
3221 unused_rules = grammar.unused_rules()
3222 for prod in unused_rules:
3223 errorlog.warning("%s:%d: Rule %r defined, but not used", prod.file, prod.line, prod.name)
3224
3225 if len(unused_terminals) == 1:
3226 errorlog.warning("There is 1 unused token")
3227 if len(unused_terminals) > 1:
3228 errorlog.warning("There are %d unused tokens", len(unused_terminals))
3229
3230 if len(unused_rules) == 1:
3231 errorlog.warning("There is 1 unused rule")
3232 if len(unused_rules) > 1:
3233 errorlog.warning("There are %d unused rules", len(unused_rules))
3234
3235 if debug:
3236 debuglog.info("")
3237 debuglog.info("Terminals, with rules where they appear")
3238 debuglog.info("")
3239 terms = list(grammar.Terminals)
3240 terms.sort()
3241 for term in terms:
3242 debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
3243
3244 debuglog.info("")
3245 debuglog.info("Nonterminals, with rules where they appear")
3246 debuglog.info("")
3247 nonterms = list(grammar.Nonterminals)
3248 nonterms.sort()
3249 for nonterm in nonterms:
3250 debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
3251 debuglog.info("")
3252
3253 if check_recursion:
3254 unreachable = grammar.find_unreachable()
3255 for u in unreachable:
3256 errorlog.warning("Symbol %r is unreachable",u)
3257
3258 infinite = grammar.infinite_cycles()
3259 for inf in infinite:
3260 errorlog.error("Infinite recursion detected for symbol %r", inf)
3261 errors = 1
3262
3263 unused_prec = grammar.unused_precedence()
3264 for term, assoc in unused_prec:
3265 errorlog.error("Precedence rule %r defined for unknown symbol %r", assoc, term)
3266 errors = 1
3267
3268 if errors:
3269 raise YaccError("Unable to build parser")
3270
3271 # Run the LRGeneratedTable on the grammar
3272 if debug:
3273 errorlog.debug("Generating %s tables", method)
3274
3275 lr = LRGeneratedTable(grammar,method,debuglog)
3276
3277 if debug:
3278 num_sr = len(lr.sr_conflicts)
3279
3280 # Report shift/reduce and reduce/reduce conflicts
3281 if num_sr == 1:
3282 errorlog.warning("1 shift/reduce conflict")
3283 elif num_sr > 1:
3284 errorlog.warning("%d shift/reduce conflicts", num_sr)
3285
3286 num_rr = len(lr.rr_conflicts)
3287 if num_rr == 1:
3288 errorlog.warning("1 reduce/reduce conflict")
3289 elif num_rr > 1:
3290 errorlog.warning("%d reduce/reduce conflicts", num_rr)
3291
3292 # Write out conflicts to the output file
3293 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3294 debuglog.warning("")
3295 debuglog.warning("Conflicts:")
3296 debuglog.warning("")
3297
3298 for state, tok, resolution in lr.sr_conflicts:
3299 debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)
3300
3301 already_reported = {}
3302 for state, rule, rejected in lr.rr_conflicts:
3303 if (state,id(rule),id(rejected)) in already_reported:
3304 continue
3305 debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3306 debuglog.warning("rejected rule (%s) in state %d", rejected,state)
3307 errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3308 errorlog.warning("rejected rule (%s) in state %d", rejected, state)
3309 already_reported[state,id(rule),id(rejected)] = 1
3310
3311 warned_never = []
3312 for state, rule, rejected in lr.rr_conflicts:
3313 if not rejected.reduced and (rejected not in warned_never):
3314 debuglog.warning("Rule (%s) is never reduced", rejected)
3315 errorlog.warning("Rule (%s) is never reduced", rejected)
3316 warned_never.append(rejected)
3317
3318 # Write the table file if requested
3319 if write_tables:
3320 lr.write_table(tabmodule,outputdir,signature)
3321
3322 # Write a pickled version of the tables
3323 if picklefile:
3324 lr.pickle_table(picklefile,signature)
3325
3326 # Build the parser
3327 lr.bind_callables(pinfo.pdict)
3328 parser = LRParser(lr,pinfo.error_func)
3329
3330 parse = parser.parse
3331 return parser
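
# -----------------------------------------------------------------------------
# Example usage (an illustrative sketch, not part of this module). The
# module 'calclex' is hypothetical; any ply.lex tokenizer that defines the
# tokens NUMBER and PLUS would do:
#
#     import ply.yacc as yacc
#     from calclex import tokens
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]
#
#     def p_expr_term(p):
#         'expr : term'
#         p[0] = p[1]
#
#     def p_term_number(p):
#         'term : NUMBER'
#         p[0] = p[1]
#
#     def p_error(p):
#         print("Syntax error at %r" % (p,))
#
#     parser = yacc.yacc()            # builds (or reloads) the tables
#     result = parser.parse('1 + 2')  # uses the most recently built lexer
# -----------------------------------------------------------------------------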