"""
This script implements a simple and naive recursive top-down parser.  The
only trouble is that it builds on the infrastructure for generation which
in turn makes building the parse tree a bit tricky.
"""

import grammar, word, symbol

exampleGrammar = """S -> NP VP
S -> NP VP PP
VP -> "vi"
VP -> "vt" NP
VP -> "vt" NP PP
NP -> "n"
NP -> "det" "n"
NP -> "det" "adj" n
PP -> "prep" NP
=S
"""

exampleGrammar2 = """S -> "a" S "a"
S -> "b"
=S
"""

class Error(Exception):
	pass

class TopDownParser:
	def __init__(self, grammar):
		"""constructs the parser.  parserStr has to contain a grammar
		suitable for consumption by grammar.Grammar.  It can only
		handle Grammars without epsilon productions.
		"""
		self.grammar = grammar

	def parse(self, wordToParse, curParse=None):
		"""returns a sequence of Rules the application of which yields
		wordToParse in a left derivation. You should not pass anything 
		to curParse when calling it.
		"""
		if curParse is None:
			curParse = word.Word(str(self.grammar.getStartSymbol()))
		if wordToParse==curParse:
			return []
		leftmostNonTerm = curParse.getLeftmostNonTerm()
		if not leftmostNonTerm or len(curParse)>len(wordToParse):
			return
		for rule, deriv in self.grammar.deriveLeft(leftmostNonTerm, 
				curParse, yieldRules=1):
			rulesApplied = self.parse(wordToParse, deriv)
			if rulesApplied is not None:
				return [rule]+rulesApplied


def _getAndDelFirstMatchingRule(ruleSeq, targetSymbol):
	"""returns the first rule in ruleSeq that has targetSymbol on its
	left side and removes it from ruleSeq.  Helper function for
	_ruleSeqToTree.
	"""
	for rule in ruleSeq:
		if rule.getLeft()==targetSymbol:
			break
	else:
		raise Error("No target symbol %r in rule sequence"%targetSymbol)
	ruleSeq.remove(rule)
	return rule

def _ruleSeqToTree(ruleSeq, targetSymbol):
	"""converts a sequence of rules from a left derivation to a parse
	tree.  The parse tree is encoded in lists -- see printListTree for
	an explanation.
	"""
	parseTree = []
	targetRule = _getAndDelFirstMatchingRule(ruleSeq, targetSymbol)
	for sym in targetRule.getRight():
		if isinstance(sym, symbol.NonTerminal):
			parseTree.append(_ruleSeqToTree(ruleSeq, sym))
		else:
			parseTree.append(str(sym))
	return [str(targetRule.getLeft())]+parseTree

def printListTree(seq, indentStr="  "):
	"""prettyprints a list tree, i.e. a tree consisting of
	lists.  In each list, the parent comes first, followed
	by its children, which are either strings or lists.
	"""
	print indentStr[:-2]+"+-"+seq[0]
	for child in seq[1:-1]:
		if isinstance(child, list):
			printListTree(child, indentStr+"| ")
		else:
			print indentStr+"+-"+child
	if isinstance(seq[-1], list):
		printListTree(seq[-1], indentStr+"  ")
	else:
		print indentStr+"+-"+seq[-1]


if __name__=="__main__":
	gram = grammar.Grammar(None, ruleString=exampleGrammar)
	parser = TopDownParser(gram)
	toParse = word.Word('"det" "n" "vt" "n" "prep" "n"')
#	toParse = word.Word('"a" "b" "a"')
	ruleSeq = parser.parse(toParse)
	printListTree(
		_ruleSeqToTree(ruleSeq, gram.getStartSymbol()))

