############################################################################
# Autor: 					Jan-Ole Esleben
# e-Mail-Adresse: esleben@cl.uni-heidelberg.de
# Projekt:				Terminparser (beendet WS02)
# 
# Grammatikmodul; übersetzt die (XML-) Grammatik mit Hilfe von rules.rb in
# reguläre Ausdrücke und stellt diese über eine Schnittstelle zur
# Verfügung.
#
# Dokumentation kann mit rdoc extrahiert werden; Kommentare und
# Dokumentation sind aus Gründen der Kürze und Prägnanz in englischer
# Sprache.
############################################################################
# License: cf. file LICENSE.txt distributed with the code
############################################################################

require 'rexml/document'
include REXML

module Calendar
	# Names shared between the grammar compiler and its input files. Ruby
	# constants need no "$" prefix; outside of this module they are reachable
	# as <tt>Calendar::NAME</tt>. All of them are frozen so that no user of
	# the module can alter them in place.

	# File name of the rule library; Grammar hands it to +require+.
	LIBRARY = "rules.rb".freeze
	# Not referenced anywhere in this file.
	# NOTE(review): presumably consumed by the rule library -- confirm.
	EMPTY = "0".freeze
	# Name of the element that wraps the central part of a rule; elements
	# before and after it are treated as context.
	THIS = "this".freeze
	# Name of the attribute a rule element carries its score in.
	SCORE = "score".freeze
	# Name of the root element of a grammar file; its element children are
	# the rules.
	TOPLEVEL = "xml".freeze
	
	# Translate an XML grammar into regular expressions and hand the result
	# out through an iterator.
	#
	# The translation of single tags is delegated to the rule library
	# (+LIBRARY+). That library has to define +RULES+, a Hash indexed by the
	# _downcased_ name of a tag whose values respond to +call+ (proc
	# objects) and receive
	# * the text of that tag and
	# * the attributes of that tag.
	# Each of them should return the regex source for that tag alone, as a
	# String.
	#
	# The procs are where the actual translation happens; this class only
	# adds the context (rule type, score, groupings).
	#
	# A compiled grammar is meant to be marshalled to and reloaded from disk,
	# but +load+ and +save+ are not implemented yet.
	class Grammar
		require LIBRARY

		# The compiled rules: an Array of Hashes with the String keys
		# <tt>'type'</tt>, <tt>'score'</tt> and <tt>'regex'</tt> (see +each+).
		attr_reader :rules

		# Create a grammar without any rules; they are added by +precompile+.
		def initialize
			@rules = []
		end

		# Compile the grammar stored in +filename+ and append one entry per rule
		# to +rules+. Calling this more than once accumulates rules.
		#
		# +filename+ is a file containing a _valid_ XML grammar. It is only read
		# while this method runs, which <i>may take some time</i>.
		#
		# Every element child of the +TOPLEVEL+ element is a rule: its name
		# becomes the rule's type, its +SCORE+ attribute the rule's score (+nil+
		# if the attribute is missing). The regex of a rule consists of three
		# consecutive parenthesised parts: the context before the +THIS+
		# element, the contents of the +THIS+ element, and the context after it.
		# Missing context yields an empty group. A rule without any child
		# elements compiles to a single empty group. All regexes are compiled
		# multiline and case insensitive.
		#
		# Tags without an entry in +RULES+ are reported on standard output and
		# skipped.
		#
		# Returns +self+.
		#
		# Raises a RuntimeError if a rule contains more than one +THIS+ element,
		# or context elements but no +THIS+ element at all. Errors from opening
		# or parsing the file are passed through.
		def precompile(filename)
			# REXML builds the whole tree inside Document.new, so the file can be
			# closed as soon as the constructor returns.
			doc = File.open(filename) { |file| REXML::Document.new(file) }
			doc.each_element("#{TOPLEVEL}/*") do |rule_element|
				source = rule_source(rule_element)
				@rules.push({
					'type' => rule_element.name,
					'score' => rule_element.attributes[SCORE],
					'regex' => /#{source}/mi
				})
			end
			self
		end

		# Iterator: yields each rule as a dictionary with the fields
		# * <tt>'type'</tt>: the type of the rule (the name of its element)
		# * <tt>'score'</tt>: its score
		# * <tt>'regex'</tt>: the compiled Regexp
		#
		# This is a guarded wrapper around <tt>rules.each</tt>: it raises a
		# RuntimeError as long as no rules have been compiled. Without a block
		# an Enumerator over the rules is returned.
		def each
			raise "Precompile/load rules first!" if @rules.empty?
			return enum_for(:each) unless block_given?
			@rules.each { |rule| yield rule }
		end

		# TODO: implement an iterator accessing the hash through the first
		# element

		# UNIMPLEMENTED (does nothing and returns +nil+)
		#
		# Instead of precompiling a grammar, load a previously compiled one.
		#
		# Note: for instances of this class this definition takes the place of
		# Kernel#load.
		def load(filename)
			# TODO
		end

		# UNIMPLEMENTED (does nothing and returns +nil+)
		#
		# Marshall a grammar to disk.
		def save(filename)
			# TODO
		end

		# UNIMPLEMENTED (does nothing and returns +nil+)
		#
		# Run a very basic test on some sample of code
		# (nothing really spectacular).
		#
		# Note: for instances of this class this definition takes the place of
		# Kernel#test.
		def test(filename)
			# TODO
		end

		#######
		private
		#######

		# Build the regex source (a String) for one rule element.
		def rule_source(rule_element)
			# Each part stays nil until the first element belonging to it shows up.
			left = this = right = nil
			rule_element.each_element("*") do |child|
				if THIS == child.name
					raise "There seem to be two #{THIS} elements..." if this
					this = ""
					child.each_element("*") { |part| this += translate(part) }
				elsif this
					right = right.to_s + translate(child)
				else
					left = left.to_s + translate(child)
				end
			end
			# A rule without child elements keeps its historical single group.
			return "()" unless left || this
			# Context without a THIS element used to end up as an unbalanced
			# pattern and a RegexpError; name the actual problem instead.
			unless this
				raise "There seems to be no #{THIS} element in rule '#{rule_element.name}'..."
			end
			"(#{left})(#{this})(#{right})"
		end

		# Return the regex source the rule library produces for +element+, or
		# an empty String (after a debug message) if the library has no entry
		# for that tag.
		def translate(element)
			handler = RULES[element.name.downcase]
			if handler
				handler.call(element.text, element.attributes)
			else
				# TODO for debugging purposes only: non-existent gram parts!!!
				puts "DEBUG: rule not implemented: #{element.name}"
				""
			end
		end
	end
end


# vim:ts=2:sw=2:tw=75:
