=begin rdoc
SplitRecs: iCookware recipe splitter module for meal master format

Author:: Olga Mordvinova (mordvino@cl.uni-heidelberg.de), Nicola Kaiser (kaiser@cl.uni-heidelberg.de)
Project:: iCookWare
Copyright:: iCookWare Team 2005 (Nicola Kaiser, Ana Kovatcheva, Olga Mordvinova, Stephanie Schuldes)
Embedded Documentation Tool:: rdoc
=end

#this module was altered by niki 
#and is the one which should be used to build a recipe datafile for icw_bleder
#i know these are very ugly hacks but it was the best i could do for the mo...

module SplitRecs

# SplitRecs::Title wraps the title of one recipe.
# The value handed to the constructor is stored unchanged and can be read
# back through #name.
class Title
  attr_reader :name

  # +name+:: the recipe title
  def initialize(name)
    @name = name
  end
end

# SplitRecs::Yields wraps the yield/servings value of one recipe.
# The value handed to the constructor is stored unchanged and can be read
# back through #nr.
class Yields
  attr_reader :nr

  # +nr+:: the yield value
  def initialize(nr)
    @nr = nr
  end
end

# SplitRecs::Ingreds describes one ingredient through three read-only
# fields: +name+, +set+ and +type+.
class Ingreds
  attr_reader :name, :set, :type

  # +name+:: ingredient text
  # +set+::  second field of the ingredient line
  # +type+:: third field of the ingredient line
  def initialize(name, set, type)
    @name, @set, @type = name, set, type
  end

  # Appends +newName+ to the stored name and returns the new name.
  def apendname(newName)
    @name += newName
  end

  # Returns false when the name contains at least one word character,
  # true otherwise (this includes a nil name).
  def isempty
    (@name =~ /\w+/).nil?
  end

  # Debug hook; the output is disabled, so the method does nothing and
  # returns nil.
  def ingprint
    # print "Ing: #{@name}, #{@set}, #{@type}\n"
  end
end

# SplitRecs::IngredsList is an ordered collection of ingredient entries,
# exposed read-only through #inglist.
class IngredsList
  attr_reader :inglist

  # Starts with an empty collection.
  def initialize
    @inglist = []
  end

  # Adds +aIngred+ at the end and returns self so that calls can be chained.
  def append(aIngred)
    @inglist << aIngred
    self
  end

  # Calls #ingprint on every stored entry, in order.
  # Returns the index range that was walked (0..size-1).
  def ilprint
    (0..(@inglist.size - 1)).each { |idx| @inglist[idx].ingprint }
  end
end

# SplitRecs::Category wraps one category name of a recipe.
# The value is stored unchanged and can be read back through #name.
class Category
  attr_reader :name

  # +name+:: the category name
  def initialize(name)
    @name = name
  end
end

# SplitRecs::CategoryList is an ordered collection of categories, exposed
# read-only through #catlist.
class CategoryList
  attr_reader :catlist

  # Starts with an empty collection.
  def initialize
    @catlist = []
  end

  # Adds +aCategory+ at the end and returns self so that calls can be chained.
  def append(aCategory)
    @catlist << aCategory
    self
  end

  # Debug hook; the output is disabled, so this only walks the index range
  # and returns it (0..size-1).
  def catprint
    (0..(@catlist.size - 1)).each do |_idx|
      # disabled: print the entries comma-separated, newline after the last
    end
  end
end

# SplitRecs::Keyword wraps one keyword of a recipe.
# The value is stored unchanged and can be read back through #name.
class Keyword
  attr_reader :name

  # +name+:: the keyword text
  def initialize(name)
    @name = name
  end
end

# SplitRecs::KeywordsList is an ordered collection of keywords, exposed
# read-only through #keylist.
class KeywordsList
  attr_reader :keylist

  # Starts with an empty collection.
  def initialize
    @keylist = []
  end

  # Adds +aKeyword+ at the end and returns self so that calls can be chained.
  def append(aKeyword)
    @keylist << aKeyword
    self
  end

  # Debug hook; the output is disabled, so this only walks the index range
  # and returns it (0..size-1).
  def keyprint
    (0..(@keylist.size - 1)).each do |_idx|
      # disabled: print the entries comma-separated, newline after the last
    end
  end
end

# SplitRecs::Recdata wraps the main (instruction) text of a recipe.
# The value is stored unchanged and can be read back through #text.
class Recdata
  attr_reader :text

  # +text+:: the recipe text
  def initialize(text)
    @text = text
  end
end

# SplitRecs::Recipe bundles the parts of one complete recipe: title,
# category, yields, ingredients, text (data) and keywords.
# All parts are stored exactly as passed in and are read-only.
class Recipe
  attr_reader :title, :category, :yields, :ingreds, :data, :keywords

  # +title+::    title of the recipe
  # +category+:: categories of the recipe
  # +yields+::   yield/servings of the recipe
  # +ingreds+::  ingredients of the recipe
  # +data+::     main text of the recipe
  # +keywords+:: keywords of the recipe
  def initialize(title, category, yields, ingreds, data, keywords)
    # The assignment order fixes the instance-variable order of the object.
    @title    = title
    @category = category
    @yields   = yields
    @ingreds  = ingreds
    @data     = data
    @keywords = keywords
  end
end

# SplitRecs::RecipeList collects Recipe objects (the recipe corpus) in @reclist
# and can fill itself from a Meal-Master file through convertRecipes.
class RecipeList

	# Creates a list with an empty recipe store (@reclist).
	def initialize
		@reclist=Array.new
	end	
	
	# Pushes aRecipe onto @reclist and returns self.
	def append(aRecipe)
				@reclist.push(aRecipe)
				self
	end
	
	# Returns @reclist[key] for an Integer key; for any other key the method
	# has no explicit result and therefore returns nil.
	def [] (key)
		if key.kind_of?(Integer)
	  return @reclist[key]
	  end
	end
	
	
	# convertRecipes(filename) parses the Meal-Master file +filename+.
	# For every line accepted by isMMstart it reads, in this order, the title,
	# the categories, the yield, the ingredient lines, the recipe text and an
	# optional keyword line. From these it builds a Recipe object and a plain
	# Hash with the keys "title", "category", "yields", "ingreds", "data" and
	# "keywords".
	# Side effects: prints the inspected list of hashes to stdout, marshals that
	# list into the file "recipes_hash_marshalled" and replaces @reclist with
	# the newly built RecipeList.
	# +filename+:: path handed to getData
	# Returns the RecipeList that was assigned to @reclist.
	# NOTE(review): after this call @reclist is a RecipeList, not an Array. #[]
	# still works because it forwards to the inner list, but #append calls push,
	# which this class does not define -- confirm whether that is intended.
	def convertRecipes(filename) 
		# all lines of the input file (the Array.new is overwritten right away)
		mydata=Array.new
		mydata = getData(filename)
		# only referenced by the commented-out Marshal.dump near the end
		fp="recipes_hash_marshalled"
		rectext =""		
		aRecipeList=RecipeList.new()
		# NOTE(review): both lists are created once, outside the loop. The keyword
		# list is only replaced when a keyword line is found, so a recipe without
		# one appears to keep the keywords of the previous recipe -- confirm.
		aCategoryList=CategoryList.new()
		aKeywordsList=KeywordsList.new()
		# i is the index of the line currently inspected
		i=0
		listOfHashes=Array.new
		
		#aRecipeHash=Hash.new
		#aRecipeHash={title => "", category => "", yields => "", ingreds => "", data => "", keywords=> ""}
		while i<= mydata.size-1
		
					if isMMstart(mydata[i]) then
						  # skip forward to the title line
						  while not isTitle(mydata[i]) and i<=mydata.size-1
									i=i+1
							end
							# NOTE(review): the end of the data is only reported here; execution
							# continues and clearTitle would then receive nil -- verify.
							if i>mydata.size-1 then print "EOF !!!\n" end
							#print "title "+String(i) + "\n"
							# clearTitle returns an Array, String() converts it to one string.
							# NOTE(review): on Ruby >= 1.9 Array#to_s gives the inspect form
							# rather than the joined text -- confirm the targeted Ruby version.
							aTitle=Title.new(String(clearTitle(mydata[i])))
							
							# extracts  Categories:		
							while not isCategory(mydata[i]) and i<=mydata.size-1 
									i=i+1
							end
							
							#print "cat "+String(i) + "\n"
							aCategoryList=clearCategories(mydata[i])
							
							# extracts Servings:				 
								while not isYield(mydata[i]) and  i<=mydata.size-1  
									i=i+1
							end
							#print "yield "+String(i) + "\n"
							aYield=Yields.new(clearYield(mydata[i]))
							# step past the yield line and the line after it
							# (presumably a blank separator line -- TODO confirm)
							i=i+2
							
							# extracts Ingredients:
							# The loop runs while the current line contains a word character or
							# the following line is an "MMMMM-" line.
							aIngredList=IngredsList.new()
							while (mydata[i] =~ /\w+/ or isMMline(mydata[i+1]))
								
								# current line has no word character and an "MMMMM-" line follows: skip both
								if (isMMline(mydata[i+1]) and not mydata[i] =~ /\w+/) then
									i=i+2
								# current line is itself an "MMMMM-" line: skip it
								elsif isMMline(mydata[i]) then i=i+1
								else	
								#print "ingrnext "+String(i) + "\n"					
									# always true at this point because the elsif above already handled
									# "MMMMM-" lines; the else branch further down is never reached
									if not isMMline(mydata[i])
										# case: the line holds a comma-separated list of ingredients
								    if (mydata[i] =~ /,\s+/) then
								    #print mydata[i] + String(i)+" zzzzzzzzzzzzzzzzzzzzzzz\n"
								    	mIngreds = mydata[i].chomp.split(/,/)
								    	for j in 0..mIngreds.size-1
								    		# placeholder, replaced by the result of splitIgredients
								    		aIngred=Ingreds.new('','','')
								    	  aIngred=splitIgredients(mIngreds[j].chomp, mydata[i+1].chomp)
								    	  aIngred.ingprint
												# appended without an isempty check, unlike the single-ingredient case
												aIngredList.append([aIngred.name, aIngred.set, aIngred.type])
								    	end
								    else   
											# placeholder, replaced by the result of splitIgredients
											aIngred=Ingreds.new('','','')
								#		print mydata[i] + String(i)+" zzzzzzzzzzzzzzzzzzzzzzz\n"
											aIngred=splitIgredients(mydata[i].chomp, mydata[i+1].chomp)
											# entries without a word character in the name are dropped
											if not aIngred.isempty then
												aIngredList.append([aIngred.name, aIngred.set, aIngred.type])
											end
											aIngred.ingprint
										end
										 
											i=i+1
									else
									 i=i+1
									end			
								end # if
							end		#while
							
								
							#print "ingr_end "+String(i) + "\n"		
							#	extracts recipe text:
							# Lines are concatenated without a separator until a line contains
							# ":" followed by a word character, ";" or a space.
							rectext=""
							while not mydata[i] =~ /:[\w;\d; ]+/ and i<=mydata.size-1
									rectext << String(mydata[i].chomp)
										i=i+1
							end
									#print "rectext_end "+String(i) + "\n"		
							aRecData=Recdata.new(rectext)
														
							#	extracts Keywords:
							# NOTE(review): the condition needs one line to satisfy both isKeywords
							# and isMMend, so this loop looks like it hardly ever advances --
							# confirm the intended condition.
							while (isKeywords(mydata[i]) and i<=mydata.size-1 and isMMend(mydata[i]) )
								i=i+1
							end
							#print "stichworte_end "+String(i) + "\n"	
							if  isKeywords(mydata[i])  then
								aKeywordsList=clearKeywords(mydata[i])
								#print "KWList "+String(i) + "\n"
							end
						
						aRecipe=Recipe.new(aTitle,aCategoryList,aYield,aIngredList,aRecData,aKeywordsList)
						# replaced by the hash literal below
						aRecHash=Hash.new(nil)
						
            #puts aRecipe.title.name.inspect

            #aRecHash={"title"=>aTitle,"category"=>aCategoryList,"yields"=>aYield,"ingreds"=>aIngredList,"data"=>aRecData,"keywords"=>aKeywordsList}
            # plain-data form of the recipe: only the wrapped values, no wrapper objects
            aRecHash={"title"=>aTitle.name,"category"=>aCategoryList.catlist,"yields"=>aYield.nr,"ingreds"=>aIngredList.inglist,"data"=>aRecData.text,"keywords"=>aKeywordsList.keylist}
						#puts "hier ist ein RecHash"
            #puts  aRecHash.inspect
            aRecipeList.append(aRecipe)
						listOfHashes.push(aRecHash)
            
						#aRecipeHash={title => aTitle, category => aCategoryList, yields => aYield, ingreds => #aIngredList, data => aRecData, keywords=> aKeywordsList}
									
										
						
					end
					 #print "Ende "+String(i) + "\n"
					 # advance to the next line, whether or not a recipe was parsed
					 i=i+1
		end  #while i<= mydata.size-1
		#File.open(fp, "w") do | fp | Marshal.dump(aRecipeList, fp) end
		# dump the collected hashes to stdout, then persist them
		puts listOfHashes.inspect
    marshallData(listOfHashes, "recipes_hash_marshalled")
    @reclist=aRecipeList
		
	end  #def
	
end 

# writeToHash returns a fresh, empty recipe record: a Hash whose keys
# "title", "category", "yields", "ingreds", "data" and "keywords" all map to
# an empty string.
# The last key used to be the bare identifier +keywords+, which raised a
# NameError on every call; it is now the string "keywords" like the others.
def writeToHash
  {
    "title"    => "",
    "category" => "",
    "yields"   => "",
    "ingreds"  => "",
    "data"     => "",
    "keywords" => ""
  }
end


# Methods for working with the corpus of recipes:
# marshalling

# marshallData(data, fn) serialises +data+ with Marshal into the file +fn+,
# replacing any previous content.
# +data+:: any object Marshal.dump accepts
# +fn+::   path of the output file
# Returns nil.
# The file is opened in binary mode because Marshal output is binary, and
# through the block form of File.open so that the handle is closed even when
# Marshal.dump raises.
def marshallData(data, fn)
  File.open(fn, 'wb') { |fp| Marshal.dump(data, fp) }
  nil
end



# writeData(filename, data) writes +data+ followed by a newline to the file
# +filename+, replacing any previous content.
# +filename+:: path of the output file
# +data+::     object handed to IO#print
# Returns 1.
# The block form of File.open closes the file even when a write raises.
def writeData(filename, data)
  File.open(filename, "w") do |aFile|
    aFile.print data
    aFile.print "\n"
  end
  1
end

# isMMstart(line) tells whether +line+ is the start line of a Meal-Master
# record: after downcasing it must contain "mmmmm-", later "meal-master" and
# at least one more character.
# Returns true or false.
def isMMstart(line)
  !!(line.downcase =~ /mmmmm-.*meal-master./i)
end

# isMMend(line) tells whether +line+ contains the Meal-Master marker "MMMMM".
# Returns true or false; a nil line gives false.
def isMMend(line)
  (line =~ /MMMMM/) ? true : false
end

# isMMline(line) tells whether +line+ contains "MMMMM-", the marker of a
# Meal-Master comment/section line.
# Returns true or false; a nil line gives false.
def isMMline(line)
  return true if line =~ /MMMMM-/

  false
end

# isTitle(line) tells whether +line+ contains "Title: " followed by at least
# one more character.
# Returns true or false; a nil line gives false.
def isTitle(line)
  !(line =~ /Title: ./).nil?
end

# isCategory(line) tells whether +line+ contains "Categories: " followed by
# at least one more character.
# Returns true or false; a nil line gives false.
def isCategory(line)
  !!(line =~ /Categories: ./)
end


# isYield(line) tells whether +line+ contains "Yield: " or "Servings: "
# followed by at least one more character.
# Returns true or false; a nil line gives false.
def isYield(line)
  return true if line =~ /Yield: ./
  return true if line =~ /Servings: ./

  false
end

# isKeywords(line) tells whether +line+ contains the keyword marker
# ":Stichworte".
# Returns true or false; a nil line gives false.
def isKeywords(line)
  (line =~ /:Stichworte/) ? true : false
end

# clearTitle(line) removes everything up to and including "Title: " from
# +line+ and splits the remainder at every space that is followed by
# non-word characters.
# Returns the resulting Array of title fragments.
def clearTitle(line)
  line.chomp.split(/\W*Title: /)[1].split(/ \W+/)
end

# clearCategories(line) removes everything up to and including
# "Categories: " from +line+, splits the remainder at ", " and collects the
# category names.
# Returns a CategoryList holding the names (as plain values, not Category
# objects).
def clearCategories(line)
  categories = CategoryList.new
  payload = line.chomp.split(/\W*Categories: /)[1]
  payload.split(/, /).each do |entry|
    categories.append(Category.new(entry.chomp).name)
  end
  categories
end

# clearYield(line) removes everything up to and including "Yield: " or
# "Servings: " from +line+ and returns the first whitespace-separated token
# of the remainder (the yield number).
# The label is matched with an alternation. The former pattern
# "[Yield|Servings]" was a character class, so the line was split at any
# single one of those letters followed by ": ", which returned the wrong
# token whenever another label preceded the yield on the same line.
def clearYield(line)
  cline = line.chomp.split(/\W*(?:Yield|Servings): /)
  ncline = cline[1].chomp.split(/\s+/)
  ncline[0]
end

# clearKeywords(line) removes everything up to and including ":Stichworte"
# from +line+, splits the remainder at ", " and collects the keywords.
# Returns a KeywordsList holding the keywords (as plain values, not Keyword
# objects). A line with nothing after the marker gives an empty list; it
# used to raise NoMethodError although isKeywords accepts such a line.
def clearKeywords(line)
  aKeywordsList = KeywordsList.new
  payload = line.chomp.split(/\W*:Stichworte/)[1]
  return aKeywordsList if payload.nil?

  payload.split(/, /).each do |entry|
    aKeywordsList.append(Keyword.new(entry.chomp).name)
  end
  aKeywordsList
end

# getData(filename) reads the file +filename+ and returns its lines.
# +filename+:: path of the recipe corpus
# Returns an Array with one String per line, line terminators included; an
# empty file gives an empty Array. Errors from opening the file (for example
# a missing file) are not rescued.
# The block form of File.open closes the file even when reading raises.
def getData(filename)
  File.open(filename, "r") { |rFile| rFile.readlines }
end

# splitCase1 handles an ingredient line made of set, measurement and name.
# The line is split at whitespace into at most four fields; field 1 becomes
# the set, field 2 the type and field 3 (the rest of the line) the name.
# Field 0, the text before the first whitespace, is ignored.
# Returns a new Ingreds.
def splitCase1(c1line)
  _lead, amount, unit, ingredient = c1line.split(/\s+/, 4)
  Ingreds.new(ingredient, amount, unit)
end

# splitCase2 handles an ingredient line made of set and name.
# The line is split at whitespace into at most three fields; field 1 becomes
# the set and field 2 (the rest of the line) the name. The type is left
# empty. Field 0, the text before the first whitespace, is ignored.
# Returns a new Ingreds.
def splitCase2(c2line)
  _lead, amount, ingredient = c2line.split(/\s+/, 3)
  Ingreds.new(ingredient, amount, '')
end

# splitCase3 handles an ingredient line that consists of a name only.
# The line is split once at the first whitespace; everything after it
# becomes the name, set and type are left empty.
# Returns a new Ingreds.
def splitCase3(c3line)
  _lead, ingredient = c3line.split(/\s+/, 2)
  Ingreds.new(ingredient, '', '')
end

# splitIgredients(line, nextline) turns one ingredient line into an Ingreds.
# +line+::     the ingredient line to split
# +nextline+:: the line following it, inspected for a continuation
# Both lines are chomped and every run of whitespace is collapsed to a
# single space first. Then:
# * a line that is itself a continuation (whitespace, then "-" or "(" and a
#   word character) gives an empty Ingreds;
# * otherwise the first matching pattern selects splitCase1 (set,
#   measurement, name), splitCase2 (set, name) or splitCase3 (name only);
# * a line that matches no pattern gives an empty Ingreds.
# When +nextline+ is a continuation line, its normalised text is appended to
# the name of the ingredient that was split.
# The continuation test on +nextline+ is anchored with \A. It used to read
# /A\s+.../, which looked for a literal "A", so continuation lines were
# never appended and a text such as "Vitamin A -rich" matched by mistake.
def splitIgredients(line, nextline)
  sline  = line.chomp.gsub(/[\t\s]+/, ' ')
  nsline = nextline.chomp.gsub(/[\t\s]+/, ' ')
  continuation = /\A\s+[-\(]\w/

  # A continuation line on its own carries no ingredient.
  return Ingreds.new('', '', '') if sline =~ continuation

  aIngred =
    if sline =~ /\d+\s+\w{1,3}\s+|\t+\w+ /
      splitCase1(sline)
    elsif sline =~ /\d+\s+|\t+\w+ /
      splitCase2(sline)
    elsif sline =~ /\s+|\t+\w+ /
      splitCase3(sline)
    end

  # No pattern matched: nothing to split and nothing to append to.
  return Ingreds.new('', '', '') if aIngred.nil?

  # A split can leave the name nil (line without leading whitespace); there
  # is then no text the continuation could be appended to.
  aIngred.apendname(nsline) if nsline =~ continuation && !aIngred.name.nil?
  aIngred
end
	
	
end
