# ****************************************************
# Name des Moduls: mod_TaxoSearch
# Name des Projekts: TaxoSearch
#
# Autor(en):
#        Thorsten Beinhorn, Vesna Cvoro,
#        Khaled Dhaoui und Christian Pretzsch 
#
# Aufgaben des Moduls: siehe Code Dokumentation TaxoSearch
# 
#
# Datum der letzten Aenderung: 26.11.2003
# ****************************************************



from mod_ResultCollector import *
from mod_DocumentObjects import *
from mod_DocumentProcessor import *
from mod_ResultReciever import *
from mod_InformationRetrieval import *

class TaxoSearch:
	"""Coordinates the steps of one TaxoSearch query.

	Attributes:
		oRR: ResultReciever instance used to perform the search.
		oDC: DocumentCollection reflecting the latest pipeline stage.
		lstRelevantWords: value returned by GetRelevantWords() for the
			last completed search; an empty list until then.
	"""

	def __init__(self):
		"""Create the object with a fresh receiver and an empty state."""
		self._resetState()

	def _resetState(self):
		"""Replace receiver, document collection and word list with new ones."""
		self.oRR = ResultReciever()
		self.oDC = DocumentCollection()
		self.lstRelevantWords = []

	def doInternetSearch(self, QueryString, lstQuery):
		"""Run the search for QueryString and process the results.

		All state left over from a previous search is discarded first.

		Args:
			QueryString: passed unchanged to ResultReciever.PerformSearch().
			lstQuery: passed unchanged to DocumentProcessor together with
				the collected documents.

		Returns:
			0 if PerformSearch() returned 0 (no further processing is
			done in that case), otherwise 1 once self.oDC and
			self.lstRelevantWords have been updated.
		"""
		self._resetState()

		# A 0 from the receiver ends the search early and is handed on
		# to the caller (presumably "nothing found" -- confirm against
		# ResultReciever).
		if self.oRR.PerformSearch(QueryString) == 0:
			return 0

		# Start from the document collection filled by the receiver.
		self.oDC = self.oRR.DocumentCollection

		# Collect the results from the URLs and keep the updated collection.
		collector = ResultCollector(self.oDC)
		collector.run()
		self.oDC = collector.DocumentCollection

		# Process the documents with respect to the query terms.
		processor = DocumentProcessor(self.oDC, lstQuery)
		processor.run()
		processed = processor.DocumentCollection

		# Post-process the document vectors.
		postProcessor = PostProcessor(processed)
		postProcessor.SingularizeDC(processed)
		self.oDC = postProcessor.DocumentCollection

		# Calculate term frequencies, then derive the relevant words.
		self.oDC = CalculateTermFreq(self.oDC)
		self.lstRelevantWords = GetRelevantWords(self.oDC)
		return 1

	def doPageRanking(self, dicQueryVector):
		"""Rank the current document collection against dicQueryVector.

		Returns whatever the module-level doPageRanking() function
		returns for (self.oDC, dicQueryVector).
		"""
		# Inside a method the bare name resolves to the module-level
		# function of the same name, not to this method.
		return doPageRanking(self.oDC, dicQueryVector)
