#!/usr/bin/python

import pickle

class Text:
	"""Inverted index over a sequence of texts (word lists).

	``self.hash`` maps every word to a dict keyed by text number
	(1 for the first text added, 2 for the second, ...).  Each value is a
	tuple ``(timesFound, words)``:

	* ``timesFound`` -- how often the word occurred in that text;
	* ``words``      -- the running word count of the text at the moment
	  the word was last seen, i.e. the 1-based position of its most
	  recent occurrence.

	For the sake of efficiency this class sacrifices some accuracy:
	``words`` is only refreshed when the word occurs again, so it is not
	updated to the final length of the text once no further occurrence
	is found.
	"""

	def __init__(self):
		# {wordX -> {text# -> (timesFound, words), text# -> ..., ...},
		#  wordY -> {...}, wordZ -> {...}}
		self.hash = {}
		# Number of texts added so far; also the key of the current text.
		self.numTexts = 0

	def oldInsert(self, w, numWords):
		"""Record an occurrence of a word that is already in the index.

		w        -- the word; must already be a key of ``self.hash``
		            (a KeyError is raised otherwise).
		numWords -- 1-based position of this occurrence in the current text.
		"""
		perText = self.hash[w]
		# A word known from earlier texts may be new to the current one,
		# in which case counting starts from zero.
		timesFound = perText.get(self.numTexts, (0, numWords))[0]
		perText[self.numTexts] = (timesFound + 1, numWords)

	def newInsert(self, w, numWords):
		"""Create the index entry for a word seen for the very first time.

		Any existing entry for ``w`` is replaced.

		w        -- the word.
		numWords -- 1-based position of this occurrence in the current text.
		"""
		self.hash[w] = {self.numTexts: (1, numWords)}

	def add(self, l):
		"""Index a new text.

		l -- iterable of words making up the text, in reading order.

		The text is assigned the next text number (``self.numTexts`` is
		incremented even when ``l`` is empty).
		"""
		self.numTexts += 1
		for numWords, w in enumerate(l, 1):
			# ``in`` replaces dict.has_key(), which no longer exists in
			# Python 3.
			if w in self.hash:
				self.oldInsert(w, numWords)
			else:
				self.newInsert(w, numWords)

	def pickle(self, f):
		"""Serialize this object with ``pickle.dump`` to the file named f.

		f -- path of the output file; an existing file is overwritten.
		"""
		# Binary mode is required because pickle emits bytes; the ``with``
		# block guarantees the file is flushed and closed.
		with open(f, "wb") as out:
			pickle.dump(self, out)
		
if __name__ == "__main__":
	# Small demo: index two sample texts, show the resulting index and
	# store the whole object in the file "pickled" in the working directory.
	t = Text()
	t.add(["in", "einer", "kleinen", "einer", "grossen", "Stadt"])
	t.add(["wo", "jeder", "einen", "kleinen", "Affen", "hat"])
	# Parenthesized single-argument form prints the same on Python 2 and 3.
	print(t.hash)
	t.pickle("pickled")
