# ****************************************************
# Name des Moduls: EavaluationMonty
# Name des Projekts: TaxoSearch
#
# Autor(en):
#        Thorsten Beinhorn, Vesna Cvoro,
#        Khaled Dhaoui und Christian Pretzsch 
#
# Aufgaben des Moduls: siehe Code Dokumentation TaxoSearch
# 
#
# Datum der letzten Aenderung: 26.11.2003
# ****************************************************

from __future__ import division
import string,sys
def _read_tokens(path):
    """Return the whitespace-separated tokens stored in the file at *path*."""
    handle = open(path, 'r')
    try:
        return handle.read().split()
    finally:
        # Close the file even when reading fails.
        handle.close()


def _split_token(token):
    """Split a ``word/TAG`` token into ``(word, tag)``.

    The split happens at the LAST slash so that words which themselves
    contain a slash (``and/or/CC``, ``1/2/CD``) keep their real tag.

    Raises:
        ValueError: if the token contains no slash at all.
    """
    cut = token.rfind('/')
    if cut < 0:
        raise ValueError("token without a /TAG suffix: %r" % (token,))
    return token[:cut], token[cut + 1:]


def _family(tag):
    """Map a tag onto its family for approximate (``-family``) matching.

    Tags are reduced to their first two characters; every tag starting
    with ``R`` falls into the single family ``R``.  Each tag is mapped on
    its own, so a given tag always lands in the same category no matter
    what it is being compared with.
    """
    if tag[:1] == 'R':
        return 'R'
    return tag[:2]


def _agreement(tags, gold_tags, family_p):
    """Return ``(percent_agreement, expected_agreement, kappa)``.

    *tags* and *gold_tags* must be non-empty sequences of equal length.
    When *family_p* is true, tags are compared by family (see ``_family``).
    """
    # tag -> [occurrences in the tagged corpus, occurrences in the gold corpus]
    marginals = {}
    num_correct = 0
    for tag1, tag2 in zip(tags, gold_tags):
        if family_p:
            tag1 = _family(tag1)
            tag2 = _family(tag2)
        marginals.setdefault(tag1, [0, 0])[0] += 1
        marginals.setdefault(tag2, [0, 0])[1] += 1
        if tag1 == tag2:
            num_correct += 1
    total = float(len(tags))
    p_a = num_correct / total
    p_e = 0
    for counts in marginals.values():
        p_e += (counts[0] / total) * (counts[1] / total)
    if p_e == 1:
        # Both corpora use one single tag throughout: agreement is
        # necessarily perfect and the kappa formula degenerates to 0/0.
        kappa = 1.0
    else:
        kappa = (p_a - p_e) / (1 - p_e)
    return p_a, p_e, kappa


def _sample(tokens, i):
    """Format token *i* with up to 6 tokens before and 5 tokens after it."""
    # Clamp the start: a negative slice start would wrap around to the
    # end of the list and produce an empty or wrong left context.
    before = ' '.join(tokens[max(0, i - 6):i])
    after = ' '.join(tokens[i + 1:i + 6])
    return "%s @@%s@@ %s" % (before, tokens[i], after)


def main(argv=None):
    """Compare a tagged corpus with a gold standard and print statistics.

    Args:
        argv: full argument vector including the program name; defaults
            to ``sys.argv``.  The last two arguments other than
            ``-family`` are the tagged file and the gold-standard file.

    Returns:
        0 on success, -1 when the input cannot be evaluated (missing
        arguments, malformed token, token count mismatch, empty corpora).
    """
    if argv is None:
        argv = sys.argv
    # Every print below passes exactly one string, so the output is the
    # same whether print is a statement or a function.
    print("""
    USAGE: python EvaluateMonty.py [-family] "tagged.txt" "goldstd.txt"
    -family   approximate matching (RB=RP,VB*=VB*,NN*=NN*,etc)
    """)
    files = [arg for arg in argv[1:] if arg != '-family']
    if len(files) < 2:
        print("ERROR: expected a tagged corpus file and a gold standard file")
        return -1
    print('Loading Corpora...')
    family_p = '-family' in argv
    toks = _read_tokens(files[-2])
    gold_toks = _read_tokens(files[-1])
    try:
        pairs = [_split_token(t) for t in toks]
        gold_pairs = [_split_token(t) for t in gold_toks]
    except ValueError:
        print("ERROR: %s" % (sys.exc_info()[1],))
        return -1
    toks_w = [pair[0] for pair in pairs]
    toks_t = [pair[1] for pair in pairs]
    gold_toks_w = [pair[0] for pair in gold_pairs]
    gold_toks_t = [pair[1] for pair in gold_pairs]
    if len(toks) != len(gold_toks):
        print("ERROR: different numbers of tokens between tagged corpora.  "
              "One contains %d while the other contains %d"
              % (len(toks), len(gold_toks)))
        # zip stops at the shorter corpus, so the diagnostic never
        # indexes past the end of either list.
        for i, (word, gold_word) in enumerate(zip(toks_w, gold_toks_w)):
            if word != gold_word:
                print("off sync at i= %d words %s %s" % (i, word, gold_word))
        return -1
    if not toks:
        print("ERROR: both corpora are empty, nothing to evaluate")
        return -1
    print("Calculating Statistics...")
    p_a, total_expected_agreement, kappa = _agreement(
        toks_t, gold_toks_t, family_p)
    print("*** Percent Agreement: %s" % (p_a,))
    print("*** Total Expected Agreement: %s" % (total_expected_agreement,))
    print("*** Kappa Statistic: %s" % (kappa,))
    print("*** Error Samples:")
    # NOTE(review): samples are selected on the exact tags even when
    # -family is given, so they may include pairs counted as agreement
    # above -- confirm whether that is intended.
    for i in range(len(toks)):
        if toks_t[i] != gold_toks_t[i]:
            print("WRONG: ... %s" % (_sample(toks, i),))
            print("RIGHT: ... %s \n" % (_sample(gold_toks, i),))
    print('Done!')
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))