"""
Module AD_token
Author: Branimira Nikolova
braniad@yahoo.com

"""

import MontyTokenizer, MontyTagger

class Token:
    """
    A single token of text paired with its part-of-speech tag.

    Arguments:
    token: the token text
    tag: the part-of-speech tag of the token

    Attributes:
    token: the token text, as passed in
    tag: the part-of-speech tag, as passed in
    tagSetDict: dictionary for some important tags and symbols, with the
        keys "noun", "stopTags", "delimiters", "open", "digit" and "close"
    """

    def __init__(self, token, tag):
        self.token = token
        self.tag = tag
        # Built per instance, so changing one token's table never leaks
        # into another token.
        self.tagSetDict = {
            "noun": ["NNP", "NNPS", "NN", "NNS", "CD"],
            "stopTags": ["CC", "IN", "TO", "RP", "PRP", "PRP$", "WP", "WP$"],
            "delimiters": [",", ";", ".", "!", "?"],
            "open": "(",
            "digit": "CD",
            "close": ")",
        }

    def __str__(self):
        """
        Representation of the Token for printing: "<token>, <tag>".
        """
        return '%s, %s' % (self.token, self.tag)

    def tagTextWithMT(self, text, tagger=None):
        """
        The input text is tokenized and tagged using the Penn-Treebank Tag Set.

        Arguments:
        text: the raw text to tag
        tagger: optional object with a tag(text) method returning a string
            of whitespace-separated "word/TAG" items. When omitted, a new
            MontyTagger.MontyTagger() is created for this call; passing one
            in lets callers reuse a single tagger across calls.

        Returns list of tokens (Token instances), in the order the tagger
        emitted them. An item without any "/" becomes a Token with an
        empty tag instead of raising IndexError.
        """
        if tagger is None:
            tagger = MontyTagger.MontyTagger()

        tokenList = []
        for item in tagger.tag(text).split():
            # Split on the LAST slash only: the word itself may contain
            # "/" (e.g. "and/or/CC", "1/2/CD"), while the tag is whatever
            # follows the final slash.
            parts = item.rsplit("/", 1)
            if len(parts) == 2:
                word, tag = parts
            else:
                word, tag = item, ""
            tokenList.append(Token(token=word, tag=tag))

        return tokenList