mò
G¬ÜDc           @   s/   d  Z  d k Z d k Z d f  d „  ƒ  YZ d S(   s)   
Modul token
Author: Branimira Nikolova

Nt   Tokenc           B   s)   t  Z d  Z d „  Z d „  Z d „  Z RS(   s‹   
    class Token
    argunets:
    token: the token
    tag: the part-of-speech tag
    tagSetDict: dictionary for some important tags
    c         C   s‹   | |  _  | |  _ h  d d d d d d g <d d d	 d
 d d d d d g <d d d d d d g <d d <d d <d d <|  _ d  S(   Nt   nount   NNPt   NNPSt   NNt   NNSt   CDt   stopTagst   CCt   INt   TOt   RPt   PRPs   PRP$t   WPs   WP$t
   delimiterst   ,t   ;t   .t   !t   ?t   opent   (t   digitt   closet   )(   t   tokent   selft   tagt
   tagSetDict(   R   R   R   (    (    t   C:\AD\src\AD_token.pyt   __init__   s    		c         C   s   d |  i |  i f S(   s8   
        Representation from Acro for printing.
        s   %s, %sN(   R   R   R   (   R   (    (    R   t   __str__   s     c   	      C   s{   g  } t i ƒ  } | i | ƒ } | i ƒ  } xG | D]? } | i d ƒ } t
 d | d d | d ƒ } | i | ƒ q4 W| S(   sz   
        The input text is tokenized and tagged using the Penn-Treebank Tag Set.

        Returns list of tokens.
        t   /R   i    R   i   N(   t	   tokenListt   MontyTaggert   new_MTAR   t   textt   tokTextt   splitt   ltTokenst   tt   ttR    t   newTokent   append(	   R   R$   R%   R#   R'   R*   R!   R(   R)   (    (    R   t   tagTextWithMT%   s      (   t   __name__t
   __module__t   __doc__R   R   R,   (    (    (    R   R    	   s    		(   R/   t   MontyTokenizerR"   R    (   R    R0   R"   (    (    R   R      s   