#include <tokenizer.h>
Public Member Functions
Tokenizer (Input *i)
~Tokenizer ()
bool hasMoreTokens ()
std::string nextUnencodedToken (bool *hasWhitespace=0)
schma::UnicodePtr nextToken (bool *hasWhitespace=0)
Protected Member Functions
bool containsTokens (const std::string &buffer, int pos=0) const
bool bufferIsFull () const
bool fillBuffer ()
Protected Attributes
UConverter * m_conv
UErrorCode m_errcode
std::string m_tokenbuffer
size_t m_pos
std::string m_all_delim
bool m_insertWhitespace
boost::scoped_ptr< Input > m_input
Static Protected Attributes
static std::string m_whitespace
static std::string m_delimiters
Definition at line 93 of file tokenizer.h.
PhraseHunter::Tokenizer::Tokenizer(Input *i) [inline]
Definition at line 133 of file tokenizer.h.
References fillBuffer(), m_all_delim, m_conv, m_delimiters, m_errcode, m_insertWhitespace, m_pos, and m_whitespace.
PhraseHunter::Tokenizer::~Tokenizer() [inline]
bool PhraseHunter::Tokenizer::containsTokens(const std::string &buffer, int pos = 0) const [inline, protected]
Definition at line 108 of file tokenizer.h.
References m_whitespace.
Referenced by bufferIsFull(), and fillBuffer().
bool PhraseHunter::Tokenizer::bufferIsFull() const [inline, protected]
Definition at line 112 of file tokenizer.h.
References containsTokens(), m_pos, and m_tokenbuffer.
Referenced by hasMoreTokens().
bool PhraseHunter::Tokenizer::fillBuffer() [inline, protected]
Definition at line 117 of file tokenizer.h.
References containsTokens(), m_input, m_insertWhitespace, m_pos, and m_tokenbuffer.
Referenced by hasMoreTokens(), and Tokenizer().
bool PhraseHunter::Tokenizer::hasMoreTokens() [inline]
Definition at line 151 of file tokenizer.h.
References bufferIsFull(), and fillBuffer().
Referenced by PhraseHunter::TextSaver::hasMoreTokens(), and PhraseHunter::SearchEngine::searchPhrase().
std::string PhraseHunter::Tokenizer::nextUnencodedToken(bool *hasWhitespace = 0)
Definition at line 33 of file tokenizer.cpp.
References m_all_delim, m_insertWhitespace, m_pos, m_tokenbuffer, and m_whitespace.
Referenced by nextToken(), and PhraseHunter::SearchEngine::searchPhrase().
schma::UnicodePtr PhraseHunter::Tokenizer::nextToken(bool *hasWhitespace = 0)
Definition at line 52 of file tokenizer.cpp.
References m_conv, m_errcode, and nextUnencodedToken().
Referenced by PhraseHunter::TextSaver::nextToken().
UConverter* PhraseHunter::Tokenizer::m_conv [protected]
Definition at line 96 of file tokenizer.h.
Referenced by nextToken(), Tokenizer(), and ~Tokenizer().
UErrorCode PhraseHunter::Tokenizer::m_errcode [protected]
std::string PhraseHunter::Tokenizer::m_tokenbuffer [protected]
Definition at line 99 of file tokenizer.h.
Referenced by bufferIsFull(), fillBuffer(), and nextUnencodedToken().
size_t PhraseHunter::Tokenizer::m_pos [protected]
Definition at line 100 of file tokenizer.h.
Referenced by bufferIsFull(), fillBuffer(), nextUnencodedToken(), and Tokenizer().
std::string PhraseHunter::Tokenizer::m_all_delim [protected]
std::string PhraseHunter::Tokenizer::m_whitespace [static, protected]
Definition at line 103 of file tokenizer.h.
Referenced by containsTokens(), nextUnencodedToken(), and Tokenizer().
std::string PhraseHunter::Tokenizer::m_delimiters [static, protected]
bool PhraseHunter::Tokenizer::m_insertWhitespace [protected]
Definition at line 106 of file tokenizer.h.
Referenced by fillBuffer(), nextUnencodedToken(), and Tokenizer().
boost::scoped_ptr<Input> PhraseHunter::Tokenizer::m_input [protected]