#include <contextreader.h>
Public Member Functions | |
ContextReader (const boost::filesystem::path &textdirectory) | |
Constructor. Should not be called directly. Instead, use CorpusManager::contextReader(). | |
~ContextReader () | |
TokenContextPtr | getContextFromPosition (DocID doc, IdxPos pos, size_t tokenLength, unsigned int width) const throw (Exceptions::FileError) |
Get the context for one particular position in a particular document. | |
void | fillContext (TokenContextPtr context, unsigned int width) const throw (Exceptions::FileError) |
Fill a TokenContext object with a particular context. | |
std::vector< TokenContextPtr > | context (TokenPtr token, unsigned int width) const throw (Exceptions::FileError) |
Get the context of a token. | |
std::vector< TokenContextPtr > | context (const TokenVector &tokens, unsigned int width) const throw (Exceptions::FileError) |
Overloaded convenience function. | |
Private Member Functions | |
void | readBlock (std::ifstream &file, TokenContextPtr) const |
unsigned int | removeNewlines (char *buffer, unsigned int length) const |
boost::filesystem::path | documentFile (DocID docId) const |
Private Attributes | |
boost::filesystem::path | m_textDirectory |
Static Private Attributes | |
static const int | BufferSize = 5120 |
Definition at line 35 of file contextreader.h.
PhraseHunter::ContextReader::ContextReader | ( | const boost::filesystem::path & | textdirectory | ) |
Constructor. Should not be called directly. Instead, use CorpusManager::contextReader().
textdirectory | The directory containing the plain source text files. |
Definition at line 30 of file contextreader.cpp.
PhraseHunter::ContextReader::~ContextReader | ( | ) | [inline] |
Definition at line 43 of file contextreader.h.
TokenContextPtr PhraseHunter::ContextReader::getContextFromPosition | ( | DocID | doc, | |
IdxPos | pos, | |||
size_t | tokenLength, | |||
unsigned int | width | |||
) | const throw (Exceptions::FileError) |
Get the context for one particular position in a particular document.
doc | The ID of the document. | |
pos | The offset of the token, i.e. the byte position in the document. | |
tokenLength | The length of the token in bytes. | |
width | The width of each left and right context. |
void PhraseHunter::ContextReader::fillContext | ( | TokenContextPtr | context, | |
unsigned int | width | |||
) | const throw (Exceptions::FileError) |
Fill a TokenContext object with a particular context.
context | TokenContext object to fill. | |
width | The width of each left and right context. |
Definition at line 87 of file contextreader.cpp.
Referenced by KwicTableModel::getContextString().
std::vector< TokenContextPtr > PhraseHunter::ContextReader::context | ( | TokenPtr | token, | |
unsigned int | width | |||
) | const throw (Exceptions::FileError) |
Get the context of a token.
token | The token in question. | |
width | The width of each left and right context. |
Definition at line 100 of file contextreader.cpp.
References PhraseHunter::TokenContext::emptyContext().
Referenced by PhraseHunter::StatisticsEngine::getContextVector().
std::vector< TokenContextPtr > PhraseHunter::ContextReader::context | ( | const TokenVector & | tokens, | |
unsigned int | width | |||
) | const throw (Exceptions::FileError) |
void PhraseHunter::ContextReader::readBlock | ( | std::ifstream & | file, | |
TokenContextPtr | ||||
) | const [private] |
Definition at line 48 of file contextreader.cpp.
References BufferSize, removeNewlines(), and schma::UTF8Converter().
unsigned int PhraseHunter::ContextReader::removeNewlines | ( | char * | buffer, | |
unsigned int | length | |||
) | const [private] |
boost::filesystem::path PhraseHunter::ContextReader::documentFile | ( | DocID | docId | ) | const [inline, private] |
const int PhraseHunter::ContextReader::BufferSize = 5120 [static, private] |
boost::filesystem::path PhraseHunter::ContextReader::m_textDirectory [private] |