corpus.h

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 /*
00003   Phrasehunter - index and query text corpora
00004   Copyright (C) 2006  Torsten Marek (shlomme@gmx.de) &
00005   Armin Schmidt (armin.sch@gmail.com)
00006   
00007   This program is free software; you can redistribute it and/or
00008   modify it under the terms of the GNU General Public License
00009   as published by the Free Software Foundation; either version 2
00010   of the License, or (at your option) any later version.
00011   
00012   This program is distributed in the hope that it will be useful,
00013   but WITHOUT ANY WARRANTY; without even the implied warranty of
00014   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015   GNU General Public License for more details.
00016   
00017   You should have received a copy of the GNU General Public License
00018   along with this program; if not, write to the Free Software
00019   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
00020 */
00021 
00022 #ifndef CORPUS_H
00023 #define CORPUS_H CORPUS_H
00024 
00025 #include <string>
00026 #include <boost/filesystem/path.hpp>
00027 #include <stdexcept>
00028 
00029 #include "sqlitepp/sqlitepp.h"
00030 
00031 #include "ph_types.h"
00032 #include "phexception.h"
00033 
00034 namespace PhraseHunter {
00035 
00036 //! Manages a corpus repository: a base directory (m_directory) together with an SQLite database handle (m_db). Hands out search, statistics and context-reader objects and allows files to be added to or removed from the repository.
00037 class CorpusManager 
00038 {
00039     // NOTE(review): the class holds raw pointers and declares a destructor, but no copy constructor/assignment operator is declared in this header; whether copying a CorpusManager is safe cannot be told from here -- confirm.
00040 public:
00041     //! The mode in which a CorpusManager object should operate; selected at construction (the constructor defaults to Read).
00042     enum Mode {
00043         Read = 1,  //!< Read mode; the constructor's default.
00044         Write = 2,  //!< Write mode. NOTE(review): presumably required by addFile()/removeFile(), which are declared to throw NoIndexerError -- confirm in the implementation.
00045     };
00046 
00047     /**
00048        \brief Constructor.
00049        \param corpus_repository Path to the base directory of the corpus.
00050        \param mode The mode in which a CorpusManager object should operate (defaults to Read).
00051        \throws Exceptions::NoSuchCorpusError Declared in the exception specification; presumably raised when \a corpus_repository is not a usable corpus -- confirm in the implementation. */
00052     CorpusManager(const std::string& corpus_repository, CorpusManager::Mode mode = Read)
00053         throw(Exceptions::NoSuchCorpusError);
00054     //! Destructor. NOTE(review): presumably releases m_indexer and the engine/reader objects held in the pointer members -- confirm in the implementation.
00055     ~CorpusManager();
00056 
00057     //! Factory function that returns a pointer to a SearchEngine object (documented as newly constructed). NOTE(review): the m_searchEngine member suggests the object may instead be created once and kept by the manager -- confirm ownership before deleting the result.
00058     SearchEngine* searchEngine();
00059     //! Factory function that returns a pointer to a StatisticsEngine object (documented as newly constructed). NOTE(review): see m_statistics; ownership of the result is not visible in this header -- confirm.
00060     StatisticsEngine* statisticsEngine();
00061     //! Factory function that returns a pointer to a ContextReader object (documented as newly constructed). NOTE(review): see m_contextReader; ownership of the result is not visible in this header -- confirm.
00062     ContextReader* contextReader();
00063 
00064     //! Get the file name of the document identified by \a docID. NOTE(review): the behaviour for an unknown docID is not visible in this header.
00065     std::string documentName(DocID docID);
00066 
00067     /**
00068        \brief Add a file to the repository.
00069        \param filename The file to be added.
00070        \param info String holding information about this file. Will be written to the info file.
00071        \note The exception specification permits Exceptions::FileError, Exceptions::NoIndexerError and std::out_of_range; the conditions that trigger each are not visible in this header. */
00072     void addFile(const std::string& filename, const std::string& info)
00073         throw (Exceptions::FileError, Exceptions::NoIndexerError, std::out_of_range);
00074     //! Remove the file identified by \a docID from the repository. Declared to throw Exceptions::NoIndexerError.
00075     void removeFile(DocID docID) throw(Exceptions::NoIndexerError);
00076     
00077 private:
00078     inline boost::filesystem::path textDir() // Path obtained by appending Repository::TextDirectory to the repository base directory.
00079     {
00080         return m_directory / Repository::TextDirectory;
00081     }
00082     
00083     inline boost::filesystem::path indexFile() // Path obtained by appending Repository::IndexFile to the repository base directory.
00084     {
00085         return m_directory / Repository::IndexFile;
00086     }
00087     CorpusManager::Mode m_mode;  //!< Operating mode; presumably the value passed to the constructor -- confirm.
00088     IndexManager* m_indexer;  //!< Index manager. NOTE(review): presumably only available in Write mode, given the NoIndexerError exception specifications -- confirm.
00089     const boost::filesystem::path m_directory;  //!< Repository base directory; textDir() and indexFile() are resolved relative to it.
00090     SearchEngine* m_searchEngine;  //!< NOTE(review): presumably the object handed out by searchEngine() -- confirm.
00091     StatisticsEngine* m_statistics;  //!< NOTE(review): presumably the object handed out by statisticsEngine() -- confirm.
00092     ContextReader* m_contextReader;  //!< NOTE(review): presumably the object handed out by contextReader() -- confirm.
00093     // Data members are initialised in declaration order; check the constructor's initialiser list before reordering them.
00094     SQLitePP::SqliteDB m_db;  //!< SQLite database handle (SQLitePP wrapper).
00095     struct Repository  // Constants describing the repository layout; only declared here, so their values are defined outside this header.
00096     {
00097         static const int Format;  //!< NOTE(review): presumably the repository format version -- confirm.
00098         static const std::string TextDirectory;  //!< Path component appended to m_directory by textDir().
00099         static const std::string InfoFile;  //!< NOTE(review): presumably names the "info file" mentioned in addFile()'s documentation -- confirm.
00100         static const std::string IndexFile;  //!< Path component appended to m_directory by indexFile().
00101     };
00102 };
00103 
00104 }
00105 
00106 
00107 #endif // CORPUS_H

Generated on Thu Dec 21 16:14:40 2006 for The Phrasehunter by  doxygen 1.5.1