00001 // -*- C++ -*- 00002 /* 00003 Phrasehunter - index and query text corpora 00004 Copyright (C) 2006 Torsten Marek (shlomme@gmx.de) & 00005 Armin Schmidt (armin.sch@gmail.com) 00006 00007 This program is free software; you can redistribute it and/or 00008 modify it under the terms of the GNU General Public License 00009 as published by the Free Software Foundation; either version 2 00010 of the License, or (at your option) any later version. 00011 00012 This program is distributed in the hope that it will be useful, 00013 but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 GNU General Public License for more details. 00016 00017 You should have received a copy of the GNU General Public License 00018 along with this program; if not, write to the Free Software 00019 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00020 */ 00021 00022 #ifndef CORPUS_H 00023 #define CORPUS_H CORPUS_H 00024 00025 #include <string> 00026 #include <boost/filesystem/path.hpp> 00027 #include <stdexcept> 00028 00029 #include "sqlitepp/sqlitepp.h" 00030 00031 #include "ph_types.h" 00032 #include "phexception.h" 00033 00034 namespace PhraseHunter { 00035 00036 //! Class for managing corpus repositories. 00037 class CorpusManager 00038 { 00039 00040 public: 00041 //! The mode in which a CorpusManager object should operate 00042 enum Mode { 00043 Read = 1, 00044 Write = 2, 00045 }; 00046 00047 /** 00048 \brief Constructor. 00049 \param corpus_repository Path to the base directory of the corpus. 00050 \param mode The mode in which a CorpusManager object should operate. 00051 */ 00052 CorpusManager(const std::string& corpus_repository, CorpusManager::Mode mode = Read) 00053 throw(Exceptions::NoSuchCorpusError); 00054 00055 ~CorpusManager(); 00056 00057 //! Factory function that returns a pointer to a newly constructed SearchEngine object. 00058 SearchEngine* searchEngine(); 00059 //! Factory function that returns a pointer to a newly constructed StatisticsEngine object. 00060 StatisticsEngine* statisticsEngine(); 00061 //! Factory function that returns a pointer to a newly constructed ContextReader object. 00062 ContextReader* contextReader(); 00063 00064 //! Get the file name for a particular DocID 00065 std::string documentName(DocID docID); 00066 00067 /** 00068 \brief Add a file to the repository. 00069 \param filename The file to be added 00070 \param info String holding information about this file. Will be written to info file. 00071 */ 00072 void addFile(const std::string& filename, const std::string& info) 00073 throw (Exceptions::FileError, Exceptions::NoIndexerError, std::out_of_range); 00074 //! Remove a particular file from the repository. 00075 void removeFile(DocID docID) throw(Exceptions::NoIndexerError); 00076 00077 private: 00078 inline boost::filesystem::path textDir() 00079 { 00080 return m_directory / Repository::TextDirectory; 00081 } 00082 00083 inline boost::filesystem::path indexFile() 00084 { 00085 return m_directory / Repository::IndexFile; 00086 } 00087 CorpusManager::Mode m_mode; 00088 IndexManager* m_indexer; 00089 const boost::filesystem::path m_directory; 00090 SearchEngine* m_searchEngine; 00091 StatisticsEngine* m_statistics; 00092 ContextReader* m_contextReader; 00093 00094 SQLitePP::SqliteDB m_db; 00095 struct Repository 00096 { 00097 static const int Format; 00098 static const std::string TextDirectory; 00099 static const std::string InfoFile; 00100 static const std::string IndexFile; 00101 }; 00102 }; 00103 00104 } 00105 00106 00107 #endif // CORPUS_H