00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <iostream>
00022 #include <boost/filesystem/operations.hpp>
00023 #include <boost/format.hpp>
00024 #include <boost/lexical_cast.hpp>
00025
00026 #include "sqlitepp/sqlitepp.h"
00027
00028 #include "phrasehunter/corpus.h"
00029 #include "phrasehunter/indexer.h"
00030 #include "phrasehunter/tokenizer.h"
00031 #include "phrasehunter/searchengine.h"
00032 #include "phrasehunter/statistics.h"
00033 #include "phrasehunter/contextreader.h"
00034
00035
00036 using boost::lexical_cast;
00037 using namespace SQLitePP;
00038 namespace filesystem = boost::filesystem;
00039
00040 namespace PhraseHunter {
00041
00042 const int CorpusManager::Repository::Format = 4;
00043 const std::string CorpusManager::Repository::TextDirectory("texts");
00044 const std::string CorpusManager::Repository::InfoFile("info");
00045 const std::string CorpusManager::Repository::IndexFile((boost::format("index-%||.db") % Format).str());
00046
00047 CorpusManager::CorpusManager(const std::string& corpus_repository, CorpusManager::Mode mode)
00048 throw(Exceptions::NoSuchCorpusError)
00049 : m_mode(mode),
00050 m_directory(corpus_repository),
00051 m_searchEngine(NULL),
00052 m_statistics(NULL),
00053 m_contextReader(NULL),
00054 m_db(indexFile().string())
00055 {
00056 if(!filesystem::exists(textDir().string()) || !filesystem::exists(indexFile().string()))
00057 throw Exceptions::NoSuchCorpusError(corpus_repository);
00058
00059 m_indexer = (mode & Write) ? new IndexManager(m_db) : NULL;
00060 }
00061
00062 CorpusManager::~CorpusManager()
00063 {
00064
00065 delete m_indexer;
00066 delete m_searchEngine;
00067 delete m_statistics;
00068 }
00069
00070 SearchEngine* CorpusManager::searchEngine()
00071 {
00072 if(m_searchEngine == NULL) {
00073 m_searchEngine = new SearchEngine(m_db);
00074 }
00075 return m_searchEngine;
00076 }
00077
00078 StatisticsEngine* CorpusManager::statisticsEngine()
00079 {
00080 if(m_statistics == NULL) {
00081 m_statistics = new StatisticsEngine(searchEngine(), contextReader(), m_db);
00082 }
00083 return m_statistics;
00084 }
00085
00086 ContextReader* CorpusManager::contextReader()
00087 {
00088 if(m_contextReader == NULL) {
00089 m_contextReader = new ContextReader(textDir());
00090 }
00091 return m_contextReader;
00092 }
00093
00094 std::string CorpusManager::documentName(DocID docID)
00095 {
00096 Statement::Pointer dn = m_db.statement("select description from docs where docid = ?");
00097
00098 ResultIterator ri = (*dn)
00099 .bindArgs(docID)
00100 .query();
00101
00102 return (ri.hasMoreRows())
00103 ? ri.get<std::string>(0)
00104 : "";
00105 }
00106
00107
00108 void CorpusManager::addFile(const std::string& filename, const std::string& info)
00109 throw (Exceptions::FileError, Exceptions::NoIndexerError, std::out_of_range)
00110 {
00111 if (m_indexer == NULL)
00112 throw Exceptions::NoIndexerError();
00113
00114
00115
00116 int newDocID =
00117 (*m_db.statement("INSERT INTO docs (description) VALUES (?)"))
00118 .bindArgs(info)
00119 .exec()
00120 .lastInsertId();
00121
00122 filesystem::path newname = textDir() / lexical_cast<std::string>(newDocID);
00123
00124 (*m_db.statement("UPDATE docs SET filename = ? WHERE id = ?"))
00125 .bindArgs(newname.string(), newDocID)
00126 .exec();
00127
00128 TextSaver ts(filename, newname.string());
00129 m_indexer->addToIndex(ts, newDocID);
00130 }
00131
00132 void CorpusManager::removeFile(DocID docID) throw (Exceptions::NoIndexerError)
00133 {
00134 if(m_indexer == NULL)
00135 throw Exceptions::NoIndexerError();
00136 filesystem::remove(textDir() / lexical_cast<std::string>(docID));
00137
00138 (*m_db.statement("DELETE FROM docs WHERE docid = ?"))
00139 .bindArgs(docID)
00140 .exec();
00141
00142 m_indexer->removeFromIndex(docID);
00143 }
00144
00145 }