phrasesearch.cpp

Go to the documentation of this file.
00001 /*
00002   Phrasehunter - index and query text corpora
00003   Copyright (C) 2006  Torsten Marek (shlomme@gmx.de) &
00004   Armin Schmidt (armin.sch@gmail.com)
00005 
00006   This program is free software; you can redistribute it and/or
00007   modify it under the terms of the GNU General Public License
00008   as published by the Free Software Foundation; either version 2
00009   of the License, or (at your option) any later version.
00010 
00011   This program is distributed in the hope that it will be useful,
00012   but WITHOUT ANY WARRANTY; without even the implied warranty of
00013   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014   GNU General Public License for more details.
00015 
00016   You should have received a copy of the GNU General Public License
00017   along with this program; if not, write to the Free Software
00018   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
00019 */
00020 
00021 #include <iostream>
00022 
00023 #include <boost/format.hpp>
00024 #include <boost/assign.hpp>
00025 #include <boost/bind.hpp>
00026 
00027 #include "support/hptimer.hpp"
00028 #include "support/unicodehelpers.h"
00029 
00030 #include "phrasehunter/corpus.h"
00031 #include "phrasehunter/token.h"
00032 #include "phrasehunter/searchengine.h"
00033 
00034 
00035 using namespace boost::assign;
00036 using boost::bind;
00037 using boost::format;
00038 
00039 typedef std::list<std::string> StringList;
00040 
00041 const StringList phrases = 
00042     list_of
00043     ("im herbst letzten jahres")
00044     ("vor allem in")
00045     ("im november 1993")
00046     (", der im vergangenen jahr");
00047 
00048 void time_serge(PhraseHunter::SearchEngine* se, std::string phrase, int iterations = 10) 
00049 {
00050     hptimer t;
00051     for(int i = 0; i < iterations; ++i) {
00052         se->searchPhrase(schma::UnicodePtr(new UnicodeString(phrase.c_str())));
00053     }
00054     unsigned long elapsed = t.elapsed();
00055     
00056     std::cout << format("%|-30||%|5||%|10||%|10||")
00057         % phrase
00058         % iterations
00059         % elapsed
00060         % (elapsed / static_cast<double>(iterations))
00061               << std::endl;
00062 }
00063 
00064 int main(int argc, char** argv)
00065 {
00066     PhraseHunter::CorpusManager corpus("corpora/testcorpus");
00067     PhraseHunter::SearchEngine *se = corpus.searchEngine();
00068     
00069     std::cout << format("%|=30||%|=5||%|=10||%|=10||")
00070         % "phrase" % "i" % "t (ms)" % "1 (ms)" << std::endl;
00071 
00072     for_each(phrases.begin(), phrases.end(),
00073              bind(time_serge, se, _1, 100));
00074     std::cout << std::endl;
00075     
00076     return 0;
00077 }
00078 
00079 
00080 
00081 
00082           

Generated on Thu Dec 21 16:14:41 2006 for The Phrasehunter by  doxygen 1.5.1