regexsearch.cpp

Go to the documentation of this file.
00001 /*
00002   Phrasehunter - index and query text corpora
00003   Copyright (C) 2006  Torsten Marek (shlomme@gmx.de) &
00004   Armin Schmidt (armin.sch@gmail.com)
00005 
00006   This program is free software; you can redistribute it and/or
00007   modify it under the terms of the GNU General Public License
00008   as published by the Free Software Foundation; either version 2
00009   of the License, or (at your option) any later version.
00010 
00011   This program is distributed in the hope that it will be useful,
00012   but WITHOUT ANY WARRANTY; without even the implied warranty of
00013   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014   GNU General Public License for more details.
00015 
00016   You should have received a copy of the GNU General Public License
00017   along with this program; if not, write to the Free Software
00018   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
00019 */
00020 
00021 #include <iostream>
00022 
00023 #include <boost/format.hpp>
00024 #include <boost/assign.hpp>
00025 #include <boost/bind.hpp>
00026 
00027 #include "support/hptimer.hpp"
00028 #include "support/unicodehelpers.h"
00029 
00030 #include "phrasehunter/corpus.h"
00031 #include "phrasehunter/token.h"
00032 #include "phrasehunter/searchengine.h"
00033 
00034 
00035 using namespace boost::assign;
00036 using boost::bind;
00037 using boost::format;
00038 
00039 typedef std::list<std::string> StringList;
00040 
00041 const StringList phrases = 
00042     list_of
00043     ("d(er|ie|as) system\\w*");
00044 
00045 void time_serge(PhraseHunter::SearchEngine* se, std::string phrase, int iterations = 10) 
00046 {
00047     hptimer t;
00048     for(int i = 0; i < iterations; ++i) {
00049         se->searchPhrasalRegex(schma::UnicodePtr(new UnicodeString(phrase.c_str())));
00050     }
00051     unsigned long elapsed = t.elapsed();
00052     
00053     std::cout << format("%|-30||%|5||%|10||%|10||")
00054       % phrase
00055       % iterations
00056       % elapsed
00057       % (elapsed / static_cast<double>(iterations))
00058             << std::endl;
00059 }
00060 
00061 int main(int argc, char** argv)
00062 {
00063     PhraseHunter::CorpusManager corpus("corpora/testcorpus");
00064     PhraseHunter::SearchEngine *se = corpus.searchEngine();
00065     
00066     std::cout << format("%|=30||%|=5||%|=10||%|=10||")
00067       % "phrase" % "i" % "t (ms)" % "1 (ms)" << std::endl;
00068 
00069     for_each(phrases.begin(), phrases.end(),
00070            bind(time_serge, se, _1, 10));
00071     std::cout << std::endl;
00072     
00073     return 0;
00074 }
00075 
00076 
00077 
00078 
00079           

Generated on Thu Dec 21 16:14:41 2006 for The Phrasehunter by  doxygen 1.5.1