searchtab.cpp

Go to the documentation of this file.
00001 /*
00002   Phrasehunter - index and query text corpora
00003   Copyright (C) 2006  Torsten Marek (shlomme@gmx.de) &
00004   Armin Schmidt (armin.sch@gmail.com)
00005   
00006   This program is free software; you can redistribute it and/or
00007   modify it under the terms of the GNU General Public License
00008   as published by the Free Software Foundation; either version 2
00009   of the License, or (at your option) any later version.
00010   
00011   This program is distributed in the hope that it will be useful,
00012   but WITHOUT ANY WARRANTY; without even the implied warranty of
00013   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014   GNU General Public License for more details.
00015   
00016   You should have received a copy of the GNU General Public License
00017   along with this program; if not, write to the Free Software
00018   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
00019 */
00020 
00021 #include <QtGui/QMessageBox>
00022 #include <QtGui/QSortFilterProxyModel>
00023 
00024 #include "phrasehunter/corpus.h"
00025 #include "phrasehunter/searchengine.h"
00026 #include "phrasehunter/statistics.h"
00027 #include "phrasehunter/tokencontext.h"
00028 #include "phrasehunter/token.h"
00029 #include "support/unicodehelpers.h"
00030 
00031 #include "searchtab.h"
00032 
00033 SearchTab::SearchTab(QWidget* parent): 
00034     QWidget(parent),
00035     m_searchEngine(NULL),
00036     m_stats(NULL),
00037     m_model(NULL)
00038 {
00039     setupUi(this);
00040     
00041     connect(pbSearch, SIGNAL(clicked(bool)), this, SLOT(search()));
00042     connect(leQuery, SIGNAL(returnPressed()), this, SLOT(search()));
00043     
00044     connect(resultsTableView, SIGNAL(doubleClicked(const QModelIndex&)), 
00045             this, SLOT(emitDocumentRequested(const QModelIndex&)));
00046 }
00047 
00048 void SearchTab::emitDocumentRequested(const QModelIndex& index) 
00049 {
00050     PhraseHunter::TokenContextPtr context = m_model->getContext(index);
00051     emit documentRequested(context->docID(), context->position(), context->tokenLength());
00052 }
00053 
00054 void SearchTab::setContextWidth(int newContextWidth)
00055 {
00056     m_contextWidth = newContextWidth;
00057     if(m_model != NULL) {
00058         m_model->setContextWidth(newContextWidth);
00059     }
00060 }
00061 
00062 void SearchTab::setQueryType(const QString& type) 
00063 {
00064     queryType = type;
00065 }
00066 
00067 void SearchTab::enableSearch(PhraseHunter::CorpusManager* corpus) 
00068 {
00069     m_searchEngine = corpus->searchEngine();
00070     
00071     m_stats = corpus->statisticsEngine();
00072     
00073     numberOfDocs = m_stats->getNumberOfDocuments();
00074     numberOfTokens = m_stats->getSizeOfSampleSpace();
00075     numberOfTypes = m_stats->getNumberOfTypes();
00076     
00077     
00078     resultsTableView->setModel(NULL);
00079     delete m_model;
00080     m_model = new KwicTableModel(this, corpus->contextReader());
00081     m_model->setContextWidth(m_contextWidth);
00082     resultsTableView->setModel(m_model);
00083 
00084     /////////////////////////////////////////////////////////////////////////////
00085     //This shouldn't be neccessary if sorting had been implemented for QTableView
00086     QSortFilterProxyModel *md = new QSortFilterProxyModel(this);
00087     md->setSourceModel(m_model);
00088     resultsTableView->setModel(md);
00089     /////////////////////////////////////////////////////////////////////////////
00090 
00091     leQuery->setText("");
00092     leQuery->setEnabled(true);
00093     pbSearch->setEnabled(true);
00094 }
00095 
00096 void SearchTab::search() 
00097 {
00098     try {
00099         hideWidgets();
00100         
00101         if(m_searchEngine == NULL) {
00102             QMessageBox::critical(this, QString("Error"), 
00103                                   QString("No or invalid corpus loaded"), 
00104                                   QMessageBox::Ok, QMessageBox::NoButton, QMessageBox::NoButton);
00105             return;
00106         }
00107 
00108         unsigned int docFreq = 0;
00109         unsigned int freq = 0;
00110         int rank = 0;
00111       
00112         QString query = leQuery->text().toLower();
00113 
00114         if(queryType == "Regex") {
00115             std::vector<PhraseHunter::TokenPtr> resultTokens;
00116           
00117             if (query.contains(' ')) {
00118                 resultTokens = m_searchEngine->searchPhrasalRegex(schma::UnicodePtr(new UnicodeString(query.toUtf8().data())));
00119             } else {
00120                 resultTokens = m_searchEngine->searchRegexToken(schma::UnicodePtr(new UnicodeString(query.toUtf8().data())));
00121             }
00122             
00123             for(PhraseHunter::TokenVector::const_iterator i = resultTokens.begin();
00124                 i != resultTokens.end(); ++i) {
00125                 freq += (*i)->corpusFrequency();
00126             }
00127             docFreq = PhraseHunter::StatisticsEngine::sizeOfDocSet(resultTokens);
00128             
00129             // TODO: sort by rank
00130             m_model->setData(resultTokens);
00131             
00132         } else if(queryType == "Phrase") {
00133 
00134             PhraseHunter::TokenPtr queryToken;
00135             
00136             if (query.contains(' ')) {
00137                 queryToken = m_searchEngine->searchPhrase(schma::UnicodePtr(new UnicodeString(query.toUtf8().data())));
00138             } else {
00139                 queryToken = m_searchEngine->searchToken(schma::UnicodePtr(new UnicodeString(query.toUtf8().data())));
00140             }
00141             m_model->setData(queryToken);
00142             
00143             docFreq = queryToken->documentFrequency();
00144             rank = m_stats->rank(queryToken);
00145             freq = queryToken->corpusFrequency();
00146         }
00147         
00148         setStatistics(docFreq, freq, rank);
00149         resultsTableView->resizeColumnsToContents();
00150       
00151     } catch (PhraseHunter::Exceptions::Exception& _e) {
00152         QMessageBox::critical(this, QString("Error"),
00153                               QString("Error while processing search"),
00154                               QMessageBox::Ok,QMessageBox::NoButton,QMessageBox::NoButton);
00155     }
00156 }
00157 
00158 void SearchTab::hideWidgets()
00159 {
00160     if(queryType == "Regex") {
00161         lblRank->setHidden(true);
00162         label_5->setHidden(true);
00163         //show 'Sort by rank' checkbox
00164     } else if (leQuery->text().trimmed().contains(' ')) {
00165         lblRank->setHidden(true);
00166         label_5->setHidden(true);
00167     } else {
00168         lblRank->setHidden(false);
00169         label_5->setHidden(false);
00170     }
00171 }
00172 
00173 void SearchTab::setStatistics(int docFreq, int freq, int rank)
00174 {
00175     lblDocFrequency->setText(QString("%1 of %2").arg(docFreq).arg(numberOfDocs));
00176     
00177     if (leQuery->text().contains(' '))
00178         lblCorpusFrequency->setText(QString("%1").arg(freq));
00179     else
00180         lblCorpusFrequency->setText(QString("%1 of %2").arg(freq).arg(numberOfTokens));
00181     
00182     lblRank->setText(QString("%1 of %2").arg(rank).arg(numberOfTypes));
00183 }

Generated on Thu Dec 21 16:14:40 2006 for The Phrasehunter by  doxygen 1.5.1