00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <QtGui/QMessageBox>
00022 #include <QtGui/QSortFilterProxyModel>
00023
00024 #include "phrasehunter/corpus.h"
00025 #include "phrasehunter/searchengine.h"
00026 #include "phrasehunter/statistics.h"
00027 #include "phrasehunter/tokencontext.h"
00028 #include "phrasehunter/token.h"
00029 #include "support/unicodehelpers.h"
00030
00031 #include "searchtab.h"
00032
00033 SearchTab::SearchTab(QWidget* parent):
00034 QWidget(parent),
00035 m_searchEngine(NULL),
00036 m_stats(NULL),
00037 m_model(NULL)
00038 {
00039 setupUi(this);
00040
00041 connect(pbSearch, SIGNAL(clicked(bool)), this, SLOT(search()));
00042 connect(leQuery, SIGNAL(returnPressed()), this, SLOT(search()));
00043
00044 connect(resultsTableView, SIGNAL(doubleClicked(const QModelIndex&)),
00045 this, SLOT(emitDocumentRequested(const QModelIndex&)));
00046 }
00047
00048 void SearchTab::emitDocumentRequested(const QModelIndex& index)
00049 {
00050 PhraseHunter::TokenContextPtr context = m_model->getContext(index);
00051 emit documentRequested(context->docID(), context->position(), context->tokenLength());
00052 }
00053
00054 void SearchTab::setContextWidth(int newContextWidth)
00055 {
00056 m_contextWidth = newContextWidth;
00057 if(m_model != NULL) {
00058 m_model->setContextWidth(newContextWidth);
00059 }
00060 }
00061
00062 void SearchTab::setQueryType(const QString& type)
00063 {
00064 queryType = type;
00065 }
00066
00067 void SearchTab::enableSearch(PhraseHunter::CorpusManager* corpus)
00068 {
00069 m_searchEngine = corpus->searchEngine();
00070
00071 m_stats = corpus->statisticsEngine();
00072
00073 numberOfDocs = m_stats->getNumberOfDocuments();
00074 numberOfTokens = m_stats->getSizeOfSampleSpace();
00075 numberOfTypes = m_stats->getNumberOfTypes();
00076
00077
00078 resultsTableView->setModel(NULL);
00079 delete m_model;
00080 m_model = new KwicTableModel(this, corpus->contextReader());
00081 m_model->setContextWidth(m_contextWidth);
00082 resultsTableView->setModel(m_model);
00083
00084
00085
00086 QSortFilterProxyModel *md = new QSortFilterProxyModel(this);
00087 md->setSourceModel(m_model);
00088 resultsTableView->setModel(md);
00089
00090
00091 leQuery->setText("");
00092 leQuery->setEnabled(true);
00093 pbSearch->setEnabled(true);
00094 }
00095
00096 void SearchTab::search()
00097 {
00098 try {
00099 hideWidgets();
00100
00101 if(m_searchEngine == NULL) {
00102 QMessageBox::critical(this, QString("Error"),
00103 QString("No or invalid corpus loaded"),
00104 QMessageBox::Ok, QMessageBox::NoButton, QMessageBox::NoButton);
00105 return;
00106 }
00107
00108 unsigned int docFreq = 0;
00109 unsigned int freq = 0;
00110 int rank = 0;
00111
00112 QString query = leQuery->text().toLower();
00113
00114 if(queryType == "Regex") {
00115 std::vector<PhraseHunter::TokenPtr> resultTokens;
00116
00117 if (query.contains(' ')) {
00118 resultTokens = m_searchEngine->searchPhrasalRegex(schma::UnicodePtr(new UnicodeString(query.toUtf8().data())));
00119 } else {
00120 resultTokens = m_searchEngine->searchRegexToken(schma::UnicodePtr(new UnicodeString(query.toUtf8().data())));
00121 }
00122
00123 for(PhraseHunter::TokenVector::const_iterator i = resultTokens.begin();
00124 i != resultTokens.end(); ++i) {
00125 freq += (*i)->corpusFrequency();
00126 }
00127 docFreq = PhraseHunter::StatisticsEngine::sizeOfDocSet(resultTokens);
00128
00129
00130 m_model->setData(resultTokens);
00131
00132 } else if(queryType == "Phrase") {
00133
00134 PhraseHunter::TokenPtr queryToken;
00135
00136 if (query.contains(' ')) {
00137 queryToken = m_searchEngine->searchPhrase(schma::UnicodePtr(new UnicodeString(query.toUtf8().data())));
00138 } else {
00139 queryToken = m_searchEngine->searchToken(schma::UnicodePtr(new UnicodeString(query.toUtf8().data())));
00140 }
00141 m_model->setData(queryToken);
00142
00143 docFreq = queryToken->documentFrequency();
00144 rank = m_stats->rank(queryToken);
00145 freq = queryToken->corpusFrequency();
00146 }
00147
00148 setStatistics(docFreq, freq, rank);
00149 resultsTableView->resizeColumnsToContents();
00150
00151 } catch (PhraseHunter::Exceptions::Exception& _e) {
00152 QMessageBox::critical(this, QString("Error"),
00153 QString("Error while processing search"),
00154 QMessageBox::Ok,QMessageBox::NoButton,QMessageBox::NoButton);
00155 }
00156 }
00157
00158 void SearchTab::hideWidgets()
00159 {
00160 if(queryType == "Regex") {
00161 lblRank->setHidden(true);
00162 label_5->setHidden(true);
00163
00164 } else if (leQuery->text().trimmed().contains(' ')) {
00165 lblRank->setHidden(true);
00166 label_5->setHidden(true);
00167 } else {
00168 lblRank->setHidden(false);
00169 label_5->setHidden(false);
00170 }
00171 }
00172
00173 void SearchTab::setStatistics(int docFreq, int freq, int rank)
00174 {
00175 lblDocFrequency->setText(QString("%1 of %2").arg(docFreq).arg(numberOfDocs));
00176
00177 if (leQuery->text().contains(' '))
00178 lblCorpusFrequency->setText(QString("%1").arg(freq));
00179 else
00180 lblCorpusFrequency->setText(QString("%1 of %2").arg(freq).arg(numberOfTokens));
00181
00182 lblRank->setText(QString("%1 of %2").arg(rank).arg(numberOfTypes));
00183 }