Kelson has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/343897 )
Change subject: Do not store the snippet nor the size of the content in the database.
......................................................................

Do not store the snippet nor the size of the content in the database.

Change-Id: I354a1e76dd2214e844d67ddb4b94f43087664729
---
M zimwriterfs/indexer.cpp
M zimwriterfs/indexer.h
M zimwriterfs/xapianIndexer.cpp
M zimwriterfs/xapianIndexer.h
4 files changed, 2 insertions(+), 28 deletions(-)

Approvals:
  Kelson: Verified; Looks good to me, approved

diff --git a/zimwriterfs/indexer.cpp b/zimwriterfs/indexer.cpp
index 6c26fc9..b83abd4 100644
--- a/zimwriterfs/indexer.cpp
+++ b/zimwriterfs/indexer.cpp
@@ -84,8 +84,6 @@
 token.title,
 token.keywords,
 token.content,
- token.snippet,
- token.size,
 token.wordCount
 );
diff --git a/zimwriterfs/indexer.h b/zimwriterfs/indexer.h
index 02d989b..3291e36 100644
--- a/zimwriterfs/indexer.h
+++ b/zimwriterfs/indexer.h
@@ -46,8 +46,6 @@
 string title;
 string keywords;
 string content;
- string snippet;
- string size;
 string wordCount;
 };
@@ -70,8 +68,6 @@
 const string &unaccentedTitle,
 const string &keywords,
 const string &content,
- const string &snippet,
- const string &size,
 const string &wordCount) = 0;
 virtual void flush() = 0;
 virtual void indexingPostlude() = 0;
diff --git a/zimwriterfs/xapianIndexer.cpp b/zimwriterfs/xapianIndexer.cpp
index 65129b7..db27f9d 100644
--- a/zimwriterfs/xapianIndexer.cpp
+++ b/zimwriterfs/xapianIndexer.cpp
@@ -52,7 +52,7 @@
 void XapianIndexer::indexingPrelude(const string indexPath_) {
 indexPath = indexPath_;
 this->writableDatabase = Xapian::WritableDatabase(indexPath + ".tmp", Xapian::DB_CREATE_OR_OVERWRITE);
- this->writableDatabase.set_metadata("valuesmap", "title:0;snippet:1;size:2;wordcount:3");
+ this->writableDatabase.set_metadata("valuesmap", "title:0;wordcount:1");
 this->writableDatabase.begin_transaction(true);
 /* Insert the stopwords */
@@ -72,17 +72,13 @@
 const string &unaccentedTitle,
 const string &keywords,
 const string &content,
- const string &snippet,
- const string &size,
 const string &wordCount) {
 /* Put the data in the document */
 Xapian::Document currentDocument;
 currentDocument.clear_values();
 currentDocument.add_value(0, title);
- currentDocument.add_value(1, snippet);
- currentDocument.add_value(2, size);
- currentDocument.add_value(3, wordCount);
+ currentDocument.add_value(1, wordCount);
 currentDocument.set_data(url);
 indexer.set_document(currentDocument);
@@ -149,20 +145,6 @@
 stringstream countWordStringStream;
 countWordStringStream << countWords(htmlParser.dump);
 token.wordCount = countWordStringStream.str();
-
- /* snippet */
- std::string snippet = std::string(htmlParser.dump, 0, 300);
- std::string::size_type last = snippet.find_last_of('.');
- if (last == snippet.npos)
- last = snippet.find_last_of(' ');
- if (last != snippet.npos)
- snippet = snippet.substr(0, last);
- token.snippet = snippet;
-
- /* size */
- stringstream sizeStringStream;
- sizeStringStream << token.content.size() / 1024;
- token.size = sizeStringStream.str();
 /* Remove accent */
 token.title = removeAccents(token.accentedTitle);
diff --git a/zimwriterfs/xapianIndexer.h b/zimwriterfs/xapianIndexer.h
index 1d854da..16dc094 100644
--- a/zimwriterfs/xapianIndexer.h
+++ b/zimwriterfs/xapianIndexer.h
@@ -61,8 +61,6 @@
 const string &unaccentedTitle,
 const string &keywords,
 const string &content,
- const string &snippet,
- const string &size,
 const string &wordCount);
 void flush();
 void indexingPostlude();

-- 
To view, visit https://gerrit.wikimedia.org/r/343897
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I354a1e76dd2214e844d67ddb4b94f43087664729
Gerrit-PatchSet: 1
Gerrit-Project: openzim
Gerrit-Branch: master
Gerrit-Owner: Mgautierfr <mgaut...@kymeria.fr>
Gerrit-Reviewer: Kelson <kel...@kiwix.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits