On Mon, May 10, 2010 at 01:20:53PM +0100, Enrico Zini wrote:
> I am preparing a new version of libept after quite aggressively
> simplifying its code.
I've done some fairly serious chopping of the textsearch part, and I'm attaching the updated patch. Now the search works *better*, and the code is *more readable*.

Ciao,

Enrico

-- 
GPG key: 4096R/E7AD5568 2009-05-08 Enrico Zini <[email protected]>
diff --git a/debian/control b/debian/control index d10e901..dc104ff 100644 --- a/debian/control +++ b/debian/control @@ -5,7 +5,7 @@ Maintainer: Debian Games Team <[email protected]> Uploaders: Miriam Ruiz <[email protected]>, Enrico Zini <[email protected]>, Jonas Smedegaard <[email protected]> Build-Depends: debhelper (>= 5), autotools-dev, c++abi2-dev, dh-buildinfo, pkg-config, - libept-dev (>= 0.5.10), libept-dev (<< 0.6), + libept-dev (>= 1.0), libept-dev (<< 2), libwibble-dev (>= 0.1.9), libwibble-dev (<< 0.2), libfltk1.1-dev, fluid Standards-Version: 3.7.3 diff --git a/src/Engine.cpp b/src/Engine.cpp index 866e5bf..0ad9c81 100644 --- a/src/Engine.cpp +++ b/src/Engine.cpp @@ -20,14 +20,26 @@ #include "Engine.h" +#include <wibble/string.h> +#include <wibble/regexp.h> #include <iostream> using namespace std; +using namespace wibble; using namespace ept::apt; using namespace ept::debtags; Engine::Engine() - : m_filter_state(ANY), m_dirty(true), m_max(0) {} + : m_db(ept::axi::path_db()), m_stem("en"), m_filter_state(ANY), m_dirty(true), m_max(0) +{ + m_qp.set_default_op(Xapian::Query::OP_AND); + m_qp.set_database(m_db); + m_qp.set_stemmer(m_stem); + m_qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); + m_qp.add_prefix("pkg", "XP"); + m_qp.add_boolean_prefix("tag", "XT"); + m_qp.add_boolean_prefix("sec", "XS"); +} struct EngineMatchDecider : public Xapian::MatchDecider { @@ -56,11 +68,11 @@ struct EngineMatchDecider : public Xapian::MatchDecider static Xapian::Query allGames(Vocabulary& voc, const std::string& facet="game") { - set<Tag> games = voc.tags(facet); + set<std::string> games = voc.tags(facet); vector<string> terms; - for (set<Tag>::const_iterator i = games.begin(); + for (set<std::string>::const_iterator i = games.begin(); i != games.end(); ++i) - terms.push_back("XT" + i->fullname()); + terms.push_back("XT" + *i); return Xapian::Query(Xapian::Query::OP_OR, terms.begin(), terms.end()); } @@ -72,11 +84,33 @@ Xapian::Query Engine::makeQuery() 
Xapian::Query ifacequery; if (!m_filter_keywords.empty()) - kwquery = m_textsearch.makePartialORQuery(m_filter_keywords); - if (m_filter_type.valid()) - typequery = Xapian::Query("XT"+m_filter_type.fullname()); - if (m_filter_iface.valid()) - ifacequery = Xapian::Query("XT"+m_filter_iface.fullname()); + { + // Add prefixes to tag names + Splitter splitter("[ \t]*,[ \t]*", REG_EXTENDED); + vector<string> kw; + for (Splitter::const_iterator i = splitter.begin(m_filter_keywords); + i != splitter.end(); ++i) + { + if (m_vocabulary.hasTag(*i)) + kw.push_back("tag:" + *i); + else + kw.push_back(*i); + } + bool do_partial = not (kw.size() == 1 and kw[0].size() < 3); + + kwquery = m_qp.parse_query(str::join(kw.begin(), kw.end(), " "), + Xapian::QueryParser::FLAG_BOOLEAN | + Xapian::QueryParser::FLAG_LOVEHATE | + Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE | + Xapian::QueryParser::FLAG_WILDCARD | + (do_partial ? Xapian::QueryParser::FLAG_PARTIAL : 0) | + Xapian::QueryParser::FLAG_PURE_NOT | + Xapian::QueryParser::FLAG_SPELLING_CORRECTION); + } + if (!m_filter_type.empty()) + typequery = Xapian::Query("XT"+m_filter_type); + if (!m_filter_iface.empty()) + ifacequery = Xapian::Query("XT"+m_filter_iface); if (kwquery.empty()) if (typequery.empty()) @@ -126,12 +160,12 @@ void Engine::recompute() //cerr << "Engine recompute:" << endl; // Compute the types - if (m_filter_type.valid()) + if (!m_filter_type.empty()) { //cerr << " filter type: " << m_filter_type.fullname() << endl; - Tag tmp = m_filter_type; - m_filter_type = Tag(); - Xapian::Enquire enquire(m_textsearch.db()); + std::string tmp = m_filter_type; + m_filter_type = std::string(); + Xapian::Enquire enquire(m_db); enquire.set_query(makeQuery()); // Get all the results out of Xapian @@ -144,10 +178,10 @@ void Engine::recompute() for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { // Get all the game and interface tags in the result set - set<Tag> tags = 
m_debtags.getTagsOfItem(i.get_document().get_data()); - for (set<Tag>::const_iterator j = tags.begin(); + set<std::string> tags = m_debtags.getTagsOfItem(i.get_document().get_data()); + for (set<std::string>::const_iterator j = tags.begin(); j != tags.end(); ++j) - if (j->facet().name() == mainFacet) + if (voc::getfacet(*j) == mainFacet) m_types.insert(*j); } } @@ -157,12 +191,12 @@ void Engine::recompute() } // Compute the interfaces - if (m_filter_iface.valid()) + if (!m_filter_iface.empty()) { //cerr << " filter iface: " << m_filter_iface.fullname() << endl; - Tag tmp = m_filter_iface; - m_filter_iface = Tag(); - Xapian::Enquire enquire(m_textsearch.db()); + std::string tmp = m_filter_iface; + m_filter_iface = std::string(); + Xapian::Enquire enquire(m_db); enquire.set_query(makeQuery()); // Get all the results out of Xapian @@ -175,10 +209,10 @@ void Engine::recompute() for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { // Get all the game and interface tags in the result set - set<Tag> tags = m_debtags.getTagsOfItem(i.get_document().get_data()); - for (set<Tag>::const_iterator j = tags.begin(); + set<std::string> tags = m_debtags.getTagsOfItem(i.get_document().get_data()); + for (set<std::string>::const_iterator j = tags.begin(); j != tags.end(); ++j) - if (j->facet().name() == secondaryFacet) + if (voc::getfacet(*j) == secondaryFacet) m_interfaces.insert(*j); } } @@ -187,7 +221,7 @@ void Engine::recompute() //cerr << " no filter iface" << endl; } - Xapian::Enquire enquire(m_textsearch.db()); + Xapian::Enquire enquire(m_db); enquire.set_query(makeQuery()); //cerr << " filter query: " << enquire.get_query().get_description() << endl; @@ -221,22 +255,22 @@ void Engine::recompute() // Get all the game and interface tags in the result set // only for type or filter when they are not set - if (!m_filter_type.valid() || !m_filter_iface.valid()) + if (m_filter_type.empty() || m_filter_iface.empty()) { - set<Tag> tags = 
m_debtags.getTagsOfItem(res.name); - for (set<Tag>::const_iterator j = tags.begin(); + set<std::string> tags = m_debtags.getTagsOfItem(res.name); + for (set<std::string>::const_iterator j = tags.begin(); j != tags.end(); ++j) - if (!m_filter_type.valid() && j->facet().name() == mainFacet) + if (m_filter_type.empty() && voc::getfacet(*j) == mainFacet) m_types.insert(*j); - else if (!m_filter_iface.valid() && j->facet().name() == secondaryFacet) + else if (m_filter_iface.empty() && voc::getfacet(*j) == secondaryFacet) m_interfaces.insert(*j); } } } // Always keep the currently selected items in the lists - if (m_filter_type.valid()) + if (!m_filter_type.empty()) m_types.insert(m_filter_type); - if (m_filter_iface.valid()) + if (!m_filter_iface.empty()) m_interfaces.insert(m_filter_iface); @@ -251,7 +285,7 @@ void Engine::recompute() std::vector<Result> Engine::related(const std::string& name, int count) const { - Xapian::Enquire enquire(m_textsearch.db()); + Xapian::Enquire enquire(m_db); // Retrieve the document for the given package enquire.set_query(Xapian::Query("XP"+name)); @@ -284,13 +318,13 @@ void Engine::setKeywordFilter(const std::string& keywords) m_dirty = true; } -void Engine::setTypeFilter(const ept::debtags::Tag& tag) +void Engine::setTypeFilter(const std::string& tag) { m_filter_type = tag; m_dirty = true; } -void Engine::setInterfaceFilter(const ept::debtags::Tag& tag) +void Engine::setInterfaceFilter(const std::string& tag) { m_filter_iface = tag; m_dirty = true; diff --git a/src/Engine.h b/src/Engine.h index 4369d08..855450e 100644 --- a/src/Engine.h +++ b/src/Engine.h @@ -23,7 +23,8 @@ #include <ept/apt/apt.h> #include <ept/debtags/debtags.h> -#include <ept/textsearch/textsearch.h> +#include <ept/debtags/vocabulary.h> +#include <ept/axi/axi.h> #include <ept/popcon/popcon.h> #include <string> #include <set> @@ -58,22 +59,31 @@ protected: /// Debtags data provider ept::debtags::Debtags m_debtags; + /// Vocabulary data provider + 
ept::debtags::Vocabulary m_vocabulary; + /// Xapian data provider - ept::textsearch::TextSearch m_textsearch; + Xapian::Database m_db; + + /// Xapian stemmer + Xapian::Stem m_stem; + + /// Xapian query parser + Xapian::QueryParser m_qp; /// Popcon scores ept::popcon::Popcon m_popcon; std::string m_filter_keywords; - ept::debtags::Tag m_filter_type; - ept::debtags::Tag m_filter_iface; + std::string m_filter_type; + std::string m_filter_iface; Engine::State m_filter_state; bool m_dirty; std::vector<Result> m_results; - std::set<ept::debtags::Tag> m_types; - std::set<ept::debtags::Tag> m_interfaces; + std::set<std::string> m_types; + std::set<std::string> m_interfaces; float m_max; float m_res_max; @@ -100,20 +110,20 @@ public: ept::debtags::Debtags& debtags() { return m_debtags; } /// Access the tag vocabulary - ept::debtags::Vocabulary& voc() { return m_debtags.vocabulary(); } + ept::debtags::Vocabulary& voc() { return m_vocabulary; } /// Access the popcon data source ept::popcon::Popcon& popcon() { return m_popcon; } /// Get the list of available game types - const std::set<ept::debtags::Tag>& types() + const std::set<std::string>& types() { if (m_dirty) recompute(); return m_types; } /// Get the list of available interfaces - const std::set<ept::debtags::Tag>& interfaces() + const std::set<std::string>& interfaces() { if (m_dirty) recompute(); return m_interfaces; @@ -144,12 +154,12 @@ public: /** * Set the game type filter */ - void setTypeFilter(const ept::debtags::Tag& tag = ept::debtags::Tag()); + void setTypeFilter(const std::string& tag = std::string()); /** * Set the interface type filter */ - void setInterfaceFilter(const ept::debtags::Tag& tag = ept::debtags::Tag()); + void setInterfaceFilter(const std::string& tag = std::string()); /** * Set the installed state filter diff --git a/src/filter.cpp b/src/filter.cpp index a59c553..bc1ee88 100644 --- a/src/filter.cpp +++ b/src/filter.cpp @@ -20,7 +20,6 @@ #include "taghandler.h" #include <string> -#include 
<ept/debtags/tag.h> #define FACET_VIOLENCE "rating:violence" #define FACET_SEX "rating:sex" @@ -86,26 +85,22 @@ int PackageFilter::TagValue(const Tag &tag) bool PackageFilter::GreenTag(const Tag &tag) { - std::string name = tag.fullname(); - return tagdata.CheckTag(&green_tags, name); + return tagdata.CheckTag(&green_tags, tag); } bool PackageFilter::YellowTag(const Tag &tag) { - std::string name = tag.fullname(); - return tagdata.CheckTag(&yellow_tags, name); + return tagdata.CheckTag(&yellow_tags, tag); } bool PackageFilter::RedTag(const Tag &tag) { - std::string name = tag.fullname(); - return tagdata.CheckTag(&red_tags, name); + return tagdata.CheckTag(&red_tags, tag); } bool PackageFilter::BlackTag(const Tag &tag) { - std::string name = tag.fullname(); - return tagdata.CheckTag(&black_tags, name); + return tagdata.CheckTag(&black_tags, tag); } int PackageFilter::TagsValue(const TagSet &tags) diff --git a/src/filter.h b/src/filter.h index 4cd5467..357dda2 100644 --- a/src/filter.h +++ b/src/filter.h @@ -22,7 +22,6 @@ #include "taghandler.h" #include <set> -#include <ept/debtags/tag.h> class PackageFilter { @@ -38,8 +37,8 @@ public: Black, // Mayday, mayday, the tag/package might be really dangerous! 
}; - typedef ept::debtags::Tag Tag; - typedef std::set<Tag> TagSet; + typedef std::string Tag; + typedef std::set<std::string> TagSet; bool GreenTag(const Tag &tag); bool YellowTag(const Tag &tag); diff --git a/src/goplay.cpp b/src/goplay.cpp index c85d77e..5e4e3c2 100644 --- a/src/goplay.cpp +++ b/src/goplay.cpp @@ -91,16 +91,15 @@ using namespace std; using namespace ept; using namespace ept::debtags; using namespace ept::apt; -using namespace ept::textsearch; -char* tagString(const Tag& tag) +char* tagString(const std::string& tag) { static map<string, char*> table; - map<string, char*>::iterator i = table.find(tag.fullname()); + map<string, char*>::iterator i = table.find(tag); if (i == table.end()) { pair< map<string, char*>::iterator, bool > tmp = - table.insert(make_pair(tag.fullname(), strdup(tag.fullname().c_str()))); + table.insert(make_pair(tag, strdup(tag.c_str()))); i = tmp.first; } return i->second; @@ -128,18 +127,18 @@ void printResults(Engine& engine) cerr << "PKG " << pkg.package() << " - " << pkg.shortDescription() << endl; } - const set<Tag>& ttags = engine.types(); - for (set<Tag>::const_iterator i = ttags.begin(); + const set<string>& ttags = engine.types(); + for (set<string>::const_iterator i = ttags.begin(); i != ttags.end(); ++i) { - cerr << "TTAG " << i->fullname() << endl; + cerr << "TTAG " << *i << endl; } - const set<Tag>& ftags = engine.interfaces(); - for (set<Tag>::const_iterator i = ftags.begin(); + const set<string>& ftags = engine.interfaces(); + for (set<string>::const_iterator i = ftags.begin(); i != ftags.end(); ++i) { - cerr << "ITAG " << i->fullname() << endl; + cerr << "ITAG " << *i << endl; } } @@ -186,26 +185,30 @@ static void UpdateUILists(GamesUI& ui) // FIXME: there are better ways to remember the previous item - const set<Tag> types = engine.types(); + const set<string> types = engine.types(); int newIdx = 0; - for (set<Tag>::const_iterator i = types.begin(); + for (set<string>::const_iterator i = types.begin(); i != 
types.end(); ++i) { - int idx = ui.TypeSelection->add(gettext(i->shortDescription().c_str()), + const voc::TagData* td = engine.voc().tagData(*i); + if (!td) continue; + int idx = ui.TypeSelection->add(gettext(td->shortDescription().c_str()), 0, NULL, tagString(*i), FL_NORMAL_LABEL); - if (i->fullname() == oldType) + if (*i == oldType) newIdx = idx; } ui.TypeSelection->value(newIdx); - const set<Tag> ifaces = engine.interfaces(); + const set<std::string> ifaces = engine.interfaces(); newIdx = 0; - for (set<Tag>::const_iterator i = ifaces.begin(); + for (set<std::string>::const_iterator i = ifaces.begin(); i != ifaces.end(); ++i) { - int idx = ui.InterfaceSelection->add(gettext(i->shortDescription().c_str()), + const voc::TagData* td = engine.voc().tagData(*i); + if (!td) continue; + int idx = ui.InterfaceSelection->add(gettext(td->shortDescription().c_str()), 0, NULL, tagString(*i), FL_NORMAL_LABEL); - if (i->fullname() == oldIface) + if (*i == oldIface) newIdx = idx; } ui.InterfaceSelection->value(newIdx); @@ -225,7 +228,7 @@ static void UpdateUILists(GamesUI& ui) Fl_Color bk(FL_WHITE); Fl_Color fr(FL_BLACK); - set<Tag> tags = ui.engine->debtags().getTagsOfItem((const char *)rec.package().c_str()); + set<std::string> tags = ui.engine->debtags().getTagsOfItem((const char *)rec.package().c_str()); switch (filter.TagsValue(tags)) { case PackageFilter::Green: @@ -267,8 +270,7 @@ static void CallBackTypeSelection(Fl_Choice* choice, void *data) //printf("CallBackTypeSelection\n"); //fflush(stdout); GamesUI& ui = *static_cast<GamesUI*>(data); - Tag tag = ui.engine->voc().tagByName(ReadFlChoice(*choice)); - ui.engine->setTypeFilter(tag); + ui.engine->setTypeFilter(ReadFlChoice(*choice)); UpdateUILists(ui); } @@ -277,8 +279,7 @@ static void CallBackInterfaceSelection(Fl_Choice* choice, void *data) //printf("CallBackInterfaceSelection\n"); //fflush(stdout); GamesUI& ui = *static_cast<GamesUI*>(data); - Tag tag = ui.engine->voc().tagByName(ReadFlChoice(*choice)); - 
ui.engine->setInterfaceFilter(tag); + ui.engine->setInterfaceFilter(ReadFlChoice(*choice)); UpdateUILists(ui); } diff --git a/src/pkgbrowser.cpp b/src/pkgbrowser.cpp index f39f02b..417f5f1 100644 --- a/src/pkgbrowser.cpp +++ b/src/pkgbrowser.cpp @@ -65,7 +65,6 @@ using namespace std; using namespace ept; using namespace ept::debtags; using namespace ept::apt; -using namespace ept::textsearch; static const char legalchars[] = "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvw...@_?+-,.~/%&=:*#"; #define SIZE_URI 256 @@ -221,11 +220,15 @@ void PackageBrowser::item_select(void *p, int s) ui->DebTagsBrowser->column_widths(widths); ui->DebTagsBrowser->add(_("@b...@c7@[email protected]\t@b...@c7@[email protected]")); - set<Tag> tags = ui->engine->debtags().getTagsOfItem((const char *)data); + set<std::string> tags = ui->engine->debtags().getTagsOfItem((const char *)data); PackageFilter filter; char *tag_txt = new char[512]; - for (set<Tag>::const_iterator i = tags.begin(); i != tags.end(); ++i) + for (set<std::string>::const_iterator i = tags.begin(); i != tags.end(); ++i) { + const voc::FacetData* fd = ui->engine->voc().facetData(voc::getfacet(*i)); + const voc::TagData* td = ui->engine->voc().tagData(*i); + if (!fd || !td) continue; + // Available Colors: FL_BLACK, FL_BLUE, FL_CYAN, FL_DARK_BLUE, // FL_DARK_CYAN, FL_DARK_GREEN FL_DARK_MAGENTA, FL_DARK_RED, // FL_DARK_YELLOW, FL_GREEN, FL_MAGENTA, FL_RED, FL_WHITE, FL_YELLOW @@ -247,9 +250,9 @@ void PackageBrowser::item_select(void *p, int s) } snprintf(tag_txt, 512, "@b...@c%d@.%...@b%d@c...@.%s", bk, fr, - gettext(i->facet().shortDescription().c_str()), + gettext(fd->shortDescription().c_str()), bk, fr, - gettext(i->shortDescription().c_str()) + gettext(td->shortDescription().c_str()) ); ui->DebTagsBrowser->add(tag_txt); }
signature.asc
Description: Digital signature

