On Mon, May 10, 2010 at 01:20:53PM +0100, Enrico Zini wrote:

> I am preparing a new version of libept after quite aggressively
> simplifying its code.

I've done quite a bit of serious chopping in the textsearch part.

I'm attaching the updated patch. Now the search works *better*, and the
code is *more readable*.


Ciao,

Enrico

-- 
GPG key: 4096R/E7AD5568 2009-05-08 Enrico Zini <[email protected]>
diff --git a/debian/control b/debian/control
index d10e901..dc104ff 100644
--- a/debian/control
+++ b/debian/control
@@ -5,7 +5,7 @@ Maintainer: Debian Games Team <[email protected]>
 Uploaders: Miriam Ruiz <[email protected]>, Enrico Zini <[email protected]>, Jonas Smedegaard <[email protected]>
 Build-Depends: debhelper (>= 5), autotools-dev,
  c++abi2-dev, dh-buildinfo, pkg-config,
- libept-dev (>= 0.5.10), libept-dev (<< 0.6),
+ libept-dev (>= 1.0), libept-dev (<< 2),
  libwibble-dev (>= 0.1.9), libwibble-dev (<< 0.2),
  libfltk1.1-dev, fluid
 Standards-Version: 3.7.3
diff --git a/src/Engine.cpp b/src/Engine.cpp
index 866e5bf..0ad9c81 100644
--- a/src/Engine.cpp
+++ b/src/Engine.cpp
@@ -20,14 +20,26 @@
 
 #include "Engine.h"
 
+#include <wibble/string.h>
+#include <wibble/regexp.h>
 #include <iostream>
 
 using namespace std;
+using namespace wibble;
 using namespace ept::apt;
 using namespace ept::debtags;
 
 Engine::Engine()
-	: m_filter_state(ANY), m_dirty(true), m_max(0) {}
+	: m_db(ept::axi::path_db()), m_stem("en"), m_filter_state(ANY), m_dirty(true), m_max(0)
+{
+	m_qp.set_default_op(Xapian::Query::OP_AND);
+        m_qp.set_database(m_db);
+        m_qp.set_stemmer(m_stem);
+        m_qp.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
+        m_qp.add_prefix("pkg", "XP");
+        m_qp.add_boolean_prefix("tag", "XT");
+        m_qp.add_boolean_prefix("sec", "XS");
+}
 
 struct EngineMatchDecider : public Xapian::MatchDecider
 {
@@ -56,11 +68,11 @@ struct EngineMatchDecider : public Xapian::MatchDecider
 
 static Xapian::Query allGames(Vocabulary& voc, const std::string& facet="game")
 {
-	set<Tag> games = voc.tags(facet);
+	set<std::string> games = voc.tags(facet);
 	vector<string> terms;
-	for (set<Tag>::const_iterator i = games.begin();
+	for (set<std::string>::const_iterator i = games.begin();
 			i != games.end(); ++i)
-		terms.push_back("XT" + i->fullname());
+		terms.push_back("XT" + *i);
 	return Xapian::Query(Xapian::Query::OP_OR, terms.begin(), terms.end());
 }
 
@@ -72,11 +84,33 @@ Xapian::Query Engine::makeQuery()
 	Xapian::Query ifacequery;
 
 	if (!m_filter_keywords.empty())
-		kwquery = m_textsearch.makePartialORQuery(m_filter_keywords);
-	if (m_filter_type.valid())
-		typequery = Xapian::Query("XT"+m_filter_type.fullname());
-	if (m_filter_iface.valid())
-		ifacequery = Xapian::Query("XT"+m_filter_iface.fullname());
+	{
+		// Add prefixes to tag names
+		Splitter splitter("[ \t]*,[ \t]*", REG_EXTENDED);
+		vector<string> kw;
+		for (Splitter::const_iterator i = splitter.begin(m_filter_keywords);
+				i != splitter.end(); ++i)
+		{
+			if (m_vocabulary.hasTag(*i))
+				kw.push_back("tag:" + *i);
+			else
+				kw.push_back(*i);
+		}
+		bool do_partial = not (kw.size() == 1 and kw[0].size() < 3);
+
+		kwquery = m_qp.parse_query(str::join(kw.begin(), kw.end(), " "),
+				Xapian::QueryParser::FLAG_BOOLEAN |
+				Xapian::QueryParser::FLAG_LOVEHATE |
+				Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE |
+				Xapian::QueryParser::FLAG_WILDCARD |
+				(do_partial ? Xapian::QueryParser::FLAG_PARTIAL : 0) |
+				Xapian::QueryParser::FLAG_PURE_NOT |
+				Xapian::QueryParser::FLAG_SPELLING_CORRECTION);
+	}
+	if (!m_filter_type.empty())
+		typequery = Xapian::Query("XT"+m_filter_type);
+	if (!m_filter_iface.empty())
+		ifacequery = Xapian::Query("XT"+m_filter_iface);
 		
 	if (kwquery.empty())
 		if (typequery.empty())
@@ -126,12 +160,12 @@ void Engine::recompute()
 	//cerr << "Engine recompute:" << endl;
 
 	// Compute the types
-	if (m_filter_type.valid())
+	if (!m_filter_type.empty())
 	{
 		//cerr << "  filter type: " << m_filter_type.fullname() << endl;
-		Tag tmp = m_filter_type;
-		m_filter_type = Tag();
-		Xapian::Enquire enquire(m_textsearch.db());
+		std::string tmp = m_filter_type;
+		m_filter_type = std::string();
+		Xapian::Enquire enquire(m_db);
 		enquire.set_query(makeQuery());
 
 		// Get all the results out of Xapian
@@ -144,10 +178,10 @@ void Engine::recompute()
 			for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i)
 			{
 				// Get all the game and interface tags in the result set
-				set<Tag> tags = m_debtags.getTagsOfItem(i.get_document().get_data());
-				for (set<Tag>::const_iterator j = tags.begin();
+				set<std::string> tags = m_debtags.getTagsOfItem(i.get_document().get_data());
+				for (set<std::string>::const_iterator j = tags.begin();
 						j != tags.end(); ++j)
-					if (j->facet().name() == mainFacet)
+					if (voc::getfacet(*j) == mainFacet)
 						m_types.insert(*j);
 			}
 		}
@@ -157,12 +191,12 @@ void Engine::recompute()
 	}
 
 	// Compute the interfaces
-	if (m_filter_iface.valid())
+	if (!m_filter_iface.empty())
 	{
 		//cerr << "  filter iface: " << m_filter_iface.fullname() << endl;
-		Tag tmp = m_filter_iface;
-		m_filter_iface = Tag();
-		Xapian::Enquire enquire(m_textsearch.db());
+		std::string tmp = m_filter_iface;
+		m_filter_iface = std::string();
+		Xapian::Enquire enquire(m_db);
 		enquire.set_query(makeQuery());
 
 		// Get all the results out of Xapian
@@ -175,10 +209,10 @@ void Engine::recompute()
 			for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i)
 			{
 				// Get all the game and interface tags in the result set
-				set<Tag> tags = m_debtags.getTagsOfItem(i.get_document().get_data());
-				for (set<Tag>::const_iterator j = tags.begin();
+				set<std::string> tags = m_debtags.getTagsOfItem(i.get_document().get_data());
+				for (set<std::string>::const_iterator j = tags.begin();
 						j != tags.end(); ++j)
-					if (j->facet().name() == secondaryFacet)
+					if (voc::getfacet(*j) == secondaryFacet)
 						m_interfaces.insert(*j);
 			}
 		}
@@ -187,7 +221,7 @@ void Engine::recompute()
 		//cerr << "  no filter iface" << endl;
 	}
 
-	Xapian::Enquire enquire(m_textsearch.db());
+	Xapian::Enquire enquire(m_db);
 	enquire.set_query(makeQuery());
 
 	//cerr << "  filter query: " << enquire.get_query().get_description() << endl;
@@ -221,22 +255,22 @@ void Engine::recompute()
 
 			// Get all the game and interface tags in the result set
 			// only for type or filter when they are not set
-			if (!m_filter_type.valid() || !m_filter_iface.valid())
+			if (m_filter_type.empty() || m_filter_iface.empty())
 			{
-				set<Tag> tags = m_debtags.getTagsOfItem(res.name);
-				for (set<Tag>::const_iterator j = tags.begin();
+				set<std::string> tags = m_debtags.getTagsOfItem(res.name);
+				for (set<std::string>::const_iterator j = tags.begin();
 						j != tags.end(); ++j)
-					if (!m_filter_type.valid() && j->facet().name() == mainFacet)
+					if (m_filter_type.empty() && voc::getfacet(*j) == mainFacet)
 						m_types.insert(*j);
-					else if (!m_filter_iface.valid() && j->facet().name() == secondaryFacet)
+					else if (m_filter_iface.empty() && voc::getfacet(*j) == secondaryFacet)
 						m_interfaces.insert(*j);
 			}
 		}
 	}
 	// Always keep the currently selected items in the lists
-	if (m_filter_type.valid())
+	if (!m_filter_type.empty())
 		m_types.insert(m_filter_type);
-	if (m_filter_iface.valid())
+	if (!m_filter_iface.empty())
 		m_interfaces.insert(m_filter_iface);
 
 
@@ -251,7 +285,7 @@ void Engine::recompute()
 
 std::vector<Result> Engine::related(const std::string& name, int count) const
 {
-	Xapian::Enquire enquire(m_textsearch.db());
+	Xapian::Enquire enquire(m_db);
 	
 	// Retrieve the document for the given package
 	enquire.set_query(Xapian::Query("XP"+name));
@@ -284,13 +318,13 @@ void Engine::setKeywordFilter(const std::string& keywords)
 	m_dirty = true;
 }
 
-void Engine::setTypeFilter(const ept::debtags::Tag& tag)
+void Engine::setTypeFilter(const std::string& tag)
 {
 	m_filter_type = tag;
 	m_dirty = true;
 }
 
-void Engine::setInterfaceFilter(const ept::debtags::Tag& tag)
+void Engine::setInterfaceFilter(const std::string& tag)
 {
 	m_filter_iface = tag;
 	m_dirty = true;
diff --git a/src/Engine.h b/src/Engine.h
index 4369d08..855450e 100644
--- a/src/Engine.h
+++ b/src/Engine.h
@@ -23,7 +23,8 @@
 
 #include <ept/apt/apt.h>
 #include <ept/debtags/debtags.h>
-#include <ept/textsearch/textsearch.h>
+#include <ept/debtags/vocabulary.h>
+#include <ept/axi/axi.h>
 #include <ept/popcon/popcon.h>
 #include <string>
 #include <set>
@@ -58,22 +59,31 @@ protected:
 	/// Debtags data provider
 	ept::debtags::Debtags m_debtags;
 
+	/// Vocabulary data provider
+	ept::debtags::Vocabulary m_vocabulary;
+
 	/// Xapian data provider
-	ept::textsearch::TextSearch m_textsearch;
+	Xapian::Database m_db;
+
+	/// Xapian stemmer
+	Xapian::Stem m_stem;
+
+	/// Xapian query parser
+	Xapian::QueryParser m_qp;
 
 	/// Popcon scores
 	ept::popcon::Popcon m_popcon;
 
 	std::string m_filter_keywords;
-	ept::debtags::Tag m_filter_type;
-	ept::debtags::Tag m_filter_iface;
+	std::string m_filter_type;
+	std::string m_filter_iface;
 	Engine::State m_filter_state;
 
 	bool m_dirty;
 
 	std::vector<Result> m_results;
-	std::set<ept::debtags::Tag> m_types;
-	std::set<ept::debtags::Tag> m_interfaces;
+	std::set<std::string> m_types;
+	std::set<std::string> m_interfaces;
 
 	float m_max;
 	float m_res_max;
@@ -100,20 +110,20 @@ public:
 	ept::debtags::Debtags& debtags() { return m_debtags; }
 
 	/// Access the tag vocabulary
-	ept::debtags::Vocabulary& voc() { return m_debtags.vocabulary(); }
+	ept::debtags::Vocabulary& voc() { return m_vocabulary; }
 
 	/// Access the popcon data source
 	ept::popcon::Popcon& popcon() { return m_popcon; }
 
 	/// Get the list of available game types
-	const std::set<ept::debtags::Tag>& types()
+	const std::set<std::string>& types()
 	{
 		if (m_dirty) recompute();
 		return m_types;
 	}
 
 	/// Get the list of available interfaces
-	const std::set<ept::debtags::Tag>& interfaces()
+	const std::set<std::string>& interfaces()
 	{
 		if (m_dirty) recompute();
 		return m_interfaces;
@@ -144,12 +154,12 @@ public:
 	/**
 	 * Set the game type filter
 	 */
-	void setTypeFilter(const ept::debtags::Tag& tag = ept::debtags::Tag());
+	void setTypeFilter(const std::string& tag = std::string());
 
 	/**
 	 * Set the interface type filter
 	 */
-	void setInterfaceFilter(const ept::debtags::Tag& tag = ept::debtags::Tag());
+	void setInterfaceFilter(const std::string& tag = std::string());
 
 	/**
 	 * Set the installed state filter
diff --git a/src/filter.cpp b/src/filter.cpp
index a59c553..bc1ee88 100644
--- a/src/filter.cpp
+++ b/src/filter.cpp
@@ -20,7 +20,6 @@
 #include "taghandler.h"
 
 #include <string>
-#include <ept/debtags/tag.h>
 
 #define FACET_VIOLENCE "rating:violence"
 #define FACET_SEX "rating:sex"
@@ -86,26 +85,22 @@ int PackageFilter::TagValue(const Tag &tag)
 
 bool PackageFilter::GreenTag(const Tag &tag)
 {
-	std::string name = tag.fullname();
-	return tagdata.CheckTag(&green_tags, name);
+	return tagdata.CheckTag(&green_tags, tag);
 }
 
 bool PackageFilter::YellowTag(const Tag &tag)
 {
-	std::string name = tag.fullname();
-	return tagdata.CheckTag(&yellow_tags, name);
+	return tagdata.CheckTag(&yellow_tags, tag);
 }
 
 bool PackageFilter::RedTag(const Tag &tag)
 {
-	std::string name = tag.fullname();
-	return tagdata.CheckTag(&red_tags, name);
+	return tagdata.CheckTag(&red_tags, tag);
 }
 
 bool PackageFilter::BlackTag(const Tag &tag)
 {
-	std::string name = tag.fullname();
-	return tagdata.CheckTag(&black_tags, name);
+	return tagdata.CheckTag(&black_tags, tag);
 }
 
 int PackageFilter::TagsValue(const TagSet &tags)
diff --git a/src/filter.h b/src/filter.h
index 4cd5467..357dda2 100644
--- a/src/filter.h
+++ b/src/filter.h
@@ -22,7 +22,6 @@
 #include "taghandler.h"
 
 #include <set>
-#include <ept/debtags/tag.h>
 
 class PackageFilter
 {
@@ -38,8 +37,8 @@ public:
 		Black,     // Mayday, mayday, the tag/package might be really dangerous!
 	};
 
-	typedef ept::debtags::Tag Tag;
-	typedef std::set<Tag> TagSet;
+	typedef std::string Tag;
+	typedef std::set<std::string> TagSet;
 
 	bool GreenTag(const Tag &tag);
 	bool YellowTag(const Tag &tag);
diff --git a/src/goplay.cpp b/src/goplay.cpp
index c85d77e..5e4e3c2 100644
--- a/src/goplay.cpp
+++ b/src/goplay.cpp
@@ -91,16 +91,15 @@ using namespace std;
 using namespace ept;
 using namespace ept::debtags;
 using namespace ept::apt;
-using namespace ept::textsearch;
 
-char* tagString(const Tag& tag)
+char* tagString(const std::string& tag)
 {
 	static map<string, char*> table;
-	map<string, char*>::iterator i = table.find(tag.fullname());
+	map<string, char*>::iterator i = table.find(tag);
 	if (i == table.end())
 	{
 		pair< map<string, char*>::iterator, bool > tmp =
-			table.insert(make_pair(tag.fullname(), strdup(tag.fullname().c_str())));
+			table.insert(make_pair(tag, strdup(tag.c_str())));
 		i = tmp.first;
 	}
 	return i->second;
@@ -128,18 +127,18 @@ void printResults(Engine& engine)
 		cerr << "PKG " << pkg.package() << " - " << pkg.shortDescription() << endl;
 	}
 
-	const set<Tag>& ttags = engine.types();
-	for (set<Tag>::const_iterator i = ttags.begin();
+	const set<string>& ttags = engine.types();
+	for (set<string>::const_iterator i = ttags.begin();
 			i != ttags.end(); ++i)
 	{
-		cerr << "TTAG " << i->fullname() << endl;
+		cerr << "TTAG " << *i << endl;
 	}
 
-	const set<Tag>& ftags = engine.interfaces();
-	for (set<Tag>::const_iterator i = ftags.begin();
+	const set<string>& ftags = engine.interfaces();
+	for (set<string>::const_iterator i = ftags.begin();
 			i != ftags.end(); ++i)
 	{
-		cerr << "ITAG " << i->fullname() << endl;
+		cerr << "ITAG " << *i << endl;
 	}
 }
 
@@ -186,26 +185,30 @@ static void UpdateUILists(GamesUI& ui)
 
 	// FIXME: there are better ways to remember the previous item
 	
-	const set<Tag> types = engine.types();
+	const set<string> types = engine.types();
 	int newIdx = 0;
-	for (set<Tag>::const_iterator i = types.begin();
+	for (set<string>::const_iterator i = types.begin();
 			i != types.end(); ++i)
 	{
-		int idx = ui.TypeSelection->add(gettext(i->shortDescription().c_str()),
+		const voc::TagData* td = engine.voc().tagData(*i);
+		if (!td) continue;
+		int idx = ui.TypeSelection->add(gettext(td->shortDescription().c_str()),
 							0, NULL, tagString(*i), FL_NORMAL_LABEL);
-		if (i->fullname() == oldType)
+		if (*i == oldType)
 			newIdx = idx;
 	}
 	ui.TypeSelection->value(newIdx);
 	
-	const set<Tag> ifaces = engine.interfaces();
+	const set<std::string> ifaces = engine.interfaces();
 	newIdx = 0;
-	for (set<Tag>::const_iterator i = ifaces.begin();
+	for (set<std::string>::const_iterator i = ifaces.begin();
 			i != ifaces.end(); ++i)
 	{
-		int idx = ui.InterfaceSelection->add(gettext(i->shortDescription().c_str()),
+		const voc::TagData* td = engine.voc().tagData(*i);
+		if (!td) continue;
+		int idx = ui.InterfaceSelection->add(gettext(td->shortDescription().c_str()),
 							0, NULL, tagString(*i), FL_NORMAL_LABEL);
-		if (i->fullname() == oldIface)
+		if (*i == oldIface)
 			newIdx = idx;
 	}
 	ui.InterfaceSelection->value(newIdx);
@@ -225,7 +228,7 @@ static void UpdateUILists(GamesUI& ui)
 
 		Fl_Color bk(FL_WHITE);
 		Fl_Color fr(FL_BLACK);
-		set<Tag> tags = ui.engine->debtags().getTagsOfItem((const char *)rec.package().c_str());
+		set<std::string> tags = ui.engine->debtags().getTagsOfItem((const char *)rec.package().c_str());
 		switch (filter.TagsValue(tags))
 		{
 			case PackageFilter::Green:
@@ -267,8 +270,7 @@ static void CallBackTypeSelection(Fl_Choice* choice, void *data)
 	//printf("CallBackTypeSelection\n");
 	//fflush(stdout);
 	GamesUI& ui = *static_cast<GamesUI*>(data);
-	Tag tag = ui.engine->voc().tagByName(ReadFlChoice(*choice));
-	ui.engine->setTypeFilter(tag);
+	ui.engine->setTypeFilter(ReadFlChoice(*choice));
 	UpdateUILists(ui);
 }
 
@@ -277,8 +279,7 @@ static void CallBackInterfaceSelection(Fl_Choice* choice, void *data)
 	//printf("CallBackInterfaceSelection\n");
 	//fflush(stdout);
 	GamesUI& ui = *static_cast<GamesUI*>(data);
-	Tag tag = ui.engine->voc().tagByName(ReadFlChoice(*choice));
-	ui.engine->setInterfaceFilter(tag);
+	ui.engine->setInterfaceFilter(ReadFlChoice(*choice));
 	UpdateUILists(ui);
 }
 
diff --git a/src/pkgbrowser.cpp b/src/pkgbrowser.cpp
index f39f02b..417f5f1 100644
--- a/src/pkgbrowser.cpp
+++ b/src/pkgbrowser.cpp
@@ -65,7 +65,6 @@ using namespace std;
 using namespace ept;
 using namespace ept::debtags;
 using namespace ept::apt;
-using namespace ept::textsearch;
 
 static const char legalchars[] = "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvw...@_?+-,.~/%&=:*#";
 #define SIZE_URI 256
@@ -221,11 +220,15 @@ void PackageBrowser::item_select(void *p, int s)
 			ui->DebTagsBrowser->column_widths(widths);
 			ui->DebTagsBrowser->add(_("@b...@c7@[email protected]\t@b...@c7@[email protected]"));
 
-			set<Tag> tags = ui->engine->debtags().getTagsOfItem((const char *)data);
+			set<std::string> tags = ui->engine->debtags().getTagsOfItem((const char *)data);
 			PackageFilter filter;
 			char *tag_txt = new char[512];
-			for (set<Tag>::const_iterator i = tags.begin(); i != tags.end(); ++i)
+			for (set<std::string>::const_iterator i = tags.begin(); i != tags.end(); ++i)
 			{
+				const voc::FacetData* fd = ui->engine->voc().facetData(voc::getfacet(*i));
+				const voc::TagData* td = ui->engine->voc().tagData(*i);
+				if (!fd || !td) continue;
+
 				// Available Colors: FL_BLACK, FL_BLUE, FL_CYAN, FL_DARK_BLUE,
 				// FL_DARK_CYAN, FL_DARK_GREEN FL_DARK_MAGENTA, FL_DARK_RED,
 				// FL_DARK_YELLOW, FL_GREEN, FL_MAGENTA, FL_RED, FL_WHITE, FL_YELLOW
@@ -247,9 +250,9 @@ void PackageBrowser::item_select(void *p, int s)
 				}
 				snprintf(tag_txt, 512, "@b...@c%d@.%...@b%d@c...@.%s",
 					bk, fr,
-					gettext(i->facet().shortDescription().c_str()),
+					gettext(fd->shortDescription().c_str()),
 					bk, fr,
-					gettext(i->shortDescription().c_str())
+					gettext(td->shortDescription().c_str())
 				);
 				ui->DebTagsBrowser->add(tag_txt);
 			}

Attachment: signature.asc
Description: Digital signature

Reply via email to