Hello,
the fuzzy search gives also results for similar or wrong entered search texts.
For example if you search "REM" in the result appears also "R.E.M." or vice
versa.
A short description how the patch works:
- it creates a temporary table in the sqlite database to control the search
- it registers a user defined function "fuzzy" usable in sql queries
- it modifies the view used to display the library/search results to use the
temporary table and the function
- when a search string is entered it initializes the search engine
- then the search string is entered to the temporary table and
- then as usual the view is queried again which then executes the user defined
function
- sorting is done on the match value
Problems with this patches:
- need to be integrated to the build system
- the original version uses a function newOrder after the "first stage" to
refine the results
As far as I can see the original author has licensed the files with GPL2:
http://yammi.cvs.sourceforge.net/viewvc/yammi/kyammi/COPYING?revision=1.1.1.1&view=markup
The attached 3 patches are made against mixxx 1.9.
(Should I supply a version against HEAD?)
I want to ask if such a functionality is wanted in mixxx
and if there are some other problems with it or what you think of it?
Kind regards,
Bernhard
From 5131a4c4e77e2ac1be52ca7465d9c58ba2ba0ffc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= <bernha...@vr-web.de>
Date: Sat, 26 Mar 2011 17:50:53 +0100
Subject: Last version fuzzy search from yammi
---
mixxx/src/library/fuzzsrch.cpp | 419 ++++++++++++++++++++++++++++++++++++++++
mixxx/src/library/fuzzsrch.h | 164 ++++++++++++++++
2 files changed, 583 insertions(+), 0 deletions(-)
create mode 100644 mixxx/src/library/fuzzsrch.cpp
create mode 100644 mixxx/src/library/fuzzsrch.h
diff --git a/mixxx/src/library/fuzzsrch.cpp b/mixxx/src/library/fuzzsrch.cpp
new file mode 100644
index 0000000..da82169
--- /dev/null
+++ b/mixxx/src/library/fuzzsrch.cpp
@@ -0,0 +1,419 @@
+/** Unscharfe Suche nach Objektnamen
+*
+* 7-2000 - 8-2000 by Oliver Nölle
+*
+* Version 1.0, last modified 18.8.2000
+*
+* nur Methodenrümpfe, Klassendeklarationen+Doku in fuzzsrch.h
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "fuzzsrch.h"
+
+
+/*class BestMatchEntry {
+public:
+ int sim;
+ char* objName;
+ char* preparedName;
+ void* objPtr;
+*/
+
+BestMatchEntry::BestMatchEntry(const int sim_, const char* objName_, const char* preparedName_, void* objPtr_) {
+ sim=sim_;
+ objPtr=objPtr_;
+
+ // copy objName (if entry is too long, we simply cut it off to MaximumEntryLength)
+ objName=new char[MaximumEntryLength];
+// strcpy(objName, objName_);
+ char* tmpPtr=objName;
+ for ( ; *objName_ && (tmpPtr-objName < MaximumEntryLength-1); tmpPtr++, objName_++)
+ *tmpPtr=*objName_;
+ *tmpPtr='\0';
+
+ // copy preparedName (shouldn't be too long because already cut off in FuzzySearch::prepareString, but to be sure...)
+ preparedName=new char[MaximumEntryLength];
+// strcpy(preparedName, preparedName_);
+ tmpPtr=preparedName;
+ for ( ; *preparedName_ && (tmpPtr-preparedName < MaximumEntryLength-1); tmpPtr++, preparedName_++)
+ *tmpPtr=*preparedName_;
+ *tmpPtr='\0';
+}
+
+
+BestMatchEntry::~BestMatchEntry() {
+ delete(objName);
+ delete(preparedName);
+}
+
+
+
+
+// one ngram-entry associated with the searchstring
+// keeps track of frequencies and, corresponding, entropy of an ngram
+/*class NgramEntry {
+public:
+int length;
+int frequency;
+int entropy; // should give an information value to the appearance of this ngram
+int precondition[2]; // index of those ngrams that must be found for this ngram to be "appearable"
+bool found; // found in this objectname?
+char str[MaximumNgramLen];
+*/
+NgramEntry::NgramEntry(int length_, const char* strStart, int pre1, int pre2) {
+ int i;
+ length=length_;
+ frequency=0;
+ if (length_==0) found=true;
+ else found=false;
+ precondition[0]=pre1;
+ precondition[1]=pre2;
+ entropy=20+length*10;
+ for(i=0; i<length; i++) str[i]=strStart[i];
+ str[i]='\0';
+}
+
+int max(int a, int b)
+{
+ if(a>=b) return a;
+ else return b;
+}
+
+// a representation of a searchstring as an efficient ngram-datastructure
+// this representation needs to be build only once per searchstring
+/*class NgramRep {
+public:
+int minNgramLen, maxNgramLen, noLevels;
+int noNgrams;
+int noNgramsInLevel[MaximumNgramLen];
+int lenNgramsInLevel[MaximumNgramLen];
+NgramEntry** ngs;
+*/
+// constructor: divide searchstring into ngrams and build data structure
+NgramRep::NgramRep(char* searchStr, int length, int minNgramLen_, int maxNgramLen_)
+{
+ minNgramLen=minNgramLen_;
+ maxNgramLen=maxNgramLen_;
+ noLevels=maxNgramLen-minNgramLen+1;
+
+ int i, j;
+ for (i=minNgramLen, j=0, noNgrams=0; i<= maxNgramLen; i++, j++) {
+ noNgrams+=max(length-i+1, 0);
+ noNgramsInLevel[j]=max(length-i+1, 0);
+ lenNgramsInLevel[j]=i;
+ }
+
+ ngs=new NgramEntry*[noNgrams+1];
+ // divide searchstring into ngrams
+ int pre1, pre2;
+ int total=1; // keep in ngs[0] a pseudo ngram with found always =true
+ ngs[0]=new NgramEntry(0, "x", -1, -1);
+ maxScore=0;
+
+ // for each level (=length of ngrams) get all ngrams
+ for(int level=0; level<noLevels; level++)
+ {
+ for(i = 0; i < noNgramsInLevel[level]; i++)
+ {
+ if (level!=0) {
+ pre1=total-noNgramsInLevel[level]-1;
+ pre2=total-noNgramsInLevel[level];
+ } else {
+ pre1=0;
+ pre2=0;
+ }
+ ngs[total]=new NgramEntry(lenNgramsInLevel[level], &searchStr[i], pre1, pre2);
+ maxScore+=ngs[total]->entropy;
+ total++;
+ }
+ }
+}
+
+
+NgramRep::~NgramRep() {
+ for(int i=0; i<=noNgrams; i++)
+ delete(ngs[i]);
+ delete(ngs);
+}
+
+
+
+/*
+prepare a string:
+- enclose with spaces (better matches with matching word boundaries)
+- to lowercase
+- replace special characters like '-', '.', ... with space
+- return length
+- phonetic similarities => ß -> ss, ö -> oe, ...
+*/
+int FuzzySearch::prepareString(char* convStr, const char* originStr)
+{
+ char* tmpPtr=convStr;
+ *tmpPtr++=' '; // leading space
+
+ // if entry is too long, we simply cut it off to MaximumEntryLength
+ for ( ; *originStr && (tmpPtr-convStr < MaximumEntryLength-4); tmpPtr++, originStr++)
+ {
+// !!! *tmpPtr = tolower((unsigned char)*originStr);
+ *tmpPtr=*originStr;
+ switch(*tmpPtr)
+ {
+ // pretty ugly, but in a standalone dos-shell program the entered characters are ASCII-umlaute,
+ // in the db there are ANSI-umlaute, so we have to take care of both
+ case -60: // ANSI-Umlaute
+ *tmpPtr++ = 'a';
+ *tmpPtr = 'e';
+ break;
+ case -28:
+ *tmpPtr++ = 'a';
+
+ *tmpPtr = 'e';
+ break;
+ case -114: // ASCII-Umlaute
+ *tmpPtr++ = 'a';
+ *tmpPtr = 'e';
+ break;
+ case -124:
+ *tmpPtr++ = 'a';
+ *tmpPtr = 'e';
+ break;
+ case -42:
+ *tmpPtr++ = 'o';
+ *tmpPtr = 'e';
+ break;
+ case -10:
+ *tmpPtr++ = 'o';
+ *tmpPtr = 'e';
+ break;
+ case -108:
+ *tmpPtr++ = 'o';
+ *tmpPtr = 'e';
+ break;
+ case -103:
+ *tmpPtr++ = 'o';
+ *tmpPtr = 'e';
+ break;
+ case -36:
+ *tmpPtr++ = 'u';
+ *tmpPtr = 'e';
+ break;
+ case -4:
+ *tmpPtr++ = 'u';
+ *tmpPtr = 'e';
+ break;
+ case -102:
+ *tmpPtr++ = 'u';
+ *tmpPtr = 'e';
+ break;
+ case -127:
+ *tmpPtr++ = 'u';
+ *tmpPtr = 'e';
+ break;
+ case ',': *tmpPtr = ' '; break; // so, diese Sonderzeichen kommen alle vor, alle in spaces umwandeln...
+ case '-': *tmpPtr = ' '; break;
+ case '.': tmpPtr-- ; break; // for yammi: discard them (R.E.M = REM)
+ case '(': *tmpPtr = ' '; break;
+ case ')': *tmpPtr = ' '; break;
+ case '*': *tmpPtr = ' '; break;
+ case '/': *tmpPtr = ' '; break;
+ case '@': *tmpPtr = ' '; break; // ...bis hier
+ case -33: // scharfes s (im Textfile eingelesen)
+ case -31: // scharfes s (unter dos eingegeben)
+ *tmpPtr++='s';
+ *tmpPtr='s';
+ break;
+ case '\n': // discard newline character
+ tmpPtr--;
+ break;
+ case 13: // discard linefeed? character
+ tmpPtr--;
+ break;
+ }
+ }
+
+ *tmpPtr++=' '; // finishing space
+ *tmpPtr = '\0';
+ return (tmpPtr - convStr);
+}
+
+
+
+// re-evaluate similarities (now with frequency) and re-order list of best matches
+void FuzzySearch::newOrder() {
+ if (noObjects==-1) return;
+ NgramEntry** ngs;
+ ngs=ngr->ngs;
+ int i,j;
+
+ // step 0: calculate new entropy for all ngrams (depending on frequency)
+ ngr->maxScore=0;
+ for(i = 1; i <= ngr->noNgrams; i++) {
+ double prob=(double)ngs[i]->frequency/(double)noObjects; // probability of appearance
+ double loggi=log(prob*100+1)/log(2.0); // clevere Formel...
+ int entro = 20+(ngs[i]->length)*10;
+ entro=(int) (entro*(1.0-loggi/10.0));
+ if (entro<1) entro=1;
+
+ ngr->maxScore+=entro;
+ ngs[i]->entropy=entro;
+ }
+
+ // step 1: calculate new similarity value for all best matches
+ for(j=0; j<NoBestMatches && bme[j]; j++) {
+ int accuracy=0;
+ // für jedes ngram match überprüfen
+ for(i=1; i<=ngr->noNgrams; i++) {
+ ngs[i]->found=false;
+ if(ngs[ngs[i]->precondition[0]]->found==true && ngs[ngs[i]->precondition[1]]->found==true) {
+ if(strstr(bme[j]->preparedName, ngs[i]->str)) {
+ accuracy += (ngs[i]->entropy);
+ ngs[i]->found=true;
+ }
+ }
+ }
+ accuracy=accuracy*1000/ngr->maxScore;
+ bme[j]->sim=accuracy;
+ }
+
+ // step 2: re-order list
+ qsort((void*)bme, j, sizeof(bme[0]), bmeCompare );
+ return;
+}
+
+
+
+// insert into best matches list (ordered by similarity)
+int FuzzySearch::insertIntoOrderedList(int similarity, const char* insertObject, const char* prepObject, void* objPtr)
+{
+ int i, j;
+ for (i=0; i<NoBestMatches; i++) {
+ if (bme[i]==NULL) {
+ bme[i]=new BestMatchEntry(similarity, insertObject, prepObject, objPtr);
+ break;
+ }
+ else
+ if (bme[i]->sim<similarity) {
+ // delete last entry coz it will be dropped
+ if (bme[NoBestMatches-1]!=NULL)
+ delete(bme[NoBestMatches-1]);
+ for(j=NoBestMatches-1; j>i; j--) {
+ bme[j]=bme[j-1];
+ }
+ bme[i]=new BestMatchEntry(similarity, insertObject, prepObject, objPtr);
+ break;
+ }
+ }
+ if (bme[NoBestMatches-1]!=NULL)
+ return bme[NoBestMatches-1]->sim;
+ else
+ return -1;
+}
+
+
+void FuzzySearch::initialize(const char* searchStr_, int minNgramLen, int maxNgramLen)
+{
+ if (noObjects!=-1) { // object was already initialized => we have to clean up first
+ // this is exactly the same as what the destructor does (but we can't call it, can we?)
+ // clean up bme-list
+ for(int i=0; i<NoBestMatches; i++) {
+ if (bme[i]!=0)
+ delete(bme[i]);
+ }
+ delete ngr;
+ }
+
+ // cut off searchstring to MaximumEntryLength
+ // strcpy(searchStr, searchStr_);
+ char* tmpPtr=searchStr;
+ for ( ; *searchStr_ && (tmpPtr-searchStr < MaximumEntryLength-1); tmpPtr++, searchStr_++)
+ *tmpPtr=*searchStr_;
+ *tmpPtr='\0';
+
+ // initialize list of best matches
+ for (int i=0; i<NoBestMatches; i++)
+ bme[i]=NULL;
+ noObjects=0;
+ minSim=-1;
+
+ // printf("searchstring vorbereiten...");
+ searchStrLen = prepareString(preparedSearchStr, searchStr);
+ // printf(" result: <%s>\n", preparedSearchStr);
+
+ // printf("erzeuge ngramrep...\n");
+ ngr=new NgramRep(preparedSearchStr, searchStrLen, minNgramLen, maxNgramLen);
+ ngs=ngr->ngs;
+}
+
+
+// default-constructor
+FuzzySearch::FuzzySearch()
+{
+ noObjects=-1;
+}
+
+
+// init-constructor
+FuzzySearch::FuzzySearch(const char* searchStr_, int minNgramLen, int maxNgramLen)
+{
+ initialize(searchStr_, minNgramLen, maxNgramLen);
+}
+
+
+// destructor
+FuzzySearch::~FuzzySearch()
+{
+ if (noObjects==-1) return;
+ // clean up bme-list
+ for(int i=0; i<NoBestMatches; i++) {
+ if (bme[i]!=0)
+ delete(bme[i]);
+ }
+ delete ngr;
+}
+
+
+// perform searchstep
+// returns: similarity coefficient between 0 and 1, -1 if error
+int FuzzySearch::checkNext(const char* nextEntry, void* objPtr)
+{
+ if (noObjects==-1) return -1;
+ char preparedNextEntry[MaximumEntryLength];
+ int accuracy=0;
+ int i;
+
+ if (*nextEntry==0)
+ return -1;
+
+ noObjects++;
+ // prepare the string (leading space, lowercase, ...)
+ prepareString(preparedNextEntry, nextEntry);
+
+ // für jedes ngram match überprüfen
+ for(i=1; i<=ngr->noNgrams; i++) {
+ ngs[i]->found=false;
+ if(ngs[ngs[i]->precondition[0]]->found==true && ngs[ngs[i]->precondition[1]]->found==true) {
+ if(strstr(preparedNextEntry, ngs[i]->str)) {
+ accuracy += ngs[i]->entropy;
+ ngs[i]->frequency++;
+ ngs[i]->found=true;
+ }
+ }
+ }
+ accuracy=(accuracy*1000)/ngr->maxScore;
+ // Wenn Guete >= minimaler Wert in best matches liste: insert into list
+ if(accuracy > minSim) {
+ minSim=insertIntoOrderedList(accuracy, nextEntry, preparedNextEntry, objPtr);
+ }
+ return accuracy;
+}
+
+// returns a sorted array of the best matches
+// returns 0 if class was not initialized
+BestMatchEntry** FuzzySearch::getBestMatchesList() {
+ if (noObjects==-1) return 0;
+ else return bme;
+}
diff --git a/mixxx/src/library/fuzzsrch.h b/mixxx/src/library/fuzzsrch.h
new file mode 100644
index 0000000..dab103e
--- /dev/null
+++ b/mixxx/src/library/fuzzsrch.h
@@ -0,0 +1,164 @@
+/** Unscharfe Suche nach Objektnamen
+*
+* 7-2000 bis 8-2000 by Oliver Nölle
+*
+* Version 1.0, last modified 18.8.2000
+*
+
+ - führt unscharfe Suche auf Objektnamen durch
+ d.h. fehlertolerante Suche nach Objekten oder Strassen
+ - basiert auf ngram-Zerlegung
+ Artikel+Source in der c't (ich glaube 4-97)
+ - Effizienz angepaßt an
+ - kurzer Suchstring (ca. 2-20 Zeichen)
+ - viele (ca. 10.000 - 100.000) aber daür kurze (ca. 10-30 Zeichen) Einträge in der Objektdatenbank
+ - Qualität verbessert durch
+ - Berechnung des Informationsgehalts eines ngrams (frequency=Auftretenshäufigkeit)
+
+ Strategie:
+ 1. Suchstring + Objektnamen vorverarbeiten
+ - to lowercase
+ - phonetisch: ß -> ss
+ - Umlaute in ae, oe, ue
+ - Sonderzeichen -> Leerzeichen
+
+ 2. Suchstring in passende ngramme zerlegen, jeder bekommt eine Anfangsgewichtung nach Länge
+
+ 3. lineares durchsuchen der Textdatei, <NoBestMatches> besten Einträge werden in einer sortierten Liste gehalten
+
+
+
+ 4. Neubewertung der einzelnen ngramme nach frequency, Neusortierung der best matches liste
+
+ folgende Verbesserungen denkbar:
+ - diese 200 Einträge nach Levenshtein-Distanz neu bewerten/ordnen
+ (z.B. für suchstring "Heierweg" sollte "Haierweg" besser bewerten als "Eisweierweg"
+ - anstatt den ersten beiden Stufen: Bewertung >1000 = exakter Anfangsmatch, Bewertung =1000 exaktes Vorkommen, <1000 fuzzy
+ - anstatt fuzzy search und fuzzy phonetic getrennt: Bewertungen addieren / maximale Bewertung übernehmen
+ - überdenken der Präsentation der Ergebnisse der verschiedenen Stufen => transparentere Bewertungen
+
+*/
+
+#ifndef FUZZSRCH_H
+#define FUZZSRCH_H
+
+#define MaximumEntryLength 200 // maximum length of one searchable entry
+#define NoBestMatches 200 // how many hits to keep in best matches list
+#define MaximumNgramLen 10 // maximum length of used ngrams (more than 5 doesn't help a lot...)
+
+
+// fuzzysearch returns an array of <NoBestMatches> entries of this class
+// beware of null-pointers if checkNext is invoked less then <NoBestMatches> times!
+// in this case (eg only five streets in db), evaluate the returned array until the first null-pointer!
+// objPtr can be filled with anything when invoking FuzzySearch.checkNext(...)
+class BestMatchEntry {
+public:
+ int sim; // similarity coefficient, between 0(no match at all) and 1000 (all ngrams found)
+ char* objName; // original name of the object
+ char* preparedName; // preprocessed name
+ void* objPtr; // user-filled...whatever you like
+ BestMatchEntry(const int sim_, const char* objName_, const char* preparedName_, void* objPtr_);
+ ~BestMatchEntry();
+};
+
+
+
+// NgramRep and NgramEntry are needed to represent the searchstring
+// => only created once per search, so we don't worry too much about efficiency/space here
+
+// represents a single ngram
+class NgramEntry {
+public:
+ int length;
+ int frequency; // count how many times ngram appears
+ int entropy; // should give an information value to the appearance of this ngram
+ int precondition[2]; // index of those ngrams that must be found for this ngram to be "appearable"
+ bool found; // flag for checking precondition
+ char str[MaximumNgramLen]; // the ngram itself
+ NgramEntry(int length_, const char* strStart, int pre1, int pre2);
+};
+
+
+
+// a searchstring is represented as a number of ngrams, organised by this class
+class NgramRep {
+public:
+ int minNgramLen, maxNgramLen, noLevels;
+ int noNgrams; // no ngrams representing the searchstring
+ int noNgramsInLevel[MaximumNgramLen];
+ int lenNgramsInLevel[MaximumNgramLen];
+ int maxScore; // score of all ngrams added up = maximum score an entry can get
+ NgramEntry** ngs; // pointer to an array of ngrams
+ NgramRep(char* searchStr, int length, int minNgramLen_, int maxNgramLen_);
+ ~NgramRep();
+};
+
+
+
+
+// main class for performing a fuzzy search
+//
+// normal use in 4 steps:
+// step 1: create class + initialize with searchstring and params
+// step 2: checkNext(...) for all entries in database
+// step 3: (optional) newOrder(): reorder according to frequencies
+// step 4: getBestMatchesList() and do sth. with the result
+//
+// good values: minNgramLen=2 and maxNgramLen=5
+//
+// class is robust against:
+// - no matches at all (first found entries with sim=0 are returned)
+// - searchstrings longer than MaximumEntryLength (=> cut off)
+// - entries longer than MaximumEntryLength (=> cut off)
+// - searchstrings with length 0 (no meaningful result, but no crash)
+// - entries with length 0 (probably won't appear in result, which should be okay)
+//
+// beware of null-pointers in step 4 if best matches list is not completely filled
+// (not enough objects checked to fill list)
+class FuzzySearch {
+public:
+ // creates a not initialized class
+ FuzzySearch();
+
+ // creates and initializes class
+ FuzzySearch(const char* searchstStr, int minNgramLen, int maxNgramLen);
+
+ // frees all resources, including the best matches list
+ ~FuzzySearch();
+
+ // initializes an existing class
+ void initialize(const char* searchstStr, int minNgramLen, int maxNgramLen);
+
+ // checks next entry from database, returns similarity value for that entry
+ int checkNext(const char* nextObj, void* objPtr);
+
+ // returns the result array of best matches, 0 if class was not initialized
+ BestMatchEntry** getBestMatchesList();
+
+ // performs the newOrder mechanism to take frequency of ngrams into account
+ void newOrder();
+
+ NgramEntry** ngs; // should be protected, but for testing we want to have access
+ NgramRep* ngr; // same (we want to have a look at ngrams and their frequency/entropy)
+
+protected:
+ BestMatchEntry* bme[NoBestMatches];
+ int noObjects;
+ int minSim;
+ int searchStrLen;
+ char searchStr[MaximumEntryLength];
+ char preparedSearchStr[MaximumEntryLength];
+
+ int prepareString(char* convStr, const char* originStr);
+ // compares two best match entries, considering their similarity value
+ static int bmeCompare(const void *elem1, const void *elem2)
+ {
+ int sim1=(*(BestMatchEntry**)elem1)->sim;
+ int sim2=(*(BestMatchEntry**)elem2)->sim;
+ return sim2-sim1;
+ }
+ int insertIntoOrderedList(int similarity, const char* insertObject, const char* prepObject, void* objPtr);
+};
+
+#endif
+
--
1.7.2.5
From a23afebebe98acce525c0edd46754610d408fc08 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= <bernha...@vr-web.de>
Date: Sat, 26 Mar 2011 19:07:58 +0100
Subject: Use the fuzzy search
---
mixxx/src/library/basesqltablemodel.cpp | 2 +
mixxx/src/library/librarytablemodel.cpp | 93 +++++++++++++++++++++++--------
2 files changed, 71 insertions(+), 24 deletions(-)
diff --git a/mixxx/src/library/basesqltablemodel.cpp b/mixxx/src/library/basesqltablemodel.cpp
index 12500f9..5192309 100644
--- a/mixxx/src/library/basesqltablemodel.cpp
+++ b/mixxx/src/library/basesqltablemodel.cpp
@@ -63,6 +63,8 @@ void BaseSqlTableModel::initHeaderData() {
Qt::Horizontal, tr("#"));
setHeaderData(fieldIndex(LIBRARYTABLE_KEY),
Qt::Horizontal, tr("Key"));
+ setHeaderData(fieldIndex("match"),
+ Qt::Horizontal, tr("Match"));
}
bool BaseSqlTableModel::select() {
diff --git a/mixxx/src/library/librarytablemodel.cpp b/mixxx/src/library/librarytablemodel.cpp
index e820de4..5967132 100644
--- a/mixxx/src/library/librarytablemodel.cpp
+++ b/mixxx/src/library/librarytablemodel.cpp
@@ -1,14 +1,31 @@
#include <QtCore>
#include <QtGui>
#include <QtSql>
+#include <sqlite3.h>
#include "library/trackcollection.h"
#include "library/librarytablemodel.h"
#include "mixxxutils.cpp"
+#include "fuzzsrch.cpp"
+
+static FuzzySearch fs;
+
const QString LibraryTableModel::DEFAULT_LIBRARYFILTER = "mixxx_deleted=0 AND fs_deleted=0";
+void fuzzy(sqlite3_context *context, int argc, sqlite3_value **argv)
+{
+ if (argc == 1) {
+ const char * text = (const char *) sqlite3_value_text(argv[0]);
+ if (text) {
+ int result = fs.checkNext(text); // STEP 2
+ //qDebug() << "fuzzy called (" << result << " " << (const char *)sqlite3_value_text(argv[0]) << ")";
+ sqlite3_result_int64(context, result);
+ }
+ }
+}
+
LibraryTableModel::LibraryTableModel(QObject* parent,
TrackCollection* pTrackCollection)
: TrackModel(pTrackCollection->getDatabase(),
@@ -17,6 +34,28 @@ LibraryTableModel::LibraryTableModel(QObject* parent,
m_trackDao(pTrackCollection->getTrackDAO()) {
QSqlQuery query(pTrackCollection->getDatabase());
+
+// query.prepare("CREATE TEMPORARY TABLE IF NOT EXISTS currentsearch AS SELECT 'search' as type, '%' as value");
+ query.prepare("CREATE TEMPORARY TABLE IF NOT EXISTS currentsearch AS SELECT 'fuzzy' as type, '%' as value");
+ if (!query.exec()) {
+ qDebug() << query.executedQuery() << query.lastError();
+ }
+ query.finish();
+
+ QVariant v = pTrackCollection->getDatabase().driver()->handle();
+ if (v.isValid() && qstrcmp(v.typeName(), "sqlite3*")==0) {
+ // v.data() returns a pointer to the handle
+ sqlite3 *handle = *static_cast<sqlite3 **>(v.data());
+ if (handle != 0) { // check that it is not NULL
+ if (SQLITE_OK != sqlite3_create_function(handle, "FUZZY", 1, SQLITE_ANY, NULL, &fuzzy, NULL, NULL)) {
+ qDebug() << "sqlite3 function creation failed";
+ }
+ }
+ }
+
+ qDebug() << "fs.initialize called ()";
+ fs.initialize(QString(" ").toLower().toAscii(), 2, 4); // STEP 1
+
query.prepare("CREATE TEMPORARY VIEW IF NOT EXISTS library_view AS "
"SELECT "
"library." + LIBRARYTABLE_ID + "," +
@@ -34,13 +73,35 @@ LibraryTableModel::LibraryTableModel(QObject* parent,
"library." + LIBRARYTABLE_KEY + "," +
"library." + LIBRARYTABLE_DATETIMEADDED + "," +
"library." + LIBRARYTABLE_BPM + "," +
+ "library." + LIBRARYTABLE_BITRATE + "," +
"track_locations.location," +
"track_locations.fs_deleted," +
"library." + LIBRARYTABLE_COMMENT + "," +
- "library." + LIBRARYTABLE_MIXXXDELETED + " " +
- "FROM library " +
- "INNER JOIN track_locations " +
- "ON library.location = track_locations.id ");
+ "library." + LIBRARYTABLE_MIXXXDELETED + "," +
+ " CASE WHEN currentsearch.value in ('', 'search')"
+ " THEN CASE WHEN ( artist LIKE currentsearch.value"
+ " OR album LIKE currentsearch.value"
+ " OR title LIKE currentsearch.value"
+ " OR currentsearch.value = ''"
+ " )"
+ " THEN 100"
+ " ELSE 0"
+ " END"
+ " WHEN currentsearch.type = 'fuzzy'"
+ " THEN fuzzy(lower(CASE WHEN '' in (artist, title)"
+ " THEN ' ' || track_locations.filename || ' - ' || album || ' - ' || comment || ' '"
+ " ELSE ' ' || artist || ' - ' || title || ' - ' || album || ' - ' || comment || ' '"
+ " END))"
+ " ELSE 0"
+ " END AS match"
+ " FROM library,"
+ " track_locations,"
+ " currentsearch"
+ " WHERE library.location = track_locations.id"
+ " AND match >= 100"
+ " ORDER BY match desc"
+ );
+
if (!query.exec()) {
qDebug() << query.executedQuery() << query.lastError();
}
@@ -52,6 +113,7 @@ LibraryTableModel::LibraryTableModel(QObject* parent,
//setTable("library");
setTable("library_view");
+ setSort(17, Qt::DescendingOrder);
//Set up a relation which maps our location column (which is a foreign key
//into the track_locations) table. We tell Qt that our LIBRARYTABLE_LOCATION
@@ -155,27 +217,10 @@ void LibraryTableModel::slotSearch(const QString& searchText) {
m_currentSearch = searchText;
- QString filter;
- if (searchText == "")
- filter = "(" + LibraryTableModel::DEFAULT_LIBRARYFILTER + ")";
- else {
- QSqlField search("search", QVariant::String);
-
- filter = "(" + LibraryTableModel::DEFAULT_LIBRARYFILTER;
-
- foreach(QString term, searchText.split(" "))
- {
- search.setValue("%" + term + "%");
- QString escapedText = database().driver()->formatValue(search);
- filter += " AND (artist LIKE " + escapedText + " OR " +
- "album LIKE " + escapedText + " OR " +
- "location LIKE " + escapedText + " OR " +
- "comment LIKE " + escapedText + " OR " +
- "title LIKE " + escapedText + ")";
- }
+ qDebug() << "fs.initialize called (" << searchText.toAscii() << ")";
+ fs.initialize(searchText.toLower().toUtf8(), 2, 4); // STEP 1
- filter += ")";
- }
+ QString filter = "(" + LibraryTableModel::DEFAULT_LIBRARYFILTER + ")";
setFilter(filter);
}
--
1.7.2.5
From 28780262b785bb69cb32e4f5c9be754835165770 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= <bernha...@vr-web.de>
Date: Sat, 26 Mar 2011 17:59:46 +0100
Subject: Remove unusable features from fuzzsrch.* and some adaptions
---
mixxx/src/library/fuzzsrch.cpp | 168 ++++------------------------------------
mixxx/src/library/fuzzsrch.h | 50 +-----------
2 files changed, 19 insertions(+), 199 deletions(-)
diff --git a/mixxx/src/library/fuzzsrch.cpp b/mixxx/src/library/fuzzsrch.cpp
index da82169..be010bb 100644
--- a/mixxx/src/library/fuzzsrch.cpp
+++ b/mixxx/src/library/fuzzsrch.cpp
@@ -14,45 +14,6 @@
#include "fuzzsrch.h"
-
-/*class BestMatchEntry {
-public:
- int sim;
- char* objName;
- char* preparedName;
- void* objPtr;
-*/
-
-BestMatchEntry::BestMatchEntry(const int sim_, const char* objName_, const char* preparedName_, void* objPtr_) {
- sim=sim_;
- objPtr=objPtr_;
-
- // copy objName (if entry is too long, we simply cut it off to MaximumEntryLength)
- objName=new char[MaximumEntryLength];
-// strcpy(objName, objName_);
- char* tmpPtr=objName;
- for ( ; *objName_ && (tmpPtr-objName < MaximumEntryLength-1); tmpPtr++, objName_++)
- *tmpPtr=*objName_;
- *tmpPtr='\0';
-
- // copy preparedName (shouldn't be too long because already cut off in FuzzySearch::prepareString, but to be sure...)
- preparedName=new char[MaximumEntryLength];
-// strcpy(preparedName, preparedName_);
- tmpPtr=preparedName;
- for ( ; *preparedName_ && (tmpPtr-preparedName < MaximumEntryLength-1); tmpPtr++, preparedName_++)
- *tmpPtr=*preparedName_;
- *tmpPtr='\0';
-}
-
-
-BestMatchEntry::~BestMatchEntry() {
- delete(objName);
- delete(preparedName);
-}
-
-
-
-
// one ngram-entry associated with the searchstring
// keeps track of frequencies and, corresponding, entropy of an ngram
/*class NgramEntry {
@@ -242,90 +203,16 @@ int FuzzySearch::prepareString(char* convStr, const char* originStr)
-// re-evaluate similarities (now with frequency) and re-order list of best matches
-void FuzzySearch::newOrder() {
- if (noObjects==-1) return;
- NgramEntry** ngs;
- ngs=ngr->ngs;
- int i,j;
-
- // step 0: calculate new entropy for all ngrams (depending on frequency)
- ngr->maxScore=0;
- for(i = 1; i <= ngr->noNgrams; i++) {
- double prob=(double)ngs[i]->frequency/(double)noObjects; // probability of appearance
- double loggi=log(prob*100+1)/log(2.0); // clevere Formel...
- int entro = 20+(ngs[i]->length)*10;
- entro=(int) (entro*(1.0-loggi/10.0));
- if (entro<1) entro=1;
-
- ngr->maxScore+=entro;
- ngs[i]->entropy=entro;
- }
-
- // step 1: calculate new similarity value for all best matches
- for(j=0; j<NoBestMatches && bme[j]; j++) {
- int accuracy=0;
- // f端r jedes ngram match 端berpr端fen
- for(i=1; i<=ngr->noNgrams; i++) {
- ngs[i]->found=false;
- if(ngs[ngs[i]->precondition[0]]->found==true && ngs[ngs[i]->precondition[1]]->found==true) {
- if(strstr(bme[j]->preparedName, ngs[i]->str)) {
- accuracy += (ngs[i]->entropy);
- ngs[i]->found=true;
- }
- }
- }
- accuracy=accuracy*1000/ngr->maxScore;
- bme[j]->sim=accuracy;
- }
-
- // step 2: re-order list
- qsort((void*)bme, j, sizeof(bme[0]), bmeCompare );
- return;
-}
-
-
-
-// insert into best matches list (ordered by similarity)
-int FuzzySearch::insertIntoOrderedList(int similarity, const char* insertObject, const char* prepObject, void* objPtr)
-{
- int i, j;
- for (i=0; i<NoBestMatches; i++) {
- if (bme[i]==NULL) {
- bme[i]=new BestMatchEntry(similarity, insertObject, prepObject, objPtr);
- break;
- }
- else
- if (bme[i]->sim<similarity) {
- // delete last entry coz it will be dropped
- if (bme[NoBestMatches-1]!=NULL)
- delete(bme[NoBestMatches-1]);
- for(j=NoBestMatches-1; j>i; j--) {
- bme[j]=bme[j-1];
- }
- bme[i]=new BestMatchEntry(similarity, insertObject, prepObject, objPtr);
- break;
- }
- }
- if (bme[NoBestMatches-1]!=NULL)
- return bme[NoBestMatches-1]->sim;
- else
- return -1;
-}
-
-
void FuzzySearch::initialize(const char* searchStr_, int minNgramLen, int maxNgramLen)
{
- if (noObjects!=-1) { // object was already initialized => we have to clean up first
- // this is exactly the same as what the destructor does (but we can't call it, can we?)
- // clean up bme-list
- for(int i=0; i<NoBestMatches; i++) {
- if (bme[i]!=0)
- delete(bme[i]);
- }
+ if (ngr != NULL) {
delete ngr;
+ ngr = NULL;
}
+ int searchStrLen;
+ char searchStr[MaximumEntryLength];
+
// cut off searchstring to MaximumEntryLength
// strcpy(searchStr, searchStr_);
char* tmpPtr=searchStr;
@@ -333,54 +220,40 @@ void FuzzySearch::initialize(const char* searchStr_, int minNgramLen, int maxNgr
*tmpPtr=*searchStr_;
*tmpPtr='\0';
- // initialize list of best matches
- for (int i=0; i<NoBestMatches; i++)
- bme[i]=NULL;
- noObjects=0;
- minSim=-1;
-
+ char preparedSearchStr[MaximumEntryLength];
// printf("searchstring vorbereiten...");
searchStrLen = prepareString(preparedSearchStr, searchStr);
// printf(" result: <%s>\n", preparedSearchStr);
// printf("erzeuge ngramrep...\n");
ngr=new NgramRep(preparedSearchStr, searchStrLen, minNgramLen, maxNgramLen);
- ngs=ngr->ngs;
}
// default-constructor
FuzzySearch::FuzzySearch()
+: ngr(NULL)
{
- noObjects=-1;
-}
-
-
-// init-constructor
-FuzzySearch::FuzzySearch(const char* searchStr_, int minNgramLen, int maxNgramLen)
-{
- initialize(searchStr_, minNgramLen, maxNgramLen);
}
// destructor
FuzzySearch::~FuzzySearch()
{
- if (noObjects==-1) return;
- // clean up bme-list
- for(int i=0; i<NoBestMatches; i++) {
- if (bme[i]!=0)
- delete(bme[i]);
+ if (ngr != NULL) {
+ delete ngr;
+ ngr = NULL;
}
- delete ngr;
}
// perform searchstep
// returns: similarity coefficient between 0 and 1, -1 if error
-int FuzzySearch::checkNext(const char* nextEntry, void* objPtr)
+int FuzzySearch::checkNext(const char* nextEntry)
{
- if (noObjects==-1) return -1;
+ if (ngr == NULL) {
+ return -1;
+ }
char preparedNextEntry[MaximumEntryLength];
int accuracy=0;
int i;
@@ -388,10 +261,10 @@ int FuzzySearch::checkNext(const char* nextEntry, void* objPtr)
if (*nextEntry==0)
return -1;
- noObjects++;
// prepare the string (leading space, lowercase, ...)
prepareString(preparedNextEntry, nextEntry);
+ NgramEntry** ngs = ngr->ngs;
// f端r jedes ngram match 端berpr端fen
for(i=1; i<=ngr->noNgrams; i++) {
ngs[i]->found=false;
@@ -404,16 +277,5 @@ int FuzzySearch::checkNext(const char* nextEntry, void* objPtr)
}
}
accuracy=(accuracy*1000)/ngr->maxScore;
- // Wenn Guete >= minimaler Wert in best matches liste: insert into list
- if(accuracy > minSim) {
- minSim=insertIntoOrderedList(accuracy, nextEntry, preparedNextEntry, objPtr);
- }
return accuracy;
}
-
-// returns a sorted array of the best matches
-// returns 0 if class was not initialized
-BestMatchEntry** FuzzySearch::getBestMatchesList() {
- if (noObjects==-1) return 0;
- else return bme;
-}
diff --git a/mixxx/src/library/fuzzsrch.h b/mixxx/src/library/fuzzsrch.h
index dab103e..58e7149 100644
--- a/mixxx/src/library/fuzzsrch.h
+++ b/mixxx/src/library/fuzzsrch.h
@@ -47,22 +47,6 @@
#define MaximumNgramLen 10 // maximum length of used ngrams (more than 5 doesn't help a lot...)
-// fuzzysearch returns an array of <NoBestMatches> entries of this class
-// beware of null-pointers if checkNext is invoked less then <NoBestMatches> times!
-// in this case (eg only five streets in db), evaluate the returned array until the first null-pointer!
-// objPtr can be filled with anything when invoking FuzzySearch.checkNext(...)
-class BestMatchEntry {
-public:
- int sim; // similarity coefficient, between 0(no match at all) and 1000 (all ngrams found)
- char* objName; // original name of the object
- char* preparedName; // preprocessed name
- void* objPtr; // user-filled...whatever you like
- BestMatchEntry(const int sim_, const char* objName_, const char* preparedName_, void* objPtr_);
- ~BestMatchEntry();
-};
-
-
-
// NgramRep and NgramEntry are needed to represent the searchstring
// => only created once per search, so we don't worry too much about efficiency/space here
@@ -101,8 +85,8 @@ public:
// normal use in 4 steps:
// step 1: create class + initialize with searchstring and params
// step 2: checkNext(...) for all entries in database
-// step 3: (optional) newOrder(): reorder according to frequencies
-// step 4: getBestMatchesList() and do sth. with the result
+// step 3: (removed)
+// step 4: (removed)
//
// good values: minNgramLen=2 and maxNgramLen=5
//
@@ -113,16 +97,11 @@ public:
// - searchstrings with length 0 (no meaningful result, but no crash)
// - entries with length 0 (probably won't appear in result, which should be okay)
//
-// beware of null-pointers in step 4 if best matches list is not completely filled
-// (not enough objects checked to fill list)
class FuzzySearch {
public:
// creates a not initialized class
FuzzySearch();
- // creates and initializes class
- FuzzySearch(const char* searchstStr, int minNgramLen, int maxNgramLen);
-
// frees all resources, including the best matches list
~FuzzySearch();
@@ -130,34 +109,13 @@ public:
void initialize(const char* searchstStr, int minNgramLen, int maxNgramLen);
// checks next entry from database, returns similarity value for that entry
- int checkNext(const char* nextObj, void* objPtr);
-
- // returns the result array of best matches, 0 if class was not initialized
- BestMatchEntry** getBestMatchesList();
+ int checkNext(const char* nextObj);
- // performs the newOrder mechanism to take frequency of ngrams into account
- void newOrder();
-
- NgramEntry** ngs; // should be protected, but for testing we want to have access
+ // should be protected, but for testing we want to have access
NgramRep* ngr; // same (we want to have a look at ngrams and their frequency/entropy)
protected:
- BestMatchEntry* bme[NoBestMatches];
- int noObjects;
- int minSim;
- int searchStrLen;
- char searchStr[MaximumEntryLength];
- char preparedSearchStr[MaximumEntryLength];
-
int prepareString(char* convStr, const char* originStr);
- // compares two best match entries, considering their similarity value
- static int bmeCompare(const void *elem1, const void *elem2)
- {
- int sim1=(*(BestMatchEntry**)elem1)->sim;
- int sim2=(*(BestMatchEntry**)elem2)->sim;
- return sim2-sim1;
- }
- int insertIntoOrderedList(int similarity, const char* insertObject, const char* prepObject, void* objPtr);
};
#endif
--
1.7.2.5
------------------------------------------------------------------------------
Enable your software for Intel(R) Active Management Technology to meet the
growing manageability and security demands of your customers. Businesses
are taking advantage of Intel(R) vPro (TM) technology - will your software
be a part of the solution? Download the Intel(R) Manageability Checker
today! http://p.sf.net/sfu/intel-dev2devmar
_______________________________________________
Mixxx-devel mailing list
Mixxx-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mixxx-devel