Git commit 646b960b2c410bb6f6e9877d8d9a4caa1694047e by Robby Stephenson.
Committed on 15/02/2021 at 16:37.
Pushed by rstephenson into branch 'master'.

Update MultiFetcher to chain data sources in order

Every result from the first data source is taken as input to an update
request for each subsequent data source. Previously, each data source
was queried independently with results showing up in any order.

M  +4    -0    ChangeLog
M  +2    -2    doc/configuration.docbook
M  +1    -0    src/fetch/fetcher.cpp
M  +2    -0    src/fetch/fetchmanager.h
M  +71   -37   src/fetch/multifetcher.cpp
M  +3    -1    src/fetch/multifetcher.h
M  +14   -0    src/tests/CMakeLists.txt
M  +3    -3    src/tests/data/cat_mods.spec
A  +104  -0    src/tests/multifetchertest.cpp     [License: GPL (v2/3)]
A  +41   -0    src/tests/multifetchertest.h     [License: GPL (v2/3)]

https://invent.kde.org/office/tellico/commit/646b960b2c410bb6f6e9877d8d9a4caa1694047e

diff --git a/ChangeLog b/ChangeLog
index f5d7e9e7..9e1f2c50 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2021-02-15  Robby Stephenson  <[email protected]>
+
+       * Improved MultiFetcher to chain data source searches in order.
+
 2021-02-07  Robby Stephenson  <[email protected]>
 
        * Added UPCItemDB.com data source.
diff --git a/doc/configuration.docbook b/doc/configuration.docbook
index 800d38d2..d850ce69 100644
--- a/doc/configuration.docbook
+++ b/doc/configuration.docbook
@@ -536,7 +536,7 @@ For updating entries already in the collection, the final 
check box and edit box
 <sect2 id="multiple-sources">
 <title>Multiple Combined Data Sources</title>
 <para>
-Combinations of up to eight existing data sources can be used as a single 
source, where the search results from all the sources are merged. The 
collection type to be used must be set before adding sources.
+Combinations of up to eight existing data sources can be used as a single 
source, where each search result from the first source is updated from the 
subsequent sources. The collection type to be used must be set before adding 
sources.
 </para>
 
 <screenshot>
@@ -546,7 +546,7 @@ Combinations of up to eight existing data sources can be 
used as a single source
 </screenshot>
 
 <para>
-Only existing data sources can be used in combination. The search request is 
sent to each source, and the results are combined. Since the merged results 
depend on the order of the search results, the combined set may be different 
depending on network and source speed.
+Only existing data sources can be used in combination. Only the search type 
for the first source can be used in this source since the results come from the 
first data source. For example, a UPCitemDb search may first be done, with each 
result then updated from the TheMovieDB.
 </para>
 </sect2>
 
diff --git a/src/fetch/fetcher.cpp b/src/fetch/fetcher.cpp
index 94281e78..02b213b6 100644
--- a/src/fetch/fetcher.cpp
+++ b/src/fetch/fetcher.cpp
@@ -76,6 +76,7 @@ const Tellico::Fetch::FetchRequest& Fetcher::request() const {
 void Fetcher::startSearch(const FetchRequest& request_) {
   m_request = request_;
   if(!canFetch(m_request.collectionType)) {
+    myDebug() << "Bad collection type:" << source() << 
m_request.collectionType;
     message(i18n("%1 does not allow searching for this collection type.", 
source()),
             MessageHandler::Warning);
     emit signalDone(this);
diff --git a/src/fetch/fetchmanager.h b/src/fetch/fetchmanager.h
index eba3aba3..3710cf76 100644
--- a/src/fetch/fetchmanager.h
+++ b/src/fetch/fetchmanager.h
@@ -36,6 +36,7 @@
 #include <QPixmap>
 
 class QUrl;
+class MultiFetcherTest;
 
 namespace Tellico {
   namespace Fetch {
@@ -121,6 +122,7 @@ private Q_SLOTS:
 private:
   friend class ManagerMessage;
   friend class FetcherInitializer;
+  friend class ::MultiFetcherTest;
   static Manager* s_self;
 
   Manager();
diff --git a/src/fetch/multifetcher.cpp b/src/fetch/multifetcher.cpp
index cf540433..d410ff53 100644
--- a/src/fetch/multifetcher.cpp
+++ b/src/fetch/multifetcher.cpp
@@ -40,7 +40,7 @@ using namespace Tellico;
 using Tellico::Fetch::MultiFetcher;
 
 MultiFetcher::MultiFetcher(QObject* parent_)
-    : Fetcher(parent_), m_collType(0), m_started(false) {
+    : Fetcher(parent_), m_collType(0), m_fetcherIndex(0), m_started(false) {
 }
 
 MultiFetcher::~MultiFetcher() {
@@ -55,8 +55,8 @@ bool MultiFetcher::canFetch(int type) const {
 }
 
 bool MultiFetcher::canSearch(FetchKey k) const {
-  return k == Title ||
-         (k == ISBN && m_collType == Data::Collection::Book);
+  // can fetch anything supported by the first data source
+  return m_fetchers.isEmpty() || m_fetchers.front()->canSearch(k);;
 }
 
 void MultiFetcher::readConfigHook(const KConfigGroup& config_) {
@@ -71,11 +71,15 @@ void MultiFetcher::readSources() const {
   foreach(const QString& uuid, m_uuids) {
     Fetcher::Ptr fetcher = Manager::self()->fetcherByUuid(uuid);
     if(fetcher) {
+//      myDebug() << "Adding fetcher:" << fetcher->source();
       m_fetchers.append(fetcher);
-      connect(fetcher.data(), &Fetcher::signalResultFound,
-              this, &MultiFetcher::slotResult);
-      connect(fetcher.data(), &Fetcher::signalDone,
-              this, &MultiFetcher::slotDone);
+      // in the event we have multiple instances of same fetcher, only need to 
connect once
+      if(m_fetchers.count(fetcher) == 1) {
+        connect(fetcher.data(), &Fetcher::signalResultFound,
+                this, &MultiFetcher::slotResult);
+        connect(fetcher.data(), &Fetcher::signalDone,
+                this, &MultiFetcher::slotDone);
+      }
     }
   }
 }
@@ -83,17 +87,14 @@ void MultiFetcher::readSources() const {
 void MultiFetcher::search() {
   m_started = true;
   readSources();
-  foreach(Fetcher::Ptr fetcher, m_fetchers) {
-    fetcher->startSearch(request());
-  }
-}
-
-void MultiFetcher::continueSearch() {
-  m_started = true;
-  readSources();
-  foreach(Fetcher::Ptr fetcher, m_fetchers) {
-    fetcher->continueSearch();
+  if(m_fetchers.isEmpty()) {
+//    myDebug() << source() << "has no sources";
+    slotDone();
+    return;
   }
+  m_matches.clear();
+//  myDebug() << "Starting" << m_fetchers.front()->source();
+  m_fetchers.front()->startSearch(request());
 }
 
 void MultiFetcher::stop() {
@@ -109,30 +110,64 @@ void MultiFetcher::stop() {
 
 void MultiFetcher::slotResult(Tellico::Fetch::FetchResult* result) {
   Data::EntryPtr newEntry = result->fetchEntry();
-  // first check if we've already received this entry result from another 
fetcher
-  bool alreadyFound = false;
-  foreach(Data::EntryPtr entry, m_entries) {
-    if(entry->collection()->sameEntry(entry, newEntry) > 
EntryComparison::ENTRY_GOOD_MATCH) {
-      // same entry, so instead of adding a new result, just merge it
-      Data::Document::mergeEntry(entry, newEntry);
-      alreadyFound = true;
-      break;
-    }
-  }
-
-  if(!alreadyFound) {
+  // if fetcher index is 0, this is the first set of results, save them all
+  if(m_fetcherIndex == 0) {
+//    myDebug() << "...found new result:" << newEntry->title();
     m_entries.append(newEntry);
+    return;
   }
+
+  // otherwise, keep the entry to compare later
+  m_matches.append(newEntry);
 }
 
 void MultiFetcher::slotDone() {
-  //keep it simple, if a little slow
-  // iterate over all fetchers, if they are still running, do not emit done
-  foreach(Fetcher::Ptr fetcher, m_fetchers) {
-    if(fetcher->isSearching())  {
-      return;
+  if(m_fetcherIndex == 0) {
+    m_fetcherIndex++;
+    m_resultIndex = 0;
+  } else {
+    // iterate over all the matches from this data source and figure out which 
one is the best match to the existing result
+    Data::EntryPtr entry = m_entries.at(m_resultIndex);
+    int bestScore = -1;
+    int bestIndex = -1;
+    for(int idx = 0; idx < m_matches.count(); ++idx) {
+      auto match = m_matches.at(idx);
+      const int score = entry->collection()->sameEntry(entry, match);
+      if(score > bestScore) {
+        bestScore = score;
+        bestIndex = idx;
+      }
+      if(score > EntryComparison::ENTRY_PERFECT_MATCH) {
+        // no need to compare further
+        break;
+      }
+    }
+//    myDebug() << "best score" << bestScore  << "; index:" << bestIndex;
+    if(bestIndex > -1 && bestScore > EntryComparison::ENTRY_GOOD_MATCH) {
+      auto newEntry = m_matches.at(bestIndex);
+//      myDebug() << "...merging from" << newEntry->title() << "into" << 
entry->title();
+      Data::Document::mergeEntry(entry, newEntry);
+    } else {
+//      myDebug() << "___no match for" << entry->title();
     }
+
+    // now, bump to next result and continue trying to match
+    m_resultIndex++;
+    if(m_resultIndex >= m_entries.count()) {
+      m_fetcherIndex++;
+      m_resultIndex = 0;
+    }
+  }
+
+  if(m_fetcherIndex < m_fetchers.count() && m_resultIndex < m_entries.count()) 
{
+    Fetcher::Ptr fetcher = m_fetchers.at(m_fetcherIndex);
+    Q_ASSERT(fetcher);
+//    myDebug() << "updating entry#" << m_resultIndex << "from" << 
fetcher->source();
+    fetcher->startUpdate(m_entries.at(m_resultIndex));
+    return;
   }
+
+  // at this point, all the fetchers have run through all the results, so we're
   // done so emit all results
   foreach(Data::EntryPtr entry, m_entries) {
     FetchResult* r = new FetchResult(Fetcher::Ptr(this), entry);
@@ -152,12 +187,11 @@ Tellico::Data::EntryPtr MultiFetcher::fetchEntryHook(uint 
uid_) {
 }
 
 Tellico::Fetch::FetchRequest MultiFetcher::updateRequest(Data::EntryPtr 
entry_) {
-//  myDebug();
-  QString isbn = entry_->field(QStringLiteral("isbn"));
+  const QString isbn = entry_->field(QStringLiteral("isbn"));
   if(!isbn.isEmpty()) {
     return FetchRequest(ISBN, isbn);
   }
-  QString title = entry_->field(QStringLiteral("title"));
+  const QString title = entry_->field(QStringLiteral("title"));
   if(!title.isEmpty()) {
     return FetchRequest(Title, title);
   }
diff --git a/src/fetch/multifetcher.h b/src/fetch/multifetcher.h
index ca658d33..c7495739 100644
--- a/src/fetch/multifetcher.h
+++ b/src/fetch/multifetcher.h
@@ -61,7 +61,6 @@ public:
    */
   virtual QString source() const Q_DECL_OVERRIDE;
   virtual bool isSearching() const Q_DECL_OVERRIDE { return m_started; }
-  virtual void continueSearch() Q_DECL_OVERRIDE;
   virtual bool canSearch(FetchKey k) const Q_DECL_OVERRIDE;
   virtual void stop() Q_DECL_OVERRIDE;
   virtual Data::EntryPtr fetchEntryHook(uint uid) Q_DECL_OVERRIDE;
@@ -93,10 +92,13 @@ private:
   void readSources() const;
 
   Data::EntryList m_entries;
+  Data::EntryList m_matches;
   QHash<uint, Data::EntryPtr> m_entryHash;
   int m_collType;
   QStringList m_uuids;
   mutable QList<Fetcher::Ptr> m_fetchers;
+  int m_fetcherIndex;
+  int m_resultIndex;
 
   bool m_started;
 };
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 57793661..b0c38ee0 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -794,6 +794,20 @@ add_test(mrlookupfetchertest mrlookupfetchertest)
 ecm_mark_as_test(mrlookupfetchertest)
 TARGET_LINK_LIBRARIES(mrlookupfetchertest fetcherstest ${TELLICO_BTPARSE_LIBS} 
${TELLICO_TEST_LIBS})
 
+add_executable(multifetchertest multifetchertest.cpp abstractfetchertest.cpp
+  ../fetch/multifetcher.cpp
+  ../fetch/execexternalfetcher.cpp
+  ../fetch/messagelogger.cpp
+  ../translators/bibteximporter.cpp
+  ../translators/risimporter.cpp
+  ../gui/collectiontypecombo.cpp
+  ../gui/kwidgetlister.cpp
+)
+ecm_mark_nongui_executable(multifetchertest)
+add_test(multifetchertest multifetchertest)
+ecm_mark_as_test(multifetchertest)
+TARGET_LINK_LIBRARIES(multifetchertest fetcherstest newstuff 
${TELLICO_BTPARSE_LIBS} ${TELLICO_TEST_LIBS})
+
 add_executable(musicbrainzfetchertest musicbrainzfetchertest.cpp 
abstractfetchertest.cpp
   ../fetch/musicbrainzfetcher.cpp
 )
diff --git a/src/tests/data/cat_mods.spec b/src/tests/data/cat_mods.spec
index 9561b8fe..cb8060e3 100644
--- a/src/tests/data/cat_mods.spec
+++ b/src/tests/data/cat_mods.spec
@@ -1,8 +1,8 @@
-ArgumentKeys=1
-Arguments=%1
+ArgumentKeys=1,3
+Arguments=%1,%1
 CollectionType=2
 FormatType=6
 Name=Cat File
 Type=data-source
-UpdateArgs=%{title}
+UpdateArgs=%{title} %{isbn}
 Uuid={d8eeb500-0333-4c29-a390-618c97e060cb}
diff --git a/src/tests/multifetchertest.cpp b/src/tests/multifetchertest.cpp
new file mode 100644
index 00000000..a966df48
--- /dev/null
+++ b/src/tests/multifetchertest.cpp
@@ -0,0 +1,104 @@
+/***************************************************************************
+    Copyright (C) 2021 Robby Stephenson <[email protected]>
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or         *
+ *   modify it under the terms of the GNU General Public License as        *
+ *   published by the Free Software Foundation; either version 2 of        *
+ *   the License or (at your option) version 3 or any later version        *
+ *   accepted by the membership of KDE e.V. (or its successor approved     *
+ *   by the membership of KDE e.V.), which shall act as a proxy            *
+ *   defined in Section 14 of version 3 of the license.                    *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
+ *                                                                         *
+ ***************************************************************************/
+
+#include "multifetchertest.h"
+
+#include "../fetch/multifetcher.h"
+#include "../fetch/execexternalfetcher.h"
+#include "../fetch/fetchmanager.h"
+#include "../fetch/messagelogger.h"
+#include "../collections/bookcollection.h"
+#include "../collectionfactory.h"
+#include "../entry.h"
+#include "../utils/datafileregistry.h"
+
+#include <KSharedConfig>
+#include <KConfigGroup>
+
+#include <QTest>
+
+QTEST_GUILESS_MAIN( MultiFetcherTest )
+
+MultiFetcherTest::MultiFetcherTest() : AbstractFetcherTest() {
+}
+
+void MultiFetcherTest::initTestCase() {
+  Tellico::RegisterCollection<Tellico::Data::BookCollection> 
registerBook(Tellico::Data::Collection::Book, "book");
+  
Tellico::DataFileRegistry::self()->addDataLocation(QFINDTESTDATA("../../xslt/mods2tellico.xsl"));
+}
+
+void MultiFetcherTest::testEmpty() {
+  auto config = KSharedConfig::openConfig(QString(), 
KConfig::SimpleConfig)->group(QStringLiteral("multi"));
+  config.writeEntry("CollectionType", int(Tellico::Data::Collection::Book));
+
+  Tellico::Fetch::FetchRequest request(Tellico::Data::Collection::Book, 
Tellico::Fetch::Title, QString());
+  Tellico::Fetch::Fetcher::Ptr fetcher(new Tellico::Fetch::MultiFetcher(this));
+  fetcher->readConfig(config);
+
+  Tellico::Data::EntryList results = DO_FETCH(fetcher, request);
+  QVERIFY(results.isEmpty());
+}
+
+void MultiFetcherTest::testIsbn() {
+  // just going to chain two trivial data sources together to test multifetcher
+  Tellico::Fetch::Fetcher::Ptr modsFetcher1(new 
Tellico::Fetch::ExecExternalFetcher(this));
+  Tellico::Fetch::Fetcher::Ptr modsFetcher2(new 
Tellico::Fetch::ExecExternalFetcher(this));
+
+  KSharedConfig::Ptr catConfig = 
KSharedConfig::openConfig(QFINDTESTDATA("data/cat_mods.spec"), 
KConfig::SimpleConfig);
+  KConfigGroup catConfigGroup = catConfig->group(QStringLiteral("<default>"));
+  catConfigGroup.writeEntry("ExecPath", QFINDTESTDATA("data/cat_mods.sh")); // 
update command path to local script
+  catConfigGroup.markAsClean(); // don't edit the file on sync()
+  modsFetcher1->readConfig(catConfigGroup);
+  modsFetcher1->setMessageHandler(new Tellico::Fetch::MessageLogger);
+  modsFetcher2->readConfig(catConfigGroup);
+  modsFetcher2->setMessageHandler(new Tellico::Fetch::MessageLogger);
+
+  auto fetchManager = Tellico::Fetch::Manager::self();
+  fetchManager->addFetcher(modsFetcher1);
+  fetchManager->addFetcher(modsFetcher2);
+
+  QStringList uuids;
+  uuids << modsFetcher1->uuid() << modsFetcher2->uuid();
+
+  auto multiConfig = KSharedConfig::openConfig(QString(), 
KConfig::SimpleConfig)->group(QStringLiteral("multi"));
+  multiConfig.writeEntry("Sources", uuids);
+  multiConfig.writeEntry("CollectionType", 
int(Tellico::Data::Collection::Book));
+
+  Tellico::Fetch::FetchRequest isbnRequest(Tellico::Data::Collection::Book,
+                                           Tellico::Fetch::ISBN,
+                                           QStringLiteral("0801486394"));
+  Tellico::Fetch::Fetcher::Ptr multiFetcher(new 
Tellico::Fetch::MultiFetcher(this));
+  multiFetcher->readConfig(multiConfig);
+  multiFetcher->setMessageHandler(new Tellico::Fetch::MessageLogger);
+
+  Tellico::Data::EntryList results = DO_FETCH(multiFetcher, isbnRequest);
+
+  QCOMPARE(results.size(), 1);
+
+  Tellico::Data::EntryPtr entry = results.at(0);
+  QVERIFY(entry);
+
+  QCOMPARE(entry->field(QStringLiteral("title")), QStringLiteral("Sound and 
fury"));
+  QCOMPARE(entry->field(QStringLiteral("isbn")), 
QStringLiteral("0-8014-8639-4"));
+}
diff --git a/src/tests/multifetchertest.h b/src/tests/multifetchertest.h
new file mode 100644
index 00000000..273a42ed
--- /dev/null
+++ b/src/tests/multifetchertest.h
@@ -0,0 +1,41 @@
+/***************************************************************************
+    Copyright (C) 2021 Robby Stephenson <[email protected]>
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or         *
+ *   modify it under the terms of the GNU General Public License as        *
+ *   published by the Free Software Foundation; either version 2 of        *
+ *   the License or (at your option) version 3 or any later version        *
+ *   accepted by the membership of KDE e.V. (or its successor approved     *
+ *   by the membership of KDE e.V.), which shall act as a proxy            *
+ *   defined in Section 14 of version 3 of the license.                    *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef MULTIFETCHERTEST_H
+#define MULTIFETCHERTEST_H
+
+#include "abstractfetchertest.h"
+
+class MultiFetcherTest : public AbstractFetcherTest {
+Q_OBJECT
+public:
+  MultiFetcherTest();
+
+private Q_SLOTS:
+  void initTestCase();
+  void testEmpty();
+  void testIsbn();
+};
+
+#endif

Reply via email to