Hello community, here is the log from the commit of package kdebase4-runtime for openSUSE:Factory checked in at 2012-03-20 17:48:55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/kdebase4-runtime (Old) and /work/SRC/openSUSE:Factory/.kdebase4-runtime.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "kdebase4-runtime", Maintainer is "[email protected]" Changes: -------- --- /work/SRC/openSUSE:Factory/kdebase4-runtime/kdebase4-runtime.changes 2012-03-12 20:14:43.000000000 +0100 +++ /work/SRC/openSUSE:Factory/.kdebase4-runtime.new/kdebase4-runtime.changes 2012-03-20 17:48:56.000000000 +0100 @@ -1,0 +2,7 @@ +Thu Mar 15 11:19:22 UTC 2012 - [email protected] + +- Add patches from 4.8 branch to improve mail indexing + * Improve performance of storing semantic metadata + * Fix failure to store semantic metadata on merge with existing + +------------------------------------------------------------------- New: ---- 50ef3393-nepomuk-data-merging-fix.diff 754275ed-nepomuk-res-ident-perf.diff ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ kdebase4-runtime.spec ++++++ --- /var/tmp/diff_new_pack.zxT85k/_old 2012-03-20 17:48:58.000000000 +0100 +++ /var/tmp/diff_new_pack.zxT85k/_new 2012-03-20 17:48:58.000000000 +0100 @@ -16,6 +16,7 @@ # + Name: kdebase4-runtime Version: 4.8.1 Release: 0 @@ -41,6 +42,8 @@ Patch15: kdesu-symbol-lookup-workaround.diff Patch16: phonon-always-forget.diff Patch17: desktop-files.diff +Patch18: 754275ed-nepomuk-res-ident-perf.diff +Patch19: 50ef3393-nepomuk-data-merging-fix.diff BuildRequires: NetworkManager-devel BuildRequires: QtZeitgeist-devel BuildRequires: bluez-devel @@ -168,6 +171,8 @@ %patch15 %patch16 %patch17 +%patch18 -p1 +%patch19 -p1 %build %cmake_kde4 -d build -- -DKDE4_ENABLE_FPIE=1 ++++++ 50ef3393-nepomuk-data-merging-fix.diff ++++++ commit 50ef33937d04c6e4b72e1d492c6cc1ef9f699151 Author: Vishesh Handa <[email protected]> Date: Tue Mar 13 18:23:06 2012 +0530 Merge duplicate statements in the ResourceMerger This happens in cases where two resources identify to the same resource and their statements are resolved in the ResourceMerger. 
During the cardinality checks, there exist identical duplicates (because of the merge) of certain properties which result in cardinality errors. Also added a unit test. This should fix the problem with PIM Email Indexing CCMAIL: [email protected] diff --git a/nepomuk/services/storage/resourcemerger.cpp b/nepomuk/services/storage/resourcemerger.cpp index 0aa4ae7..93d39be 100644 --- a/nepomuk/services/storage/resourcemerger.cpp +++ b/nepomuk/services/storage/resourcemerger.cpp @@ -509,20 +509,26 @@ Soprano::Node Nepomuk::ResourceMerger::resolveUnmappedNode(const Soprano::Node& return newUri; } -void Nepomuk::ResourceMerger::resolveBlankNodesInList(QList<Soprano::Statement> *stList) +void Nepomuk::ResourceMerger::resolveBlankNodesInSet(QSet<Soprano::Statement> *stList) { - QMutableListIterator<Soprano::Statement> iter( *stList ); + QSet<Soprano::Statement> newSet; + + QSetIterator<Soprano::Statement> iter( *stList ); while( iter.hasNext() ) { - Soprano::Statement &st = iter.next(); + Soprano::Statement st = iter.next(); st.setSubject( resolveUnmappedNode(st.subject()) ); st.setObject( resolveUnmappedNode(st.object()) ); + + newSet.insert( st ); } + + *stList = newSet; } -void Nepomuk::ResourceMerger::removeDuplicatesInList(QList<Soprano::Statement> *stList) +void Nepomuk::ResourceMerger::removeDuplicatesInList(QSet<Soprano::Statement> *stList) { - QMutableListIterator<Soprano::Statement> it( *stList ); + QMutableSetIterator<Soprano::Statement> it( *stList ); while( it.hasNext() ) { const Soprano::Statement &st = it.next(); if( st.subject().isBlank() || st.object().isBlank() ) @@ -649,9 +655,9 @@ bool Nepomuk::ResourceMerger::merge( const Soprano::Graph& stGraph ) // First separate all the statements predicate rdf:type. 
// and collect info required to check the types and cardinality // - QList<Soprano::Statement> remainingStatements; - QList<Soprano::Statement> typeStatements; - QList<Soprano::Statement> metadataStatements; + QSet<Soprano::Statement> remainingStatements; + QSet<Soprano::Statement> typeStatements; + QSet<Soprano::Statement> metadataStatements; foreach( const Soprano::Statement & st, statements ) { const QUrl subUri = getBlankOrResourceUri( st.subject() ); @@ -676,7 +682,9 @@ bool Nepomuk::ResourceMerger::merge( const Soprano::Graph& stGraph ) // Get the cardinality if( tree->maxCardinality( prop ) > 0 ) { QPair<QUrl,QUrl> subPredPair( subUri, st.predicate().uri() ); - cardinality.insert( subPredPair, st.object() ); + if( !cardinality.contains( subPredPair, st.object() ) ) { + cardinality.insert( subPredPair, st.object() ); + } } } @@ -892,9 +900,9 @@ bool Nepomuk::ResourceMerger::merge( const Soprano::Graph& stGraph ) } // Create all the blank nodes - resolveBlankNodesInList( &typeStatements ); - resolveBlankNodesInList( &remainingStatements ); - resolveBlankNodesInList( &metadataStatements ); + resolveBlankNodesInSet( &typeStatements ); + resolveBlankNodesInSet( &remainingStatements ); + resolveBlankNodesInSet( &metadataStatements ); // Push all these statements and get the list of all the modified resource foreach( Soprano::Statement st, typeStatements ) { diff --git a/nepomuk/services/storage/resourcemerger.h b/nepomuk/services/storage/resourcemerger.h index 8cd4ad5..8258f52 100644 --- a/nepomuk/services/storage/resourcemerger.h +++ b/nepomuk/services/storage/resourcemerger.h @@ -74,13 +74,13 @@ namespace Nepomuk { Soprano::Node resolveUnmappedNode( const Soprano::Node& node ); /// This modifies the list - void resolveBlankNodesInList( QList<Soprano::Statement> *stList ); + void resolveBlankNodesInSet( QSet<Soprano::Statement> *stList ); /** * Removes all the statements that already exist in the model * and adds them to m_duplicateStatements */ - void 
removeDuplicatesInList( QList<Soprano::Statement> *stList ); + void removeDuplicatesInList( QSet<Soprano::Statement> *stList ); QMultiHash<QUrl, Soprano::Statement> m_duplicateStatements; QHash<QUrl, QUrl> m_mappings; diff --git a/nepomuk/services/storage/test/datamanagementmodeltest.cpp b/nepomuk/services/storage/test/datamanagementmodeltest.cpp index 0c37a5e..b4c8bfb 100644 --- a/nepomuk/services/storage/test/datamanagementmodeltest.cpp +++ b/nepomuk/services/storage/test/datamanagementmodeltest.cpp @@ -4669,8 +4669,36 @@ void DataManagementModelTest::testStoreResources_duplicates2() int emailCount = m_model->listStatements( Node(), RDF::type(), NCO::EmailAddress() ).allStatements().size(); QCOMPARE( emailCount, 1 ); + + QVERIFY(!haveTrailingGraphs()); + QVERIFY(!haveDataInDefaultGraph()); } +void DataManagementModelTest::testStoreResources_duplicatesInMerger() +{ + SimpleResource contact1; + contact1.addType( NCO::PersonContact() ); + contact1.setProperty( NCO::fullname(), QLatin1String("Rachel McAdams") ); + + SimpleResourceGraph graph; + graph << contact1; + + m_dmModel->storeResources( graph, QLatin1String("appA") ); + QVERIFY(!m_dmModel->lastError()); + + SimpleResource contact2; + contact2.addType( NCO::PersonContact() ); + contact2.setProperty( NCO::fullname(), QLatin1String("Rachel McAdams") ); + contact2.setProperty( NAO::prefLabel(), QLatin1String("Rachel McAdams") ); + + graph << contact2; + + m_dmModel->storeResources( graph, QLatin1String("appA") ); + QVERIFY(!m_dmModel->lastError()); + + QVERIFY(!haveTrailingGraphs()); + QVERIFY(!haveDataInDefaultGraph()); +} void DataManagementModelTest::testStoreResources_overwriteProperties() { diff --git a/nepomuk/services/storage/test/datamanagementmodeltest.h b/nepomuk/services/storage/test/datamanagementmodeltest.h index 93dd913..ab87f3a 100644 --- a/nepomuk/services/storage/test/datamanagementmodeltest.h +++ b/nepomuk/services/storage/test/datamanagementmodeltest.h @@ -143,6 +143,7 @@ private Q_SLOTS: void 
testStoreResources_kioProtocols(); void testStoreResources_duplicates(); void testStoreResources_duplicates2(); + void testStoreResources_duplicatesInMerger(); void testStoreResources_overwriteProperties(); void testStoreResources_overwriteProperties_invalidCard(); void testStoreResources_correctDomainInStore(); diff --git a/nepomuk/services/storage/test/qtest_dms.cpp b/nepomuk/services/storage/test/qtest_dms.cpp index 64949fd..9afd290 100644 --- a/nepomuk/services/storage/test/qtest_dms.cpp +++ b/nepomuk/services/storage/test/qtest_dms.cpp @@ -187,6 +187,10 @@ void Nepomuk::insertOntologies(Soprano::Model* _model, const QUrl& graph) model.addStatement( NCO::Contact(), RDFS::subClassOf(), NCO::Role(), graph ); model.addStatement( NCO::Contact(), RDFS::subClassOf(), NAO::Party(), graph ); + model.addStatement( NCO::PersonContact(), RDF::type(), RDFS::Resource(), graph ); + model.addStatement( NCO::PersonContact(), RDF::type(), RDFS::Class(), graph ); + model.addStatement( NCO::PersonContact(), RDFS::subClassOf(), NCO::Contact(), graph ); + model.addStatement( NAO::Tag(), RDF::type(), RDFS::Class(), graph ); model.addStatement( NFO::FileDataObject(), RDF::type(), RDFS::Class(), graph ); model.addStatement( NFO::Folder(), RDF::type(), RDFS::Class(), graph ); ++++++ 754275ed-nepomuk-res-ident-perf.diff ++++++ commit 754275eda610dce1160286a76339353097d8764c Author: Sebastian Trueg <[email protected]> Date: Fri Mar 9 17:17:48 2012 +0100 Backport from nepomuk-core: improved performance on res identification. 
BUG: 289932 FIXED-IN: 4.8.2 diff --git a/nepomuk/services/backupsync/lib/resourceidentifier.cpp b/nepomuk/services/backupsync/lib/resourceidentifier.cpp index c1a9919..894372c 100644 --- a/nepomuk/services/backupsync/lib/resourceidentifier.cpp +++ b/nepomuk/services/backupsync/lib/resourceidentifier.cpp @@ -31,6 +31,7 @@ #include <Soprano/Statement> #include <Soprano/Graph> #include <Soprano/Node> +#include <Soprano/BindingSet> #include <Soprano/StatementIterator> #include <Soprano/QueryResultIterator> #include <Soprano/Model> @@ -176,19 +177,18 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri) return false; } - QString query; - QStringList identifyingProperties; QHash<KUrl, Soprano::Node> identifyingPropertiesHash; QHash< KUrl, Soprano::Node >::const_iterator it = res.constBegin(); QHash< KUrl, Soprano::Node >::const_iterator constEnd = res.constEnd(); + QList<Soprano::Node> requiredTypes; for( ; it != constEnd; it++ ) { const QUrl & prop = it.key(); // Special handling for rdf:type if( prop == RDF::type() ) { - query += QString::fromLatin1(" ?r a %1 . ").arg( it.value().toN3() ); + requiredTypes << it.value().uri(); continue; } @@ -219,6 +219,10 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri) return false; } + + // construct the identification query + QString query = QLatin1String("select distinct ?r where { "); + // // Optimization: // If there is only one identifying property using all that optional and filter stuff @@ -235,7 +239,7 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri) QString::number( numIdentifyingProperties++ ) ); } - // Make sure atleast one of the identification properties has been matched + // Make sure at least one of the identification properties has been matched // by adding filter( bound(?o1) || bound(?o2) ... 
) query += QString::fromLatin1("filter( "); for( int i=0; i<numIdentifyingProperties-1; i++ ) { @@ -247,43 +251,68 @@ bool Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri) query += QString::fromLatin1("?r %1 %2 . ").arg(Soprano::Node::resourceToN3(identifyingPropertiesHash.constBegin().key()), identifyingPropertiesHash.constBegin().value().toN3()); } - query += QLatin1String("}"); - // Construct the entire query - QString queryBegin = QString::fromLatin1("select distinct ?r count(?p) as ?cnt " - "where { ?r ?p ?o. filter( ?p in (%1) ).") - .arg( identifyingProperties.join(",") ); - - query = queryBegin + query + QString::fromLatin1(" order by desc(?cnt)"); + // + // For performance reasons we add a limit even though this could mean that we + // miss a resource to identify since we check the types below. + // + query += QLatin1String("} LIMIT 100"); - kDebug() << query; // - // Only store the results which have the maximum score + // Fetch a score for each result. + // We do this in a separate query for performance reasons. // - QSet<KUrl> results; - int score = -1; + QMultiHash<int, KUrl> resultsScoreHash; + int maxScore = -1; Soprano::QueryResultIterator qit = d->m_model->executeQuery( query, Soprano::Query::QueryLanguageSparql ); while( qit.next() ) { - //kDebug() << "RESULT: " << qit["r"] << " " << qit["cnt"]; + const Soprano::Node r(qit["r"]); + + // + // Check the type requirements. Experiments have shown this to mean a substantial + // performance boost as compared to doing it in the main query. + // + if(!requiredTypes.isEmpty() ) { + query = QLatin1String("ask where { "); + foreach(const Soprano::Node& type, requiredTypes) { + query += QString::fromLatin1("%1 a %2 . 
").arg(r.toN3(), type.toN3()); + } + query += QLatin1String("}"); + if(!d->m_model->executeQuery(query, Soprano::Query::QueryLanguageSparql).boolValue()) { + continue; + } + } + + + const int score = d->m_model->executeQuery(QString::fromLatin1("select count(?p) as ?cnt where { " + "%1 ?p ?o. filter( ?p in (%2) ) . }") + .arg( r.toN3(), + identifyingProperties.join(",") ), + Soprano::Query::QueryLanguageSparql) + .allBindings().first()["cnt"].literal().toInt(); - int count = qit["cnt"].literal().toInt(); - if( score == -1 ) { - score = count; + if( maxScore < score ) { + maxScore = score; } - else if( count < score ) - break; - results << qit["r"].uri(); + resultsScoreHash.insert(score, r.uri()); } + // + // Only get the results which have the maximum score + // + QSet<KUrl> results = QSet<KUrl>::fromList(resultsScoreHash.values(maxScore)); + + //kDebug() << "Got " << results.size() << " results"; if( results.empty() ) return false; KUrl newUri; - if( results.size() == 1 ) + if( results.size() == 1 ) { newUri = *results.begin(); + } else { kDebug() << "DUPLICATE RESULTS!"; newUri = duplicateMatch( res.uri(), results ); -- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
