Hello community,

here is the log from the commit of package kdebase4-runtime for 
openSUSE:Factory checked in at 2012-03-20 17:48:55
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/kdebase4-runtime (Old)
 and      /work/SRC/openSUSE:Factory/.kdebase4-runtime.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "kdebase4-runtime", Maintainer is "[email protected]"

Changes:
--------
--- /work/SRC/openSUSE:Factory/kdebase4-runtime/kdebase4-runtime.changes        
2012-03-12 20:14:43.000000000 +0100
+++ /work/SRC/openSUSE:Factory/.kdebase4-runtime.new/kdebase4-runtime.changes   
2012-03-20 17:48:56.000000000 +0100
@@ -1,0 +2,7 @@
+Thu Mar 15 11:19:22 UTC 2012 - [email protected]
+
+- Add patches from 4.8 branch to improve mail indexing
+  * Improve performance of storing semantic metadata
+  * Fix failure to store semantic metadata on merge with existing
+
+-------------------------------------------------------------------

New:
----
  50ef3393-nepomuk-data-merging-fix.diff
  754275ed-nepomuk-res-ident-perf.diff

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ kdebase4-runtime.spec ++++++
--- /var/tmp/diff_new_pack.zxT85k/_old  2012-03-20 17:48:58.000000000 +0100
+++ /var/tmp/diff_new_pack.zxT85k/_new  2012-03-20 17:48:58.000000000 +0100
@@ -16,6 +16,7 @@
 #
 
 
+
 Name:           kdebase4-runtime
 Version:        4.8.1
 Release:        0
@@ -41,6 +42,8 @@
 Patch15:        kdesu-symbol-lookup-workaround.diff
 Patch16:        phonon-always-forget.diff
 Patch17:        desktop-files.diff
+Patch18:        754275ed-nepomuk-res-ident-perf.diff
+Patch19:        50ef3393-nepomuk-data-merging-fix.diff
 BuildRequires:  NetworkManager-devel
 BuildRequires:  QtZeitgeist-devel
 BuildRequires:  bluez-devel
@@ -168,6 +171,8 @@
 %patch15
 %patch16
 %patch17
+%patch18 -p1
+%patch19 -p1
 
 %build
   %cmake_kde4 -d build -- -DKDE4_ENABLE_FPIE=1

++++++ 50ef3393-nepomuk-data-merging-fix.diff ++++++
commit 50ef33937d04c6e4b72e1d492c6cc1ef9f699151
Author: Vishesh Handa <[email protected]>
Date:   Tue Mar 13 18:23:06 2012 +0530

    Merge duplicate statements in the ResourceMerger
    
    This happens in cases where two resources identify to the same resource and
    their statements are resolved in the ResourceMerger. During the cardinality
    checks, there exist identical duplicates (because of the merge) of certain
    properties which result in cardinality errors.
    
    Also added a unit test.
    
    This should fix the problem with PIM Email Indexing
    
    CCMAIL: [email protected]

diff --git a/nepomuk/services/storage/resourcemerger.cpp 
b/nepomuk/services/storage/resourcemerger.cpp
index 0aa4ae7..93d39be 100644
--- a/nepomuk/services/storage/resourcemerger.cpp
+++ b/nepomuk/services/storage/resourcemerger.cpp
@@ -509,20 +509,26 @@ Soprano::Node 
Nepomuk::ResourceMerger::resolveUnmappedNode(const Soprano::Node&
     return newUri;
 }
 
-void 
Nepomuk::ResourceMerger::resolveBlankNodesInList(QList<Soprano::Statement> 
*stList)
+void Nepomuk::ResourceMerger::resolveBlankNodesInSet(QSet<Soprano::Statement> 
*stList)
 {
-    QMutableListIterator<Soprano::Statement> iter( *stList );
+    QSet<Soprano::Statement> newSet;
+
+    QSetIterator<Soprano::Statement> iter( *stList );
     while( iter.hasNext() ) {
-        Soprano::Statement &st = iter.next();
+        Soprano::Statement st = iter.next();
 
         st.setSubject( resolveUnmappedNode(st.subject()) );
         st.setObject( resolveUnmappedNode(st.object()) );
+
+        newSet.insert( st );
     }
+
+    *stList = newSet;
 }
 
-void Nepomuk::ResourceMerger::removeDuplicatesInList(QList<Soprano::Statement> 
*stList)
+void Nepomuk::ResourceMerger::removeDuplicatesInList(QSet<Soprano::Statement> 
*stList)
 {
-    QMutableListIterator<Soprano::Statement> it( *stList );
+    QMutableSetIterator<Soprano::Statement> it( *stList );
     while( it.hasNext() ) {
         const Soprano::Statement &st = it.next();
         if( st.subject().isBlank() || st.object().isBlank() )
@@ -649,9 +655,9 @@ bool Nepomuk::ResourceMerger::merge( const Soprano::Graph& 
stGraph )
     // First separate all the statements predicate rdf:type.
     // and collect info required to check the types and cardinality
     //
-    QList<Soprano::Statement> remainingStatements;
-    QList<Soprano::Statement> typeStatements;
-    QList<Soprano::Statement> metadataStatements;
+    QSet<Soprano::Statement> remainingStatements;
+    QSet<Soprano::Statement> typeStatements;
+    QSet<Soprano::Statement> metadataStatements;
 
     foreach( const Soprano::Statement & st, statements ) {
         const QUrl subUri = getBlankOrResourceUri( st.subject() );
@@ -676,7 +682,9 @@ bool Nepomuk::ResourceMerger::merge( const Soprano::Graph& 
stGraph )
         // Get the cardinality
         if( tree->maxCardinality( prop ) > 0 ) {
             QPair<QUrl,QUrl> subPredPair( subUri, st.predicate().uri() );
-            cardinality.insert( subPredPair, st.object() );
+            if( !cardinality.contains( subPredPair, st.object() ) ) {
+                cardinality.insert( subPredPair, st.object() );
+            }
         }
     }
 
@@ -892,9 +900,9 @@ bool Nepomuk::ResourceMerger::merge( const Soprano::Graph& 
stGraph )
     }
 
     // Create all the blank nodes
-    resolveBlankNodesInList( &typeStatements );
-    resolveBlankNodesInList( &remainingStatements );
-    resolveBlankNodesInList( &metadataStatements );
+    resolveBlankNodesInSet( &typeStatements );
+    resolveBlankNodesInSet( &remainingStatements );
+    resolveBlankNodesInSet( &metadataStatements );
 
     // Push all these statements and get the list of all the modified resource
     foreach( Soprano::Statement st, typeStatements ) {
diff --git a/nepomuk/services/storage/resourcemerger.h 
b/nepomuk/services/storage/resourcemerger.h
index 8cd4ad5..8258f52 100644
--- a/nepomuk/services/storage/resourcemerger.h
+++ b/nepomuk/services/storage/resourcemerger.h
@@ -74,13 +74,13 @@ namespace Nepomuk {
         Soprano::Node resolveUnmappedNode( const Soprano::Node& node );
 
         /// This modifies the list
-        void resolveBlankNodesInList( QList<Soprano::Statement> *stList );
+        void resolveBlankNodesInSet( QSet<Soprano::Statement> *stList );
 
         /**
          * Removes all the statements that already exist in the model
          * and adds them to m_duplicateStatements
          */
-        void removeDuplicatesInList( QList<Soprano::Statement> *stList );
+        void removeDuplicatesInList( QSet<Soprano::Statement> *stList );
         QMultiHash<QUrl, Soprano::Statement> m_duplicateStatements;
 
         QHash<QUrl, QUrl> m_mappings;
diff --git a/nepomuk/services/storage/test/datamanagementmodeltest.cpp 
b/nepomuk/services/storage/test/datamanagementmodeltest.cpp
index 0c37a5e..b4c8bfb 100644
--- a/nepomuk/services/storage/test/datamanagementmodeltest.cpp
+++ b/nepomuk/services/storage/test/datamanagementmodeltest.cpp
@@ -4669,8 +4669,36 @@ void 
DataManagementModelTest::testStoreResources_duplicates2()
 
     int emailCount = m_model->listStatements( Node(), RDF::type(), 
NCO::EmailAddress() ).allStatements().size();
     QCOMPARE( emailCount, 1 );
+
+    QVERIFY(!haveTrailingGraphs());
+    QVERIFY(!haveDataInDefaultGraph());
 }
 
+void DataManagementModelTest::testStoreResources_duplicatesInMerger()
+{
+    SimpleResource contact1;
+    contact1.addType( NCO::PersonContact() );
+    contact1.setProperty( NCO::fullname(), QLatin1String("Rachel McAdams") );
+
+    SimpleResourceGraph graph;
+    graph << contact1;
+
+    m_dmModel->storeResources( graph, QLatin1String("appA") );
+    QVERIFY(!m_dmModel->lastError());
+
+    SimpleResource contact2;
+    contact2.addType( NCO::PersonContact() );
+    contact2.setProperty( NCO::fullname(), QLatin1String("Rachel McAdams") );
+    contact2.setProperty( NAO::prefLabel(), QLatin1String("Rachel McAdams") );
+
+    graph << contact2;
+
+    m_dmModel->storeResources( graph, QLatin1String("appA") );
+    QVERIFY(!m_dmModel->lastError());
+
+    QVERIFY(!haveTrailingGraphs());
+    QVERIFY(!haveDataInDefaultGraph());
+}
 
 void DataManagementModelTest::testStoreResources_overwriteProperties()
 {
diff --git a/nepomuk/services/storage/test/datamanagementmodeltest.h 
b/nepomuk/services/storage/test/datamanagementmodeltest.h
index 93dd913..ab87f3a 100644
--- a/nepomuk/services/storage/test/datamanagementmodeltest.h
+++ b/nepomuk/services/storage/test/datamanagementmodeltest.h
@@ -143,6 +143,7 @@ private Q_SLOTS:
     void testStoreResources_kioProtocols();
     void testStoreResources_duplicates();
     void testStoreResources_duplicates2();
+    void testStoreResources_duplicatesInMerger();
     void testStoreResources_overwriteProperties();
     void testStoreResources_overwriteProperties_invalidCard();
     void testStoreResources_correctDomainInStore();
diff --git a/nepomuk/services/storage/test/qtest_dms.cpp 
b/nepomuk/services/storage/test/qtest_dms.cpp
index 64949fd..9afd290 100644
--- a/nepomuk/services/storage/test/qtest_dms.cpp
+++ b/nepomuk/services/storage/test/qtest_dms.cpp
@@ -187,6 +187,10 @@ void Nepomuk::insertOntologies(Soprano::Model* _model, 
const QUrl& graph)
     model.addStatement( NCO::Contact(), RDFS::subClassOf(), NCO::Role(), graph 
);
     model.addStatement( NCO::Contact(), RDFS::subClassOf(), NAO::Party(), 
graph );
 
+    model.addStatement( NCO::PersonContact(), RDF::type(), RDFS::Resource(), 
graph );
+    model.addStatement( NCO::PersonContact(), RDF::type(), RDFS::Class(), 
graph );
+    model.addStatement( NCO::PersonContact(), RDFS::subClassOf(), 
NCO::Contact(), graph );
+
     model.addStatement( NAO::Tag(), RDF::type(), RDFS::Class(), graph );
     model.addStatement( NFO::FileDataObject(), RDF::type(), RDFS::Class(), 
graph );
     model.addStatement( NFO::Folder(), RDF::type(), RDFS::Class(), graph );
++++++ 754275ed-nepomuk-res-ident-perf.diff ++++++
commit 754275eda610dce1160286a76339353097d8764c
Author: Sebastian Trueg <[email protected]>
Date:   Fri Mar 9 17:17:48 2012 +0100

    Backport from nepomuk-core: improved performance on res identification.
    
    BUG: 289932
    FIXED-IN: 4.8.2

diff --git a/nepomuk/services/backupsync/lib/resourceidentifier.cpp 
b/nepomuk/services/backupsync/lib/resourceidentifier.cpp
index c1a9919..894372c 100644
--- a/nepomuk/services/backupsync/lib/resourceidentifier.cpp
+++ b/nepomuk/services/backupsync/lib/resourceidentifier.cpp
@@ -31,6 +31,7 @@
 #include <Soprano/Statement>
 #include <Soprano/Graph>
 #include <Soprano/Node>
+#include <Soprano/BindingSet>
 #include <Soprano/StatementIterator>
 #include <Soprano/QueryResultIterator>
 #include <Soprano/Model>
@@ -176,19 +177,18 @@ bool 
Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
         return false;
     }
 
-    QString query;
-
     QStringList identifyingProperties;
     QHash<KUrl, Soprano::Node> identifyingPropertiesHash;
 
     QHash< KUrl, Soprano::Node >::const_iterator it = res.constBegin();
     QHash< KUrl, Soprano::Node >::const_iterator constEnd = res.constEnd();
+    QList<Soprano::Node> requiredTypes;
     for( ; it != constEnd; it++ ) {
         const QUrl & prop = it.key();
 
         // Special handling for rdf:type
         if( prop == RDF::type() ) {
-            query += QString::fromLatin1(" ?r a %1 . ").arg( it.value().toN3() 
);
+            requiredTypes << it.value().uri();
             continue;
         }
 
@@ -219,6 +219,10 @@ bool 
Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
         return false;
     }
 
+
+    // construct the identification query
+    QString query = QLatin1String("select distinct ?r where { ");
+
     //
     // Optimization:
     // If there is only one identifying property using all that optional and 
filter stuff
@@ -235,7 +239,7 @@ bool 
Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
                            QString::number( numIdentifyingProperties++ ) );
         }
 
-        // Make sure atleast one of the identification properties has been 
matched
+        // Make sure at least one of the identification properties has been 
matched
         // by adding filter( bound(?o1) || bound(?o2) ... )
         query += QString::fromLatin1("filter( ");
         for( int i=0; i<numIdentifyingProperties-1; i++ ) {
@@ -247,43 +251,68 @@ bool 
Nepomuk::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
         query += QString::fromLatin1("?r %1 %2 . 
").arg(Soprano::Node::resourceToN3(identifyingPropertiesHash.constBegin().key()),
                                                          
identifyingPropertiesHash.constBegin().value().toN3());
     }
-    query += QLatin1String("}");
 
-    // Construct the entire query
-    QString queryBegin = QString::fromLatin1("select distinct ?r count(?p) as 
?cnt "
-    "where { ?r ?p ?o. filter( ?p in (%1) ).")
-    .arg( identifyingProperties.join(",") );
-
-    query = queryBegin + query + QString::fromLatin1(" order by desc(?cnt)");
+    //
+    // For performance reasons we add a limit even though this could mean that 
we
+    // miss a resource to identify since we check the types below.
+    //
+    query += QLatin1String("} LIMIT 100");
 
-    kDebug() << query;
 
     //
-    // Only store the results which have the maximum score
+    // Fetch a score for each result.
+    // We do this in a separate query for performance reasons.
     //
-    QSet<KUrl> results;
-    int score = -1;
+    QMultiHash<int, KUrl> resultsScoreHash;
+    int maxScore = -1;
     Soprano::QueryResultIterator qit = d->m_model->executeQuery( query, 
Soprano::Query::QueryLanguageSparql );
     while( qit.next() ) {
-        //kDebug() << "RESULT: " << qit["r"] << " " << qit["cnt"];
+        const Soprano::Node r(qit["r"]);
+
+        //
+        // Check the type requirements. Experiments have shown this to mean a 
substantial
+        // performance boost as compared to doing it in the main query.
+        //
+        if(!requiredTypes.isEmpty() ) {
+            query = QLatin1String("ask where { ");
+            foreach(const Soprano::Node& type, requiredTypes) {
+                query += QString::fromLatin1("%1 a %2 . ").arg(r.toN3(), 
type.toN3());
+            }
+            query += QLatin1String("}");
+            if(!d->m_model->executeQuery(query, 
Soprano::Query::QueryLanguageSparql).boolValue()) {
+                continue;
+            }
+        }
+
+
+        const int score = d->m_model->executeQuery(QString::fromLatin1("select 
count(?p) as ?cnt where { "
+                                                                       "%1 ?p 
?o. filter( ?p in (%2) ) . }")
+                                                   .arg( r.toN3(),
+                                                         
identifyingProperties.join(",") ),
+                                                   
Soprano::Query::QueryLanguageSparql)
+                          .allBindings().first()["cnt"].literal().toInt();
 
-        int count = qit["cnt"].literal().toInt();
-        if( score == -1 ) {
-            score = count;
+        if( maxScore < score ) {
+            maxScore = score;
         }
-        else if( count < score )
-            break;
 
-        results << qit["r"].uri();
+        resultsScoreHash.insert(score, r.uri());
     }
 
+    //
+    // Only get the results which have the maximum score
+    //
+    QSet<KUrl> results = 
QSet<KUrl>::fromList(resultsScoreHash.values(maxScore));
+
+
     //kDebug() << "Got " << results.size() << " results";
     if( results.empty() )
         return false;
 
     KUrl newUri;
-    if( results.size() == 1 )
+    if( results.size() == 1 ) {
         newUri = *results.begin();
+    }
     else {
         kDebug() << "DUPLICATE RESULTS!";
         newUri = duplicateMatch( res.uri(), results );


-- 
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to