jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/396474 )

Change subject: Fix Updater performance issue
......................................................................


Fix Updater performance issue

Do not use removeAll - just assemble the collections in a straightforward way.

Bug: T182464
Change-Id: I02530690e877cc1b60f11dc2fc8b97622c742d68
---
M 
tools/src/main/java/org/wikidata/query/rdf/tool/change/RecentChangesPoller.java
M tools/src/main/java/org/wikidata/query/rdf/tool/rdf/RdfRepository.java
2 files changed, 39 insertions(+), 23 deletions(-)

Approvals:
  jenkins-bot: Verified
  Gehel: Looks good to me, approved



diff --git 
a/tools/src/main/java/org/wikidata/query/rdf/tool/change/RecentChangesPoller.java
 
b/tools/src/main/java/org/wikidata/query/rdf/tool/change/RecentChangesPoller.java
index caf3ff6..85b6208 100644
--- 
a/tools/src/main/java/org/wikidata/query/rdf/tool/change/RecentChangesPoller.java
+++ 
b/tools/src/main/java/org/wikidata/query/rdf/tool/change/RecentChangesPoller.java
@@ -97,10 +97,6 @@
             int batchSize, Map<Long, Boolean> seenIDs, int tailSeconds) {
         this.wikibase = wikibase;
         this.firstStartTime = firstStartTime;
-        // FIXME: temporary plug for T182464, remove after it's fixed
-        if (batchSize > 200) {
-            batchSize = 200;
-        }
         this.batchSize = batchSize;
         this.seenIDs = seenIDs;
         this.tailSeconds = tailSeconds;
diff --git 
a/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/RdfRepository.java 
b/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/RdfRepository.java
index e6e891e..b048f6d 100644
--- a/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/RdfRepository.java
+++ b/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/RdfRepository.java
@@ -2,7 +2,6 @@
 
 import static com.google.common.collect.Sets.newHashSetWithExpectedSize;
 import static com.google.common.io.Resources.getResource;
-import static org.wikidata.query.rdf.tool.FilteredStatements.filtered;
 
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
@@ -372,7 +371,7 @@
     }
 
     /**
-     * Provides the SPARQL needed to synchronize the data statements.
+     * Provides the SPARQL needed to synchronize the data statements for a 
single entity.
      *
      * @param entityId id of the entity to sync
      * @param statements all known statements about the entity
@@ -390,17 +389,13 @@
         b.bind("uris.statement", uris.statement());
         b.bindStatements("insertStatements", statements);
 
-        Collection<Statement> entityStatements = 
filtered(statements).withSubject(uris.entity() + entityId);
+        List<Statement> entityStatements = new ArrayList<>();
+        List<Statement> statementStatements = new ArrayList<>();
+        Set<Statement> aboutStatements = new HashSet<>();
+        classifyStatements(statements, entityId, entityStatements, 
statementStatements, aboutStatements);
+
         b.bindValues("entityStatements", entityStatements);
-
-        Collection<Statement> statementStatements = 
filtered(statements).withSubjectStarts(uris.statement());
         b.bindValues("statementStatements", statementStatements);
-
-        Collection<Statement> aboutStatements = new 
HashSet<Statement>(statements);
-        aboutStatements.removeAll(entityStatements);
-        aboutStatements.removeAll(statementStatements);
-        
aboutStatements.removeAll(filtered(statements).withSubjectStarts(uris.value()));
-        
aboutStatements.removeAll(filtered(statements).withSubjectStarts(uris.reference()));
         b.bindValues("aboutStatements", aboutStatements);
 
         if (valueList != null && !valueList.isEmpty()) {
@@ -412,6 +407,36 @@
         }
 
         return b.toString();
+    }
+
+    /**
+     * Sort statements into a set of specialized collections, by subject.
+     * @param statements List of statements to process
+     * @param entityId
+     * @param entityStatements subject is entity
+     * @param statementStatements subject is any statement
+     * @param aboutStatements not entity, not statement, not value and not 
reference
+     */
+    private void classifyStatements(Collection<Statement> statements,
+            String entityId, Collection<Statement> entityStatements,
+            Collection<Statement> statementStatements,
+            Collection<Statement> aboutStatements) {
+        for (Statement statement: statements) {
+            String s = statement.getSubject().stringValue();
+            if (s.equals(uris.entity() + entityId)) {
+                entityStatements.add(statement);
+            }
+            if (s.startsWith(uris.statement())) {
+                statementStatements.add(statement);
+            }
+            if (!s.equals(uris.entity() + entityId)
+                    && !s.startsWith(uris.statement())
+                    && !s.startsWith(uris.value())
+                    && !s.startsWith(uris.reference())
+            ) {
+                aboutStatements.add(statement);
+            }
+        }
     }
 
     /**
@@ -433,6 +458,8 @@
 
         List<Statement> insertStatements = new ArrayList<>();
         List<Statement> entityStatements = new ArrayList<>();
+        List<Statement> statementStatements = new ArrayList<>();
+        Set<Statement> aboutStatements = new HashSet<>();
         Set<String> valueSet = new HashSet<>();
 
         for (final Change change : changes) {
@@ -442,7 +469,7 @@
             }
             entityIds.add(change.entityId());
             insertStatements.addAll(change.getStatements());
-            
entityStatements.addAll(filtered(change.getStatements()).withSubject(uris.entity()
 + change.entityId()));
+            classifyStatements(change.getStatements(), change.entityId(), 
entityStatements, statementStatements, aboutStatements);
             valueSet.addAll(change.getCleanupList());
         }
 
@@ -456,14 +483,7 @@
         b.bindStatements("insertStatements", insertStatements);
         b.bindValues("entityStatements", entityStatements);
 
-        Collection<Statement> statementStatements = 
filtered(insertStatements).withSubjectStarts(uris.statement());
         b.bindValues("statementStatements", statementStatements);
-
-        Collection<Statement> aboutStatements = new 
HashSet<>(insertStatements);
-        aboutStatements.removeAll(entityStatements);
-        aboutStatements.removeAll(statementStatements);
-        
aboutStatements.removeAll(filtered(insertStatements).withSubjectStarts(uris.value()));
-        
aboutStatements.removeAll(filtered(insertStatements).withSubjectStarts(uris.reference()));
         b.bindValues("aboutStatements", aboutStatements);
 
         if (!valueSet.isEmpty()) {

-- 
To view, visit https://gerrit.wikimedia.org/r/396474
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I02530690e877cc1b60f11dc2fc8b97622c742d68
Gerrit-PatchSet: 5
Gerrit-Project: wikidata/query/rdf
Gerrit-Branch: master
Gerrit-Owner: Smalyshev <smalys...@wikimedia.org>
Gerrit-Reviewer: Gehel <guillaume.leder...@wikimedia.org>
Gerrit-Reviewer: Smalyshev <smalys...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to