Aude has submitted this change and it was merged.

Change subject: (bug 47125) Avoid redundant filtering in dispatcher.
......................................................................


(bug 47125) Avoid redundant filtering in dispatcher.

This change aborts the filtering operation as soon as the desired
number of changes is reached.

Change-Id: Idc7def15a5bd113b2cf38f8140f26098848bc1a7
Note: This is only a PARTIAL solution for bug 47125!
---
M lib/maintenance/dispatchChanges.php
1 file changed, 30 insertions(+), 30 deletions(-)

Approvals:
  Aude: Verified; Looks good to me, approved
  jenkins-bot: Checked



diff --git a/lib/maintenance/dispatchChanges.php b/lib/maintenance/dispatchChanges.php
index 0517bbd..ac92390 100644
--- a/lib/maintenance/dispatchChanges.php
+++ b/lib/maintenance/dispatchChanges.php
@@ -780,13 +780,14 @@
                //      for free-form use.
 
                $batch = array();
+               $batchSize = 0;
                $chunkSize = $this->batchSize * $this->batchChunkFactor;
 
                // Track the change ID from which the next pass should start.
                // Note that this is non-trivial due to programmatic filtering.
-               $seen = $after;
+               $lastIdSeen = $after;
 
-               while ( count( $batch ) < $this->batchSize ) {
+               while ( $batchSize < $this->batchSize ) {
                        // get a chunk of changes
                        $chunk = $this->selectChanges( $after, $chunkSize );
 
@@ -800,23 +801,11 @@
                        reset( $chunk ); // don't leave the array pointer messy.
 
                        // filter the changes in the chunk and add the result 
to the batch
-                       $chunk = $this->filterChanges( $siteID, $wikiDB, $chunk 
);
-                       $batch = array_merge( $batch, $chunk );
+                       $remaining = $this->batchSize - $batchSize;
+                       list( $filtered, $lastIdSeen ) = $this->filterChanges( 
$siteID, $wikiDB, $chunk, $remaining );
 
-                       // truncate the batch if needed.
-                       if ( count( $batch ) > $this->batchSize ) {
-                               // We need to find and remember the first 
change that gets cur off,
-                               // so we can continue from that change on the 
next pass.
-
-                               /* @var Change $anchor */
-                               list( $anchor ) = array_slice( $batch, 
$this->batchSize, 1 );
-                               $seen = $anchor->getId() -1;
-
-                               $batch = array_slice( $batch, 0, 
$this->batchSize );
-                               break;
-                       } else {
-                               $seen = $last->getId();
-                       }
+                       $batch = array_merge( $batch, $filtered );
+                       $batchSize = count( $batch );
 
                        //XXX: We could try to adapt $chunkSize based on ratio 
of changes that get filtered out:
                        //     $chunkSize = ( $this->batchSize - count( $batch 
) ) * ( count_before / count_after );
@@ -826,7 +815,7 @@
 
                $this->trace( "Got pending changes." );
 
-               return array( $batch, $seen );
+               return array( $batch, $lastIdSeen );
        }
 
        /**
@@ -899,13 +888,16 @@
         * Currently, we only keep ItemChanges for items that have a sitelink 
to the
         * target client wiki.
         *
-        * @param string $siteID:    The client wiki's global site identifier, 
as used by sitelinks.
-        * @param string $wikiDB:    The logical database name of the target 
wiki.
+        * @param string   $siteID : The client wiki's global site identifier, 
as used by sitelinks.
+        * @param string   $wikiDB : The logical database name of the target 
wiki.
         * @param Change[] $changes: The list of changes to filter.
+        * @param int      $limit:   The max number of changes to return
         *
-        * @return Change[] list of Change object from $changes that are 
relevant to $siteID.
+        * @return array ( $batch, $seen ), where $batch is the filtered list 
of Change objects,
+        *         and $seen is the ID of the last change considered for the 
batch
+        *         (even if that was filtered out), for use as a continuation 
marker.
         */
-       protected function filterChanges( $siteID, $wikiDB, $changes ) {
+       protected function filterChanges( $siteID, $wikiDB, $changes, $limit ) {
                wfProfileIn( __METHOD__ );
 
                // collect all item IDs mentioned in the changes
@@ -937,26 +929,34 @@
                $this->trace( "Retaining changes for " . count( $linkedItems ) 
. " relevant items." );
 
                // find all changes that relate to an item that has a sitelink 
to $siteID.
-               $keep = array();
+               $filteredChanges = array();
+               $numberOfChangesFound = 0;
+               $lastIdSeen = 0;
                foreach ( $changes as $change ) {
+                       $lastIdSeen = $change->getId();
+
                        if ( $change instanceof ItemChange) {
                                $itemId = 
$change->getEntityId()->getNumericId();
 
-                               // The change is relevant if it alters any 
sitelinks refering to $siteID,
+                               // The change is relevant if it alters any 
sitelinks referring to $siteID,
                                // or the item currently links to $siteID.
                                if ( isset( $linkedItems[$itemId] )
                                        || $this->isRelevantChange( $change, 
$siteID ) !== null ) {
-                                       $keep[] = $change;
+
+                                       $filteredChanges[] = $change;
+                                       $numberOfChangesFound++;
                                }
+                       }
+
+                       if ( $numberOfChangesFound >= $limit ) {
+                               break;
                        }
                }
 
-               $changes = $keep;
-
-               $this->trace( "Found " . count( $changes ) . " changes for 
items with relevant sitelinks." );
+               $this->trace( "Found " . count( $filteredChanges ) . " changes 
for items with relevant sitelinks." );
 
                wfProfileOut( __METHOD__ );
-               return $changes;
+               return array( $filteredChanges, $lastIdSeen );
        }
 
        /**

-- 
To view, visit https://gerrit.wikimedia.org/r/59188
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Idc7def15a5bd113b2cf38f8140f26098848bc1a7
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: Aude <[email protected]>
Gerrit-Reviewer: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: Jeroen De Dauw <[email protected]>
Gerrit-Reviewer: Tobias Gritschacher <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to