Aude has submitted this change and it was merged.
Change subject: (bug 47125) Avoid redundant filtering in dispatcher.
......................................................................
(bug 47125) Avoid redundant filtering in dispatcher.
This change aborts the filtering operation as soon as the desired
number of changes is reached.
Change-Id: Idc7def15a5bd113b2cf38f8140f26098848bc1a7
Note: This is only a PARTIAL solution for bug 47125!
---
M lib/maintenance/dispatchChanges.php
1 file changed, 30 insertions(+), 30 deletions(-)
Approvals:
Aude: Verified; Looks good to me, approved
jenkins-bot: Checked
diff --git a/lib/maintenance/dispatchChanges.php
b/lib/maintenance/dispatchChanges.php
index 0517bbd..ac92390 100644
--- a/lib/maintenance/dispatchChanges.php
+++ b/lib/maintenance/dispatchChanges.php
@@ -780,13 +780,14 @@
// for free-form use.
$batch = array();
+ $batchSize = 0;
$chunkSize = $this->batchSize * $this->batchChunkFactor;
// Track the change ID from which the next pass should start.
// Note that this is non-trivial due to programmatic filtering.
- $seen = $after;
+ $lastIdSeen = $after;
- while ( count( $batch ) < $this->batchSize ) {
+ while ( $batchSize < $this->batchSize ) {
// get a chunk of changes
$chunk = $this->selectChanges( $after, $chunkSize );
@@ -800,23 +801,11 @@
reset( $chunk ); // don't leave the array pointer messy.
// filter the changes in the chunk and add the result
to the batch
- $chunk = $this->filterChanges( $siteID, $wikiDB, $chunk
);
- $batch = array_merge( $batch, $chunk );
+ $remaining = $this->batchSize - $batchSize;
+ list( $filtered, $lastIdSeen ) = $this->filterChanges(
$siteID, $wikiDB, $chunk, $remaining );
- // truncate the batch if needed.
- if ( count( $batch ) > $this->batchSize ) {
- // We need to find and remember the first
change that gets cur off,
- // so we can continue from that change on the
next pass.
-
- /* @var Change $anchor */
- list( $anchor ) = array_slice( $batch,
$this->batchSize, 1 );
- $seen = $anchor->getId() -1;
-
- $batch = array_slice( $batch, 0,
$this->batchSize );
- break;
- } else {
- $seen = $last->getId();
- }
+ $batch = array_merge( $batch, $filtered );
+ $batchSize = count( $batch );
//XXX: We could try to adapt $chunkSize based on ratio
of changes that get filtered out:
// $chunkSize = ( $this->batchSize - count( $batch
) ) * ( count_before / count_after );
@@ -826,7 +815,7 @@
$this->trace( "Got pending changes." );
- return array( $batch, $seen );
+ return array( $batch, $lastIdSeen );
}
/**
@@ -899,13 +888,16 @@
* Currently, we only keep ItemChanges for items that have a sitelink
to the
* target client wiki.
*
- * @param string $siteID: The client wiki's global site identifier,
as used by sitelinks.
- * @param string $wikiDB: The logical database name of the target
wiki.
+ * @param string $siteID : The client wiki's global site identifier,
as used by sitelinks.
+ * @param string $wikiDB : The logical database name of the target
wiki.
* @param Change[] $changes: The list of changes to filter.
+ * @param int $limit: The max number of changes to return
*
- * @return Change[] list of Change object from $changes that are
relevant to $siteID.
+ * @return array ( $batch, $seen ), where $batch is the filtered list
of Change objects,
+ * and $seen if the ID of the last change considered for the
batch
+ * (even if that was filtered out), for use as a continuation
marker.
*/
- protected function filterChanges( $siteID, $wikiDB, $changes ) {
+ protected function filterChanges( $siteID, $wikiDB, $changes, $limit ) {
wfProfileIn( __METHOD__ );
// collect all item IDs mentioned in the changes
@@ -937,26 +929,34 @@
$this->trace( "Retaining changes for " . count( $linkedItems )
. " relevant items." );
// find all changes that relate to an item that has a sitelink
to $siteID.
- $keep = array();
+ $filteredChanges = array();
+ $numberOfChangesFound = 0;
+ $lastIdSeen = 0;
foreach ( $changes as $change ) {
+ $lastIdSeen = $change->getId();
+
if ( $change instanceof ItemChange) {
$itemId =
$change->getEntityId()->getNumericId();
- // The change is relevant if it alters any
sitelinks refering to $siteID,
+ // The change is relevant if it alters any
sitelinks referring to $siteID,
// or the item currently links to $siteID.
if ( isset( $linkedItems[$itemId] )
|| $this->isRelevantChange( $change,
$siteID ) !== null ) {
- $keep[] = $change;
+
+ $filteredChanges[] = $change;
+ $numberOfChangesFound++;
}
+ }
+
+ if ( $numberOfChangesFound >= $limit ) {
+ break;
}
}
- $changes = $keep;
-
- $this->trace( "Found " . count( $changes ) . " changes for
items with relevant sitelinks." );
+ $this->trace( "Found " . count( $filteredChanges ) . " changes
for items with relevant sitelinks." );
wfProfileOut( __METHOD__ );
- return $changes;
+ return array( $filteredChanges, $lastIdSeen );
}
/**
--
To view, visit https://gerrit.wikimedia.org/r/59188
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Idc7def15a5bd113b2cf38f8140f26098848bc1a7
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: Aude <[email protected]>
Gerrit-Reviewer: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: Jeroen De Dauw <[email protected]>
Gerrit-Reviewer: Tobias Gritschacher <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits