Daniel Kinzler has uploaded a new change for review.
https://gerrit.wikimedia.org/r/59188
Change subject: (bug 47125) Avoid redundant filtering in dispatcher.
......................................................................
(bug 47125) Avoid redundant filtering in dispatcher.
This change aborts the filtering operation as soon as the desired
number of changes is reached.
Change-Id: Idc7def15a5bd113b2cf38f8140f26098848bc1a7
Note: This is only a PARTIAL solution for bug 47125!
---
M lib/maintenance/dispatchChanges.php
1 file changed, 25 insertions(+), 22 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase
refs/changes/88/59188/1
diff --git a/lib/maintenance/dispatchChanges.php
b/lib/maintenance/dispatchChanges.php
index 599cd86..98307df 100644
--- a/lib/maintenance/dispatchChanges.php
+++ b/lib/maintenance/dispatchChanges.php
@@ -764,13 +764,14 @@
// for free-form use.
$batch = array();
+ $batchSize = 0;
$chunkSize = $this->batchSize * $this->batchChunkFactor;
// Track the change ID from which the next pass should start.
// Note that this is non-trivial due to programmatic filtering.
$seen = $after;
- while ( count( $batch ) < $this->batchSize ) {
+ while ( $batchSize < $this->batchSize ) {
// get a chunk of changes
$chunk = $this->selectChanges( $after, $chunkSize );
@@ -784,22 +785,13 @@
reset( $chunk ); // don't leave the array pointer messy.
// filter the changes in the chunk and add the result
to the batch
- $chunk = $this->filterChanges( $siteID, $wikiDB, $chunk
);
- $batch = array_merge( $batch, $chunk );
+ list( $filtered, $seen ) = $this->filterChanges(
$siteID, $wikiDB, $chunk, $this->batchSize - $batchSize );
- // truncate the batch if needed.
- if ( count( $batch ) > $this->batchSize ) {
- // We need to find and remember the first
change that gets cur off,
- // so we can continue from that change on the
next pass.
+ $batch = array_merge( $batch, $filtered );
+ $batchSize = count( $batch );
- /* @var Change $anchor */
- list( $anchor ) = array_slice( $batch,
$this->batchSize, 1 );
- $seen = $anchor->getId() -1;
-
+ if ( $batchSize > $this->batchSize ) { // shouldn't
happen
$batch = array_slice( $batch, 0,
$this->batchSize );
- break;
- } else {
- $seen = $last->getId();
}
//XXX: We could try to adapt $chunkSize based on ratio
of changes that get filtered out:
@@ -875,13 +867,16 @@
* Currently, we only keep ItemChanges for items that have a sitelink
to the
* target client wiki.
*
- * @param string $siteID: The client wiki's global site identifier,
as used by sitelinks.
- * @param string $wikiDB: The logical database name of the target
wiki.
+ * @param string $siteID : The client wiki's global site identifier,
as used by sitelinks.
+ * @param string $wikiDB : The logical database name of the target
wiki.
* @param Change[] $changes: The list of changes to filter.
+ * @param int $limit: The max number of changes to return
*
- * @return Change[] list of Change object from $changes that are
relevant to $siteID.
+ * @return array ( $batch, $seen ), where $batch is the filtered list
of Change objects,
+ * and $seen is the ID of the last change considered for the
batch
+ * (even if that was filtered out), for use as a continuation
marker.
*/
- protected function filterChanges( $siteID, $wikiDB, $changes ) {
+ protected function filterChanges( $siteID, $wikiDB, $changes, $limit ) {
wfProfileIn( __METHOD__ );
// collect all item IDs mentioned in the changes
@@ -914,7 +909,11 @@
// find all changes that relate to an item that has a sitelink
to $siteID.
$keep = array();
+ $c = 0;
+ $seen = 0;
foreach ( $changes as $change ) {
+ $seen = $change->getId();
+
if ( $change instanceof ItemChange) {
$itemId =
$change->getEntityId()->getNumericId();
@@ -922,17 +921,21 @@
// or the item currently links to $siteID.
if ( isset( $linkedItems[$itemId] )
|| $this->isRelevantChange( $change,
$siteID ) !== null ) {
+
$keep[] = $change;
+ $c++;
}
+ }
+
+ if ( $c >= $limit ) {
+ break;
}
}
- $changes = $keep;
-
- $this->trace( "found " . count( $changes ) . " changes for
items with relevant sitelinks." );
+ $this->trace( "found " . count( $keep ) . " changes for items
with relevant sitelinks." );
wfProfileOut( __METHOD__ );
- return $changes;
+ return array( $keep, $seen );
}
/**
--
To view, visit https://gerrit.wikimedia.org/r/59188
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Idc7def15a5bd113b2cf38f8140f26098848bc1a7
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits