jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/374416 )
Change subject: Use correct table to find deletes. ...................................................................... Use correct table to find deletes. Timestamps in the archive table are revision creation timestamps, and thus unsuitable for retrieving deletes by timestamp. This patch switches to using the logging table, where the timestamp is the actual deletion timestamp, plus we check that there is an archive entry for this title, to ensure the delete wasn't reversed. It is not perfect but it's a reasonable approximation for now. Bug: T171921 Change-Id: Ie3c3ce292cd6a0c5264157a582544d98b3a6d48b --- M maintenance/forceSearchIndex.php 1 file changed, 14 insertions(+), 12 deletions(-) Approvals: Cindy-the-browser-test-bot: Looks good to me, but someone else must approve EBernhardson: Looks good to me, approved jenkins-bot: Verified DCausse: Looks good to me, but someone else must approve diff --git a/maintenance/forceSearchIndex.php b/maintenance/forceSearchIndex.php index 7adffc7..5d4c51a 100644 --- a/maintenance/forceSearchIndex.php +++ b/maintenance/forceSearchIndex.php @@ -385,24 +385,28 @@ $dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] ); $it = new BatchRowIterator( $dbr, - 'archive', - [ 'ar_namespace', 'ar_title', 'ar_timestamp' ], + 'logging', + [ 'log_timestamp' ], $this->mBatchSize ); - $this->attachPageConditions( $dbr, $it, 'ar' ); - $this->attachTimestampConditions( $dbr, $it, 'ar' ); - $it->addConditions( [ 'ar_page_id IS NOT NULL' ] ); + $this->attachPageConditions( $dbr, $it, 'log' ); + $this->attachTimestampConditions( $dbr, $it, 'log' ); + $it->addConditions( [ + 'log_type' => 'delete', + 'log_action' => 'delete', + 'EXISTS(select * from archive where ar_title = log_title and ar_namespace = log_namespace)', + ] ); - $it->setFetchColumns( [ 'ar_timestamp', 'ar_namespace', 'ar_title', 'ar_page_id' ] ); + $it->setFetchColumns( [ 'log_timestamp', 'log_namespace', 'log_title', 'log_page' ] ); return new CallbackIterator( $it, function ( $batch ) { $titlesToDelete = 
[]; $docIdsToDelete = []; $archive = []; foreach ( $batch as $row ) { - $title = Title::makeTitle( $row->ar_namespace, $row->ar_title ); - $id = $this->getSearchConfig()->makeId( $row->ar_page_id ); + $title = Title::makeTitle( $row->log_namespace, $row->log_title ); + $id = $this->getSearchConfig()->makeId( $row->log_page ); $titlesToDelete[] = $title; $docIdsToDelete[] = $id; $archive[] = [ @@ -415,10 +419,8 @@ 'titlesToDelete' => $titlesToDelete, 'docIdsToDelete' => $docIdsToDelete, 'archive' => $archive, - 'endingAt' => isset( $title ) - ? substr( preg_replace( - '/[^' . Title::legalChars() . ']/', '_', $title->getPrefixedDBkey() - ), 0, 30 ) + 'endingAt' => isset( $row ) + ? ( new MWTimestamp( $row->log_timestamp ) )->getTimestamp( TS_ISO_8601 ) : 'unknown', ]; } ); -- To view, visit https://gerrit.wikimedia.org/r/374416 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ie3c3ce292cd6a0c5264157a582544d98b3a6d48b Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: Smalyshev <smalys...@wikimedia.org> Gerrit-Reviewer: Cindy-the-browser-test-bot <bernhardsone...@gmail.com> Gerrit-Reviewer: DCausse <dcau...@wikimedia.org> Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org> Gerrit-Reviewer: Gehel <guillaume.leder...@wikimedia.org> Gerrit-Reviewer: Jcrespo <jcre...@wikimedia.org> Gerrit-Reviewer: Smalyshev <smalys...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits