Ladsgroup has uploaded a new change for review. https://gerrit.wikimedia.org/r/312286
<?php
/**
 * Maintenance script that removes duplicate rows from the ores_classification table.
 *
 * Origin: Gerrit change 312286 "Add maintenance/CleanDuplicateScores.php"
 * (Change-Id: I9ea60cd8e5c1f0acacb5897970fc134d148883f1),
 * project mediawiki/extensions/ORES, branch master. Bug: T145503
 */

namespace ORES;

use Maintenance;

require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
	? getenv( 'MW_INSTALL_PATH' ) . '/maintenance/Maintenance.php'
	: __DIR__ . '/../../../maintenance/Maintenance.php' );

/**
 * Deletes rows of ores_classification that share the same
 * (oresc_rev, oresc_model, oresc_class) triple, keeping one row per triple.
 *
 * @ingroup Maintenance
 */
class CleanDuplicateScores extends Maintenance {
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Clean up duplicate data in ORES scores' );
	}

	/**
	 * Find the duplicate rows on the replica and delete them from the master
	 * in batches of 1000 ids, waiting for replication after every batch.
	 */
	public function execute() {
		$dbr = \wfGetDB( DB_REPLICA );
		$dbw = \wfGetDB( DB_MASTER );

		// A GROUP BY ... HAVING COUNT(*) > 1 query returns a single row per
		// group, so the ids of the surplus rows can never be collected from it.
		// Read the individual rows instead, newest id first, so that the first
		// row seen for a key (the one that is kept) is the most recent one.
		// NOTE(review): this loads every row of the table in one result set;
		// confirm that is acceptable for the table sizes in production.
		$res = $dbr->select(
			'ores_classification',
			[ 'oresc_id', 'oresc_rev', 'oresc_model', 'oresc_class' ],
			'',
			__METHOD__,
			[ 'ORDER BY' => 'oresc_id DESC' ]
		);

		// Ids of the rows to delete.
		$ids = [];
		// Set of "rev,model,class" keys already encountered (key => true).
		$seen = [];
		foreach ( $res as $row ) {
			$key = implode( ',', [ $row->oresc_rev, $row->oresc_model, $row->oresc_class ] );
			if ( array_key_exists( $key, $seen ) ) {
				// A row with this key was already kept: this one is a duplicate.
				$ids[] = $row->oresc_id;
			} else {
				$seen[$key] = true;
			}
		}

		$c = count( $ids );
		// Terminate the message with a newline so later output starts on its own line.
		$this->output( "Got $c duplicates, cleaning them.\n" );

		// Delete in chunks to keep each write small; array_chunk() of an empty
		// list yields no chunks, so nothing is executed when there are no duplicates.
		$chunks = array_chunk( $ids, 1000 );
		foreach ( $chunks as $chunk ) {
			$dbw->delete(
				'ores_classification',
				[ 'oresc_id' => $chunk ],
				__METHOD__
			);
			\wfWaitForSlaves();
		}

		$this->output( "Done\n" );
	}
}

$maintClass = 'ORES\CleanDuplicateScores';
require_once RUN_MAINTENANCE_IF_MAIN;