Ladsgroup has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/312286

Change subject: Add maintenance/CleanDuplicateScores.php
......................................................................

Add maintenance/CleanDuplicateScores.php

Bug: T145503
Change-Id: I9ea60cd8e5c1f0acacb5897970fc134d148883f1
---
A maintenance/CleanDuplicateScores.php
1 file changed, 59 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ORES refs/changes/86/312286/1

diff --git a/maintenance/CleanDuplicateScores.php b/maintenance/CleanDuplicateScores.php
new file mode 100644
index 0000000..45f114e
--- /dev/null
+++ b/maintenance/CleanDuplicateScores.php
@@ -0,0 +1,59 @@
+<?php
+
+namespace ORES;
+
+use Maintenance;
+
+require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
+       ? getenv( 'MW_INSTALL_PATH' ) . '/maintenance/Maintenance.php'
+       : __DIR__ . '/../../../maintenance/Maintenance.php' );
+
+/**
+ * Maintenance script that removes duplicate rows from ores_classification.
+ *
+ * Rows are treated as duplicates when they share the same oresc_rev,
+ * oresc_model and oresc_class. The row with the lowest oresc_id in each
+ * such group is kept and every other row of the group is deleted.
+ *
+ * @ingroup Maintenance
+ */
+class CleanDuplicateScores extends Maintenance {
+       public function __construct() {
+               parent::__construct();
+
+               $this->addDescription( 'Clean up duplicate data in ORES scores' );
+       }
+
+       /**
+        * Look up the duplicate rows on a replica and delete them on the master.
+        */
+       public function execute() {
+               $dbr = \wfGetDB( DB_REPLICA );
+               $dbw = \wfGetDB( DB_MASTER );
+
+               $ids = $this->findDuplicateIds( $dbr );
+               $c = count( $ids );
+               $this->output( "Got $c duplicates, cleaning them.\n" );
+
+               // Delete at most 1000 rows per statement and call wfWaitForSlaves()
+               // after every batch before issuing the next one.
+               foreach ( array_chunk( $ids, 1000 ) as $chunk ) {
+                       $dbw->delete(
+                               'ores_classification',
+                               [ 'oresc_id' => $chunk ],
+                               __METHOD__
+                       );
+                       wfWaitForSlaves();
+               }
+               $this->output( "Done\n" );
+       }
+
+       /**
+        * Collect the oresc_id of every row that duplicates a row being kept.
+        *
+        * @param object $dbr Database handle to read from, as returned by wfGetDB()
+        * @return array List of oresc_id values to delete; empty when there are no duplicates
+        */
+       private function findDuplicateIds( $dbr ) {
+               // One result row per duplicated (rev, model, class) group. MIN(oresc_id)
+               // names the row to keep. Selecting a bare oresc_id here would yield a
+               // single row per group, so the duplicates themselves would never show up.
+               $groups = $dbr->select(
+                       'ores_classification',
+                       [
+                               'keep_id' => 'MIN(oresc_id)',
+                               'oresc_rev',
+                               'oresc_model',
+                               'oresc_class',
+                       ],
+                       '',
+                       __METHOD__,
+                       [
+                               'GROUP BY' => 'oresc_rev, oresc_model, oresc_class',
+                               'HAVING' => 'COUNT(*) > 1',
+                       ]
+               );
+
+               $ids = [];
+               foreach ( $groups as $group ) {
+                       // Every row of the group except the one that is kept.
+                       $extraIds = $dbr->selectFieldValues(
+                               'ores_classification',
+                               'oresc_id',
+                               [
+                                       'oresc_rev' => $group->oresc_rev,
+                                       'oresc_model' => $group->oresc_model,
+                                       'oresc_class' => $group->oresc_class,
+                                       'oresc_id != ' . $dbr->addQuotes( $group->keep_id ),
+                               ],
+                               __METHOD__
+                       );
+                       foreach ( $extraIds as $id ) {
+                               $ids[] = $id;
+                       }
+               }
+
+               return $ids;
+       }
+}
+
+$maintClass = 'ORES\CleanDuplicateScores';
+require_once RUN_MAINTENANCE_IF_MAIN;

-- 
To view, visit https://gerrit.wikimedia.org/r/312286
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I9ea60cd8e5c1f0acacb5897970fc134d148883f1
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ORES
Gerrit-Branch: master
Gerrit-Owner: Ladsgroup <ladsgr...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to