Matthias Mullie has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/256449

Change subject: [WIP] Script to remove topics before a certain date
......................................................................

[WIP] Script to remove topics before a certain date

Change-Id: I593aac084939ef7317ac91ad932da2c23d463ad7
---
M includes/Repository/TreeRepository.php
A maintenance/FlowRemoveOldTopics.php
2 files changed, 174 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Flow 
refs/changes/49/256449/1

diff --git a/includes/Repository/TreeRepository.php 
b/includes/Repository/TreeRepository.php
index 41786c5..f605486 100644
--- a/includes/Repository/TreeRepository.php
+++ b/includes/Repository/TreeRepository.php
@@ -377,7 +377,7 @@
                        throw new DataModelException( 'No root exists in the 
identityMap', 'process-data' );
                }
 
-               return $identityMap[$root];
+               return $identityMap[$root->getAlphadecimal()];
        }
 
        public function fetchFullTree( UUID $nodeId ) {
diff --git a/maintenance/FlowRemoveOldTopics.php 
b/maintenance/FlowRemoveOldTopics.php
new file mode 100644
index 0000000..226ca02
--- /dev/null
+++ b/maintenance/FlowRemoveOldTopics.php
@@ -0,0 +1,173 @@
+<?php
+
+use Flow\Container;
+use Flow\Data\ManagerGroup;
+use Flow\Data\Utils\RawSql;
+use Flow\DbFactory;
+use Flow\Model\AbstractRevision;
+use Flow\Model\UUID;
+use Flow\Model\Workflow;
+use Flow\Repository\TreeRepository;
+
+require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
+       ? getenv( 'MW_INSTALL_PATH' ) . '/maintenance/Maintenance.php'
+       : dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );
+
+/**
+ * @ingroup Maintenance
+ */
+class FlowRemoveOldTopics extends Maintenance {
+       /**
+        * @var ManagerGroup
+        */
+       protected $storage;
+
+       /**
+        * @var TreeRepository
+        */
+       protected $treeRepo;
+
+       /**
+        * @var DbFactory
+        */
+       protected $dbFactory;
+
+       public function __construct() {
+               parent::__construct();
+
+               $this->mDescription = "Deletes old topics";
+
+               $this->addOption( 'date', 'Date cutoff (in any format 
understood by wfTimestamp), topics older than this date will be deleted.', 
true, true );
+
+               $this->setBatchSize( 10 );
+       }
+
+       public function execute() {
+               $this->storage = Container::get( 'storage' );
+               $this->treeRepo = Container::get( 'repository.tree' );
+               $this->dbFactory = Container::get( 'db.factory' );
+
+               $timestamp = wfTimestamp( TS_MW, $this->getOption( 'date' ) );
+               $this->removeWorkflows( $timestamp );
+               $this->removeHeader( $timestamp );
+               // @todo: output how many were removed?
+       }
+
+       protected function removeHeader( $timestamp ) {
+               // @todo: do I actually want to remove header? what if it's 
been updated since?
+               // @todo: remove references
+       }
+
+       /**
+        * @param string $timestamp Timestamp in TS_MW format
+        * @throws \Flow\Exception\FlowException
+        */
+       protected function removeWorkflows( $timestamp ) {
+               $dbr = $this->dbFactory->getDB( DB_SLAVE );
+
+               // start from around unix epoch - there can be no Flow data 
before that
+               $startId = UUID::getComparisonUUID( '1' );
+               do {
+                       $workflows = $this->storage->find(
+                               'Workflow',
+                               array(
+                                       new RawSql( 'workflow_id > ' . 
$dbr->addQuotes( $startId->getBinary() ) ),
+                                       'workflow_wiki' => wfWikiId(),
+                                       'workflow_type' => 'topic',
+                                       new RawSql( 
'workflow_last_update_timestamp < ' . $dbr->addQuotes( $timestamp ) ),
+                               ),
+                               array( 'limit' => $this->mBatchSize )
+                       );
+
+                       if ( empty( $workflows ) ) {
+                               break;
+                       }
+
+                       // prepare for next batch
+                       $startId = end( $workflows )->getId();
+
+                       // @todo: remove everything else!
+                       foreach ( $workflows as $workflow ) {
+                               $this->removeTopicList( $workflow );
+                               $this->removeSummary( $workflow );
+                               $this->removePosts( $workflow );
+                       }
+
+                       var_dump( count( $workflows ) . ' workflows' );
+//                     $storage->multiRemove( $workflows ); // @todo
+
+                       $this->dbFactory->waitForSlaves();
+               } while ( !empty( $workflows ) );
+       }
+
+       protected function removeTopicList( Workflow $workflow ) {
+               $entries = $this->storage->find( 'TopicListEntry', array( 
'topic_id' => $workflow->getId() ) );
+               if ( $entries ) {
+                       var_dump( count( $entries ) . ' topiclist entries' );
+//                     $this->storage->multiRemove( $entries ); // @todo
+               }
+       }
+
+       protected function removeSummary( Workflow $workflow ) {
+               $revisions = $this->storage->find( 'PostSummary', array( 
'rev_type_id' => $workflow->getId() ) );
+               if ( $revisions ) {
+                       foreach ( $revisions as $revision ) {
+                               $this->removeReferences( $revision );
+                       }
+
+                       var_dump( count( $revisions ) . ' summaries' );
+//                     $this->storage->multiRemove( $revisions ); // @todo
+               }
+       }
+
+       protected function removePosts( Workflow $workflow ) {
+               // fetch all children (posts) from a topic
+               $subtree = $this->treeRepo->fetchSubtreeIdentityMap( 
$workflow->getId() );
+
+               $conds = array();
+               foreach ( $subtree as $id => $data ) {
+                       $conds[] = array( 'rev_type_id' => UUID::create( $id ) 
);
+               }
+
+               $posts = $this->storage->findMulti( 'PostRevision', $conds );
+               foreach ( $posts as $revisions ) {
+                       foreach ( $revisions as $revision ) {
+                               $this->removeReferences( $revision );
+                       }
+
+                       var_dump( count( $revisions ) . ' post revisions' );
+//                     $this->storage->multiRemove( $revisions ); // @todo
+               }
+
+               // @todo: remove TreeRepo data...
+       }
+
+       protected function removeReferences( AbstractRevision $revision ) {
+               $wikiReferences = $this->storage->find( 'WikiReference', array(
+                       'ref_src_wiki' => wfWikiId(),
+                       'ref_src_object_type' => $revision->getRevisionType(),
+                       'ref_src_object_id' => $revision->getCollectionId(),
+               ) );
+               if ( $wikiReferences ) {
+                       var_dump( count( $wikiReferences ) . ' wiki references' 
);
+//                     $this->storage->multiRemove( $wikiReferences ); // @todo
+               }
+
+               $urlReferences = $this->storage->find( 'URLReference', array(
+                       'ref_src_wiki' => wfWikiId(),
+                       'ref_src_object_type' => $revision->getRevisionType(),
+                       'ref_src_object_id' => $revision->getCollectionId(),
+               ) );
+               if ( $urlReferences ) {
+                       var_dump( count( $urlReferences ) . ' url references' );
+//                     $this->storage->multiRemove( $urlReferences ); // @todo
+               }
+       }
+
+       protected function removeTree() {
+               // @todo: damn, tree doesn't have any removal method, yet
+       }
+}
+
+$maintClass = 'FlowRemoveOldTopics'; // Tells it to run the class
+require_once( RUN_MAINTENANCE_IF_MAIN );

-- 
To view, visit https://gerrit.wikimedia.org/r/256449
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I593aac084939ef7317ac91ad932da2c23d463ad7
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Flow
Gerrit-Branch: master
Gerrit-Owner: Matthias Mullie <mmul...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to