ArielGlenn has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/295387

Change subject: add option to XML dump stubs of page ranges with explicit rev_id ordering
......................................................................

add option to XML dump stubs of page ranges with explicit rev_id ordering

[WIP] utterly untested.

Change-Id: I94ca4a06235bdbed384bb997deb7432bb5aaa5b9
---
M includes/export/WikiExporter.php
M maintenance/backup.inc
M maintenance/dumpBackup.php
3 files changed, 15 insertions(+), 5 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/87/295387/1

diff --git a/includes/export/WikiExporter.php b/includes/export/WikiExporter.php
index 54de26d..1da05bc 100644
--- a/includes/export/WikiExporter.php
+++ b/includes/export/WikiExporter.php
@@ -134,13 +134,14 @@
         * @param int $start Inclusive lower limit (this id is included)
         * @param int $end Exclusive upper limit (this id is not included)
         *   If 0, no upper limit.
+        * @param bool $orderRevs order revisions within pages in ascending 
order
         */
-       public function pagesByRange( $start, $end ) {
+       public function pagesByRange( $start, $end, $orderRevs ) {
                $condition = 'page_id >= ' . intval( $start );
                if ( $end ) {
                        $condition .= ' AND page_id < ' . intval( $end );
                }
-               $this->dumpFrom( $condition );
+               $this->dumpFrom( $condition, $orderRevs );
        }
 
        /**
@@ -245,7 +246,7 @@
         * @throws MWException
         * @throws Exception
         */
-       protected function dumpFrom( $cond = '' ) {
+       protected function dumpFrom( $cond = '', $orderRevs = false ) {
                # For logging dumps...
                if ( $this->history & self::LOGS ) {
                        $where = [ 'user_id = log_user' ];
@@ -333,6 +334,12 @@
                        } elseif ( $this->history & WikiExporter::FULL ) {
                                # Full history dumps...
                                $join['revision'] = [ 'INNER JOIN', 
'page_id=rev_page' ];
+                               # query optimization for history stub dumps
+                               if ( $this->text == WikiExporter::STUB && 
$orderRevs ) {
+                                       $opts[] = 'STRAIGHT_JOIN';
+                                       $opts['ORDER BY'] = [ 'rev_page ASC', 
'rev_id ASC' ];
+                                       $opts['USE INDEX']['page'] = 'PRIMARY';
+                               }
                        } elseif ( $this->history & WikiExporter::CURRENT ) {
                                # Latest revision dumps...
                                if ( $this->list_authors && $cond != '' ) { // 
List authors, if so desired
@@ -369,7 +376,6 @@
                        if ( $this->buffer == WikiExporter::STREAM ) {
                                $prev = $this->db->bufferResults( false );
                        }
-
                        $result = null; // Assuring $result is not undefined, 
if exception occurs early
                        try {
                                Hooks::run( 'ModifyExportQuery',
diff --git a/maintenance/backup.inc b/maintenance/backup.inc
index 3271fd6..db3af92 100644
--- a/maintenance/backup.inc
+++ b/maintenance/backup.inc
@@ -41,6 +41,7 @@
        public $revEndId = 0;
        public $dumpUploads = false;
        public $dumpUploadFileContents = false;
+       public $orderRevs = false;
 
        protected $reportingInterval = 100;
        protected $pageCount = 0;
@@ -271,7 +272,7 @@
                } elseif ( is_null( $this->pages ) ) {
                        # Page dumps: all or by page ID range
                        if ( $this->startId || $this->endId ) {
-                               $exporter->pagesByRange( $this->startId, 
$this->endId );
+                               $exporter->pagesByRange( $this->startId, 
$this->endId, $this->orderRevs );
                        } elseif ( $this->revStartId || $this->revEndId ) {
                                $exporter->revsByRange( $this->revStartId, 
$this->revEndId );
                        } else {
diff --git a/maintenance/dumpBackup.php b/maintenance/dumpBackup.php
index d4255a0..60ee99d 100644
--- a/maintenance/dumpBackup.php
+++ b/maintenance/dumpBackup.php
@@ -50,6 +50,8 @@
                $this->addOption( 'stable', 'Dump stable versions of pages' );
                $this->addOption( 'revrange', 'Dump range of revisions 
specified by revstart and ' .
                        'revend parameters' );
+               $this->addOption( 'orderrevs', 'Dump revisions in ascending 
revision order ' .
+                       '(implies dump of a range of pages)');
                $this->addOption( 'pagelist',
                        'Dump only pages included in the file', false, true );
                // Options
@@ -127,6 +129,7 @@
                $this->skipFooter = $this->hasOption( 'skip-footer' );
                $this->dumpUploads = $this->hasOption( 'uploads' );
                $this->dumpUploadFileContents = $this->hasOption( 
'include-files' );
+               $this->orderRevs = $this->hasOption( 'orderrevs' );
        }
 }
 

-- 
To view, visit https://gerrit.wikimedia.org/r/295387
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I94ca4a06235bdbed384bb997deb7432bb5aaa5b9
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <ar...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to