Anomie has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/349479 )

Change subject: ApiQueryContributors: Improve query behavior
......................................................................

ApiQueryContributors: Improve query behavior

Include the highest rev_page queried for anon contributions in the
continuation value so we only query each one once.

Also, we can improve the queries in the common single-title case to
avoid filesorting thanks to the long-standing MySQL optimizer bug.

This also starts outputting 'anoncontributors' for pages with zero
contributors, so clients can differentiate between "not returned" and
"zero contributors" if necessary.

Bug: T163495
Change-Id: I1a7ccd4f1e5fe478edf0a8d216037c429bd7e868
---
M includes/api/ApiQueryContributors.php
1 file changed, 75 insertions(+), 48 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/79/349479/1

diff --git a/includes/api/ApiQueryContributors.php 
b/includes/api/ApiQueryContributors.php
index 183409d..a30abcc 100644
--- a/includes/api/ApiQueryContributors.php
+++ b/includes/api/ApiQueryContributors.php
@@ -49,14 +49,24 @@
                $params = $this->extractRequestParams();
                $this->requireMaxOneParameter( $params, 'group', 
'excludegroup', 'rights', 'excluderights' );
 
-               // Only operate on existing pages
-               $pages = array_keys( $this->getPageSet()->getGoodTitles() );
-
-               // Filter out already-processed pages
+               // Process continue
+               $cont_anonpage = 0;
+               $cont_page = 0;
+               $cont_user = 0;
                if ( $params['continue'] !== null ) {
                        $cont = explode( '|', $params['continue'] );
-                       $this->dieContinueUsageIf( count( $cont ) != 2 );
-                       $cont_page = (int)$cont[0];
+                       $this->dieContinueUsageIf( count( $cont ) !== 3 );
+                       $cont_anonpage = $cont[0] === '-' ? '-' : (int)$cont[0];
+                       $cont_page = (int)$cont[1];
+                       $cont_user = (int)$cont[2];
+                       $this->dieContinueUsageIf( $cont[0] !== 
(string)$cont_anonpage );
+                       $this->dieContinueUsageIf( $cont[1] !== 
(string)$cont_page );
+                       $this->dieContinueUsageIf( $cont[2] !== 
(string)$cont_user );
+               }
+
+               // Only operate on existing pages
+               $pages = array_keys( $this->getPageSet()->getGoodTitles() );
+               if ( $cont_page > 0 ) {
                        $pages = array_filter( $pages, function ( $v ) use ( 
$cont_page ) {
                                return $v >= $cont_page;
                        } );
@@ -68,39 +78,60 @@
 
                // Apply MAX_PAGES, leaving any over the limit for a continue.
                sort( $pages );
-               $continuePages = null;
                if ( count( $pages ) > self::MAX_PAGES ) {
-                       $continuePages = $pages[self::MAX_PAGES] . '|0';
+                       $nextPage = $pages[self::MAX_PAGES];
+                       $continuePages = "$nextPage|$nextPage|0";
                        $pages = array_slice( $pages, 0, self::MAX_PAGES );
+               } else {
+                       $nextPage = '-';
+                       $continuePages = null;
                }
 
                $result = $this->getResult();
 
-               // First, count anons
-               $this->addTables( 'revision' );
-               $this->addFields( [
-                       'page' => 'rev_page',
-                       'anons' => 'COUNT(DISTINCT rev_user_text)',
-               ] );
-               $this->addWhereFld( 'rev_page', $pages );
-               $this->addWhere( 'rev_user = 0' );
-               $this->addWhere( $db->bitAnd( 'rev_deleted', 
Revision::DELETED_USER ) . ' = 0' );
-               $this->addOption( 'GROUP BY', 'rev_page' );
-               $res = $this->select( __METHOD__ );
-               foreach ( $res as $row ) {
-                       $fit = $result->addValue( [ 'query', 'pages', 
$row->page ],
-                               'anoncontributors', (int)$row->anons
-                       );
-                       if ( !$fit ) {
-                               // This not fitting isn't reasonable, so it 
probably means that
-                               // some other module used up all the space. 
Just set a dummy
-                               // continue and hope it works next time.
-                               $this->setContinueEnumParameter( 'continue',
-                                       $params['continue'] !== null ? 
$params['continue'] : '0|0'
-                               );
+               // First, count anons (but only for pages where we haven't 
already)
+               if ( $cont_anonpage === '-' ) {
+                       $anonpages = [];
+               } elseif ( $cont_anonpage > 0 ) {
+                       $anonpages = array_values( array_filter( $pages, 
function ( $v ) use ( $cont_anonpage ) {
+                               return $v >= $cont_anonpage;
+                       } ) );
+               } else {
+                       $anonpages = $pages;
+               }
+               if ( $anonpages ) {
+                       $counts = array_fill_keys( $anonpages, 0 );
 
-                               return;
+                       $this->addTables( 'revision' );
+                       $this->addFields( [
+                               'page' => 'rev_page',
+                               'anons' => 'COUNT(DISTINCT rev_user_text)',
+                       ] );
+                       if ( count( $anonpages ) === 1 ) {
+                               // See below
+                               $this->addWhere( 'rev_page = ' . 
(int)$anonpages[0] );
+                       } else {
+                               $this->addWhereFld( 'rev_page', $anonpages );
                        }
+                       $this->addWhere( 'rev_user = 0' );
+                       $this->addWhere( $db->bitAnd( 'rev_deleted', 
Revision::DELETED_USER ) . ' = 0' );
+                       $this->addOption( 'GROUP BY', 'rev_page' );
+                       $res = $this->select( __METHOD__ );
+                       foreach ( $res as $row ) {
+                               $counts[$row->page] = (int)$row->anons;
+                       }
+
+                       ksort( $counts );
+                       foreach ( $counts as $page => $count ) {
+                               $fit = $result->addValue( [ 'query', 'pages', 
$page ], 'anoncontributors', $count );
+                               if ( !$fit ) {
+                                       $this->setContinueEnumParameter( 
'continue', "$page|$cont_page|$cont_user" );
+                                       return;
+                               }
+                       }
+
+                       // Update for continuation in named users below
+                       $cont_anonpage = $nextPage;
                }
 
                // Next, add logged-in users
@@ -111,19 +142,21 @@
                        'user' => 'rev_user',
                        'username' => 'MAX(rev_user_text)', // Non-MySQL 
databases don't like partial group-by
                ] );
-               $this->addWhereFld( 'rev_page', $pages );
+               if ( count( $pages ) === 1 ) {
+                       // There's an old bug where MySQL will insist on 
filesorting if a
+                       // field that's constant in WHERE is used in GROUP BY 
or ORDER BY.
+                       // On modern MariaDB, though, the bug doesn't occur if 
it's an integer
+                       // field (as rev_page is) and the value is passed as an 
integer
+                       // rather than a string.
+                       $this->addWhere( 'rev_page = ' . (int)$pages[0] );
+               } else {
+                       $this->addWhereFld( 'rev_page', $pages );
+               }
                $this->addWhere( 'rev_user != 0' );
                $this->addWhere( $db->bitAnd( 'rev_deleted', 
Revision::DELETED_USER ) . ' = 0' );
                $this->addOption( 'GROUP BY', 'rev_page, rev_user' );
+               $this->addOption( 'ORDER BY', 'rev_page, rev_user' );
                $this->addOption( 'LIMIT', $params['limit'] + 1 );
-
-               // Force a sort order to ensure that properties are grouped by 
page
-               // But only if pp_page is not constant in the WHERE clause.
-               if ( count( $pages ) > 1 ) {
-                       $this->addOption( 'ORDER BY', 'rev_page, rev_user' );
-               } else {
-                       $this->addOption( 'ORDER BY', 'rev_user' );
-               }
 
                $limitGroups = [];
                if ( $params['group'] ) {
@@ -172,10 +205,6 @@
                }
 
                if ( $params['continue'] !== null ) {
-                       $cont = explode( '|', $params['continue'] );
-                       $this->dieContinueUsageIf( count( $cont ) != 2 );
-                       $cont_page = (int)$cont[0];
-                       $cont_user = (int)$cont[1];
                        $this->addWhere(
                                "rev_page > $cont_page OR " .
                                "(rev_page = $cont_page AND " .
@@ -189,8 +218,7 @@
                        if ( ++$count > $params['limit'] ) {
                                // We've reached the one extra which shows that
                                // there are additional pages to be had. Stop 
here...
-                               $this->setContinueEnumParameter( 'continue', 
$row->page . '|' . $row->user );
-
+                               $this->setContinueEnumParameter( 'continue', 
"$cont_anonpage|$row->page|$row->user" );
                                return;
                        }
 
@@ -199,8 +227,7 @@
                                'user'
                        );
                        if ( !$fit ) {
-                               $this->setContinueEnumParameter( 'continue', 
$row->page . '|' . $row->user );
-
+                               $this->setContinueEnumParameter( 'continue', 
"$cont_anonpage|$row->page|$row->user" );
                                return;
                        }
                }

-- 
To view, visit https://gerrit.wikimedia.org/r/349479
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I1a7ccd4f1e5fe478edf0a8d216037c429bd7e868
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Anomie <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to