GWicke has submitted this change and it was merged.

Change subject: Adapt the extension to RESTBase's needs
......................................................................


Adapt the extension to RESTBase's needs

The extension monitors the following hooks:
- ArticleEditUpdates
- ArticleDeleteComplete
- ArticleUndelete
- ArticleRevisionVisibilitySet
- TitleMoveComplete
- FileUpload

For each hook, a job is queued that invalidates RESTBase's version of
the page by sending a request with the Cache-control: no-cache header.
Additional jobs are queued for any dependent pages which transclude the
edited or deleted page.

Bug: T87520
Change-Id: I9814249f350e2278f3941d62255e4ee9bb89c4b1
---
M RestbaseUpdate.hooks.php
M RestbaseUpdate.php
M RestbaseUpdateJob.php
3 files changed, 232 insertions(+), 130 deletions(-)

Approvals:
  GWicke: Verified; Looks good to me, approved



diff --git a/RestbaseUpdate.hooks.php b/RestbaseUpdate.hooks.php
index c7cfbc2..5948457 100644
--- a/RestbaseUpdate.hooks.php
+++ b/RestbaseUpdate.hooks.php
@@ -1,5 +1,6 @@
 <?php
 
+
 /**
  * Hooks for events that should trigger RESTBase updates.
  */
@@ -7,16 +8,57 @@
 
 
        /**
+        * Get the job parameters for a given title, job type and table name.
+        *
+        * @param Title $title
+        * @param string $type the job type (OnEdit or OnDependencyChange)
+        * @param string $table (optional for OnDependencyChange, templatelinks 
or
+        * imagelinks)
+        * @return Array
+        */
+       private static function getJobParams( Title $title, $type, $table = 
null ) {
+
+               $params = array( 'type' => $type );
+               if ( $type == 'OnDependencyChange' ) {
+                       $params['table'] = $table;
+                       $params['recursive'] = true;
+                       return $params + Job::newRootJobParams(
+                               
"RestbaseUpdateJob{$type}:{$table}:{$title->getPrefixedText()}:{$title->getLatestRevID()}");
+               } else {
+                       return $params;
+               }
+
+       }
+
+
+       /**
         * Schedule an async update job in the job queue.
         *
         * @param Title $title
-        * @param string $type
+        * @param string $action
         * @param array $extra_params
         */
-       private static function schedule( $title, $type, $extra_params = 
array() ) {
+       private static function schedule( $title, $action, $extra_params = 
array() ) {
 
-               $params = array( 'type' => $type ) + $extra_params;
-               JobQueueGroup::singleton()->push( new RestbaseUpdateJob( 
$title, $params ) );
+               wfDebug( "RestbaseUpdateJobHook::schedule: " . 
$title->getText() . ' - ' . $action . "\n" );
+               if ( $title->getNamespace() == NS_FILE ) {
+                       // File. For now we assume the actual image or file has
+                       // changed, not just the description page.
+                       $params = self::getJobParams( $title, 
'OnDependencyChange', 'imagelinks' );
+                       $job = new RestbaseUpdateJob( $title, $params );
+                       JobQueueGroup::singleton()->push( $job );
+                       JobQueueGroup::singleton()->deduplicateRootJob( $job );
+               } else {
+                       // Push one job for the page itself
+                       $params = self::getJobParams( $title, 'OnEdit' )
+                               + array( 'mode' => $action ) + $extra_params;
+                       JobQueueGroup::singleton()->push( new 
RestbaseUpdateJob( $title, $params ) );
+                       // and one for pages transcluding this page.
+                       $params = self::getJobParams( $title, 
'OnDependencyChange', 'templatelinks' );
+                       $job = new RestbaseUpdateJob( $title, $params );
+                       JobQueueGroup::singleton()->push( $job );
+                       JobQueueGroup::singleton()->deduplicateRootJob( $job );
+               }
 
        }
 
@@ -74,8 +116,7 @@
        public static function onArticleRevisionVisibilitySet( $title, $revs ) {
 
                // TODO complete here with more info / the hidden fields 
perhaps ?
-               // XXX do not forget that rev IDs are not yet actually returned
-               self::schedule( $title, 'rev_delete', array( 'revs' => $revs ) 
);
+               self::schedule( $title, 'rev_visibility', array( 'revs' => 
$revs ) );
                return true;
 
        }
@@ -94,5 +135,24 @@
        }
 
 
+       /**
+        * File upload hook. See filerepo/file/LocalFile.php.
+        *
+        * XXX gwicke: This tracks file uploads including re-uploads of a new
+        * version of an image. These will implicitly also trigger null edits on
+        * the associated WikiPage (which normally exists), which then triggers
+        * the onArticleEditUpdates hook. Maybe we should thus drop this hook 
and
+        * simply assume that all edits to the WikiPage also change the image
+        * data.  Those edits tend to happen not long after an upload, at which
+        * point the image is likely not used in many pages.
+        */
+       public static function onFileUpload( File $file ) {
+
+               self::updateTitle( $file->getTitle(), 'file' );
+               return true;
+
+       }
+
+
 }
 
diff --git a/RestbaseUpdate.php b/RestbaseUpdate.php
index bed4007..bce71ac 100644
--- a/RestbaseUpdate.php
+++ b/RestbaseUpdate.php
@@ -27,31 +27,27 @@
                # Set up class autoloading
                $wgAutoloadClasses['RestbaseUpdateHooks'] = 
"$dir/RestbaseUpdate.hooks.php";
                $wgAutoloadClasses['RestbaseUpdateJob'] = 
"$dir/RestbaseUpdateJob.php";
-               $wgAutoloadClasses['CurlMultiClient'] = 
"$dir/CurlMultiClient.php";
 
-               # Add the parsoid job types
+               # Add the job types
                $wgJobClasses['RestbaseUpdateJobOnEdit'] = 'RestbaseUpdateJob';
                $wgJobClasses['RestbaseUpdateJobOnDependencyChange'] = 
'RestbaseUpdateJob';
-               # Old type for transition
-               # @TODO: remove when old jobs are drained
-               $wgJobClasses['RestabseUpdateJob'] = 'RestbaseUpdateJob';
 
                $wgExtensionCredits['other'][] = array(
                        'path' => __FILE__,
-                       'name' => 'RestbaseUpdate',
+                       'name' => 'RestBaseUpdateJobs',
                        'author' => array(
                                'Gabriel Wicke',
                                'Marko Obrovac'
                        ),
                        'version' => '0.2.0',
-                       'url' => 
'https://www.mediawiki.org/wiki/Extension:RestbaseUpdateJobs',
+                       'url' => 
'https://www.mediawiki.org/wiki/Extension:RestBaseUpdateJobs',
                        'descriptionmsg' => 'restbaseupdatejobs-desc',
                        'license-name' => 'GPL-2.0+',
                );
 
                # Register localizations.
-               $wgMessagesDirs['RestbaseUpdateJobs'] = __DIR__ . '/i18n';
-               $wgExtensionMessagesFiles['RestbaseUpdateJobs'] = $dir . 
'/RestbaseUpdate.i18n.php';
+               $wgMessagesDirs['RestBaseUpdateJobs'] = __DIR__ . '/i18n';
+               $wgExtensionMessagesFiles['RestBaseUpdateJobs'] = $dir . 
'/RestbaseUpdate.i18n.php';
 
                # Set up a default configuration
                self::setupDefaultConfig();
@@ -69,7 +65,7 @@
         */
        protected static function setupDefaultConfig() {
 
-               global $wgRestbaseServers, $wgRestbaseDomain, $wgServer;
+               global $wgRestbaseServer, $wgRestbaseAPIVersion, 
$wgRestbaseUpdateTitlesPerJob;
 
                /**
                 * The RESTBase server to inform of updates.
@@ -77,10 +73,14 @@
                $wgRestbaseServers = 'http://localhost:7321';
 
                /**
-                * This wiki's domain.
-                * Defaults to $wgServer's domain name
-               */
-               $wgRestbaseDomain = preg_replace( '/^(https?:\/\/)?(.+?)\/?$/', 
'$2', $wgServer );
+                * The RESTBase API version in use
+                */
+               $wgRestbaseAPIVersion = 'v1';
+
+               /**
+                * The number of recursive jobs to process in parallel
+                */
+               $wgRestbaseUpdateTitlesPerJob = 50;
 
        }
 
@@ -103,12 +103,15 @@
                $wgHooks['ArticleRevisionVisibilitySet'][] = 
'RestbaseUpdateHooks::onArticleRevisionVisibilitySet';
                # Article move
                $wgHooks['TitleMoveComplete'][] = 
'RestbaseUpdateHooks::onTitleMoveComplete';
+               # File upload
+               $wgHooks['FileUpload'][] = 'RestbaseUpdateHooks::onFileUpload';
 
        }
 
 
 }
 
+
 # Load hooks that are always set
 RestbaseUpdateSetup::setup();
 
diff --git a/RestbaseUpdateJob.php b/RestbaseUpdateJob.php
index 717f580..1883f5f 100644
--- a/RestbaseUpdateJob.php
+++ b/RestbaseUpdateJob.php
@@ -1,7 +1,7 @@
 <?php
 
 /**
- * HTML cache refreshing and -invalidation job for the Parsoid varnish caches.
+ * HTML cache refreshing and -invalidation job for RESTBase.
  *
  * This job comes in a few variants:
  *   - a) Recursive jobs to purge caches for backlink pages for a given title.
@@ -10,9 +10,6 @@
  *           They have have (type:OnDependencyChange,pages:(<page 
ID>:(<namespace>,<title>),...) set.
  *   - c) Jobs to purge caches for a single page (the job title)
  *        They have (type:OnEdit) set.
- *
- * See
- * 
http://www.mediawiki.org/wiki/Parsoid/Minimal_performance_strategy_for_July_release
  */
 class RestbaseUpdateJob extends Job {
 
@@ -35,21 +32,77 @@
        }
 
 
-       function run() {
+       /**
+        * Constructs the URL prefix for RESTBase and caches it
+        * @return string RESTBase's URL prefix
+        */
+       private static function getRestbasePrefix() {
 
-               // for now we are capable of handling only OnEdit jobs
-               if ( $this->params['type'] !== 'OnEdit' ) {
-                       return true;
+               static $prefix = null;
+               // set the static variable so as not to construct
+               // the prefix URL every time
+               if ( is_null( $prefix ) ) {
+                       global $wgRestbaseServer, $wgRestbaseAPIVersion,
+                               $wgRestbaseDomain, $wgServer;
+                       if ( !isset( $wgRestbaseDomain ) || is_null( 
$wgRestbaseDomain ) ) {
+                               $wgRestbaseDomain = preg_replace( 
'/^(https?:\/\/)?(.+?)\/?$/', '$2', $wgServer );
+                       }
+                       $prefix = implode( '/', array(
+                               $wgRestbaseServer,
+                               $wgRestbaseDomain,
+                               $wgRestbaseAPIVersion
+                       ) );
                }
+
+               return $prefix;
+
+       }
+
+
+       /**
+        * Construct a revision ID invalidation URL
+        *
+        * @param $revid integer the revision ID to invalidate
+        * @return string an absolute URL for the revision
+        */
+       private static function getRevisionURL( $revid ) {
+
+               // construct the URL
+               return implode( '/', array( self::getRestbasePrefix(),
+                       'page', 'revision', $revid ) );
+
+       }
+
+
+       /**
+        * Construct a page title invalidation URL
+        *
+        * @param $title Title
+        * @param $revid integer the revision ID to use
+        * @return string an absolute URL for the article
+        */
+       private static function getPageTitleURL( Title $title, $revid ) {
+
+               // construct the URL
+               return implode( '/', array( self::getRestbasePrefix(), 'page',
+                       'html', wfUrlencode( $title->getPrefixedDBkey() ), 
$revid ) );
+
+       }
+
+
+       function run() {
 
                global $wgRestbaseUpdateTitlesPerJob, $wgUpdateRowsPerJob;
 
-
-
                if ( $this->params['type'] === 'OnEdit' ) {
-                       // this is the simple case, resolve it
-                       // TODO
-                       $this->invalidateTitle( $this->title );
+                       // there are two cases here:
+                       // a) this is a rev_visibility action
+                       // b) this is some type of a page edit
+                       if ( $this->params['mode'] === 'rev_visibility' ) {
+                               $this->signalRevChange();
+                       } else {
+                               $this->invalidateTitle();
+                       }
                } elseif ( $this->params['type'] === 'OnDependencyChange' ) {
                        // recursive update of linked pages
                        static $expected = array( 'recursive', 'pages' ); // 
new jobs have one of these
@@ -60,7 +113,7 @@
                        // Job to purge all (or a range of) backlink pages for 
a page
                        if ( !empty( $this->params['recursive'] ) ) {
                                // Convert this into some title-batch jobs and 
possibly a
-                               // recursive ParsoidCacheUpdateJob job for the 
rest of the backlinks
+                               // recursive RestbaseUpdateJob job for the rest 
of the backlinks
                                $jobs = BacklinkJobUtils::partitionBacklinkJob(
                                        $this,
                                        $wgUpdateRowsPerJob,
@@ -68,8 +121,7 @@
                                        // Carry over information for 
de-duplication
                                        array(
                                                'params' => 
$this->getRootJobParams() + array(
-                                                       'table' => 
$this->params['table'], 'type' => 'OnDependencyChange',
-                                                       'extra' => 
$this->params['extra'] )
+                                                       'table' => 
$this->params['table'], 'type' => 'OnDependencyChange' )
                                        )
                                );
                                JobQueueGroup::singleton()->push( $jobs );
@@ -79,139 +131,126 @@
                }
 
                return true;
+
        }
 
-       /**
-        * Construct a cache server URL
-        *
-        * @param $title Title
-        * @param string $server the server name
-        * @param bool $prev use previous revision id if true
-        * @return string an absolute URL for the article on the given server
-        */
-       protected function getParsoidURL( Title $title, $server, $prev = false 
) {
-               global $wgParsoidWikiPrefix;
-
-               $oldid = $prev ?
-                       $title->getPreviousRevisionID( $title->getLatestRevID() 
) :
-                       $title->getLatestRevID();
-
-               // Construct Parsoid web service URL
-               return $server . '/' . $wgParsoidWikiPrefix . '/' .
-                       wfUrlencode( $title->getPrefixedDBkey() ) . '?oldid=' . 
$oldid;
-       }
 
        /**
-        * Check an array of CurlMultiClient results for errors, and 
setLastError
-        * if there are any.
-        * @param $results CurlMultiClient result array
+        * Dispatches the request(s) using MultiHttpClient, waits for the 
result(s),
+        * checks them and sets the error flag if needed
+        * @param $reqs array an array of request maps to dispatch
+        * @return boolean whether all of the requests have been executed 
successfully
         */
-       protected function checkCurlResults( $results ) {
-               foreach( $results as $k => $result ) {
-                       if ($results[$k]['error'] != null) {
-                               $this->setLastError($results[$k]['error']);
+       protected function dispatchRequests( array $reqs ) {
+
+               // create a new MultiHttpClient instance with default params
+               $http = new MultiHttpClient( array( 'maxConnsPerHost' => count( 
$reqs ) ) );
+
+               // send the requests and wait for responses
+               $reqs = $http->runMulti( $reqs );
+
+               // check for errors
+               foreach( $reqs as $k => $arr ) {
+                       if ( $reqs[$k]['response']['error'] != '' ) {
+                               $this->setLastError( 
$reqs[$k]['response']['error'] );
                                return false;
                        }
                }
+
+               // ok, all good
                return true;
+
        }
 
+
        /**
-        * Invalidate a single title object after an edit. Send headers that let
-        * Parsoid reuse transclusion and extension expansions.
-        * @param $title Title
+        * Signals to RESTBase a change has happened in the
+        * visibility of a revision
         */
-       protected function invalidateTitle( Title $title ) {
-               global $wgParsoidCacheServers;
+       protected function signalRevChange() {
 
-               # First request the new version
-               $parsoidInfo = array();
-               $parsoidInfo['cacheID'] = $title->getPreviousRevisionID( 
$title->getLatestRevID() );
-               $parsoidInfo['changedTitle'] = $this->title->getPrefixedDBkey();
-
+               // construct the requests
                $requests = array();
-               foreach ( $wgParsoidCacheServers as $server ) {
+               foreach( $this->params['revs'] as $revid ) {
                        $requests[] = array(
-                               'url'     => $this->getParsoidURL( $title, 
$server ),
+                               'method' => 'GET',
+                               'url' => self::getRevisionURL( $revid ),
                                'headers' => array(
-                                       'X-Parsoid: ' . json_encode( 
$parsoidInfo ),
-                                       // Force implicit cache refresh similar 
to
-                                       // 
https://www.varnish-cache.org/trac/wiki/VCLExampleEnableForceRefresh
                                        'Cache-control: no-cache'
                                )
                        );
                }
-               wfDebug( "ParsoidCacheUpdateJob::invalidateTitle: " . 
serialize( $requests ) . "\n" );
-               $this->checkCurlResults( CurlMultiClient::request( $requests ) 
);
 
-               # And now purge the previous revision so that we make efficient 
use of
-               # the Varnish cache space without relying on LRU. Since the URL
-               # differs we can't use implicit refresh.
-               $requests = array();
-               foreach ( $wgParsoidCacheServers as $server ) {
-                       // @TODO: this triggers a getPreviousRevisionID() query 
per server
-                       $requests[] = array(
-                               'url' => $this->getParsoidURL( $title, $server, 
true )
-                       );
-               }
-               $options = CurlMultiClient::getDefaultOptions();
-               $options[CURLOPT_CUSTOMREQUEST] = "PURGE";
-               $this->checkCurlResults( CurlMultiClient::request( $requests, 
$options ) );
+               // dispatch the requests
+               ///wfDebug( "RestbaseUpdateJob::signalRevChange: " . 
json_encode( $requests ) . "\n" );
+               $this->dispatchRequests( $requests );
+
                return $this->getLastError() == null;
+
        }
 
 
        /**
-        * Invalidate an array (or iterator) of Title objects, right now. Send
-        * headers that signal Parsoid which of transclusions or extensions need
-        * to be updated.
+        * Invalidate a single title object after an edit. Send headers that let
+        * RESTBase/Parsoid reuse transclusion and extension expansions.
+        */
+       protected function invalidateTitle() {
+
+               $title = $this->title;
+               $latest = $title->getLatestRevID();
+               $previous = $title->getPreviousRevisionID( $latest );
+
+               $requests = array( array(
+                       'method' => 'GET',
+                       'url'     => self::getPageTitleURL( $title, $latest ),
+                       'headers' => array(
+                               'X-Restbase-ParentRevision: ' . $previous,
+                               'Cache-control: no-cache'
+                       )
+               ) );
+               ///wfDebug( "RestbaseUpdateJob::invalidateTitle: " . 
json_encode( $requests ) . "\n" );
+               $this->dispatchRequests( $requests );
+
+               return $this->getLastError() == null;
+
+       }
+
+
+       /**
+        * Invalidate an array (or iterator) of Title objects, right now.
         * @param $pages array (page ID => (namespace, DB key)) mapping
         */
        protected function invalidateTitles( array $pages ) {
-               global $wgParsoidCacheServers, $wgLanguageCode;
 
-               if ( !isset( $wgParsoidCacheServers ) ) {
-                       $wgParsoidCacheServers = array( 'localhost' );
-               }
-
-               # Re-render
-               $parsoidInfo = array();
-
-               # Pass some useful info to Parsoid
-               $parsoidInfo['changedTitle'] = $this->title->getPrefixedDBkey();
-               $parsoidInfo['mode'] = $this->params['table'] == 
'templatelinks' ?
+               $mode = $this->params['table'] == 'templatelinks' ?
                        'templates' : 'files';
 
-               # Build an array of update requests
+               // Build an array of update requests
                $requests = array();
-               foreach ( $wgParsoidCacheServers as $server ) {
-                       foreach ( $pages as $id => $nsDbKey ) {
-                               $title = Title::makeTitle( $nsDbKey[0], 
$nsDbKey[1] );
-                               # TODO, but low prio: if getLatestRevID returns 
0, only purge title (deletion).
-                               # Low prio because VE would normally refuse to 
load the page
-                               # anyway, and no private info is exposed.
-                               $url = $this->getParsoidURL( $title, $server );
-
-                               $parsoidInfo['cacheID'] = 
$title->getLatestRevID();
-
-                               $requests[] = array(
-                                       'url'     => $url,
-                                       'headers' => array(
-                                               'X-Parsoid: ' . json_encode( 
$parsoidInfo ),
-                                               // Force implicit cache refresh 
similar to
-                                               // 
https://www.varnish-cache.org/trac/wiki/VCLExampleEnableForceRefresh
-                                               'Cache-control: no-cache'
-                                       )
-                               );
-                       }
+               foreach ( $pages as $id => $nsDbKey ) {
+                       $title = Title::makeTitle( $nsDbKey[0], $nsDbKey[1] );
+                       $latest = $title->getLatestRevID();
+                       $url = self::getPageTitleURL( $title, $latest );
+                       $requests[] = array(
+                               'method' => 'GET',
+                               'url'     => $url,
+                               'headers' => array(
+                                       'X-Restbase-Mode: ' . $mode,
+                                       'Cache-control: no-cache'
+                               )
+                       );
                }
 
                // Now send off all those update requests
-               $this->checkCurlResults( CurlMultiClient::request( $requests ) 
);
+               $this->dispatchRequests( $requests );
 
-               wfDebug( 'ParsoidCacheUpdateJob::invalidateTitles update: ' .
-                       serialize( $requests ) . "\n" );
+               //wfDebug( 'RestbaseUpdateJob::invalidateTitles update: ' .
+               //      json_encode( $requests ) . "\n" );
 
                return $this->getLastError() == null;
+
        }
+
+
 }
+

-- 
To view, visit https://gerrit.wikimedia.org/r/188952
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I9814249f350e2278f3941d62255e4ee9bb89c4b1
Gerrit-PatchSet: 3
Gerrit-Project: mediawiki/extensions/RestBaseUpdateJobs
Gerrit-Branch: master
Gerrit-Owner: Mobrovac <mobro...@wikimedia.org>
Gerrit-Reviewer: Aaron Schulz <asch...@wikimedia.org>
Gerrit-Reviewer: GWicke <gwi...@wikimedia.org>
Gerrit-Reviewer: Jdouglas <jdoug...@wikimedia.org>
Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to