GWicke has submitted this change and it was merged. Change subject: Adapt the extension to RESTBase's needs ......................................................................
Adapt the extension to RESTBase's needs The extension monitors the following hooks: - ArticleEditUpdates - ArticleDeleteComplete - ArticleUndelete - ArticleRevisionVisibilitySet - TitleMoveComplete - FileUpload For each case, a queue job is created invalidating RESTBase's version of the page (by sending a request with the no-cache header). Additional jobs are created for any dependent pages which transclude the edited or deleted page. Bug: T87520 Change-Id: I9814249f350e2278f3941d62255e4ee9bb89c4b1 --- M RestbaseUpdate.hooks.php M RestbaseUpdate.php M RestbaseUpdateJob.php 3 files changed, 232 insertions(+), 130 deletions(-) Approvals: GWicke: Verified; Looks good to me, approved diff --git a/RestbaseUpdate.hooks.php b/RestbaseUpdate.hooks.php index c7cfbc2..5948457 100644 --- a/RestbaseUpdate.hooks.php +++ b/RestbaseUpdate.hooks.php @@ -1,5 +1,6 @@ <?php + /** * Hooks for events that should trigger RESTBase updates. */ @@ -7,16 +8,57 @@ /** + * Get the job parameters for a given title, job type and table name. + * + * @param Title $title + * @param string $type the job type (OnEdit or OnDependencyChange) + * @param string $table (optional for OnDependencyChange, templatelinks or + * imagelinks) + * @return Array + */ + private static function getJobParams( Title $title, $type, $table = null ) { + + $params = array( 'type' => $type ); + if ( $type == 'OnDependencyChange' ) { + $params['table'] = $table; + $params['recursive'] = true; + return $params + Job::newRootJobParams( + "RestbaseUpdateJob{$type}:{$table}:{$title->getPrefixedText()}:{$title->getLatestRevID()}"); + } else { + return $params; + } + + } + + + /** * Schedule an async update job in the job queue. * * @param Title $title - * @param string $type + * @param string $action * @param array $extra_params */ - private static function schedule( $title, $type, $extra_params = array() ) { + private static function schedule( $title, $action, $extra_params = array() ) { - $params = array( 'type' => $type ) + $extra_params; - JobQueueGroup::singleton()->push( new RestbaseUpdateJob( $title, $params ) ); + wfDebug( "RestbaseUpdateJobHook::schedule: " . $title->getText() . ' - ' . $action . "\n" ); + if ( $title->getNamespace() == NS_FILE ) { + // File. For now we assume the actual image or file has + // changed, not just the description page. + $params = self::getJobParams( $title, 'OnDependencyChange', 'imagelinks' ); + $job = new RestbaseUpdateJob( $title, $params ); + JobQueueGroup::singleton()->push( $job ); + JobQueueGroup::singleton()->deduplicateRootJob( $job ); + } else { + // Push one job for the page itself + $params = self::getJobParams( $title, 'OnEdit' ) + + array( 'mode' => $action ) + $extra_params; + JobQueueGroup::singleton()->push( new RestbaseUpdateJob( $title, $params ) ); + // and one for pages transcluding this page. + $params = self::getJobParams( $title, 'OnDependencyChange', 'templatelinks' ); + $job = new RestbaseUpdateJob( $title, $params ); + JobQueueGroup::singleton()->push( $job ); + JobQueueGroup::singleton()->deduplicateRootJob( $job ); + } } @@ -74,8 +116,7 @@ public static function onArticleRevisionVisibilitySet( $title, $revs ) { // TODO complete here with more info / the hidden fields perhaps ? - // XXX do not forget that rev IDs are not yet actually returned - self::schedule( $title, 'rev_delete', array( 'revs' => $revs ) ); + self::schedule( $title, 'rev_visibility', array( 'revs' => $revs ) ); return true; } @@ -94,5 +135,24 @@ } + /** + * File upload hook. See filerepo/file/LocalFile.php. + * + * XXX gwicke: This tracks file uploads including re-uploads of a new + * version of an image. These will implicitly also trigger null edits on + * the associated WikiPage (which normally exists), which then triggers + * the onArticleEditUpdates hook. Maybe we should thus drop this hook and + * simply assume that all edits to the WikiPage also change the image + * data. Those edits tend to happen not long after an upload, at which + * point the image is likely not used in many pages. + */ + public static function onFileUpload( File $file ) { + + self::updateTitle( $file->getTitle(), 'file' ); + return true; + + } + + } diff --git a/RestbaseUpdate.php b/RestbaseUpdate.php index bed4007..bce71ac 100644 --- a/RestbaseUpdate.php +++ b/RestbaseUpdate.php @@ -27,31 +27,27 @@ # Set up class autoloading $wgAutoloadClasses['RestbaseUpdateHooks'] = "$dir/RestbaseUpdate.hooks.php"; $wgAutoloadClasses['RestbaseUpdateJob'] = "$dir/RestbaseUpdateJob.php"; - $wgAutoloadClasses['CurlMultiClient'] = "$dir/CurlMultiClient.php"; - # Add the parsoid job types + # Add the job types $wgJobClasses['RestbaseUpdateJobOnEdit'] = 'RestbaseUpdateJob'; $wgJobClasses['RestbaseUpdateJobOnDependencyChange'] = 'RestbaseUpdateJob'; - # Old type for transition - # @TODO: remove when old jobs are drained - $wgJobClasses['RestabseUpdateJob'] = 'RestbaseUpdateJob'; $wgExtensionCredits['other'][] = array( 'path' => __FILE__, - 'name' => 'RestbaseUpdate', + 'name' => 'RestBaseUpdateJobs', 'author' => array( 'Gabriel Wicke', 'Marko Obrovac' ), 'version' => '0.2.0', - 'url' => 'https://www.mediawiki.org/wiki/Extension:RestbaseUpdateJobs', + 'url' => 'https://www.mediawiki.org/wiki/Extension:RestBaseUpdateJobs', 'descriptionmsg' => 'restbaseupdatejobs-desc', 'license-name' => 'GPL-2.0+', ); # Register localizations. - $wgMessagesDirs['RestbaseUpdateJobs'] = __DIR__ . '/i18n'; - $wgExtensionMessagesFiles['RestbaseUpdateJobs'] = $dir . '/RestbaseUpdate.i18n.php'; + $wgMessagesDirs['RestBaseUpdateJobs'] = __DIR__ . '/i18n'; + $wgExtensionMessagesFiles['RestBaseUpdateJobs'] = $dir . '/RestbaseUpdate.i18n.php'; # Set up a default configuration self::setupDefaultConfig(); @@ -69,7 +65,7 @@ */ protected static function setupDefaultConfig() { - global $wgRestbaseServers, $wgRestbaseDomain, $wgServer; + global $wgRestbaseServer, $wgRestbaseAPIVersion, $wgRestbaseUpdateTitlesPerJob; /** * The RESTBase server to inform of updates. @@ -77,10 +73,14 @@ $wgRestbaseServers = 'http://localhost:7321'; /** - * This wiki's domain. - * Defaults to $wgServer's domain name - */ - $wgRestbaseDomain = preg_replace( '/^(https?:\/\/)?(.+?)\/?$/', '$2', $wgServer ); + * The RESTBase API version in use + */ + $wgRestbaseAPIVersion = 'v1'; + + /** + * The number of recursive jobs to process in parallel + */ + $wgRestbaseUpdateTitlesPerJob = 50; } @@ -103,12 +103,15 @@ $wgHooks['ArticleRevisionVisibilitySet'][] = 'RestbaseUpdateHooks::onArticleRevisionVisibilitySet'; # Article move $wgHooks['TitleMoveComplete'][] = 'RestbaseUpdateHooks::onTitleMoveComplete'; + # File upload + $wgHooks['FileUpload'][] = 'RestbaseUpdateHooks::onFileUpload'; } } + # Load hooks that are always set RestbaseUpdateSetup::setup(); diff --git a/RestbaseUpdateJob.php b/RestbaseUpdateJob.php index 717f580..1883f5f 100644 --- a/RestbaseUpdateJob.php +++ b/RestbaseUpdateJob.php @@ -1,7 +1,7 @@ <?php /** - * HTML cache refreshing and -invalidation job for the Parsoid varnish caches. + * HTML cache refreshing and -invalidation job for RESTBase. * * This job comes in a few variants: * - a) Recursive jobs to purge caches for backlink pages for a given title. @@ -10,9 +10,6 @@ * They have have (type:OnDependencyChange,pages:(<page ID>:(<namespace>,<title>),...) set. * - c) Jobs to purge caches for a single page (the job title) * They have (type:OnEdit) set. - * - * See - * http://www.mediawiki.org/wiki/Parsoid/Minimal_performance_strategy_for_July_release */ class RestbaseUpdateJob extends Job { @@ -35,21 +32,77 @@ } - function run() { + /** + * Constructs the URL prefix for RESTBase and caches it + * @return string RESTBase's URL prefix + */ + private static function getRestbasePrefix() { - // for now we are capable of handling only OnEdit jobs - if ( $this->params['type'] !== 'OnEdit' ) { - return true; + static $prefix = null; + // set the static variable so as not to construct + // the prefix URL every time + if ( is_null( $prefix ) ) { + global $wgRestbaseServer, $wgRestbaseAPIVersion, + $wgRestbaseDomain, $wgServer; + if ( !isset( $wgRestbaseDomain ) || is_null( $wgRestbaseDomain ) ) { + $wgRestbaseDomain = preg_replace( '/^(https?:\/\/)?(.+?)\/?$/', '$2', $wgServer ); + } + $prefix = implode( '/', array( + $wgRestbaseServer, + $wgRestbaseDomain, + $wgRestbaseAPIVersion + ) ); } + + return $prefix; + + } + + + /** + * Construct a revision ID invalidation URL + * + * @param $revid integer the revision ID to invalidate + * @return string an absolute URL for the revision + */ + private static function getRevisionURL( $revid ) { + + // construct the URL + return implode( '/', array( self::getRestbasePrefix(), + 'page', 'revision', $revid ) ); + + } + + + /** + * Construct a page title invalidation URL + * + * @param $title Title + * @param $revid integer the revision ID to use + * @return string an absolute URL for the article + */ + private static function getPageTitleURL( Title $title, $revid ) { + + // construct the URL + return implode( '/', array( self::getRestbasePrefix(), 'page', + 'html', wfUrlencode( $title->getPrefixedDBkey() ), $revid ) ); + + } + + + function run() { global $wgRestbaseUpdateTitlesPerJob, $wgUpdateRowsPerJob; - - if ( $this->params['type'] === 'OnEdit' ) { - // this is the simple case, resolve it - // TODO - $this->invalidateTitle( $this->title ); + // there are two cases here: + // a) this is a rev_visibility action + // b) this is some type of a page edit + if ( $this->params['mode'] === 'rev_visibility' ) { + $this->signalRevChange(); + } else { + $this->invalidateTitle(); + } } elseif ( $this->params['type'] === 'OnDependencyChange' ) { // recursive update of linked pages static $expected = array( 'recursive', 'pages' ); // new jobs have one of these @@ -60,7 +113,7 @@ // Job to purge all (or a range of) backlink pages for a page if ( !empty( $this->params['recursive'] ) ) { // Convert this into some title-batch jobs and possibly a - // recursive ParsoidCacheUpdateJob job for the rest of the backlinks + // recursive RestbaseUpdateJob job for the rest of the backlinks $jobs = BacklinkJobUtils::partitionBacklinkJob( $this, $wgUpdateRowsPerJob, @@ -68,8 +121,7 @@ // Carry over information for de-duplication array( 'params' => $this->getRootJobParams() + array( - 'table' => $this->params['table'], 'type' => 'OnDependencyChange', - 'extra' => $this->params['extra'] ) + 'table' => $this->params['table'], 'type' => 'OnDependencyChange' ) ) ); JobQueueGroup::singleton()->push( $jobs ); @@ -79,139 +131,126 @@ } return true; + } - /** - * Construct a cache server URL - * - * @param $title Title - * @param string $server the server name - * @param bool $prev use previous revision id if true - * @return string an absolute URL for the article on the given server - */ - protected function getParsoidURL( Title $title, $server, $prev = false ) { - global $wgParsoidWikiPrefix; - - $oldid = $prev ? - $title->getPreviousRevisionID( $title->getLatestRevID() ) : - $title->getLatestRevID(); - - // Construct Parsoid web service URL - return $server . '/' . $wgParsoidWikiPrefix . '/' . - wfUrlencode( $title->getPrefixedDBkey() ) . '?oldid=' . $oldid; - } /** - * Check an array of CurlMultiClient results for errors, and setLastError - * if there are any. - * @param $results CurlMultiClient result array + * Dispatches the request(s) using MultiHttpClient, waits for the result(s), + * checks them and sets the error flag if needed + * @param $reqs array an array of request maps to dispatch + * @return boolean whether all of the requests have been executed successfully */ - protected function checkCurlResults( $results ) { - foreach( $results as $k => $result ) { - if ($results[$k]['error'] != null) { - $this->setLastError($results[$k]['error']); + protected function dispatchRequests( array $reqs ) { + + // create a new MultiHttpClient instance with default params + $http = new MultiHttpClient( array( 'maxConnsPerHost' => count( $reqs ) ) ); + + // send the requests and wait for responses + $reqs = $http->runMulti( $reqs ); + + // check for errors + foreach( $reqs as $k => $arr ) { + if ( $reqs[$k]['response']['error'] != '' ) { + $this->setLastError( $reqs[$k]['response']['error'] ); return false; } } + + // ok, all good return true; + } + /** - * Invalidate a single title object after an edit. Send headers that let - * Parsoid reuse transclusion and extension expansions. - * @param $title Title + * Signals to RESTBase a change has happened in the + * visibility of a revision */ - protected function invalidateTitle( Title $title ) { - global $wgParsoidCacheServers; + protected function signalRevChange() { - # First request the new version - $parsoidInfo = array(); - $parsoidInfo['cacheID'] = $title->getPreviousRevisionID( $title->getLatestRevID() ); - $parsoidInfo['changedTitle'] = $this->title->getPrefixedDBkey(); - + // construct the requests $requests = array(); - foreach ( $wgParsoidCacheServers as $server ) { + foreach( $this->params['revs'] as $revid ) { $requests[] = array( - 'url' => $this->getParsoidURL( $title, $server ), + 'method' => 'GET', + 'url' => self::getRevisionURL( $revid ), 'headers' => array( - 'X-Parsoid: ' . json_encode( $parsoidInfo ), - // Force implicit cache refresh similar to - // https://www.varnish-cache.org/trac/wiki/VCLExampleEnableForceRefresh 'Cache-control: no-cache' ) ); } - wfDebug( "ParsoidCacheUpdateJob::invalidateTitle: " . serialize( $requests ) . "\n" ); - $this->checkCurlResults( CurlMultiClient::request( $requests ) ); - # And now purge the previous revision so that we make efficient use of - # the Varnish cache space without relying on LRU. Since the URL - # differs we can't use implicit refresh. - $requests = array(); - foreach ( $wgParsoidCacheServers as $server ) { - // @TODO: this triggers a getPreviousRevisionID() query per server - $requests[] = array( - 'url' => $this->getParsoidURL( $title, $server, true ) - ); - } - $options = CurlMultiClient::getDefaultOptions(); - $options[CURLOPT_CUSTOMREQUEST] = "PURGE"; - $this->checkCurlResults( CurlMultiClient::request( $requests, $options ) ); + // dispatch the requests + ///wfDebug( "RestbaseUpdateJob::signalRevChange: " . json_encode( $requests ) . "\n" ); + $this->dispatchRequests( $requests ); + return $this->getLastError() == null; + } /** - * Invalidate an array (or iterator) of Title objects, right now. Send - * headers that signal Parsoid which of transclusions or extensions need - * to be updated. + * Invalidate a single title object after an edit. Send headers that let + * RESTBase/Parsoid reuse transclusion and extension expansions. + */ + protected function invalidateTitle() { + + $title = $this->title; + $latest = $title->getLatestRevID(); + $previous = $title->getPreviousRevisionID( $latest ); + + $requests = array( array( + 'method' => 'GET', + 'url' => self::getPageTitleURL( $title, $latest ), + 'headers' => array( + 'X-Restbase-ParentRevision: ' . $previous, + 'Cache-control: no-cache' + ) + ) ); + ///wfDebug( "RestbaseUpdateJob::invalidateTitle: " . json_encode( $requests ) . "\n" ); + $this->dispatchRequests( $requests ); + + return $this->getLastError() == null; + + } + + + /** + * Invalidate an array (or iterator) of Title objects, right now. * @param $pages array (page ID => (namespace, DB key)) mapping */ protected function invalidateTitles( array $pages ) { - global $wgParsoidCacheServers, $wgLanguageCode; - if ( !isset( $wgParsoidCacheServers ) ) { - $wgParsoidCacheServers = array( 'localhost' ); - } - - # Re-render - $parsoidInfo = array(); - - # Pass some useful info to Parsoid - $parsoidInfo['changedTitle'] = $this->title->getPrefixedDBkey(); - $parsoidInfo['mode'] = $this->params['table'] == 'templatelinks' ? + $mode = $this->params['table'] == 'templatelinks' ? 'templates' : 'files'; - # Build an array of update requests + // Build an array of update requests $requests = array(); - foreach ( $wgParsoidCacheServers as $server ) { - foreach ( $pages as $id => $nsDbKey ) { - $title = Title::makeTitle( $nsDbKey[0], $nsDbKey[1] ); - # TODO, but low prio: if getLatestRevID returns 0, only purge title (deletion). - # Low prio because VE would normally refuse to load the page - # anyway, and no private info is exposed. - $url = $this->getParsoidURL( $title, $server ); - - $parsoidInfo['cacheID'] = $title->getLatestRevID(); - - $requests[] = array( - 'url' => $url, - 'headers' => array( - 'X-Parsoid: ' . json_encode( $parsoidInfo ), - // Force implicit cache refresh similar to - // https://www.varnish-cache.org/trac/wiki/VCLExampleEnableForceRefresh - 'Cache-control: no-cache' - ) - ); - } + foreach ( $pages as $id => $nsDbKey ) { + $title = Title::makeTitle( $nsDbKey[0], $nsDbKey[1] ); + $latest = $title->getLatestRevID(); + $url = self::getPageTitleURL( $title, $latest ); + $requests[] = array( + 'method' => 'GET', + 'url' => $url, + 'headers' => array( + 'X-Restbase-Mode: ' . $mode, + 'Cache-control: no-cache' + ) + ); } // Now send off all those update requests - $this->checkCurlResults( CurlMultiClient::request( $requests ) ); + $this->dispatchRequests( $requests ); - wfDebug( 'ParsoidCacheUpdateJob::invalidateTitles update: ' . - serialize( $requests ) . "\n" ); + //wfDebug( 'RestbaseUpdateJob::invalidateTitles update: ' . + // json_encode( $requests ) . "\n" ); return $this->getLastError() == null; + } + + } + -- To view, visit https://gerrit.wikimedia.org/r/188952 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I9814249f350e2278f3941d62255e4ee9bb89c4b1 Gerrit-PatchSet: 3 Gerrit-Project: mediawiki/extensions/RestBaseUpdateJobs Gerrit-Branch: master Gerrit-Owner: Mobrovac <mobro...@wikimedia.org> Gerrit-Reviewer: Aaron Schulz <asch...@wikimedia.org> Gerrit-Reviewer: GWicke <gwi...@wikimedia.org> Gerrit-Reviewer: Jdouglas <jdoug...@wikimedia.org> Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits