Mobrovac has uploaded a new change for review. https://gerrit.wikimedia.org/r/188618
Change subject: T87520: Initial repo commit ...................................................................... T87520: Initial repo commit This commit also includes some work in progress started before the repository was created. Change-Id: Id9e4af9035adbbe2e45c90dfd7a3346fc685aa18 --- A AUTHORS.txt A RestbaseUpdate.hooks.php A RestbaseUpdate.i18n.php A RestbaseUpdate.php A RestbaseUpdateJob.php A i18n/de.json A i18n/en.json A i18n/es.json A i18n/fr.json A i18n/it.json A i18n/pt.json 11 files changed, 523 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/RestBaseUpdateJobs refs/changes/18/188618/1 diff --git a/AUTHORS.txt b/AUTHORS.txt new file mode 100644 index 0000000..27b2432 --- /dev/null +++ b/AUTHORS.txt @@ -0,0 +1,5 @@ +The Services Team: +* Gabriel Wicke <[email protected]> +* Marko Obrovac <[email protected]> +* James Earl Douglas <[email protected]> + diff --git a/RestbaseUpdate.hooks.php b/RestbaseUpdate.hooks.php new file mode 100644 index 0000000..d1b6f51 --- /dev/null +++ b/RestbaseUpdate.hooks.php @@ -0,0 +1,98 @@ +<?php + +/** + * Hooks for events that should trigger RESTBase updates. + */ +class RestbaseUpdateHooks { + + + /** + * Schedule an async update job in the job queue. 
+ * + * @param Title $title + * @param string $type + * @param array $extra_params + */ + private static function schedule( $title, $type, $extra_params = array() ) { + + $params = array( 'type' => $type ) + $extra_params; + JobQueueGroup::singleton()->push( new RestbaseUpdateJob( $title, $params ) ); + + } + + + /** + * Callback for regular article edits + * + * @param $article WikiPage the modified wiki page object + * @param $editInfo + * @param bool $changed + * @return bool + */ + public static function onArticleEditUpdates( $article, $editInfo, $changed ) { + + if ( $changed ) { + self::schedule( $article->getTitle(), 'edit' ); + } + return true; + + } + + + /** + * Callback for article deletions + * + * @param $article WikiPage the modified wiki page object + * @param $user User the deleting user + * @param string $reason + * @param int $id the page id + * @return bool + */ + public static function onArticleDeleteComplete( $article, $user, $reason, $id ) { + + self::schedule( $article->getTitle(), 'delete' ); + return true; + + } + + + /** + * Callback for article undeletion. See specials/SpecialUndelete.php. + */ + public static function onArticleUndelete( Title $title, $created, $comment ) { + + self::schedule( $title, 'edit' ); + return true; + + } + + + /** + * Callback for article revision changes. See + * revisiondelete/RevDelRevisionList.php. + */ + public static function onArticleRevisionVisibilitySet( $title, $revs ) { + + // TODO complete here with more info / the hidden fields perhaps ? + // XXX do not forget that rev IDs are not yet actually returned + self::schedule( $title, 'rev_delete', array( 'revs' => $revs ) ); + return true; + + } + + + /** + * Title move callback. See Title.php. + */ + public static function onTitleMoveComplete( $title, Title $newtitle, $user, $oldid, $newid ) { + + # Simply update both old and new title. 
+ self::schedule( $title, 'delete', array( 'rev' => $oldid ) ); + self::schedule( $newtitle, 'edit', array( 'rev' => $newid ) ); + return true; + + } + + +} + diff --git a/RestbaseUpdate.i18n.php b/RestbaseUpdate.i18n.php new file mode 100644 index 0000000..0c95e5c --- /dev/null +++ b/RestbaseUpdate.i18n.php @@ -0,0 +1,35 @@ +<?php +/** + * This is a backwards-compatibility shim, generated by: + * https://git.wikimedia.org/blob/mediawiki%2Fcore.git/HEAD/maintenance%2FgenerateJsonI18n.php + * + * Beginning with MediaWiki 1.23, translation strings are stored in json files, + * and the EXTENSION.i18n.php file only exists to provide compatibility with + * older releases of MediaWiki. For more information about this migration, see: + * https://www.mediawiki.org/wiki/Requests_for_comment/Localisation_format + * + * This shim maintains compatibility back to MediaWiki 1.17. + */ +$messages = array(); +if ( !function_exists( 'wfJsonI18nShim8d1c46917fb38c99' ) ) { + function wfJsonI18nShim8d1c46917fb38c99( $cache, $code, &$cachedData ) { + $codeSequence = array_merge( array( $code ), $cachedData['fallbackSequence'] ); + foreach ( $codeSequence as $csCode ) { + $fileName = dirname( __FILE__ ) . 
"/i18n/$csCode.json"; + if ( is_readable( $fileName ) ) { + $data = FormatJson::decode( file_get_contents( $fileName ), true ); + foreach ( array_keys( $data ) as $key ) { + if ( $key === '' || $key[0] === '@' ) { + unset( $data[$key] ); + } + } + $cachedData['messages'] = array_merge( $data, $cachedData['messages'] ); + } + + $cachedData['deps'][] = new FileDependency( $fileName ); + } + return true; + } + + $GLOBALS['wgHooks']['LocalisationCacheRecache'][] = 'wfJsonI18nShim8d1c46917fb38c99'; +} diff --git a/RestbaseUpdate.php b/RestbaseUpdate.php new file mode 100644 index 0000000..6e72f80 --- /dev/null +++ b/RestbaseUpdate.php @@ -0,0 +1,114 @@ +<?php + +/** + * Basic cache invalidation for RESTBase + */ +if ( !defined( 'MEDIAWIKI' ) ) { + echo "RestbaseUpdateJobs extension\n"; + exit( 1 ); +} + +/** + * Class containing basic setup functions. + */ +class RestbaseUpdateSetup { + /** + * Set up RestbaseUpdate. + * + * @return void + */ + public static function setup() { + + global $wgAutoloadClasses, $wgJobClasses, + $wgExtensionCredits, $wgExtensionMessagesFiles, $wgMessagesDirs; + + $dir = __DIR__; + + # Set up class autoloading + $wgAutoloadClasses['RestbaseUpdateHooks'] = "$dir/RestbaseUpdate.hooks.php"; + $wgAutoloadClasses['RestbaseUpdateJob'] = "$dir/RestbaseUpdateJob.php"; + $wgAutoloadClasses['CurlMultiClient'] = "$dir/CurlMultiClient.php"; + + # Add the parsoid job types + $wgJobClasses['RestbaseUpdateJobOnEdit'] = 'RestbaseUpdateJob'; + $wgJobClasses['RestbaseUpdateJobOnDependencyChange'] = 'RestbaseUpdateJob'; + # Old type for transition + # @TODO: remove when old jobs are drained + $wgJobClasses['RestabseUpdateJob'] = 'RestbaseUpdateJob'; + + $wgExtensionCredits['other'][] = array( + 'path' => __FILE__, + 'name' => 'RestbaseUpdate', + 'author' => array( + 'Gabriel Wicke', + 'Marko Obrovac' + ), + 'version' => '0.2.0', + 'url' => 'https://www.mediawiki.org/wiki/Extension:RestbaseUpdateJobs', + 'descriptionmsg' => 'restbase-desc', + 'license-name' => 
'GPL-2.0+', + ); + + # Register localizations. + $wgMessagesDirs['RestbaseUpdateJobs'] = __DIR__ . '/i18n'; + $wgExtensionMessagesFiles['RestbaseUpdateJobs'] = $dir . '/RestbaseUpdate.i18n.php'; + + # Set up a default configuration + self::setupDefaultConfig(); + + # Now register our hooks. + self::registerHooks(); + + } + + + /** + * Set up default config values. Override after requiring the extension. + * + * @return void + */ + protected static function setupDefaultConfig() { + + global $wgRestbaseServers, $wgRestbaseDomain, $wgServer; + + /** + * The RESTBase server to inform of updates. + */ + $wgRestbaseServers = 'http://localhost:7321'; + + /** + * This wiki's domain. + * Defaults to $wgServer's domain name + */ + $wgRestbaseDomain = preg_replace( '/^(https?:\/\/)?(.+?)\/?$/', '$2', $wgServer ); + + } + + + /** + * Register hook handlers. + * + * @return void + */ + protected static function registerHooks() { + + global $wgHooks; + + # Article edit/create + $wgHooks['ArticleEditUpdates'][] = 'RestbaseUpdateHooks::onArticleEditUpdates'; + # Article delete/restore + $wgHooks['ArticleDeleteComplete'][] = 'RestbaseUpdateHooks::onArticleDeleteComplete'; + $wgHooks['ArticleUndelete'][] = 'RestbaseUpdateHooks::onArticleUndelete'; + # Revision delete/restore + $wgHooks['ArticleRevisionVisibilitySet'][] = 'RestbaseUpdateHooks::onArticleRevisionVisibilitySet'; + # Article move + $wgHooks['TitleMoveComplete'][] = 'RestbaseUpdateHooks::onTitleMoveComplete'; + + } + + +} + +# Load hooks that are always set +RestbaseUpdateSetup::setup(); + diff --git a/RestbaseUpdateJob.php b/RestbaseUpdateJob.php new file mode 100644 index 0000000..d172fd0 --- /dev/null +++ b/RestbaseUpdateJob.php @@ -0,0 +1,217 @@ +<?php + +/** + * HTML cache refreshing and -invalidation job for the Parsoid varnish caches. + * + * This job comes in a few variants: + * - a) Recursive jobs to purge caches for backlink pages for a given title. 
+ * They have (type:OnDependencyChange,recursive:true,table:<table>) set. + * - b) Jobs to purge caches for a set of titles (the job title is ignored). + * They have (type:OnDependencyChange,pages:(<page ID>:(<namespace>,<title>),...) set. + * - c) Jobs to purge caches for a single page (the job title) + * They have (type:OnEdit) set. + * + * See + * http://www.mediawiki.org/wiki/Parsoid/Minimal_performance_strategy_for_July_release + */ +class RestbaseUpdateJob extends Job { + + + function __construct( $title, $params, $id = 0 ) { + + // Map old jobs to new 'OnEdit' jobs + if ( !isset( $params['type'] ) ) { + $params['type'] = 'OnEdit'; // b/c + } + + parent::__construct( 'RestbaseUpdateJob' . $params['type'], $title, $params, $id ); + + if ( $params['type'] == 'OnEdit' ) { + // Simple duplicate removal for single-title jobs. Other jobs are + // deduplicated with root job parameters. + $this->removeDuplicates = true; + } + + } + + + function run() { + + // for now we are capable of handling only OnEdit jobs + if ( $this->params['type'] !== 'OnEdit' ) { + return true; + } + + global $wgRestbaseUpdateTitlesPerJob, $wgUpdateRowsPerJob; + + + + if ( $this->params['type'] === 'OnEdit' ) { + // this is the simple case, resolve it + // TODO + $this->invalidateTitle( $this->title ); + } elseif ( $this->params['type'] === 'OnDependencyChange' ) { + // recursive update of linked pages + static $expected = array( 'recursive', 'pages' ); // new jobs have one of these + if ( !array_intersect( array_keys( $this->params ), $expected ) ) { + // Old-style job; discard + return true; + } + // Job to purge all (or a range of) backlink pages for a page + if ( !empty( $this->params['recursive'] ) ) { + // Convert this into some title-batch jobs and possibly a + // recursive ParsoidCacheUpdateJob job for the rest of the backlinks + $jobs = BacklinkJobUtils::partitionBacklinkJob( + $this, + $wgUpdateRowsPerJob, + $wgRestbaseUpdateTitlesPerJob, // jobs-per-title + // Carry over 
information for de-duplication + array( + 'params' => $this->getRootJobParams() + array( + 'table' => $this->params['table'], 'type' => 'OnDependencyChange', + 'extra' => $this->params['extra'] ) + ) + ); + JobQueueGroup::singleton()->push( $jobs ); + } elseif ( isset( $this->params['pages'] ) ) { + $this->invalidateTitles( $this->params['pages'] ); + } + } + + return true; + } + + /** + * Construct a cache server URL + * + * @param $title Title + * @param string $server the server name + * @param bool $prev use previous revision id if true + * @return string an absolute URL for the article on the given server + */ + protected function getParsoidURL( Title $title, $server, $prev = false ) { + global $wgParsoidWikiPrefix; + + $oldid = $prev ? + $title->getPreviousRevisionID( $title->getLatestRevID() ) : + $title->getLatestRevID(); + + // Construct Parsoid web service URL + return $server . '/' . $wgParsoidWikiPrefix . '/' . + wfUrlencode( $title->getPrefixedDBkey() ) . '?oldid=' . $oldid; + } + + /** + * Check an array of CurlMultiClient results for errors, and setLastError + * if there are any. + * @param $results CurlMultiClient result array + */ + protected function checkCurlResults( $results ) { + foreach( $results as $k => $result ) { + if ($results[$k]['error'] != null) { + $this->setLastError($results[$k]['error']); + return false; + } + } + return true; + } + + /** + * Invalidate a single title object after an edit. Send headers that let + * Parsoid reuse transclusion and extension expansions. 
+ * @param $title Title + */ + protected function invalidateTitle( Title $title ) { + global $wgParsoidCacheServers; + + # First request the new version + $parsoidInfo = array(); + $parsoidInfo['cacheID'] = $title->getPreviousRevisionID( $title->getLatestRevID() ); + $parsoidInfo['changedTitle'] = $this->title->getPrefixedDBkey(); + + $requests = array(); + foreach ( $wgParsoidCacheServers as $server ) { + $requests[] = array( + 'url' => $this->getParsoidURL( $title, $server ), + 'headers' => array( + 'X-Parsoid: ' . json_encode( $parsoidInfo ), + // Force implicit cache refresh similar to + // https://www.varnish-cache.org/trac/wiki/VCLExampleEnableForceRefresh + 'Cache-control: no-cache' + ) + ); + } + wfDebug( "ParsoidCacheUpdateJob::invalidateTitle: " . serialize( $requests ) . "\n" ); + $this->checkCurlResults( CurlMultiClient::request( $requests ) ); + + # And now purge the previous revision so that we make efficient use of + # the Varnish cache space without relying on LRU. Since the URL + # differs we can't use implicit refresh. + $requests = array(); + foreach ( $wgParsoidCacheServers as $server ) { + // @TODO: this triggers a getPreviousRevisionID() query per server + $requests[] = array( + 'url' => $this->getParsoidURL( $title, $server, true ) + ); + } + $options = CurlMultiClient::getDefaultOptions(); + $options[CURLOPT_CUSTOMREQUEST] = "PURGE"; + $this->checkCurlResults( CurlMultiClient::request( $requests, $options ) ); + return $this->getLastError() == null; + } + + + /** + * Invalidate an array (or iterator) of Title objects, right now. Send + * headers that signal Parsoid which of transclusions or extensions need + * to be updated. 
+ * @param $pages array (page ID => (namespace, DB key)) mapping + */ + protected function invalidateTitles( array $pages ) { + global $wgParsoidCacheServers, $wgLanguageCode; + + if ( !isset( $wgParsoidCacheServers ) ) { + $wgParsoidCacheServers = array( 'localhost' ); + } + + # Re-render + $parsoidInfo = array(); + + # Pass some useful info to Parsoid + $parsoidInfo['changedTitle'] = $this->title->getPrefixedDBkey(); + $parsoidInfo['mode'] = $this->params['table'] == 'templatelinks' ? + 'templates' : 'files'; + + # Build an array of update requests + $requests = array(); + foreach ( $wgParsoidCacheServers as $server ) { + foreach ( $pages as $id => $nsDbKey ) { + $title = Title::makeTitle( $nsDbKey[0], $nsDbKey[1] ); + # TODO, but low prio: if getLatestRevID returns 0, only purge title (deletion). + # Low prio because VE would normally refuse to load the page + # anyway, and no private info is exposed. + $url = $this->getParsoidURL( $title, $server ); + + $parsoidInfo['cacheID'] = $title->getLatestRevID(); + + $requests[] = array( + 'url' => $url, + 'headers' => array( + 'X-Parsoid: ' . json_encode( $parsoidInfo ), + // Force implicit cache refresh similar to + // https://www.varnish-cache.org/trac/wiki/VCLExampleEnableForceRefresh + 'Cache-control: no-cache' + ) + ); + } + } + + // Now send off all those update requests + $this->checkCurlResults( CurlMultiClient::request( $requests ) ); + + wfDebug( 'ParsoidCacheUpdateJob::invalidateTitles update: ' . + serialize( $requests ) . 
"\n" ); + + return $this->getLastError() == null; + } +} diff --git a/i18n/de.json b/i18n/de.json new file mode 100644 index 0000000..1024707 --- /dev/null +++ b/i18n/de.json @@ -0,0 +1,9 @@ +{ + "@metadata": { + "authors": [ + "Marko Obrovac" + ] + }, + "restbase-desc": "Erweiterung für aktualisieren RESTBase" +} + diff --git a/i18n/en.json b/i18n/en.json new file mode 100644 index 0000000..15e2f27 --- /dev/null +++ b/i18n/en.json @@ -0,0 +1,9 @@ +{ + "@metadata": { + "authors": [ + "Marko Obrovac" + ] + }, + "restbase-desc": "RESTBase update job extension" +} + diff --git a/i18n/es.json b/i18n/es.json new file mode 100644 index 0000000..89e6422 --- /dev/null +++ b/i18n/es.json @@ -0,0 +1,9 @@ +{ + "@metadata": { + "authors": [ + "Marko Obrovac" + ] + }, + "restbase-desc": "Extensione para actualizar el contenido de RESTBase" +} + diff --git a/i18n/fr.json b/i18n/fr.json new file mode 100644 index 0000000..94ba97a --- /dev/null +++ b/i18n/fr.json @@ -0,0 +1,9 @@ +{ + "@metadata": { + "authors": [ + "Marko Obrovac" + ] + }, + "restbase-desc": "Extension pour la mise à jour du contenu de RESTBase" +} + diff --git a/i18n/it.json b/i18n/it.json new file mode 100644 index 0000000..21ab5d1 --- /dev/null +++ b/i18n/it.json @@ -0,0 +1,9 @@ +{ + "@metadata": { + "authors": [ + "Marko Obrovac" + ] + }, + "restbase-desc": "Estonsione per l'aggiornamento del contenuto di RESTBase" +} + diff --git a/i18n/pt.json b/i18n/pt.json new file mode 100644 index 0000000..6ed36bf --- /dev/null +++ b/i18n/pt.json @@ -0,0 +1,9 @@ +{ + "@metadata": { + "authors": [ + "Marko Obrovac" + ] + }, + "parsoid-desc": "Extensão para a atualização do conteúdo de RESTBase" +} + -- To view, visit https://gerrit.wikimedia.org/r/188618 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Id9e4af9035adbbe2e45c90dfd7a3346fc685aa18 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/RestBaseUpdateJobs Gerrit-Branch: master Gerrit-Owner: Mobrovac 
<[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
