[MediaWiki-commits] [Gerrit] Adapt the extension to RESTBase's needs - change (mediawiki...RestBaseUpdateJobs)

2015-02-24 Thread GWicke (Code Review)
GWicke has submitted this change and it was merged.

Change subject: Adapt the extension to RESTBase's needs
..


Adapt the extension to RESTBase's needs

The extension monitors the following hooks:
- ArticleEditUpdates
- ArticleDeleteComplete
- ArticleUndelete
- ArticleRevisionVisibilitySet
- TitleMoveComplete
- FileUpload

For each case, a queue job is created invalidating RESTBase's version of
the page (by sending a request with the no-cache header). Additional
jobs are created for any dependent pages which transclude the edited or
deleted page.

Bug: T87520
Change-Id: I9814249f350e2278f3941d62255e4ee9bb89c4b1
---
M RestbaseUpdate.hooks.php
M RestbaseUpdate.php
M RestbaseUpdateJob.php
3 files changed, 232 insertions(+), 130 deletions(-)

Approvals:
  GWicke: Verified; Looks good to me, approved



diff --git a/RestbaseUpdate.hooks.php b/RestbaseUpdate.hooks.php
index c7cfbc2..5948457 100644
--- a/RestbaseUpdate.hooks.php
+++ b/RestbaseUpdate.hooks.php
@@ -1,5 +1,6 @@
 <?php
 
+
 /**
  * Hooks for events that should trigger RESTBase updates.
  */
@@ -7,16 +8,57 @@
 
 
/**
+* Get the job parameters for a given title, job type and table name.
+*
+* @param Title $title
+* @param string $type the job type (OnEdit or OnDependencyChange)
+* @param string $table (optional for OnDependencyChange, templatelinks 
or
+* imagelinks)
+* @return Array
+*/
+   private static function getJobParams( Title $title, $type, $table = 
null ) {
+
+   $params = array( 'type' => $type );
+   if ( $type == 'OnDependencyChange' ) {
+   $params['table'] = $table;
+   $params['recursive'] = true;
+   return $params + Job::newRootJobParams(
+   
"RestbaseUpdateJob{$type}:{$table}:{$title->getPrefixedText()}:{$title->getLatestRevID()}");
+   } else {
+   return $params;
+   }
+
+   }
+
+
+   /**
 * Schedule an async update job in the job queue.
 *
 * @param Title $title
-* @param string $type
+* @param string $action
 * @param array $extra_params
 */
-   private static function schedule( $title, $type, $extra_params = 
array() ) {
+   private static function schedule( $title, $action, $extra_params = 
array() ) {
 
-   $params = array( 'type' => $type ) + $extra_params;
-   JobQueueGroup::singleton()->push( new RestbaseUpdateJob( 
$title, $params ) );
+   wfDebug( "RestbaseUpdateJobHook::schedule: " . 
$title->getText() . ' - ' . $action . "\n" );
+   if ( $title->getNamespace() == NS_FILE ) {
+   // File. For now we assume the actual image or file has
+   // changed, not just the description page.
+   $params = self::getJobParams( $title, 
'OnDependencyChange', 'imagelinks' );
+   $job = new RestbaseUpdateJob( $title, $params );
+   JobQueueGroup::singleton()->push( $job );
+   JobQueueGroup::singleton()->deduplicateRootJob( $job );
+   } else {
+   // Push one job for the page itself
+   $params = self::getJobParams( $title, 'OnEdit' )
+   + array( 'mode' => $action ) + $extra_params;
+   JobQueueGroup::singleton()->push( new 
RestbaseUpdateJob( $title, $params ) );
+   // and one for pages transcluding this page.
+   $params = self::getJobParams( $title, 
'OnDependencyChange', 'templatelinks' );
+   $job = new RestbaseUpdateJob( $title, $params );
+   JobQueueGroup::singleton()->push( $job );
+   JobQueueGroup::singleton()->deduplicateRootJob( $job );
+   }
 
}
 
@@ -74,8 +116,7 @@
public static function onArticleRevisionVisibilitySet( $title, $revs ) {
 
// TODO complete here with more info / the hidden fields 
perhaps ?
-   // XXX do not forget that rev IDs are not yet actually returned
-   self::schedule( $title, 'rev_delete', array( 'revs' => $revs ) 
);
+   self::schedule( $title, 'rev_visibility', array( 'revs' => 
$revs ) );
return true;
 
}
@@ -94,5 +135,24 @@
}
 
 
+   /**
+* File upload hook. See filerepo/file/LocalFile.php.
+*
+* XXX gwicke: This tracks file uploads including re-uploads of a new
+* version of an image. These will implicitly also trigger null edits on
+* the associated WikiPage (which normally exists), which then triggers
+* the onArticleEditUpdates hook. Maybe we should thus drop this hook 
and
+* simply assume that all edits to 

[MediaWiki-commits] [Gerrit] Adapt the extension to RESTBase's needs - change (mediawiki...RestBaseUpdateJobs)

2015-02-05 Thread Mobrovac (Code Review)
Mobrovac has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/188952

Change subject: Adapt the extension to RESTBase's needs
..

Adapt the extension to RESTBase's needs

The extension monitors the following hooks:
- ArticleEditUpdates
- ArticleDeleteComplete
- ArticleUndelete
- ArticleRevisionVisibilitySet
- TitleMoveComplete
- FileUpload

For each case, a queue job is created invalidating RESTBase's version of
the page (by sending a request with the no-cache header). Additional
jobs are created for any dependent pages which transclude the edited or
deleted page.

Bug: T87520
Change-Id: I9814249f350e2278f3941d62255e4ee9bb89c4b1
---
A CurlMultiClient.php
M RestbaseUpdate.hooks.php
M RestbaseUpdate.php
M RestbaseUpdateJob.php
4 files changed, 339 insertions(+), 131 deletions(-)


  git pull 
ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/RestBaseUpdateJobs 
refs/changes/52/188952/1

diff --git a/CurlMultiClient.php b/CurlMultiClient.php
new file mode 100644
index 000..61928a4
--- /dev/null
+++ b/CurlMultiClient.php
@@ -0,0 +1,113 @@
+<?php
+
+/**
+ * A simple parallel CURL client helper class
+ */
+class CurlMultiClient {
+
+   /**
+* Get the default CURL options used for each request
+*
+* @static
+* @return array default options
+*/
+   public static function getDefaultOptions() {
+   return array(
+   CURLOPT_HEADER => 0,
+   CURLOPT_RETURNTRANSFER => 1
+   );
+   }
+
+   /**
+* Perform several CURL requests in parallel, and return the combined
+* results.
+*
+* @static
+* @param $requests array requests, each with an url and an optional
+*  'headers' member:
+*array(
+*  'url' => 'http://server.com/foo',
+*  'headers' => array( 'X-Foo: Bar' )
+*)
+* @param $options array curl options used for each request, default
+* {CurlMultiClient::getDefaultOptions}.
+* @return array An array of arrays containing 'error' and 'data'
+* members. If there are errors, data will be null. If there are no
+* errors, the error member will be null and data will contain the
+* response data as a string.
+*/
+   public static function request( $requests, array $options = null ) {
+   if ( !count( $requests ) ) {
+   return array();
+   }
+
+   $handles = array();
+
+   if ( $options === null ) { // add default options
+   $options = CurlMultiClient::getDefaultOptions();
+   }
+
+   // add curl options to each handle
+   foreach ( $requests as $k => $row ) {
+   $handle = curl_init();
+   $reqOptions = array(
+   CURLOPT_URL => $row['url'],
+   // https://github.com/guzzle/guzzle/issues/349
+   CURLOPT_FORBID_REUSE => true
+   ) + $options;
+   wfDebug( "adding url: " . $row['url'] );
+   if ( isset( $row['headers'] ) ) {
+   $reqOptions[CURLOPT_HTTPHEADER] = 
$row['headers'];
+   }
+   curl_setopt_array( $handle, $reqOptions );
+
+   $handles[$k] = $handle;
+   }
+
+   $mh = curl_multi_init();
+
+   foreach ( $handles as $handle ) {
+   curl_multi_add_handle( $mh, $handle );
+   }
+
+   $active = null; // handles still being processed
+   //execute the handles
+   do {
+   do {
+   // perform work as long as there is any
+   $status_cme = curl_multi_exec( $mh, $active );
+   } while ( $status_cme == CURLM_CALL_MULTI_PERFORM );
+   if ( $active > 0 && $status_cme === CURLM_OK ) {
+   // wait for more work to become available
+   if ( curl_multi_select( $mh, 10 ) ) {
+   // Wait for 5 ms, somewhat similar to 
the suggestion at
+   // 
http://curl.haxx.se/libcurl/c/curl_multi_fdset.html
+   // We pick a smaller value as we are 
typically hitting
+   // fast internal services so status 
changes are more
+   // likely.
+   usleep(5000);
+   }
+   }
+   } while ( $active  $status_cme ==