https://www.mediawiki.org/wiki/Special:Code/MediaWiki/114658
Revision: 114658
Author: kaldari
Date: 2012-04-02 18:26:37 +0000 (Mon, 02 Apr 2012)
Log Message:
-----------
adding importing script for testing
Added Paths:
-----------
trunk/extensions/PageTriage/tools/
trunk/extensions/PageTriage/tools/importNewPages.php
Added: trunk/extensions/PageTriage/tools/importNewPages.php
===================================================================
--- trunk/extensions/PageTriage/tools/importNewPages.php
(rev 0)
+++ trunk/extensions/PageTriage/tools/importNewPages.php 2012-04-02
18:26:37 UTC (rev 114658)
@@ -0,0 +1,256 @@
+<?php
+/**
+ * This script imports newly created articles from one wiki to another.
+ * It only copies the text of the articles, not the history or editor
information.
+ * This script should only be used for testing purposes. For normal transwiki
importing, refer to:
+ * http://meta.wikimedia.org/wiki/Help:Import
+ *
+ * This script can only be run from the command line.
+ * The syntax is:
+ * importNewPages.php <# of articles> <username> <password> <source API path>
<destination API path>
+ * The API path parameters are optional.
+ **/
+
+/**
+ * Interface to cURL
+ **/
+class Http {
+ private $curlHandle;
+ private $id;
+
+ function __construct() {
+ $this->id = rand( 0, 1000000 );
+ $this->curlHandle = curl_init();
+ curl_setopt( $this->curlHandle, CURLOPT_COOKIEJAR,
'/tmp/cookies'.$this->id.'.dat' );
+ curl_setopt( $this->curlHandle, CURLOPT_COOKIEFILE,
'/tmp/cookies'.$this->id.'.dat' );
+ curl_setopt( $this->curlHandle, CURLOPT_MAXCONNECTS, 10 );
+ curl_setopt( $this->curlHandle, CURLOPT_CLOSEPOLICY,
CURLCLOSEPOLICY_LEAST_RECENTLY_USED );
+ }
+
+ function get( $url ) {
+ curl_setopt( $this->curlHandle, CURLOPT_URL, $url );
+ curl_setopt( $this->curlHandle, CURLOPT_USERAGENT, 'php PageTriageBot'
);
+ curl_setopt( $this->curlHandle, CURLOPT_HTTPGET, 1 );
+ curl_setopt( $this->curlHandle, CURLOPT_RETURNTRANSFER, 1 );
+ curl_setopt( $this->curlHandle, CURLOPT_TIMEOUT, 60 );
+ curl_setopt( $this->curlHandle, CURLOPT_CONNECTTIMEOUT, 10 );
+ curl_setopt( $this->curlHandle, CURLOPT_HEADER, 0 );
+ curl_setopt( $this->curlHandle, CURLOPT_ENCODING, 'UTF-8' );
+ curl_setopt( $this->curlHandle, CURLOPT_FOLLOWLOCATION, 1 );
+ curl_setopt( $this->curlHandle, CURLOPT_MAXREDIRS, 5 );
+ return curl_exec( $this->curlHandle );
+ }
+
+ function post( $url, $postVars ) {
+ curl_setopt( $this->curlHandle, CURLOPT_URL, $url );
+ curl_setopt( $this->curlHandle, CURLOPT_USERAGENT, 'php PageTriageBot'
);
+ curl_setopt( $this->curlHandle, CURLOPT_POST, 1 );
+ curl_setopt( $this->curlHandle, CURLOPT_RETURNTRANSFER, 1 );
+ curl_setopt( $this->curlHandle, CURLOPT_TIMEOUT, 60 );
+ curl_setopt( $this->curlHandle, CURLOPT_CONNECTTIMEOUT, 10 );
+ curl_setopt( $this->curlHandle, CURLOPT_HTTPHEADER, array( 'Expect:' )
);
+ curl_setopt( $this->curlHandle, CURLOPT_ENCODING, 'UTF-8' );
+ curl_setopt( $this->curlHandle, CURLOPT_FOLLOWLOCATION, 0 );
+ curl_setopt( $this->curlHandle, CURLOPT_MAXREDIRS, 5 );
+ curl_setopt( $this->curlHandle, CURLOPT_POSTFIELDS, $postVars );
+ return curl_exec( $this->curlHandle );
+ }
+
+ function __destruct() {
+ curl_close( $this->curlHandle );
+ @unlink('/tmp/cookies'.$this->id.'.dat');
+ }
+
+}
+
+/**
+ * Interface to the wiki's API
+ **/
+class WikiApi {
+ private $http, $token, $url;
+
+ /**
+ * Construct the class instance
+ * @param $url string The URL used to access the API
+ **/
+ function __construct( $url ) {
+ $this->http = new Http;
+ $this->url = $url;
+ }
+
+ /**
+ * Send a get query to the API
+ * @param $query The query string
+ * @return The result from the API
+ **/
+ function get( $query ) {
+ $result = $this->http->get( $this->url.$query );
+ return unserialize( $result );
+ }
+
+ /**
+ * Send a post query to the API
+ * @param $query The query string
+ * @return The result from the API
+ **/
+ function post( $query, $postVars ) {
+ $result = $this->http->post( $this->url.$query, $postVars );
+ return unserialize( $result );
+ }
+
+ /**
+ * Log into the wiki via the API
+ * @param $username The user's username
+ * @param $password The user's password
+ * @return The result from the API
+ **/
+ function login( $username, $password ) {
+ $postVars = array( 'lgname' => $username, 'lgpassword' => $password );
+ $result = $this->post( '?action=login&format=php', $postVars );
+ if ( $result['login']['result'] === 'NeedToken' ) {
+ // Do it again with the token
+ $postVars['lgtoken'] = $result['login']['token'];
+ $result = $this->post( '?action=login&format=php', $postVars );
+ }
+ if ( $result['login']['result'] !== 'Success' ) {
+ echo "Login failed.\n";
+ die();
+ } else {
+ return $result;
+ }
+ }
+
+ /**
+ * Get an edit token for the user
+ * @return The token
+ **/
+ function getToken () {
+ $params = array(
+ 'action' => 'query',
+ 'format' => 'php',
+ 'prop' => 'info',
+ 'intoken' => 'edit',
+ 'titles' => 'Main Page'
+ );
+ $params = http_build_query( $params );
+ $result = $this->get( '?'.$params );
+ foreach ( $result['query']['pages'] as $page ) {
+ return $page['edittoken'];
+ }
+ }
+
+ /**
+ * Get the contents of a page
+ * @param $title string The title of the wikipedia page to fetch
+ * @return The wikitext for the page (or false)
+ **/
+ function getPage( $title ) {
+ $params = array(
+ 'action' => 'query',
+ 'format' => 'php',
+ 'prop' => 'revisions',
+ 'titles' => $title,
+ 'rvlimit' => 1,
+ 'rvprop' => 'content'
+ );
+ $params = http_build_query( $params );
+ $result = $this->get('?'.$params );
+ foreach ( $result['query']['pages'] as $page ) {
+ if ( isset( $page['revisions'][0]['*'] ) ) {
+ return $page['revisions'][0]['*'];
+ } else {
+ return false;
+ }
+ }
+ }
+
+ /**
+ * Get the newest pages from the wiki
+ * @param $namespace The namespace to limit the search to
+ * @param $limit The maximum number of pages to return
+ * @return array of titles
+ **/
+ function getNewPages( $namespace = 0, $limit = 10 ) {
+ $params = array(
+ 'action' => 'query',
+ 'list' => 'recentchanges',
+ 'format' => 'php',
+ 'rctype' => 'new',
+ 'rcprop' => 'title',
+ 'rcnamespace' => $namespace,
+ 'rclimit' => $limit
+ );
+ $params = http_build_query( $params );
+ $result = $this->get( '?'.$params );
+ $pages = $result['query']['recentchanges'];
+ $pageTitles = array();
+ foreach ( $pages as $page ) {
+ $pageTitles[] = $page['title'];
+ }
+ return $pageTitles;
+ }
+
+ /**
+ * Create a new page on the wiki
+ * @param $title The title of the new page
+ * @param $text The text of the new page
+ * @return The result from the API
+ **/
+ function createPage ( $title, $text ) {
+ if ( !$this->token ) {
+ $this->token = $this->getToken();
+ }
+ $params = array(
+ 'title' => $title,
+ 'text' => $text,
+ 'token' => $this->token,
+ 'summary' => 'Importing article from another wiki for testing
purposes',
+ 'createonly' => '1'
+ );
+ return $this->post('?action=edit&format=php', $params);
+ }
+}
+
+if ( isset( $_SERVER ) && isset( $_SERVER['REQUEST_METHOD'] ) ) {
+ print( 'This script must be run from the command line.' );
+ die();
+}
+
+if ( !isset( $argv[1] ) || !isset( $argv[2] ) || !isset( $argv[3] ) ) {
+ print( 'The correct syntax is:\nimportNewPages.php <# of articles>
<username> <password>\n' );
+ die();
+}
+
+if ( $argv[4] ) {
+ $source = new WikiApi( $argv[4] );
+} else {
+ $source = new WikiApi( 'http://en.wikipedia.org/w/api.php' );
+}
+
+$pages = array();
+
+if ( $argv[1] > 0 && $argv[1] <= 10000 ) {
+ $pages = $source->getNewPages( 0, $argv[1] );
+} else {
+ $pages = $source->getNewPages( 0, 10 );
+}
+
+if ( $argv[5] ) {
+ $destination = new WikiApi( $argv[5] );
+} else {
+ $destination = new WikiApi( 'http://ee-prototype.wmflabs.org/w/api.php'
);
+}
+$destination->login( $argv[2], $argv[3] );
+
+foreach ( $pages as $page ) {
+ $text = $source->getPage( $page );
+ $text = $text."\n[[Category:Copied from another wiki for testing
purposes only]]";
+ $result = $destination->createPage( $page, $text );
+ if ( isset( $result['error'] ) ) {
+ echo "Error: $page\n";
+ } else {
+ echo "Success: $page\n";
+ }
+}
+
+echo "Done.\n";
Property changes on: trunk/extensions/PageTriage/tools/importNewPages.php
___________________________________________________________________
Added: svn:eol-style
+ native
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs