jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/338916 )

Change subject: Add feature to import pages from local directory hierarchy
......................................................................


Add feature to import pages from local directory hierarchy

This adds a maintenance script that scans a directory for files
and imports them. Top-level files are imported into the main
namespace, and files in subdirectories are imported into namespaces
named as those subdirectories.

No file extensions are stripped (in order to be able to import JS
and CSS files etc.).

A 'watch' option is added, which will keep the script running and
monitoring the files for changes. Whenever one is modified it will
be re-imported. This feature relies on the inotify Pecl extension.

Also fix a couple of coding-standard errors elsewhere.

Change-Id: I872dddfe43eaf6029d5282ea18e9fca426fd8995
---
M .gitignore
M CHANGES
M ExternalArticles.i18n.php
M README
A composer.json
M extension.json
A maintenance/importTextFiles.php
A phpcs.xml
M src/Hooks.php
A src/TextFileImporter.php
10 files changed, 251 insertions(+), 21 deletions(-)

Approvals:
  jenkins-bot: Verified
  Samwilson: Looks good to me, approved



diff --git a/.gitignore b/.gitignore
index 1689d7a..c60b13a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@
 *.kate-swp
 .*.swp
 node_modules/**
+/vendor
+/composer.lock
diff --git a/CHANGES b/CHANGES
index cab6d11..c4ed4d5 100755
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,6 @@
+== 2017-02-21 version 0.4.0 ==
+* Add system for importing text files.
+
 == 2017-02-21 version 0.3.0 ==
 * Switch to use the extension registration system (and therefore increase 
MediaWiki version requirement to 1.24).
 
diff --git a/ExternalArticles.i18n.php b/ExternalArticles.i18n.php
index 748a1a6..4f668b1 100644
--- a/ExternalArticles.i18n.php
+++ b/ExternalArticles.i18n.php
@@ -10,12 +10,12 @@
  *
  * This shim maintains compatibility back to MediaWiki 1.17.
  */
-$messages = array();
+$messages = [];
 if ( !function_exists( 'wfJsonI18nShim7ed12447d100dede' ) ) {
        function wfJsonI18nShim7ed12447d100dede( $cache, $code, &$cachedData ) {
-               $codeSequence = array_merge( array( $code ), 
$cachedData['fallbackSequence'] );
+               $codeSequence = array_merge( [ $code ], 
$cachedData['fallbackSequence'] );
                foreach ( $codeSequence as $csCode ) {
-                       $fileName = dirname( __FILE__ ) . "/i18n/$csCode.json";
+                       $fileName = __DIR__ . "/i18n/$csCode.json";
                        if ( is_readable( $fileName ) ) {
                                $data = FormatJson::decode( file_get_contents( 
$fileName ), true );
                                foreach ( array_keys( $data ) as $key ) {
diff --git a/README b/README
index f693ff6..8a095e2 100755
--- a/README
+++ b/README
@@ -1,6 +1,6 @@
-ExternalArticles
-----------------
-
-A MediaWiki extension to automatically fetch article text from external wikis.
-
-https://www.mediawiki.org/wiki/Extension:ExternalArticles
+ExternalArticles
+----------------
+
+A MediaWiki extension for loading page text from external sources
+
+https://www.mediawiki.org/wiki/Extension:ExternalArticles
diff --git a/composer.json b/composer.json
new file mode 100644
index 0000000..4b963f0
--- /dev/null
+++ b/composer.json
@@ -0,0 +1,25 @@
+{
+       "name": "samwilson/mediawiki-extensions-external-articles",
+       "description": "A MediaWiki extension for loading page text from 
external sources",
+       "type": "mediawiki-extension",
+       "license": "GPL-3.0",
+       "keywords": ["importing", "MediaWiki"],
+       "support": {
+               "issues": "https://phabricator.wikimedia.org",
+               "irc": "irc://irc.freenode.net/mediawiki",
+               "source": 
"https://gerrit.wikimedia.org/r/p/mediawiki/extensions/ExternalArticles.git"
+       },
+       "require-dev": {
+               "jakub-onderka/php-parallel-lint": "^0.9",
+               "mediawiki/mediawiki-codesniffer": "^0.7"
+       },
+       "scripts": {
+               "test": [
+                       "parallel-lint . --exclude vendor",
+                       "phpcs -p -s"
+               ],
+               "fix": [
+                       "phpcbf"
+               ]
+       }
+}
diff --git a/extension.json b/extension.json
index 43050ab..a5c3421 100644
--- a/extension.json
+++ b/extension.json
@@ -1,6 +1,6 @@
 {
        "name": "ExternalArticles",
-       "version": "0.3.0",
+       "version": "0.4.0",
        "author": [
                "Nathan Perry",
                "Alvinos",
@@ -10,7 +10,8 @@
        "descriptionmsg": "externalarticles-desc",
        "type": "extension",
        "AutoloadClasses": {
-               "MediaWiki\\Extensions\\ExternalArticles\\Hooks": 
"src/Hooks.php"
+               "MediaWiki\\Extensions\\ExternalArticles\\Hooks": 
"src/Hooks.php",
+               "MediaWiki\\Extensions\\ExternalArticles\\TextFileImporter": 
"src/TextFileImporter.php"
        },
        "MessagesDirs": {
                "ExternalArticles": [
diff --git a/maintenance/importTextFiles.php b/maintenance/importTextFiles.php
new file mode 100644
index 0000000..2c328d6
--- /dev/null
+++ b/maintenance/importTextFiles.php
@@ -0,0 +1,30 @@
+<?php
+
+namespace MediaWiki\Extensions\ExternalArticles;
+
+use Maintenance;
+
+require_once __DIR__ . "/../../../maintenance/Maintenance.php";
+
+class ImportTextFiles extends Maintenance {
+
+       /**
+        */
+       public function __construct() {
+               parent::__construct();
+               $this->requireExtension( 'ExternalArticles' );
+               $this->addOption( 'watch', 'Keep watching the files and 
re-import whenever one changes' );
+               $this->addArg( 'directory', 'The directory to import' );
+       }
+
+       /**
+        * Run the import.
+        */
+       public function execute() {
+               $importer = new TextFileImporter( $this->getArg( 0 ), 
$this->getOption( 'watch' ) );
+               $importer->import();
+       }
+}
+
+$maintClass = ImportTextFiles::class;
+require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/phpcs.xml b/phpcs.xml
new file mode 100644
index 0000000..a6380ce
--- /dev/null
+++ b/phpcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<ruleset name="MediaWiki">
+       <rule ref="vendor/mediawiki/mediawiki-codesniffer/MediaWiki" />
+       <file>.</file>
+       <exclude-pattern>vendor</exclude-pattern>
+</ruleset>
diff --git a/src/Hooks.php b/src/Hooks.php
index 709373e..1eaef47 100644
--- a/src/Hooks.php
+++ b/src/Hooks.php
@@ -19,24 +19,24 @@
         * @return boolean
         */
        function onEditFormPreloadText( &$text, Title &$title ) {
-               global $wgOut, $eagRules;
+               global $wgOut, $wgEagRules;
 
                // @todo: change this so each setting is set to it's default if 
it is not defined.
                // Currently, if anything is overridden, all must be defined.
-               if ( !isset( $eagRules ) || is_null( $eagRules ) ) {
-                       $eagRules = [];
-                       $eagRules['onpreload'] = true;
-                       $eagRules['url'] = 
'https://en.wikipedia.org/w/index.php?title=';
+               if ( !isset( $wgEagRules ) || is_null( $wgEagRules ) ) {
+                       $wgEagRules = [];
+                       $wgEagRules['onpreload'] = true;
+                       $wgEagRules['url'] = 
'https://en.wikipedia.org/w/index.php?title=';
 
                        // @todo: remove assumption of English.
-                       $eagRules['rule'] = '/^Template:.*$/';
+                       $wgEagRules['rule'] = '/^Template:.*$/';
                } else {
-                       // @todo: validate $eagRules URL's, etc...
+                       // @todo: validate $wgEagRules URL's, etc...
                }
 
                $pagename = $title->getPrefixedURL();
-               $url = $eagRules['url'] . $pagename . '&action=raw';
-               $ismatch = preg_match( $eagRules['rule'], $pagename ) > 0;
+               $url = $wgEagRules['url'] . $pagename . '&action=raw';
+               $ismatch = preg_match( $wgEagRules['rule'], $pagename ) > 0;
 
                if ( defined( 'EXTERNALARTICLES_DEBUG' ) ) {
                        if ( $ismatch ) {
@@ -46,7 +46,7 @@
                        }
                }
 
-               if ( $eagRules['onpreload'] && $ismatch && empty( $text ) ) {
+               if ( $wgEagRules['onpreload'] && $ismatch && empty( $text ) ) {
                        $options = [
                                'followRedirects' => true,
                        ];
diff --git a/src/TextFileImporter.php b/src/TextFileImporter.php
new file mode 100644
index 0000000..26c9178
--- /dev/null
+++ b/src/TextFileImporter.php
@@ -0,0 +1,163 @@
+<?php
+
+namespace MediaWiki\Extensions\ExternalArticles;
+
+use DirectoryIterator;
+use Exception;
+use MediaWiki\MediaWikiServices;
+use RecentChange;
+use Revision;
+use Title;
+use User;
+use WikiRevision;
+
+class TextFileImporter {
+
+       /** @var resource */
+       protected $inotify;
+
+       /** @var string[] */
+       protected $watches;
+
+       /**
+        * Import text files from a directory.
+        * @param string $dir The directory to import from.
+        * @param boolean $watch Whether to continue to watch the files for 
changes.
+        * @throws Exception If the directory does not exist.
+        */
+       public function __construct( $dir, $watch ) {
+               if ( !is_dir( $dir ) ) {
+                       throw new Exception( "'$dir' is not a directory" );
+               }
+               $this->dir = realpath( $dir );
+               $this->watch = (boolean)$watch && function_exists( 
'inotify_init' );
+               if ( $this->watch ) {
+                       $this->inotify = inotify_init();
+               }
+       }
+
+       /**
+        *
+        */
+       public function import() {
+               $topLevel = new DirectoryIterator( $this->dir );
+               foreach ( $topLevel as $file ) {
+                       if ( $file->isDot() ) {
+                               continue;
+                       }
+                       if ( $file->isDir() ) {
+                               // Use the directory names as namespaces.
+                               $secondLevel = new DirectoryIterator( 
$this->dir . '/' . $file );
+                               foreach ( $secondLevel as $subfile ) {
+                                       if ( $subfile->isDot() ) {
+                                               continue;
+                                       }
+                                       $this->importFile( 
$subfile->getPathname() );
+                               }
+                       } else {
+                               $this->importFile( $file->getPathname() );
+                       }
+               }
+
+               if ( $this->watch ) {
+                       while ( true ) {
+                               $events = inotify_read( $this->inotify );
+                               foreach ( $events as $event ) {
+                                       $file = $this->watches[ $event['wd'] ];
+                                       $this->importFile( $file );
+                               }
+                       }
+               }
+       }
+
+       /**
+        * Import a single file.
+        * @param string $file Full filesystem path to the file to import.
+        * @return boolean
+        */
+       protected function importFile( $file ) {
+               // Construct the page name from the last components of the file 
path.
+               $pagePath = substr( $file, strlen( $this->dir ) + 1 );
+               $pageName = str_replace( '/', ':', $pagePath );
+
+               // Have to check for # manually, since it gets interpreted as a 
fragment
+               $title = Title::newFromText( $pageName );
+               if ( !$title || $title->hasFragment() ) {
+                       echo "Invalid title: $pageName\n";
+                       return false;
+               }
+
+               if ( $this->watch ) {
+                       // Watch this file.
+                       $watchId = inotify_add_watch( $this->inotify, $file, 
IN_MODIFY );
+                       $this->watches[ $watchId ] = $file;
+               }
+
+               $exists = $title->exists();
+               $oldRevID = $title->getLatestRevID();
+               $oldRev = $oldRevID ? Revision::newFromId( $oldRevID ) : null;
+               $actualTitle = $title->getPrefixedDBkey();
+
+               $text = file_get_contents( $file );
+
+               $rev = new WikiRevision( 
MediaWikiServices::getInstance()->getMainConfig() );
+               $rev->setText( rtrim( $text ) );
+               $rev->setTitle( $title );
+               $user = User::newSystemUser( 'Maintenance script', [ 'steal' => 
true ] );
+               $rev->setUserObj( $user );
+               $rev->setComment( 'Imported by ExternalArticles extension' );
+               $rev->setTimestamp( wfTimestampNow() );
+
+               if ( $exists && $rev->getContent()->equals( 
$oldRev->getContent() ) ) {
+                       echo "$actualTitle does not need to be updated\n";
+                       return false;
+               }
+
+               $status = $rev->importOldRevision();
+               $newId = $title->getLatestRevID();
+
+               if ( $status ) {
+                       $action = $exists ? 'updated' : 'created';
+                       echo "Successfully $action $actualTitle\n";
+               } else {
+                       $action = $exists ? 'update' : 'create';
+                       echo "Failed to $action $actualTitle\n";
+                       return false;
+               }
+
+               // Create the RecentChanges entry if necessary
+               if ( $exists ) {
+                       if ( is_object( $oldRev ) ) {
+                               $oldContent = $oldRev->getContent();
+                               RecentChange::notifyEdit(
+                                       $rev->getTimestamp(),
+                                       $title,
+                                       $rev->getMinor(),
+                                       $user,
+                                       $rev->getComment(),
+                                       $oldRevID,
+                                       $oldRev->getTimestamp(),
+                                       false,
+                                       '',
+                                       $oldContent ? $oldContent->getSize() : 
0,
+                                       $rev->getContent()->getSize(),
+                                       $newId,
+                                       1
+                               );
+                       }
+               } else {
+                       RecentChange::notifyNew(
+                               $rev->getTimestamp(),
+                               $title,
+                               $rev->getMinor(),
+                               $user,
+                               $rev->getTimestamp(),
+                               false,
+                               '',
+                               $rev->getContent()->getSize(),
+                               $newId,
+                               1
+                       );
+               }
+       }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/338916
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I872dddfe43eaf6029d5282ea18e9fca426fd8995
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/extensions/ExternalArticles
Gerrit-Branch: master
Gerrit-Owner: Samwilson <[email protected]>
Gerrit-Reviewer: Samwilson <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to