Samwilson has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/338916 )

Change subject: Add feature to import pages from local directory hierarchy
......................................................................

Add feature to import pages from local directory hierarchy

This adds a maintenance script that scans a directory for files
and imports them. Top-level files are imported into the main
namespace, and files in subdirectories are imported into namespaces
named after those subdirectories.

No file extensions are stripped (in order to be able to import JS
and CSS files etc.).

A 'watch' option is added, which will keep the script running and
monitoring the files for changes. Whenever one is modified it will
be re-imported. This feature relies on the inotify PECL extension.

Change-Id: I872dddfe43eaf6029d5282ea18e9fca426fd8995
---
M .gitignore
A composer.json
M extension.json
A maintenance/importTextFiles.php
A src/TextFileImporter.php
5 files changed, 221 insertions(+), 1 deletion(-)


  git pull 
ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ExternalArticles 
refs/changes/16/338916/1

diff --git a/.gitignore b/.gitignore
index 1689d7a..c60b13a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@
 *.kate-swp
 .*.swp
 node_modules/**
+/vendor
+/composer.lock
diff --git a/composer.json b/composer.json
new file mode 100644
index 0000000..968e300
--- /dev/null
+++ b/composer.json
@@ -0,0 +1,25 @@
+{
+       "name": "samwilson/mediawiki-extensions-external-articles",
+       "description": "A MediaWiki extension for loading page text from 
external sources",
+       "type": "mediawiki-extension",
+       "license": "GPL-3.0",
+       "keywords": ["importing", "MediaWiki"],
+       "support": {
+               "issues": "https://phabricator.wikimedia.org",
+               "irc": "irc://irc.freenode.net/mediawiki",
+               "source": 
"https://gerrit.wikimedia.org/r/p/mediawiki/extensions/ExternalArticles.git"
+       },
+       "require-dev": {
+               "jakub-onderka/php-parallel-lint": "0.9.2",
+               "mediawiki/mediawiki-codesniffer": "0.7.2"
+       },
+       "scripts": {
+               "test": [
+                       "parallel-lint . --exclude vendor",
+                       "phpcs -p -s"
+               ],
+               "fix": [
+                       "phpcbf"
+               ]
+       }
+}
diff --git a/extension.json b/extension.json
index 43050ab..ee08098 100644
--- a/extension.json
+++ b/extension.json
@@ -10,7 +10,8 @@
        "descriptionmsg": "externalarticles-desc",
        "type": "extension",
        "AutoloadClasses": {
-               "MediaWiki\\Extensions\\ExternalArticles\\Hooks": 
"src/Hooks.php"
+               "MediaWiki\\Extensions\\ExternalArticles\\Hooks": 
"src/Hooks.php",
+               "MediaWiki\\Extensions\\ExternalArticles\\TextFileImporter": 
"src/TextFileImporter.php"
        },
        "MessagesDirs": {
                "ExternalArticles": [
diff --git a/maintenance/importTextFiles.php b/maintenance/importTextFiles.php
new file mode 100644
index 0000000..3fef728
--- /dev/null
+++ b/maintenance/importTextFiles.php
@@ -0,0 +1,29 @@
+<?php
+
+use MediaWiki\Extensions\ExternalArticles\TextFileImporter;
+
+require_once __DIR__ . "/../../../maintenance/Maintenance.php";
+
+class ExternalArticles_Maintenance_ImportTextFiles extends Maintenance {
+
+       /**
+        */
+       public function __construct() {
+               parent::__construct();
+               $this->requireExtension( 'ExternalArticles' );
+               $this->addOption( 'watch', 'Whether to keep watching the files 
and re-importing whenever one changes', false, false );
+               $this->addArg( 'dir', 'The directory to import', true );
+       }
+
+       /**
+        * Run the import.
+        */
+       public function execute() {
+               $this->output( "This is the ExternalArticles extension\n" );
+               $importer = new TextFileImporter( $this->getArg( 0 ), 
$this->getOption( 'watch' ) );
+               $importer->import();
+       }
+}
+
+$maintClass = ExternalArticles_Maintenance_ImportTextFiles::class;
+require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/src/TextFileImporter.php b/src/TextFileImporter.php
new file mode 100644
index 0000000..26c9178
--- /dev/null
+++ b/src/TextFileImporter.php
@@ -0,0 +1,163 @@
+<?php
+
+namespace MediaWiki\Extensions\ExternalArticles;
+
+use DirectoryIterator;
+use Exception;
+use MediaWiki\MediaWikiServices;
+use RecentChange;
+use Revision;
+use Title;
+use User;
+use WikiRevision;
+
+class TextFileImporter {
+
+       /** @var resource */
+       protected $inotify;
+
+       /** @var string[] */
+       protected $watches;
+
+       /**
+        * Import text files from a directory.
+        * @param string $dir The directory to import from.
+        * @param boolean $watch Whether to continue to watch the files for 
changes.
+        * @throws Exception If the directory does not exist.
+        */
+       public function __construct( $dir, $watch ) {
+               if ( !is_dir( $dir ) ) {
+                       throw new Exception( "'$dir' is not a directory" );
+               }
+               $this->dir = realpath( $dir );
+               $this->watch = (boolean)$watch && function_exists( 
'inotify_init' );
+               if ( $this->watch ) {
+                       $this->inotify = inotify_init();
+               }
+       }
+
+       /**
+        *
+        */
+       public function import() {
+               $topLevel = new DirectoryIterator( $this->dir );
+               foreach ( $topLevel as $file ) {
+                       if ( $file->isDot() ) {
+                               continue;
+                       }
+                       if ( $file->isDir() ) {
+                               // Use the directory names as namespaces.
+                               $secondLevel = new DirectoryIterator( 
$this->dir . '/' . $file );
+                               foreach ( $secondLevel as $subfile ) {
+                                       if ( $subfile->isDot() ) {
+                                               continue;
+                                       }
+                                       $this->importFile( 
$subfile->getPathname() );
+                               }
+                       } else {
+                               $this->importFile( $file->getPathname() );
+                       }
+               }
+
+               if ( $this->watch ) {
+                       while ( true ) {
+                               $events = inotify_read( $this->inotify );
+                               foreach ( $events as $event ) {
+                                       $file = $this->watches[ $event['wd'] ];
+                                       $this->importFile( $file );
+                               }
+                       }
+               }
+       }
+
+       /**
+        * Import a single file.
+        * @param string $file Full filesystem path to the file to import.
+        * @return boolean
+        */
+       protected function importFile( $file ) {
+               // Construct the page name from the last components of the file 
path.
+               $pagePath = substr( $file, strlen( $this->dir ) + 1 );
+               $pageName = str_replace( '/', ':', $pagePath );
+
+               // Have to check for # manually, since it gets interpreted as a 
fragment
+               $title = Title::newFromText( $pageName );
+               if ( !$title || $title->hasFragment() ) {
+                       echo "Invalid title: $pageName\n";
+                       return false;
+               }
+
+               if ( $this->watch ) {
+                       // Watch this file.
+                       $watchId = inotify_add_watch( $this->inotify, $file, 
IN_MODIFY );
+                       $this->watches[ $watchId ] = $file;
+               }
+
+               $exists = $title->exists();
+               $oldRevID = $title->getLatestRevID();
+               $oldRev = $oldRevID ? Revision::newFromId( $oldRevID ) : null;
+               $actualTitle = $title->getPrefixedDBkey();
+
+               $text = file_get_contents( $file );
+
+               $rev = new WikiRevision( 
MediaWikiServices::getInstance()->getMainConfig() );
+               $rev->setText( rtrim( $text ) );
+               $rev->setTitle( $title );
+               $user = User::newSystemUser( 'Maintenance script', [ 'steal' => 
true ] );
+               $rev->setUserObj( $user );
+               $rev->setComment( 'Imported by ExternalArticles extension' );
+               $rev->setTimestamp( wfTimestampNow() );
+
+               if ( $exists && $rev->getContent()->equals( 
$oldRev->getContent() ) ) {
+                       echo "$actualTitle does not need to be updated\n";
+                       return false;
+               }
+
+               $status = $rev->importOldRevision();
+               $newId = $title->getLatestRevID();
+
+               if ( $status ) {
+                       $action = $exists ? 'updated' : 'created';
+                       echo "Successfully $action $actualTitle\n";
+               } else {
+                       $action = $exists ? 'update' : 'create';
+                       echo "Failed to $action $actualTitle\n";
+                       return false;
+               }
+
+               // Create the RecentChanges entry if necessary
+               if ( $exists ) {
+                       if ( is_object( $oldRev ) ) {
+                               $oldContent = $oldRev->getContent();
+                               RecentChange::notifyEdit(
+                                       $rev->getTimestamp(),
+                                       $title,
+                                       $rev->getMinor(),
+                                       $user,
+                                       $rev->getComment(),
+                                       $oldRevID,
+                                       $oldRev->getTimestamp(),
+                                       false,
+                                       '',
+                                       $oldContent ? $oldContent->getSize() : 
0,
+                                       $rev->getContent()->getSize(),
+                                       $newId,
+                                       1
+                               );
+                       }
+               } else {
+                       RecentChange::notifyNew(
+                               $rev->getTimestamp(),
+                               $title,
+                               $rev->getMinor(),
+                               $user,
+                               $rev->getTimestamp(),
+                               false,
+                               '',
+                               $rev->getContent()->getSize(),
+                               $newId,
+                               1
+                       );
+               }
+       }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/338916
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I872dddfe43eaf6029d5282ea18e9fca426fd8995
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ExternalArticles
Gerrit-Branch: master
Gerrit-Owner: Samwilson <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to