Addshore has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/338972 )
Change subject: Generic improvements ...................................................................... Generic improvements - Actually check posted details hash & token - Extra logging - Improved docs & started README - Split DetailRetreiver out of Impoerter interface - Can now use mobile & zero domains for example en.m.wikipedia.org - Improved checks on the fullness of API responses Change-Id: Ifbfc158b5fe14984d7836bb199cca135967cb1aa --- M README.md M extension.json A src/Generic/DetailRetriever.php A src/Generic/DispatchingDetailRetriever.php D src/Generic/DispatchingImporter.php M src/Generic/Exceptions/HttpRequestException.php M src/Generic/Exceptions/ImportTargetException.php M src/Generic/Importer.php R src/MediaWiki/ApiDetailRetriever.php M src/MediaWiki/HttpApiLookup.php M src/MediaWiki/SiteTableSiteLookup.php M src/ServiceWiring.php M src/SpecialImportFile.php M tests/MediaWiki/SiteTableSiteLookupTest.php 14 files changed, 232 insertions(+), 132 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/FileImporter refs/changes/72/338972/1 diff --git a/README.md b/README.md index 29ebd3d..fa10a75 100644 --- a/README.md +++ b/README.md @@ -1 +1,8 @@ # FileImporter extension + +This MediaWiki extension allows for the easy importing of a file from one site to another. +The word site is chosen specifically here as there is no reason code can not be written allowing the importing of Files from sites that are not MediaWiki wikis. + +This extension has been created as part of the 2013 German Community Technical Wishlist https://phabricator.wikimedia.org/T140462 where a wish requested that it be possible to "Correctly move files from Wikipedia to Commons" including file & description page history along with maintaining edit attribution & migrating templates. + +Please also see the FileExporter extension which provides a link on the file pages of a MediaWiki site to link to a wiki that is running the FileImporter extension. diff --git a/extension.json b/extension.json index 597cf16..9190579 100644 --- a/extension.json +++ b/extension.json @@ -19,7 +19,8 @@ "ImportFile": "FileImporter\\SpecialImportFile" }, "AutoloadClasses": { - "FileImporter\\Generic\\DispatchingImporter": "src/Generic/DispatchingImporter.php", + "FileImporter\\Generic\\DetailRetriever": "src/Generic/DetailRetriever.php", + "FileImporter\\Generic\\DispatchingDetailRetriever": "src/Generic/DispatchingDetailRetriever.php", "FileImporter\\Generic\\Exceptions\\HttpRequestException": "src/Generic/Exceptions/HttpRequestException.php", "FileImporter\\Generic\\Exceptions\\ImportException": "src/Generic/Exceptions/ImportException.php", "FileImporter\\Generic\\Exceptions\\ImportTargetException": "src/Generic/Exceptions/ImportTargetException.php", @@ -31,7 +32,7 @@ "FileImporter\\Generic\\TargetUrl": "src/Generic/TargetUrl.php", "FileImporter\\Generic\\FileRevision": "src/Generic/FileRevision.php", "FileImporter\\Generic\\TextRevision": "src/Generic/TextRevision.php", - "FileImporter\\MediaWiki\\ApiImporter": "src/MediaWiki/ApiImporter.php", + "FileImporter\\MediaWiki\\ApiDetailRetriever": "src/MediaWiki/ApiDetailRetriever.php", "FileImporter\\MediaWiki\\HttpApiLookup": "src/MediaWiki/HttpApiLookup.php", "FileImporter\\MediaWiki\\SiteTableSiteLookup": "src/MediaWiki/SiteTableSiteLookup.php", "FileImporter\\FileImporterHooks": "src/FileImporterHooks.php", @@ -41,7 +42,7 @@ "src/ServiceWiring.php" ], "config": { - "FileImporterImporterServices": [ + "FileImporterDetailRetrieverServices": [ "FileImporterMediaWikiApiImporter" ] }, diff --git a/src/Generic/DetailRetriever.php b/src/Generic/DetailRetriever.php new file mode 100644 index 0000000..40a95bf --- /dev/null +++ b/src/Generic/DetailRetriever.php @@ -0,0 +1,31 @@ +<?php + +namespace FileImporter\Generic; + +use FileImporter\Generic\Exceptions\ImportTargetException; + +/** + * This interface creates ImportDetails objects from a TargetUrl. + * This interface can be implemented multiple times for a single set of or type of site. + * + * When the implementation is added to the FileImporterDetailRetrieverServices setting the order + * is respected, and thus, more important / better retrievers should be entered first. + */ +interface DetailRetriever { + + /** + * @param TargetUrl $targetUrl + * + * @return bool + */ + public function canGetImportDetails( TargetUrl $targetUrl ); + + /** + * @param TargetUrl $targetUrl + * + * @return ImportDetails + * @throws ImportTargetException if the given target can't be imported by this importer + */ + public function getImportDetails( TargetUrl $targetUrl ); + +} diff --git a/src/Generic/DispatchingDetailRetriever.php b/src/Generic/DispatchingDetailRetriever.php new file mode 100644 index 0000000..6b3476f --- /dev/null +++ b/src/Generic/DispatchingDetailRetriever.php @@ -0,0 +1,50 @@ +<?php + +namespace FileImporter\Generic; + +use FileImporter\Generic\Exceptions\ImportTargetException; + +class DispatchingDetailRetriever implements DetailRetriever { + + /** + * @var DetailRetriever[] + */ + private $retrievers; + + /** + * @param DetailRetriever[] $retrievers + */ + public function __construct( array $retrievers ) { + $this->retrievers = $retrievers; + } + + /** + * @param TargetUrl $targetUrl + * + * @return DetailRetriever + * @throws ImportTargetException + */ + private function getRetrieverForTarget( TargetUrl $targetUrl ) { + foreach ( $this->retrievers as $retriever ) { + if ( $retriever->canGetImportDetails( $targetUrl ) ) { + return $retriever; + } + } + throw new ImportTargetException(); + } + + public function canGetImportDetails( TargetUrl $targetUrl ) { + try { + $this->getRetrieverForTarget( $targetUrl ); + return true; + } catch ( ImportTargetException $e ) { + return false; + } + } + + public function getImportDetails( TargetUrl $targetUrl ) { + $importer = $this->getRetrieverForTarget( $targetUrl ); + return $importer->getImportDetails( $targetUrl ); + } + +} diff --git a/src/Generic/DispatchingImporter.php b/src/Generic/DispatchingImporter.php deleted file mode 100644 index a75764d..0000000 --- a/src/Generic/DispatchingImporter.php +++ /dev/null @@ -1,61 +0,0 @@ -<?php - -namespace FileImporter\Generic; - -use FileImporter\Generic\Exceptions\ImportTargetException; - -class DispatchingImporter implements Importer { - - /** - * @var Importer[] - */ - private $services; - - /** - * @param Importer[] $services - */ - public function __construct( array $services ) { - $this->services = $services; - } - - /** - * @param TargetUrl $targetUrl - * - * @return Importer - * @throws ImportTargetException - */ - private function getServiceForTarget( TargetUrl $targetUrl ) { - foreach ( $this->services as $service ) { - if ( $service->canImport( $targetUrl ) ) { - return $service; - } - } - throw new ImportTargetException(); - } - - public function canImport( TargetUrl $targetUrl ) { - try { - $this->getServiceForTarget( $targetUrl ); - return true; - } catch ( ImportTargetException $e ) { - return false; - } - } - - public function getImportDetails( TargetUrl $targetUrl ) { - $importer = $this->getServiceForTarget( $targetUrl ); - return $importer->getImportDetails( $targetUrl ); - } - - /** - * @param TargetUrl $targetUrl - * @param ImportAdjustments $importAdjustments - * - * @return bool success - */ - public function import( TargetUrl $targetUrl, ImportAdjustments $importAdjustments ) { - $importer = $this->getServiceForTarget( $targetUrl ); - return $importer->import( $targetUrl, $importAdjustments ); - } - -} diff --git a/src/Generic/Exceptions/HttpRequestException.php b/src/Generic/Exceptions/HttpRequestException.php index b965ee6..9943f08 100644 --- a/src/Generic/Exceptions/HttpRequestException.php +++ b/src/Generic/Exceptions/HttpRequestException.php @@ -2,6 +2,9 @@ namespace FileImporter\Generic\Exceptions; +/** + * Thrown in cases where a HttpRequest has failed + */ class HttpRequestException extends ImportException { } diff --git a/src/Generic/Exceptions/ImportTargetException.php b/src/Generic/Exceptions/ImportTargetException.php index 5e857fb..f9a930b 100644 --- a/src/Generic/Exceptions/ImportTargetException.php +++ b/src/Generic/Exceptions/ImportTargetException.php @@ -5,7 +5,7 @@ use RuntimeException; /** - * Thrown in cases that the ImportTarget is not deemed to be acceptable + * Thrown in cases that the ImportTarget is not deemed to be acceptable. */ class ImportTargetException extends ImportException{ diff --git a/src/Generic/Importer.php b/src/Generic/Importer.php index f5fe74b..c497348 100644 --- a/src/Generic/Importer.php +++ b/src/Generic/Importer.php @@ -2,32 +2,22 @@ namespace FileImporter\Generic; -use FileImporter\Generic\Exceptions\ImportTargetException; +use FileImporter\Generic\Exceptions\ImportException; -interface Importer { +class Importer { /** - * @param TargetUrl $targetUrl - * - * @return bool - */ - public function canImport( TargetUrl $targetUrl ); - - /** - * @param TargetUrl $targetUrl - * - * @return ImportDetails - * @throws ImportTargetException if the given target can't be imported by this importer - */ - public function getImportDetails( TargetUrl $targetUrl ); - - /** - * @param TargetUrl $targetUrl - * @param ImportAdjustments $importAdjustments + * @param ImportDetails $importDetails + * @param ImportAdjustments $importAdjustments adjustments to be made to the details * * @return bool success - * @throws ImportTargetException if the given target can't be imported by this importer + * @throws ImportException */ - public function import( TargetUrl $targetUrl, ImportAdjustments $importAdjustments ); + public function import( ImportDetails $importDetails, ImportAdjustments $importAdjustments ){ + // TODO implement + // TODO copy files directly in swift if possible? + + return false; + } } diff --git a/src/MediaWiki/ApiImporter.php b/src/MediaWiki/ApiDetailRetriever.php similarity index 74% rename from src/MediaWiki/ApiImporter.php rename to src/MediaWiki/ApiDetailRetriever.php index 940812b..7aa66db 100644 --- a/src/MediaWiki/ApiImporter.php +++ b/src/MediaWiki/ApiDetailRetriever.php @@ -6,16 +6,15 @@ use FileImporter\Generic\Exceptions\ImportException; use FileImporter\Generic\FileRevision; use FileImporter\Generic\HttpRequestExecutor; -use FileImporter\Generic\ImportAdjustments; use FileImporter\Generic\ImportDetails; -use FileImporter\Generic\Importer; +use FileImporter\Generic\DetailRetriever; use FileImporter\Generic\TargetUrl; use FileImporter\Generic\TextRevision; use Psr\Log\LoggerAwareInterface; use Psr\Log\LoggerInterface; use Psr\Log\NullLogger; -class ApiImporter implements Importer, LoggerAwareInterface { +class ApiDetailRetriever implements DetailRetriever, LoggerAwareInterface { /** * @var SiteTableSiteLookup @@ -57,7 +56,7 @@ * * @return bool */ - public function canImport( TargetUrl $targetUrl ) { + public function canGetImportDetails( TargetUrl $targetUrl ) { return $this->siteTableSiteLookup->getSite( $targetUrl->getParsedUrl()['host'] ) !== null; } @@ -77,14 +76,46 @@ throw new ImportException( 'Failed to retrieve file information from: ' . $requestUrl ); } $requestData = json_decode( $imageInfoRequest->getContent(), true ); - // TODO check if the response has any continuation data. Either continue or die here... + + if ( array_key_exists( 'continue', $requestData ) ) { + $this->logger->warning( + 'API returned continue data', + [ + 'targetUrl' => $targetUrl->getUrl(), + 'requestUrl' => $requestUrl, + ] + ); + throw new ImportException( 'Too many revisions, can not import.' ); + } if ( count( $requestData['query']['pages'] ) !== 1 ) { - // TODO log? - throw new ImportException( 'Unexpected number of pages received from the API.' ); + $this->logger->warning( + 'No pages returned by the API', + [ + 'targetUrl' => $targetUrl->getUrl(), + 'requestUrl' => $requestUrl, + ] + ); + throw new ImportException( 'No pages returned by the API' ); } $pageInfoData = array_pop( $requestData['query']['pages'] ); + + if ( !array_key_exists( 'imageinfo', $pageInfoData ) || + !array_key_exists( 'revisions', $pageInfoData ) || + count( $pageInfoData['imageinfo'] ) < 1 || + count( $pageInfoData['revisions'] ) < 1 + ) { + $this->logger->warning( + 'Bad image or revision info returned by the API', + [ + 'targetUrl' => $targetUrl->getUrl(), + 'requestUrl' => $requestUrl, + ] + ); + throw new ImportException( 'Bad image or revision info returned by the API' ); + } + $normalizationData = array_pop( $requestData['query']['normalized'] ); $imageInfoData = $pageInfoData['imageinfo']; $revisionsData = $pageInfoData['revisions']; @@ -130,9 +161,14 @@ private function getParams( TargetUrl $targetUrl ) { $parsed = $targetUrl->getParsedUrl(); - // TODO what if the url is title=XXX? - $bits = explode( '/', $parsed['path'] ); - $fullTitle = array_pop( $bits ); + + if ( array_key_exists( 'query', $parsed ) && strstr( $parsed['query'], 'title' ) ) { + parse_str( $parsed['query'], $bits ); + $fullTitle = $bits['title']; + } else { + $bits = explode( '/', $parsed['path'] ); + $fullTitle = array_pop( $bits ); + } return [ 'action' => 'query', @@ -187,17 +223,6 @@ ] ), ]; - } - - /** - * @param TargetUrl $targetUrl - * @param ImportAdjustments $importAdjustments - * - * @return bool success - */ - public function import( TargetUrl $targetUrl, ImportAdjustments $importAdjustments ) { - // TODO implement - return false; } } diff --git a/src/MediaWiki/HttpApiLookup.php b/src/MediaWiki/HttpApiLookup.php index 00fd47b..0cc3630 100644 --- a/src/MediaWiki/HttpApiLookup.php +++ b/src/MediaWiki/HttpApiLookup.php @@ -12,6 +12,9 @@ use Psr\Log\LoggerInterface; use Psr\Log\NullLogger; +/** + * Lookup that can take a MediaWiki site URL and return the URL of the action API. + */ class HttpApiLookup implements LoggerAwareInterface{ /** diff --git a/src/MediaWiki/SiteTableSiteLookup.php b/src/MediaWiki/SiteTableSiteLookup.php index b934e05..f73cd72 100644 --- a/src/MediaWiki/SiteTableSiteLookup.php +++ b/src/MediaWiki/SiteTableSiteLookup.php @@ -5,6 +5,10 @@ use Site; use SiteLookup; +/** + * Lookup that can be used to get a Site object from the locally configured sites based on the + * hostname. + */ class SiteTableSiteLookup { /** @@ -22,17 +26,36 @@ } /** - * @param string $host e.g. en.wikipedia.org or commons.wikimedia.org + * @param string $host e.g. en.wikipedia.org or commons.m.wikimedia.org * * @return Site|null */ public function getSite( $host ) { - $site = $this->getSiteFromHostMap( $host ); - if ( !$site ) { - $site = $this->getSiteFromSitesLoop( $host ); + $hosts = [ $host ]; + + // XXX: Wikimedia specific hacks!? + if ( strstr( $host, '.m.' ) ) { + $hosts[] = str_replace( '.m.', '.', $host ); + } + if ( strstr( $host, '.zero.' ) ) { + $hosts[] = str_replace( '.zero.', '.', $host ); } - return $site; + foreach ( $hosts as $host ) { + $site = $this->getSiteFromHostMap( $host ); + if ( $site ) { + return $site; + } + } + + foreach ( $hosts as $host ) { + $site = $this->getSiteFromSitesLoop( $host ); + if ( $site ) { + return $site; + } + } + + return null; } /** diff --git a/src/ServiceWiring.php b/src/ServiceWiring.php index ae408c1..1c6c0bf 100644 --- a/src/ServiceWiring.php +++ b/src/ServiceWiring.php @@ -2,7 +2,7 @@ namespace FileImporter; -use FileImporter\Generic\DispatchingImporter; +use FileImporter\Generic\DispatchingDetailRetriever; use FileImporter\Generic\HttpRequestExecutor; use MediaWiki\Logger\LoggerFactory; use MediaWiki\MediaWikiServices; @@ -12,15 +12,15 @@ // Generic - 'FileImporterDispatchingImporter' => function( MediaWikiServices $services ) { + 'FileImporterDispatchingDetailRetriever' => function( MediaWikiServices $services ) { $config = $services->getMainConfig(); - $importers = []; - foreach ( $config->get( 'FileImporterImporterServices' ) as $serviceName ) { - $importers[] = $services->getService( $serviceName ); + $detailRetrievers = []; + foreach ( $config->get( 'FileImporterDetailRetrieverServices' ) as $serviceName ) { + $detailRetrievers[] = $services->getService( $serviceName ); } - return new DispatchingImporter( $importers ); + return new DispatchingDetailRetriever( $detailRetrievers ); }, 'FileImporterHttpRequestExecutor' => function( MediaWikiServices $services ) { @@ -58,7 +58,7 @@ $httpApiLookup = $services->getService( 'FileImporterMediaWikiHttpApiLookup' ); $httpRequestExecutor = $services->getService( 'FileImporterHttpRequestExecutor' ); - $service = new \FileImporter\MediaWiki\ApiImporter( + $service = new \FileImporter\MediaWiki\ApiDetailRetriever( $siteTableSiteLookup, $httpApiLookup, $httpRequestExecutor diff --git a/src/SpecialImportFile.php b/src/SpecialImportFile.php index 3d91340..bf1241a 100644 --- a/src/SpecialImportFile.php +++ b/src/SpecialImportFile.php @@ -2,7 +2,9 @@ namespace FileImporter; +use FileImporter\Generic\ImportAdjustments; use FileImporter\Generic\ImportDetails; +use FileImporter\Generic\DetailRetriever; use FileImporter\Generic\Importer; use FileImporter\Generic\TargetUrl; use Html; @@ -33,32 +35,56 @@ } // TODO inject! - /** @var Importer $importer */ - $importer = MediaWikiServices::getInstance() - ->getService( 'FileImporterDispatchingImporter' ); + /** @var DetailRetriever $detailRetriever */ + $detailRetriever = MediaWikiServices::getInstance() + ->getService( 'FileImporterDispatchingDetailRetriever' ); $this->getOutput()->addModuleStyles( 'ext.FileImporter.Special' ); if ( !$targetUrl->isParsable() ) { $this->showUnparsableUrlMessage( $targetUrl->getUrl() ); $this->showUrlEntryPage(); - } elseif ( !$importer->canImport( $targetUrl ) ) { + } elseif ( !$detailRetriever->canGetImportDetails( $targetUrl ) ) { $this->showDisallowedUrlMessage(); $this->showUrlEntryPage(); } else { - $importDetails = $importer->getImportDetails( $targetUrl ); + $importDetails = $detailRetriever->getImportDetails( $targetUrl ); if ( $wasPosted ) { - $this->doImport( $importer, $importDetails ); + $this->doImport( $importDetails ); } else { $this->showImportPage( $importDetails ); } } } - private function doImport( Importer $importer, ImportDetails $importDetails ) { - // TODO implement importing - // TODO check token & ImportDetails hash here - $this->getOutput()->addHTML( 'TODO do the import' ); + private function doImport( ImportDetails $importDetails ) { + $out = $this->getOutput(); + + $importDetailsHash = $out->getRequest()->getVal( 'importDetailsHash', '' ); + $token = $out->getRequest()->getVal( 'token', '' ); + + if ( $this->getUser()->getEditToken() !== $token ) { + $this->showWarningMessage( 'Incorrect token submitted for import' ); // TODO i18n + } + + if ( $importDetails->getHash() !== $importDetailsHash ) { + // TODO i18n + $this->showWarningMessage( 'Incorrect import details hash submitted for import' ); + } + + $adjustments = new ImportAdjustments(); // TODO populate adjustments based on import form + + $importer = new Importer(); + $result = $importer->import( $importDetails, $adjustments ); + + if ( $result ) { + // TODO show completion page showing old and new files & other possible actions + $this->getOutput()->addHTML( 'Import was a success!' ); // TODO i18n + } else { + $this->showWarningMessage( 'Import failed' ); // TODO i18n + } + + } private function showUnparsableUrlMessage( $rawUrl ) { diff --git a/tests/MediaWiki/SiteTableSiteLookupTest.php b/tests/MediaWiki/SiteTableSiteLookupTest.php index 9b86023..7d9ae9f 100644 --- a/tests/MediaWiki/SiteTableSiteLookupTest.php +++ b/tests/MediaWiki/SiteTableSiteLookupTest.php @@ -28,9 +28,11 @@ return [ 'google' => [ 'google.com' ], 'commons' => [ 'commons.wikimedia.org', 'commonswiki' ], - 'enwiki http' => [ 'en.wikipedia.org', 'enwiki' ], - 'enwiki https' => [ 'en.wikipedia.org', 'enwiki' ], - 'dewiki index.php' => [ 'de.wikipedia.org', 'dewiki', ], + 'mobile commons' => [ 'commons.m.wikimedia.org', 'commonswiki' ], + 'enwiki' => [ 'en.wikipedia.org', 'enwiki' ], + 'dewiki' => [ 'de.wikipedia.org', 'dewiki', ], + 'mobile dewiki' => [ 'de.m.wikipedia.org', 'dewiki', ], + 'zero dewiki' => [ 'de.zero.wikipedia.org', 'dewiki', ], ]; } -- To view, visit https://gerrit.wikimedia.org/r/338972 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ifbfc158b5fe14984d7836bb199cca135967cb1aa Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/FileImporter Gerrit-Branch: master Gerrit-Owner: Addshore <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
