Hoo man has uploaded a new change for review.
https://gerrit.wikimedia.org/r/260715
Change subject: Implement validation for CommonsMedia
......................................................................
Implement validation for CommonsMedia
This also re-enables other previously disabled validators
which have been disabled by accident (AFAICT).
I've decided to implement a CachingCommonsMediaFileNameLookup
in order to be able to use it for both validation and (later
on) value parsing/ normalization without having to do more
than one API query.
Needs I008cadd29a2aa1f21098339b895c35a100959b04 in core.
Bug: T87263
Change-Id: I65507d6ef974481c73d93353e4e9084ea77a6354
---
M repo/WikibaseRepo.datatypes.php
A repo/includes/CachingCommonsMediaFileNameLookup.php
M repo/includes/ValidatorBuilders.php
A repo/includes/Validators/CommonsMediaExistsValidator.php
M repo/includes/WikibaseRepo.php
A repo/tests/phpunit/includes/CachingCommonsMediaFileNameLookupTest.php
M repo/tests/phpunit/includes/ValidatorBuildersTest.php
A repo/tests/phpunit/includes/Validators/CommonsMediaExistsValidatorTest.php
M repo/tests/phpunit/includes/WikibaseRepoTest.php
9 files changed, 404 insertions(+), 8 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase
refs/changes/15/260715/1
diff --git a/repo/WikibaseRepo.datatypes.php b/repo/WikibaseRepo.datatypes.php
index a01e7dc..c35c7c8 100644
--- a/repo/WikibaseRepo.datatypes.php
+++ b/repo/WikibaseRepo.datatypes.php
@@ -86,7 +86,12 @@
'PT:commonsMedia' => array(
'validator-factory-callback' => function() {
$factory =
WikibaseRepo::getDefaultValidatorBuilders();
- return $factory->buildStringValidators();
+ // Don't go for commons during unit tests. This
sucks.
+ if ( !defined( 'MW_PHPUNIT_TEST' ) ) {
+ return $factory->buildMediaValidators();
+ } else {
+ return
$factory->buildStringValidators();
+ }
},
'parser-factory-callback' => $newStringParser,
'formatter-factory-callback' => function( $format,
FormatterOptions $options ) {
diff --git a/repo/includes/CachingCommonsMediaFileNameLookup.php
b/repo/includes/CachingCommonsMediaFileNameLookup.php
new file mode 100644
index 0000000..b2ac6c9
--- /dev/null
+++ b/repo/includes/CachingCommonsMediaFileNameLookup.php
@@ -0,0 +1,92 @@
+<?php
+
+namespace Wikibase\Repo;
+
+use BagOStuff;
+use MediaWiki\Site\MediaWikiPageNameNormalizer;
+use InvalidArgumentException;
+use Wikimedia\Assert\Assert;
+
+/**
+ * Caching service that looks up normalized file names from Wikimedia Commons.
+ *
+ * @license GPL 2+
+ * @author Marius Hoch
+ */
+class CachingCommonsMediaFileNameLookup {
+
+ const CACHE_DURATION = 600;
+
+ /**
+ * @var MediaWikiPageNameNormalizer
+ */
+ private $mediaWikiPageNameNormalizer;
+
+ /**
+ * @var BagOStuff
+ */
+ private $cache;
+
+ /**
+ * @param MediaWikiPageNameNormalizer $mediaWikiPageNameNormalizer
+ * @param BagOStuff $cache
+ */
+ public function __construct(
+ MediaWikiPageNameNormalizer $mediaWikiPageNameNormalizer,
+ BagOStuff $cache
+ ) {
+ $this->mediaWikiPageNameNormalizer =
$mediaWikiPageNameNormalizer;
+ $this->cache = $cache;
+ }
+
+ /**
+ * @param string $fileName File name, without the File: prefix.
+ *
+ * @return string|null The normalized file name or null (if the page
does not exist)
+ * @throws InvalidArgumentException
+ */
+ public function normalize( $fileName ) {
+ Assert::parameterType( 'string', $fileName, '$pageName' );
+
+ $cachedValue = $this->cache->get( $this->getCacheKey( $fileName
) );
+ if ( $cachedValue !== false ) {
+ return $cachedValue;
+ }
+
+ $actualPageName =
$this->mediaWikiPageNameNormalizer->normalizePageName(
+ 'File:' . $fileName
+ );
+
+ if ( $actualPageName === false ) {
+ $actualPageName = null;
+ } else {
+ $actualPageName = str_replace( 'File:', '',
$actualPageName );
+ }
+
+ // Cache with the given name and (if available) the normalized
one.
+ if ( $actualPageName !== null ) {
+ $this->cache->set(
+ $this->getCacheKey( $actualPageName ),
+ $actualPageName,
+ self::CACHE_DURATION
+ );
+ }
+
+ $this->cache->set(
+ $this->getCacheKey( $fileName ),
+ $actualPageName,
+ self::CACHE_DURATION
+ );
+
+ return $actualPageName;
+ }
+
+ /**
+ * @param string $pageName
+ * @return string
+ */
+ private function getCacheKey( $pageName ) {
+ return 'commons-media-' . $pageName;
+ }
+
+}
diff --git a/repo/includes/ValidatorBuilders.php
b/repo/includes/ValidatorBuilders.php
index 15fbf65..06ebc7c 100644
--- a/repo/includes/ValidatorBuilders.php
+++ b/repo/includes/ValidatorBuilders.php
@@ -9,7 +9,9 @@
use Wikibase\DataModel\Entity\EntityIdParser;
use Wikibase\DataModel\Services\Lookup\EntityLookup;
use Wikibase\Lib\ContentLanguages;
+use Wikibase\Repo\CachingCommonsMediaFileNameLookup;
use Wikibase\Repo\Validators\AlternativeValidator;
+use Wikibase\Repo\Validators\CommonsMediaExistsValidator;
use Wikibase\Repo\Validators\CompositeValidator;
use Wikibase\Repo\Validators\DataFieldValidator;
use Wikibase\Repo\Validators\DataValueValidator;
@@ -73,24 +75,32 @@
private $contentLanguages;
/**
+ * @var CachingCommonsMediaFileNameLookup
+ */
+ private $cachingCommonsMediaFileNameLookup;
+
+ /**
* @param EntityLookup $lookup
* @param EntityIdParser $idParser
* @param string[] $urlSchemes
* @param string $vocabularyBaseUri The base URI for vocabulary
concepts.
* @param ContentLanguages $contentLanguages
+ * @param CachingCommonsMediaFileNameLookup
$cachingCommonsMediaFileNameLookup
*/
public function __construct(
EntityLookup $lookup,
EntityIdParser $idParser,
array $urlSchemes,
$vocabularyBaseUri,
- ContentLanguages $contentLanguages
+ ContentLanguages $contentLanguages,
+ CachingCommonsMediaFileNameLookup
$cachingCommonsMediaFileNameLookup
) {
$this->entityLookup = $lookup;
$this->entityIdParser = $idParser;
$this->urlSchemes = $urlSchemes;
$this->vocabularyBaseUri = $vocabularyBaseUri;
$this->contentLanguages = $contentLanguages;
+ $this->cachingCommonsMediaFileNameLookup =
$cachingCommonsMediaFileNameLookup;
}
/**
@@ -157,9 +167,7 @@
$validators[] = new RegexValidator( '@[#/:\\\\]@u', true ); //
no nasty chars
// Must contain a non-empty file name and a non-empty,
character-only file extension.
$validators[] = new RegexValidator( '/.\.\w+$/u' );
- //TODO: add a validator that checks the rules that MediaWiki
imposes on filenames for uploads.
- // $wgLegalTitleChars and $wgIllegalFileChars define this,
but we need these for the *target* wiki.
- //TODO: add a validator that uses a foreign DB query to check
whether the file actually exists on commons.
+ $validators[] = new CommonsMediaExistsValidator(
$this->cachingCommonsMediaFileNameLookup );
$topValidator = new DataValueValidator(
new CompositeValidator( $validators ) //Note: each
validator is fatal
diff --git a/repo/includes/Validators/CommonsMediaExistsValidator.php
b/repo/includes/Validators/CommonsMediaExistsValidator.php
new file mode 100644
index 0000000..d23a73b
--- /dev/null
+++ b/repo/includes/Validators/CommonsMediaExistsValidator.php
@@ -0,0 +1,75 @@
+<?php
+
+namespace Wikibase\Repo\Validators;
+
+use DataValues\StringValue;
+use InvalidArgumentException;
+use ValueValidators\Error;
+use ValueValidators\Result;
+use ValueValidators\ValueValidator;
+use Wikibase\Repo\CachingCommonsMediaFileNameLookup;
+
+/**
+ * Validator for commons media values which checks whether the file in question
+ * exists. Doesn't check whether the name is normalized.
+ *
+ * @license GPL 2+
+ * @author Marius Hoch
+ */
+class CommonsMediaExistsValidator implements ValueValidator {
+
+ /**
+ * @var CachingCommonsMediaFileNameLookup
+ */
+ private $fileNameLookup;
+
+ /**
+ * @param CachingCommonsMediaFileNameLookup $fileNameLookup
+ */
+ public function __construct( CachingCommonsMediaFileNameLookup
$fileNameLookup ) {
+ $this->fileNameLookup = $fileNameLookup;
+ }
+
+ /**
+ * @see ValueValidator::validate()
+ *
+ * @param StringValue $value
+ *
+ * @return Result
+ * @throws InvalidArgumentException
+ */
+ public function validate( $value ) {
+ if ( $value instanceof StringValue ) {
+ $value = $value->getValue();
+ }
+
+ if ( !is_string( $value ) ) {
+ throw new InvalidArgumentException( "Expected a
StringValue." );
+ }
+
+ $actualName = $this->fileNameLookup->normalize( $value );
+
+ $errors = array();
+
+ if ( $actualName === null ) {
+ $errors[] = Error::newError(
+ "File does not exist: " . $value,
+ null,
+ 'no-such-media',
+ array( $value )
+ );
+ }
+
+ return empty( $errors ) ? Result::newSuccess() :
Result::newError( $errors );
+ }
+
+ /**
+ * @see ValueValidator::setOptions()
+ *
+ * @param array $options
+ */
+ public function setOptions( array $options ) {
+ // Do nothing. This method shouldn't even be in the interface.
+ }
+
+}
diff --git a/repo/includes/WikibaseRepo.php b/repo/includes/WikibaseRepo.php
index 380c0c6..fe6d5c6 100644
--- a/repo/includes/WikibaseRepo.php
+++ b/repo/includes/WikibaseRepo.php
@@ -7,9 +7,11 @@
use DataValues\Deserializers\DataValueDeserializer;
use DataValues\Serializers\DataValueSerializer;
use Deserializers\Deserializer;
+use HashBagOStuff;
use Hooks;
use IContextSource;
use Language;
+use MediaWiki\Site\MediaWikiPageNameNormalizer;
use Serializers\Serializer;
use SiteSQLStore;
use SiteStore;
@@ -67,6 +69,8 @@
use Wikibase\Rdf\ValueSnakRdfBuilderFactory;
use Wikibase\PropertyInfoBuilder;
use Wikibase\Repo\Api\ApiHelperFactory;
+use Wikibase\Repo\CachingCommonsMediaFileNameLookup;
+use Wikibase\Repo\CommonsMediaNormalizer;
use Wikibase\Repo\Content\EntityContentFactory;
use Wikibase\Repo\Content\ItemHandler;
use Wikibase\Repo\Content\PropertyHandler;
@@ -217,6 +221,11 @@
private $valueSnakRdfBuilderFactory;
/**
+ * @var CachingCommonsMediaFileNameLookup|null
+ */
+ private $cachingCommonsMediaFileNameLookup = null;
+
+ /**
* IMPORTANT: Use only when it is not feasible to inject an instance
properly.
*
* @since 0.4
@@ -275,7 +284,8 @@
$this->getEntityIdParser(),
$urlSchemes,
$this->getVocabularyBaseUri(),
- $this->getMonolingualTextLanguages()
+ $this->getMonolingualTextLanguages(),
+ $this->getCachingCommonsMediaFileNameLookup()
);
}
@@ -1421,6 +1431,20 @@
return new MediaWikiContentLanguages();
}
+ /**
+ * @return CachingCommonsMediaFileNameLookup
+ */
+ public function getCachingCommonsMediaFileNameLookup() {
+ if ( $this->cachingCommonsMediaFileNameLookup === null ) {
+ $this->cachingCommonsMediaFileNameLookup = new
CachingCommonsMediaFileNameLookup(
+ new MediaWikiPageNameNormalizer(
'https://commons.wikimedia.org/w/api.php' ),
+ new HashBagOStuff()
+ );
+ }
+
+ return $this->cachingCommonsMediaFileNameLookup;
+ }
+
private function getHtmlSnakFormatterFactory() {
return new WikibaseHtmlSnakFormatterFactory(
$this->getSnakFormatterFactory() );
}
diff --git
a/repo/tests/phpunit/includes/CachingCommonsMediaFileNameLookupTest.php
b/repo/tests/phpunit/includes/CachingCommonsMediaFileNameLookupTest.php
new file mode 100644
index 0000000..ba04d6c
--- /dev/null
+++ b/repo/tests/phpunit/includes/CachingCommonsMediaFileNameLookupTest.php
@@ -0,0 +1,101 @@
+<?php
+
+namespace Wikibase\Test\Repo\Validators;
+
+use HashBagOStuff;
+use PHPUnit_Framework_MockObject_Matcher_Invocation;
+use Wikibase\Repo\CachingCommonsMediaFileNameLookup;
+
+/**
+ * @covers Wikibase\Repo\Validators\CachingCommonsMediaFileNameLookupTest
+ *
+ * @license GPL 2+
+ *
+ * @group WikibaseRepo
+ * @group Wikibase
+ * @group WikibaseValidators
+ *
+ * @author Marius Hoch
+ */
+class CachingCommonsMediaFileNameLookupTest extends
\PHPUnit_Framework_TestCase {
+
+ public function testNormalize() {
+ $lookup = new CachingCommonsMediaFileNameLookup(
+ $this->getMediaWikiPageNameNormalizer( $this->once() ),
+ new HashBagOStuff()
+ );
+
+ // Two lookups only cause one API call.
+ $this->assertSame(
+ 'Foo.bar',
+ $lookup->normalize( 'Foo.bar' )
+ );
+ $this->assertSame(
+ 'Foo.bar',
+ $lookup->normalize( 'Foo.bar' )
+ );
+ }
+
+ public function testNormalize_cachedValueIsUsed() {
+ $cache = new HashBagOStuff();
+ $cache->set( 'commons-media-Foo.bar', 'Bar.foo' );
+
+ $lookup = new CachingCommonsMediaFileNameLookup(
+ $this->getMediaWikiPageNameNormalizer( $this->never() ),
+ $cache
+ );
+
+ $this->assertSame(
+ 'Bar.foo',
+ $lookup->normalize( 'Foo.bar' )
+ );
+ }
+
+ public function testNormalize_cachedWithOriginalNameAndNormalized() {
+ $cache = new HashBagOStuff();
+ $cache->set( 'commons-media-Foo.bar', 'Bar.foo' );
+
+ $lookup = new CachingCommonsMediaFileNameLookup(
+ $this->getMediaWikiPageNameNormalizer( $this->once() ),
+ $cache
+ );
+
+ $this->assertSame(
+ 'TARGET.png',
+ $lookup->normalize( 'REDIRECT.cat' )
+ );
+ $this->assertSame(
+ 'TARGET.png',
+ $lookup->normalize( 'TARGET.png' )
+ );
+ $this->assertSame(
+ 'TARGET.png',
+ $lookup->normalize( 'REDIRECT.cat' )
+ );
+ }
+
+ private function getMediaWikiPageNameNormalizer(
+ PHPUnit_Framework_MockObject_Matcher_Invocation $matcher
+ ) {
+ $fileNameLookup = $this->getMockBuilder(
'MediaWiki\Site\MediaWikiPageNameNormalizer' )
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $fileNameLookup->expects( $matcher )
+ ->method( 'normalizePageName' )
+ ->will( $this->returnCallback( function( $fileName ) {
+ if ( strpos( $fileName, 'NOT-FOUND' ) !== false
) {
+ return false;
+ }
+
+ if ( strpos( $fileName, 'REDIRECT' ) !== false
) {
+ return 'File:TARGET.png';
+ }
+
+ return 'File:' . $fileName;
+ } ) );
+
+ return $fileNameLookup;
+ }
+
+}
diff --git a/repo/tests/phpunit/includes/ValidatorBuildersTest.php
b/repo/tests/phpunit/includes/ValidatorBuildersTest.php
index 66b328e..ac82de6 100644
--- a/repo/tests/phpunit/includes/ValidatorBuildersTest.php
+++ b/repo/tests/phpunit/includes/ValidatorBuildersTest.php
@@ -30,7 +30,7 @@
*/
class ValidatorBuildersTest extends PHPUnit_Framework_TestCase {
- protected function newValidatorBuilders() {
+ private function newValidatorBuilders() {
$entityIdParser = new BasicEntityIdParser();
$q8 = new Item( new ItemId( 'Q8' ) );
@@ -54,10 +54,26 @@
$entityIdParser,
$urlSchemes,
'http://qudt.org/vocab/',
- $contentLanguages
+ $contentLanguages,
+ $this->getCachingCommonsMediaFileNameLookup()
);
return $builders;
+ }
+
+ private function getCachingCommonsMediaFileNameLookup() {
+ $fileNameLookup = $this->getMockBuilder(
'Wikibase\Repo\CachingCommonsMediaFileNameLookup' )
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $fileNameLookup->expects( $this->any() )
+ ->method( 'normalize' )
+ ->with( $this->isType( 'string' ) )
+ ->will( $this->returnCallback( function( $fileName ) {
+ return strpos( $fileName, 'NOT-FOUND' ) ===
false ? $fileName : null;
+ } ) );
+
+ return $fileNameLookup;
}
public function provideDataTypeValidation() {
@@ -96,6 +112,7 @@
array( 'commonsMedia', new StringValue( 'Äöü.jpg' ),
true, 'Unicode support' ),
array( 'commonsMedia', new StringValue( ' Foo.jpg' ),
false, 'media name with leading space' ),
array( 'commonsMedia', new StringValue( 'Foo.jpg ' ),
false, 'media name with trailing space' ),
+ array( 'commonsMedia', new StringValue(
'Foo-NOT-FOUND.jpg' ), false, 'file not found' ),
//string
array( 'string', 'Foo', false, 'StringValue expected,
string supplied' ),
diff --git
a/repo/tests/phpunit/includes/Validators/CommonsMediaExistsValidatorTest.php
b/repo/tests/phpunit/includes/Validators/CommonsMediaExistsValidatorTest.php
new file mode 100644
index 0000000..5328f96
--- /dev/null
+++ b/repo/tests/phpunit/includes/Validators/CommonsMediaExistsValidatorTest.php
@@ -0,0 +1,69 @@
+<?php
+
+namespace Wikibase\Test\Repo\Validators;
+
+use DataValues\StringValue;
+use Wikibase\Repo\Validators\CommonsMediaExistsValidator;
+
+/**
+ * @covers Wikibase\Repo\Validators\CommonsMediaExistsValidator
+ *
+ * @license GPL 2+
+ *
+ * @group WikibaseRepo
+ * @group Wikibase
+ * @group WikibaseValidators
+ *
+ * @author Marius Hoch
+ */
+class CommonsMediaExistsValidatorTest extends \PHPUnit_Framework_TestCase {
+
+ private function getCachingCommonsMediaFileNameLookup() {
+ $fileNameLookup = $this->getMockBuilder(
'Wikibase\Repo\CachingCommonsMediaFileNameLookup' )
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $fileNameLookup->expects( $this->any() )
+ ->method( 'normalize' )
+ ->with( $this->isType( 'string' ) )
+ ->will( $this->returnCallback( function( $fileName ) {
+ return strpos( $fileName, 'NOT-FOUND' ) ===
false ? $fileName : null;
+ } ) );
+
+ return $fileNameLookup;
+ }
+
+ /**
+ * @dataProvider provideValidate()
+ */
+ public function testValidate( $expected, $value ) {
+ $validator = new CommonsMediaExistsValidator(
$this->getCachingCommonsMediaFileNameLookup() );
+
+ $this->assertSame(
+ $expected,
+ $validator->validate( $value )->isValid()
+ );
+ }
+
+ public function provideValidate() {
+ return array(
+ "Valid, plain string" => array(
+ true, "Foo.png"
+ ),
+ "Valid, StringValue" => array(
+ true, new StringValue( "Foo.png" )
+ ),
+ "Invalid, StringValue" => array(
+ false, new StringValue( "Foo.NOT-FOUND.png" )
+ )
+ );
+ }
+
+ public function testValidate_noString() {
+ $validator = new CommonsMediaExistsValidator(
$this->getCachingCommonsMediaFileNameLookup() );
+
+ $this->setExpectedException( 'InvalidArgumentException' );
+ $validator->validate( 5 );
+ }
+
+}
diff --git a/repo/tests/phpunit/includes/WikibaseRepoTest.php
b/repo/tests/phpunit/includes/WikibaseRepoTest.php
index c5ef8d6..a8b71df 100644
--- a/repo/tests/phpunit/includes/WikibaseRepoTest.php
+++ b/repo/tests/phpunit/includes/WikibaseRepoTest.php
@@ -362,4 +362,9 @@
$this->assertInstanceOf(
'Wikibase\Rdf\ValueSnakRdfBuilderFactory', $factory );
}
+ public function testGetCachingCommonsMediaFileNameLookup() {
+ $lookup =
$this->getWikibaseRepo()->getCachingCommonsMediaFileNameLookup();
+ $this->assertInstanceOf(
'Wikibase\Repo\CachingCommonsMediaFileNameLookup', $lookup );
+ }
+
}
--
To view, visit https://gerrit.wikimedia.org/r/260715
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I65507d6ef974481c73d93353e4e9084ea77a6354
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Hoo man <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits