Hoo man has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/260715

Change subject: Implement validation for CommonsMedia
......................................................................

Implement validation for CommonsMedia

This also re-enables other previously disabled validators
which have been disabled by accident (AFAICT).

I've decided to implement a CachingCommonsMediaFileNameLookup
in order to be able to use it for both validation and (later
on) value parsing/ normalization without having to do more
than one API query.

Needs I008cadd29a2aa1f21098339b895c35a100959b04 in core.

Bug: T87263
Change-Id: I65507d6ef974481c73d93353e4e9084ea77a6354
---
M repo/WikibaseRepo.datatypes.php
A repo/includes/CachingCommonsMediaFileNameLookup.php
M repo/includes/ValidatorBuilders.php
A repo/includes/Validators/CommonsMediaExistsValidator.php
M repo/includes/WikibaseRepo.php
A repo/tests/phpunit/includes/CachingCommonsMediaFileNameLookupTest.php
M repo/tests/phpunit/includes/ValidatorBuildersTest.php
A repo/tests/phpunit/includes/Validators/CommonsMediaExistsValidatorTest.php
M repo/tests/phpunit/includes/WikibaseRepoTest.php
9 files changed, 404 insertions(+), 8 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase 
refs/changes/15/260715/1

diff --git a/repo/WikibaseRepo.datatypes.php b/repo/WikibaseRepo.datatypes.php
index a01e7dc..c35c7c8 100644
--- a/repo/WikibaseRepo.datatypes.php
+++ b/repo/WikibaseRepo.datatypes.php
@@ -86,7 +86,12 @@
                'PT:commonsMedia' => array(
                        'validator-factory-callback' => function() {
                                $factory = 
WikibaseRepo::getDefaultValidatorBuilders();
-                               return $factory->buildStringValidators();
+                               // Don't go for commons during unit tests. This 
sucks.
+                               if ( !defined( 'MW_PHPUNIT_TEST' ) ) {
+                                       return $factory->buildMediaValidators();
+                               } else {
+                                       return 
$factory->buildStringValidators();
+                               }
                        },
                        'parser-factory-callback' => $newStringParser,
                        'formatter-factory-callback' => function( $format, 
FormatterOptions $options ) {
diff --git a/repo/includes/CachingCommonsMediaFileNameLookup.php 
b/repo/includes/CachingCommonsMediaFileNameLookup.php
new file mode 100644
index 0000000..b2ac6c9
--- /dev/null
+++ b/repo/includes/CachingCommonsMediaFileNameLookup.php
@@ -0,0 +1,92 @@
+<?php
+
+namespace Wikibase\Repo;
+
+use BagOStuff;
+use MediaWiki\Site\MediaWikiPageNameNormalizer;
+use InvalidArgumentException;
+use Wikimedia\Assert\Assert;
+
+/**
+ * Caching service that looks up normalized file names from Wikimedia Commons.
+ *
+ * @license GPL 2+
+ * @author Marius Hoch
+ */
+class CachingCommonsMediaFileNameLookup {
+
+       const CACHE_DURATION = 600;
+
+       /**
+        * @var MediaWikiPageNameNormalizer
+        */
+       private $mediaWikiPageNameNormalizer;
+
+       /**
+        * @var BagOStuff
+        */
+       private $cache;
+
+       /**
+        * @param MediaWikiPageNameNormalizer $mediaWikiPageNameNormalizer
+        * @param BagOStuff $cache
+        */
+       public function __construct(
+               MediaWikiPageNameNormalizer $mediaWikiPageNameNormalizer,
+               BagOStuff $cache
+       ) {
+               $this->mediaWikiPageNameNormalizer = 
$mediaWikiPageNameNormalizer;
+               $this->cache = $cache;
+       }
+
+       /**
+        * @param string $fileName File name, without the File: prefix.
+        *
+        * @return string|null The normalized file name or null (if the page 
does not exist)
+        * @throws InvalidArgumentException
+        */
+       public function normalize( $fileName ) {
+               Assert::parameterType( 'string', $fileName, '$pageName' );
+
+               $cachedValue = $this->cache->get( $this->getCacheKey( $fileName 
) );
+               if ( $cachedValue !== false ) {
+                       return $cachedValue;
+               }
+
+               $actualPageName = 
$this->mediaWikiPageNameNormalizer->normalizePageName(
+                       'File:' . $fileName
+               );
+
+               if ( $actualPageName === false ) {
+                       $actualPageName = null;
+               } else {
+                       $actualPageName = str_replace( 'File:', '', 
$actualPageName );
+               }
+
+               // Cache with the given name and (if available) the normalized 
one.
+               if ( $actualPageName !== null ) {
+                       $this->cache->set(
+                               $this->getCacheKey( $actualPageName ),
+                               $actualPageName,
+                               self::CACHE_DURATION
+                       );
+               }
+
+               $this->cache->set(
+                       $this->getCacheKey( $fileName ),
+                       $actualPageName,
+                       self::CACHE_DURATION
+               );
+
+               return $actualPageName;
+       }
+
+       /**
+        * @param string $pageName
+        * @return string
+        */
+       private function getCacheKey( $pageName ) {
+               return 'commons-media-' . $pageName;
+       }
+
+}
diff --git a/repo/includes/ValidatorBuilders.php 
b/repo/includes/ValidatorBuilders.php
index 15fbf65..06ebc7c 100644
--- a/repo/includes/ValidatorBuilders.php
+++ b/repo/includes/ValidatorBuilders.php
@@ -9,7 +9,9 @@
 use Wikibase\DataModel\Entity\EntityIdParser;
 use Wikibase\DataModel\Services\Lookup\EntityLookup;
 use Wikibase\Lib\ContentLanguages;
+use Wikibase\Repo\CachingCommonsMediaFileNameLookup;
 use Wikibase\Repo\Validators\AlternativeValidator;
+use Wikibase\Repo\Validators\CommonsMediaExistsValidator;
 use Wikibase\Repo\Validators\CompositeValidator;
 use Wikibase\Repo\Validators\DataFieldValidator;
 use Wikibase\Repo\Validators\DataValueValidator;
@@ -73,24 +75,32 @@
        private $contentLanguages;
 
        /**
+        * @var CachingCommonsMediaFileNameLookup
+        */
+       private $cachingCommonsMediaFileNameLookup;
+
+       /**
         * @param EntityLookup $lookup
         * @param EntityIdParser $idParser
         * @param string[] $urlSchemes
         * @param string $vocabularyBaseUri The base URI for vocabulary 
concepts.
         * @param ContentLanguages $contentLanguages
+        * @param CachingCommonsMediaFileNameLookup 
$cachingCommonsMediaFileNameLookup
         */
        public function __construct(
                EntityLookup $lookup,
                EntityIdParser $idParser,
                array $urlSchemes,
                $vocabularyBaseUri,
-               ContentLanguages $contentLanguages
+               ContentLanguages $contentLanguages,
+               CachingCommonsMediaFileNameLookup 
$cachingCommonsMediaFileNameLookup
        ) {
                $this->entityLookup = $lookup;
                $this->entityIdParser = $idParser;
                $this->urlSchemes = $urlSchemes;
                $this->vocabularyBaseUri = $vocabularyBaseUri;
                $this->contentLanguages = $contentLanguages;
+               $this->cachingCommonsMediaFileNameLookup = 
$cachingCommonsMediaFileNameLookup;
        }
 
        /**
@@ -157,9 +167,7 @@
                $validators[] = new RegexValidator( '@[#/:\\\\]@u', true ); // 
no nasty chars
                // Must contain a non-empty file name and a non-empty, 
character-only file extension.
                $validators[] = new RegexValidator( '/.\.\w+$/u' );
-               //TODO: add a validator that checks the rules that MediaWiki 
imposes on filenames for uploads.
-               //      $wgLegalTitleChars and $wgIllegalFileChars define this, 
but we need these for the *target* wiki.
-               //TODO: add a validator that uses a foreign DB query to check 
whether the file actually exists on commons.
+               $validators[] = new CommonsMediaExistsValidator( 
$this->cachingCommonsMediaFileNameLookup );
 
                $topValidator = new DataValueValidator(
                        new CompositeValidator( $validators ) //Note: each 
validator is fatal
diff --git a/repo/includes/Validators/CommonsMediaExistsValidator.php 
b/repo/includes/Validators/CommonsMediaExistsValidator.php
new file mode 100644
index 0000000..d23a73b
--- /dev/null
+++ b/repo/includes/Validators/CommonsMediaExistsValidator.php
@@ -0,0 +1,75 @@
+<?php
+
+namespace Wikibase\Repo\Validators;
+
+use DataValues\StringValue;
+use InvalidArgumentException;
+use ValueValidators\Error;
+use ValueValidators\Result;
+use ValueValidators\ValueValidator;
+use Wikibase\Repo\CachingCommonsMediaFileNameLookup;
+
+/**
+ * Validator for commons media values which checks whether the file in question
+ * exists. Doesn't check whether the name is normalized.
+ *
+ * @license GPL 2+
+ * @author Marius Hoch
+ */
+class CommonsMediaExistsValidator implements ValueValidator {
+
+       /**
+        * @var CachingCommonsMediaFileNameLookup
+        */
+       private $fileNameLookup;
+
+       /**
+        * @param CachingCommonsMediaFileNameLookup $fileNameLookup
+        */
+       public function __construct( CachingCommonsMediaFileNameLookup 
$fileNameLookup ) {
+               $this->fileNameLookup = $fileNameLookup;
+       }
+
+       /**
+        * @see ValueValidator::validate()
+        *
+        * @param StringValue $value
+        *
+        * @return Result
+        * @throws InvalidArgumentException
+        */
+       public function validate( $value ) {
+               if ( $value instanceof StringValue ) {
+                       $value = $value->getValue();
+               }
+
+               if ( !is_string( $value ) ) {
+                       throw new InvalidArgumentException( "Expected a 
StringValue." );
+               }
+
+               $actualName = $this->fileNameLookup->normalize( $value );
+
+               $errors = array();
+
+               if ( $actualName === null ) {
+                       $errors[] = Error::newError(
+                               "File does not exist: " . $value,
+                               null,
+                               'no-such-media',
+                               array( $value )
+                       );
+               }
+
+               return empty( $errors ) ? Result::newSuccess() : 
Result::newError( $errors );
+       }
+
+       /**
+        * @see ValueValidator::setOptions()
+        *
+        * @param array $options
+        */
+       public function setOptions( array $options ) {
+               // Do nothing. This method shouldn't even be in the interface.
+       }
+
+}
diff --git a/repo/includes/WikibaseRepo.php b/repo/includes/WikibaseRepo.php
index 380c0c6..fe6d5c6 100644
--- a/repo/includes/WikibaseRepo.php
+++ b/repo/includes/WikibaseRepo.php
@@ -7,9 +7,11 @@
 use DataValues\Deserializers\DataValueDeserializer;
 use DataValues\Serializers\DataValueSerializer;
 use Deserializers\Deserializer;
+use HashBagOStuff;
 use Hooks;
 use IContextSource;
 use Language;
+use MediaWiki\Site\MediaWikiPageNameNormalizer;
 use Serializers\Serializer;
 use SiteSQLStore;
 use SiteStore;
@@ -67,6 +69,8 @@
 use Wikibase\Rdf\ValueSnakRdfBuilderFactory;
 use Wikibase\PropertyInfoBuilder;
 use Wikibase\Repo\Api\ApiHelperFactory;
+use Wikibase\Repo\CachingCommonsMediaFileNameLookup;
+use Wikibase\Repo\CommonsMediaNormalizer;
 use Wikibase\Repo\Content\EntityContentFactory;
 use Wikibase\Repo\Content\ItemHandler;
 use Wikibase\Repo\Content\PropertyHandler;
@@ -217,6 +221,11 @@
        private $valueSnakRdfBuilderFactory;
 
        /**
+        * @var CachingCommonsMediaFileNameLookup|null
+        */
+       private $cachingCommonsMediaFileNameLookup = null;
+
+       /**
         * IMPORTANT: Use only when it is not feasible to inject an instance 
properly.
         *
         * @since 0.4
@@ -275,7 +284,8 @@
                        $this->getEntityIdParser(),
                        $urlSchemes,
                        $this->getVocabularyBaseUri(),
-                       $this->getMonolingualTextLanguages()
+                       $this->getMonolingualTextLanguages(),
+                       $this->getCachingCommonsMediaFileNameLookup()
                );
        }
 
@@ -1421,6 +1431,20 @@
                return new MediaWikiContentLanguages();
        }
 
+       /**
+        * @return CachingCommonsMediaFileNameLookup
+        */
+       public function getCachingCommonsMediaFileNameLookup() {
+               if ( $this->cachingCommonsMediaFileNameLookup === null ) {
+                       $this->cachingCommonsMediaFileNameLookup = new 
CachingCommonsMediaFileNameLookup(
+                               new MediaWikiPageNameNormalizer( 
'https://commons.wikimedia.org/w/api.php' ),
+                               new HashBagOStuff()
+                       );
+               }
+
+               return $this->cachingCommonsMediaFileNameLookup;
+       }
+
        private function getHtmlSnakFormatterFactory() {
                return new WikibaseHtmlSnakFormatterFactory( 
$this->getSnakFormatterFactory() );
        }
diff --git 
a/repo/tests/phpunit/includes/CachingCommonsMediaFileNameLookupTest.php 
b/repo/tests/phpunit/includes/CachingCommonsMediaFileNameLookupTest.php
new file mode 100644
index 0000000..ba04d6c
--- /dev/null
+++ b/repo/tests/phpunit/includes/CachingCommonsMediaFileNameLookupTest.php
@@ -0,0 +1,101 @@
+<?php
+
+namespace Wikibase\Test\Repo\Validators;
+
+use HashBagOStuff;
+use PHPUnit_Framework_MockObject_Matcher_Invocation;
+use Wikibase\Repo\CachingCommonsMediaFileNameLookup;
+
+/**
+ * @covers Wikibase\Repo\Validators\CachingCommonsMediaFileNameLookupTest
+ *
+ * @license GPL 2+
+ *
+ * @group WikibaseRepo
+ * @group Wikibase
+ * @group WikibaseValidators
+ *
+ * @author Marius Hoch
+ */
+class CachingCommonsMediaFileNameLookupTest extends 
\PHPUnit_Framework_TestCase {
+
+       public function testNormalize() {
+               $lookup = new CachingCommonsMediaFileNameLookup(
+                       $this->getMediaWikiPageNameNormalizer( $this->once() ),
+                       new HashBagOStuff()
+               );
+
+               // Two lookups only cause one API call.
+               $this->assertSame(
+                       'Foo.bar',
+                       $lookup->normalize( 'Foo.bar' )
+               );
+               $this->assertSame(
+                       'Foo.bar',
+                       $lookup->normalize( 'Foo.bar' )
+               );
+       }
+
+       public function testNormalize_cachedValueIsUsed() {
+               $cache = new HashBagOStuff();
+               $cache->set( 'commons-media-Foo.bar', 'Bar.foo' );
+
+               $lookup = new CachingCommonsMediaFileNameLookup(
+                       $this->getMediaWikiPageNameNormalizer( $this->never() ),
+                       $cache
+               );
+
+               $this->assertSame(
+                       'Bar.foo',
+                       $lookup->normalize( 'Foo.bar' )
+               );
+       }
+
+       public function testNormalize_cachedWithOriginalNameAndNormalized() {
+               $cache = new HashBagOStuff();
+               $cache->set( 'commons-media-Foo.bar', 'Bar.foo' );
+
+               $lookup = new CachingCommonsMediaFileNameLookup(
+                       $this->getMediaWikiPageNameNormalizer( $this->once() ),
+                       $cache
+               );
+
+               $this->assertSame(
+                       'TARGET.png',
+                       $lookup->normalize( 'REDIRECT.cat' )
+               );
+               $this->assertSame(
+                       'TARGET.png',
+                       $lookup->normalize( 'TARGET.png' )
+               );
+               $this->assertSame(
+                       'TARGET.png',
+                       $lookup->normalize( 'REDIRECT.cat' )
+               );
+       }
+
+       private function getMediaWikiPageNameNormalizer(
+               PHPUnit_Framework_MockObject_Matcher_Invocation $matcher
+       ) {
+               $fileNameLookup = $this->getMockBuilder( 
'MediaWiki\Site\MediaWikiPageNameNormalizer' )
+                       ->disableOriginalConstructor()
+                       ->getMock();
+
+               $fileNameLookup->expects( $matcher )
+                       ->method( 'normalizePageName' )
+                       ->will( $this->returnCallback( function( $fileName ) {
+                               if ( strpos( $fileName, 'NOT-FOUND' ) !== false 
) {
+                                       return false;
+                               }
+
+                               if ( strpos( $fileName, 'REDIRECT' ) !== false 
) {
+                                       return 'File:TARGET.png';
+                               }
+
+                               return 'File:' . $fileName;
+                       } ) );
+
+               return $fileNameLookup;
+       }
+
+}
diff --git a/repo/tests/phpunit/includes/ValidatorBuildersTest.php 
b/repo/tests/phpunit/includes/ValidatorBuildersTest.php
index 66b328e..ac82de6 100644
--- a/repo/tests/phpunit/includes/ValidatorBuildersTest.php
+++ b/repo/tests/phpunit/includes/ValidatorBuildersTest.php
@@ -30,7 +30,7 @@
  */
 class ValidatorBuildersTest extends PHPUnit_Framework_TestCase {
 
-       protected function newValidatorBuilders() {
+       private function newValidatorBuilders() {
                $entityIdParser = new BasicEntityIdParser();
 
                $q8 = new Item( new ItemId( 'Q8' ) );
@@ -54,10 +54,26 @@
                        $entityIdParser,
                        $urlSchemes,
                        'http://qudt.org/vocab/',
-                       $contentLanguages
+                       $contentLanguages,
+                       $this->getCachingCommonsMediaFileNameLookup()
                );
 
                return $builders;
+       }
+
+       private function getCachingCommonsMediaFileNameLookup() {
+               $fileNameLookup = $this->getMockBuilder( 
'Wikibase\Repo\CachingCommonsMediaFileNameLookup' )
+                       ->disableOriginalConstructor()
+                       ->getMock();
+
+               $fileNameLookup->expects( $this->any() )
+                       ->method( 'normalize' )
+                       ->with( $this->isType( 'string' ) )
+                       ->will( $this->returnCallback( function( $fileName ) {
+                               return strpos( $fileName, 'NOT-FOUND' ) === 
false ? $fileName : null;
+                       } ) );
+
+               return $fileNameLookup;
        }
 
        public function provideDataTypeValidation() {
@@ -96,6 +112,7 @@
                        array( 'commonsMedia', new StringValue( 'Äöü.jpg' ), 
true, 'Unicode support' ),
                        array( 'commonsMedia', new StringValue( ' Foo.jpg' ), 
false, 'media name with leading space' ),
                        array( 'commonsMedia', new StringValue( 'Foo.jpg ' ), 
false, 'media name with trailing space' ),
+                       array( 'commonsMedia', new StringValue( 
'Foo-NOT-FOUND.jpg' ), false, 'file not found' ),
 
                        //string
                        array( 'string', 'Foo', false, 'StringValue expected, 
string supplied' ),
diff --git 
a/repo/tests/phpunit/includes/Validators/CommonsMediaExistsValidatorTest.php 
b/repo/tests/phpunit/includes/Validators/CommonsMediaExistsValidatorTest.php
new file mode 100644
index 0000000..5328f96
--- /dev/null
+++ b/repo/tests/phpunit/includes/Validators/CommonsMediaExistsValidatorTest.php
@@ -0,0 +1,69 @@
+<?php
+
+namespace Wikibase\Test\Repo\Validators;
+
+use DataValues\StringValue;
+use Wikibase\Repo\Validators\CommonsMediaExistsValidator;
+
+/**
+ * @covers Wikibase\Repo\Validators\CommonsMediaExistsValidator
+ *
+ * @license GPL 2+
+ *
+ * @group WikibaseRepo
+ * @group Wikibase
+ * @group WikibaseValidators
+ *
+ * @author Marius Hoch
+ */
+class CommonsMediaExistsValidatorTest extends \PHPUnit_Framework_TestCase {
+
+       private function getCachingCommonsMediaFileNameLookup() {
+               $fileNameLookup = $this->getMockBuilder( 
'Wikibase\Repo\CachingCommonsMediaFileNameLookup' )
+                       ->disableOriginalConstructor()
+                       ->getMock();
+
+               $fileNameLookup->expects( $this->any() )
+                       ->method( 'normalize' )
+                       ->with( $this->isType( 'string' ) )
+                       ->will( $this->returnCallback( function( $fileName ) {
+                               return strpos( $fileName, 'NOT-FOUND' ) === 
false ? $fileName : null;
+                       } ) );
+
+               return $fileNameLookup;
+       }
+
+       /**
+        * @dataProvider provideValidate()
+        */
+       public function testValidate( $expected, $value ) {
+               $validator = new CommonsMediaExistsValidator( 
$this->getCachingCommonsMediaFileNameLookup() );
+
+               $this->assertSame(
+                       $expected,
+                       $validator->validate( $value )->isValid()
+               );
+       }
+
+       public function provideValidate() {
+               return array(
+                       "Valid, plain string" => array(
+                               true, "Foo.png"
+                       ),
+                       "Valid, StringValue" => array(
+                               true, new StringValue( "Foo.png" )
+                       ),
+                       "Invalid, StringValue" => array(
+                               false, new StringValue( "Foo.NOT-FOUND.png" )
+                       )
+               );
+       }
+
+       public function testValidate_noString() {
+               $validator = new CommonsMediaExistsValidator( 
$this->getCachingCommonsMediaFileNameLookup() );
+
+               $this->setExpectedException( 'InvalidArgumentException' );
+               $validator->validate( 5 );
+       }
+
+}
diff --git a/repo/tests/phpunit/includes/WikibaseRepoTest.php 
b/repo/tests/phpunit/includes/WikibaseRepoTest.php
index c5ef8d6..a8b71df 100644
--- a/repo/tests/phpunit/includes/WikibaseRepoTest.php
+++ b/repo/tests/phpunit/includes/WikibaseRepoTest.php
@@ -362,4 +362,9 @@
                $this->assertInstanceOf( 
'Wikibase\Rdf\ValueSnakRdfBuilderFactory', $factory );
        }
 
+       public function testGetCachingCommonsMediaFileNameLookup() {
+               $lookup = 
$this->getWikibaseRepo()->getCachingCommonsMediaFileNameLookup();
+               $this->assertInstanceOf( 
'Wikibase\Repo\CachingCommonsMediaFileNameLookup', $lookup );
+       }
+
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/260715
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I65507d6ef974481c73d93353e4e9084ea77a6354
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Hoo man <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to