Gilles has uploaded a new change for review. https://gerrit.wikimedia.org/r/115379
Change subject: Reduce Wikidata author vcard to creator part
......................................................................

Reduce Wikidata author vcard to creator part

Wikidata templates dump a vcard with too much data in the artist
field. When that format is found, TemplateParser reduces it to
meaningful information.

Change-Id: Id646261cf7b7516512a722af44f7f53da1e52005
Mingle: https://wikimedia.mingle.thoughtworks.com/projects/multimedia/cards/202
---
M TemplateParser.php
M tests/phpunit/TemplateParserTest.php
2 files changed, 22 insertions(+), 4 deletions(-)

  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CommonsMetadata refs/changes/79/115379/1

diff --git a/TemplateParser.php b/TemplateParser.php
index c987c0d..36df00f 100755
--- a/TemplateParser.php
+++ b/TemplateParser.php
@@ -154,13 +154,31 @@
 			$table = $domNavigator->closest( $labelField, 'table' );
 			$groupName = $table ? $table->getNodePath() : '-';
 
-			$data[$groupName][$fieldName] = $this->parseText( $domNavigator, $informationField );
+			$method = 'parse' . $fieldName;
+
+			if ( !method_exists( $this, $method ) ) {
+				$method = 'parseText';
+			}
+
+			$data[$groupName][$fieldName] = $this->{$method}( $domNavigator, $informationField );
 		}
 		//return $this->arrayTranspose( $data );
 		// FIXME bug 57259 - for now select the first information template if there are more than one
 		return $data ? reset($data) : array();
 	}
 
+	protected function parseArtist( DomNavigator $domNavigator, DOMNode $node ) {
+		foreach ( $domNavigator->findElementsWithId( 'span', 'creator', $node ) as $creator ) {
+			foreach ( $domNavigator->findElementsWithClass( 'a', 'extiw', $creator) as $creatorLink ) {
+				return $this->toHtml( $creatorLink );
+			}
+
+			return $creator->textContent;
+		}
+
+		return $this->parseText( $domNavigator, $node );
+	}
+
 	protected function parseLicenses( DomNavigator $domNavigator ) {
 		$data = array();
 		foreach ( $domNavigator->findElementsWithClass( '*', 'licensetpl' ) as $licenseNode ) {
diff --git a/tests/phpunit/TemplateParserTest.php b/tests/phpunit/TemplateParserTest.php
index 8283fa7..c3096a8 100644
--- a/tests/phpunit/TemplateParserTest.php
+++ b/tests/phpunit/TemplateParserTest.php
@@ -214,10 +214,10 @@
 	 * The template markup should not be present in the metadata.
 	 */
 	public function testHCard() {
-		$this->markTestSkipped( 'bug 57383' );
-
 		$data = $this->parseTestHTML( 'creator_template' );
 
-		$this->assertFieldEquals( 'Artist', 'George Gower', $data );
+		$this->assertFieldEquals( 'Artist',
+			'<a href="//en.wikipedia.org/wiki/George_Gower" class="extiw" title="en:George Gower">George Gower</a>',
+			$data );
 	}
 
-- 
To view, visit https://gerrit.wikimedia.org/r/115379
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id646261cf7b7516512a722af44f7f53da1e52005
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CommonsMetadata
Gerrit-Branch: master
Gerrit-Owner: Gilles <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
