jenkins-bot has submitted this change and it was merged. Change subject: Reduce Wikidata author vcard to creator part ......................................................................
Reduce Wikidata author vcard to creator part Wikidata templates dump a vcard with too much data in the artist field. When that format is found, TemplateParser reduces it to meaningful information. Change-Id: Id646261cf7b7516512a722af44f7f53da1e52005 Mingle: https://wikimedia.mingle.thoughtworks.com/projects/multimedia/cards/202 --- M TemplateParser.php M tests/phpunit/TemplateParserTest.php 2 files changed, 43 insertions(+), 4 deletions(-) Approvals: Gergő Tisza: Looks good to me, approved jenkins-bot: Verified diff --git a/TemplateParser.php b/TemplateParser.php index c987c0d..c850368 100755 --- a/TemplateParser.php +++ b/TemplateParser.php @@ -154,13 +154,52 @@ $table = $domNavigator->closest( $labelField, 'table' ); $groupName = $table ? $table->getNodePath() : '-'; - $data[$groupName][$fieldName] = $this->parseText( $domNavigator, $informationField ); + $method = 'parseField' . $fieldName; + + if ( !method_exists( $this, $method ) ) { + $method = 'parseText'; + } + + $data[$groupName][$fieldName] = $this->{$method}( $domNavigator, $informationField ); } //return $this->arrayTranspose( $data ); // FIXME bug 57259 - for now select the first information template if there are more than one return $data ? reset($data) : array(); } + /** + * Parses the artist, which might be an hCard + * @param DomNavigator $domNavigator + * @param DOMNode $node + * @returns string + */ + protected function parseFieldArtist( DomNavigator $domNavigator, DOMNode $node ) { + if ( $field = $this->extractHCardProperty( $domNavigator, $node, 'fn' ) ) { + return $this->innerHtml( $field ); + } + + return $this->parseText( $domNavigator, $node ); + } + + /** + * Extracts an hCard property from a DOMNode that contains an hCard + * @param DomNavigator $domNavigator + * @param DOMNode $node + * @param string $property hCard property to be extracted + * @return DOMNode + */ + protected function extractHCardProperty( DomNavigator $domNavigator, DOMNode $node, $property ) { + foreach ( $domNavigator->findElementsWithClass( '*', 'vcard', $node ) as $vcard ) { + foreach ( $domNavigator->findElementsWithClass( '*', $property, $vcard ) as $name ) { + return $name; + } + } + } + + /** + * @param DomNavigator $domNavigator + * @return array + */ protected function parseLicenses( DomNavigator $domNavigator ) { $data = array(); foreach ( $domNavigator->findElementsWithClass( '*', 'licensetpl' ) as $licenseNode ) { diff --git a/tests/phpunit/TemplateParserTest.php b/tests/phpunit/TemplateParserTest.php index 8283fa7..5af6396 100644 --- a/tests/phpunit/TemplateParserTest.php +++ b/tests/phpunit/TemplateParserTest.php @@ -214,10 +214,10 @@ * The template markup should not be present in the metadata. */ public function testHCard() { - $this->markTestSkipped( 'bug 57383' ); - $data = $this->parseTestHTML( 'creator_template' ); - $this->assertFieldEquals( 'Artist', 'George Gower', $data ); + $this->assertFieldEquals( 'Artist', + '<bdi>After <a href="//en.wikipedia.org/wiki/George_Gower" class="extiw" title="en:George Gower">George Gower</a></bdi>', + $data ); } -- To view, visit https://gerrit.wikimedia.org/r/115379 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Id646261cf7b7516512a722af44f7f53da1e52005 Gerrit-PatchSet: 2 Gerrit-Project: mediawiki/extensions/CommonsMetadata Gerrit-Branch: master Gerrit-Owner: Gilles <[email protected]> Gerrit-Reviewer: Gergő Tisza <[email protected]> Gerrit-Reviewer: Gilles <[email protected]> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
