jenkins-bot has submitted this change and it was merged.

Change subject: Reduce Wikidata author vcard to creator part
......................................................................


Reduce Wikidata author vcard to creator part

Wikidata templates dump a vcard with too much data
in the artist field. When that format is found, TemplateParser
reduces it to meaningful information.

Change-Id: Id646261cf7b7516512a722af44f7f53da1e52005
Mingle: https://wikimedia.mingle.thoughtworks.com/projects/multimedia/cards/202
---
M TemplateParser.php
M tests/phpunit/TemplateParserTest.php
2 files changed, 43 insertions(+), 4 deletions(-)

Approvals:
  Gergő Tisza: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/TemplateParser.php b/TemplateParser.php
index c987c0d..c850368 100755
--- a/TemplateParser.php
+++ b/TemplateParser.php
@@ -154,13 +154,52 @@
                        $table = $domNavigator->closest( $labelField, 'table' );
                        $groupName = $table ? $table->getNodePath() : '-';
 
-                       $data[$groupName][$fieldName] = $this->parseText( 
$domNavigator, $informationField );
+                       $method = 'parseField' . $fieldName;
+
+                       if ( !method_exists( $this, $method ) ) {
+                               $method = 'parseText';
+                       }
+
+                       $data[$groupName][$fieldName] = $this->{$method}( 
$domNavigator, $informationField );
                }
                //return $this->arrayTranspose( $data );
                // FIXME bug 57259 - for now select the first information 
template if there are more than one
                return $data ? reset($data) : array();
        }
 
+       /**
+        * Parses the artist, which might be an hCard
+        * @param DomNavigator $domNavigator
+        * @param DOMNode $node
+        * @returns string
+        */
+       protected function parseFieldArtist( DomNavigator $domNavigator, 
DOMNode $node ) {
+               if ( $field = $this->extractHCardProperty(  $domNavigator, 
$node, 'fn' ) ) {
+                       return $this->innerHtml( $field );
+               }
+
+               return $this->parseText( $domNavigator, $node );
+       }
+
+       /**
+        * Extracts an hCard property from a DOMNode that contains an hCard
+        * @param DomNavigator $domNavigator
+        * @param DOMNode $node
+        * @param string $property hCard property to be extracted
+        * @return DOMNode
+        */
+       protected function extractHCardProperty( DomNavigator $domNavigator, 
DOMNode $node, $property ) {
+               foreach ( $domNavigator->findElementsWithClass( '*', 'vcard', 
$node ) as $vcard ) {
+                       foreach ( $domNavigator->findElementsWithClass( '*', 
$property, $vcard ) as $name ) {
+                               return $name;
+                       }
+               }
+       }
+
+       /**
+        * @param DomNavigator $domNavigator
+        * @return array
+        */
        protected function parseLicenses( DomNavigator $domNavigator ) {
                $data = array();
                foreach ( $domNavigator->findElementsWithClass( '*', 
'licensetpl' ) as $licenseNode ) {
diff --git a/tests/phpunit/TemplateParserTest.php b/tests/phpunit/TemplateParserTest.php
index 8283fa7..5af6396 100644
--- a/tests/phpunit/TemplateParserTest.php
+++ b/tests/phpunit/TemplateParserTest.php
@@ -214,10 +214,10 @@
         * The template markup should not be present in the metadata.
         */
        public function testHCard() {
-               $this->markTestSkipped( 'bug 57383' );
-
                $data = $this->parseTestHTML( 'creator_template' );
-               $this->assertFieldEquals( 'Artist', 'George Gower', $data );
+               $this->assertFieldEquals( 'Artist',
+                       '<bdi>After <a href="//en.wikipedia.org/wiki/George_Gower" class="extiw" title="en:George Gower">George Gower</a></bdi>',
+                       $data );
        }
 
 

-- 
To view, visit https://gerrit.wikimedia.org/r/115379
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Id646261cf7b7516512a722af44f7f53da1e52005
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/CommonsMetadata
Gerrit-Branch: master
Gerrit-Owner: Gilles <[email protected]>
Gerrit-Reviewer: Gergő Tisza <[email protected]>
Gerrit-Reviewer: Gilles <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to