Gilles has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/115379

Change subject: Reduce Wikidata author vcard to creator part
......................................................................

Reduce Wikidata author vcard to creator part

Wikidata-based templates dump a whole vcard, with too much data,
into the artist field. When that format is found, TemplateParser
reduces it to the creator link (or the creator's plain text if there is no link).

Change-Id: Id646261cf7b7516512a722af44f7f53da1e52005
Mingle: https://wikimedia.mingle.thoughtworks.com/projects/multimedia/cards/202
---
M TemplateParser.php
M tests/phpunit/TemplateParserTest.php
2 files changed, 22 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CommonsMetadata refs/changes/79/115379/1

diff --git a/TemplateParser.php b/TemplateParser.php
index c987c0d..36df00f 100755
--- a/TemplateParser.php
+++ b/TemplateParser.php
@@ -154,13 +154,31 @@
                        $table = $domNavigator->closest( $labelField, 'table' );
                        $groupName = $table ? $table->getNodePath() : '-';
 
-                       $data[$groupName][$fieldName] = $this->parseText( 
$domNavigator, $informationField );
+                       $method = 'parse' . $fieldName;
+
+                       if ( !method_exists( $this, $method ) ) {
+                               $method = 'parseText';
+                       }
+
+                       $data[$groupName][$fieldName] = $this->{$method}( 
$domNavigator, $informationField );
                }
                //return $this->arrayTranspose( $data );
                // FIXME bug 57259 - for now select the first information 
template if there are more than one
                return $data ? reset($data) : array();
        }
 
+       protected function parseArtist( DomNavigator $domNavigator, DOMNode 
$node ) {
+               foreach ( $domNavigator->findElementsWithId( 'span', 'creator', 
$node ) as $creator ) {
+                       foreach ( $domNavigator->findElementsWithClass( 'a', 
'extiw', $creator) as $creatorLink )  {
+                               return $this->toHtml( $creatorLink );
+                       }
+
+                       return $creator->textContent;
+               }
+
+               return $this->parseText( $domNavigator, $node );
+       }
+
        protected function parseLicenses( DomNavigator $domNavigator ) {
                $data = array();
                foreach ( $domNavigator->findElementsWithClass( '*', 
'licensetpl' ) as $licenseNode ) {
diff --git a/tests/phpunit/TemplateParserTest.php 
b/tests/phpunit/TemplateParserTest.php
index 8283fa7..c3096a8 100644
--- a/tests/phpunit/TemplateParserTest.php
+++ b/tests/phpunit/TemplateParserTest.php
@@ -214,10 +214,10 @@
         * The template markup should not be present in the metadata.
         */
        public function testHCard() {
-               $this->markTestSkipped( 'bug 57383' );
-
                $data = $this->parseTestHTML( 'creator_template' );
-               $this->assertFieldEquals( 'Artist', 'George Gower', $data );
+               $this->assertFieldEquals( 'Artist',
+                       '<a href="//en.wikipedia.org/wiki/George_Gower" 
class="extiw" title="en:George Gower">George Gower</a>',
+                       $data );
        }
 
 

-- 
To view, visit https://gerrit.wikimedia.org/r/115379
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id646261cf7b7516512a722af44f7f53da1e52005
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CommonsMetadata
Gerrit-Branch: master
Gerrit-Owner: Gilles <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to