Lokal Profil has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/370770 )

Change subject: Make api handle sparql harvested source fields
......................................................................

Make api handle sparql harvested source fields

Updated URL handlers for:
* Wikitable
* HTML
* HTMLlist

Bug: T172841
Change-Id: I20e25de8c391ba9e06cc6a4374d094af401bdf44
---
M api/includes/CommonFunctions.php
M api/includes/FormatHtml.php
M api/includes/FormatHtmllist.php
M api/includes/FormatWikitable.php
M api/tests/CommonFunctionsTest.php
5 files changed, 122 insertions(+), 20 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/tools/heritage 
refs/changes/70/370770/1

diff --git a/api/includes/CommonFunctions.php b/api/includes/CommonFunctions.php
index 485b5c8..d8df10e 100644
--- a/api/includes/CommonFunctions.php
+++ b/api/includes/CommonFunctions.php
@@ -53,7 +53,8 @@
        $projects = array("wikipedia", "wikivoyage", "wikidata", "wikimedia");
        $projectRegex = "(" . join("|", $projects) . ")";
        $var = NULL;
-       if (!preg_match( '/(https?:)?\/\/(([a-z\-]+)\.' . $projectRegex . 
'\.org\/w\/index\.php\?title=(.*)&oldid=(.*))/', $text, $var )) {
+       $pattern = '/(https?:)?\/\/(([a-z\-]+)\.' . $projectRegex . 
'\.org\/w\/index\.php\?title=(.*)&oldid=(.*))/';
+       if ( !preg_match( $pattern, $text, $var )) {
                throw new Exception('No project link in text.');
        }
        return $var;
@@ -63,6 +64,15 @@
        return str_replace(' ', '_', $in_string);
 }
 
+function matchWikidataQid($url) {
+       /* Extract the Qid from a Wikidata page or entity link. Returns the preg_match groups (index 2: link without scheme, index 3: wiki|entity, index 4: Qid); throws Exception if $url is no such link. */
+       $var = NULL;
+       $pattern = '/(https?:)?\/\/(www\.wikidata\.org\/(wiki|entity)\/(.*))/';
+       if ( !preg_match( $pattern, $url, $var )) {
+               throw new Exception('The provided url was not a wikidata link.');
+       }
+       return $var;
+}
 function makeWikidataUrl($qid) {
        /* Creates a URL to Wikidata from a Qid */
        if ( $qid ) {
diff --git a/api/includes/FormatHtml.php b/api/includes/FormatHtml.php
index 7042dff..61a0fb4 100644
--- a/api/includes/FormatHtml.php
+++ b/api/includes/FormatHtml.php
@@ -4,7 +4,7 @@
  * HTML output type, based on XML. This output is for users (and not automated 
tools) so internationalization will be used.
  * @author Joancreus (jcreus), based on Platonides work
  */
-// functions: processWikitext, matchWikiprojectLink, getImageFromCommons, 
makeWikidataUrl
+// functions: processWikitext, matchWikiprojectLink, getImageFromCommons, 
makeWikidataUrl, matchWikidataQid
 require_once ( 'CommonFunctions.php' );
 
 class FormatHtml extends FormatBase {
@@ -29,13 +29,13 @@
 
        function outputBegin( $selectedItems ) {
                echo '<html>';
-$this->linebreak();
+               $this->linebreak();
                echo '<head>';
-$this->linebreak();
+               $this->linebreak();
                echo '<meta http-equiv="Content-Type" 
content="text/html;charset=UTF-8">';
-$this->linebreak();
+               $this->linebreak();
                echo '<link media="all" type="text/css" href="jscss/style.css" 
rel="stylesheet">';
-$this->linebreak();
+               $this->linebreak();
                echo '<script src="jscss/custom.js" 
type="text/javascript"></script>';
                echo "</head>\n<body>\n<table class=\"sortable wlm-result\" 
id=\"sortable_table_id_0\">\n";
 
@@ -64,7 +64,7 @@
                                if ( in_array( $name, $selectedItems ) ) {
                                        // $label = $name.'<a href="#" 
class="sortheader" onclick="ts_resortTable(this);return false;"><span 
class="sortarrow" sortdir="down"><img 
src="http://commons.wikimedia.org/skins-1.17/common/images/sort_none.gif" 
alt="↑"></span></a>';
                                        echo '<th class="sortheader">' . _i18n( 
'db-field-' . $name ) . '</th>';
-$this->linebreak();
+                                       $this->linebreak();
                                }
                        }
                        echo '</tr>';
@@ -109,11 +109,11 @@
                                }
 
                                echo '<td'.$tdattrs.'>' . $cellData . '</td>';
-$this->linebreak();
+                               $this->linebreak();
                        }
                }
                echo '</tr>';
-$this->linebreak();
+               $this->linebreak();
        }
 
        function outputEnd() {
@@ -134,8 +134,14 @@
                        $linkText = urldecode( $encodedLinkText );
                        return self::makeHTMLlink( 'https://' . $m[2], 
$linkText );
                } catch ( Exception $e ) {
-                       // Normal text
-                       return htmlspecialchars( $text );
+                       // Possibly a wikidata entity/wiki link
+                       try {
+                               $m = matchWikidataQid( $text );
+                               return self::makeHTMLlink( 'https://' . $m[2], 
$m[4] );
+                       } catch ( Exception $e ) {
+                               // Normal text
+                               return htmlspecialchars( $text );
+                       }
                }
        }
 
diff --git a/api/includes/FormatHtmllist.php b/api/includes/FormatHtmllist.php
index ace5c27..8bbfc28 100644
--- a/api/includes/FormatHtmllist.php
+++ b/api/includes/FormatHtmllist.php
@@ -5,7 +5,7 @@
  * This output is for users (and not automated tools) so internationalization 
will be used.
  *
  */
-// functions: processWikitext
+// functions: processWikitext, matchWikiprojectLink, matchWikidataQid
 require_once ( 'CommonFunctions.php' );
 
 class FormatHtmllist extends FormatBase {
@@ -125,8 +125,8 @@
                }
 
                if ( isset( $row->source ) and $row->source ) {
-                       if ( preg_match( "/^(.+?)&/", $row->source, $matches ) 
) {
-                               $wikiListUrl = $matches[1];
+                       $wikiListUrl = self::matchUrl( $row->source );  // $row->source is the value guarded by the isset() check above
+                       if ( $wikiListUrl ) {
                                $desc .= '<li><a href="' . $wikiListUrl. '">' . 
$I18N->msg( 'source-monuments-list' ) . '</a></li>';
                        }
                }
@@ -159,4 +159,23 @@
                $this->outputEnd();
        }
 
+       /**
+        * Return the matching wikiproject or wikidata url as an absolute https link, or null if $url is neither
+        */
+       static function matchUrl( $url ) {
+               try {
+                       $m = matchWikiprojectLink( $url );
+                       return 'https://' . $m[2];
+               } catch ( Exception $e ) {
+                       // Possibly a wikidata entity/wiki link
+                       try {
+                               $m = matchWikidataQid( $url );
+                               return 'https://' . $m[2];
+                       } catch ( Exception $e ) {
+                               // Neither a wikiproject nor a wikidata link
+                               return null;
+                       }
+               }
+       }
+
 }
diff --git a/api/includes/FormatWikitable.php b/api/includes/FormatWikitable.php
index e6e0477..b94f8fa 100644
--- a/api/includes/FormatWikitable.php
+++ b/api/includes/FormatWikitable.php
@@ -4,7 +4,7 @@
  * Wikitable output type, based on HTML, which at its turn is based on XML
  * @author Joancreus (jcreus), based on Platonides work
  */
-// functions: matchWikiprojectLink, makeWikidataWikilink
+// functions: matchWikiprojectLink, makeWikidataWikilink, matchWikidataQid
 require_once ( 'CommonFunctions.php' );
 
 class FormatWikitable extends FormatBase {
@@ -36,7 +36,7 @@
                $continue = substr( $continue, 1 );
 
                echo '|}';
-$this->linebreak();
+               $this->linebreak();
                $this->isTableOpen = false;
 
                echo '<p 
style="text-align:right;">[https://tools.wmflabs.org/heritage' .
@@ -50,7 +50,7 @@
                        foreach ( $row as $name => $value ) {
                                if ( in_array( $name, $selectedItems ) ) {
                                        echo '!' . $name;
-$this->linebreak();
+                                       $this->linebreak();
                                }
                        }
                        $this->isTableOpen = true;
@@ -71,7 +71,7 @@
                                }
 
                                echo '|' . $cellData;
-$this->linebreak();
+                               $this->linebreak();
                        }
                }
        }
@@ -91,8 +91,14 @@
                        $linkText = urldecode( $encodedLinkText );
                        return '[//' . htmlspecialchars( $m[2] ) .' '. 
htmlspecialchars( $m[3] . ': ' . $linkText ) . ']';
                } catch ( Exception $e ) {
-                       // Normal text
-                       return htmlspecialchars( $text );
+                       // Possibly a wikidata entity/wiki link
+                       try {
+                               $m = matchWikidataQid( $text );
+                               return '[[:d:' . $m[4] . '|' . $m[4] . ']]';
+                       } catch ( Exception $e ) {
+                               // Normal text
+                               return htmlspecialchars( $text );
+                       }
                }
        }
 
diff --git a/api/tests/CommonFunctionsTest.php 
b/api/tests/CommonFunctionsTest.php
index f7d8901..82ba5d2 100644
--- a/api/tests/CommonFunctionsTest.php
+++ b/api/tests/CommonFunctionsTest.php
@@ -131,6 +131,67 @@
                );
        }
 
+       public function test_matchWikidataQid_no_match()
+       {
+               $input = 'not-a-link';  // contains no wikidata url, so the matcher must throw
+               $this->setExpectedException('Exception');  // present in phpUnit 4.8
+               // $this->expectException('Exception');  // present in phpUnit 5.2+
+               // $this->expectExceptionMessage('The provided url was not a wikidata link.');  // present in phpUnit 5.2+
+               matchWikidataQid( $input );
+       }
+
+       public function test_matchWikidataQid_entity()
+       {
+               $input = "https://www.wikidata.org/entity/Q5943";  // /entity/ style link
+               $expected = Array(
+                       "https://www.wikidata.org/entity/Q5943",  // full match
+                       "https:",  // optional scheme group
+                       "www.wikidata.org/entity/Q5943",  // link without scheme
+                       "entity",  // path segment (wiki|entity)
+                       "Q5943"  // Qid
+                       );
+               $this->assertEquals($expected, matchWikidataQid($input));
+       }
+
+       public function test_matchWikidataQid_wikipage()
+       {
+               $input = "https://www.wikidata.org/wiki/Q5943";  // /wiki/ style page link
+               $expected = Array(
+                       "https://www.wikidata.org/wiki/Q5943",  // full match
+                       "https:",  // optional scheme group
+                       "www.wikidata.org/wiki/Q5943",  // link without scheme
+                       "wiki",  // path segment (wiki|entity)
+                       "Q5943"  // Qid
+                       );
+               $this->assertEquals($expected, matchWikidataQid($input));
+       }
+
+       public function test_matchWikidataQid_http()
+       {
+               $input = "http://www.wikidata.org/entity/Q5943";  // plain http scheme
+               $expected = Array(
+                       "http://www.wikidata.org/entity/Q5943",  // full match
+                       "http:",  // optional scheme group
+                       "www.wikidata.org/entity/Q5943",  // link without scheme
+                       "entity",  // path segment (wiki|entity)
+                       "Q5943"  // Qid
+                       );
+               $this->assertEquals($expected, matchWikidataQid($input));
+       }
+
+       public function test_matchWikidataQid_no_protocol()
+       {
+               $input = "//www.wikidata.org/entity/Q5943";  // protocol-relative link
+               $expected = Array(
+                       "//www.wikidata.org/entity/Q5943",  // full match
+                       "",  // scheme group is empty: the input has no scheme
+                       "www.wikidata.org/entity/Q5943",  // link without scheme
+                       "entity",  // path segment (wiki|entity)
+                       "Q5943"  // Qid
+                       );
+               $this->assertEquals($expected, matchWikidataQid($input));
+       }
+
        public function test_makeWikidataUrl()
        {
                $this->assertEquals(

-- 
To view, visit https://gerrit.wikimedia.org/r/370770
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I20e25de8c391ba9e06cc6a4374d094af401bdf44
Gerrit-PatchSet: 1
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: wikidata
Gerrit-Owner: Lokal Profil <lokal.pro...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to