jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/370770 )
Change subject: Make api handle sparql harvested source fields ...................................................................... Make api handle sparql harvested source fields Updated url handlers for: * Wikitable * HTML * HTMLlist Bug: T172841 Change-Id: I20e25de8c391ba9e06cc6a4374d094af401bdf44 --- M api/includes/ApiMonuments.php M api/includes/CommonFunctions.php M api/includes/FormatHtml.php M api/includes/FormatHtmllist.php M api/includes/FormatJson.php M api/includes/FormatWikitable.php M api/tests/CommonFunctionsTest.php 7 files changed, 128 insertions(+), 25 deletions(-) Approvals: Jean-Frédéric: Looks good to me, approved jenkins-bot: Verified diff --git a/api/includes/ApiMonuments.php b/api/includes/ApiMonuments.php index a4a3713..5efa5f4 100644 --- a/api/includes/ApiMonuments.php +++ b/api/includes/ApiMonuments.php @@ -121,7 +121,7 @@ $fulltextColumns = [ 'name' => 1 ]; if ( $this->getParam( 'format' ) == 'dynamickml' ) { - # don't search just pass along the search parameters to kml network link file + // don't search just pass along the search parameters to kml network link file $DynKml = new DynamicKml; $reqUrl = 'http://' . $_SERVER['SERVER_NAME'] . $_SERVER['REQUEST_URI']; $DynKml->output( $reqUrl ); diff --git a/api/includes/CommonFunctions.php b/api/includes/CommonFunctions.php index 485b5c8..ad60814 100644 --- a/api/includes/CommonFunctions.php +++ b/api/includes/CommonFunctions.php @@ -53,7 +53,8 @@ $projects = array("wikipedia", "wikivoyage", "wikidata", "wikimedia"); $projectRegex = "(" . join("|", $projects) . ")"; $var = NULL; - if (!preg_match( '/(https?:)?\/\/(([a-z\-]+)\.' . $projectRegex . '\.org\/w\/index\.php\?title=(.*)&oldid=(.*))/', $text, $var )) { + $pattern = '/(https?:)?\/\/(([a-z\-]+)\.' . $projectRegex . 
'\.org\/w\/index\.php\?title=(.*)&oldid=(.*))/'; + if ( !preg_match( $pattern, $text, $var )) { throw new Exception('No project link in text.'); } return $var; @@ -63,6 +64,16 @@ return str_replace(' ', '_', $in_string); } +function matchWikidataQid($url) { + /* Extract the Qid from a Wikidata page or entity link */ + $var = NULL; + $pattern = '/(https?:)?\/\/(www\.wikidata\.org\/(wiki|entity)\/(.*))/'; + if ( !preg_match( $pattern, $url, $var )) { + throw new Exception('The provided url was not a wikidata link.'); + } + return $var; +} + function makeWikidataUrl($qid) { /* Creates a URL to Wikidata from a Qid */ if ( $qid ) { diff --git a/api/includes/FormatHtml.php b/api/includes/FormatHtml.php index 7042dff..61a0fb4 100644 --- a/api/includes/FormatHtml.php +++ b/api/includes/FormatHtml.php @@ -4,7 +4,7 @@ * HTML output type, based on XML. This output is for users (and not automated tools) so internationalization will be used. * @author Joancreus (jcreus), based on Platonides work */ -// functions: processWikitext, matchWikiprojectLink, getImageFromCommons, makeWikidataUrl +// functions: processWikitext, matchWikiprojectLink, getImageFromCommons, makeWikidataUrl, matchWikidataQid require_once ( 'CommonFunctions.php' ); class FormatHtml extends FormatBase { @@ -29,13 +29,13 @@ function outputBegin( $selectedItems ) { echo '<html>'; -$this->linebreak(); + $this->linebreak(); echo '<head>'; -$this->linebreak(); + $this->linebreak(); echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'; -$this->linebreak(); + $this->linebreak(); echo '<link media="all" type="text/css" href="jscss/style.css" rel="stylesheet">'; -$this->linebreak(); + $this->linebreak(); echo '<script src="jscss/custom.js" type="text/javascript"></script>'; echo "</head>\n<body>\n<table class=\"sortable wlm-result\" id=\"sortable_table_id_0\">\n"; @@ -64,7 +64,7 @@ if ( in_array( $name, $selectedItems ) ) { // $label = $name.'<a href="#" class="sortheader" 
onclick="ts_resortTable(this);return false;"><span class="sortarrow" sortdir="down"><img src="http://commons.wikimedia.org/skins-1.17/common/images/sort_none.gif" alt="↑"></span></a>'; echo '<th class="sortheader">' . _i18n( 'db-field-' . $name ) . '</th>'; -$this->linebreak(); + $this->linebreak(); } } echo '</tr>'; @@ -109,11 +109,11 @@ } echo '<td'.$tdattrs.'>' . $cellData . '</td>'; -$this->linebreak(); + $this->linebreak(); } } echo '</tr>'; -$this->linebreak(); + $this->linebreak(); } function outputEnd() { @@ -134,8 +134,14 @@ $linkText = urldecode( $encodedLinkText ); return self::makeHTMLlink( 'https://' . $m[2], $linkText ); } catch ( Exception $e ) { - // Normal text - return htmlspecialchars( $text ); + // Possibly a wikidata entity/wiki link + try { + $m = matchWikidataQid( $text ); + return self::makeHTMLlink( 'https://' . $m[2], $m[4] ); + } catch ( Exception $e ) { + // Normal text + return htmlspecialchars( $text ); + } } } diff --git a/api/includes/FormatHtmllist.php b/api/includes/FormatHtmllist.php index ace5c27..a7607ae 100644 --- a/api/includes/FormatHtmllist.php +++ b/api/includes/FormatHtmllist.php @@ -5,7 +5,7 @@ * This output is for users (and not automated tools) so internationalization will be used. * */ -// functions: processWikitext +// functions: processWikitext, matchWikiprojectLink, matchWikidataQid require_once ( 'CommonFunctions.php' ); class FormatHtmllist extends FormatBase { @@ -125,9 +125,9 @@ } if ( isset( $row->source ) and $row->source ) { - if ( preg_match( "/^(.+?)&/", $row->source, $matches ) ) { - $wikiListUrl = $matches[1]; - $desc .= '<li><a href="' . $wikiListUrl. '">' . $I18N->msg( 'source-monuments-list' ) . '</a></li>'; + $wikiListUrl = self::matchUrl( $row->source ); + if ( $wikiListUrl ) { + $desc .= '<li><a href="//' . $wikiListUrl. '">' . $I18N->msg( 'source-monuments-list' ) . 
'</a></li>'; } } @@ -159,4 +159,23 @@ $this->outputEnd(); } + /** + * Return a mathing wikiproject or wikidata url + */ + static function matchUrl( $url ) { + try { + $m = matchWikiprojectLink( $url ); + return $m[2]; + } catch ( Exception $e ) { + // Possibly a wikidata entity/wiki link + try { + $m = matchWikidataQid( $url ); + return $m[2]; + } catch ( Exception $e ) { + // Normal text + return null; + } + } + } + } diff --git a/api/includes/FormatJson.php b/api/includes/FormatJson.php index a775aa0..3a52837 100644 --- a/api/includes/FormatJson.php +++ b/api/includes/FormatJson.php @@ -3,9 +3,9 @@ /** * JSON output type */ -# error_reporting(E_ALL); -# ini_set('display_errors', true); -# ini_set('html_errors', false); +// error_reporting(E_ALL); +// ini_set('display_errors', true); +// ini_set('html_errors', false); class FormatJson extends FormatBase { diff --git a/api/includes/FormatWikitable.php b/api/includes/FormatWikitable.php index e6e0477..b94f8fa 100644 --- a/api/includes/FormatWikitable.php +++ b/api/includes/FormatWikitable.php @@ -4,7 +4,7 @@ * Wikitable output type, based on HTML, which at its turn is based on XML * @author Joancreus (jcreus), based on Platonides work */ -// functions: matchWikiprojectLink, makeWikidataWikilink +// functions: matchWikiprojectLink, makeWikidataWikilink, matchWikidataQid require_once ( 'CommonFunctions.php' ); class FormatWikitable extends FormatBase { @@ -36,7 +36,7 @@ $continue = substr( $continue, 1 ); echo '|}'; -$this->linebreak(); + $this->linebreak(); $this->isTableOpen = false; echo '<p style="text-align:right;">[https://tools.wmflabs.org/heritage' . @@ -50,7 +50,7 @@ foreach ( $row as $name => $value ) { if ( in_array( $name, $selectedItems ) ) { echo '!' . $name; -$this->linebreak(); + $this->linebreak(); } } $this->isTableOpen = true; @@ -71,7 +71,7 @@ } echo '|' . $cellData; -$this->linebreak(); + $this->linebreak(); } } } @@ -91,8 +91,14 @@ $linkText = urldecode( $encodedLinkText ); return '[//' . 
htmlspecialchars( $m[2] ) .' '. htmlspecialchars( $m[3] . ': ' . $linkText ) . ']'; } catch ( Exception $e ) { - // Normal text - return htmlspecialchars( $text ); + // Possibly a wikidata entity/wiki link + try { + $m = matchWikidataQid( $text ); + return '[[:d:' . $m[4] . '|' . $m[4] . ']]'; + } catch ( Exception $e ) { + // Normal text + return htmlspecialchars( $text ); + } } } diff --git a/api/tests/CommonFunctionsTest.php b/api/tests/CommonFunctionsTest.php index f7d8901..82ba5d2 100644 --- a/api/tests/CommonFunctionsTest.php +++ b/api/tests/CommonFunctionsTest.php @@ -131,6 +131,67 @@ ); } + public function test_matchWikidataQid_no_match() + { + $input = 'not-a-link'; + $this->setExpectedException('Exception'); // present in phpUnit 4.8 + // $this->expectException('Exception'); // present in phpUnit 5.2+ + // $this->expectExceptionMessage('The provided url was not a wikidata link.'); // present in phpUnit 5.2+ + matchWikidataQid( $input ); + } + + public function test_matchWikidataQid_entity() + { + $input = "https://www.wikidata.org/entity/Q5943"; + $expected = Array( + "https://www.wikidata.org/entity/Q5943", + "https:", + "www.wikidata.org/entity/Q5943", + "entity", + "Q5943" + ); + $this->assertEquals($expected, matchWikidataQid($input)); + } + + public function test_matchWikidataQid_wikipage() + { + $input = "https://www.wikidata.org/wiki/Q5943"; + $expected = Array( + "https://www.wikidata.org/wiki/Q5943", + "https:", + "www.wikidata.org/wiki/Q5943", + "wiki", + "Q5943" + ); + $this->assertEquals($expected, matchWikidataQid($input)); + } + + public function test_matchWikidataQid_http() + { + $input = "http://www.wikidata.org/entity/Q5943"; + $expected = Array( + "http://www.wikidata.org/entity/Q5943", + "http:", + "www.wikidata.org/entity/Q5943", + "entity", + "Q5943" + ); + $this->assertEquals($expected, matchWikidataQid($input)); + } + + public function test_matchWikidataQid_no_protocol() + { + $input = "//www.wikidata.org/entity/Q5943"; + $expected 
= Array( + "//www.wikidata.org/entity/Q5943", + "", + "www.wikidata.org/entity/Q5943", + "entity", + "Q5943" + ); + $this->assertEquals($expected, matchWikidataQid($input)); + } + public function test_makeWikidataUrl() { $this->assertEquals( -- To view, visit https://gerrit.wikimedia.org/r/370770 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I20e25de8c391ba9e06cc6a4374d094af401bdf44 Gerrit-PatchSet: 3 Gerrit-Project: labs/tools/heritage Gerrit-Branch: wikidata Gerrit-Owner: Lokal Profil <lokal.pro...@gmail.com> Gerrit-Reviewer: Jean-Frédéric <jeanfrederic.w...@gmail.com> Gerrit-Reviewer: Lokal Profil <lokal.pro...@gmail.com> Gerrit-Reviewer: Multichill <maar...@mdammers.nl> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits