jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/309858 )

Change subject: Store plain permalink instead of urlencoded one
......................................................................


Store plain permalink instead of urlencoded one

This is done to remove the need to, yet again, increase the size
of the source field.

Done:
- Set "percent_encoded=False" for all page.permalink() calls
  thus changing source in monuments_all and id_dump tables.
- Add urlencodeWikiprojectLink method to API
- Make Format: Html, Htmllist, Wikitable use urlencode
- Set urlencode for tools/id_checker

Bug: T112460
Change-Id: I5f65162f046a1ed8c7a66bb11dbb682c7330e2ca
---
M api/includes/CommonFunctions.php
M api/includes/FormatHtml.php
M api/includes/FormatHtmllist.php
M api/includes/FormatWikitable.php
M api/tests/CommonFunctionsTest.php
M api/tests/FormatHtmlTest.php
M api/tests/FormatWikitableTest.php
M erfgoedbot/update_database.py
M erfgoedbot/update_id_dump.py
M tools/id_checker.php
10 files changed, 138 insertions(+), 53 deletions(-)

Approvals:
  Jean-Frédéric: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/api/includes/CommonFunctions.php b/api/includes/CommonFunctions.php
index 485b5c8..e56b382 100644
--- a/api/includes/CommonFunctions.php
+++ b/api/includes/CommonFunctions.php
@@ -59,6 +59,18 @@
        return $var;
 }
 
+function urlencodeWikiprojectLink($var, $drop_oldid = false) {
+       /* Takes a matching group from matchWikiprojectLink
+        * and returns an url with the pagename urlencoded.
+        */
+       $site = $var[3] . '.' . $var[4] . '.org';
+       $title = urlencode( $var[5] );
+       if ( $drop_oldid ){
+               return $site . '/w/index.php?title=' . $title;
+       }
+       return $site . '/w/index.php?title=' . $title . '&oldid=' . $var[6];
+}
+
 function replaceSpaces( $in_string ) {
        return str_replace(' ', '_', $in_string);
 }
@@ -80,3 +92,14 @@
                return '';
        }
 }
+
+function makeHTMLlink( $url, $text=false ) {
+       /*
+        * Creates a html link with correctly escaped characters.
+        * Defaults to using the url as link text if no alternative is given.
+        */
+       if ( ! $text ) {
+               $text = $url;
+       }
+       return '<a href="' . htmlspecialchars( $url ) . '">' . 
htmlspecialchars( $text ) . '</a>';
+}
diff --git a/api/includes/FormatHtml.php b/api/includes/FormatHtml.php
index b177640..16cb1e6 100644
--- a/api/includes/FormatHtml.php
+++ b/api/includes/FormatHtml.php
@@ -4,7 +4,7 @@
  * HTML output type, based on XML. This output is for users (and not automated 
tools) so internationalization will be used.
  * @author Joancreus (jcreus), based on Platonides work
  */
-// functions: processWikitext, matchWikiprojectLink, getImageFromCommons, 
makeWikidataUrl
+// functions: processWikitext, matchWikiprojectLink, getImageFromCommons, 
makeWikidataUrl, urlencodeWikiprojectLink
 require_once ( 'CommonFunctions.php' );
 
 class FormatHtml extends FormatBase {
@@ -60,7 +60,7 @@
                                if ( in_array( $name, $selectedItems ) ) {
                                        // $label = $name.'<a href="#" 
class="sortheader" onclick="ts_resortTable(this);return false;"><span 
class="sortarrow" sortdir="down"><img 
src="http://commons.wikimedia.org/skins-1.17/common/images/sort_none.gif" 
alt="↑"></span></a>';
                                        echo '<th class="sortheader">' . _i18n( 
'db-field-' . $name ) . '</th>';
-$this->linebreak();
+                                       $this->linebreak();
                                }
                        }
                        echo '</tr>';
@@ -78,12 +78,12 @@
                                if ( $name == "image" || $name == "img_name" ) {
                                        $cellData = self::genImage( $value );
                                } elseif ( $name == "registrant_url" ) {
-                                       $cellData = self::makeHTMLlink( $value 
);
+                                       $cellData = makeHTMLlink( $value );
                                } elseif ( $name == "source" || $name == 
"img_thumb" ) {
                                        $cellData = self::prettifyUrls( $value 
);
                                } elseif ( $name == "wd_item" ) {
                                        $link = makeWikidataUrl( $value );
-                                       $cellData = self::makeHTMLlink( $link, 
$value );
+                                       $cellData = makeHTMLlink( $link, $value 
);
                                } elseif ( in_array( $name, $hasWikitext ) ) {
                                        $makeLinks = true;
                                        // not all datasets are ResultWrapper
@@ -105,11 +105,11 @@
                                }
 
                                echo '<td'.$tdattrs.'>' . $cellData . '</td>';
-$this->linebreak();
+                               $this->linebreak();
                        }
                }
                echo '</tr>';
-$this->linebreak();
+               $this->linebreak();
        }
 
        function outputEnd() {
@@ -126,26 +126,19 @@
        static function prettifyUrls( $text ) {
                try {
                        $m = matchWikiprojectLink( $text );
-                       $encodedLinkText = str_replace( '_', ' ', $m[5] );
-                       $linkText = urldecode( $encodedLinkText );
-                       return self::makeHTMLlink( 'https://' . $m[2], 
$linkText );
+                       $linkText = str_replace( '_', ' ', $m[5] );
+                       $encodedLink = urlencodeWikiprojectLink( $m );
+                       return makeHTMLlink( 'https://' . $encodedLink, 
$linkText );
                } catch ( Exception $e ) {
                        // Normal text
                        return htmlspecialchars( $text );
                }
        }
 
-       static function makeHTMLlink( $url, $text=false ) {
-               if ( ! $text ) {
-                       $text = $url;
-               }
-               return '<a href="' . htmlspecialchars( $url ) . '">' . 
htmlspecialchars( $text ) . '</a>';
-       }
-
        static function genImage( $img ) {
                if ( $img == "" ) {
                        return '';
-        }
+               }
 
                $img = str_replace( " ", "_", $img );
                $url = getImageFromCommons( $img, 100 );
diff --git a/api/includes/FormatHtmllist.php b/api/includes/FormatHtmllist.php
index ace5c27..6d7c4f4 100644
--- a/api/includes/FormatHtmllist.php
+++ b/api/includes/FormatHtmllist.php
@@ -5,7 +5,7 @@
  * This output is for users (and not automated tools) so internationalization 
will be used.
  *
  */
-// functions: processWikitext
+// functions: processWikitext, matchWikiprojectLink, getImageFromCommons, 
makeWikidataUrl, urlencodeWikiprojectLink
 require_once ( 'CommonFunctions.php' );
 
 class FormatHtmllist extends FormatBase {
@@ -85,8 +85,11 @@
                if ( isset( $row->name ) and $row->name ) {
                        if ( isset( $row->monument_article ) and 
$row->monument_article ) {
                                $makeLinks = false;
-                               $article_url = '//'. $row->lang .'.'. 
$row->project .'.org/wiki/'. htmlspecialchars( $row->monument_article );
-                               $desc .= '<h2><a href="'. $article_url .'">'. 
processWikitext( $row->lang, $row->name, $makeLinks, $row->project ) . 
'</a></h2>';
+                               $article_url = '//'. $row->lang . '.' . 
$row->project . '.org/wiki/' . $row->monument_article;
+                               $desc .= '<h2>';
+                               $desc .= makeHTMLlink( $article_url, 
processWikitext(
+                                       $row->lang, $row->name, $makeLinks, 
$row->project ) );
+                               $desc .= '</h2>';
                        } else {
                                $makeLinks = true;
                                $desc .= '<h2>'. processWikitext( $row->lang, 
$row->name, $makeLinks, $row->project ) . '</h2>';
@@ -105,13 +108,9 @@
                                        } else {
                                                if ( strcmp( $name, 'id' ) == 0 
and
                                                           isset( 
$row->registrant_url ) and $row->registrant_url ) {
-                                                       $desc .= '<a href="' . 
htmlspecialchars( $row->registrant_url ) . '">';
-                                                       $desc .= 
htmlspecialchars( $value );
-                                                       $desc .= '</a>';
+                                                       $desc .= makeHTMLlink( 
$row->registrant_url, $value );
                                                } elseif ( strcmp( $name, 
'wd_item' ) == 0 ) {
-                                                       $desc .= '<a href="' . 
makeWikidataUrl( $value ) . '">';
-                                                       $desc .= 
htmlspecialchars( $value );
-                                                       $desc .= '</a>';
+                                                       $desc .= makeHTMLlink( 
makeWikidataUrl( $value ), $value );
                                                } else {
                                                        $desc .= 
htmlspecialchars( $value );
                                                }
@@ -125,9 +124,12 @@
                }
 
                if ( isset( $row->source ) and $row->source ) {
-                       if ( preg_match( "/^(.+?)&/", $row->source, $matches ) 
) {
-                               $wikiListUrl = $matches[1];
-                               $desc .= '<li><a href="' . $wikiListUrl. '">' . 
$I18N->msg( 'source-monuments-list' ) . '</a></li>';
+                       $m = matchWikiprojectLink( $row->source );
+                       if ( $m ) {
+                               $encodedLink = '//' . urlencodeWikiprojectLink( 
$m, true );
+                               $desc .= '<li>';
+                               $desc .= makeHTMLlink( $encodedLink, 
$I18N->msg( 'source-monuments-list' ) );
+                               $desc .= '</li>';
                        }
                }
 
diff --git a/api/includes/FormatWikitable.php b/api/includes/FormatWikitable.php
index e6e0477..cbc0809 100644
--- a/api/includes/FormatWikitable.php
+++ b/api/includes/FormatWikitable.php
@@ -36,7 +36,7 @@
                $continue = substr( $continue, 1 );
 
                echo '|}';
-$this->linebreak();
+               $this->linebreak();
                $this->isTableOpen = false;
 
                echo '<p 
style="text-align:right;">[https://tools.wmflabs.org/heritage' .
@@ -50,7 +50,7 @@
                        foreach ( $row as $name => $value ) {
                                if ( in_array( $name, $selectedItems ) ) {
                                        echo '!' . $name;
-$this->linebreak();
+                                       $this->linebreak();
                                }
                        }
                        $this->isTableOpen = true;
@@ -71,7 +71,7 @@
                                }
 
                                echo '|' . $cellData;
-$this->linebreak();
+                               $this->linebreak();
                        }
                }
        }
@@ -87,9 +87,10 @@
        static function prettifyUrls( $text ) {
                try {
                        $m = matchWikiprojectLink( $text );
-                       $encodedLinkText = str_replace( '_', ' ', $m[5] );
-                       $linkText = urldecode( $encodedLinkText );
-                       return '[//' . htmlspecialchars( $m[2] ) .' '. 
htmlspecialchars( $m[3] . ': ' . $linkText ) . ']';
+                       $linkText = str_replace( '_', ' ', $m[5] );
+                       $encodedLink = urlencodeWikiprojectLink( $m );
+                       return '[//' . htmlspecialchars( $encodedLink ) . ' ' .
+                               htmlspecialchars( $m[3] . ': ' . $linkText ) . 
']';
                } catch ( Exception $e ) {
                        // Normal text
                        return htmlspecialchars( $text );
diff --git a/api/tests/CommonFunctionsTest.php 
b/api/tests/CommonFunctionsTest.php
index f7d8901..bb7c2e9 100644
--- a/api/tests/CommonFunctionsTest.php
+++ b/api/tests/CommonFunctionsTest.php
@@ -123,6 +123,66 @@
                $this->assertEquals($expected, matchWikiprojectLink($input));
        }
 
+       public function test_matchWikiprojectLink_match_with_unicode()
+       {
+               $input = 
'https://sv.wikipedia.org/w/index.php?title=ö&oldid=00000';
+               $expected = Array(
+                       
"https://sv.wikipedia.org/w/index.php?title=ö&oldid=00000",
+                       "https:",
+                       "sv.wikipedia.org/w/index.php?title=ö&oldid=00000",
+                       "sv",
+                       "wikipedia",
+                       "ö",
+                       "00000"
+                       );
+               $this->assertEquals($expected, matchWikiprojectLink($input));
+       }
+
+       public function test_urlencodeWikiprojectLink_without_unicode()
+       {
+               $input = Array(
+                       
"https://fr.wikivoyage.org/w/index.php?title=Hello_World&oldid=00000",
+                       "https:",
+                       
"fr.wikivoyage.org/w/index.php?title=Hello_World&oldid=00000",
+                       "fr",
+                       "wikivoyage",
+                       "Hello_World",
+                       "00000"
+                       );
+               $expected = 
'fr.wikivoyage.org/w/index.php?title=Hello_World&oldid=00000';
+               $this->assertEquals($expected, 
urlencodeWikiprojectLink($input));
+       }
+
+       public function test_urlencodeWikiprojectLink_with_unicode()
+       {
+               $input = Array(
+                       
"https://sv.wikipedia.org/w/index.php?title=ö&oldid=00000",
+                       "https:",
+                       "sv.wikipedia.org/w/index.php?title=ö&oldid=00000",
+                       "sv",
+                       "wikipedia",
+                       "ö",
+                       "00000"
+                       );
+               $expected = 
'sv.wikipedia.org/w/index.php?title=%C3%B6&oldid=00000';
+               $this->assertEquals($expected, 
urlencodeWikiprojectLink($input));
+       }
+
+       public function test_urlencodeWikiprojectLink_drop_oldid()
+       {
+               $input = Array(
+                       
"https://fr.wikivoyage.org/w/index.php?title=Hello_World&oldid=00000",
+                       "https:",
+                       
"fr.wikivoyage.org/w/index.php?title=Hello_World&oldid=00000",
+                       "fr",
+                       "wikivoyage",
+                       "Hello_World",
+                       "00000"
+                       );
+               $expected = 'fr.wikivoyage.org/w/index.php?title=Hello_World';
+               $this->assertEquals($expected, urlencodeWikiprojectLink($input, 
true));
+       }
+
        public function test_replaceSpaces()
        {
                $this->assertEquals(
@@ -162,5 +222,18 @@
                        makeWikidataWikilink("")
                );
        }
+
+       public function 
test_makeHTMLlink_with_one_argument_uses_the_url_as_text() {
+               $input = 'http://example.com';
+               $expected = '<a 
href="http://example.com">http://example.com</a>';
+               $this->assertEquals( $expected, makeHTMLlink( $input ) );
+       }
+
+       public function test_makeHTMLlink_two_arguments() {
+               $input1 = 'http://example.com';
+               $input2 = 'Example';
+               $expected = '<a href="http://example.com">Example</a>';
+               $this->assertEquals( $expected, makeHTMLlink( $input1, $input2 
) );
+       }
 }
 ?>
diff --git a/api/tests/FormatHtmlTest.php b/api/tests/FormatHtmlTest.php
index caa66d7..45c9af5 100644
--- a/api/tests/FormatHtmlTest.php
+++ b/api/tests/FormatHtmlTest.php
@@ -22,7 +22,7 @@
 
        public function test_prettifyUrls_match_encoded() {
 
-               $input = 
'http://sv.wikipedia.org/w/index.php?title=%C3%B6&oldid=00000';
+               $input = 
'http://sv.wikipedia.org/w/index.php?title=ö&oldid=00000';
                $expected = '<a 
href="https://sv.wikipedia.org/w/index.php?title=%C3%B6&amp;oldid=00000">ö</a>';
                $this->assertEquals( $expected, FormatHtml::prettifyUrls( 
$input ) );
        }
@@ -39,19 +39,6 @@
                        
'//upload.wikimedia.org/wikipedia/commons/thumb/a/a9/Example.jpg/100px-Example.jpg'
 .
                        '" /></a>';
                $this->assertEquals( $expected, FormatHtml::genImage( $input ) 
);
-       }
-
-       public function 
test_makeHTMLlink_with_one_argument_uses_the_url_as_text() {
-               $input = 'http://example.com';
-               $expected = '<a 
href="http://example.com">http://example.com</a>';
-               $this->assertEquals( $expected, FormatHtml::makeHTMLlink( 
$input ) );
-       }
-
-       public function test_makeHTMLlink_two_arguments() {
-               $input1 = 'http://example.com';
-               $input2= 'Example';
-               $expected = '<a href="http://example.com">Example</a>';
-               $this->assertEquals( $expected, FormatHtml::makeHTMLlink( 
$input1, $input2 ) );
        }
 
 }
diff --git a/api/tests/FormatWikitableTest.php 
b/api/tests/FormatWikitableTest.php
index 00323a4..021dca6 100644
--- a/api/tests/FormatWikitableTest.php
+++ b/api/tests/FormatWikitableTest.php
@@ -21,7 +21,7 @@
 
        public function test_prettifyUrls_match_encoded() {
 
-               $input = 
'http://sv.wikipedia.org/w/index.php?title=%C3%B6&oldid=00000';
+               $input = 
'http://sv.wikipedia.org/w/index.php?title=ö&oldid=00000';
                $expected = 
'[//sv.wikipedia.org/w/index.php?title=%C3%B6&amp;oldid=00000 sv: ö]';
                $this->assertEquals( $expected, FormatWikitable::prettifyUrls( 
$input ) );
        }
diff --git a/erfgoedbot/update_database.py b/erfgoedbot/update_database.py
index 9fb2c35..54bab6e 100755
--- a/erfgoedbot/update_database.py
+++ b/erfgoedbot/update_database.py
@@ -371,7 +371,7 @@
         if page.exists() and not page.isRedirectPage():
             # Do some checking
             unknownFields = processPage(
-                page, page.permalink(), countryconfig,
+                page, page.permalink(percent_encoded=False), countryconfig,
                 conn, cursor, unknownFields=unknownFields)
 
     unknownFieldsStatistics(countryconfig, unknownFields)
diff --git a/erfgoedbot/update_id_dump.py b/erfgoedbot/update_id_dump.py
index e77ba2b..a76df58 100755
--- a/erfgoedbot/update_id_dump.py
+++ b/erfgoedbot/update_id_dump.py
@@ -110,7 +110,8 @@
         if page.exists() and not page.isRedirectPage():
             # Do some checking
             processPage(countrycode, lang,
-                        page.permalink(), countryconfig, conn, cursor, 
page=page)
+                        page.permalink(percent_encoded=False),
+                        countryconfig, conn, cursor, page=page)
 
 
 def main():
diff --git a/tools/id_checker.php b/tools/id_checker.php
index 93b05b1..964ab11 100644
--- a/tools/id_checker.php
+++ b/tools/id_checker.php
@@ -4,6 +4,8 @@
 ini_set( 'html_errors', false );
 
 require_once dirname( __DIR__ ) . '/api/common.php';
+require_once ( '/api/includes/CommonFunctions.php' );
+
 
 $db = Database::getDb();
 print '<html>';
@@ -53,8 +55,11 @@
                        <td></td>
                </tr>';
                foreach ( $dres as $drow ) {
+                       $m = matchWikiprojectLink( $drow->source );
+                       $linkText = str_replace( '_', ' ', $m[5] );
+                       $encodedLink = urlencodeWikiprojectLink( $m );
                        print '<tr>
-                               <td><a href="'. htmlspecialchars( $drow->source 
) .'">' . htmlspecialchars( $drow->source ) . '</a></td>
+                               <td><a href="'. htmlspecialchars( $encodedLink 
) .'">' . htmlspecialchars( $linkText ) . '</a></td>
                        </tr>';
                }
        }

-- 
To view, visit https://gerrit.wikimedia.org/r/309858
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I5f65162f046a1ed8c7a66bb11dbb682c7330e2ca
Gerrit-PatchSet: 6
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Lokal Profil <lokal.pro...@gmail.com>
Gerrit-Reviewer: Jean-Frédéric <jeanfrederic.w...@gmail.com>
Gerrit-Reviewer: Multichill <maar...@mdammers.nl>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to