Legoktm has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/231748

Change subject: [WIP] Attempt to canonicalize URLs before storing them
......................................................................

[WIP] Attempt to canonicalize URLs before storing them

Convert index.php?title=$1 to /wiki/$1

TODO:
* Write tests

Bug: T108602
Change-Id: Ib47ca7c9e0c75aa295d9edb81b5fd0f671a6fb57
---
M UrlShortener.utils.php
1 file changed, 22 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/UrlShortener refs/changes/48/231748/1

diff --git a/UrlShortener.utils.php b/UrlShortener.utils.php
index 72ad2df..34e6d5e 100755
--- a/UrlShortener.utils.php
+++ b/UrlShortener.utils.php
@@ -28,10 +28,7 @@
         * @return Status with value of base36 encoded shortcode that refers to the $url
         */
        public static function maybeCreateShortCode( $url, User $user ) {
-               // First, cannonicalize the URL
-               // store everything in the db as HTTP, we'll convert it before
-               // redirecting users
-               $url = self::convertToProtocol( $url, PROTO_HTTP );
+               $url = self::normalizeUrl( $url );
 
                $dbw = self::getDB( DB_MASTER );
                $id = $dbw->selectField(
@@ -68,6 +65,27 @@
                return Status::newGood( self::encodeId( $id ) );
        }
 
+       public static function normalizeUrl( $url ) {
+               global $wgArticlePath;
+               // First, force the protocol to HTTP, we'll convert
+               // it to a different one when redirecting
+               $url = self::convertToProtocol( $url, PROTO_HTTP );
+
+               // If the wiki is using an article path (e.g. /wiki/$1) try
+               // and convert plain index.php?title=$1 URLs to the canonical form
+               if ( $wgArticlePath !== false && strpos( $url, '?' ) ) {
+                       $parsed = wfParseUrl( $url );
+                       $query = wfCgiToArray( $parsed['query'] );
+                       if ( count( $query ) === 1 && isset( $query['title'] ) && $parsed['path'] === wfScript() ) {
+                               $parsed['path'] = str_replace( '$1', $query['title'], $wgArticlePath );
+                               unset( $parsed['query'] );
+                       }
+                       $url = wfAssembleUrl( $parsed );
+               }
+
+               return $url;
+       }
+
        /**
         * Converts a possibly protocol'd url to the one specified
         *

-- 
To view, visit https://gerrit.wikimedia.org/r/231748
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib47ca7c9e0c75aa295d9edb81b5fd0f671a6fb57
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/UrlShortener
Gerrit-Branch: master
Gerrit-Owner: Legoktm <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to