Ejegg has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/85199


Change subject: LinkFilter: use wfMakeUrlIndexes in makeLikeArray
......................................................................

LinkFilter: use wfMakeUrlIndexes in makeLikeArray

De-duplicate the link munging logic shared by wfMakeUrlIndexes
(used to index links in the first place) and LinkFilter::makeLikeArray
(used when searching for those links).  Searching for links with
a port number is now possible, as long as you specify the port
or a subdomain wildcard.  Also, searching for mailto links with
no @ sign now distinguishes between single domains and subdomains.

Change-Id: I5fbd90d4e925420ba76f16c80f6eeab097192561
---
M includes/LinkFilter.php
1 file changed, 30 insertions(+), 35 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/99/85199/1

diff --git a/includes/LinkFilter.php b/includes/LinkFilter.php
index d552c69..0c8b8bb 100644
--- a/includes/LinkFilter.php
+++ b/includes/LinkFilter.php
@@ -90,10 +90,11 @@
         */
        public static function makeLikeArray( $filterEntry, $prot = 'http://' ) 
{
                $db = wfGetDB( DB_MASTER );
+               $entryNoStar = $filterEntry;
                if ( substr( $filterEntry, 0, 2 ) == '*.' ) {
                        $subdomains = true;
-                       $filterEntry = substr( $filterEntry, 2 );
-                       if ( $filterEntry == '' ) {
+                       $entryNoStar = substr( $filterEntry, 2 );
+                       if ( $entryNoStar == '' ) {
                                // We don't want to make a clause that will 
match everything,
                                // that could be dangerous
                                return false;
@@ -104,45 +105,39 @@
                // No stray asterisks, that could cause confusion
                // It's not simple or efficient to handle it properly so we 
don't
                // handle it at all.
-               if ( strpos( $filterEntry, '*' ) !== false ) {
+               if ( strpos( $entryNoStar, '*' ) !== false ) {
                        return false;
                }
-               $slash = strpos( $filterEntry, '/' );
-               if ( $slash !== false ) {
-                       $path = substr( $filterEntry, $slash );
-                       $host = substr( $filterEntry, 0, $slash );
+               // Use wfMakeUrlIndexes to be consistent with the logic used to
+               // munge the links on their way into the database.  It has a
+               // funny way of handling mailto links, so we work around it.
+               if ( $prot == 'mailto:' && !strpos( $filterEntry, '@' ) ) {
+                       $filterEntry = '@' . $filterEntry;
+                       $mailtoDomain = true;
                } else {
-                       $path = '/';
-                       $host = $filterEntry;
+                       $mailtoDomain = false;
                }
-               // Reverse the labels in the hostname, convert to lower case
-               // For emails reverse domainpart only
-               if ( $prot == 'mailto:' && strpos( $host, '@' ) ) {
-                       // complete email address
-                       $mailparts = explode( '@', $host );
-                       $domainpart = strtolower( implode( '.', array_reverse( 
explode( '.', $mailparts[1] ) ) ) );
-                       $host = $domainpart . '@' . $mailparts[0];
-                       $like = array( "$prot$host", $db->anyString() );
-               } elseif ( $prot == 'mailto:' ) {
-                       // domainpart of email address only. do not add '.'
-                       $host = strtolower( implode( '.', array_reverse( 
explode( '.', $host ) ) ) );
-                       $like = array( "$prot$host", $db->anyString() );
-               } else {
-                       $host = strtolower( implode( '.', array_reverse( 
explode( '.', $host ) ) ) );
-                       if ( substr( $host, -1, 1 ) !== '.' ) {
-                               $host .= '.';
-                       }
-                       $like = array( "$prot$host" );
-
-                       if ( $subdomains ) {
-                               $like[] = $db->anyString();
-                       }
-                       if ( !$subdomains || $path !== '/' ) {
-                               $like[] = $path;
-                               $like[] = $db->anyString();
+               $munged = wfMakeUrlIndexes( $prot . $filterEntry )[0];
+               if ( $mailtoDomain ) {
+                       $munged = str_replace( '.*@', '*', $munged );
+                       $munged = str_replace( '@.', '@', $munged );
+               }
+               $munged = str_replace( '*.', '*', $munged );
+               // Preserve original trimming of bare '/' path if url
+               // had no slashes or just one at the end.
+               if ( $subdomains && substr( $munged, -1, 1 ) == '/' ) {
+                       $slash = strpos( $filterEntry, '/' );
+                       if ( $slash === false || $slash == strlen( $filterEntry 
) - 1 ) {
+                               $munged = substr( $munged, 0, strlen( $munged ) 
- 1 );
                        }
                }
-               return $like;
+               $parts = explode( '*', $munged );
+               // Seems like we could just use the first return statement
+               // and get rid of keepOneWildcard. 
+               if ( count( $parts ) == 1 ) {
+                       return array ( $parts[0], $db->anyString() );
+               }
+               return array ( $parts[0], $db->anyString(), $parts[1], 
$db->anyString() );
        }
 
        /**

-- 
To view, visit https://gerrit.wikimedia.org/r/85199
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I5fbd90d4e925420ba76f16c80f6eeab097192561
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Ejegg <ej...@ejegg.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to