Ejegg has uploaded a new change for review. https://gerrit.wikimedia.org/r/85199
Change subject: LinkFilter: use wfMakeUrlIndexes in makeLikeArray ...................................................................... LinkFilter: use wfMakeUrlIndexes in makeLikeArray De-duplicate the link munging logic shared by wfMakeUrlIndexes (used to index links in the first place) and LinkFilter::makeLikeArray (used when searching for those links). Searching for links with a port number is now possible, as long as you specify the port or a subdomain wildcard. Also, searching for mailto links with no @ sign now distinguishes between single domains and subdomains. Change-Id: I5fbd90d4e925420ba76f16c80f6eeab097192561 --- M includes/LinkFilter.php 1 file changed, 30 insertions(+), 35 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/99/85199/1 diff --git a/includes/LinkFilter.php b/includes/LinkFilter.php index d552c69..0c8b8bb 100644 --- a/includes/LinkFilter.php +++ b/includes/LinkFilter.php @@ -90,10 +90,11 @@ */ public static function makeLikeArray( $filterEntry, $prot = 'http://' ) { $db = wfGetDB( DB_MASTER ); + $entryNoStar = $filterEntry; if ( substr( $filterEntry, 0, 2 ) == '*.' ) { $subdomains = true; - $filterEntry = substr( $filterEntry, 2 ); - if ( $filterEntry == '' ) { + $entryNoStar = substr( $filterEntry, 2 ); + if ( $entryNoStar == '' ) { // We don't want to make a clause that will match everything, // that could be dangerous return false; @@ -104,45 +105,39 @@ // No stray asterisks, that could cause confusion // It's not simple or efficient to handle it properly so we don't // handle it at all. - if ( strpos( $filterEntry, '*' ) !== false ) { + if ( strpos( $entryNoStar, '*' ) !== false ) { return false; } - $slash = strpos( $filterEntry, '/' ); - if ( $slash !== false ) { - $path = substr( $filterEntry, $slash ); - $host = substr( $filterEntry, 0, $slash ); + // Use wfMakeUrlIndexes to be consistent with the logic used to + // munge the links on their way into the database. It has a + // funny way of handling mailto links, so we work around it. + if ( $prot == 'mailto:' && !strpos( $filterEntry, '@' ) ) { + $filterEntry = '@' . $filterEntry; + $mailtoDomain = true; } else { - $path = '/'; - $host = $filterEntry; + $mailtoDomain = false; } - // Reverse the labels in the hostname, convert to lower case - // For emails reverse domainpart only - if ( $prot == 'mailto:' && strpos( $host, '@' ) ) { - // complete email address - $mailparts = explode( '@', $host ); - $domainpart = strtolower( implode( '.', array_reverse( explode( '.', $mailparts[1] ) ) ) ); - $host = $domainpart . '@' . $mailparts[0]; - $like = array( "$prot$host", $db->anyString() ); - } elseif ( $prot == 'mailto:' ) { - // domainpart of email address only. do not add '.' - $host = strtolower( implode( '.', array_reverse( explode( '.', $host ) ) ) ); - $like = array( "$prot$host", $db->anyString() ); - } else { - $host = strtolower( implode( '.', array_reverse( explode( '.', $host ) ) ) ); - if ( substr( $host, -1, 1 ) !== '.' ) { - $host .= '.'; - } - $like = array( "$prot$host" ); - - if ( $subdomains ) { - $like[] = $db->anyString(); - } - if ( !$subdomains || $path !== '/' ) { - $like[] = $path; - $like[] = $db->anyString(); + $munged = wfMakeUrlIndexes( $prot . $filterEntry )[0]; + if ( $mailtoDomain ) { + $munged = str_replace( '.*@', '*', $munged ); + $munged = str_replace( '@.', '@', $munged ); + } + $munged = str_replace( '*.', '*', $munged ); + // Preserve original trimming of bare '/' path if url + // had no slashes or just one at the end. + if ( $subdomains && substr( $munged, -1, 1 ) == '/' ) { + $slash = strpos( $filterEntry, '/' ); + if ( $slash === false || $slash == strlen( $filterEntry ) - 1 ) { + $munged = substr( $munged, 0, strlen( $munged ) - 1 ); } } - return $like; + $parts = explode( '*', $munged ); + // Seems like we could just use the first return statement + // and get rid of keepOneWildcard. + if ( count( $parts ) == 1 ) { + return array ( $parts[0], $db->anyString() ); + } + return array ( $parts[0], $db->anyString(), $parts[1], $db->anyString() ); } /** -- To view, visit https://gerrit.wikimedia.org/r/85199 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I5fbd90d4e925420ba76f16c80f6eeab097192561 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: Ejegg <ej...@ejegg.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits