Ejegg has uploaded a new change for review.
https://gerrit.wikimedia.org/r/85199
Change subject: LinkFilter: use wfMakeUrlIndexes in makeLikeArray
......................................................................
LinkFilter: use wfMakeUrlIndexes in makeLikeArray
De-duplicate the link munging logic shared by wfMakeUrlIndexes
(used to index links in the first place) and LinkFilter::makeLikeArray
(used when searching for those links). Searching for links with
a port number is now possible, as long as the search pattern either
includes the port or starts with a subdomain wildcard ('*.'). Also,
searching for mailto links with no @ sign now distinguishes between
a single domain and its subdomains.
Change-Id: I5fbd90d4e925420ba76f16c80f6eeab097192561
---
M includes/LinkFilter.php
1 file changed, 30 insertions(+), 35 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/99/85199/1
diff --git a/includes/LinkFilter.php b/includes/LinkFilter.php
index d552c69..0c8b8bb 100644
--- a/includes/LinkFilter.php
+++ b/includes/LinkFilter.php
@@ -90,10 +90,11 @@
*/
public static function makeLikeArray( $filterEntry, $prot = 'http://' )
{
$db = wfGetDB( DB_MASTER );
+ $entryNoStar = $filterEntry;
if ( substr( $filterEntry, 0, 2 ) == '*.' ) {
$subdomains = true;
- $filterEntry = substr( $filterEntry, 2 );
- if ( $filterEntry == '' ) {
+ $entryNoStar = substr( $filterEntry, 2 );
+ if ( $entryNoStar == '' ) {
// We don't want to make a clause that will match everything,
// that could be dangerous
return false;
@@ -104,45 +105,39 @@
// No stray asterisks, that could cause confusion
// It's not simple or efficient to handle it properly so we don't
// handle it at all.
- if ( strpos( $filterEntry, '*' ) !== false ) {
+ if ( strpos( $entryNoStar, '*' ) !== false ) {
return false;
}
- $slash = strpos( $filterEntry, '/' );
- if ( $slash !== false ) {
- $path = substr( $filterEntry, $slash );
- $host = substr( $filterEntry, 0, $slash );
+ // Use wfMakeUrlIndexes to be consistent with the logic used to
+ // munge the links on their way into the database. It has a
+ // funny way of handling mailto links, so we work around it.
+ if ( $prot == 'mailto:' && !strpos( $filterEntry, '@' ) ) {
+ $filterEntry = '@' . $filterEntry;
+ $mailtoDomain = true;
} else {
- $path = '/';
- $host = $filterEntry;
+ $mailtoDomain = false;
}
- // Reverse the labels in the hostname, convert to lower case
- // For emails reverse domainpart only
- if ( $prot == 'mailto:' && strpos( $host, '@' ) ) {
- // complete email address
- $mailparts = explode( '@', $host );
- $domainpart = strtolower( implode( '.', array_reverse(
explode( '.', $mailparts[1] ) ) ) );
- $host = $domainpart . '@' . $mailparts[0];
- $like = array( "$prot$host", $db->anyString() );
- } elseif ( $prot == 'mailto:' ) {
- // domainpart of email address only. do not add '.'
- $host = strtolower( implode( '.', array_reverse(
explode( '.', $host ) ) ) );
- $like = array( "$prot$host", $db->anyString() );
- } else {
- $host = strtolower( implode( '.', array_reverse(
explode( '.', $host ) ) ) );
- if ( substr( $host, -1, 1 ) !== '.' ) {
- $host .= '.';
- }
- $like = array( "$prot$host" );
-
- if ( $subdomains ) {
- $like[] = $db->anyString();
- }
- if ( !$subdomains || $path !== '/' ) {
- $like[] = $path;
- $like[] = $db->anyString();
+ $munged = wfMakeUrlIndexes( $prot . $filterEntry )[0];
+ if ( $mailtoDomain ) {
+ $munged = str_replace( '.*@', '*', $munged );
+ $munged = str_replace( '@.', '@', $munged );
+ }
+ $munged = str_replace( '*.', '*', $munged );
+ // Preserve original trimming of bare '/' path if url
+ // had no slashes or just one at the end.
+ if ( $subdomains && substr( $munged, -1, 1 ) == '/' ) {
+ $slash = strpos( $filterEntry, '/' );
+ if ( $slash === false || $slash == strlen( $filterEntry
) - 1 ) {
+ $munged = substr( $munged, 0, strlen( $munged )
- 1 );
}
}
- return $like;
+ $parts = explode( '*', $munged );
+ // Seems like we could just use the first return statement
+ // and get rid of keepOneWildcard.
+ if ( count( $parts ) == 1 ) {
+ return array ( $parts[0], $db->anyString() );
+ }
+ return array ( $parts[0], $db->anyString(), $parts[1],
$db->anyString() );
}
/**
--
To view, visit https://gerrit.wikimedia.org/r/85199
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I5fbd90d4e925420ba76f16c80f6eeab097192561
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Ejegg <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits