http://www.mediawiki.org/wiki/Special:Code/MediaWiki/84693
Revision: 84693
Author: aaron
Date: 2011-03-24 18:39:30 +0000 (Thu, 24 Mar 2011)
Log Message:
-----------
* Follow-up for r84660:
** Don't regress in terms of making sure truncating and adding an ellipses
won't make the string longer (or equally long)
** Fixed ellipsis adjustment check in truncateHtml()
* Other:
** Made removeBadCharLast/removeBadCharFirst not give warnings on empty string
input
** Removed tidy call in truncateHtml, slower and adds some garbage
** Code comment tweaks
Modified Paths:
--------------
trunk/phase3/languages/Language.php
Modified: trunk/phase3/languages/Language.php
===================================================================
--- trunk/phase3/languages/Language.php 2011-03-24 18:35:34 UTC (rev 84692)
+++ trunk/phase3/languages/Language.php 2011-03-24 18:39:30 UTC (rev 84693)
@@ -2393,31 +2393,34 @@
if ( $ellipsis == '...' ) {
$ellipsis = wfMsgExt( 'ellipsis', array(
'escapenoentities', 'language' => $this ) );
}
- $eLength = $adjustLength ? strlen( $ellipsis ) : 0;
# Check if there is no need to truncate
if ( $length == 0 ) {
- return $ellipsis;
+ return $ellipsis; // convention
} elseif ( strlen( $string ) <= abs( $length ) ) {
- return $string;
- } elseif ( strlen( $ellipsis ) >= abs( $length ) ) {
- // Not combined with first (length == 0) if statement,
since
- // we want to return the string instead of ellipsis if
both
- // this and the proceeding elseif are true.
- return $ellipsis;
+ return $string; // no need to truncate
}
$stringOriginal = $string;
- if ( $length > 0 ) {
- $length -= $eLength;
- $string = substr( $string, 0, $length ); // xyz...
- $string = $this->removeBadCharLast( $string );
- $string = $string . $ellipsis;
+ # If ellipsis length is >= $length then we can't apply
$adjustLength
+ if ( $adjustLength && strlen( $ellipsis ) >= abs( $length ) ) {
+ $string = $ellipsis; // this can be slightly unexpected
+ # Otherwise, truncate and add ellipsis...
} else {
- $length += $eLength;
- $string = substr( $string, $length ); // ...xyz
- $string = $this->removeBadCharFirst( $string );
- $string = $ellipsis . $string;
+ $eLength = $adjustLength ? strlen( $ellipsis ) : 0;
+ if ( $length > 0 ) {
+ $length -= $eLength;
+ $string = substr( $string, 0, $length ); //
xyz...
+ $string = $this->removeBadCharLast( $string );
+ $string = $string . $ellipsis;
+ } else {
+ $length += $eLength;
+ $string = substr( $string, $length ); // ...xyz
+ $string = $this->removeBadCharFirst( $string );
+ $string = $ellipsis . $string;
+ }
}
- # Do not truncate if the ellipsis makes the string longer/equal
(bug 22181)
+ # Do not truncate if the ellipsis makes the string longer/equal
(bug 22181).
+ # This check is *not* redundant if $adjustLength, due to the
single case where
+ # LEN($ellipsis) > ABS($limit arg); $stringOriginal could be
shorter than $string.
if ( strlen( $string ) < strlen( $stringOriginal ) ) {
return $string;
} else {
@@ -2433,17 +2436,19 @@
* @return string
*/
protected function removeBadCharLast( $string ) {
- $char = ord( $string[strlen( $string ) - 1] );
- $m = array();
- if ( $char >= 0xc0 ) {
- # We got the first byte only of a multibyte char;
remove it.
- $string = substr( $string, 0, -1 );
- } elseif ( $char >= 0x80 &&
- preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
-
'[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) )
- {
- # We chopped in the middle of a character; remove it
- $string = $m[1];
+ if ( $string != '' ) {
+ $char = ord( $string[strlen( $string ) - 1] );
+ $m = array();
+ if ( $char >= 0xc0 ) {
+ # We got the first byte only of a multibyte
char; remove it.
+ $string = substr( $string, 0, -1 );
+ } elseif ( $char >= 0x80 &&
+ preg_match(
'/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
+
'[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) )
+ {
+ # We chopped in the middle of a character;
remove it
+ $string = $m[1];
+ }
}
return $string;
}
@@ -2456,10 +2461,12 @@
* @return string
*/
protected function removeBadCharFirst( $string ) {
- $char = ord( $string[0] );
- if ( $char >= 0x80 && $char < 0xc0 ) {
- # We chopped in the middle of a character; remove the
whole thing
- $string = preg_replace( '/^[\x80-\xbf]+/', '', $string
);
+ if ( $string != '' ) {
+ $char = ord( $string[0] );
+ if ( $char >= 0x80 && $char < 0xc0 ) {
+ # We chopped in the middle of a character;
remove the whole thing
+ $string = preg_replace( '/^[\x80-\xbf]+/', '',
$string );
+ }
}
return $string;
}
@@ -2469,10 +2476,9 @@
* appending an optional string (e.g. for ellipses), and return valid
HTML
*
* This is only intended for styled/linked text, such as HTML with
- * tags like <span> and <a>, were the tags are self-contained (valid
HTML)
+ * tags like <span> and <a>, were the tags are self-contained (valid
HTML).
+ * Also, this will not detect things like "display:none" CSS.
*
- * Note: tries to fix broken HTML with MWTidy
- *
* Note: since 1.18 you do not need to leave extra room in $length for
ellipses.
*
* @param string $text HTML string to truncate
@@ -2485,23 +2491,44 @@
if ( $ellipsis == '...' ) {
$ellipsis = wfMsgExt( 'ellipsis', array(
'escapenoentities', 'language' => $this ) );
}
- $length -= strlen( $ellipsis );
- # Check if there is no need to truncate
+ # Check if there is clearly no need to truncate
if ( $length <= 0 ) {
- return $ellipsis; // no text shown, nothing to format
+ return $ellipsis; // no text shown, nothing to format
(convention)
} elseif ( strlen( $text ) <= $length ) {
- return $text; // string short enough even *with* HTML
+ return $text; // string short enough even *with* HTML
(short-circuit)
}
- $text = MWTidy::tidy( $text ); // fix tags
+
$displayLen = 0; // innerHTML legth so far
$testingEllipsis = false; // checking if ellipses will make
string longer/equal?
$tagType = 0; // 0-open, 1-close
$bracketState = 0; // 1-tag start, 2-tag name, 0-neither
$entityState = 0; // 0-not entity, 1-entity
- $tag = $ret = '';
+ $tag = $ret = ''; // accumulated tag name, accumulated result
string
$openTags = array(); // open tag stack
+
$textLen = strlen( $text );
- for ( $pos = 0; $pos < $textLen; ++$pos ) {
+ $neLength = max( 0, $length - strlen( $ellipsis ) ); //
non-ellipsis len if truncated
+ for ( $pos = 0; true; ++$pos ) {
+ # Consider truncation once the display length has
reached the maximim.
+ # Check that we're not in the middle of a
bracket/entity...
+ if ( $displayLen >= $neLength && $bracketState == 0 &&
$entityState == 0 ) {
+ if ( !$testingEllipsis ) {
+ $testingEllipsis = true;
+ # Save where we are; we will truncate
here unless there turn out to
+ # be so few remaining characters that
truncation is not necessary.
+ $pOpenTags = $openTags; // save state
+ $pRet = $ret; // save state
+ } elseif ( $displayLen > $length && $displayLen
> strlen( $ellipsis ) ) {
+ # String in fact does need truncation,
the truncation point was OK.
+ $openTags = $pOpenTags; // reload state
+ $ret = $this->removeBadCharLast( $pRet
); // reload state, multi-byte char fix
+ $ret .= $ellipsis; // add ellipsis
+ break;
+ }
+ }
+ if ( $pos >= $textLen ) break; // extra iteration just
for above checks
+
+ # Read the next char...
$ch = $text[$pos];
$lastCh = $pos ? $text[$pos - 1] : '';
$ret .= $ch; // add to result string
@@ -2539,31 +2566,14 @@
$entityState = 1; // entity
found, (e.g. " ")
} else {
$displayLen++; // this char is
displayed
- // Add on the other display
text after this...
- $skipped = $this->truncate_skip(
- $ret, $text, "<>&",
$pos + 1, $length - $displayLen );
+ // Add the next $max display
text chars after this in one swoop...
+ $max = ( $testingEllipsis ?
$length : $neLength ) - $displayLen;
+ $skipped =
$this->truncate_skip( $ret, $text, "<>&", $pos + 1, $max );
$displayLen += $skipped;
$pos += $skipped;
}
}
}
- # Consider truncation once the display length has
reached the maximim.
- # Double-check that we're not in the middle of a
bracket/entity...
- if ( $displayLen >= $length && $bracketState == 0 &&
$entityState == 0 ) {
- if ( !$testingEllipsis ) {
- $testingEllipsis = true;
- # Save where we are; we will truncate
here unless
- # the ellipsis actually makes the
string longer.
- $pOpenTags = $openTags; // save state
- $pRet = $ret; // save state
- } elseif ( $displayLen > ( $length + strlen(
$ellipsis ) ) ) {
- # Ellipsis won't make string
longer/equal, the truncation point was OK.
- $openTags = $pOpenTags; // reload state
- $ret = $this->removeBadCharLast( $pRet
); // reload state, multi-byte char fix
- $ret .= $ellipsis; // add ellipsis
- break;
- }
- }
}
if ( $displayLen == 0 ) {
return ''; // no text shown, nothing to format
@@ -2578,7 +2588,12 @@
// truncateHtml() helper function
// like strcspn() but adds the skipped chars to $ret
- private function truncate_skip( &$ret, $text, $search, $start, $len =
-1 ) {
+ private function truncate_skip( &$ret, $text, $search, $start, $len =
null ) {
+ if ( $len === null ) {
+ $len = -1; // -1 means "no limit" for strcspn
+ } elseif ( $len < 0 ) {
+ $len = 0; // sanity
+ }
$skipCount = 0;
if ( $start < strlen( $text ) ) {
$skipCount = strcspn( $text, $search, $start, $len );
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs