jenkins-bot has submitted this change and it was merged.
Change subject: Language::truncate(): don't chop up multibyte characters when
input contains newlines
......................................................................
Language::truncate(): don't chop up multibyte characters when input contains
newlines
Language::truncate() uses a regex to detect whether the truncation
chopped up a multibyte character somewhere after its first byte.
In that regex the dot (.) did not match newlines, so the check
failed to detect a chopped multibyte character whenever a newline
preceded it. Adding the /s modifier (PCRE_DOTALL) to the regex
makes the dot match newlines as well.
Bug: T116693
Change-Id: I66e4fd451acac0a1019da7060d5a37d70963a15a
---
M languages/Language.php
M tests/phpunit/languages/LanguageTest.php
2 files changed, 12 insertions(+), 1 deletion(-)
Approvals:
Krinkle: Looks good to me, approved
jenkins-bot: Verified
diff --git a/languages/Language.php b/languages/Language.php
index 50ed513..3ea2693 100644
--- a/languages/Language.php
+++ b/languages/Language.php
@@ -3691,8 +3691,9 @@
# We got the first byte only of a multibyte
char; remove it.
$string = substr( $string, 0, -1 );
} elseif ( $char >= 0x80 &&
+ // Use the /s modifier (PCRE_DOTALL) so (.*)
also matches newlines
preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
- '[\xf0-\xf7][\x80-\xbf]{1,2})$/',
$string, $m )
+ '[\xf0-\xf7][\x80-\xbf]{1,2})$/s',
$string, $m )
) {
# We chopped in the middle of a character;
remove it
$string = $m[1];
diff --git a/tests/phpunit/languages/LanguageTest.php
b/tests/phpunit/languages/LanguageTest.php
index 4fca002..77c3c02 100644
--- a/tests/phpunit/languages/LanguageTest.php
+++ b/tests/phpunit/languages/LanguageTest.php
@@ -261,6 +261,16 @@
$this->getLang()->truncate( "1234567890", 5, 'XXX',
false ),
'truncate without adjustment'
);
+ $this->assertEquals(
+ "泰乐菌...",
+ $this->getLang()->truncate( "泰乐菌素123456789", 11, '...',
false ),
+ 'truncate does not chop Unicode characters in half'
+ );
+ $this->assertEquals(
+ "\n泰乐菌...",
+ $this->getLang()->truncate( "\n泰乐菌素123456789", 12,
'...', false ),
+ 'truncate does not chop Unicode characters in half if
there is a preceding newline'
+ );
}
/**
--
To view, visit https://gerrit.wikimedia.org/r/249051
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I66e4fd451acac0a1019da7060d5a37d70963a15a
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: wmf/1.27.0-wmf.3
Gerrit-Owner: Krinkle <[email protected]>
Gerrit-Reviewer: Catrope <[email protected]>
Gerrit-Reviewer: Krinkle <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits