jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/391733 )
Change subject: SECURITY: XSS in langconverter when regex hits
pcre.backtrack_limit
......................................................................
SECURITY: XSS in langconverter when regex hits pcre.backtrack_limit
Adjust regexes for what not to convert to avoid backtracking by
preferring possesive quantifiers
Add check that we really have matched to the end of the string, and
log error if the regex hits some sort of error preventing the
entire string from being matched. Should the regex not match to the
end, then language conversion is disabled for the string.
Bug: T124404
Change-Id: I4f0c171c7da804e9c1508ef1f59556665a318f6a
(cherry picked from commit fbe78cfa094645b907d0fd2885c5797321f794eb)
---
M languages/LanguageConverter.php
M tests/phpunit/languages/LanguageConverterTest.php
2 files changed, 61 insertions(+), 15 deletions(-)
Approvals:
Chad: Looks good to me, approved
jenkins-bot: Verified
diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php
index 67c0ca7..dfcab19 100644
--- a/languages/LanguageConverter.php
+++ b/languages/LanguageConverter.php
@@ -20,6 +20,8 @@
*/
use MediaWiki\MediaWikiServices;
+use MediaWiki\Logger\LoggerFactory;
+
/**
* Base class for language conversion.
* @ingroup Language
@@ -353,24 +355,30 @@
}
/* we convert everything except:
- * 1. HTML markups (anything between < and >)
- * 2. HTML entities
- * 3. placeholders created by the parser
- */
- $marker = '|' . Parser::MARKER_PREFIX . '[\-a-zA-Z0-9]+';
+ 1. HTML markups (anything between < and >)
+ 2. HTML entities
+ 3. placeholders created by the parser
+ IMPORTANT: Beware of failure from pcre.backtrack_limit
(T124404).
+ Minimize use of backtracking where possible.
+ */
+ $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';
// this one is needed when the text is inside an HTML markup
- $htmlfix = '|<[^>]+$|^[^<>]*>';
+ $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';
+
+ // Optimize for the common case where these tags have
+ // few or no children. Thus try and possesively get as much as
+ // possible, and only engage in backtracking when we hit a '<'.
// disable convert to variants between <code> tags
- $codefix = '<code>.+?<\/code>|';
+ $codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
// disable conversion of <script> tags
- $scriptfix = '<script.*?>.*?<\/script>|';
+ $scriptfix =
'<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
// disable conversion of <pre> tags
- $prefix = '<pre.*?>.*?<\/pre>|';
+ $prefix =
'<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
$reg = '/' . $codefix . $scriptfix . $prefix .
- '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix .
'/s';
+ '<[^>]++>|&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix .
'|\004$/s';
$startPos = 0;
$sourceBlob = '';
$literalBlob = '';
@@ -381,15 +389,34 @@
$markupMatches = null;
$elementMatches = null;
+
+ // We add a marker (\004) at the end of text, to ensure we
always match the
+ // entire text (Otherwise, pcre.backtrack_limit might cause
silent failure)
while ( $startPos < strlen( $text ) ) {
- if ( preg_match( $reg, $text, $markupMatches,
PREG_OFFSET_CAPTURE, $startPos ) ) {
+ if ( preg_match( $reg, $text . "\004", $markupMatches,
PREG_OFFSET_CAPTURE, $startPos ) ) {
$elementPos = $markupMatches[0][1];
$element = $markupMatches[0][0];
+ if ( $element === "\004" ) {
+ // We hit the end.
+ $elementPos = strlen( $text );
+ $element = '';
+ }
} else {
- $elementPos = strlen( $text );
- $element = '';
+ // If we hit here, then Language Converter
could be tricked
+ // into doing an XSS, so we refuse to translate.
+ // If non-crazy input manages to reach this
code path,
+ // we should consider it a bug.
+ $log = LoggerFactory::getInstance(
'languageconverter' );
+ $log->error( "Hit pcre.backtrack_limit in " .
__METHOD__
+ . ". Disabling language conversion for
this page.",
+ [
+ "method" => __METHOD__,
+ "variant" => $toVariant,
+ "startOfText" => substr( $text,
0, 500 )
+ ]
+ );
+ return $text;
}
-
// Queue the part before the markup for translation in
a batch
$sourceBlob .= substr( $text, $startPos, $elementPos -
$startPos ) . "\000";
@@ -398,7 +425,7 @@
// Translate any alt or title attributes inside the
matched element
if ( $element !== ''
- && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/',
$element, $elementMatches )
+ && preg_match(
'/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
) {
$attrs = Sanitizer::decodeTagAttributes(
$elementMatches[2] );
$changed = false;
diff --git a/tests/phpunit/languages/LanguageConverterTest.php
b/tests/phpunit/languages/LanguageConverterTest.php
index 81184aa..fc2ed33 100644
--- a/tests/phpunit/languages/LanguageConverterTest.php
+++ b/tests/phpunit/languages/LanguageConverterTest.php
@@ -157,6 +157,25 @@
$wgRequest->setVal( 'variant', null );
$this->assertEquals( 'tg', $this->lc->getPreferredVariant() );
}
+
+ /**
+ * Test exhausting pcre.backtrack_limit
+ */
+ public function testAutoConvertT124404() {
+ $testString = '';
+ for ( $i = 0; $i < 1000; $i++ ) {
+ $testString .= 'xxx xxx xxx';
+ }
+ $testString .= "\n<big id='в'></big>";
+ $old = ini_set( 'pcre.backtrack_limit', 200 );
+ $result = $this->lc->autoConvert( $testString, 'tg-latn' );
+ ini_set( 'pcre.backtrack_limit', $old );
+ // The в in the id attribute should not get converted to a v
+ $this->assertFalse(
+ strpos( $result, 'v' ),
+ "в converted to v despite being in attribue"
+ );
+ }
}
/**
--
To view, visit https://gerrit.wikimedia.org/r/391733
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I4f0c171c7da804e9c1508ef1f59556665a318f6a
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/core
Gerrit-Branch: wmf/1.31.0-wmf.8
Gerrit-Owner: Chad <[email protected]>
Gerrit-Reviewer: Brian Wolff <[email protected]>
Gerrit-Reviewer: Chad <[email protected]>
Gerrit-Reviewer: Liangent <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits