Reedy has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/391382 )
Change subject: SECURITY: XSS in langconverter when regex hits pcre.backtrack_limit ...................................................................... SECURITY: XSS in langconverter when regex hits pcre.backtrack_limit Adjust regexes for what not to convert to avoid backtracking by preferring possesive quantifiers Add check that we really have matched to the end of the string, and log error if the regex hits some sort of error preventing the entire string from being matched. Should the regex not match to the end, then language conversion is disabled for the string. Bug: T124404 Change-Id: I4f0c171c7da804e9c1508ef1f59556665a318f6a --- M RELEASE-NOTES-1.27 M languages/LanguageConverter.php M tests/phpunit/languages/LanguageConverterTest.php 3 files changed, 59 insertions(+), 12 deletions(-) diff --git a/RELEASE-NOTES-1.27 b/RELEASE-NOTES-1.27 index ed6b1f6..79b8b98 100644 --- a/RELEASE-NOTES-1.27 +++ b/RELEASE-NOTES-1.27 @@ -27,6 +27,7 @@ * (T125163) SECURITY: Make anchor for headlines escape > and <. * (T180237) SECURITY: Protect vendor folder with .htaccess. * (T180231) SECURITY: Remove PHPUnit file with known RCE if exists in update.php. +* (T124404) SECURITY: XSS in langconverter when regex hits pcre.backtrack_limit. == MediaWiki 1.27.3 == Due to a packaging error, the wrong version of the SyntaxHighlight extension was diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index b31b10f..4200978 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -19,6 +19,8 @@ * @ingroup Language */ +use MediaWiki\Logger\LoggerFactory; + /** * Base class for language conversion. * @ingroup Language @@ -357,21 +359,27 @@ 1. HTML markups (anything between < and >) 2. HTML entities 3. placeholders created by the parser + IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404). + Minimize use of backtracking where possible. */ - $marker = '|' . Parser::MARKER_PREFIX . '[\-a-zA-Z0-9]+'; + $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f'; // this one is needed when the text is inside an HTML markup - $htmlfix = '|<[^>]+$|^[^<>]*>'; + $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>'; + + // Optimize for the common case where these tags have + // few or no children. Thus try and possesively get as much as + // possible, and only engage in backtracking when we hit a '<'. // disable convert to variants between <code> tags - $codefix = '<code>.+?<\/code>|'; + $codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|'; // disable conversion of <script> tags - $scriptfix = '<script.*?>.*?<\/script>|'; + $scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|'; // disable conversion of <pre> tags - $prefix = '<pre.*?>.*?<\/pre>|'; + $prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|'; $reg = '/' . $codefix . $scriptfix . $prefix . - '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s'; + '<[^>]++>|&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s'; $startPos = 0; $sourceBlob = ''; $literalBlob = ''; @@ -381,15 +389,34 @@ $markupMatches = null; $elementMatches = null; + + // We add a marker (\004) at the end of text, to ensure we always match the + // entire text (Otherwise, pcre.backtrack_limit might cause silent failure) while ( $startPos < strlen( $text ) ) { - if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) { + if ( preg_match( $reg, $text . "\004", $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) { $elementPos = $markupMatches[0][1]; $element = $markupMatches[0][0]; + if ( $element === "\004" ) { + // We hit the end. + $elementPos = strlen( $text ); + $element = ''; + } } else { - $elementPos = strlen( $text ); - $element = ''; + // If we hit here, then Language Converter could be tricked + // into doing an XSS, so we refuse to translate. + // If non-crazy input manages to reach this code path, + // we should consider it a bug. + $log = LoggerFactory::getInstance( 'languageconverter' ); + $log->error( "Hit pcre.backtrack_limit in " . __METHOD__ + . ". Disabling language conversion for this page.", + array( + "method" => __METHOD__, + "variant" => $toVariant, + "startOfText" => substr( $text, 0, 500 ) + ) + ); + return $text; } - // Queue the part before the markup for translation in a batch $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000"; @@ -398,7 +425,7 @@ // Translate any alt or title attributes inside the matched element if ( $element !== '' - && preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element, $elementMatches ) + && preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches ) ) { $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] ); $changed = false; @@ -413,7 +440,7 @@ } // Remove HTML tags to avoid disrupting the layout - $attr = preg_replace( '/<[^>]+>/', '', $attr ); + $attr = preg_replace( '/<[^>]++>/', '', $attr ); if ( $attr !== $attrs[$attrName] ) { $attrs[$attrName] = $attr; $changed = true; diff --git a/tests/phpunit/languages/LanguageConverterTest.php b/tests/phpunit/languages/LanguageConverterTest.php index 81184aa..331368e 100644 --- a/tests/phpunit/languages/LanguageConverterTest.php +++ b/tests/phpunit/languages/LanguageConverterTest.php @@ -157,6 +157,25 @@ $wgRequest->setVal( 'variant', null ); $this->assertEquals( 'tg', $this->lc->getPreferredVariant() ); } + + /** + * Test exhausting pcre.backtrack_limit + */ + public function testAutoConvertT124404() { + $testString = ''; + for ( $i = 0; $i < 1000; $i++ ) { + $testString .= 'xxx xxx xxx'; + } + $testString .= "\n<big id='в'></big>"; + $old = ini_set('pcre.backtrack_limit', 200 ); + $result = $this->lc->autoConvert( $testString, 'tg-latn' ); + ini_set( 'pcre.backtrack_limit', $old ); + // The в in the id attribute should not get converted to a v + $this->assertFalse( + strpos( $result, 'v' ), + "в converted to v despite being in attribue" + ); + } } /** -- To view, visit https://gerrit.wikimedia.org/r/391382 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I4f0c171c7da804e9c1508ef1f59556665a318f6a Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: REL1_27 Gerrit-Owner: Reedy <re...@wikimedia.org> Gerrit-Reviewer: Brian Wolff <bawolff...@gmail.com> Gerrit-Reviewer: Reedy <re...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits