C. Scott Ananian has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/327112 )
Change subject: Protect -{...}- variant constructs in definition lists. ...................................................................... Protect -{...}- variant constructs in definition lists. Given the wikitext: ;-{zh-cn:AAA;zh-tw:BBB}- Prevent `doBlockLevels` from trying to split the definition list at the embedded colon and using `AAA;zh-tw:BBB}-` as the `<dd>` portion. Bug: T153135 Change-Id: I3a4d02f1fbd0d0fe8278d6b7c66005f0dd3dd36b --- M includes/parser/BlockLevelPass.php M tests/parser/parserTests.txt 2 files changed, 40 insertions(+), 31 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/12/327112/1 diff --git a/includes/parser/BlockLevelPass.php b/includes/parser/BlockLevelPass.php index cbacd34..1bb3c49 100644 --- a/includes/parser/BlockLevelPass.php +++ b/includes/parser/BlockLevelPass.php @@ -38,6 +38,7 @@ const COLON_STATE_COMMENT = 5; const COLON_STATE_COMMENTDASH = 6; const COLON_STATE_COMMENTDASHDASH = 7; + const COLON_STATE_LC = 8; /** * Make lists from lines starting with ':', '*', '#', etc. @@ -389,15 +390,14 @@ * @return string The position of the ':', or false if none found */ private function findColonNoLinks( $str, &$before, &$after ) { - $colonPos = strpos( $str, ':' ); - if ( $colonPos === false ) { + if ( !preg_match( '/:|<|-\{/', $str, $m, PREG_OFFSET_CAPTURE ) ) { # Nothing to find! return false; } - $ltPos = strpos( $str, '<' ); - if ( $ltPos === false || $ltPos > $colonPos ) { + if ( $m[0][0] === ':' ) { # Easy; no tag nesting to worry about + $colonPos = $m[0][1]; $before = substr( $str, 0, $colonPos ); $after = substr( $str, $colonPos + 1 ); return $colonPos; @@ -405,9 +405,10 @@ # Ugly state machine to walk through avoiding tags. $state = self::COLON_STATE_TEXT; - $level = 0; + $ltLevel = 0; + $lcLevel = 0; $len = strlen( $str ); - for ( $i = 0; $i < $len; $i++ ) { + for ( $i = $m[0][1]; $i < $len; $i++ ) { $c = $str[$i]; switch ( $state ) { @@ -418,7 +419,7 @@ $state = self::COLON_STATE_TAGSTART; break; case ":": - if ( $level === 0 ) { + if ( $ltLevel === 0 ) { # We found it! $before = substr( $str, 0, $i ); $after = substr( $str, $i + 1 ); @@ -428,35 +429,44 @@ break; default: # Skip ahead looking for something interesting - $colonPos = strpos( $str, ':', $i ); - if ( $colonPos === false ) { + if ( !preg_match( '/:|<|-\{/', $str, $m, PREG_OFFSET_CAPTURE, $i ) ) { # Nothing else interesting return false; } - $ltPos = strpos( $str, '<', $i ); - if ( $level === 0 ) { - if ( $ltPos === false || $colonPos < $ltPos ) { - # We found it! - $before = substr( $str, 0, $colonPos ); - $after = substr( $str, $colonPos + 1 ); - return $i; - } + if ( $m[0][0] === '-{' ) { + $state = self::COLON_STATE_LC; + $lcLevel++; + $i = $m[0][1] + 1; + } else { + # Skip ahead to next interesting character. + $i = $m[0][1] - 1; } - if ( $ltPos === false ) { - # Nothing else interesting to find; abort! - # We're nested, but there's no close tags left. Abort! - break 2; + break; + } + break; + case self::COLON_STATE_LC: + # In language converter markup -{ ... }- + if ( !preg_match( '/-\{|\}-/', $str, $m, PREG_OFFSET_CAPTURE, $i ) ) { + # Nothing else interesting to find; abort! + # We're nested in language converter markup, but there + # are no close tags left. Abort! + break 2; + } else if ( $m[0][0] === '-{' ) { + $i = $m[0][1] + 1; + $lcLevel++; + } else if ( $m[0][0] === '}-' ) { + $i = $m[0][1] + 1; + $lcLevel--; + if ( $lcLevel === 0 ) { + $state = self::COLON_STATE_TEXT; } - # Skip ahead to next tag start - $i = $ltPos; - $state = self::COLON_STATE_TAGSTART; } break; case self::COLON_STATE_TAG: # In a <tag> switch ( $c ) { case ">": - $level++; + $ltLevel++; $state = self::COLON_STATE_TEXT; break; case "/": @@ -486,8 +496,8 @@ case self::COLON_STATE_CLOSETAG: # In a </tag> if ( $c === ">" ) { - $level--; - if ( $level < 0 ) { + $ltLevel--; + if ( $ltLevel < 0 ) { wfDebug( __METHOD__ . ": Invalid input; too many close tags\n" ); return false; } @@ -526,8 +536,8 @@ throw new MWException( "State machine error in " . __METHOD__ ); } } - if ( $level > 0 ) { - wfDebug( __METHOD__ . ": Invalid input; not enough close tags (level $level, state $state)\n" ); + if ( $ltLevel > 0 || $lcLevel > 0) { + wfDebug( __METHOD__ . ": Invalid input; not enough close tags (level $ltLevel/$lcLevel, state $state)\n" ); return false; } return false; diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index a7db9db..7f5b0fc 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -20359,7 +20359,6 @@ !! end -# FIXME: This test is currently broken in the PHP parser (bug 52661) !! test Don't break list handling if language converter markup is in the item. !! options @@ -20367,7 +20366,7 @@ !! wikitext ;-{zh-cn:AAA;zh-tw:BBB}- !! html/php -<dl><dt><span class="error">在手动语言转换规则中检测到错误</span></dd></dl> +<dl><dt>AAA</dt></dl> !! html/parsoid <dl><dt>AAA -- To view, visit https://gerrit.wikimedia.org/r/327112 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I3a4d02f1fbd0d0fe8278d6b7c66005f0dd3dd36b Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: C. Scott Ananian <canan...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits