C. Scott Ananian has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/327112 )

Change subject: Protect -{...}- variant constructs in definition lists.
......................................................................

Protect -{...}- variant constructs in definition lists.

Given the wikitext:

        ;-{zh-cn:AAA;zh-tw:BBB}-

Prevent `doBlockLevels` from trying to split the definition list at the
embedded colon and using `AAA;zh-tw:BBB}-` as the `<dd>` portion.

Bug: T153135
Change-Id: I3a4d02f1fbd0d0fe8278d6b7c66005f0dd3dd36b
---
M includes/parser/BlockLevelPass.php
M tests/parser/parserTests.txt
2 files changed, 40 insertions(+), 31 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/12/327112/1

diff --git a/includes/parser/BlockLevelPass.php 
b/includes/parser/BlockLevelPass.php
index cbacd34..1bb3c49 100644
--- a/includes/parser/BlockLevelPass.php
+++ b/includes/parser/BlockLevelPass.php
@@ -38,6 +38,7 @@
        const COLON_STATE_COMMENT = 5;
        const COLON_STATE_COMMENTDASH = 6;
        const COLON_STATE_COMMENTDASHDASH = 7;
+       const COLON_STATE_LC = 8;
 
        /**
         * Make lists from lines starting with ':', '*', '#', etc.
@@ -389,15 +390,14 @@
         * @return string The position of the ':', or false if none found
         */
        private function findColonNoLinks( $str, &$before, &$after ) {
-               $colonPos = strpos( $str, ':' );
-               if ( $colonPos === false ) {
+               if ( !preg_match( '/:|<|-\{/', $str, $m, PREG_OFFSET_CAPTURE ) 
) {
                        # Nothing to find!
                        return false;
                }
 
-               $ltPos = strpos( $str, '<' );
-               if ( $ltPos === false || $ltPos > $colonPos ) {
+               if ( $m[0][0] === ':' ) {
                        # Easy; no tag nesting to worry about
+                       $colonPos = $m[0][1];
                        $before = substr( $str, 0, $colonPos );
                        $after = substr( $str, $colonPos + 1 );
                        return $colonPos;
@@ -405,9 +405,10 @@
 
                # Ugly state machine to walk through avoiding tags.
                $state = self::COLON_STATE_TEXT;
-               $level = 0;
+               $ltLevel = 0;
+               $lcLevel = 0;
                $len = strlen( $str );
-               for ( $i = 0; $i < $len; $i++ ) {
+               for ( $i = $m[0][1]; $i < $len; $i++ ) {
                        $c = $str[$i];
 
                        switch ( $state ) {
@@ -418,7 +419,7 @@
                                        $state = self::COLON_STATE_TAGSTART;
                                        break;
                                case ":":
-                                       if ( $level === 0 ) {
+                                       if ( $ltLevel === 0 ) {
                                                # We found it!
                                                $before = substr( $str, 0, $i );
                                                $after = substr( $str, $i + 1 );
@@ -428,35 +429,44 @@
                                        break;
                                default:
                                        # Skip ahead looking for something 
interesting
-                                       $colonPos = strpos( $str, ':', $i );
-                                       if ( $colonPos === false ) {
+                                       if ( !preg_match( '/:|<|-\{/', $str, 
$m, PREG_OFFSET_CAPTURE, $i ) ) {
                                                # Nothing else interesting
                                                return false;
                                        }
-                                       $ltPos = strpos( $str, '<', $i );
-                                       if ( $level === 0 ) {
-                                               if ( $ltPos === false || 
$colonPos < $ltPos ) {
-                                                       # We found it!
-                                                       $before = substr( $str, 
0, $colonPos );
-                                                       $after = substr( $str, 
$colonPos + 1 );
-                                                       return $i;
-                                               }
+                                       if ( $m[0][0] === '-{' ) {
+                                               $state = self::COLON_STATE_LC;
+                                               $lcLevel++;
+                                               $i = $m[0][1] + 1;
+                                       } else {
+                                               # Skip ahead to next 
interesting character.
+                                               $i = $m[0][1] - 1;
                                        }
-                                       if ( $ltPos === false ) {
-                                               # Nothing else interesting to 
find; abort!
-                                               # We're nested, but there's no 
close tags left. Abort!
-                                               break 2;
+                                       break;
+                               }
+                               break;
+                       case self::COLON_STATE_LC:
+                               # In language converter markup -{ ... }-
+                               if ( !preg_match( '/-\{|\}-/', $str, $m, 
PREG_OFFSET_CAPTURE, $i ) ) {
+                                       # Nothing else interesting to find; 
abort!
+                                       # We're nested in language converter 
markup, but there
+                                       # are no close tags left.  Abort!
+                                       break 2;
+                               } else if ( $m[0][0] === '-{' ) {
+                                       $i = $m[0][1] + 1;
+                                       $lcLevel++;
+                               } else if ( $m[0][0] === '}-' ) {
+                                       $i = $m[0][1] + 1;
+                                       $lcLevel--;
+                                       if ( $lcLevel === 0 ) {
+                                               $state = self::COLON_STATE_TEXT;
                                        }
-                                       # Skip ahead to next tag start
-                                       $i = $ltPos;
-                                       $state = self::COLON_STATE_TAGSTART;
                                }
                                break;
                        case self::COLON_STATE_TAG:
                                # In a <tag>
                                switch ( $c ) {
                                case ">":
-                                       $level++;
+                                       $ltLevel++;
                                        $state = self::COLON_STATE_TEXT;
                                        break;
                                case "/":
@@ -486,8 +496,8 @@
                        case self::COLON_STATE_CLOSETAG:
                                # In a </tag>
                                if ( $c === ">" ) {
-                                       $level--;
-                                       if ( $level < 0 ) {
+                                       $ltLevel--;
+                                       if ( $ltLevel < 0 ) {
                                                wfDebug( __METHOD__ . ": 
Invalid input; too many close tags\n" );
                                                return false;
                                        }
@@ -526,8 +536,8 @@
                                throw new MWException( "State machine error in 
" . __METHOD__ );
                        }
                }
-               if ( $level > 0 ) {
-                       wfDebug( __METHOD__ . ": Invalid input; not enough 
close tags (level $level, state $state)\n" );
+               if ( $ltLevel > 0 || $lcLevel > 0) {
+                       wfDebug( __METHOD__ . ": Invalid input; not enough 
close tags (level $ltLevel/$lcLevel, state $state)\n" );
                        return false;
                }
                return false;
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index a7db9db..7f5b0fc 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -20359,7 +20359,6 @@
 
 !! end
 
-# FIXME: This test is currently broken in the PHP parser (bug 52661)
 !! test
 Don't break list handling if language converter markup is in the item.
 !! options
@@ -20367,7 +20366,7 @@
 !! wikitext
 ;-{zh-cn:AAA;zh-tw:BBB}-
 !! html/php
-<dl><dt><span class="error">在手动语言转换规则中检测到错误</span></dd></dl>
+<dl><dt>AAA</dt></dl>
 
 !! html/parsoid
 <dl><dt>AAA

-- 
To view, visit https://gerrit.wikimedia.org/r/327112
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I3a4d02f1fbd0d0fe8278d6b7c66005f0dd3dd36b
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: C. Scott Ananian <canan...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to