C. Scott Ananian has uploaded a new change for review. https://gerrit.wikimedia.org/r/311849
Change subject: WIP: protect language converter markup in the preprocessor. ...................................................................... WIP: protect language converter markup in the preprocessor. TODO: disable preprocessor protection if $wgDisableLangConversion ? Bug: T54661 Bug: T54190 Bug: T54192 Change-Id: I709d007c70a3fd19264790055042c615999b2f67 --- M includes/parser/Preprocessor.php M includes/parser/Preprocessor_DOM.php M includes/parser/Preprocessor_Hash.php 3 files changed, 57 insertions(+), 9 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/49/311849/1 diff --git a/includes/parser/Preprocessor.php b/includes/parser/Preprocessor.php index cc98abd..cb8e3a7 100644 --- a/includes/parser/Preprocessor.php +++ b/includes/parser/Preprocessor.php @@ -48,7 +48,13 @@ 'names' => [ 2 => null ], 'min' => 2, 'max' => 2, - ] + ], + '-{' => [ + 'end' => '}-', + 'names' => [ 2 => null ], + 'min' => 2, + 'max' => 2, + ], ]; /** diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php index 5da7cd7..ea67b19 100644 --- a/includes/parser/Preprocessor_DOM.php +++ b/includes/parser/Preprocessor_DOM.php @@ -219,7 +219,7 @@ $stack = new PPDStack; - $searchBase = "[{<\n"; # } + $searchBase = "[{<\n-"; # } // For fast reverse searches $revText = strrev( $text ); $lengthText = strlen( $text ); @@ -298,7 +298,10 @@ break; } } else { - $curChar = $text[$i]; + $curChar = $curTwoChar = $text[$i]; + if ( ( $i + 1 ) < $lengthText ) { + $curTwoChar .= $text[$i + 1]; + } if ( $curChar == '|' ) { $found = 'pipe'; } elseif ( $curChar == '=' ) { @@ -311,11 +314,20 @@ } else { $found = 'line-start'; } + } elseif ( $curTwoChar == $currentClosing ) { + $found = 'close'; + $curChar = $curTwoChar; } elseif ( $curChar == $currentClosing ) { $found = 'close'; + } elseif ( isset( $this->rules[$curTwoChar] ) ) { + $curChar = $curTwoChar; + $found = 'open'; + $rule = $this->rules[$curChar]; } elseif ( isset( $this->rules[$curChar] ) ) { $found = 'open'; $rule = $this->rules[$curChar]; + } else if ( $curChar == '-' ) { + $found = 'dash'; } else { # Some versions of PHP have a strcspn which stops on null characters # Ignore and continue @@ -595,7 +607,10 @@ // input pointer. } elseif ( $found == 'open' ) { # count opening brace characters - $count = strspn( $text, $curChar, $i ); + $count = strlen( $curChar ); + if ( $count === 1 ) { + $count = strspn( $text, $curChar, $i ); + } # we need to add to stack only if opening brace count is enough for one of the rules if ( $count >= $rule['min'] ) { @@ -620,7 +635,10 @@ $piece = $stack->top; # lets check if there are enough characters for closing brace $maxCount = $piece->count; - $count = strspn( $text, $curChar, $i, $maxCount ); + $count = strlen( $curChar ); + if ( $count === 1 ) { + $count = strspn( $text, $curChar, $i, $maxCount ); + } # check for maximum matching characters (if there are 5 closing # characters, we will probably need only 3 - depending on the rules) @@ -716,6 +734,9 @@ $stack->getCurrentPart()->eqpos = strlen( $accum ); $accum .= '='; ++$i; + } elseif ( $found == 'dash' ) { + $accum .= '-'; + ++$i; } } diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php index 8a4637e..a9c85e0 100644 --- a/includes/parser/Preprocessor_Hash.php +++ b/includes/parser/Preprocessor_Hash.php @@ -151,7 +151,7 @@ $stack = new PPDStack_Hash; - $searchBase = "[{<\n"; + $searchBase = "[{<\n-"; // For fast reverse searches $revText = strrev( $text ); $lengthText = strlen( $text ); @@ -229,7 +229,10 @@ break; } } else { - $curChar = $text[$i]; + $curChar = $curTwoChar = $text[$i]; + if ( ( $i + 1 ) < $lengthText ) { + $curTwoChar .= $text[$i + 1]; + } if ( $curChar == '|' ) { $found = 'pipe'; } elseif ( $curChar == '=' ) { @@ -242,11 +245,20 @@ } else { $found = 'line-start'; } + } elseif ( $curTwoChar == $currentClosing ) { + $found = 'close'; + $currChar = $curTwoChar; } elseif ( $curChar == $currentClosing ) { $found = 'close'; + } elseif ( isset( $this->rules[$curTwoChar] ) ) { + $curChar = $curTwoChar; + $found = 'open'; + $rule = $this->rules[$curChar]; } elseif ( isset( $this->rules[$curChar] ) ) { $found = 'open'; $rule = $this->rules[$curChar]; + } else if ( $curChar == '-' ) { + $found = 'dash'; } else { # Some versions of PHP have a strcspn which stops on null characters # Ignore and continue @@ -538,7 +550,10 @@ // input pointer. } elseif ( $found == 'open' ) { # count opening brace characters - $count = strspn( $text, $curChar, $i ); + $count = strlen( $curChar ); + if ( $count === 1 ) { + $count = strspn( $text, $curChar, $i ); + } # we need to add to stack only if opening brace count is enough for one of the rules if ( $count >= $rule['min'] ) { @@ -562,7 +577,10 @@ $piece = $stack->top; # lets check if there are enough characters for closing brace $maxCount = $piece->count; - $count = strspn( $text, $curChar, $i, $maxCount ); + $count = strlen( $curChar ); + if ( $count === 1 ) { + $count = strspn( $text, $curChar, $i, $maxCount ); + } # check for maximum matching characters (if there are 5 closing # characters, we will probably need only 3 - depending on the rules) @@ -661,6 +679,9 @@ $accum[] = [ 'equals', [ '=' ] ]; $stack->getCurrentPart()->eqpos = count( $accum ) - 1; ++$i; + } elseif ( $found == 'dash' ) { + self::addLiteral( $accum, '-' ); + ++$i; } } -- To view, visit https://gerrit.wikimedia.org/r/311849 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I709d007c70a3fd19264790055042c615999b2f67 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: C. Scott Ananian <canan...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits