C. Scott Ananian has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/311849

Change subject: WIP: protect language converter markup in the preprocessor.
......................................................................

WIP: protect language converter markup in the preprocessor.

TODO: disable preprocessor protection if $wgDisableLangConversion is set?

Bug: T54661
Bug: T54190
Bug: T54192
Change-Id: I709d007c70a3fd19264790055042c615999b2f67
---
M includes/parser/Preprocessor.php
M includes/parser/Preprocessor_DOM.php
M includes/parser/Preprocessor_Hash.php
3 files changed, 57 insertions(+), 9 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/49/311849/1

diff --git a/includes/parser/Preprocessor.php b/includes/parser/Preprocessor.php
index cc98abd..cb8e3a7 100644
--- a/includes/parser/Preprocessor.php
+++ b/includes/parser/Preprocessor.php
@@ -48,7 +48,13 @@
                        'names' => [ 2 => null ],
                        'min' => 2,
                        'max' => 2,
-               ]
+               ],
+               '-{' => [
+                       'end' => '}-',
+                       'names' => [ 2 => null ],
+                       'min' => 2,
+                       'max' => 2,
+               ],
        ];
 
        /**
diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php
index 5da7cd7..ea67b19 100644
--- a/includes/parser/Preprocessor_DOM.php
+++ b/includes/parser/Preprocessor_DOM.php
@@ -219,7 +219,7 @@
 
                $stack = new PPDStack;
 
-               $searchBase = "[{<\n"; # }
+               $searchBase = "[{<\n-"; # }
                // For fast reverse searches
                $revText = strrev( $text );
                $lengthText = strlen( $text );
@@ -298,7 +298,10 @@
                                                break;
                                        }
                                } else {
-                                       $curChar = $text[$i];
+                                       $curChar = $curTwoChar = $text[$i];
+                                       if ( ( $i + 1 ) < $lengthText ) {
+                                               $curTwoChar .= $text[$i + 1];
+                                       }
                                        if ( $curChar == '|' ) {
                                                $found = 'pipe';
                                        } elseif ( $curChar == '=' ) {
@@ -311,11 +314,20 @@
                                                } else {
                                                        $found = 'line-start';
                                                }
+                                       } elseif ( $curTwoChar == $currentClosing ) {
+                                               $found = 'close';
+                                               $curChar = $curTwoChar;
                                        } elseif ( $curChar == $currentClosing ) {
                                                $found = 'close';
+                                       } elseif ( isset( $this->rules[$curTwoChar] ) ) {
+                                               $curChar = $curTwoChar;
+                                               $found = 'open';
+                                               $rule = $this->rules[$curChar];
                                        } elseif ( isset( $this->rules[$curChar] ) ) {
                                                $found = 'open';
                                                $rule = $this->rules[$curChar];
+                                       } else if ( $curChar == '-' ) {
+                                               $found = 'dash';
                                        } else {
                                                # Some versions of PHP have a strcspn which stops on null characters
                                                # Ignore and continue
@@ -595,7 +607,10 @@
                                // input pointer.
                        } elseif ( $found == 'open' ) {
                                # count opening brace characters
-                               $count = strspn( $text, $curChar, $i );
+                               $count = strlen( $curChar );
+                               if ( $count === 1 ) {
+                                       $count = strspn( $text, $curChar, $i );
+                               }
 
                                # we need to add to stack only if opening brace count is enough for one of the rules
                                if ( $count >= $rule['min'] ) {
@@ -620,7 +635,10 @@
                                $piece = $stack->top;
                                # lets check if there are enough characters for closing brace
                                $maxCount = $piece->count;
-                               $count = strspn( $text, $curChar, $i, $maxCount );
+                               $count = strlen( $curChar );
+                               if ( $count === 1 ) {
+                                       $count = strspn( $text, $curChar, $i, $maxCount );
+                               }
 
                                # check for maximum matching characters (if there are 5 closing
                                # characters, we will probably need only 3 - depending on the rules)
@@ -716,6 +734,9 @@
                                $stack->getCurrentPart()->eqpos = strlen( $accum );
                                $accum .= '=';
                                ++$i;
+                       } elseif ( $found == 'dash' ) {
+                               $accum .= '-';
+                               ++$i;
                        }
                }
 
diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php
index 8a4637e..a9c85e0 100644
--- a/includes/parser/Preprocessor_Hash.php
+++ b/includes/parser/Preprocessor_Hash.php
@@ -151,7 +151,7 @@
 
                $stack = new PPDStack_Hash;
 
-               $searchBase = "[{<\n";
+               $searchBase = "[{<\n-";
                // For fast reverse searches
                $revText = strrev( $text );
                $lengthText = strlen( $text );
@@ -229,7 +229,10 @@
                                                break;
                                        }
                                } else {
-                                       $curChar = $text[$i];
+                                       $curChar = $curTwoChar = $text[$i];
+                                       if ( ( $i + 1 ) < $lengthText ) {
+                                               $curTwoChar .= $text[$i + 1];
+                                       }
                                        if ( $curChar == '|' ) {
                                                $found = 'pipe';
                                        } elseif ( $curChar == '=' ) {
@@ -242,11 +245,20 @@
                                                } else {
                                                        $found = 'line-start';
                                                }
+                                       } elseif ( $curTwoChar == $currentClosing ) {
+                                               $found = 'close';
+                                               $curChar = $curTwoChar;
                                        } elseif ( $curChar == $currentClosing ) {
                                                $found = 'close';
+                                       } elseif ( isset( $this->rules[$curTwoChar] ) ) {
+                                               $curChar = $curTwoChar;
+                                               $found = 'open';
+                                               $rule = $this->rules[$curChar];
                                        } elseif ( isset( $this->rules[$curChar] ) ) {
                                                $found = 'open';
                                                $rule = $this->rules[$curChar];
+                                       } else if ( $curChar == '-' ) {
+                                               $found = 'dash';
                                        } else {
                                                # Some versions of PHP have a strcspn which stops on null characters
                                                # Ignore and continue
@@ -538,7 +550,10 @@
                                // input pointer.
                        } elseif ( $found == 'open' ) {
                                # count opening brace characters
-                               $count = strspn( $text, $curChar, $i );
+                               $count = strlen( $curChar );
+                               if ( $count === 1 ) {
+                                       $count = strspn( $text, $curChar, $i );
+                               }
 
                                # we need to add to stack only if opening brace count is enough for one of the rules
                                if ( $count >= $rule['min'] ) {
@@ -562,7 +577,10 @@
                                $piece = $stack->top;
                                # lets check if there are enough characters for closing brace
                                $maxCount = $piece->count;
-                               $count = strspn( $text, $curChar, $i, $maxCount );
+                               $count = strlen( $curChar );
+                               if ( $count === 1 ) {
+                                       $count = strspn( $text, $curChar, $i, $maxCount );
+                               }
 
                                # check for maximum matching characters (if there are 5 closing
                                # characters, we will probably need only 3 - depending on the rules)
@@ -661,6 +679,9 @@
                                $accum[] = [ 'equals', [ '=' ] ];
                                $stack->getCurrentPart()->eqpos = count( $accum ) - 1;
                                ++$i;
+                       } elseif ( $found == 'dash' ) {
+                               self::addLiteral( $accum, '-' );
+                               ++$i;
                        }
                }
 

-- 
To view, visit https://gerrit.wikimedia.org/r/311849
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I709d007c70a3fd19264790055042c615999b2f67
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: C. Scott Ananian <canan...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to