jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/403331 )

Change subject: JavaScriptMinifier: Improve docs for parsing of string literals
......................................................................


JavaScriptMinifier: Improve docs for parsing of string literals

Also update docs for parsing of regexp literals to match.

Bug: T75556
Change-Id: I86c79b1b1866339d65d1c69e56d457c62544aaac
---
M includes/libs/JavaScriptMinifier.php
1 file changed, 23 insertions(+), 4 deletions(-)

Approvals:
  MaxSem: Looks good to me, approved
  jenkins-bot: Verified
  Jforrester: Looks good to me, but someone else must approve



diff --git a/includes/libs/JavaScriptMinifier.php 
b/includes/libs/JavaScriptMinifier.php
index e0bbb59..679da2b 100644
--- a/includes/libs/JavaScriptMinifier.php
+++ b/includes/libs/JavaScriptMinifier.php
@@ -433,27 +433,43 @@
                                continue;
                        }
 
-                       // Find out which kind of token we're handling. $end 
will point past the end of it.
+                       // Find out which kind of token we're handling.
+                       // Note: $end must point past the end of the current 
token
+                       // so that `substr($s, $pos, $end - $pos)` would be the 
entire token.
+                       // In other words, $end will be the offset of the last 
relevant character
+                       // in the stream + 1, or simply put: The offset of the 
first character
+                       // of any next token in the stream.
                        $end = $pos + 1;
                        // Handle string literals
                        if( $ch === "'" || $ch === '"' ) {
                                // Search to the end of the string literal, 
skipping over backslash escapes
                                $search = $ch . '\\';
                                do{
+                                       // Speculatively add 2 to the end so 
that if we see a backslash,
+                                       // the next iteration will start 2 
characters further (one for the
+                                       // backslash, one for the escaped 
character).
+                                       // We'll correct this outside the loop.
                                        $end += strcspn( $s, $search, $end ) + 
2;
+                                       // If the last character in our search 
for a quote or a backslash
+                                       // matched a backslash and we haven't 
reached the end, keep searching...
                                } while( $end - 2 < $length && $s[$end - 2] === 
'\\' );
+                               // Correction (1): Undo speculative add, keep 
only one (end of string literal)
                                $end--;
                        // We have to distinguish between regexp literals and 
division operators
                        // A division operator is only possible in certain 
states
                        } elseif( $ch === '/' && !isset( $divStates[$state] ) ) 
{
                                // Regexp literal
                                for( ; ; ) {
+                                       // Search until we find "/" (end of 
regexp), "\" (backslash escapes),
+                                       // or "[" (start of character classes).
                                        do{
-                                               // Skip until we find "/" (end 
of regexp), "\" (backslash escapes),
-                                               // or "[" (start of character 
classes).
+                                               // Speculatively add 2 to 
ensure next iteration skips
+                                               // over backslash and escaped 
character.
+                                               // We'll correct this outside 
the loop.
                                                $end += strcspn( $s, '/[\\', 
$end ) + 2;
                                                // If backslash escape, keep 
searching...
                                        } while( $end - 2 < $length && $s[$end 
- 2] === '\\' );
+                                       // Correction (1): Undo speculative 
add, keep only one (end of regexp)
                                        $end--;
                                        // If the end, stop here.
                                        if( $end - 1 >= $length || $s[$end - 1] 
=== '/' ) {
@@ -462,11 +478,14 @@
                                        // (Implicit else), we must've found 
the start of a char class,
                                        // skip until we find "]" (end of char 
class), or "\" (backslash escape)
                                        do{
+                                               // Speculatively add 2 for 
backslash escape.
+                                               // We'll subtract one outside 
the loop.
                                                $end += strcspn( $s, ']\\', 
$end ) + 2;
                                                // If backslash escape, keep 
searching...
                                        } while( $end - 2 < $length && $s[$end 
- 2] === '\\' );
+                                       // Correction (1): Undo speculative 
add, keep only one (end of char class)
                                        $end--;
-                               };
+                               }
                                // Search past the regexp modifiers (gi)
                                while( $end < $length && ctype_alpha( $s[$end] 
) ) {
                                        $end++;

-- 
To view, visit https://gerrit.wikimedia.org/r/403331
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I86c79b1b1866339d65d1c69e56d457c62544aaac
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Krinkle <krinklem...@gmail.com>
Gerrit-Reviewer: Aaron Schulz <asch...@wikimedia.org>
Gerrit-Reviewer: Catrope <r...@wikimedia.org>
Gerrit-Reviewer: Imarlier <imarl...@wikimedia.org>
Gerrit-Reviewer: Jforrester <jforres...@wikimedia.org>
Gerrit-Reviewer: MaxSem <maxsem.w...@gmail.com>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to