Arlolra has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/96678


Change subject: Add an HTML_PRE state to the PreHandler state machine.
......................................................................

Add an HTML_PRE state to the PreHandler state machine.

 * This removes the need for an inPre property.

 * It was suggested, to avoid the confusion, in:
   https://gerrit.wikimedia.org/r/#/c/96407/2/js/lib/ext.core.PreHandler.js

 * Relevant commits are:
     20c6afe7f51d14e978f348f376d58e28a9d2b3df
     81ab0f9f1d9a5a0830f1d14a98e10a959d173620

Change-Id: Iaff9b0d00e2dd052c7500c47fe3a98c96cb5108e
---
M js/lib/ext.core.PreHandler.js
1 file changed, 53 insertions(+), 41 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid refs/changes/78/96678/1

diff --git a/js/lib/ext.core.PreHandler.js b/js/lib/ext.core.PreHandler.js
index ddbf044..cbc519a 100644
--- a/js/lib/ext.core.PreHandler.js
+++ b/js/lib/ext.core.PreHandler.js
@@ -1,7 +1,7 @@
 "use strict";
 /* --------------------------------------------------------------------------
 
- PRE-handling relies on the following 5-state FSM.
+ PRE-handling relies on the following 6-state FSM.
 
  ------
  States
@@ -16,6 +16,8 @@
                   (depending on whether we see a white-space tok or not)
  IGNORE        -- nothing to do for the rest of the line.
 
+ HTML_PRE      -- we're already in an html pre.
+
  -----------
  Transitions
  -----------
@@ -28,9 +30,10 @@
  + --------------+-----------------+---------------+--------------------------+
  | SOL           | --- nl      --> | SOL           | purge                    |
  | SOL           | --- eof     --> | SOL           | purge                    |
- | SOL           | --- ws      --> | PRE|SOL       | save ws token|purge(#,##)|
+ | SOL           | --- ws      --> | PRE           | save whitespace token(##)|
  | SOL           | --- sol-tr  --> | SOL           | TOKS << tok              |
  | SOL           | --- other   --> | IGNORE        | purge                    |
+ | SOL           | --- pre     --> | HTML_PRE      | purge (#)                |
  + --------------+-----------------+---------------+--------------------------+
  | PRE           | --- nl      --> | SOL           | purge                    |
  | PRE           |  html-blk tag   | IGNORE        | purge                    |
@@ -53,11 +56,14 @@
  + --------------+-----------------+---------------+--------------------------+
  | IGNORE        | --- nl      --> | SOL           | purge                    |
  | IGNORE        | --- eof     --> | SOL           | purge                    |
+ | IGNORE        | --- pre     --> | HTML_PRE      | purge (#)                |
+ + --------------+-----------------+---------------+--------------------------+
+ | HTML_PRE      | --- pre     --> | IGNORE        | purge                    |
  + --------------+-----------------+---------------+--------------------------+
 
  # We're being careful to avoid a situation where we generate a pre when we're
-   already inside a pre. If we've seen an open pre tag (marked as inPre), stay
-   in SOL and purge. Otherwise, save the whitespace token and transition to PRE.
+   already inside a html pre. If we've seen an open pre tag, transition to
+   HTML_PRE and purge.
 
  ## In these states, check if the whitespace token is a single space or has
    additional chars (white-space or non-whitespace) -- if yes, slice it off
@@ -67,14 +73,13 @@
 
 var Util = require('./mediawiki.Util.js').Util,
     defines = require('./mediawiki.parser.defines.js');
+
 // define some constructor shortcuts
 var CommentTk = defines.CommentTk,
     EOFTk = defines.EOFTk,
     TagTk = defines.TagTk,
     SelfclosingTagTk = defines.SelfclosingTagTk,
     EndTagTk = defines.EndTagTk;
-
-var init; // forward declaration.
 
 function isPre( token, tag ) {
        return token.constructor === tag && token.isHTMLTag() && 
token.name.toUpperCase() === "PRE";
@@ -92,7 +97,9 @@
                        "PreHandler:onNewline", this.nlRank, 'newline');
                this.manager.addTransform(this.onEnd.bind(this),
                        "PreHandler:onEnd", this.endRank, 'end');
-               init(this, true);
+               this.manager.addTransform(this.onAny.bind(this),
+                       "PreHandler:onAny", this.anyRank, 'any');
+               this.init();
        }
 }
 
@@ -108,6 +115,7 @@
 PreHandler.STATE_PRE_COLLECT = 3;
 PreHandler.STATE_MULTILINE_PRE = 4;
 PreHandler.STATE_IGNORE = 5;
+PreHandler.STATE_HTML_PRE = 6;
 
 // debug string output of FSM states
 PreHandler.STATE_STR = {
@@ -115,30 +123,25 @@
        2: 'pre        ',
        3: 'pre_collect',
        4: 'multiline  ',
-       5: 'ignore     '
+       5: 'ignore     ',
+       6: 'html_pre   '
 };
 
-init = function(handler, addAnyHandler) {
-       handler.state  = PreHandler.STATE_SOL;
-       handler.lastNlTk = null;
+PreHandler.prototype.init = function() {
+       this.state = PreHandler.STATE_SOL;
+       this.lastNlTk = null;
        // Initialize to zero to deal with indent-pre
        // on the very first line where there is no
        // preceding newline to initialize this.
-       handler.preTSR = 0;
-       handler.tokens = [];
-       handler.preWSToken = null;
-       handler.multiLinePreWSToken = null;
-       handler.solTransparentTokens = [];
-       if (addAnyHandler) {
-               handler.manager.addTransform(handler.onAny.bind(handler),
-                       "PreHandler:onAny", handler.anyRank, 'any');
-       }
-       handler.inPre = false;
+       this.preTSR = 0;
+       this.tokens = [];
+       this.preWSToken = null;
+       this.multiLinePreWSToken = null;
+       this.solTransparentTokens = [];
 };
 
 PreHandler.prototype.moveToIgnoreState = function() {
        this.state = PreHandler.STATE_IGNORE;
-       this.manager.removeTransform(this.anyRank, 'any');
 };
 
 PreHandler.prototype.popLastNL = function(ret) {
@@ -246,8 +249,12 @@
                case PreHandler.STATE_IGNORE:
                        ret = [token];
                        ret.rank = this.skipRank; // prevent this from being 
processed again
-                       init(this, true); // Reset!
+                       this.init(); // Reset!
                        this.preTSR = initPreTSR(token);
+                       break;
+
+               case PreHandler.STATE_HTML_PRE:
+                       ret = [ token ];
                        break;
        }
 
@@ -260,15 +267,13 @@
 };
 
 PreHandler.prototype.onEnd = function (token, manager, cb) {
-       this.inPre = false;
-
        if (this.state !== PreHandler.STATE_IGNORE) {
                console.error("!ERROR! Not IGNORE! Cannot get here: " + 
this.state + "; " + JSON.stringify(token));
-               init(this, false);
+               this.init();
                return {tokens: [token]};
        }
 
-       init(this, true);
+       this.init();
        return {tokens: [token]};
 };
 
@@ -289,20 +294,9 @@
 
 PreHandler.prototype.onAny = function ( token, manager, cb ) {
 
-       if ( isPre( token, TagTk ) ) {
-               this.inPre = true;
-       } else if ( isPre( token, EndTagTk ) ) {
-               this.inPre = false;
-       }
-
        if (this.trace) {
                if (this.debug) { console.warn("----------"); }
                console.warn("T:pre:any: " + PreHandler.STATE_STR[this.state] + 
" : " + JSON.stringify(token));
-       }
-
-       if (this.state === PreHandler.STATE_IGNORE) {
-               console.error("!ERROR! IGNORE! Cannot get here: " + 
JSON.stringify(token));
-               return {tokens: null};
        }
 
        var ret = null;
@@ -311,6 +305,8 @@
                switch (this.state) {
                        case PreHandler.STATE_SOL:
                        case PreHandler.STATE_PRE:
+                       case PreHandler.STATE_HTML_PRE:
+                       case PreHandler.STATE_IGNORE:
                                ret = this.getResultAndReset(token);
                                break;
 
@@ -321,12 +317,11 @@
                }
 
                // reset for next use of this pipeline!
-               this.inPre = false;
-               init(this, false);
+               this.init();
        } else {
                switch (this.state) {
                        case PreHandler.STATE_SOL:
-                               if ((tc === String) && token.match(/^ /) && 
!this.inPre) {
+                               if ((tc === String) && token.match(/^ /)) {
                                        ret = this.tokens;
                                        this.tokens = [];
                                        this.preWSToken = token[0];
@@ -341,6 +336,9 @@
                                        // update pre-tsr since we haven't 
transitioned to PRE yet
                                        this.preTSR = 
getUpdatedPreTSR(this.preTSR, token);
                                        this.tokens.push(token);
+                               } else if ( isPre( token, TagTk ) ) {
+                                       ret = this.getResultAndReset( token );
+                                       this.state = PreHandler.STATE_HTML_PRE;
                                } else {
                                        ret = this.getResultAndReset(token);
                                        this.moveToIgnoreState();
@@ -401,6 +399,20 @@
                                        this.moveToIgnoreState();
                                }
                                break;
+
+                       case PreHandler.STATE_IGNORE:
+                               if ( isPre( token, TagTk ) ) {
+                                       this.state = PreHandler.STATE_HTML_PRE;
+                               }
+                               ret = [ token ];
+                               break;
+
+                       case PreHandler.STATE_HTML_PRE:
+                               if ( isPre( token, EndTagTk ) ) {
+                                       this.moveToIgnoreState();
+                               }
+                               ret = [ token ];
+                               break;
                }
        }
 

-- 
To view, visit https://gerrit.wikimedia.org/r/96678
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iaff9b0d00e2dd052c7500c47fe3a98c96cb5108e
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <abrea...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to