Cenarium has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/278609

Change subject: Allow to resume parse from a serialized half parsed text
......................................................................

Allow to resume parse from a serialized half parsed text

This allows resuming a parse from serialized half-parsed text
(as returned by Parser::serializeHalfParsedText) that was extracted
from another parse.

Change-Id: Ib39e26ad762e2da39ba0b3ca08ce1bf558af6de7
---
M includes/parser/LinkHolderArray.php
M includes/parser/Parser.php
M includes/parser/StripState.php
3 files changed, 106 insertions(+), 9 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/09/278609/1

diff --git a/includes/parser/LinkHolderArray.php 
b/includes/parser/LinkHolderArray.php
index 04b5614..7ec51ca 100644
--- a/includes/parser/LinkHolderArray.php
+++ b/includes/parser/LinkHolderArray.php
@@ -95,6 +95,28 @@
        }
 
        /**
+        * Returns whether the links holder does not have any useful data
+        * (and is thus useless to serialize)
+        *
+        * @return bool
+        */
+       public function isTrivial() {
+               return !count( $this->internals ) && !count( $this->interwikis 
);
+       }
+
+       /**
+        * Returns all data about this object that can be made available 
publicly through the API
+        *
+        * @return array
+        */
+       public function getPublicData() {
+               return [
+                       'internals' => $this->internals,
+                       'interwikis' => $this->interwikis
+               ];
+       }
+
+       /**
         * Merge another LinkHolderArray into this one
         * @param LinkHolderArray $other
         */
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index d7ba266..3bc8e3d 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -1216,15 +1216,18 @@
         * @param string $text
         * @param bool $isMain
         * @param PPFrame|bool $frame
+        * @param bool $isResume
         *
         * @return string
         */
-       public function internalParse( $text, $isMain = true, $frame = false ) {
+       public function internalParse( $text, $isMain = true, $frame = false, 
$isResume = false ) {
 
                $origText = $text;
 
                # Hook to suspend the parser in this state
-               if ( !Hooks::run( 'ParserBeforeInternalParse', [ &$this, 
&$text, &$this->mStripState ] ) ) {
+               if ( !$isResume &&
+                       !Hooks::run( 'ParserBeforeInternalParse', [ &$this, 
&$text, &$this->mStripState ] )
+               ) {
                        return $text;
                }
 
@@ -1276,6 +1279,33 @@
                $text = $this->formatHeadings( $text, $origText, $isMain );
 
                return $text;
+       }
+
+       /**
+        * Resume parse from a serialised HalfParsedText extracted from another
+        * parser instance
+        * @warning This may leave strip markers in some convoluted cases
+        * @todo Fix this, StripState::getSubState probably not recursive enough
+        *
+        * @param array $data
+        * @param Title|null $title
+        * @param ParserOptions $options
+        * @param bool $clearState
+        * @param bool $isMain
+        * @param bool $linestart
+        *
+        * @return string Fully parsed html
+        */
+       public function resumeParse( $data, $title, $options, $clearState = 
true, $isMain = true,
+               $linestart = true
+       ) {
+               $this->startParse( $title, $options, self::OT_HTML, $clearState 
);
+               // retrieve text, merge StripStates
+               $text = $this->unserializeHalfParsedText( $data, false );
+               $text = $this->internalParse( $text, $isMain, false, true );
+               // this hook gets called between internalParse and 
internalParseHalfParsed in parse
+               Hooks::run( 'ParserAfterParse', [ &$this, &$text, 
&$this->mStripState ] );
+               return $this->internalParseHalfParsed( $text, true, $linestart 
);
        }
 
        /**
@@ -6306,16 +6336,27 @@
         * the return value of a parser hook.
         *
         * @param string $text
+        * @param bool $includeVersion
         *
         * @return array
         */
-       public function serializeHalfParsedText( $text ) {
+       public function serializeHalfParsedText( $text, $includeVersion = true 
) {
                $data = [
                        'text' => $text,
-                       'version' => self::HALF_PARSED_VERSION,
                        'stripState' => $this->mStripState->getSubState( $text 
),
                        'linkHolders' => $this->mLinkHolders->getSubArray( 
$text )
                ];
+               if ( $includeVersion ) {
+                       $data['version'] = self::HALF_PARSED_VERSION;
+               }
+               // no need to store strip state if it has no markers
+               if ( $data['stripState']->isTrivial() ) {
+                       unset( $data['stripState'] );
+               }
+               // no need to store link holders if it has no links
+               if ( $data['linkHolders']->isTrivial() ) {
+                       unset( $data['linkHolders'] );
+               }
                return $data;
        }
 
@@ -6331,20 +6372,27 @@
         * check whether it is still valid, by calling isValidHalfParsedText().
         *
         * @param array $data Serialized data
+        * @param bool $checkVersion
         * @throws MWException
         * @return string
         */
-       public function unserializeHalfParsedText( $data ) {
-               if ( !isset( $data['version'] ) || $data['version'] != 
self::HALF_PARSED_VERSION ) {
+       public function unserializeHalfParsedText( $data, $checkVersion = true 
) {
+               if ( $checkVersion && 
+                       ( !isset( $data['version'] ) || $data['version'] != 
self::HALF_PARSED_VERSION )
+               ) {
                        throw new MWException( __METHOD__ . ': invalid version' 
);
                }
 
                # First, extract the strip state.
                $texts = [ $data['text'] ];
-               $texts = $this->mStripState->merge( $data['stripState'], $texts 
);
+               if ( isset( $data['stripState'] ) ) {
+                       $texts = $this->mStripState->merge( 
$data['stripState'], $texts );
+               }
 
                # Now renumber links
-               $texts = $this->mLinkHolders->mergeForeign( 
$data['linkHolders'], $texts );
+               if ( isset( $data['linkHolders'] ) ) {
+                       $texts = $this->mLinkHolders->mergeForeign( 
$data['linkHolders'], $texts );
+               }
 
                # Should be good to go.
                return $texts[0];
diff --git a/includes/parser/StripState.php b/includes/parser/StripState.php
index c168aa6..22137d6 100644
--- a/includes/parser/StripState.php
+++ b/includes/parser/StripState.php
@@ -26,7 +26,6 @@
  * @ingroup Parser
  */
 class StripState {
-       protected $prefix;
        protected $data;
        protected $regex;
 
@@ -54,6 +53,34 @@
                $this->circularRefGuard = [];
        }
 
+       function __sleep() {
+               return [ 'data' ];
+       }
+
+       function __wakeup() {
+               $this->regex = '/' . Parser::MARKER_PREFIX . "([^\x7f]+)" . 
Parser::MARKER_SUFFIX . '/';
+               $this->circularRefGuard = [];
+       }
+
+       /**
+        * Returns whether the strip state does not have any useful data
+        * (and is thus useless to serialize)
+        *
+        * @return bool
+        */
+       public function isTrivial() {
+               return !count( $this->data['nowiki'] ) && !count( 
$this->data['general'] );
+       }
+
+       /**
+        * Returns all data about this object that can be made available 
publicly through the API
+        *
+        * @return array
+        */
+       public function getPublicData() {
+               return [ 'markers' => $this->data ];
+       }
+
        /**
         * Add a nowiki strip item
         * @param string $marker

-- 
To view, visit https://gerrit.wikimedia.org/r/278609
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib39e26ad762e2da39ba0b3ca08ce1bf558af6de7
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Cenarium <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to