Cenarium has uploaded a new change for review.
https://gerrit.wikimedia.org/r/278609
Change subject: Allow to resume parse from a serialized half parsed text
......................................................................
Allow to resume parse from a serialized half parsed text
This allows resuming a parse from serialized half-parsed text
(as returned by Parser::serializeHalfParsedText) that was extracted
from another parse.
Change-Id: Ib39e26ad762e2da39ba0b3ca08ce1bf558af6de7
---
M includes/parser/LinkHolderArray.php
M includes/parser/Parser.php
M includes/parser/StripState.php
3 files changed, 106 insertions(+), 9 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/09/278609/1
diff --git a/includes/parser/LinkHolderArray.php b/includes/parser/LinkHolderArray.php
index 04b5614..7ec51ca 100644
--- a/includes/parser/LinkHolderArray.php
+++ b/includes/parser/LinkHolderArray.php
@@ -95,6 +95,28 @@
}
/**
+ * Returns whether the links holder does not have any useful data
+ * (and is thus useless to serialize)
+ *
+ * @return bool
+ */
+ public function isTrivial() {
+ return !count( $this->internals ) && !count( $this->interwikis
);
+ }
+
+ /**
+ * Returns all data about this object that can be made available publicly through the API
+ *
+ * @return array
+ */
+ public function getPublicData() {
+ return [
+ 'internals' => $this->internals,
+ 'interwikis' => $this->interwikis
+ ];
+ }
+
+ /**
* Merge another LinkHolderArray into this one
* @param LinkHolderArray $other
*/
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index d7ba266..3bc8e3d 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -1216,15 +1216,18 @@
* @param string $text
* @param bool $isMain
* @param PPFrame|bool $frame
+ * @param bool $isResume
*
* @return string
*/
- public function internalParse( $text, $isMain = true, $frame = false ) {
+ public function internalParse( $text, $isMain = true, $frame = false,
$isResume = false ) {
$origText = $text;
# Hook to suspend the parser in this state
- if ( !Hooks::run( 'ParserBeforeInternalParse', [ &$this,
&$text, &$this->mStripState ] ) ) {
+ if ( !$isResume &&
+ !Hooks::run( 'ParserBeforeInternalParse', [ &$this,
&$text, &$this->mStripState ] )
+ ) {
return $text;
}
@@ -1276,6 +1279,33 @@
$text = $this->formatHeadings( $text, $origText, $isMain );
return $text;
+ }
+
+ /**
+ * Resume parse from a serialised HalfParsedText extracted from another
+ * parser instance
+ * @warning This may leave strip markers in some convoluted cases
+ * @todo Fix this, StripState::getSubState probably not recursive enough
+ *
+ * @param array $data
+ * @param Title|null $title
+ * @param ParserOptions $options
+ * @param bool $clearState
+ * @param bool $isMain
+ * @param bool $linestart
+ *
+ * @return string Fully parsed html
+ */
+ public function resumeParse( $data, $title, $options, $clearState =
true, $isMain = true,
+ $linestart = true
+ ) {
+ $this->startParse( $title, $options, self::OT_HTML, $clearState
);
+ // retrieve text, merge StripStates
+ $text = $this->unserializeHalfParsedText( $data, false );
+ $text = $this->internalParse( $text, $isMain, false, true );
+ // this hook gets called between internalParse and internalParseHalfParsed in parse
+ Hooks::run( 'ParserAfterParse', [ &$this, &$text,
&$this->mStripState ] );
+ return $this->internalParseHalfParsed( $text, true, $linestart
);
}
/**
@@ -6306,16 +6336,27 @@
* the return value of a parser hook.
*
* @param string $text
+ * @param bool $includeVersion
*
* @return array
*/
- public function serializeHalfParsedText( $text ) {
+ public function serializeHalfParsedText( $text, $includeVersion = true
) {
$data = [
'text' => $text,
- 'version' => self::HALF_PARSED_VERSION,
'stripState' => $this->mStripState->getSubState( $text
),
'linkHolders' => $this->mLinkHolders->getSubArray(
$text )
];
+ if ( $includeVersion ) {
+ $data['version'] = self::HALF_PARSED_VERSION;
+ }
+ // no need to store strip state if it has no markers
+ if ( $data['stripState']->isTrivial() ) {
+ unset( $data['stripState'] );
+ }
+ // no need to store link holders if it has no links
+ if ( $data['linkHolders']->isTrivial() ) {
+ unset( $data['linkHolders'] );
+ }
return $data;
}
@@ -6331,20 +6372,27 @@
* check whether it is still valid, by calling isValidHalfParsedText().
*
* @param array $data Serialized data
+ * @param bool $checkVersion
* @throws MWException
* @return string
*/
- public function unserializeHalfParsedText( $data ) {
- if ( !isset( $data['version'] ) || $data['version'] !=
self::HALF_PARSED_VERSION ) {
+ public function unserializeHalfParsedText( $data, $checkVersion = true
) {
+ if ( $checkVersion &&
+ ( !isset( $data['version'] ) || $data['version'] !=
self::HALF_PARSED_VERSION )
+ ) {
throw new MWException( __METHOD__ . ': invalid version'
);
}
# First, extract the strip state.
$texts = [ $data['text'] ];
- $texts = $this->mStripState->merge( $data['stripState'], $texts
);
+ if ( isset( $data['stripState'] ) ) {
+ $texts = $this->mStripState->merge(
$data['stripState'], $texts );
+ }
# Now renumber links
- $texts = $this->mLinkHolders->mergeForeign(
$data['linkHolders'], $texts );
+ if ( isset( $data['linkHolders'] ) ) {
+ $texts = $this->mLinkHolders->mergeForeign(
$data['linkHolders'], $texts );
+ }
# Should be good to go.
return $texts[0];
diff --git a/includes/parser/StripState.php b/includes/parser/StripState.php
index c168aa6..22137d6 100644
--- a/includes/parser/StripState.php
+++ b/includes/parser/StripState.php
@@ -26,7 +26,6 @@
* @ingroup Parser
*/
class StripState {
- protected $prefix;
protected $data;
protected $regex;
@@ -54,6 +53,34 @@
$this->circularRefGuard = [];
}
+ function __sleep() {
+ return [ 'data' ];
+ }
+
+ function __wakeup() {
+ $this->regex = '/' . Parser::MARKER_PREFIX . "([^\x7f]+)" .
Parser::MARKER_SUFFIX . '/';
+ $this->circularRefGuard = [];
+ }
+
+ /**
+ * Returns whether the strip state does not have any useful data
+ * (and is thus useless to serialize)
+ *
+ * @return bool
+ */
+ public function isTrivial() {
+ return !count( $this->data['nowiki'] ) && !count(
$this->data['general'] );
+ }
+
+ /**
+ * Returns all data about this object that can be made available publicly through the API
+ *
+ * @return array
+ */
+ public function getPublicData() {
+ return [ 'markers' => $this->data ];
+ }
+
/**
* Add a nowiki strip item
* @param string $marker
--
To view, visit https://gerrit.wikimedia.org/r/278609
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib39e26ad762e2da39ba0b3ca08ce1bf558af6de7
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Cenarium <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits