Aude has uploaded a new change for review.
https://gerrit.wikimedia.org/r/248345
Change subject: Add --forceParse UpdaterFlag and option in forceSearchIndex script
......................................................................
Add --forceParse UpdaterFlag and option in forceSearchIndex script
Bug: T116381
Change-Id: I90889e448f02b7d6baa5302cc37630db74a743f1
---
M includes/Updater.php
M maintenance/forceSearchIndex.php
2 files changed, 21 insertions(+), 4 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/45/248345/1
diff --git a/includes/Updater.php b/includes/Updater.php
index 7b12fb5..50c3fe0 100644
--- a/includes/Updater.php
+++ b/includes/Updater.php
@@ -10,6 +10,7 @@
use MediaWiki\Logger\LoggerFactory;
use MWTimestamp;
use ParserCache;
+use ParserOutput;
use Sanitizer;
use TextContent;
use Title;
@@ -41,6 +42,7 @@
const INDEX_ON_SKIP = 1;
const SKIP_PARSE = 2;
const SKIP_LINKS = 4;
+ const FORCE_PARSE = 8;
/**
* Full title text of pages updated in this process. Used for
deduplication
@@ -269,6 +271,7 @@
$indexOnSkip = $flags & self::INDEX_ON_SKIP;
$skipParse = $flags & self::SKIP_PARSE;
$skipLinks = $flags & self::SKIP_LINKS;
+ $forceParse = $flags & self::FORCE_PARSE;
$fullDocument = !( $skipParse || $skipLinks );
$documents = array();
@@ -304,7 +307,10 @@
if ( !$skipParse ) {
// Get text to index, based on content and
parser output
- list( $content, $parserOutput ) =
$this->getContentAndParserOutput( $page );
+ list( $content, $parserOutput ) =
$this->getContentAndParserOutput(
+ $page,
+ $forceParse
+ );
// Build our page data
$pageBuilder = new PageDataBuilder( $doc,
$title, $content, $parserOutput );
@@ -358,13 +364,18 @@
* Fetch page's content and parser output, using the parser cache if we
can
*
* @param WikiPage $page The wikipage to get output for
+ * @param boolean $forceParse Bypass ParserCache and force a fresh
parse.
* @return array(Content,ParserOutput)
*/
- private function getContentAndParserOutput( $page ) {
+ private function getContentAndParserOutput( $page, $forceParse ) {
$content = $page->getContent();
$parserOptions = $page->makeParserOptions( 'canonical' );
- $parserOutput = ParserCache::singleton()->get( $page,
$parserOptions );
- if ( !$parserOutput ) {
+
+ if ( $forceParse === false ) {
+ $parserOutput = ParserCache::singleton()->get( $page,
$parserOptions );
+ }
+
+ if ( !isset( $parserOutput ) || !$parserOutput instanceof
ParserOutput ) {
// We specify the revision ID here. There might be a
newer revision,
// but we don't care because (a) we've already got a
job somewhere
// in the queue to index it, and (b) we want magic
words like
diff --git a/maintenance/forceSearchIndex.php b/maintenance/forceSearchIndex.php
index 2ae5091..cf7cf0e 100644
--- a/maintenance/forceSearchIndex.php
+++ b/maintenance/forceSearchIndex.php
@@ -79,6 +79,7 @@
'This replaces the contents of the index for that entry
with the entry built from a skipped process.' .
'Without this if the entry does not exist then it will
be skipped entirely. Only set this when running ' .
'the first pass of building the index. Otherwise,
don\'t tempt fate by indexing half complete documents.' );
+ $this->addOption( 'forceParse', 'Bypass ParserCache and do a
fresh parse of pages from the Content.' );
$this->addOption( 'skipParse', 'Skip parsing the page. This is
really only good for running the second half ' .
'of the two phase index build. If this is specified
then the default batch size is actually 50.' );
$this->addOption( 'skipLinks', 'Skip looking for links to the
page (counting and finding redirects). Use ' .
@@ -133,6 +134,11 @@
if ( $this->getOption( 'skipLinks' ) ) {
$updateFlags |= Updater::SKIP_LINKS;
}
+
+ if ( $this->getOption( 'forceParse' ) ) {
+ $updateFlags |= Updater::FORCE_PARSE;
+ }
+
$this->namespace = $this->hasOption( 'namespace' ) ?
intval( $this->getOption( 'namespace' ) ) : null;
--
To view, visit https://gerrit.wikimedia.org/r/248345
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I90889e448f02b7d6baa5302cc37630db74a743f1
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Aude <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits