Tpt has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/370807 )
Change subject: Migrates links extraction to IndexContent ...................................................................... Migrates links extraction to IndexContent Change-Id: I1cd83824b309708ccf6cdb2b396f0b2e985c20d9 --- M extension.json A includes/Link.php M includes/Pagination/PaginationFactory.php M includes/Parser/PagesTagParser.php M includes/index/EditIndexPage.php M includes/index/IndexContent.php M includes/index/IndexContentHandler.php M includes/index/ProofreadIndexPage.php M tests/phpunit/index/IndexContentTest.php M tests/phpunit/index/ProofreadIndexPageTest.php 10 files changed, 256 insertions(+), 202 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ProofreadPage refs/changes/07/370807/1 diff --git a/extension.json b/extension.json index bad3426..12f9ce6 100644 --- a/extension.json +++ b/extension.json @@ -46,6 +46,7 @@ "ProofreadPage\\DiffFormatterUtils": "includes/DiffFormatterUtils.php", "ProofreadPage\\FileNotFoundException": "includes/FileNotFoundException.php", "ProofreadPage\\FileProvider": "includes/FileProvider.php", + "ProofreadPage\\Link": "includes/Link.php", "ProofreadIndexEntry": "includes/index/ProofreadIndexEntry.php", "ProofreadIndexPage": "includes/index/ProofreadIndexPage.php", "ProofreadPage\\Index\\IndexContent": "includes/index/IndexContent.php", diff --git a/includes/Link.php b/includes/Link.php new file mode 100644 index 0000000..9c1d649 --- /dev/null +++ b/includes/Link.php @@ -0,0 +1,46 @@ +<?php +/** + * Created by IntelliJ IDEA. + * User: thomas + * Date: 07/08/2017 + * Time: 18:47 + */ + +namespace ProofreadPage; + +use Title; + +/** + * @licence GNU GPL v2+ + * + * A link to a MediaWiki page. It is composed by a target and a label + */ +class Link { + + private $target; + + private $label; + + /** + * @param Title $target + * @param string $label + */ + public function __construct( Title $target, $label ) { + $this->target = $target; + $this->label = $label; + } + + /** + * @return Title + */ + public function getTarget() { + return $this->target; + } + + /** + * @return string + */ + public function getLabel() { + return $this->label; + } +} diff --git a/includes/Pagination/PaginationFactory.php b/includes/Pagination/PaginationFactory.php index 834723d..6e62a3e 100644 --- a/includes/Pagination/PaginationFactory.php +++ b/includes/Pagination/PaginationFactory.php @@ -52,7 +52,7 @@ } // check if it is using pagelist - $pagelist = $indexPage->getPagelistTagContent(); + $pagelist = $indexPage->getContent()->getPagelistTagContent(); if ( $pagelist !== null && $file && $file->isMultipage() ) { return new FilePagination( $indexPage, @@ -61,12 +61,14 @@ $this->context ); } else { - $links = $indexPage->getLinksToPageNamespace(); + $links = $indexPage->getContent()->getLinksToNamespace( + Context::getDefaultContext()->getPageNamespaceId() + ); $pages = []; $pageNumbers = []; foreach ( $links as $link ) { - $pages[] = new ProofreadPagePage( $link[0], $indexPage ); - $pageNumbers[] = new PageNumber( $link[1] ); + $pages[] = new ProofreadPagePage( $link->getTarget(), $indexPage ); + $pageNumbers[] = new PageNumber( $link->getLabel() ); } return new PagePagination( $indexPage, $pages, $pageNumbers ); } diff --git a/includes/Parser/PagesTagParser.php b/includes/Parser/PagesTagParser.php index 9d737b2..90cabe0 100644 --- a/includes/Parser/PagesTagParser.php +++ b/includes/Parser/PagesTagParser.php @@ -263,26 +263,28 @@ if ( $header == 'toc' ) { $this->parser->getOutput()->is_toc = true; } - $indexLinks = $indexPage->getLinksToMainNamespace(); + $indexLinks = $indexPage->getContent()->getLinksToNamespace( + NS_MAIN, $indexTitle, true + ); $pageTitle = $this->parser->getTitle(); $h_out = '{{:MediaWiki:Proofreadpage_header_template'; $h_out .= "|value=$header"; // find next and previous pages in list $indexLinksCount = count( $indexLinks ); for ( $i = 0; $i < $indexLinksCount; $i++ ) { - if ( $pageTitle->equals( $indexLinks[$i][0] ) ) { - $current = '[[' . $indexLinks[$i][0]->getFullText() . '|' . - $indexLinks[$i][1] . ']]'; + if ( $pageTitle->equals( $indexLinks[$i]->getTarget() ) ) { + $current = '[[' . $indexLinks[$i]->getTarget()->getFullText() . '|' . + $indexLinks[$i]->getLabel() . ']]'; break; } } if ( $i > 1 ) { - $prev = '[[' . $indexLinks[$i - 1][0]->getFullText() . '|' . - $indexLinks[$i - 1][1] . ']]'; + $prev = '[[' . $indexLinks[$i - 1]->getTarget()->getFullText() . '|' . + $indexLinks[$i - 1]->getLabel() . ']]'; } - if ( $i + 1 < count( $indexLinks ) ) { - $next = '[[' . $indexLinks[$i + 1][0]->getFullText() . '|' . - $indexLinks[$i + 1][1] . ']]'; + if ( $i + 1 < $indexLinksCount ) { + $next = '[[' . $indexLinks[$i + 1]->getTarget()->getFullText() . '|' . + $indexLinks[$i + 1]->getLabel() . ']]'; } if ( isset( $args['current'] ) ) { $current = $args['current']; diff --git a/includes/index/EditIndexPage.php b/includes/index/EditIndexPage.php index 49facbe..fc4e82c 100644 --- a/includes/index/EditIndexPage.php +++ b/includes/index/EditIndexPage.php @@ -12,6 +12,7 @@ use OOUI\TextInputWidget; use ProofreadIndexEntry; use ProofreadIndexPage; +use ProofreadPage\Context; use Status; use WikitextContent; @@ -178,24 +179,28 @@ * @see EditPage::internalAttemptSave */ public function internalAttemptSave( &$result, $bot = false ) { - $index = $this->getActualContent(); - - // Get list of pages titles - $links = $index->getLinksToPageNamespace(); - $linksTitle = []; - foreach ( $links as $link ) { - $linksTitle[] = $link[0]; - } - - if ( count( $linksTitle ) !== count( array_unique( $linksTitle ) ) ) { - $this->context->getOutput()->showErrorPage( - 'proofreadpage_indexdupe', 'proofreadpage_indexdupetext' + $content = $this->getCurrentContent(); + if ( $content instanceof IndexContent ) { + // Get list of pages titles + $links = $content->getLinksToNamespace( + Context::getDefaultContext()->getPageNamespaceId(), $this->getTitle() ); - $status = Status::newGood(); - $status->fatal( 'hookaborted' ); - $status->value = self::AS_HOOK_ERROR; + $linksTitle = []; + foreach ( $links as $link ) { + $linksTitle[] = $link->getTarget(); + } - return $status; + if ( count( $linksTitle ) !== count( array_unique( $linksTitle ) ) ) { + $this->context->getOutput()->showErrorPage( + 'proofreadpage_indexdupe', + 'proofreadpage_indexdupetext' + ); + $status = Status::newGood(); + $status->fatal( 'hookaborted' ); + $status->value = self::AS_HOOK_ERROR; + + return $status; + } } return parent::internalAttemptSave( $result, $bot ); @@ -205,8 +210,7 @@ return new ProofreadIndexPage( $this->mTitle, ProofreadIndexPage::getDataConfig(), - ContentHandler::getForModelID( $this->contentModel ) - ->unserializeContent( $this->textbox1, $this->contentFormat ) + $this->getCurrentContent() ); } } diff --git a/includes/index/IndexContent.php b/includes/index/IndexContent.php index 107915d..23f77e4 100644 --- a/includes/index/IndexContent.php +++ b/includes/index/IndexContent.php @@ -4,7 +4,11 @@ use Content; use MagicWord; +use MalformedTitleException; use ParserOptions; +use ProofreadPage\Link; +use ProofreadPage\Pagination\PageList; +use Sanitizer; use TextContent; use Title; use User; @@ -174,4 +178,77 @@ $wikitextContent = new WikitextContent( $this->serialize( CONTENT_FORMAT_WIKITEXT ) ); return $wikitextContent->getParserOutput( $title, $revId, $options, $generateHtml ); } + + /** + * @return PageList|null + */ + public function getPagelistTagContent() { + $tagParameters = null; + foreach ( $this->fields as $field ) { + preg_match_all( '/<pagelist([^<]*?)\/>/is', + $field->serialize( CONTENT_FORMAT_WIKITEXT ), $m, PREG_PATTERN_ORDER + ); + if ( $m[1] ) { + if ( $tagParameters === null ) { + $tagParameters = $m[1]; + } else { + $tagParameters = array_merge( $tagParameters, $m[1] ); + } + } + } + if ( $tagParameters === null ) { + return $tagParameters; + } + + return new PageList( Sanitizer::decodeTagAttributes( implode( $tagParameters ) ) ); + } + + /** + * Returns all links in a given namespace + * + * @param integer $namespace the default namespace id + * @param Title $title the Index: page title + * @param bool $withPrepossessing apply preprocessor before looking for links + * @return Link[] + */ + public function getLinksToNamespace( + $namespace, Title $title = null, $withPrepossessing = false + ) { + $links = []; + foreach ( $this->fields as $field ) { + $wikitext = $field->serialize( CONTENT_FORMAT_WIKITEXT ); + if ( $withPrepossessing ) { + /** @var IndexContentHandler $contentHandler */ + $contentHandler = $this->getContentHandler(); + $wikitext = $contentHandler->getParser()->preprocess( + $wikitext, $title, new ParserOptions() + ); + } + $links = array_merge( + $links, $this->getLinksToNamespaceFromWikitext( $wikitext, $namespace ) + ); + } + return $links; + } + + private function getLinksToNamespaceFromWikitext( $wikitext, $namespace ) { + preg_match_all( '/\[\[(.*?)(\|(.*?)|)\]\]/i', $wikitext, $textLinks, PREG_PATTERN_ORDER ); + $links = []; + $textLinksCount = count( $textLinks[1] ); + for ( $i = 0; $i < $textLinksCount; $i++ ) { + try { + $title = Title::newFromTextThrow( $textLinks[1][$i] ); + if ( $title->inNamespace( $namespace ) ) { + if ( $textLinks[3][$i] === '' ) { + $links[] = new Link( $title, $title->getSubpageText() ); + } else { + $links[] = new Link( $title, $textLinks[3][$i] ); + } + } + } catch ( MalformedTitleException $e ) { + // We ignore invalid links + } + } + return $links; + } } diff --git a/includes/index/IndexContentHandler.php b/includes/index/IndexContentHandler.php index a7eb709..a342b11 100644 --- a/includes/index/IndexContentHandler.php +++ b/includes/index/IndexContentHandler.php @@ -37,9 +37,18 @@ parent::__construct( $modelId, [ CONTENT_FORMAT_WIKITEXT ] ); } + /** + * Warning: should not be used outside of IndexContent + * @return Parser + */ + public function getParser() { + return $this->parser; + } + private function buildParser() { global $wgParser; StubObject::unstub( $wgParser ); + /** @var Parser $parser */ $parser = clone $wgParser; $parser->startExternalParse( null, $this->makeParserOptions( 'canonical' ), Parser::OT_PLAIN, true diff --git a/includes/index/ProofreadIndexPage.php b/includes/index/ProofreadIndexPage.php index c50f246..f620c83 100644 --- a/includes/index/ProofreadIndexPage.php +++ b/includes/index/ProofreadIndexPage.php @@ -101,7 +101,7 @@ * Return content of the page * @return IndexContent */ - private function getContent() { + public function getContent() { if ( $this->content === null ) { $rev = Revision::newFromTitle( $this->title ); if ( $rev === null ) { @@ -298,89 +298,6 @@ } /** - * Return the ordered list of links to ns-0 from the index page. - * @return array of array( Title title of the pointed page, the label of the link ) - */ - public function getLinksToMainNamespace() { - return $this->getLinksToNamespaceFromContent( NS_MAIN, true ); - } - - /** - * @return array of array( Title title of the pointed page, the label of the link ) - */ - public function getLinksToPageNamespace() { - return $this->getLinksToNamespaceFromContent( - Context::getDefaultContext()->getPageNamespaceId() - ); - } - - /** - * @return PageList|null - */ - public function getPagelistTagContent() { - $tagParameters = null; - foreach ( $this->getContent()->getFields() as $field ) { - preg_match_all( '/<pagelist([^<]*?)\/>/is', - $field->serialize( CONTENT_FORMAT_WIKITEXT ), $m, PREG_PATTERN_ORDER - ); - if ( $m[1] ) { - if ( $tagParameters === null ) { - $tagParameters = $m[1]; - } else { - $tagParameters = array_merge( $tagParameters, $m[1] ); - } - } - } - if ( $tagParameters === null ) { - return $tagParameters; - } - - return new PageList( Sanitizer::decodeTagAttributes( implode( $tagParameters ) ) ); - } - - /** - * Return all links in a given namespace - * @param integer $namespace the default namespace id - * @param bool $withPrepossessing apply preprocessor before looking for links - * @return array of array( Title title of the pointed page, the label of the link ) - * @todo add an abstraction for links (Title + label) - */ - private function getLinksToNamespaceFromContent( $namespace, $withPrepossessing = false ) { - $links = []; - foreach ( $this->getContent()->getFields() as $field ) { - $wikitext = $field->serialize( CONTENT_FORMAT_WIKITEXT ); - if ( $withPrepossessing ) { - $wikitext = self::getParser()->preprocess( - $wikitext, $this->title, new ParserOptions() - ); - } - $links = array_merge( - $links, $this->getLinksToNamespaceFromWikitext( $wikitext, $namespace ) - ); - } - return $links; - } - - private function getLinksToNamespaceFromWikitext( $wikitext, $namespace ) { - preg_match_all( '/\[\[(.*?)(\|(.*?)|)\]\]/i', $wikitext, $textLinks, PREG_PATTERN_ORDER ); - $links = []; - $num = 0; - $textLinksCount = count( $textLinks[1] ); - for ( $i = 0; $i < $textLinksCount; $i++ ) { - $title = Title::newFromText( $textLinks[1][$i] ); - if ( $title !== null && $title->inNamespace( $namespace ) ) { - if ( $textLinks[3][$i] === '' ) { - $links[$num] = [ $title, $title->getSubpageText() ]; - } else { - $links[$num] = [ $title, $textLinks[3][$i] ]; - } - $num++; - } - } - return $links; - } - - /** * Return the value of an entry as wikitext with variable replaced with index entries and * $otherParams * Example: if 'header' entry is 'Page of {{title}} number {{pagenum}}' with @@ -425,23 +342,5 @@ $params[strtolower( $entry->getKey() )] = $entry->getStringValue(); } return $params; - } - - /** - * Return the Parser object done to be used for Index pages internal use - * Needed to avoid side effects of $parser->replaceVariables - * - * @return Parser - */ - protected static function getParser() { - global $wgParser; - static $parser = null; - - if ( $parser === null ) { - StubObject::unstub( $wgParser ); - $parser = clone $wgParser; - } - - return $parser; } } diff --git a/tests/phpunit/index/IndexContentTest.php b/tests/phpunit/index/IndexContentTest.php index dd731b6..3dd0172 100644 --- a/tests/phpunit/index/IndexContentTest.php +++ b/tests/phpunit/index/IndexContentTest.php @@ -4,6 +4,9 @@ use FauxRequest; use ParserOptions; +use ProofreadPage\Context; +use ProofreadPage\Link; +use ProofreadPage\Pagination\PageList; use ProofreadPageTestCase; use RequestContext; use Title; @@ -96,7 +99,7 @@ public function testGetWikitextForTransclusion() { $content = new IndexContent( [ 'foo' => new WikitextContent( 'bar' ) ] ); - return $this->assertEquals( + $this->assertEquals( "{{:MediaWiki:Proofreadpage_index_template\n|foo=bar\n}}", $content->getWikitextForTransclusion() ); @@ -183,4 +186,83 @@ $content = new IndexContent( [ 'foo' => new WikitextContent( 'bar' ) ] ); $this->assertEquals( 3, $content->getSize() ); } + + public function testGetLinksToMainNamespace() { + $content = new IndexContent( [ + 'Pages' => new WikitextContent( '[[Page:Test.jpg]]' ), + 'TOC' => new WikitextContent( "* [[Test/Chapter 1]]\n* [[Azerty:Test/Chapter_2|Chapter 2]]" ), + ] ); + $links = [ + new Link( Title::newFromText( 'Test/Chapter 1' ), 'Chapter 1' ), + new Link( Title::newFromText( 'Azerty:Test/Chapter_2' ), 'Chapter 2' ) + ]; + $this->assertEquals( + $links, + $content->getLinksToNamespace( NS_MAIN, null, true ) + ); + } + + public function testGetLinksToPageNamespace() { + $content = new IndexContent( [ + 'Pages' => new WikitextContent( + '[[Page:Test 1.jpg|TOC]] [[Page:Test 2.tiff|1]] [[Page:Test:3.png|2]]' + ), + 'Author' => new WikitextContent( '[[Author:Me]]' ), + ] ); + $links = [ + new Link( Title::newFromText( 'Page:Test 1.jpg' ), 'TOC' ), + new Link( Title::newFromText( 'Page:Test 2.tiff' ), '1' ), + new Link( Title::newFromText( 'Page:Test:3.png' ), '2' ) + ]; + $this->assertEquals( + $links, + $content->getLinksToNamespace( + Context::getDefaultContext()->getPageNamespaceId(), + Title::makeTitle( 252, 'Test' ) + ) + ); + } + + /** + * @dataProvider getPagelistTagContentProvider + */ + public function testGetPagelistTagContent( + IndexContent $content, PageList $pageList = null + ) { + $this->assertEquals( $pageList, $content->getPagelistTagContent() ); + } + + public function getPagelistTagContentProvider() { + return [ + [ + new IndexContent( [ 'Pages' => new WikitextContent( + '<pagelist to=24 1to4=- 5=1 5to24=roman />' . + '<pagelist from=25 25=1 1021to1024=- />\n|Author=[[Author:Me]]\n}}' + ) ] ), + new PageList( [ + '1to4' => '-', + '5' => '1', + '5to24' => 'roman', + '25' => '1', + '1021to1024' => '-', + 'to' => 24, + 'from' => 25 + ] ) + ], + [ + new IndexContent( [ + 'Pages' => new WikitextContent( '<pagelist/>' ), + 'Author' => new WikitextContent( '[[Author:Me]]' ) + ] ), + new PageList( [] ) + ], + [ + new IndexContent( [ + 'Pages' => new WikitextContent( '' ), + 'Author'=> new WikitextContent( '[[Author:Me]]' ) + ] ), + null + ] + ]; + } } diff --git a/tests/phpunit/index/ProofreadIndexPageTest.php b/tests/phpunit/index/ProofreadIndexPageTest.php index 8173f52..496c5fa 100644 --- a/tests/phpunit/index/ProofreadIndexPageTest.php +++ b/tests/phpunit/index/ProofreadIndexPageTest.php @@ -1,6 +1,5 @@ <?php use ProofreadPage\Index\IndexContent; -use ProofreadPage\Pagination\PageList; /** * @group ProofreadPage @@ -184,73 +183,6 @@ $this->assertEquals( $entry, $page->getIndexEntry( 'year' ) ); $this->assertNull( $page->getIndexEntry( 'years' ) ); - } - - public function testGetLinksToMainNamespace() { - $page = self::newIndexPage( - 'Test.djvu', - "{{\n|Pages=[[Page:Test.jpg]]\n|TOC=* [[Test/Chapter 1]]" . - "\n* [[Azerty:Test/Chapter_2|Chapter 2]]\n}}" - ); - $links = [ - [ Title::newFromText( 'Test/Chapter 1' ), 'Chapter 1' ], - [ Title::newFromText( 'Azerty:Test/Chapter_2' ), 'Chapter 2' ] - ]; - $this->assertEquals( $links, $page->getLinksToMainNamespace() ); - } - - public function testGetLinksToPageNamespace() { - $page = self::newIndexPage( - 'Test', - "{{\n|Pages=[[Page:Test 1.jpg|TOC]] [[Page:Test 2.tiff|1]] [[Page:Test:3.png|2]]" . - "\n|Author=[[Author:Me]]\n}}" - ); - $links = [ - [ Title::newFromText( 'Page:Test 1.jpg' ), 'TOC' ], - [ Title::newFromText( 'Page:Test 2.tiff' ), '1' ], - [ Title::newFromText( 'Page:Test:3.png' ), '2' ] - ]; - $this->assertEquals( $links, $page->getLinksToPageNamespace() ); - } - - /** - * @dataProvider getPagelistTagContentProvider - */ - public function testGetPagelistTagContent( - ProofreadIndexPage $page, PageList $pageList = null - ) { - $this->assertEquals( $pageList, $page->getPagelistTagContent() ); - } - - public function getPagelistTagContentProvider() { - return [ - [ - self::newIndexPage( - 'Test.djvu', - "{{\n|Pages=<pagelist to=24 1to4=- 5=1 5to24=roman /> " . - "<pagelist from=25 25=1 1021to1024=- />\n|Author=[[Author:Me]]\n}}" - ), - new PageList( [ - '1to4' => '-', - '5' => '1', - '5to24' => 'roman', - '25' => '1', - '1021to1024' => '-', - 'to' => 24, - 'from' => 25 - ] ) - ], - [ - self::newIndexPage( - 'Test.djvu', "{{\n|Pages=<pagelist/>\n|Author=[[Author:Me]]\n}}" - ), - new PageList( [] ) - ], - [ - self::newIndexPage( 'Test.djvu', "{{\n|Pages=\n|Author=[[Author:Me]]\n}}" ), - null - ], - ]; } public function replaceVariablesWithIndexEntriesProvider() { -- To view, visit https://gerrit.wikimedia.org/r/370807 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I1cd83824b309708ccf6cdb2b396f0b2e985c20d9 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/ProofreadPage Gerrit-Branch: master Gerrit-Owner: Tpt <thoma...@hotmail.fr> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits