jenkins-bot has submitted this change and it was merged.

Change subject: (bug 52799) Dump JSON of entities listed in file.
......................................................................


(bug 52799) Dump JSON of entities listed in file.

This allows dumpJson.php to dump just those entities given in
a given list file.

Change-Id: Ie101cc58b0218d623e73b832a4fc9d45f1d1ac4a
---
M lib/WikibaseLib.classes.php
A lib/includes/Disposable.php
M lib/includes/Dumpers/JsonDumpGenerator.php
A lib/includes/IO/EntityIdReader.php
A lib/includes/IO/LineReader.php
A lib/tests/phpunit/IO/EntityIdReaderTest.txt
A lib/tests/phpunit/IO/EntrityIdReaderTest.php
A lib/tests/phpunit/IO/LineReaderTest.php
A lib/tests/phpunit/IO/LineReaderTest.txt
M repo/includes/store/sql/ConvertingResultWrapper.php
M repo/maintenance/dumpJson.php
11 files changed, 426 insertions(+), 6 deletions(-)

Approvals:
  Addshore: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/WikibaseLib.classes.php b/lib/WikibaseLib.classes.php
index c6120b8..fbde8a6 100644
--- a/lib/WikibaseLib.classes.php
+++ b/lib/WikibaseLib.classes.php
@@ -32,6 +32,9 @@
                // Autoloading
                'Wikibase\LibHooks' => 'WikibaseLib.hooks.php',
 
+               // generic things that could be factored out
+               'Disposable' => 'includes/Disposable.php',
+
                // includes
                'Wikibase\ChangeNotifier' => 'includes/ChangeNotifier.php',
                'Wikibase\ChangeNotificationJob' => 
'includes/ChangeNotificationJob.php',
@@ -100,6 +103,10 @@
                'Wikibase\Lib\SnakFormatterFactory' => 
'includes/formatters/SnakFormatterFactory.php',
                'Wikibase\Lib\WikibaseSnakFormatterBuilders' => 
'includes/formatters/WikibaseSnakFormatterBuilders.php',
 
+               // includes/IO
+               'Wikibase\IO\LineReader' => 'includes/IO/LineReader.php',
+               'Wikibase\IO\EntityIdReader' => 
'includes/IO/EntityIdReader.php',
+
                // includes/modules
                'Wikibase\RepoAccessModule' => 
'includes/modules/RepoAccessModule.php',
                'Wikibase\SitesModule' => 'includes/modules/SitesModule.php',
diff --git a/lib/includes/Disposable.php b/lib/includes/Disposable.php
new file mode 100644
index 0000000..a362c4b
--- /dev/null
+++ b/lib/includes/Disposable.php
@@ -0,0 +1,24 @@
+<?php
+
+/**
+ * An interface for objects that support explicit disposal.
+ *
+ * @license GPL 2+
+ * @author Daniel Kinzler
+ *
+ * @todo make this reusable outside Wikibase
+ */
+interface Disposable {
+
+       /**
+        * Releases any system (or other) resources held by this object.
+        *
+        * It is safe to call dispose() multiple times.
+        * The behavior of all other methods of this object becomes undefined 
after calling dispose()
+        * for the first time.
+        *
+        * Implementing classes may choose to implement the __destruct() method 
to call dispose().
+        */
+       public function dispose();
+
+}
diff --git a/lib/includes/Dumpers/JsonDumpGenerator.php 
b/lib/includes/Dumpers/JsonDumpGenerator.php
index 20f007f..efad245 100644
--- a/lib/includes/Dumpers/JsonDumpGenerator.php
+++ b/lib/includes/Dumpers/JsonDumpGenerator.php
@@ -83,7 +83,7 @@
                        }
                }
 
-               $json = "]\n"; //TODO: make optional
+               $json = "\n]\n"; //TODO: make optional
                $this->writeToDump( $json );
        }
 
diff --git a/lib/includes/IO/EntityIdReader.php 
b/lib/includes/IO/EntityIdReader.php
new file mode 100644
index 0000000..b4b62dc
--- /dev/null
+++ b/lib/includes/IO/EntityIdReader.php
@@ -0,0 +1,98 @@
+<?php
+
+namespace Wikibase\IO;
+
+use Disposable;
+use Iterator;
+use Wikibase\DataModel\Entity\EntityId;
+use Wikibase\Lib\EntityIdParser;
+
+/**
+ * EntityIdReader reads entity IDs from a file, one per line.
+ *
+ * @license GPL 2+
+ * @author Daniel Kinzler
+ */
+class EntityIdReader implements Iterator, Disposable {
+
+       /**
+        * @var LineReader
+        */
+       protected $reader;
+
+       /**
+        * @param resource $fileHandle The file to read from.
+        * @param bool $canClose Whether calling dispose() should close the 
file handle.
+        * @param bool $autoDispose Whether to automatically call dispose() 
when reaching EOF.
+        *
+        * @throws \InvalidArgumentException
+        */
+       public function __construct( $fileHandle, $canClose = true, 
$autoDispose = false ) {
+               $this->reader = new LineReader( $fileHandle, $canClose, 
$autoDispose );
+               $this->parser = new EntityIdParser(); //TODO: inject?
+       }
+
+       /**
+        * @param string $line
+        * @return EntityId
+        */
+       protected function lineToId( $line ) {
+               $line = trim( $line );
+               $id = $this->parser->parse( $line );
+               //TODO: optionally catch, log & ignore ParseException
+
+               return $id;
+       }
+
+       /**
+        */
+       public function dispose() {
+               $this->reader->dispose();
+       }
+
+       /**
+        * Returns the current ID.
+        *
+        * @link http://php.net/manual/en/iterator.current.php
+        * @return EntityId
+        */
+       public function current() {
+               $line = $this->reader->current();
+               return $this->lineToId( $line );
+       }
+
+       /**
+        * Advance to next ID. Blank lines are skipped.
+        *
+        * @see LineReader::next()
+        */
+       public function next() {
+               do {
+                       $this->reader->next();
+               } while ( $this->reader->valid() && trim( 
$this->reader->current() ) === '' );
+       }
+
+       /**
+        * @see LineReader::key()
+        * @return int
+        */
+       public function key() {
+               return $this->reader->key();
+       }
+
+       /**
+        * @see LineReader::valid()
+        * @return boolean
+        */
+       public function valid() {
+               return $this->reader->valid();
+       }
+
+       /**
+        * Rewinds the underlying reader and skips leading blank lines,
+        * the same way next() skips blank lines.
+        *
+        * @see LineReader::rewind()
+        */
+       public function rewind() {
+               $this->reader->rewind();
+
+               // Without this, a file starting with a blank line would make
+               // current() try to parse an empty string as an entity ID.
+               while ( $this->reader->valid() && trim( $this->reader->current() ) === '' ) {
+                       $this->reader->next();
+               }
+       }
+
+}
\ No newline at end of file
diff --git a/lib/includes/IO/LineReader.php b/lib/includes/IO/LineReader.php
new file mode 100644
index 0000000..ed2e054
--- /dev/null
+++ b/lib/includes/IO/LineReader.php
@@ -0,0 +1,155 @@
+<?php
+
+namespace Wikibase\IO;
+
+use Disposable;
+use Iterator;
+
+/**
+ * LineReader allows iterating over the lines of a file.
+ * Each line returned will contain the line separator character(s) and all 
whitespace.
+ * Concatenating all lines returned by the reader should result in the 
original file.
+ *
+ * @license GPL 2+
+ * @author Daniel Kinzler
+ */
+class LineReader implements Iterator, Disposable {
+
+       /**
+        * @var resource
+        */
+       protected $fileHandle;
+
+       /**
+        * Whether dispose() will close the file handle.
+        *
+        * @var bool
+        */
+       protected $canClose;
+
+       /**
+        * Whether dispose() is called automatically when the end of file is 
reached.
+        *
+        * @var bool
+        */
+       protected $autoDispose;
+
+       /**
+        * @var string
+        */
+       protected $current = null;
+
+       /**
+        * @var int
+        */
+       protected $line = 0;
+
+       /**
+        * @param resource $fileHandle The file to read from.
+        * @param bool $canClose Whether calling dispose() should close the 
file handle.
+        * @param bool $autoDispose Whether to automatically call dispose() 
when reaching EOF
+        *             or when this reader is destructed.
+        *
+        * @throws \InvalidArgumentException
+        */
+       public function __construct( $fileHandle, $canClose = true, 
$autoDispose = false ) {
+               if ( !is_resource( $fileHandle ) ) {
+                       throw new \InvalidArgumentException( '$fileHandle must 
be a file resource.' );
+               }
+
+               if ( !is_bool( $canClose ) ) {
+                       throw new \InvalidArgumentException( '$canClose must be 
a boolean.' );
+               }
+
+               if ( !is_bool( $autoDispose ) ) {
+                       throw new \InvalidArgumentException( '$autoDispose must 
be a boolean.' );
+               }
+
+               $this->fileHandle = $fileHandle;
+
+               $this->canClose = $canClose;
+               $this->autoDispose = $autoDispose;
+       }
+
+       /**
+        * Closes the underlying file handle if the $canClose parameter was 
given as
+        * true (the default) in the constructor.
+        */
+       public function dispose() {
+               if ( $this->fileHandle && $this->canClose ) {
+                       fclose( $this->fileHandle );
+               }
+
+               $this->fileHandle = false;
+       }
+
+       /**
+        * Destructor, calls dispose() if $autoDispose was set in the 
constructor.
+        */
+       public function __destruct() {
+               if ( $this->autoDispose ) {
+                       $this->dispose();
+               }
+       }
+
+       /**
+        * Return the current line.
+        *
+        * @link http://php.net/manual/en/iterator.current.php
+        * @return string
+        */
+       public function current() {
+               return $this->current;
+       }
+
+       /**
+        * Reads the next line. Use current() to get the line's content.
+        *
+        * @link http://php.net/manual/en/iterator.next.php
+        */
+       public function next() {
+               $this->current = fgets( $this->fileHandle );
+
+               if ( $this->valid() ) {
+                       $this->line++;
+               } elseif ( $this->autoDispose ) {
+                       $this->dispose();
+               }
+       }
+
+       /**
+        * Return the current line number.
+        * @link http://php.net/manual/en/iterator.key.php
+        * @return int
+        */
+       public function key() {
+               return $this->line;
+       }
+
+       /**
+        * Checks if current position is valid. Returns true if and only if
+        * next() has been called at least once and the end of file has not yet 
been reached.
+        *
+        * @link http://php.net/manual/en/iterator.valid.php
+        * @return boolean whether there is a current line
+        */
+       public function valid() {
+               return is_string( $this->current );
+       }
+
+       /**
+        * Rewinds to the beginning of the file (if supported), resets the line counter
+        * and reads the first line. Has no effect if this LineReader has already been disposed.
+        *
+        * @link http://php.net/manual/en/iterator.rewind.php
+        * @return void Any returned value is ignored.
+        */
+       public function rewind() {
+               if ( $this->fileHandle ) {
+                       fseek( $this->fileHandle, 0 );
+                       $this->current = null;
+                       $this->line = 0; // restart numbering so key() is correct after a repeated rewind
+                       $this->next();
+               }
+       }
+}
diff --git a/lib/tests/phpunit/IO/EntityIdReaderTest.txt 
b/lib/tests/phpunit/IO/EntityIdReaderTest.txt
new file mode 100644
index 0000000..8eef7b3
--- /dev/null
+++ b/lib/tests/phpunit/IO/EntityIdReaderTest.txt
@@ -0,0 +1,6 @@
+Q1
+P2
+
+  q3
+
+ p4
\ No newline at end of file
diff --git a/lib/tests/phpunit/IO/EntrityIdReaderTest.php 
b/lib/tests/phpunit/IO/EntrityIdReaderTest.php
new file mode 100644
index 0000000..7c652d1
--- /dev/null
+++ b/lib/tests/phpunit/IO/EntrityIdReaderTest.php
@@ -0,0 +1,49 @@
+<?php
+
+namespace Wikibase\Test\IO;
+use PHPUnit_Framework_TestCase;
+use Wikibase\DataModel\Entity\ItemId;
+use Wikibase\DataModel\Entity\PropertyId;
+use Wikibase\IO\EntityIdReader;
+
+/**
+ * @covers Wikibase\IO\EntityIdReader
+ *
+ * @ingroup WikibaseLib
+ * @ingroup Test
+ *
+ * @group Wikibase
+ * @group WikibaseLib
+ * @group WikibaseIO
+ *
+ * @license GPL 2+
+ * @author Daniel Kinzler
+ */
+class EntityIdReaderTest extends PHPUnit_Framework_TestCase {
+
+       protected function getTestFile() {
+               return __DIR__ . '/EntityIdReaderTest.txt';
+       }
+
+       protected function openIdReader( $file ) {
+               $handle = fopen( $file, 'r' );
+               return new EntityIdReader( $handle );
+       }
+
+       public function testIteration() {
+               $expected = array(
+                       new ItemId( 'Q1' ),
+                       new PropertyId( 'P2' ),
+                       new ItemId( 'Q3' ),
+                       new PropertyId( 'P4' ),
+               );
+
+               $file = $this->getTestFile();
+               $reader = $this->openIdReader( $file );
+               $actual = iterator_to_array( $reader );
+               $reader->dispose();
+
+               $this->assertEmpty( array_diff( $expected, $actual ), 
"Different IDs" );
+       }
+
+}
diff --git a/lib/tests/phpunit/IO/LineReaderTest.php 
b/lib/tests/phpunit/IO/LineReaderTest.php
new file mode 100644
index 0000000..51f9525
--- /dev/null
+++ b/lib/tests/phpunit/IO/LineReaderTest.php
@@ -0,0 +1,43 @@
+<?php
+
+namespace Wikibase\Test\IO;
+use PHPUnit_Framework_TestCase;
+use Wikibase\IO\LineReader;
+
+/**
+ * @covers Wikibase\IO\LineReader
+ *
+ * @ingroup WikibaseLib
+ * @ingroup Test
+ *
+ * @group Wikibase
+ * @group WikibaseLib
+ * @group WikibaseIO
+ *
+ * @license GPL 2+
+ * @author Daniel Kinzler
+ */
+class LineReaderTest extends PHPUnit_Framework_TestCase {
+
+       protected function getTestFile() {
+               return __DIR__ . '/LineReaderTest.txt';
+       }
+
+       protected function openLineReader( $file ) {
+               $handle = fopen( $file, 'r' );
+               return new LineReader( $handle );
+       }
+
+       public function testIteration() {
+               $file = $this->getTestFile();
+
+               $expected = file( $file );
+
+               $reader = $this->openLineReader( $file );
+               $actual = iterator_to_array( $reader );
+               $reader->dispose();
+               // Reader keys are 1-based line numbers, file() is 0-based: compare values in order.
+               $this->assertEquals( $expected, array_values( $actual ), "Different Lines" );
+       }
+
+}
diff --git a/lib/tests/phpunit/IO/LineReaderTest.txt 
b/lib/tests/phpunit/IO/LineReaderTest.txt
new file mode 100644
index 0000000..360a4b0
--- /dev/null
+++ b/lib/tests/phpunit/IO/LineReaderTest.txt
@@ -0,0 +1,4 @@
+Hello
+World!
+
+The End.
diff --git a/repo/includes/store/sql/ConvertingResultWrapper.php 
b/repo/includes/store/sql/ConvertingResultWrapper.php
index ab5a9f0..daef172 100644
--- a/repo/includes/store/sql/ConvertingResultWrapper.php
+++ b/repo/includes/store/sql/ConvertingResultWrapper.php
@@ -13,6 +13,9 @@
  *
  * @licence GNU GPL v2+
  * @author Daniel Kinzler
+ *
+ * @todo: this should implement Disposable and know a LoadBalancer instance, so
+ *        we can recycle the DB connection when done.
  */
 abstract class ConvertingResultWrapper implements Iterator {
 
diff --git a/repo/maintenance/dumpJson.php b/repo/maintenance/dumpJson.php
index 5463f36..3e6ab1a 100644
--- a/repo/maintenance/dumpJson.php
+++ b/repo/maintenance/dumpJson.php
@@ -1,10 +1,13 @@
 <?php
 
 namespace Wikibase;
+use Disposable;
 use Iterator;
 use Maintenance;
+use Traversable;
 use ValueFormatters\FormatterOptions;
 use Wikibase\Dumpers\JsonDumpGenerator;
+use Wikibase\IO\EntityIdReader;
 use Wikibase\Lib\EntityIdFormatter;
 use Wikibase\Lib\Serializers\EntitySerializationOptions;
 use Wikibase\Lib\Serializers\EntitySerializer;
@@ -47,11 +50,12 @@
 
                $this->mDescription = 'Generate a JSON dump from entities in 
the repository.';
 
-               //TODO: read list of IDs from file
                //TODO: filter by entity type
+               //TODO: shard by id congruence class ( id % n == m )
                //$this->addOption( 'rebuild-all', "Update property info for 
all properties (per default, only missing entries are created)" );
                //$this->addOption( 'start-row', "The ID of the first row to 
update (useful for continuing aborted runs)", false, true );
-               //$this->addOption( 'batch-size', "Number of rows to update per 
database transaction (100 per default)", false, true );
+
+               $this->addOption( 'list-file', "A file containing one entity ID 
per line", false, true );
        }
 
        public function initServices() {
@@ -83,16 +87,43 @@
 
                $idStream = $this->makeIdStream();
                $dumper->generateDump( $idStream );
+
+               if ( $idStream instanceof Disposable ) {
+                       // close stream / free resources
+                       $idStream->dispose();
+               }
        }
 
        /**
         * @return Iterator a stream of EntityId objects
         */
        public function makeIdStream() {
-               //TODO: provide list/filter of entities
-               //TODO: allow ids to be read from a file
+               $listFile = $this->getOption( 'list-file' );
 
-               $stream = $this->entityPerPage->getEntities();
+               if ( $listFile !== null ) {
+                       //TODO: allow filtering by entity type, id congruence 
class ( id % n == m ), etc.
+                       $stream = $this->makeIdFileStream( $listFile );
+               } else {
+                       $stream = $this->entityPerPage->getEntities();
+               }
+
+               return $stream;
+       }
+
+       /**
+        * Opens the given list file and wraps the handle in an EntityIdReader.
+        * @param string $listFile Path of a file containing one entity ID per line.
+        * @return Traversable
+        * @throws \MWException if the file cannot be opened; the message names $listFile.
+        */
+       protected function makeIdFileStream( $listFile ) {
+               $input = fopen( $listFile, 'r' );
+
+               if ( !$input ) {
+                       throw new \MWException( "Failed to open ID file: $listFile" );
+               }
+
+               $stream = new EntityIdReader( $input );
                return $stream;
        }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/84000
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ie101cc58b0218d623e73b832a4fc9d45f1d1ac4a
Gerrit-PatchSet: 9
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <daniel.kinz...@wikimedia.de>
Gerrit-Reviewer: Addshore <addshorew...@gmail.com>
Gerrit-Reviewer: Aude <aude.w...@gmail.com>
Gerrit-Reviewer: Denny Vrandecic <denny.vrande...@wikimedia.de>
Gerrit-Reviewer: Henning Snater <henning.sna...@wikimedia.de>
Gerrit-Reviewer: Jeroen De Dauw <jeroended...@gmail.com>
Gerrit-Reviewer: Tobias Gritschacher <tobias.gritschac...@wikimedia.de>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to