jenkins-bot has submitted this change and it was merged.

Change subject: Performance improvements for RdfWriter
......................................................................


Performance improvements for RdfWriter

Change-Id: I20df20124a3db2404a3a99421373128332d9fd4b
---
M repo/includes/rdf/N3Quoter.php
M repo/includes/rdf/N3RdfWriterBase.php
M repo/includes/rdf/RdfWriterBase.php
3 files changed, 75 insertions(+), 85 deletions(-)

Approvals:
  Thiemo Mättig (WMDE): Looks good to me, approved
  jenkins-bot: Verified



diff --git a/repo/includes/rdf/N3Quoter.php b/repo/includes/rdf/N3Quoter.php
index f2084f7..407beb9 100644
--- a/repo/includes/rdf/N3Quoter.php
+++ b/repo/includes/rdf/N3Quoter.php
@@ -11,67 +11,31 @@
  */
 class N3Quoter {
 
-       private $badChars = array(
-               "\"",
-               "\\",
-               "\0",
-               "\n",
-               "\r",
-               "\t",
-       );
-
-       private $badCharEscapes = array(
-               '\"',
-               '\\\\',
-               '\0',
-               '\n',
-               '\r',
-               '\t',
-       );
-
-       private $badUriChars = array(
-               "<",
-               ">",
-               "\"",
-               " ",
-               "\n",
-               "\r",
-               "\t",
-       );
-
-       private $badUriCharEscapes = array(
-               '%3C',
-               '%3E',
-               '%22',
-               '%20',
-               '%0D',
-               '%0A',
-               '%09',
-       );
-
        /**
         * @var UnicodeEscaper
         */
        private $escaper = null;
 
        /**
-        * @param $escapeUnicode
+        * @param bool $escapeUnicode
         */
        public function setEscapeUnicode( $escapeUnicode ) {
                $this->escaper = $escapeUnicode ? new UnicodeEscaper() : null;
        }
 
-       public function escapeIRI( $uri ) {
-               //FIXME: more robust escaping;
-               //FIXME: apply unicode escaping?!
-               $quoted = str_replace( $this->badUriChars, 
$this->badUriCharEscapes, $uri );
+       public function escapeIRI( $iri ) {
+               $quoted = strtr( $iri, array(
+                       ' ' => '%20',
+                       '"' => '%22',
+                       '<' => '%3C',
+                       '>' => '%3E',
+               ) );
 
                return $quoted;
        }
 
        public function escapeLiteral( $s ) {
-               //FIXME: more robust escaping
-               $escaped = str_replace( $this->badChars, $this->badCharEscapes, 
$s );
+               $escaped = addcslashes( $s, "\r\n\t\0\\\"" );
 
                if ( $this->escaper !== null ) {
                        $escaped = $this->escaper->escapeString( $escaped );
diff --git a/repo/includes/rdf/N3RdfWriterBase.php 
b/repo/includes/rdf/N3RdfWriterBase.php
index 8eebea1..25059f7 100644
--- a/repo/includes/rdf/N3RdfWriterBase.php
+++ b/repo/includes/rdf/N3RdfWriterBase.php
@@ -16,10 +16,29 @@
         */
        protected $quoter;
 
+       /**
+        * @var bool
+        */
+       private $trustIRIs = true;
+
        public function __construct( $role = parent::DOCUMENT_ROLE, 
BNodeLabeler $labeler = null, N3Quoter $quoter = null ) {
                parent::__construct( $role, $labeler );
 
                $this->quoter = $quoter ?: new N3Quoter();
+       }
+
+       /**
+        * @return boolean
+        */
+       public function getTrustIRIs() {
+               return $this->trustIRIs;
+       }
+
+       /**
+        * @param boolean $trustIRIs
+        */
+       public function setTrustIRIs( $trustIRIs ) {
+               $this->trustIRIs = $trustIRIs;
        }
 
        protected function writeRef( $base, $local = null ) {
@@ -51,7 +70,10 @@
                        throw new InvalidArgumentException( '$iri must be an 
absolute iri: ' . $iri );
                }
 
-               $iri = $this->quoter->escapeIRI( $iri );
+               if ( !$this->trustIRIs ) {
+                       $iri = $this->quoter->escapeIRI( $iri );
+               }
+
                $this->write( '<', $iri, '>' );
        }
 
diff --git a/repo/includes/rdf/RdfWriterBase.php 
b/repo/includes/rdf/RdfWriterBase.php
index 0239d1d..a42e610 100644
--- a/repo/includes/rdf/RdfWriterBase.php
+++ b/repo/includes/rdf/RdfWriterBase.php
@@ -22,10 +22,17 @@
         */
        private $buffer = array();
 
+       const STATE_START = 0;
+       const STATE_DOCUMENT = 5;
+       const STATE_SUBJECT = 10;
+       const STATE_PREDICATE = 11;
+       const STATE_OBJECT = 12;
+       const STATE_DRAIN = 100;
+
        /**
         * @var string the current state
         */
-       private $state = 'start';
+       private $state = self::STATE_START;
 
        /**
         * Shorthands that can be used in place of IRIs, e.g. ("a" to mean 
rdf:type).
@@ -161,10 +168,10 @@
        final public function sub() {
                //FIXME: don't mess with the state, enqueue the writer to be 
placed in the buffer
                // later, on the next transtion to subject|document|drain
-               $this->state( 'document' );
+               $this->state( self::STATE_DOCUMENT );
 
                $writer = $this->newSubWriter( self::DOCUMENT_ROLE, 
$this->labeler );
-               $writer->state = 'document';
+               $writer->state = self::STATE_DOCUMENT;
 
                // share registered prefixes
                $writer->prefixes =& $this->prefixes;
@@ -191,11 +198,8 @@
         * @param string $s...
         */
        final protected function write() {
-               $numArgs = func_num_args();
-
-               for ( $i = 0; $i < $numArgs; $i++ ) {
-                       $s = func_get_arg( $i );
-                       $this->buffer[] = $s;
+               foreach ( func_get_args() as $arg ) {
+                       $this->buffer[] = $arg;
                }
        }
 
@@ -249,7 +253,7 @@
         * @see RdfWriter::start()
         */
        final public function start() {
-               $this->state( 'document' );
+               $this->state( self::STATE_DOCUMENT );
        }
 
        /**
@@ -275,7 +279,7 @@
         */
        public function reset() {
                $this->buffer = array();
-               $this->state = 'start'; //TODO: may depend on role
+               $this->state = self::STATE_START; //TODO: may depend on role
 
                $this->currentSubject = array( null, null );
                $this->currentPredicate = array( null, null );
@@ -308,7 +312,7 @@
         * @param string $uri
         */
        final public function prefix( $prefix, $uri ) {
-               $this->state( 'document' );
+               $this->state( self::STATE_DOCUMENT );
 
                $this->registerPrefix( $prefix, $uri );
                $this->writePrefix( $prefix, $uri );
@@ -329,7 +333,7 @@
                        return $this; // redundant about() call
                }
 
-               $this->state( 'subject' );
+               $this->state( self::STATE_SUBJECT );
 
                $this->currentSubject[0] = $base;
                $this->currentSubject[1] = $local;
@@ -370,7 +374,7 @@
                        return $this; // redundant about() call
                }
 
-               $this->state( 'predicate' );
+               $this->state( self::STATE_PREDICATE );
 
                $this->currentPredicate[0] = $base;
                $this->currentPredicate[1] = $local;
@@ -388,7 +392,7 @@
         * @return RdfWriter $this
         */
        final public function is( $base, $local = null ) {
-               $this->state( 'object' );
+               $this->state( self::STATE_OBJECT );
 
                $this->expandResource( $base, $local );
                $this->writeResource( $base, $local );
@@ -404,7 +408,7 @@
         * @return $this
         */
        final public function text( $text, $language = null ) {
-               $this->state( 'object' );
+               $this->state( self::STATE_OBJECT );
 
                $this->writeText( $text, $language );
                return $this;
@@ -422,7 +426,7 @@
         * @return $this
         */
        final public function value( $value, $typeBase = null, $typeLocal = 
null ) {
-               $this->state( 'object' );
+               $this->state( self::STATE_OBJECT );
 
                if ( $typeBase === null && !is_string( $value ) ) {
                        $vtype = gettype( $value );
@@ -463,19 +467,19 @@
         */
        final protected function state( $newState ) {
                switch ( $newState ) {
-                       case 'document':
+                       case self::STATE_DOCUMENT:
                                $this->transitionDocument();
                                break;
 
-                       case 'subject':
+                       case self::STATE_SUBJECT:
                                $this->transitionSubject();
                                break;
 
-                       case 'predicate':
+                       case self::STATE_PREDICATE:
                                $this->transitionPredicate();
                                break;
 
-                       case 'object':
+                       case self::STATE_OBJECT:
                                $this->transitionObject();
                                break;
 
@@ -492,33 +496,33 @@
 
        private function transitionDocument() {
                switch ( $this->state ) {
-                       case 'document':
+                       case self::STATE_DOCUMENT:
                                break;
 
-                       case 'start':
+                       case self::STATE_START:
                                $this->beginDocument();
                                break;
 
-                       case 'object': // when injecting a sub-document
+                       case self::STATE_OBJECT: // when injecting a 
sub-document
                                $this->finishObject( 'last' );
                                $this->finishPredicate( 'last' );
                                $this->finishSubject();
                                break;
 
                        default:
-                               throw new LogicException( 'Bad transition: ' . 
$this->state. ' -> ' . 'document'  );
+                               throw new LogicException( 'Bad transition: ' . 
$this->state. ' -> ' . self::STATE_DOCUMENT  );
                }
        }
 
        private function transitionSubject() {
                switch ( $this->state ) {
-                       case 'document':
+                       case self::STATE_DOCUMENT:
                                $this->beginSubject();
                                break;
 
-                       case 'object':
+                       case self::STATE_OBJECT:
                                if ( $this->role !== self::DOCUMENT_ROLE ) {
-                                       throw new LogicException( 'Bad 
transition: ' . $this->state. ' -> ' . 'subject' );
+                                       throw new LogicException( 'Bad 
transition: ' . $this->state. ' -> ' . self::STATE_SUBJECT );
                                }
 
                                $this->finishObject( 'last' );
@@ -528,19 +532,19 @@
                                break;
 
                        default:
-                               throw new LogicException( 'Bad transition: ' . 
$this->state. ' -> ' . 'subject' );
+                               throw new LogicException( 'Bad transition: ' . 
$this->state. ' -> ' . self::STATE_SUBJECT );
                }
        }
 
        private function transitionPredicate() {
                switch ( $this->state ) {
-                       case 'subject':
+                       case self::STATE_SUBJECT:
                                $this->beginPredicate( 'first' );
                                break;
 
-                       case 'object':
+                       case self::STATE_OBJECT:
                                if ( $this->role === self::STATEMENT_ROLE ) {
-                                       throw new LogicException( 'Bad 
transition: ' . $this->state. ' -> ' . 'subject' );
+                                       throw new LogicException( 'Bad 
transition: ' . $this->state. ' -> ' . self::STATE_PREDICATE );
                                }
 
                                $this->finishObject( 'last' );
@@ -549,38 +553,38 @@
                                break;
 
                        default:
-                               throw new LogicException( 'Bad transition: ' . 
$this->state. ' -> ' . 'predicate' );
+                               throw new LogicException( 'Bad transition: ' . 
$this->state. ' -> ' . self::STATE_PREDICATE );
 
                }
        }
 
        private function transitionObject() {
                switch ( $this->state ) {
-                       case 'predicate':
+                       case self::STATE_PREDICATE:
                                $this->beginObject( 'first' );
                                break;
 
-                       case 'object':
+                       case self::STATE_OBJECT:
                                $this->finishObject();
                                $this->beginObject();
                                break;
 
                        default:
-                               throw new LogicException( 'Bad transition: ' . 
$this->state. ' -> ' . 'object' );
+                               throw new LogicException( 'Bad transition: ' . 
$this->state. ' -> ' . self::STATE_OBJECT );
 
                }
        }
 
        private function transitionDrain() {
                switch ( $this->state ) {
-                       case 'start':
+                       case self::STATE_START:
                                break;
 
-                       case 'document':
+                       case self::STATE_DOCUMENT:
                                $this->finishDocument();
                                break;
 
-                       case 'object':
+                       case self::STATE_OBJECT:
 
                                $this->finishObject( 'last' );
                                $this->finishPredicate( 'last' );
@@ -589,7 +593,7 @@
                                break;
 
                        default:
-                               throw new LogicException( 'Bad transition: ' . 
$this->state. ' -> ' . 'object' );
+                               throw new LogicException( 'Bad transition: ' . 
$this->state. ' -> ' . self::STATE_OBJECT );
 
                }
        }

-- 
To view, visit https://gerrit.wikimedia.org/r/197407
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I20df20124a3db2404a3a99421373128332d9fd4b
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <daniel.kinz...@wikimedia.de>
Gerrit-Reviewer: Daniel Kinzler <daniel.kinz...@wikimedia.de>
Gerrit-Reviewer: Smalyshev <smalys...@wikimedia.org>
Gerrit-Reviewer: Thiemo Mättig (WMDE) <thiemo.maet...@wikimedia.de>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to