jenkins-bot has submitted this change and it was merged.

Change subject: Performance improvements for RdfWriter
......................................................................
Performance improvements for RdfWriter Change-Id: I20df20124a3db2404a3a99421373128332d9fd4b --- M repo/includes/rdf/N3Quoter.php M repo/includes/rdf/N3RdfWriterBase.php M repo/includes/rdf/RdfWriterBase.php 3 files changed, 75 insertions(+), 85 deletions(-) Approvals: Thiemo Mättig (WMDE): Looks good to me, approved jenkins-bot: Verified diff --git a/repo/includes/rdf/N3Quoter.php b/repo/includes/rdf/N3Quoter.php index f2084f7..407beb9 100644 --- a/repo/includes/rdf/N3Quoter.php +++ b/repo/includes/rdf/N3Quoter.php @@ -11,67 +11,31 @@ */ class N3Quoter { - private $badChars = array( - "\"", - "\\", - "\0", - "\n", - "\r", - "\t", - ); - - private $badCharEscapes = array( - '\"', - '\\\\', - '\0', - '\n', - '\r', - '\t', - ); - - private $badUriChars = array( - "<", - ">", - "\"", - " ", - "\n", - "\r", - "\t", - ); - - private $badUriCharEscapes = array( - '%3C', - '%3E', - '%22', - '%20', - '%0D', - '%0A', - '%09', - ); - /** * @var UnicodeEscaper */ private $escaper = null; /** - * @param $escapeUnicode + * @param bool $escapeUnicode */ public function setEscapeUnicode( $escapeUnicode ) { $this->escaper = $escapeUnicode ? new UnicodeEscaper() : null; } - public function escapeIRI( $uri ) { - //FIXME: more robust escaping; - //FIXME: apply unicode escaping?! 
- $quoted = str_replace( $this->badUriChars, $this->badUriCharEscapes, $uri ); + public function escapeIRI( $iri ) { + $quoted = strtr( $iri, array( + ' ' => '%20', + '"' => '%22', + '<' => '%3C', + '>' => '%3E', + ) ); return $quoted; } public function escapeLiteral( $s ) { - //FIXME: more robust escaping - $escaped = str_replace( $this->badChars, $this->badCharEscapes, $s ); + $escaped = addcslashes( $s, "\r\n\t\0\\\"" ); if ( $this->escaper !== null ) { $escaped = $this->escaper->escapeString( $escaped ); diff --git a/repo/includes/rdf/N3RdfWriterBase.php b/repo/includes/rdf/N3RdfWriterBase.php index 8eebea1..25059f7 100644 --- a/repo/includes/rdf/N3RdfWriterBase.php +++ b/repo/includes/rdf/N3RdfWriterBase.php @@ -16,10 +16,29 @@ */ protected $quoter; + /** + * @var bool + */ + private $trustIRIs = true; + public function __construct( $role = parent::DOCUMENT_ROLE, BNodeLabeler $labeler = null, N3Quoter $quoter = null ) { parent::__construct( $role, $labeler ); $this->quoter = $quoter ?: new N3Quoter(); + } + + /** + * @return boolean + */ + public function getTrustIRIs() { + return $this->trustIRIs; + } + + /** + * @param boolean $trustIRIs + */ + public function setTrustIRIs( $trustIRIs ) { + $this->trustIRIs = $trustIRIs; } protected function writeRef( $base, $local = null ) { @@ -51,7 +70,10 @@ throw new InvalidArgumentException( '$iri must be an absolute iri: ' . 
$iri ); } - $iri = $this->quoter->escapeIRI( $iri ); + if ( !$this->trustIRIs ) { + $iri = $this->quoter->escapeIRI( $iri ); + } + $this->write( '<', $iri, '>' ); } diff --git a/repo/includes/rdf/RdfWriterBase.php b/repo/includes/rdf/RdfWriterBase.php index 0239d1d..a42e610 100644 --- a/repo/includes/rdf/RdfWriterBase.php +++ b/repo/includes/rdf/RdfWriterBase.php @@ -22,10 +22,17 @@ */ private $buffer = array(); + const STATE_START = 0; + const STATE_DOCUMENT = 5; + const STATE_SUBJECT = 10; + const STATE_PREDICATE = 11; + const STATE_OBJECT = 12; + const STATE_DRAIN = 100; + /** * @var string the current state */ - private $state = 'start'; + private $state = self::STATE_START; /** * Shorthands that can be used in place of IRIs, e.g. ("a" to mean rdf:type). @@ -161,10 +168,10 @@ final public function sub() { //FIXME: don't mess with the state, enqueue the writer to be placed in the buffer // later, on the next transtion to subject|document|drain - $this->state( 'document' ); + $this->state( self::STATE_DOCUMENT ); $writer = $this->newSubWriter( self::DOCUMENT_ROLE, $this->labeler ); - $writer->state = 'document'; + $writer->state = self::STATE_DOCUMENT; // share registered prefixes $writer->prefixes =& $this->prefixes; @@ -191,11 +198,8 @@ * @param string $s... 
*/ final protected function write() { - $numArgs = func_num_args(); - - for ( $i = 0; $i < $numArgs; $i++ ) { - $s = func_get_arg( $i ); - $this->buffer[] = $s; + foreach ( func_get_args() as $arg ) { + $this->buffer[] = $arg; } } @@ -249,7 +253,7 @@ * @see RdfWriter::start() */ final public function start() { - $this->state( 'document' ); + $this->state( self::STATE_DOCUMENT ); } /** @@ -275,7 +279,7 @@ */ public function reset() { $this->buffer = array(); - $this->state = 'start'; //TODO: may depend on role + $this->state = self::STATE_START; //TODO: may depend on role $this->currentSubject = array( null, null ); $this->currentPredicate = array( null, null ); @@ -308,7 +312,7 @@ * @param string $uri */ final public function prefix( $prefix, $uri ) { - $this->state( 'document' ); + $this->state( self::STATE_DOCUMENT ); $this->registerPrefix( $prefix, $uri ); $this->writePrefix( $prefix, $uri ); @@ -329,7 +333,7 @@ return $this; // redundant about() call } - $this->state( 'subject' ); + $this->state( self::STATE_SUBJECT ); $this->currentSubject[0] = $base; $this->currentSubject[1] = $local; @@ -370,7 +374,7 @@ return $this; // redundant about() call } - $this->state( 'predicate' ); + $this->state( self::STATE_PREDICATE ); $this->currentPredicate[0] = $base; $this->currentPredicate[1] = $local; @@ -388,7 +392,7 @@ * @return RdfWriter $this */ final public function is( $base, $local = null ) { - $this->state( 'object' ); + $this->state( self::STATE_OBJECT ); $this->expandResource( $base, $local ); $this->writeResource( $base, $local ); @@ -404,7 +408,7 @@ * @return $this */ final public function text( $text, $language = null ) { - $this->state( 'object' ); + $this->state( self::STATE_OBJECT ); $this->writeText( $text, $language ); return $this; @@ -422,7 +426,7 @@ * @return $this */ final public function value( $value, $typeBase = null, $typeLocal = null ) { - $this->state( 'object' ); + $this->state( self::STATE_OBJECT ); if ( $typeBase === null && !is_string( 
$value ) ) { $vtype = gettype( $value ); @@ -463,19 +467,19 @@ */ final protected function state( $newState ) { switch ( $newState ) { - case 'document': + case self::STATE_DOCUMENT: $this->transitionDocument(); break; - case 'subject': + case self::STATE_SUBJECT: $this->transitionSubject(); break; - case 'predicate': + case self::STATE_PREDICATE: $this->transitionPredicate(); break; - case 'object': + case self::STATE_OBJECT: $this->transitionObject(); break; @@ -492,33 +496,33 @@ private function transitionDocument() { switch ( $this->state ) { - case 'document': + case self::STATE_DOCUMENT: break; - case 'start': + case self::STATE_START: $this->beginDocument(); break; - case 'object': // when injecting a sub-document + case self::STATE_OBJECT: // when injecting a sub-document $this->finishObject( 'last' ); $this->finishPredicate( 'last' ); $this->finishSubject(); break; default: - throw new LogicException( 'Bad transition: ' . $this->state. ' -> ' . 'document' ); + throw new LogicException( 'Bad transition: ' . $this->state. ' -> ' . self::STATE_DOCUMENT ); } } private function transitionSubject() { switch ( $this->state ) { - case 'document': + case self::STATE_DOCUMENT: $this->beginSubject(); break; - case 'object': + case self::STATE_OBJECT: if ( $this->role !== self::DOCUMENT_ROLE ) { - throw new LogicException( 'Bad transition: ' . $this->state. ' -> ' . 'subject' ); + throw new LogicException( 'Bad transition: ' . $this->state. ' -> ' . self::STATE_SUBJECT ); } $this->finishObject( 'last' ); @@ -528,19 +532,19 @@ break; default: - throw new LogicException( 'Bad transition: ' . $this->state. ' -> ' . 'subject' ); + throw new LogicException( 'Bad transition: ' . $this->state. ' -> ' . 
self::STATE_SUBJECT ); } } private function transitionPredicate() { switch ( $this->state ) { - case 'subject': + case self::STATE_SUBJECT: $this->beginPredicate( 'first' ); break; - case 'object': + case self::STATE_OBJECT: if ( $this->role === self::STATEMENT_ROLE ) { - throw new LogicException( 'Bad transition: ' . $this->state. ' -> ' . 'subject' ); + throw new LogicException( 'Bad transition: ' . $this->state. ' -> ' . self::STATE_PREDICATE ); } $this->finishObject( 'last' ); @@ -549,38 +553,38 @@ break; default: - throw new LogicException( 'Bad transition: ' . $this->state. ' -> ' . 'predicate' ); + throw new LogicException( 'Bad transition: ' . $this->state. ' -> ' . self::STATE_PREDICATE ); } } private function transitionObject() { switch ( $this->state ) { - case 'predicate': + case self::STATE_PREDICATE: $this->beginObject( 'first' ); break; - case 'object': + case self::STATE_OBJECT: $this->finishObject(); $this->beginObject(); break; default: - throw new LogicException( 'Bad transition: ' . $this->state. ' -> ' . 'object' ); + throw new LogicException( 'Bad transition: ' . $this->state. ' -> ' . self::STATE_OBJECT ); } } private function transitionDrain() { switch ( $this->state ) { - case 'start': + case self::STATE_START: break; - case 'document': + case self::STATE_DOCUMENT: $this->finishDocument(); break; - case 'object': + case self::STATE_OBJECT: $this->finishObject( 'last' ); $this->finishPredicate( 'last' ); @@ -589,7 +593,7 @@ break; default: - throw new LogicException( 'Bad transition: ' . $this->state. ' -> ' . 'object' ); + throw new LogicException( 'Bad transition: ' . $this->state. ' -> ' . 
self::STATE_OBJECT ); } } -- To view, visit https://gerrit.wikimedia.org/r/197407 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I20df20124a3db2404a3a99421373128332d9fd4b Gerrit-PatchSet: 4 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Daniel Kinzler <daniel.kinz...@wikimedia.de> Gerrit-Reviewer: Daniel Kinzler <daniel.kinz...@wikimedia.de> Gerrit-Reviewer: Smalyshev <smalys...@wikimedia.org> Gerrit-Reviewer: Thiemo Mättig (WMDE) <thiemo.maet...@wikimedia.de> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits