Author: kn Date: Tue Feb 26 15:55:48 2008 New Revision: 7455 Log: - Parse simple paragraphs
Added: experimental/Document/src/document/rst/nodes/paragraph.php experimental/Document/tests/files/rst/parser/003_simple_text.rst experimental/Document/tests/files/rst/parser/003_simple_text.txt Modified: experimental/Document/design/class_diagram.png experimental/Document/src/document/rst/parser.php experimental/Document/src/document/rst/token.php experimental/Document/src/document_autoload.php Modified: experimental/Document/design/class_diagram.png ============================================================================== Binary files - no diff available. Added: experimental/Document/src/document/rst/nodes/paragraph.php ============================================================================== --- experimental/Document/src/document/rst/nodes/paragraph.php (added) +++ experimental/Document/src/document/rst/nodes/paragraph.php [iso-8859-1] Tue Feb 26 15:55:48 2008 @@ -1,0 +1,52 @@ +<?php +/** + * File containing the ezcDocumentRstParagraphNode class + * + * @package Document + * @version //autogen// + * @copyright Copyright (C) 2005-2008 eZ systems as. All rights reserved. + * @license http://ez.no/licenses/new_bsd New BSD License + */ + +/** + * The paragraph AST node, aggregating the text line nodes of a paragraph + * + * @package Document + * @version //autogen// + * @copyright Copyright (C) 2005-2008 eZ systems as. All rights reserved. + * @license http://ez.no/licenses/new_bsd New BSD License + */ +class ezcDocumentRstParagraphNode extends ezcDocumentRstNode +{ + /** + * Construct RST paragraph node + * + * @param ezcDocumentRstToken $token + * @return void + */ + public function __construct( ezcDocumentRstToken $token ) + { + // Hand the token on to the parent constructor, tagging this node with + // the PARAGRAPH node type. 
+ parent::__construct( $token, self::PARAGRAPH ); + } + + /** + * Set state after var_export + * + * @param array $properties + * @return ezcDocumentRstParagraphNode + * @ignore + */ + public static function __set_state( $properties ) + { + $node = new ezcDocumentRstParagraphNode( + $properties['token'] + ); + + $node->nodes = $properties['nodes']; + return $node; + } +} + +?> Modified: experimental/Document/src/document/rst/parser.php ============================================================================== --- experimental/Document/src/document/rst/parser.php [iso-8859-1] (original) +++ experimental/Document/src/document/rst/parser.php [iso-8859-1] Tue Feb 26 15:55:48 2008 @@ -64,6 +64,7 @@ ezcDocumentRstToken::WHITESPACE => array( ), ezcDocumentRstToken::NEWLINE => array( + 'shiftParagraph' ), ezcDocumentRstToken::BACKSLASH => array( 'shiftBackslash', @@ -97,9 +98,10 @@ * @var array */ protected $reductions = array( - ezcDocumentRstNode::DOCUMENT => 'reduceSection', - ezcDocumentRstNode::SECTION => 'reduceSection', - ezcDocumentRstNode::TITLE => 'reduceTitle', + ezcDocumentRstNode::DOCUMENT => 'reduceSection', + ezcDocumentRstNode::SECTION => 'reduceSection', + ezcDocumentRstNode::TITLE => 'reduceTitle', + ezcDocumentRstNode::PARAGRAPH => 'reduceParagraph', ); /** @@ -145,9 +147,11 @@ */ public function parse( array $tokens ) { + // echo "\n\nStart parser\n============\n\n"; + while ( ( $token = array_shift( $tokens ) ) !== null ) { - // echo "[T] Token: {$token->type}\n"; + // echo "[T] Token: " . ezcDocumentRstToken::getTokenName( $token->type ) . 
" ({$token->type})\n"; // First shift given token by the defined reduction methods foreach ( $this->shifts[$token->type] as $method ) @@ -205,7 +209,7 @@ * @param array $tokens * @return ezcDocumentRstDocumentNode */ - protected function shiftDocument( ezcDocumentRstToken $token, array $tokens ) + protected function shiftDocument( ezcDocumentRstToken $token, array &$tokens ) { // If there are any tokens left after the end of the file, something // went seriously wrong in the tokenizer. @@ -227,7 +231,7 @@ * @param array $tokens * @return ezcDocumentRstTitleNode */ - protected function shiftTitle( ezcDocumentRstToken $token, array $tokens ) + protected function shiftTitle( ezcDocumentRstToken $token, array &$tokens ) { if ( ( $token->position !== 1 ) || ( $tokens[0]->type !== ezcDocumentRstToken::NEWLINE ) ) @@ -262,9 +266,36 @@ * @param array $tokens * @return ezcDocumentRstTitleNode */ - protected function shiftText( ezcDocumentRstToken $token, array $tokens ) + protected function shiftText( ezcDocumentRstToken $token, array &$tokens ) { return new ezcDocumentRstTextLineNode( + $token + ); + } + + /** + * Shift a paragraph node on two newlines + * + * @param ezcDocumentRstToken $token + * @param array $tokens Remaining tokens, by reference; directly following newline tokens are removed + * @return ezcDocumentRstParagraphNode|false + */ + protected function shiftParagraph( ezcDocumentRstToken $token, array &$tokens ) + { + if ( $tokens[0]->type !== ezcDocumentRstToken::NEWLINE ) + { + // For now we only check for paragraphs closed with two newlines. NOTE(review): $tokens[0] is read without isset() above - presumably an EOF token always follows; confirm. + return false; + } + + // Remove all following newlines... 
+ while ( isset( $tokens[0] ) && + ( $tokens[0]->type === ezcDocumentRstToken::NEWLINE ) ) + { + array_shift( $tokens ); + } + + return new ezcDocumentRstParagraphNode( $token ); } @@ -383,7 +414,6 @@ ezcDocumentRstNode::TABLE, ), true ) ) { - // @TODO: Enhance error message throw new ezcDocumentRstParserException( $child->token, "Unexpected node: " . ezcDocumentRstNode::getTokenName( $child->type ) . "." 
@@ -417,7 +447,10 @@ // If the section level is higher then in our new node and // lower the the last node, reduce sections. // echo " -> Reduce section {$child->depth}."; - $child->nodes = $collected; + $child->nodes = array_merge( + $child->nodes, + $collected + ); $collected = array(); } @@ -429,9 +462,43 @@ $collected[] = $child; } - $node->nodes = $collected; + $node->nodes = array_merge( + $node->nodes, + $collected + ); array_unshift( $this->documentStack, $node ); } + + /** + * Reduce paragraph + * + * Aggregates the text line nodes found on top of the document stack into the paragraph. + * + * @param ezcDocumentRstParagraphNode $node + * @return void + */ + protected function reduceParagraph( ezcDocumentRstNode $node ) + { + $found = 0; + + // Collect all text line nodes from the top of the document stack and + // prepend them to the child nodes of the paragraph + while ( isset( $this->documentStack[0] ) && + in_array( $this->documentStack[0]->type, array( + ezcDocumentRstNode::TEXT_LINE, + ), true ) ) + { + // echo " - Append text to paragraph\n"; + array_unshift( $node->nodes, array_shift( $this->documentStack ) ); + ++$found; + } + + if ( $found > 0 ) + { + // echo " - Create paragraph\n"; + array_unshift( $this->documentStack, $node ); + } + } } ?> Modified: experimental/Document/src/document/rst/token.php ============================================================================== --- experimental/Document/src/document/rst/token.php [iso-8859-1] (original) +++ experimental/Document/src/document/rst/token.php [iso-8859-1] Tue Feb 26 15:55:48 2008 @@ -77,6 +77,33 @@ } /** + * Get token name from type + * + * Return a user readable name from the numeric token type. 
+ * + * @param int $type Numeric token type, as defined by the constants of this class + * @return string Readable name, or 'Unknown' if the type has no name assigned + */ + public static function getTokenName( $type ) + { + $names = array( + self::WHITESPACE => 'Whitespace', + self::NEWLINE => 'Newline', + self::BACKSLASH => 'Backslash', + self::SPECIAL_CHARS => 'Special character group', + self::TEXT_LINE => 'Text', + self::EOF => 'End Of File', + ); + + if ( !isset( $names[$type] ) ) + { + return 'Unknown'; + } + + return $names[$type]; + } + + /** * Set state after var_export * * @param array $properties Modified: experimental/Document/src/document_autoload.php ============================================================================== --- experimental/Document/src/document_autoload.php [iso-8859-1] (original) +++ experimental/Document/src/document_autoload.php [iso-8859-1] Tue Feb 26 15:55:48 2008 @@ -37,6 +37,7 @@ 'ezcDocumentRst' => 'Document/document/rst.php', 'ezcDocumentRstDocumentNode' => 'Document/document/rst/nodes/document.php', 'ezcDocumentRstOptions' => 'Document/options/document_rst.php', + 'ezcDocumentRstParagraphNode' => 'Document/document/rst/nodes/paragraph.php', 'ezcDocumentRstParser' => 'Document/document/rst/parser.php', 'ezcDocumentRstSectionNode' => 'Document/document/rst/nodes/section.php', 'ezcDocumentRstTextLineNode' => 'Document/document/rst/nodes/text_line.php', Added: experimental/Document/tests/files/rst/parser/003_simple_text.rst ============================================================================== --- experimental/Document/tests/files/rst/parser/003_simple_text.rst (added) +++ experimental/Document/tests/files/rst/parser/003_simple_text.rst [iso-8859-1] Tue Feb 26 15:55:48 2008 @@ -1,0 +1,131 @@ +<?php + +return ezcDocumentRstDocumentNode::__set_state(array( + 'depth' => 0, + 'line' => 0, + 'position' => 0, + 'type' => 0, + 'nodes' => + array ( + 0 => + ezcDocumentRstSectionNode::__set_state(array( + 'title' => 'Main Title', + 'depth' => 1, + 'line' => 2, + 'position' => 1, + 'type' => 1, + 'nodes' => + array ( + 0 => 
ezcDocumentRstParagraphNode::__set_state(array( + 'line' => 5, + 'position' => 12, + 'type' => 3, + 'nodes' => + array ( + 0 => + ezcDocumentRstTextLineNode::__set_state(array( + 'line' => 5, + 'position' => 1, + 'type' => 4, + 'nodes' => + array ( + ), + 'token' => + ezcDocumentRstToken::__set_state(array( + 'type' => 5, + 'content' => 'Hello world', + 'line' => 5, + 'position' => 1, + )), + )), + ), + 'token' => + ezcDocumentRstToken::__set_state(array( + 'type' => 2, + 'content' => ' +', + 'line' => 5, + 'position' => 12, + )), + )), + 1 => + ezcDocumentRstSectionNode::__set_state(array( + 'title' => 'Subsection', + 'depth' => 2, + 'line' => 7, + 'position' => 1, + 'type' => 1, + 'nodes' => + array ( + 0 => + ezcDocumentRstParagraphNode::__set_state(array( + 'line' => 11, + 'position' => 20, + 'type' => 3, + 'nodes' => + array ( + 0 => + ezcDocumentRstTextLineNode::__set_state(array( + 'line' => 10, + 'position' => 1, + 'type' => 4, + 'nodes' => + array ( + ), + 'token' => + ezcDocumentRstToken::__set_state(array( + 'type' => 5, + 'content' => 'A bit more text, which exceeds one line to test for a simple paragraph with', + 'line' => 10, + 'position' => 1, + )), + )), + 1 => + ezcDocumentRstTextLineNode::__set_state(array( + 'line' => 11, + 'position' => 1, + 'type' => 4, + 'nodes' => + array ( + ), + 'token' => + ezcDocumentRstToken::__set_state(array( + 'type' => 5, + 'content' => 'more then one line.', + 'line' => 11, + 'position' => 1, + )), + )), + ), + 'token' => + ezcDocumentRstToken::__set_state(array( + 'type' => 2, + 'content' => ' +', + 'line' => 11, + 'position' => 20, + )), + )), + ), + 'token' => + ezcDocumentRstToken::__set_state(array( + 'type' => 5, + 'content' => 'Subsection', + 'line' => 7, + 'position' => 1, + )), + )), + ), + 'token' => + ezcDocumentRstToken::__set_state(array( + 'type' => 5, + 'content' => 'Main Title', + 'line' => 2, + 'position' => 1, + )), + )), + ), + 'token' => NULL, +)); + Added: 
experimental/Document/tests/files/rst/parser/003_simple_text.txt ============================================================================== --- experimental/Document/tests/files/rst/parser/003_simple_text.txt (added) +++ experimental/Document/tests/files/rst/parser/003_simple_text.txt [iso-8859-1] Tue Feb 26 15:55:48 2008 @@ -1,0 +1,12 @@ +========== +Main Title +========== + +Hello world + +Subsection +========== + +A bit more text, which exceeds one line to test for a simple paragraph with +more then one line. + -- svn-components mailing list svn-components@lists.ez.no http://lists.ez.no/mailman/listinfo/svn-components