Author: kn
Date: Tue Feb 26 15:55:48 2008
New Revision: 7455

Log:
- Parse simple paragraphs

Added:
    experimental/Document/src/document/rst/nodes/paragraph.php
    experimental/Document/tests/files/rst/parser/003_simple_text.rst
    experimental/Document/tests/files/rst/parser/003_simple_text.txt
Modified:
    experimental/Document/design/class_diagram.png
    experimental/Document/src/document/rst/parser.php
    experimental/Document/src/document/rst/token.php
    experimental/Document/src/document_autoload.php

Modified: experimental/Document/design/class_diagram.png
==============================================================================
Binary files - no diff available.

Added: experimental/Document/src/document/rst/nodes/paragraph.php
==============================================================================
--- experimental/Document/src/document/rst/nodes/paragraph.php (added)
+++ experimental/Document/src/document/rst/nodes/paragraph.php [iso-8859-1] Tue 
Feb 26 15:55:48 2008
@@ -1,0 +1,52 @@
+<?php
+/**
+ * File containing the ezcDocumentRstParagraphNode struct
+ *
+ * @package TextLine
+ * @version //autogen//
+ * @copyright Copyright (C) 2005-2008 eZ systems as. All rights reserved.
+ * @license http://ez.no/licenses/new_bsd New BSD License
+ */
+
+/**
+ * The paragraph AST node
+ * 
+ * Node in the RST document AST which registers itself with the PARAGRAPH
+ * node type of ezcDocumentRstNode.
+ * 
+ * @package TextLine
+ * @version //autogen//
+ * @copyright Copyright (C) 2005-2008 eZ systems as. All rights reserved.
+ * @license http://ez.no/licenses/new_bsd New BSD License
+ */
+class ezcDocumentRstParagraphNode extends ezcDocumentRstNode
+{
+    /**
+     * Construct RST paragraph node
+     * 
+     * @param ezcDocumentRstToken $token Token the paragraph node is created from
+     * @return void
+     */
+    public function __construct( ezcDocumentRstToken $token )
+    {
+        // Hand the token over to the base node constructor, together with
+        // the PARAGRAPH node type constant.
+        parent::__construct( $token, self::PARAGRAPH );
+    }
+
+    /**
+     * Set state after var_export
+     * 
+     * Recreates the node from the 'token' and 'nodes' entries of the
+     * exported property array.
+     * 
+     * @param array $properties 
+     * @return ezcDocumentRstParagraphNode
+     * @ignore
+     */
+    public static function __set_state( $properties )
+    {
+        $node = new ezcDocumentRstParagraphNode(
+            $properties['token']
+        );
+
+        // The child nodes are not a constructor argument, so restore them
+        // on the freshly created node.
+        $node->nodes = $properties['nodes'];
+        return $node;
+    }
+}
+
+?>

Modified: experimental/Document/src/document/rst/parser.php
==============================================================================
--- experimental/Document/src/document/rst/parser.php [iso-8859-1] (original)
+++ experimental/Document/src/document/rst/parser.php [iso-8859-1] Tue Feb 26 
15:55:48 2008
@@ -64,6 +64,7 @@
         ezcDocumentRstToken::WHITESPACE => array(
         ),
         ezcDocumentRstToken::NEWLINE => array(
+            'shiftParagraph'
         ),
         ezcDocumentRstToken::BACKSLASH => array(
             'shiftBackslash',
@@ -97,9 +98,10 @@
      * @var array
      */
     protected $reductions = array(
-        ezcDocumentRstNode::DOCUMENT => 'reduceSection',
-        ezcDocumentRstNode::SECTION  => 'reduceSection',
-        ezcDocumentRstNode::TITLE    => 'reduceTitle',
+        ezcDocumentRstNode::DOCUMENT  => 'reduceSection',
+        ezcDocumentRstNode::SECTION   => 'reduceSection',
+        ezcDocumentRstNode::TITLE     => 'reduceTitle',
+        ezcDocumentRstNode::PARAGRAPH => 'reduceParagraph',
     );
 
     /**
@@ -145,9 +147,11 @@
      */
     public function parse( array $tokens )
     {
+        // echo "\n\nStart parser\n============\n\n";
+
         while ( ( $token = array_shift( $tokens ) ) !== null )
         {
-            // echo "[T] Token: {$token->type}\n";
+            // echo "[T] Token: " . ezcDocumentRstToken::getTokenName( 
$token->type ) . " ({$token->type})\n";
 
             // First shift given token by the defined reduction methods
             foreach ( $this->shifts[$token->type] as $method )
@@ -205,7 +209,7 @@
      * @param array $tokens 
      * @return ezcDocumentRstDocumentNode
      */
-    protected function shiftDocument( ezcDocumentRstToken $token, array 
$tokens )
+    protected function shiftDocument( ezcDocumentRstToken $token, array 
&$tokens )
     {
         // If there are any tokens left after the end of the file, something
         // went seriously wrong in the tokenizer.
@@ -227,7 +231,7 @@
      * @param array $tokens 
      * @return ezcDocumentRstTitleNode
      */
-    protected function shiftTitle( ezcDocumentRstToken $token, array $tokens )
+    protected function shiftTitle( ezcDocumentRstToken $token, array &$tokens )
     {
         if ( ( $token->position !== 1 ) ||
              ( $tokens[0]->type !== ezcDocumentRstToken::NEWLINE ) )
@@ -262,9 +266,36 @@
      * @param array $tokens 
      * @return ezcDocumentRstTitleNode
      */
-    protected function shiftText( ezcDocumentRstToken $token, array $tokens )
+    protected function shiftText( ezcDocumentRstToken $token, array &$tokens )
     {
         return new ezcDocumentRstTextLineNode(
+            $token
+        );
+    }
+    
+    /**
+     * Shift a paragraph node on two newlines
+     * 
+     * Creates a paragraph node from a newline token, if that token is
+     * directly followed by another newline token. All directly following
+     * newline tokens are removed from the passed token list.
+     * 
+     * @param ezcDocumentRstToken $token 
+     * @param array $tokens Remaining tokens, passed and modified by reference
+     * @return ezcDocumentRstParagraphNode|false
+     */
+    protected function shiftParagraph( ezcDocumentRstToken $token, array 
&$tokens )
+    {
+        if ( $tokens[0]->type !== ezcDocumentRstToken::NEWLINE )
+        {
+            // For now we only check for paragraphs closed with two newlines.
+            return false;
+        }
+
+        // Remove all following newlines from the referenced token list, so
+        // the caller does not see them anymore.
+        while ( isset( $tokens[0] ) &&
+                ( $tokens[0]->type === ezcDocumentRstToken::NEWLINE ) )
+        {
+            array_shift( $tokens );
+        }
+
+        return new ezcDocumentRstParagraphNode(
             $token
         );
     }
@@ -383,7 +414,6 @@
                 ezcDocumentRstNode::TABLE,
             ), true ) )
             {
-                // @TODO: Enhance error message
                 throw new ezcDocumentRstParserException(
                     $child->token,
                     "Unexpected node: " . ezcDocumentRstNode::getTokenName( 
$child->type ) . "."
@@ -417,7 +447,10 @@
                     // If the section level is higher then in our new node and
                     // lower the the last node, reduce sections.
                     // echo " -> Reduce section {$child->depth}.";
-                    $child->nodes = $collected;
+                    $child->nodes = array_merge( 
+                        $child->nodes,
+                        $collected
+                    );
                     $collected = array();
                 }
 
@@ -429,9 +462,43 @@
             $collected[] = $child;
         }
 
-        $node->nodes = $collected;
+        $node->nodes = array_merge(
+            $node->nodes,
+            $collected
+        );
         array_unshift( $this->documentStack, $node );
     }
+
+    /**
+     * Reduce paragraph
+     *
+     * Moves all TEXT_LINE nodes found at the top of the document stack into
+     * the given paragraph node. The paragraph is only put on the document
+     * stack if at least one such node was found; otherwise the stack is left
+     * unchanged.
+     * 
+     * @param ezcDocumentRstParagraphNode $node 
+     * @return void
+     */
+    protected function reduceParagraph( ezcDocumentRstNode $node )
+    {
+        // Number of text line nodes moved into the paragraph
+        $found = 0;
+
+        // Take text line nodes off the front of the document stack and
+        // prepend each of them to the child nodes of the paragraph.
+        while ( isset( $this->documentStack[0] ) &&
+            in_array( $this->documentStack[0]->type, array(
+                ezcDocumentRstNode::TEXT_LINE,
+            ), true ) )
+        {
+            // echo " - Append text to paragraph\n";
+            array_unshift( $node->nodes, array_shift( $this->documentStack ) );
+            ++$found;
+        }
+
+        // Only paragraphs which received at least one text line are kept.
+        if ( $found > 0 )
+        {
+            // echo " - Create paragraph\n";
+            array_unshift( $this->documentStack, $node );
+        }
+    }
 }
 
 ?>

Modified: experimental/Document/src/document/rst/token.php
==============================================================================
--- experimental/Document/src/document/rst/token.php [iso-8859-1] (original)
+++ experimental/Document/src/document/rst/token.php [iso-8859-1] Tue Feb 26 
15:55:48 2008
@@ -77,6 +77,33 @@
     }
 
     /**
+     * Get token name from type
+     *
+     * Return a user readable name from the numeric token type. Types which
+     * are not listed in the name map yield the string 'Unknown'.
+     * 
+     * @param int $type 
+     * @return string
+     */
+    public static function getTokenName( $type )
+    {
+        // Map of the token type constants to their readable names
+        $names = array(
+            self::WHITESPACE    => 'Whitespace',
+            self::NEWLINE       => 'Newline',
+            self::BACKSLASH     => 'Backslash',
+            self::SPECIAL_CHARS => 'Special character group',
+            self::TEXT_LINE     => 'Text',
+            self::EOF           => 'End Of File',
+        );
+
+        if ( !isset( $names[$type] ) )
+        {
+            return 'Unknown';
+        }
+
+        return $names[$type];
+    }
+
+    /**
      * Set state after var_export
      * 
      * @param array $properties 

Modified: experimental/Document/src/document_autoload.php
==============================================================================
--- experimental/Document/src/document_autoload.php [iso-8859-1] (original)
+++ experimental/Document/src/document_autoload.php [iso-8859-1] Tue Feb 26 
15:55:48 2008
@@ -37,6 +37,7 @@
     'ezcDocumentRst'                        => 'Document/document/rst.php',
     'ezcDocumentRstDocumentNode'            => 
'Document/document/rst/nodes/document.php',
     'ezcDocumentRstOptions'                 => 
'Document/options/document_rst.php',
+    'ezcDocumentRstParagraphNode'           => 
'Document/document/rst/nodes/paragraph.php',
     'ezcDocumentRstParser'                  => 
'Document/document/rst/parser.php',
     'ezcDocumentRstSectionNode'             => 
'Document/document/rst/nodes/section.php',
     'ezcDocumentRstTextLineNode'            => 
'Document/document/rst/nodes/text_line.php',

Added: experimental/Document/tests/files/rst/parser/003_simple_text.rst
==============================================================================
--- experimental/Document/tests/files/rst/parser/003_simple_text.rst (added)
+++ experimental/Document/tests/files/rst/parser/003_simple_text.rst 
[iso-8859-1] Tue Feb 26 15:55:48 2008
@@ -1,0 +1,131 @@
+<?php
+
+return ezcDocumentRstDocumentNode::__set_state(array(
+   'depth' => 0,
+   'line' => 0,
+   'position' => 0,
+   'type' => 0,
+   'nodes' => 
+  array (
+    0 => 
+    ezcDocumentRstSectionNode::__set_state(array(
+       'title' => 'Main Title',
+       'depth' => 1,
+       'line' => 2,
+       'position' => 1,
+       'type' => 1,
+       'nodes' => 
+      array (
+        0 => 
+        ezcDocumentRstParagraphNode::__set_state(array(
+           'line' => 5,
+           'position' => 12,
+           'type' => 3,
+           'nodes' => 
+          array (
+            0 => 
+            ezcDocumentRstTextLineNode::__set_state(array(
+               'line' => 5,
+               'position' => 1,
+               'type' => 4,
+               'nodes' => 
+              array (
+              ),
+               'token' => 
+              ezcDocumentRstToken::__set_state(array(
+                 'type' => 5,
+                 'content' => 'Hello world',
+                 'line' => 5,
+                 'position' => 1,
+              )),
+            )),
+          ),
+           'token' => 
+          ezcDocumentRstToken::__set_state(array(
+             'type' => 2,
+             'content' => '
+',
+             'line' => 5,
+             'position' => 12,
+          )),
+        )),
+        1 => 
+        ezcDocumentRstSectionNode::__set_state(array(
+           'title' => 'Subsection',
+           'depth' => 2,
+           'line' => 7,
+           'position' => 1,
+           'type' => 1,
+           'nodes' => 
+          array (
+            0 => 
+            ezcDocumentRstParagraphNode::__set_state(array(
+               'line' => 11,
+               'position' => 20,
+               'type' => 3,
+               'nodes' => 
+              array (
+                0 => 
+                ezcDocumentRstTextLineNode::__set_state(array(
+                   'line' => 10,
+                   'position' => 1,
+                   'type' => 4,
+                   'nodes' => 
+                  array (
+                  ),
+                   'token' => 
+                  ezcDocumentRstToken::__set_state(array(
+                     'type' => 5,
+                     'content' => 'A bit more text, which exceeds one line to 
test for a simple paragraph with',
+                     'line' => 10,
+                     'position' => 1,
+                  )),
+                )),
+                1 => 
+                ezcDocumentRstTextLineNode::__set_state(array(
+                   'line' => 11,
+                   'position' => 1,
+                   'type' => 4,
+                   'nodes' => 
+                  array (
+                  ),
+                   'token' => 
+                  ezcDocumentRstToken::__set_state(array(
+                     'type' => 5,
+                     'content' => 'more then one line.',
+                     'line' => 11,
+                     'position' => 1,
+                  )),
+                )),
+              ),
+               'token' => 
+              ezcDocumentRstToken::__set_state(array(
+                 'type' => 2,
+                 'content' => '
+',
+                 'line' => 11,
+                 'position' => 20,
+              )),
+            )),
+          ),
+           'token' => 
+          ezcDocumentRstToken::__set_state(array(
+             'type' => 5,
+             'content' => 'Subsection',
+             'line' => 7,
+             'position' => 1,
+          )),
+        )),
+      ),
+       'token' => 
+      ezcDocumentRstToken::__set_state(array(
+         'type' => 5,
+         'content' => 'Main Title',
+         'line' => 2,
+         'position' => 1,
+      )),
+    )),
+  ),
+   'token' => NULL,
+));
+

Added: experimental/Document/tests/files/rst/parser/003_simple_text.txt
==============================================================================
--- experimental/Document/tests/files/rst/parser/003_simple_text.txt (added)
+++ experimental/Document/tests/files/rst/parser/003_simple_text.txt 
[iso-8859-1] Tue Feb 26 15:55:48 2008
@@ -1,0 +1,12 @@
+==========
+Main Title
+==========
+
+Hello world
+
+Subsection
+==========
+
+A bit more text, which exceeds one line to test for a simple paragraph with
+more then one line.
+


-- 
svn-components mailing list
svn-components@lists.ez.no
http://lists.ez.no/mailman/listinfo/svn-components

Reply via email to