http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96052

Revision: 96052
Author:   vasilievvv
Date:     2011-09-01 22:19:16 +0000 (Thu, 01 Sep 2011)
Log Message:
-----------
WikiScripts native parser:
* A lot of optimization
* Makefile
* Some bugfixes
* License

Modified Paths:
--------------
    trunk/extensions/WikiScripts/interpreter/buildLRTables.php
    trunk/extensions/WikiScripts/interpreter/nparser/parser.c
    trunk/extensions/WikiScripts/interpreter/nparser/parser.h
    trunk/extensions/WikiScripts/interpreter/nparser/scanner.c
    trunk/extensions/WikiScripts/interpreter/nparser/stack.c
    trunk/extensions/WikiScripts/interpreter/nparser/stack.h
    trunk/extensions/WikiScripts/interpreter/nparser/test.c
    trunk/extensions/WikiScripts/interpreter/nparser/tree.c
    trunk/extensions/WikiScripts/interpreter/nparser/tree.h

Added Paths:
-----------
    trunk/extensions/WikiScripts/interpreter/nparser/COPYING
    trunk/extensions/WikiScripts/interpreter/nparser/Makefile
    trunk/extensions/WikiScripts/interpreter/nparser/config.h
    trunk/extensions/WikiScripts/interpreter/nparser/wsparse.c
    trunk/extensions/WikiScripts/interpreter/nparser/wsparse.h

Modified: trunk/extensions/WikiScripts/interpreter/buildLRTables.php
===================================================================
--- trunk/extensions/WikiScripts/interpreter/buildLRTables.php  2011-09-01 
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/buildLRTables.php  2011-09-01 
22:19:16 UTC (rev 96052)
@@ -1,6 +1,25 @@
 <?php
-
 /**
+ * Copyright (C) 2009-2011 by Victor Vasiliev
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ * 
  * An ugly tool to build SLR-tables.
  *
  * Reads a BNF-ish grammar (strings without <> are terminals) from
@@ -788,6 +807,9 @@
        // Build C files
        file_put_contents( "{$base}/nparser/tokenids.h", 
$grammar->buildCTokenHeader() );
        file_put_contents( "{$base}/nparser/scanner_names.c", 
$grammar->buildCTokenList() );
-       file_put_contents( "{$base}/nparser/lrtable.c", $grammar->buildCTable() 
);
        file_put_contents( "{$base}/nparser/lrtable.h", 
$grammar->buildCTableHeader( $ts ) );
+
+       // LR table file will be automatically regenerated
+       if( file_exists( "{$base}/nparser/lrtable.c" ) )
+               unlink( "{$base}/nparser/lrtable.c" );
 }

Added: trunk/extensions/WikiScripts/interpreter/nparser/COPYING
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/COPYING                    
        (rev 0)
+++ trunk/extensions/WikiScripts/interpreter/nparser/COPYING    2011-09-01 
22:19:16 UTC (rev 96052)
@@ -0,0 +1,20 @@
+Copyright (C) 2011 by Victor Vasiliev
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+

Added: trunk/extensions/WikiScripts/interpreter/nparser/Makefile
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/Makefile                   
        (rev 0)
+++ trunk/extensions/WikiScripts/interpreter/nparser/Makefile   2011-09-01 
22:19:16 UTC (rev 96052)
@@ -0,0 +1,34 @@
+CC = gcc
+CFLAGS = -Wall -O2 
+LIBTOOL = libtool
+SRCS = lrtable.c parser.c scanner.c scanner_names.c stack.c tree.c wsparse.c
+OBJS = $(SRCS:.c=.o)
+LIBOBJS = $(SRCS:.c=.lo)
+HEADERS = lrtable.h parser.h scanner.h tokenids.h tree.h wsparse.h
+
+PREFIX = /usr/local
+
+.SUFFIXES: .c .lo .o
+
+all: libwsparse wsparsertest
+
+lrtable.c:
+       php ../buildLRTables.php --ctable
+
+.c.lo: config.h
+       $(LIBTOOL) --mode=compile $(CC) $(CFLAGS) -c -o $@ $<
+
+libwsparse: $(LIBOBJS)
+       $(LIBTOOL) --mode=link $(CC) $(CFLAGS) $(LIBOBJS) -o libwsparse.la 
-rpath $(PREFIX)/lib
+
+wsparsertest: test.c libwsparse
+       $(CC) $(CFLAGS) test.c -o wsparsertest -L ./.libs -l :libwsparse.a
+
+install: libwsparse
+       $(LIBTOOL) --mode=install install -c libwsparse.la 
$(PREFIX)/lib/libwsparse.la
+       if [ ! -e $(PREFIX)/include/libwsparse ]; then mkdir -vm 755 
$(PREFIX)/include/libwsparse; fi
+       install -c -t $(PREFIX)/include/libwsparse $(HEADERS)
+
+clean:
+       rm -vf *.o *.lo *.la wsparsertest lrtable.c
+       rm -rvf .libs


Property changes on: trunk/extensions/WikiScripts/interpreter/nparser/Makefile
___________________________________________________________________
Added: svn:eol-style
   + native

Added: trunk/extensions/WikiScripts/interpreter/nparser/config.h
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/config.h                   
        (rev 0)
+++ trunk/extensions/WikiScripts/interpreter/nparser/config.h   2011-09-01 
22:19:16 UTC (rev 96052)
@@ -0,0 +1,21 @@
+/**
+ * Configuration file for libwsparse.
+ */
+
+#ifndef _WS_CONFIG_H
+#define _WS_CONFIG_H
+
+/**
+ * By default, the parser keeps several orphan nodes in the list of nodes, 
because
+ * moving them would take extra time that from opinion of the developer 
writing this
+ * lines is not worth it. If you care, you may actually define it.
+ */
+#undef WS_USE_COMPLETE_OPTIMIZATION
+
+#define WS_TREE_SIZE_INIT 256
+#define WS_TREE_SIZE_STEP 256
+
+#define WS_STACK_SIZE_INIT 50
+#define WS_STACK_SIZE_STEP 20
+
+#endif /* _WS_CONFIG_H */


Property changes on: trunk/extensions/WikiScripts/interpreter/nparser/config.h
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Rev URL
Added: svn:eol-style
   + native

Modified: trunk/extensions/WikiScripts/interpreter/nparser/parser.c
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/parser.c   2011-09-01 
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/parser.c   2011-09-01 
22:19:16 UTC (rev 96052)
@@ -1,4 +1,6 @@
 #include <stdlib.h>
+#include <string.h>
+#include <memory.h>
 #include "parser.h"
 #include "stack.h"
 #include "scanner.h"
@@ -71,11 +73,14 @@
                                DO_CLEANUP();
                                output.errno = WS_PARSER_SYNTAX_ERROR;
                                output.errarg = act.action;     // So we can 
give some meaningful error message
+                               output.errline = token->lineno;
                                return output;
                        case WS_PARSER_SHIFT:
                                node.type = WS_PARSER_TERM;
                                node.value = token->type;
+                               node.firstlink = WS_NODE_ID_INVALID;
                                nodeid = ws_parser_tree_add( tree, node, 
token->value );
+                               tree->singleprods[nodeid] = WS_NODE_ID_INVALID;
 
                                if( nodeid == WS_NODE_ID_INVALID ) {
                                        DO_CLEANUP();
@@ -105,18 +110,78 @@
 
                                node.type = WS_PARSER_NONTERM;
                                node.value = nonterm;
-                               nodeid = ws_parser_tree_add( tree, node, NULL );
+                               node.firstlink = WS_NODE_ID_INVALID;
+                               nodeid = WS_NODE_ID_INVALID;
 
+                               // Tree optimization part
+                               // Looks for patterns like A -> exprA -> exprB 
(where exprA has only exprB as a child)
+                               // and replaces them with A -> exprB. We are 
currently at node A and exprA is somewhere in stack
+                               for( i = 0; i < prodlen; i++ ) {
+                                       ws_parser_stack_entry* entry = 
stack->content + (stack->count - i - 1);
+                                       ws_parser_node *opt_parent, *opt_child; 
// parent is exprA and child is exprB
+                                       ws_parser_node_id id_parent, id_child;
+                                       id_parent = entry->node;
+                                       opt_parent = tree->nodes + id_parent;
+                                       if( 
+                                               tree->singleprods[id_parent] != 
WS_NODE_ID_INVALID &&   // has single child; also implies it's a non-terminal
+                                               strstr( 
ws_parser_nonterminal_names[opt_parent->value], "expr" )        // is an "expr"
+                                       ) {
+                                               // Maybe can be optimized. 
Let's look at the child
+                                               id_child = 
tree->singleprods[entry->node];
+                                               opt_child = tree->nodes + 
id_child;
+                                               if(
+                                                       opt_child->type == 
WS_PARSER_NONTERM &&
+                                                       strstr( 
ws_parser_nonterminal_names[opt_child->value], "expr" )
+                                               ) {
+                                                       if( nodeid == 
WS_NODE_ID_INVALID ) {
+                                                               // Replace old 
node (exprA) with our current one (A)
+                                                               
tree->nodes[id_parent] = node;
+                                                               nodeid = 
id_parent;
+                                                               memset( 
tree->links + nodeid, UCHAR_MAX, sizeof( ws_parser_node_link ) );
+
+                                                               // Replace 
exprA with exprB in stack (so it will be linked as a child)
+                                                               entry->node = 
id_child;
+
+                                                               // Reset 
singleprods record and go on
+                                                               
tree->singleprods[entry->node] = WS_NODE_ID_INVALID;
+                                                       } else {
+                                                               // Make exprA 
orphan to effectively remove it from the tree
+                                                               memset( 
tree->links + entry->node, UCHAR_MAX, sizeof( ws_parser_node_link ) );
+
+                                                               // Replace 
exprA with exprB in stack (so it will be linked as a child)
+                                                               entry->node = 
tree->singleprods[entry->node];
+
+                                                               // Reset 
singleprods record and go on
+                                                               
tree->singleprods[entry->node] = WS_NODE_ID_INVALID;
+                                                       }
+                                               }
+                                       }
+                               }
+
                                if( nodeid == WS_NODE_ID_INVALID ) {
-                                       DO_CLEANUP();
-                                       output.errno = WS_PARSER_MEMORY;
-                                       return output;
+                                       nodeid = ws_parser_tree_add( tree, 
node, NULL );
+
+                                       if( nodeid == WS_NODE_ID_INVALID ) {
+                                               DO_CLEANUP();
+                                               output.errno = WS_PARSER_MEMORY;
+                                               return output;
+                                       }
                                }
 
+                               if( prodlen != 1 ) {
+                                       tree->singleprods[nodeid] = 
WS_NODE_ID_INVALID;
+                               }
+
                                // Add prodlen nodes from the stack in the 
reverse order to the tree
                                for( i = 0; i < prodlen; i++ ) {
                                        top = ws_parser_stack_top( stack );
                                        ws_parser_tree_link( tree, nodeid, 
top.node, prodlen - i - 1 );
+
+                                       // If it is a single-child production, 
track for optimization purposes
+                                       if( prodlen == 1 ) {
+                                               tree->singleprods[nodeid] = 
top.node;
+                                       }
+
                                        if( !ws_parser_stack_pop( stack ) ) {
                                                DO_CLEANUP();
                                                output.errno = 
WS_PARSER_INTERNAL_ERROR;
@@ -124,10 +189,6 @@
                                        }
                                }
 
-                               if( prodlen == 1 ) {
-                                       tree->singleprods[nodeid] = true;
-                               }
-
                                top = ws_parser_stack_top( stack );
 
                                if( ws_parser_table_goto[top.state][nonterm] != 
0xFF ) {

Modified: trunk/extensions/WikiScripts/interpreter/nparser/parser.h
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/parser.h   2011-09-01 
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/parser.h   2011-09-01 
22:19:16 UTC (rev 96052)
@@ -59,6 +59,7 @@
        ws_parser_tree* tree;
        int errno;
        int errarg;
+       int errline;
 } ws_parser_output;
 
 /**

Modified: trunk/extensions/WikiScripts/interpreter/nparser/scanner.c
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/scanner.c  2011-09-01 
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/scanner.c  2011-09-01 
22:19:16 UTC (rev 96052)
@@ -29,27 +29,12 @@
 }
 
 ws_token *ws_scanner_current_token(ws_scanner_state *state) {
-       ws_token *token;
-       token = malloc( sizeof( ws_token ) );
-       if( !token )
-               return NULL;
-
-       memcpy( token, &(state->cur), sizeof( ws_token ) );
-
-       if( state->cur.value ) {
-               token->value = malloc( token->value_size );
-               if( !token->value )
-                       return NULL;
-               memcpy( token->value, state->cur.value, token->value_size );
-       }
-
-       return token;
+       state->cur.lineno = state->lineno;
+       return &(state->cur);
 }
 
 void ws_scanner_free_token(ws_token* token) {
-       if( token->value )
-               free( token->value );
-       free( token );
+       /* no-op */
 }
 
 bool ws_scanner_handle_operator(ws_scanner_state *state);
@@ -71,10 +56,14 @@
        memcpy( CURRENT_VALUE, state->code + start_pos, end_pos - start_pos ); \
        CURRENT_VALUE[end_pos - start_pos] = '\0';
 
+inline bool isidchar( char value ) {
+       return isalnum( value ) || value == '_';
+}
+
 bool ws_scanner_next(ws_scanner_state *state) {
        int start_pos;
 
-       // If there was a previous token, it must have been copied by 
ws_scanner_current_token()
+       // If there was a previous token, it must have been copied by whoever 
needed it
        // Free the memory used by the value of the previous token
        if( state->cur.value ) {
                free( state->cur.value );
@@ -179,7 +168,7 @@
        }
 
        // Handle operators
-       if( ispunct( CURRENT_CHAR ) ) {
+       if( ispunct( CURRENT_CHAR ) && CURRENT_CHAR != '_' ) {
                start_pos = state->pos;
                while( CURRENT_CHAR && ispunct( CURRENT_CHAR ) && 
ws_scanner_is_long_op( state, start_pos ) )
                        state->pos++;
@@ -215,9 +204,9 @@
        }
 
        // Handle IDs and keywords
-       if( isalpha( CURRENT_CHAR ) ) {
+       if( isidchar( CURRENT_CHAR ) ) {
                start_pos = state->pos;
-               while( CURRENT_CHAR && isalnum( CURRENT_CHAR ) )
+               while( CURRENT_CHAR && isidchar( CURRENT_CHAR ) )
                        state->pos++;
                SET_TOKEN_SUBSTR( start_pos, state->pos );
                state->cur.type = WS_TOKEN_ID;

Modified: trunk/extensions/WikiScripts/interpreter/nparser/stack.c
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/stack.c    2011-09-01 
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/stack.c    2011-09-01 
22:19:16 UTC (rev 96052)
@@ -1,5 +1,6 @@
 #include <stdlib.h>
 #include "stack.h"
+#include "config.h"
 
 ws_parser_stack* ws_parser_stack_init() {
        ws_parser_stack* stack;

Modified: trunk/extensions/WikiScripts/interpreter/nparser/stack.h
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/stack.h    2011-09-01 
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/stack.h    2011-09-01 
22:19:16 UTC (rev 96052)
@@ -9,9 +9,6 @@
 #ifndef _WS_STACK_H
 #define _WS_STACK_H
 
-#define WS_STACK_SIZE_INIT 20
-#define WS_STACK_SIZE_STEP 2
-
 /**
  * The entry of the parser stack. Contains the state and the node bound to 
that state.
  */

Modified: trunk/extensions/WikiScripts/interpreter/nparser/test.c
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/test.c     2011-09-01 
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/test.c     2011-09-01 
22:19:16 UTC (rev 96052)
@@ -1,11 +1,10 @@
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 
-#include "scanner.h"
-#include "parser.h"
-#include "stack.h"
+#include "wsparse.h"
 
-#define BUFFERSIZE 1024
+#define BUFFERSIZE 4 * 1024 * 1024
 
 void printNodesRec( ws_parser_tree* tree, ws_parser_node_id id, int rec ) {
        int i;
@@ -30,23 +29,47 @@
 }
 
 int main( int argc, char** argv ) {
-       long i, j;
+       long long i, count;
        char* line = malloc( BUFFERSIZE );
 
-       fgets( line, BUFFERSIZE, stdin );
+       if( argc > 1 ) {
+               if( !strcmp( argv[1], "--help" ) ) {
+                       printf( "Parses the script input at STDIN\n" );
+                       printf( "Usage:\n" );
+                       printf( "\twsparsertest [runs]\n" );
+                       printf( "If number of runs is more than one, no parser 
tree is output.\n" );
+                       return EXIT_SUCCESS;
+               }
+               if( !strcmp( argv[1], "--version" ) ) {
+                       printf( "libwsparser %i.%i (%s)\n", 
WS_PARSER_VERSION_MAJOR, WS_PARSER_VERSION_MINOR, WS_PARSER_VERSION );
+                       return EXIT_SUCCESS;
+               }
+               count = atoll( argv[1] );
+       } else {
+               count = 1;
+       }
 
-       //for( i = 0; i < 1000000; i++ ) {
+       FILE* file = stdin;
+
+       i = 0;
+       while( ( line[i] = fgetc( file ) ) != EOF && i < BUFFERSIZE )
+               i++;
+       line[i] = '\0';
+
+       for( i = 0; i < count; i++ ) {
                ws_parser_output out;
                ws_parser_tree* tree;
                out = ws_parse( line );
                if( out.errno ) {
-                       printf( "Error: %i %i\n", out.errno, out.errarg );
+                       printf( "Error, errno: %i, err arg: %i, err line: 
%i\n", out.errno, out.errarg, out.errline );
                        return EXIT_FAILURE;
                }
                tree = out.tree;
-               printNodesRec( tree, tree->root, 0 );
+
+               if( count < 2 )
+                       printNodesRec( tree, tree->root, 0 );
                ws_parser_tree_free( tree );
-       //}
+       }
 
        free( line );
 

Modified: trunk/extensions/WikiScripts/interpreter/nparser/tree.c
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/tree.c     2011-09-01 
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/tree.c     2011-09-01 
22:19:16 UTC (rev 96052)
@@ -1,9 +1,21 @@
-#include "parser.h"
 #include <stdlib.h>
 #include <memory.h>
 
+#include "parser.h"
+#include "config.h"
+
+ws_parser_tree* ws_parser_tree_init_sized(int nodes, bool finalized);
+
 ws_parser_tree* ws_parser_tree_init() {
+       return ws_parser_tree_init_sized( WS_TREE_SIZE_INIT, false );
+}
+
+/**
+ * Internal function that allocates memory for tree of given size.
+ */
+ws_parser_tree* ws_parser_tree_init_sized(int nodes, bool finalized) {
        ws_parser_tree* tree;
+       int links;
 
        tree = malloc( sizeof( ws_parser_tree ) );
        if( !tree ) {
@@ -11,34 +23,38 @@
        }
        memset( tree, '\0', sizeof( ws_parser_tree ) );
 
-       tree->nodes = malloc( WS_TREE_SIZE_INIT * sizeof( ws_parser_node ) );
-       tree->allocated = WS_TREE_SIZE_INIT;
+       tree->nodes = malloc( nodes * sizeof( ws_parser_node ) );
+       tree->allocated = nodes;
        if( !tree->nodes ) {
                ws_parser_tree_free( tree );
                return NULL;
        }
 
-       tree->links = malloc( WS_TREE_SIZE_INIT * sizeof( ws_parser_node_link ) 
);
+       links = finalized ? nodes - 1 : nodes;
+       tree->links = malloc( links * sizeof( ws_parser_node_link ) );
        if( !tree->links ) {
                ws_parser_tree_free( tree );
                return NULL;
        }
        // Since 0 may be a valid link ID, we fill it with ~0
-       memset( tree->links, UCHAR_MAX, WS_TREE_SIZE_INIT * sizeof( 
ws_parser_node_link ) );
+       memset( tree->links, UCHAR_MAX, links * sizeof( ws_parser_node_link ) );
 
-       tree->symbols = malloc( WS_TREE_SIZE_INIT * sizeof( char* ) );
+       tree->symbols = malloc( nodes * sizeof( char* ) );
        if( !tree->symbols ) {
                ws_parser_tree_free( tree );
                return NULL;
        }
 
-       tree->singleprods = malloc( WS_TREE_SIZE_INIT * sizeof( bool ) );
-       if( !tree->singleprods ) {
-               ws_parser_tree_free( tree );
-               return NULL;
+       if( !finalized ) {
+               tree->singleprods = malloc( nodes * sizeof( ws_parser_node_id ) 
);
+               if( !tree->singleprods ) {
+                       ws_parser_tree_free( tree );
+                       return NULL;
+               }
        }
-       memset( tree->singleprods, '\0', WS_TREE_SIZE_INIT * sizeof( bool ) );
 
+       tree->finalized = finalized;
+
        return tree;
 }
 
@@ -70,13 +86,12 @@
                }
                tree->symbols = symbols;
 
-               bool* singleprods;
-               singleprods = realloc( tree->singleprods, newcount * sizeof( 
bool ) );
+               ws_parser_node_id* singleprods;
+               singleprods = realloc( tree->singleprods, newcount * sizeof( 
ws_parser_node_id ) );
                if( !singleprods ) {
                        return WS_NODE_ID_INVALID;
                }
                tree->singleprods = singleprods;
-               memset( tree->singleprods + tree->count, '\0', 
WS_TREE_SIZE_STEP * sizeof( bool ) );
 
                tree->allocated += WS_TREE_SIZE_STEP;
        }
@@ -120,40 +135,44 @@
        }
 }
 
+// ws_parser_tree_finalize has subfunctions for the sake of the architecture 
and profiling
+void ws_parser_tree_finalize_relocate(ws_parser_tree* tree);
+void ws_parser_tree_finalize_index(ws_parser_tree* tree);
+
 bool ws_parser_tree_finalize(ws_parser_tree* tree) {
-       ws_parser_node_id i, j;
+#ifdef WS_USE_COMPLETE_OPTIMIZATION
+       // Remove orphan nodes and move the node at the end to the place of the 
removed ones
+       ws_parser_tree_finalize_relocate( tree );
+#endif /* WS_USE_COMPLETE_OPTIMIZATION */
 
-       // Reduce long single-member production branches
-       for( i = 0; i < tree->count; i++ ) {
-               if( tree->links[i].parent == WS_NODE_ID_INVALID )
-                       continue;
+       // We no longer need this
+       free( tree->singleprods );
+       tree->singleprods = NULL;
 
-               ws_parser_node_id parentid = tree->links[i].parent;
-               ws_parser_node *parent = tree->nodes + parentid;
-               ws_parser_node *child  = tree->nodes + i;
+       // Remove the empty link for the root node...
+       tree->links[tree->root] = tree->links[tree->count - 1];
+       // ...and sort them!
+       qsort( tree->links, tree->count - 1, sizeof( ws_parser_node_link ), 
ws_parser_tree_compare_links );
 
-               if( tree->singleprods[parentid] &&      // Parent has a single 
child
-                       tree->links[parentid].parent != WS_NODE_ID_INVALID && 
// Parent has parent
-                       child->type == WS_PARSER_NONTERM &&     // Child is a 
non-terminal
-                       strstr( ws_parser_nonterminal_names[parent->value], 
"expr" ) && // Both are expr nodes
-                       strstr( ws_parser_nonterminal_names[child->value],  
"expr" ) )
-               {
-                       // Relink
-                       tree->links[i].parent = tree->links[parentid].parent;
-                       tree->links[i].number = tree->links[parentid].number;
+       // Reallocate memory
+       tree->nodes = realloc( tree->nodes, tree->count * sizeof( 
ws_parser_node ) );
+       tree->links = realloc( tree->links, ( tree->count - 1 ) * sizeof( 
ws_parser_node_link ) );
+       tree->symbols = realloc( tree->symbols, tree->count * sizeof( char* ) );
 
-                       // Destroy old link
-                       memset( &tree->links[parentid], UCHAR_MAX, sizeof( 
ws_parser_node_link ) );
-                       
-                       // There may be more nodes
-                       i--;
-               }
-       }
+       // Fill the firstlink fields
+       ws_parser_tree_finalize_index( tree );
 
-       free( tree->singleprods );
-       tree->singleprods = NULL;
+       // We are done
+       tree->finalized = true;
 
-       // Remove orphan nodes and move the node at the end to the place of the 
removed ones
+       return true;
+}
+
+/**
+ * Remove orphan nodes and move the node at the end to the place of the 
removed ones.
+ */
+void ws_parser_tree_finalize_relocate(ws_parser_tree* tree) {
+       ws_parser_node_id i, j;
        for( i = 0; i < tree->count; i++ ) {
                if( tree->links[i].parent == WS_NODE_ID_INVALID && i != 
tree->root ) {
                        ws_parser_node_id oldid = tree->count - 1;
@@ -184,54 +203,146 @@
                        }
                }
        }
+}
 
-       // Remove the empty link for the root node...
-       tree->links[tree->root] = tree->links[tree->count - 1];
-       // ...and sort them!
-       qsort( tree->links, tree->count - 1, sizeof( ws_parser_node_link ), 
ws_parser_tree_compare_links );
-
-       // Reallocate memory
-       tree->nodes = realloc( tree->nodes, tree->count * sizeof( 
ws_parser_node ) );
-       tree->links = realloc( tree->links, ( tree->count - 1 ) * sizeof( 
ws_parser_node_link ) );
-       tree->symbols = realloc( tree->symbols, tree->count * sizeof( char* ) );
-
-       // We are done
-       tree->finalized = true;
-
-       return true;
+/**
+ * Fills the node.firstlink fields, allowing faster access to the tree.
+ */
+void ws_parser_tree_finalize_index(ws_parser_tree* tree) {
+       uint32_t i;
+       for( i = 0; i < tree->count - 1; i++ ) {
+               if( tree->links[i].number == 0 ) {
+                       tree->nodes[ tree->links[i].parent ].firstlink = i;
+               }
+       }
 }
 
 ws_parser_tree_children ws_parser_tree_search_children(ws_parser_tree* tree, 
ws_parser_node_id node) {
        ws_parser_tree_children result;
-       ws_parser_node_link target;
-       ws_parser_node_link* first;
+       uint32_t i;
 
+       result.count = 0;
+
        if( !tree->finalized || node >= tree->count ) {
-               result.count = 0;
                result.links = NULL;
                return result;
        }
 
-       target.parent = node;
-       target.number = 0;
-       first = bsearch( &target, tree->links, tree->count - 1, sizeof( 
ws_parser_node_link ), ws_parser_tree_compare_links );
-       if( first ) {
-               result.links = first;
-               result.count = 0;
-
-               ws_parser_node_link* cur;
-               for( cur = first; cur < tree->links + tree->count - 1; cur++ ) {
-                       if( cur->parent == node )
+       if( tree->nodes[node].firstlink == WS_NODE_ID_INVALID ) {
+               result.links = NULL;
+       } else {
+               result.links = tree->links + tree->nodes[node].firstlink;
+               for( i = 0; i < tree->nodes[node].firstlink - node; i++ ) {
+                       if( result.links[i].parent == node )
                                result.count++;
+                       else
+                               break;
                }
-       } else {
-               result.count = 0;
-               result.links = NULL;
        }
 
        return result;
 }
 
+ws_parser_node* ws_parser_tree_get_child(ws_parser_tree* tree, 
ws_parser_tree_children children, uint8_t number) {
+       if( number >= children.count ) {
+               return NULL;
+       }
+
+       return tree->nodes + children.links[number].child;
+}
+
+int ws_parser_tree_serialize_length(ws_parser_tree* tree) {
+       int i, symbols_length;
+       symbols_length = sizeof( uint32_t ) * tree->count;
+       for( i = 0; i < tree->count; i++ ) {
+               if( tree->symbols[i] )
+                       symbols_length += strlen( tree->symbols[i] );
+       }
+
+       return
+               sizeof( ws_parser_tree_header ) +
+               tree->count * sizeof( ws_parser_node ) +
+               ( tree->count - 1 ) * sizeof( ws_parser_node_link ) +
+               symbols_length;
+}
+
+void ws_parser_tree_serialize(ws_parser_tree* tree, void* buffer) {
+       ws_parser_tree_header header;
+       int i;
+       size_t size;
+
+       header.root = tree->root;
+       header.count = tree->count;
+       strcpy( header.version, WS_PARSER_VERSION );
+
+       size = sizeof( ws_parser_tree_header );
+       memcpy( buffer, &header, size );
+       buffer += size;
+
+       size = sizeof( ws_parser_node ) * tree->count;
+       memcpy( buffer, tree->nodes, size );
+       buffer += size;
+
+       size = sizeof( ws_parser_node_link ) * ( tree->count - 1 );
+       memcpy( buffer, tree->links, size );
+       buffer += size;
+
+       for( i = 0; i < tree->count; i++ ) {
+               uint32_t length;
+               length = tree->symbols[i] ?
+                       strlen( tree->symbols[i] ) :
+                       ((uint32_t)0);
+               *((uint32_t*)buffer) = length;
+               
+               buffer += sizeof( uint32_t );
+
+               memcpy( buffer, tree->symbols[i], length );
+               buffer += length;
+       }
+}
+
+ws_parser_tree* ws_parser_tree_unserialize(void* buffer) {
+       ws_parser_tree* tree;
+       ws_parser_tree_header* header;
+       uint32_t i;
+       size_t size;
+
+       header = buffer;
+       tree = ws_parser_tree_init_sized( header->count, true );
+       if( !tree ) {
+               return NULL;
+       }
+       if( strcmp( header->version, WS_PARSER_VERSION ) ) {
+               return NULL;
+       }
+       tree->root = header->root;
+       tree->count = header->count;
+       buffer += sizeof( ws_parser_tree_header );
+
+       size = sizeof( ws_parser_node ) * tree->count;
+       memcpy( tree->nodes, buffer, size );
+       buffer += size;
+
+       size = sizeof( ws_parser_node_link ) * ( tree->count - 1 );
+       memcpy( tree->links, buffer, size );
+       buffer += size;
+
+       for( i = 0; i < tree->count; i++ ) {
+               uint32_t length = *((uint32_t*)buffer);
+               buffer += sizeof( uint32_t );
+               if( length ) {
+                       tree->symbols[i] = malloc( length + 1 );
+                       memcpy( tree->symbols[i], buffer, length );
+                       tree->symbols[i][length] = '\0';
+                       buffer += length;
+               } else {
+                       tree->symbols[i] = NULL;
+               }
+       }
+
+       return tree;
+}
+
 void ws_parser_tree_free(ws_parser_tree* tree) {
        if( tree ) {
                if( tree->nodes )

Modified: trunk/extensions/WikiScripts/interpreter/nparser/tree.h
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/tree.h     2011-09-01 
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/tree.h     2011-09-01 
22:19:16 UTC (rev 96052)
@@ -14,9 +14,6 @@
 #include <stdbool.h>
 #include <limits.h>
 
-#define WS_TREE_SIZE_INIT 40
-#define WS_TREE_SIZE_STEP 10
-
 #define WS_NODE_ID_INVALID UINT_MAX
 
 typedef uint32_t ws_parser_node_id;
@@ -27,6 +24,9 @@
 
        /* ID of the (non-)terminal */
        uint8_t value;
+
+       /* Number of the link to the first child */
+       uint32_t firstlink;
 } ws_parser_node;
 
 
@@ -58,8 +58,8 @@
  * tree.
  */
 typedef struct {
-       int count;
-       int allocated;
+       uint32_t count;
+       uint32_t allocated;
        ws_parser_node* nodes;
 
        // When forming tree, number of allocated links = number of nodes.
@@ -73,15 +73,31 @@
        // Number of allocated and present symbols is mapped to the number of 
the nodes
        char** symbols;
 
-       // Temporary array used to store a flag about certain node having 
single child
-       bool* singleprods;
+       // Temporary array. If a node has a single child, it is stored here for 
optimization
+       // purposes
+       ws_parser_node_id* singleprods;
 } ws_parser_tree;
 
 /**
+ * This is the header of the serialized tree. The serialized tree looks like 
this:
+ * * Header
+ * * Nodes
+ * * Links
+ * * Symbols in the following format:
+ * ** Length (uint32_t)
+ * ** Contents
+ */
+typedef struct {
+       char version[24];
+       uint32_t count;
+       ws_parser_node_id root;
+} ws_parser_tree_header;
+
+/**
  * Results of the children search by node.
  */
 typedef struct {
-       int count;
+       uint8_t count;
        ws_parser_node_link* links;
 } ws_parser_tree_children;
 
@@ -115,11 +131,20 @@
 bool ws_parser_tree_finalize(ws_parser_tree* tree);
 
 /**
- *  Searches all the children of a given node and returns the result.
+ * Searches all the children of a given node and returns the result.
  */
 ws_parser_tree_children ws_parser_tree_search_children(ws_parser_tree* tree, 
ws_parser_node_id node);
 
 /**
+ * Returns a child by number.
+ */
+ws_parser_node* ws_parser_tree_get_child(ws_parser_tree* tree, 
ws_parser_tree_children children, uint8_t number);
+
+int ws_parser_tree_serialize_length(ws_parser_tree* tree);
+void ws_parser_tree_serialize(ws_parser_tree* tree, void* buffer);
+ws_parser_tree* ws_parser_tree_unserialize(void* buffer);
+
+/**
  * Free the resources allocated for the tree.
  */
 void ws_parser_tree_free(ws_parser_tree* tree);

Added: trunk/extensions/WikiScripts/interpreter/nparser/wsparse.c
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/wsparse.c                  
        (rev 0)
+++ trunk/extensions/WikiScripts/interpreter/nparser/wsparse.c  2011-09-01 
22:19:16 UTC (rev 96052)
@@ -0,0 +1,5 @@
+#include "wsparse.h"
+
+char* ws_parser_version() {
+       return WS_PARSER_VERSION;
+}


Property changes on: trunk/extensions/WikiScripts/interpreter/nparser/wsparse.c
___________________________________________________________________
Added: svn:eol-style
   + native

Added: trunk/extensions/WikiScripts/interpreter/nparser/wsparse.h
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/wsparse.h                  
        (rev 0)
+++ trunk/extensions/WikiScripts/interpreter/nparser/wsparse.h  2011-09-01 
22:19:16 UTC (rev 96052)
@@ -0,0 +1,44 @@
+/**
+ * WikiScripts parser external declarations.
+ * 
+ * Copyright (C) 2011 by Victor Vasiliev
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef _WS_WSPARSE_H
+#define _WS_WSPARSE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include "parser.h"
+#include "scanner.h"
+
+#define WS_PARSER_VERSION_MAJOR 0
+#define WS_PARSER_VERSION_MINOR 1
+
+char* ws_parser_version();
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _WS_WSPARSE_H */


Property changes on: trunk/extensions/WikiScripts/interpreter/nparser/wsparse.h
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Rev URL
Added: svn:eol-style
   + native


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to