http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96052
Revision: 96052
Author: vasilievvv
Date: 2011-09-01 22:19:16 +0000 (Thu, 01 Sep 2011)
Log Message:
-----------
WikiScripts native parser:
* A lot of optimization
* Makefile
* Some bugfixes
* License
Modified Paths:
--------------
trunk/extensions/WikiScripts/interpreter/buildLRTables.php
trunk/extensions/WikiScripts/interpreter/nparser/parser.c
trunk/extensions/WikiScripts/interpreter/nparser/parser.h
trunk/extensions/WikiScripts/interpreter/nparser/scanner.c
trunk/extensions/WikiScripts/interpreter/nparser/stack.c
trunk/extensions/WikiScripts/interpreter/nparser/stack.h
trunk/extensions/WikiScripts/interpreter/nparser/test.c
trunk/extensions/WikiScripts/interpreter/nparser/tree.c
trunk/extensions/WikiScripts/interpreter/nparser/tree.h
Added Paths:
-----------
trunk/extensions/WikiScripts/interpreter/nparser/COPYING
trunk/extensions/WikiScripts/interpreter/nparser/Makefile
trunk/extensions/WikiScripts/interpreter/nparser/config.h
trunk/extensions/WikiScripts/interpreter/nparser/wsparse.c
trunk/extensions/WikiScripts/interpreter/nparser/wsparse.h
Modified: trunk/extensions/WikiScripts/interpreter/buildLRTables.php
===================================================================
--- trunk/extensions/WikiScripts/interpreter/buildLRTables.php 2011-09-01
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/buildLRTables.php 2011-09-01
22:19:16 UTC (rev 96052)
@@ -1,6 +1,25 @@
<?php
-
/**
+ * Copyright (C) 2009-2011 by Victor Vasiliev
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
* An ugly tool to build SLR-tables.
*
* Reads a BNF-ish grammar (strings without <> are terminals) from
@@ -788,6 +807,9 @@
// Build C files
file_put_contents( "{$base}/nparser/tokenids.h",
$grammar->buildCTokenHeader() );
file_put_contents( "{$base}/nparser/scanner_names.c",
$grammar->buildCTokenList() );
- file_put_contents( "{$base}/nparser/lrtable.c", $grammar->buildCTable()
);
file_put_contents( "{$base}/nparser/lrtable.h",
$grammar->buildCTableHeader( $ts ) );
+
+ // LR table file will be automatically regenerated
+ if( file_exists( "{$base}/nparser/lrtable.c" ) )
+ unlink( "{$base}/nparser/lrtable.c" );
}
Added: trunk/extensions/WikiScripts/interpreter/nparser/COPYING
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/COPYING
(rev 0)
+++ trunk/extensions/WikiScripts/interpreter/nparser/COPYING 2011-09-01
22:19:16 UTC (rev 96052)
@@ -0,0 +1,20 @@
+Copyright (C) 2011 by Victor Vasiliev
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
Added: trunk/extensions/WikiScripts/interpreter/nparser/Makefile
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/Makefile
(rev 0)
+++ trunk/extensions/WikiScripts/interpreter/nparser/Makefile 2011-09-01
22:19:16 UTC (rev 96052)
@@ -0,0 +1,34 @@
+CC = gcc
+CFLAGS = -Wall -O2
+LIBTOOL = libtool
+SRCS = lrtable.c parser.c scanner.c scanner_names.c stack.c tree.c wsparse.c
+OBJS = $(SRCS:.c=.o)
+LIBOBJS = $(SRCS:.c=.lo)
+HEADERS = lrtable.h parser.h scanner.h tokenids.h tree.h wsparse.h
+
+PREFIX = /usr/local
+
+.SUFFIXES: .c .lo .o
+
+all: libwsparse wsparsertest
+
+lrtable.c:
+ php ../buildLRTables.php --ctable
+
+.c.lo: config.h
+ $(LIBTOOL) --mode=compile $(CC) $(CFLAGS) -c -o $@ $<
+
+libwsparse: $(LIBOBJS)
+ $(LIBTOOL) --mode=link $(CC) $(CFLAGS) $(LIBOBJS) -o libwsparse.la
-rpath $(PREFIX)/lib
+
+wsparsertest: test.c libwsparse
+ $(CC) $(CFLAGS) test.c -o wsparsertest -L ./.libs -l :libwsparse.a
+
+install: libwsparse
+ $(LIBTOOL) --mode=install install -c libwsparse.la
$(PREFIX)/lib/libwsparse.la
+ if [ ! -e $(PREFIX)/include/libwsparse ]; then mkdir -vm 755
$(PREFIX)/include/libwsparse; fi
+ install -c -t $(PREFIX)/include/libwsparse $(HEADERS)
+
+clean:
+ rm -vf *.o *.lo *.la wsparsertest lrtable.c
+ rm -rvf .libs
Property changes on: trunk/extensions/WikiScripts/interpreter/nparser/Makefile
___________________________________________________________________
Added: svn:eol-style
+ native
Added: trunk/extensions/WikiScripts/interpreter/nparser/config.h
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/config.h
(rev 0)
+++ trunk/extensions/WikiScripts/interpreter/nparser/config.h 2011-09-01
22:19:16 UTC (rev 96052)
@@ -0,0 +1,21 @@
+/**
+ * Configuration file for libwsparse.
+ */
+
+#ifndef _WS_CONFIG_H
+#define _WS_CONFIG_H
+
+/**
+ * By default, the parser keeps several orphan nodes in the list of nodes,
because
+ * moving them would take extra time that from opinion of the developer
writing this
+ * lines is not worth it. If you care, you may actually define it.
+ */
+#undef WS_USE_COMPLETE_OPTIMIZATION
+
+#define WS_TREE_SIZE_INIT 256
+#define WS_TREE_SIZE_STEP 256
+
+#define WS_STACK_SIZE_INIT 50
+#define WS_STACK_SIZE_STEP 20
+
+#endif /* _WS_CONFIG_H */
Property changes on: trunk/extensions/WikiScripts/interpreter/nparser/config.h
___________________________________________________________________
Added: svn:keywords
+ Author Date Id Rev URL
Added: svn:eol-style
+ native
Modified: trunk/extensions/WikiScripts/interpreter/nparser/parser.c
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/parser.c 2011-09-01
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/parser.c 2011-09-01
22:19:16 UTC (rev 96052)
@@ -1,4 +1,6 @@
#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
#include "parser.h"
#include "stack.h"
#include "scanner.h"
@@ -71,11 +73,14 @@
DO_CLEANUP();
output.errno = WS_PARSER_SYNTAX_ERROR;
output.errarg = act.action; // So we can
give some meaningful error message
+ output.errline = token->lineno;
return output;
case WS_PARSER_SHIFT:
node.type = WS_PARSER_TERM;
node.value = token->type;
+ node.firstlink = WS_NODE_ID_INVALID;
nodeid = ws_parser_tree_add( tree, node,
token->value );
+ tree->singleprods[nodeid] = WS_NODE_ID_INVALID;
if( nodeid == WS_NODE_ID_INVALID ) {
DO_CLEANUP();
@@ -105,18 +110,78 @@
node.type = WS_PARSER_NONTERM;
node.value = nonterm;
- nodeid = ws_parser_tree_add( tree, node, NULL );
+ node.firstlink = WS_NODE_ID_INVALID;
+ nodeid = WS_NODE_ID_INVALID;
+ // Tree optimization part
+ // Looks for patterns like A -> exprA -> exprB
(where exprA has only exprB as a child)
+ // and replaces them with A -> exprB. We are
currently at node A and exprA is somewhere in stack
+ for( i = 0; i < prodlen; i++ ) {
+ ws_parser_stack_entry* entry =
stack->content + (stack->count - i - 1);
+ ws_parser_node *opt_parent, *opt_child;
// parent is exprA and child is exprB
+ ws_parser_node_id id_parent, id_child;
+ id_parent = entry->node;
+ opt_parent = tree->nodes + id_parent;
+ if(
+ tree->singleprods[id_parent] !=
WS_NODE_ID_INVALID && // has single child; also implies it's a non-terminal
+ strstr(
ws_parser_nonterminal_names[opt_parent->value], "expr" ) // is an "expr"
+ ) {
+ // Maybe can be optimized.
Let's look at the child
+ id_child =
tree->singleprods[entry->node];
+ opt_child = tree->nodes +
id_child;
+ if(
+ opt_child->type ==
WS_PARSER_NONTERM &&
+ strstr(
ws_parser_nonterminal_names[opt_child->value], "expr" )
+ ) {
+ if( nodeid ==
WS_NODE_ID_INVALID ) {
+ // Replace old
node (exprA) with our current one (A)
+
tree->nodes[id_parent] = node;
+ nodeid =
id_parent;
+ memset(
tree->links + nodeid, UCHAR_MAX, sizeof( ws_parser_node_link ) );
+
+ // Replace
exprA with exprB in stack (so it will be linked as a child)
+ entry->node =
id_child;
+
+ // Reset
singleprods record and go on
+
tree->singleprods[entry->node] = WS_NODE_ID_INVALID;
+ } else {
+ // Make exprA
orphan to effectively remove it from the tree
+ memset(
tree->links + entry->node, UCHAR_MAX, sizeof( ws_parser_node_link ) );
+
+ // Replace
exprA with exprB in stack (so it will be linked as a child)
+ entry->node =
tree->singleprods[entry->node];
+
+ // Reset
singleprods record and go on
+
tree->singleprods[entry->node] = WS_NODE_ID_INVALID;
+ }
+ }
+ }
+ }
+
if( nodeid == WS_NODE_ID_INVALID ) {
- DO_CLEANUP();
- output.errno = WS_PARSER_MEMORY;
- return output;
+ nodeid = ws_parser_tree_add( tree,
node, NULL );
+
+ if( nodeid == WS_NODE_ID_INVALID ) {
+ DO_CLEANUP();
+ output.errno = WS_PARSER_MEMORY;
+ return output;
+ }
}
+ if( prodlen != 1 ) {
+ tree->singleprods[nodeid] =
WS_NODE_ID_INVALID;
+ }
+
// Add prodlen nodes from the stack in the
reverse order to the tree
for( i = 0; i < prodlen; i++ ) {
top = ws_parser_stack_top( stack );
ws_parser_tree_link( tree, nodeid,
top.node, prodlen - i - 1 );
+
+ // If it is a single-child production,
track for optimization purposes
+ if( prodlen == 1 ) {
+ tree->singleprods[nodeid] =
top.node;
+ }
+
if( !ws_parser_stack_pop( stack ) ) {
DO_CLEANUP();
output.errno =
WS_PARSER_INTERNAL_ERROR;
@@ -124,10 +189,6 @@
}
}
- if( prodlen == 1 ) {
- tree->singleprods[nodeid] = true;
- }
-
top = ws_parser_stack_top( stack );
if( ws_parser_table_goto[top.state][nonterm] !=
0xFF ) {
Modified: trunk/extensions/WikiScripts/interpreter/nparser/parser.h
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/parser.h 2011-09-01
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/parser.h 2011-09-01
22:19:16 UTC (rev 96052)
@@ -59,6 +59,7 @@
ws_parser_tree* tree;
int errno;
int errarg;
+ int errline;
} ws_parser_output;
/**
Modified: trunk/extensions/WikiScripts/interpreter/nparser/scanner.c
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/scanner.c 2011-09-01
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/scanner.c 2011-09-01
22:19:16 UTC (rev 96052)
@@ -29,27 +29,12 @@
}
ws_token *ws_scanner_current_token(ws_scanner_state *state) {
- ws_token *token;
- token = malloc( sizeof( ws_token ) );
- if( !token )
- return NULL;
-
- memcpy( token, &(state->cur), sizeof( ws_token ) );
-
- if( state->cur.value ) {
- token->value = malloc( token->value_size );
- if( !token->value )
- return NULL;
- memcpy( token->value, state->cur.value, token->value_size );
- }
-
- return token;
+ state->cur.lineno = state->lineno;
+ return &(state->cur);
}
void ws_scanner_free_token(ws_token* token) {
- if( token->value )
- free( token->value );
- free( token );
+ /* no-op */
}
bool ws_scanner_handle_operator(ws_scanner_state *state);
@@ -71,10 +56,14 @@
memcpy( CURRENT_VALUE, state->code + start_pos, end_pos - start_pos ); \
CURRENT_VALUE[end_pos - start_pos] = '\0';
+inline bool isidchar( char value ) {
+ return isalnum( value ) || value == '_';
+}
+
bool ws_scanner_next(ws_scanner_state *state) {
int start_pos;
- // If there was a previous token, it must have been copied by
ws_scanner_current_token()
+ // If there was a previous token, it must have been copied by whoever
needed it
// Free the memory used by the value of the previous token
if( state->cur.value ) {
free( state->cur.value );
@@ -179,7 +168,7 @@
}
// Handle operators
- if( ispunct( CURRENT_CHAR ) ) {
+ if( ispunct( CURRENT_CHAR ) && CURRENT_CHAR != '_' ) {
start_pos = state->pos;
while( CURRENT_CHAR && ispunct( CURRENT_CHAR ) &&
ws_scanner_is_long_op( state, start_pos ) )
state->pos++;
@@ -215,9 +204,9 @@
}
// Handle IDs and keywords
- if( isalpha( CURRENT_CHAR ) ) {
+ if( isidchar( CURRENT_CHAR ) ) {
start_pos = state->pos;
- while( CURRENT_CHAR && isalnum( CURRENT_CHAR ) )
+ while( CURRENT_CHAR && isidchar( CURRENT_CHAR ) )
state->pos++;
SET_TOKEN_SUBSTR( start_pos, state->pos );
state->cur.type = WS_TOKEN_ID;
Modified: trunk/extensions/WikiScripts/interpreter/nparser/stack.c
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/stack.c 2011-09-01
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/stack.c 2011-09-01
22:19:16 UTC (rev 96052)
@@ -1,5 +1,6 @@
#include <stdlib.h>
#include "stack.h"
+#include "config.h"
ws_parser_stack* ws_parser_stack_init() {
ws_parser_stack* stack;
Modified: trunk/extensions/WikiScripts/interpreter/nparser/stack.h
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/stack.h 2011-09-01
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/stack.h 2011-09-01
22:19:16 UTC (rev 96052)
@@ -9,9 +9,6 @@
#ifndef _WS_STACK_H
#define _WS_STACK_H
-#define WS_STACK_SIZE_INIT 20
-#define WS_STACK_SIZE_STEP 2
-
/**
* The entry of the parser stack. Contains the state and the node bound to
that state.
*/
Modified: trunk/extensions/WikiScripts/interpreter/nparser/test.c
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/test.c 2011-09-01
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/test.c 2011-09-01
22:19:16 UTC (rev 96052)
@@ -1,11 +1,10 @@
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
-#include "scanner.h"
-#include "parser.h"
-#include "stack.h"
+#include "wsparse.h"
-#define BUFFERSIZE 1024
+#define BUFFERSIZE 4 * 1024 * 1024
void printNodesRec( ws_parser_tree* tree, ws_parser_node_id id, int rec ) {
int i;
@@ -30,23 +29,47 @@
}
int main( int argc, char** argv ) {
- long i, j;
+ long long i, count;
char* line = malloc( BUFFERSIZE );
- fgets( line, BUFFERSIZE, stdin );
+ if( argc > 1 ) {
+ if( !strcmp( argv[1], "--help" ) ) {
+ printf( "Parses the script input at STDIN\n" );
+ printf( "Usage:\n" );
+ printf( "\twsparsertest [runs]\n" );
+ printf( "If number of runs is more than one, no parser
tree is output.\n" );
+ return EXIT_SUCCESS;
+ }
+ if( !strcmp( argv[1], "--version" ) ) {
+ printf( "libwsparser %i.%i (%s)\n",
WS_PARSER_VERSION_MAJOR, WS_PARSER_VERSION_MINOR, WS_PARSER_VERSION );
+ return EXIT_SUCCESS;
+ }
+ count = atoll( argv[1] );
+ } else {
+ count = 1;
+ }
- //for( i = 0; i < 1000000; i++ ) {
+ FILE* file = stdin;
+
+ i = 0;
+ while( ( line[i] = fgetc( file ) ) != EOF && i < BUFFERSIZE )
+ i++;
+ line[i] = '\0';
+
+ for( i = 0; i < count; i++ ) {
ws_parser_output out;
ws_parser_tree* tree;
out = ws_parse( line );
if( out.errno ) {
- printf( "Error: %i %i\n", out.errno, out.errarg );
+ printf( "Error, errno: %i, err arg: %i, err line:
%i\n", out.errno, out.errarg, out.errline );
return EXIT_FAILURE;
}
tree = out.tree;
- printNodesRec( tree, tree->root, 0 );
+
+ if( count < 2 )
+ printNodesRec( tree, tree->root, 0 );
ws_parser_tree_free( tree );
- //}
+ }
free( line );
Modified: trunk/extensions/WikiScripts/interpreter/nparser/tree.c
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/tree.c 2011-09-01
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/tree.c 2011-09-01
22:19:16 UTC (rev 96052)
@@ -1,9 +1,21 @@
-#include "parser.h"
#include <stdlib.h>
#include <memory.h>
+#include "parser.h"
+#include "config.h"
+
+ws_parser_tree* ws_parser_tree_init_sized(int nodes, bool finalized);
+
ws_parser_tree* ws_parser_tree_init() {
+ return ws_parser_tree_init_sized( WS_TREE_SIZE_INIT, false );
+}
+
+/**
+ * Internal function that allocates memory for tree of given size.
+ */
+ws_parser_tree* ws_parser_tree_init_sized(int nodes, bool finalized) {
ws_parser_tree* tree;
+ int links;
tree = malloc( sizeof( ws_parser_tree ) );
if( !tree ) {
@@ -11,34 +23,38 @@
}
memset( tree, '\0', sizeof( ws_parser_tree ) );
- tree->nodes = malloc( WS_TREE_SIZE_INIT * sizeof( ws_parser_node ) );
- tree->allocated = WS_TREE_SIZE_INIT;
+ tree->nodes = malloc( nodes * sizeof( ws_parser_node ) );
+ tree->allocated = nodes;
if( !tree->nodes ) {
ws_parser_tree_free( tree );
return NULL;
}
- tree->links = malloc( WS_TREE_SIZE_INIT * sizeof( ws_parser_node_link )
);
+ links = finalized ? nodes - 1 : nodes;
+ tree->links = malloc( links * sizeof( ws_parser_node_link ) );
if( !tree->links ) {
ws_parser_tree_free( tree );
return NULL;
}
// Since 0 may be a valid link ID, we fill it with ~0
- memset( tree->links, UCHAR_MAX, WS_TREE_SIZE_INIT * sizeof(
ws_parser_node_link ) );
+ memset( tree->links, UCHAR_MAX, links * sizeof( ws_parser_node_link ) );
- tree->symbols = malloc( WS_TREE_SIZE_INIT * sizeof( char* ) );
+ tree->symbols = malloc( nodes * sizeof( char* ) );
if( !tree->symbols ) {
ws_parser_tree_free( tree );
return NULL;
}
- tree->singleprods = malloc( WS_TREE_SIZE_INIT * sizeof( bool ) );
- if( !tree->singleprods ) {
- ws_parser_tree_free( tree );
- return NULL;
+ if( !finalized ) {
+ tree->singleprods = malloc( nodes * sizeof( ws_parser_node_id )
);
+ if( !tree->singleprods ) {
+ ws_parser_tree_free( tree );
+ return NULL;
+ }
}
- memset( tree->singleprods, '\0', WS_TREE_SIZE_INIT * sizeof( bool ) );
+ tree->finalized = finalized;
+
return tree;
}
@@ -70,13 +86,12 @@
}
tree->symbols = symbols;
- bool* singleprods;
- singleprods = realloc( tree->singleprods, newcount * sizeof(
bool ) );
+ ws_parser_node_id* singleprods;
+ singleprods = realloc( tree->singleprods, newcount * sizeof(
ws_parser_node_id ) );
if( !singleprods ) {
return WS_NODE_ID_INVALID;
}
tree->singleprods = singleprods;
- memset( tree->singleprods + tree->count, '\0',
WS_TREE_SIZE_STEP * sizeof( bool ) );
tree->allocated += WS_TREE_SIZE_STEP;
}
@@ -120,40 +135,44 @@
}
}
+// ws_parser_tree_finalize has subfunctions for the sake of the architecture
and profiling
+void ws_parser_tree_finalize_relocate(ws_parser_tree* tree);
+void ws_parser_tree_finalize_index(ws_parser_tree* tree);
+
bool ws_parser_tree_finalize(ws_parser_tree* tree) {
- ws_parser_node_id i, j;
+#ifdef WS_USE_COMPLETE_OPTIMIZATION
+ // Remove orphan nodes and move the node at the end to the place of the
removed ones
+ ws_parser_tree_finalize_relocate( tree );
+#endif /* WS_USE_COMPLETE_OPTIMIZATION */
- // Reduce long single-member production branches
- for( i = 0; i < tree->count; i++ ) {
- if( tree->links[i].parent == WS_NODE_ID_INVALID )
- continue;
+ // We no longer need this
+ free( tree->singleprods );
+ tree->singleprods = NULL;
- ws_parser_node_id parentid = tree->links[i].parent;
- ws_parser_node *parent = tree->nodes + parentid;
- ws_parser_node *child = tree->nodes + i;
+ // Remove the empty link for the root node...
+ tree->links[tree->root] = tree->links[tree->count - 1];
+ // ...and sort them!
+ qsort( tree->links, tree->count - 1, sizeof( ws_parser_node_link ),
ws_parser_tree_compare_links );
- if( tree->singleprods[parentid] && // Parent has a single
child
- tree->links[parentid].parent != WS_NODE_ID_INVALID &&
// Parent has parent
- child->type == WS_PARSER_NONTERM && // Child is a
non-terminal
- strstr( ws_parser_nonterminal_names[parent->value],
"expr" ) && // Both are expr nodes
- strstr( ws_parser_nonterminal_names[child->value],
"expr" ) )
- {
- // Relink
- tree->links[i].parent = tree->links[parentid].parent;
- tree->links[i].number = tree->links[parentid].number;
+ // Reallocate memory
+ tree->nodes = realloc( tree->nodes, tree->count * sizeof(
ws_parser_node ) );
+ tree->links = realloc( tree->links, ( tree->count - 1 ) * sizeof(
ws_parser_node_link ) );
+ tree->symbols = realloc( tree->symbols, tree->count * sizeof( char* ) );
- // Destroy old link
- memset( &tree->links[parentid], UCHAR_MAX, sizeof(
ws_parser_node_link ) );
-
- // There may be more nodes
- i--;
- }
- }
+ // Fill the firstlink fields
+ ws_parser_tree_finalize_index( tree );
- free( tree->singleprods );
- tree->singleprods = NULL;
+ // We are done
+ tree->finalized = true;
- // Remove orphan nodes and move the node at the end to the place of the
removed ones
+ return true;
+}
+
+/**
+ * Remove orphan nodes and move the node at the end to the place of the
removed ones.
+ */
+void ws_parser_tree_finalize_relocate(ws_parser_tree* tree) {
+ ws_parser_node_id i, j;
for( i = 0; i < tree->count; i++ ) {
if( tree->links[i].parent == WS_NODE_ID_INVALID && i !=
tree->root ) {
ws_parser_node_id oldid = tree->count - 1;
@@ -184,54 +203,146 @@
}
}
}
+}
- // Remove the empty link for the root node...
- tree->links[tree->root] = tree->links[tree->count - 1];
- // ...and sort them!
- qsort( tree->links, tree->count - 1, sizeof( ws_parser_node_link ),
ws_parser_tree_compare_links );
-
- // Reallocate memory
- tree->nodes = realloc( tree->nodes, tree->count * sizeof(
ws_parser_node ) );
- tree->links = realloc( tree->links, ( tree->count - 1 ) * sizeof(
ws_parser_node_link ) );
- tree->symbols = realloc( tree->symbols, tree->count * sizeof( char* ) );
-
- // We are done
- tree->finalized = true;
-
- return true;
+/**
+ * Fills the node.firstlink fields, allowing faster access to the tree.
+ */
+void ws_parser_tree_finalize_index(ws_parser_tree* tree) {
+ uint32_t i;
+ for( i = 0; i < tree->count - 1; i++ ) {
+ if( tree->links[i].number == 0 ) {
+ tree->nodes[ tree->links[i].parent ].firstlink = i;
+ }
+ }
}
ws_parser_tree_children ws_parser_tree_search_children(ws_parser_tree* tree,
ws_parser_node_id node) {
ws_parser_tree_children result;
- ws_parser_node_link target;
- ws_parser_node_link* first;
+ uint32_t i;
+ result.count = 0;
+
if( !tree->finalized || node >= tree->count ) {
- result.count = 0;
result.links = NULL;
return result;
}
- target.parent = node;
- target.number = 0;
- first = bsearch( &target, tree->links, tree->count - 1, sizeof(
ws_parser_node_link ), ws_parser_tree_compare_links );
- if( first ) {
- result.links = first;
- result.count = 0;
-
- ws_parser_node_link* cur;
- for( cur = first; cur < tree->links + tree->count - 1; cur++ ) {
- if( cur->parent == node )
+ if( tree->nodes[node].firstlink == WS_NODE_ID_INVALID ) {
+ result.links = NULL;
+ } else {
+ result.links = tree->links + tree->nodes[node].firstlink;
+ for( i = 0; i < tree->nodes[node].firstlink - node; i++ ) {
+ if( result.links[i].parent == node )
result.count++;
+ else
+ break;
}
- } else {
- result.count = 0;
- result.links = NULL;
}
return result;
}
+ws_parser_node* ws_parser_tree_get_child(ws_parser_tree* tree,
ws_parser_tree_children children, uint8_t number) {
+ if( number >= children.count ) {
+ return NULL;
+ }
+
+ return tree->nodes + children.links[number].child;
+}
+
+int ws_parser_tree_serialize_length(ws_parser_tree* tree) {
+ int i, symbols_length;
+ symbols_length = sizeof( uint32_t ) * tree->count;
+ for( i = 0; i < tree->count; i++ ) {
+ if( tree->symbols[i] )
+ symbols_length += strlen( tree->symbols[i] );
+ }
+
+ return
+ sizeof( ws_parser_tree_header ) +
+ tree->count * sizeof( ws_parser_node ) +
+ ( tree->count - 1 ) * sizeof( ws_parser_node_link ) +
+ symbols_length;
+}
+
+void ws_parser_tree_serialize(ws_parser_tree* tree, void* buffer) {
+ ws_parser_tree_header header;
+ int i;
+ size_t size;
+
+ header.root = tree->root;
+ header.count = tree->count;
+ strcpy( header.version, WS_PARSER_VERSION );
+
+ size = sizeof( ws_parser_tree_header );
+ memcpy( buffer, &header, size );
+ buffer += size;
+
+ size = sizeof( ws_parser_node ) * tree->count;
+ memcpy( buffer, tree->nodes, size );
+ buffer += size;
+
+ size = sizeof( ws_parser_node_link ) * ( tree->count - 1 );
+ memcpy( buffer, tree->links, size );
+ buffer += size;
+
+ for( i = 0; i < tree->count; i++ ) {
+ uint32_t length;
+ length = tree->symbols[i] ?
+ strlen( tree->symbols[i] ) :
+ ((uint32_t)0);
+ *((uint32_t*)buffer) = length;
+
+ buffer += sizeof( uint32_t );
+
+ memcpy( buffer, tree->symbols[i], length );
+ buffer += length;
+ }
+}
+
+ws_parser_tree* ws_parser_tree_unserialize(void* buffer) {
+ ws_parser_tree* tree;
+ ws_parser_tree_header* header;
+ uint32_t i;
+ size_t size;
+
+ header = buffer;
+ tree = ws_parser_tree_init_sized( header->count, true );
+ if( !tree ) {
+ return NULL;
+ }
+ if( strcmp( header->version, WS_PARSER_VERSION ) ) {
+ return NULL;
+ }
+ tree->root = header->root;
+ tree->count = header->count;
+ buffer += sizeof( ws_parser_tree_header );
+
+ size = sizeof( ws_parser_node ) * tree->count;
+ memcpy( tree->nodes, buffer, size );
+ buffer += size;
+
+ size = sizeof( ws_parser_node_link ) * ( tree->count - 1 );
+ memcpy( tree->links, buffer, size );
+ buffer += size;
+
+ for( i = 0; i < tree->count; i++ ) {
+ uint32_t length = *((uint32_t*)buffer);
+ buffer += sizeof( uint32_t );
+ if( length ) {
+ tree->symbols[i] = malloc( length + 1 );
+ memcpy( tree->symbols[i], buffer, length );
+ tree->symbols[i][length] = '\0';
+ buffer += length;
+ } else {
+ tree->symbols[i] = NULL;
+ }
+ }
+
+ return tree;
+}
+
void ws_parser_tree_free(ws_parser_tree* tree) {
if( tree ) {
if( tree->nodes )
Modified: trunk/extensions/WikiScripts/interpreter/nparser/tree.h
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/tree.h 2011-09-01
22:10:35 UTC (rev 96051)
+++ trunk/extensions/WikiScripts/interpreter/nparser/tree.h 2011-09-01
22:19:16 UTC (rev 96052)
@@ -14,9 +14,6 @@
#include <stdbool.h>
#include <limits.h>
-#define WS_TREE_SIZE_INIT 40
-#define WS_TREE_SIZE_STEP 10
-
#define WS_NODE_ID_INVALID UINT_MAX
typedef uint32_t ws_parser_node_id;
@@ -27,6 +24,9 @@
/* ID of the (non-)terminal */
uint8_t value;
+
+ /* Number of the link to the first child */
+ uint32_t firstlink;
} ws_parser_node;
@@ -58,8 +58,8 @@
* tree.
*/
typedef struct {
- int count;
- int allocated;
+ uint32_t count;
+ uint32_t allocated;
ws_parser_node* nodes;
// When forming tree, number of allocated links = number of nodes.
@@ -73,15 +73,31 @@
// Number of allocated and present symbols is mapped to the number of
the nodes
char** symbols;
- // Temporary array used to store a flag about certain node having
single child
- bool* singleprods;
+ // Temporary array. If a node has a single child, it is stored here for
optimization
+ // purposes
+ ws_parser_node_id* singleprods;
} ws_parser_tree;
/**
+ * This is the header of the serialized tree. The serialized tree looks like
this:
+ * * Header
+ * * Nodes
+ * * Links
+ * * Symbols in the following format:
+ * ** Length (uint32_t)
+ * ** Contents
+ */
+typedef struct {
+ char version[24];
+ uint32_t count;
+ ws_parser_node_id root;
+} ws_parser_tree_header;
+
+/**
* Results of the children search by node.
*/
typedef struct {
- int count;
+ uint8_t count;
ws_parser_node_link* links;
} ws_parser_tree_children;
@@ -115,11 +131,20 @@
bool ws_parser_tree_finalize(ws_parser_tree* tree);
/**
- * Searches all the children of a given node and returns the result.
+ * Searches all the children of a given node and returns the result.
*/
ws_parser_tree_children ws_parser_tree_search_children(ws_parser_tree* tree,
ws_parser_node_id node);
/**
+ * Returns a child by number.
+ */
+ws_parser_node* ws_parser_tree_get_child(ws_parser_tree* tree,
ws_parser_tree_children children, uint8_t number);
+
+int ws_parser_tree_serialize_length(ws_parser_tree* tree);
+void ws_parser_tree_serialize(ws_parser_tree* tree, void* buffer);
+ws_parser_tree* ws_parser_tree_unserialize(void* buffer);
+
+/**
* Free the resources allocated for the tree.
*/
void ws_parser_tree_free(ws_parser_tree* tree);
Added: trunk/extensions/WikiScripts/interpreter/nparser/wsparse.c
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/wsparse.c
(rev 0)
+++ trunk/extensions/WikiScripts/interpreter/nparser/wsparse.c 2011-09-01
22:19:16 UTC (rev 96052)
@@ -0,0 +1,5 @@
+#include "wsparse.h"
+
+char* ws_parser_version() {
+ return WS_PARSER_VERSION;
+}
Property changes on: trunk/extensions/WikiScripts/interpreter/nparser/wsparse.c
___________________________________________________________________
Added: svn:eol-style
+ native
Added: trunk/extensions/WikiScripts/interpreter/nparser/wsparse.h
===================================================================
--- trunk/extensions/WikiScripts/interpreter/nparser/wsparse.h
(rev 0)
+++ trunk/extensions/WikiScripts/interpreter/nparser/wsparse.h 2011-09-01
22:19:16 UTC (rev 96052)
@@ -0,0 +1,44 @@
+/**
+ * WikiScripts parser external declarations.
+ *
+ * Copyright (C) 2011 by Victor Vasiliev
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef _WS_WSPARSE_H
+#define _WS_WSPARSE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include "parser.h"
+#include "scanner.h"
+
+#define WS_PARSER_VERSION_MAJOR 0
+#define WS_PARSER_VERSION_MINOR 1
+
+char* ws_parser_version();
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _WS_WSPARSE_H */
Property changes on: trunk/extensions/WikiScripts/interpreter/nparser/wsparse.h
___________________________________________________________________
Added: svn:keywords
+ Author Date Id Rev URL
Added: svn:eol-style
+ native
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs