[antlr-dev] antlr C target regression - 3.1.2+?

Milos Jakubicek Thu, 03 Sep 2009 15:46:36 -0700

Hi,

I believe I've found a regression in the ANTLR C target. If I try to compile the polydiff example on 3.1.1, I get the following as the first tree:


(* 2 (^ x 3)) + (^ x 5) + (* 4 x) + (* 10 x) + (* 8 x) + x + 2

However, with antlr 3.1.2+, I get:

( 2 (^ x 3)) + (^ x 5) + ( 4 x) + ( 10 x) + ( 8 x) + x + 2

(the '*'s are missing!) Seems that one cannot use imaginary tokens?

Also, I'm trying to convert my own antlr2 grammar into an antlr3 grammar, which has turned into a nightmare. I currently have a semi-working grammar (see attached; input text could be e.g. [word="a"]), which, however, produces no trees (cqpAST.tree = NULL, "No tree produced" in my code, which is attached too), although it finishes without any errors. Any idea what might cause that? I don't know if this is somehow related to the above-mentioned problem (I'm unable to try my grammar with 3.1.1 because of another error which I can't explain, but which vanishes in 3.1.2+).

I have spent a disgusting amount of time on the move from antlr2 to antlr3, and now it seems to be totally wasted :(


Thank you very much in advance for any advice!
Milos Jakubicek

//  Copyright (c) 1999-2008  Pavel Rychly

grammar cqp;

options
{
        output = AST;
        language = C;
        ASTLabelType = pANTLR3_BASE_TREE;
        k = 2;
}

tokens {
        OPT;
        REPOPT;
        SEQ;
        ANYPOS;
        BEGSTRUCT;
        ENDSTRUCT;
        WHOLESTRUCT;
    KW_MEET;
    KW_UNION;
    KW_WITHIN;
    KW_MU;
}

@header {
#include <finlib/fsop.hh>
#include <cstdlib>
#include "parsop.hh"
#include "frsop.hh"
#include "corpus.hh"
}

@rulecatch { } 

@lexer::members {
        char * strip(char *s, char c);
        // in corp/corpconf.g
}

WS_ :   (' '
    |   '\t'
    |   '\n'
    |   '\r')
        { SKIP(); }
    ;

NUMBER:   ('0'..'9')+ ;
NNUMBER:   '-' ('0'..'9')+ ;

ATTR :     ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')* ;
REGEXP: '"' ('\\' . |~'"')* '"'
                { $text->chars = (pANTLR3_UINT8) strip((char *) $text->chars, 
'"'); }
                ;
LPAREN: '(';
RPAREN: ')';
LBRACKET: '[';
RBRACKET: ']';
LBRACE: '{';
RBRACE: '}';

STAR:   '*';
PLUS:   '+';
QUEST:  '?';

BINOR:  '|';
BINAND: '&';
DOT:    '.';
COMMA:  ',';
SEMI:   ';';
COLON:  ':';
EQ:     '=';
NEQ:    '!' '=';
NOT:    '!';
LSTRUCT:'<';
RSTRUCT:'>';
SLASH:  '/';

//* TODO:
//       * functions - f( ), bigram(  ), ...
//       * comparison control flags  %c - case insensitive
//                                   %d - diacritics insensitive (charScript?)
//       * within NUMBER
//         within NUMBER STRUCT ?
//*

query 
        :
        (m=ATTR^ {$m.text->compare($m.text, "MU") == 0}? mupart
                        {$m->setType($m, KW_MU);}
                |altern
                )
        (BINAND^ globpart)?
        (a=ATTR^ {$a.text->compare($a.text, "within") == 0}? withinpart 
                        {$a->setType($a, KW_WITHIN);}
                )*
                SEMI!
        ;

globpart
    : globcond (BINAND! globcond)*
    ;

globcond
    : NUMBER DOT^ ATTR (EQ|NEQ) NUMBER DOT! ATTR
    ; 

withinpart 
        : (NOT)?
        ( LSTRUCT! structure RSTRUCT!        //{/ a2}
            //| NUMBER               //{/ (a1, '')}
            //| NUMBER ("<"! structure ">"!)? //{/ (a1, a3)}
        | alignedpart
        )       
        ;

structure
        //: ATTR             //{/ a1}
        : ATTR (attvallist ) ?  //{/ (a1, a2)}
        ;

oneposonly
    : attvallist SEMI!
    ;
        
alignedpart
    : ATTR COLON^  altern   // parallel alignment
    ;
//-------------------- meet/union query --------------------
mupart
        : LPAREN! 
        (
            (unionop) => unionop
        |
            (meetop) => meetop
        ) 
        RPAREN!
        |oneposition
        ;

integer
        : NUMBER
        | n=NNUMBER -> ^(NUMBER[$n,$n.text])
        ;

meetop
        :
        a=ATTR {$a.text->compare($a.text, "meet") == 0}? mupart mupart (integer 
integer)?
                        {$a->setType ($a, KW_MEET);}
        ;

unionop
        :
        a=ATTR {$a.text->compare($a.text, "union") == 0}? mupart mupart
                        {$a->setType ($a, KW_UNION);}
        ;

//-------------------- regular expression query --------------------
altern 
        : seq (BINOR^ seq)*
        ;

seq 
        : (repetition)+ -> ^(SEQ repetition+)
        ;

repetition 
        : atomquery (
                        repopt  -> ^(REPOPT["REp"] $repetition)
                )?
        | LSTRUCT
                ( structure 
                        ( SLASH   -> ^(WHOLESTRUCT["<str/>"] structure)
                        |         -> ^(BEGSTRUCT["<str>"])
                        )
                | SLASH structure 
                                  -> ^(ENDSTRUCT["</str>"] structure)
                )
                RSTRUCT
        ;

attvallist 
        : attvaland (BINOR^ attvaland)*
        ;

attvaland 
        : attval (BINAND^ attval)*
        ;

attval
        : ATTR (EQ^|NEQ^) REGEXP
    | NOT^ attval
        | LPAREN! attvallist RPAREN!
        ;

atomquery
        : oneposition     
        | n=NUMBER c=COLON p=oneposition
        -> ^(LBRACKET["["] ^($c $n $p))
        | LPAREN! altern RPAREN!
        ;

oneposition 
        : LBRACKET (attvallist
                | -> ^(LBRACKET["["] ^(ANYPOS["[]"]))
                ) RBRACKET
        | REGEXP -> ^(LBRACKET["["] ^(EQ["="]  ATTR["-"] REGEXP))
        ;

repopt!
        : STAR   -> ^(REPOPT["RO"] NUMBER["0"] NUMBER["-1"])
        | PLUS   -> ^(REPOPT["RO"] NUMBER["1"] NUMBER["-1"])
        | QUEST  -> ^(REPOPT["RO"] NUMBER["0"] NUMBER["1"])
        | LBRACE n1=NUMBER 
                (COMMA 
                        ( n2=NUMBER
                                -> ^(REPOPT["RO"] $n1 $n2)
                        |
                                -> ^(REPOPT["RO"] $n1 NUMBER["-1"])
                        )
                |
                        -> ^(REPOPT["RO"] $n1 NUMBER[$n1.text])
                ) RBRACE
        ;

// Copyright (c) 1999-2006  Pavel Rychly

#include <finlib/seek.hh>
#include <cstdio>
#include "cqpLexer.hpp"
#include "cqpParser.hpp"
#include "cqpTreeWalker.hpp"
#include "cqpeval.hh"
#include <iostream>

using namespace std;

int ANTLR3_CDECL
main(int argc, char ** argv)
{
    const char *corpname = "desam";
    
	if (argc > 1) {
		corpname = argv[1];
	}

	char 							*inputQuery = (char *) malloc(4096);
	//*must*Â be allocated dynamicaly/staticaly, but not on the stack!
	
	pcqpLexer						lexer;
	pcqpParser						parser;
	cqpParser_query_return			cqpAST;
	pcqpTreeWalker					treeWalker;
	pANTLR3_INPUT_STREAM			input;
	pANTLR3_COMMON_TOKEN_STREAM		tokenStream;
	pANTLR3_COMMON_TREE_NODE_STREAM nodes;
	
	try {
		cin.getline(inputQuery, 4096);
		
		input = antlr3NewAsciiStringInPlaceStream ((pANTLR3_UINT8) inputQuery, (ANTLR3_UINT64) strlen(inputQuery), (pANTLR3_UINT8) "CQP Stream");
		if (input == NULL) {
			ANTLR3_FPRINTF(stderr, "Unable to set up input stream due to malloc() failure1\n");
		}

    	lexer = cqpLexerNew(input);
		if (lexer == NULL) {
			ANTLR3_FPRINTF(stderr, "Unable to create the lexer due to malloc() failure1\n");
			exit(ANTLR3_ERR_NOMEM);
		}

		tokenStream = antlr3CommonTokenStreamSourceNew(ANTLR3_SIZE_HINT, TOKENSOURCE(lexer));
		if (tokenStream == NULL) {
			ANTLR3_FPRINTF(stderr, "Out of memory trying to allocate token stream\n");
			exit(ANTLR3_ERR_NOMEM);
		}

    	parser = cqpParserNew(tokenStream);
		if (parser == NULL) {
			ANTLR3_FPRINTF(stderr, "Out of memory trying to allocate parser\n");
			exit(ANTLR3_ERR_NOMEM);
		}

		Corpus *corp = new Corpus (corpname);
		PosAttr *a = corp->get_attr ("word");
		printf ("Corpus %s found.\n", corpname);
		RangeStream *r = NULL;
		FastStream *fs = NULL;

		// Parse the input expression
	    cqpAST = parser->query(parser);
		//parser->oneposonly();

		if (parser->pParser->rec->state->errorCount > 0) {
			throw EvalQueryException ("error");
		} else if (!cqpAST.tree) {
			puts ("No tree produced!");
		} else {
	    	// Print the resulting tree out in LISP notation
	    	puts ((const char *) cqpAST.tree->toStringTree(cqpAST.tree)->chars);
	    	nodes = antlr3CommonTreeNodeStreamNewTree((ANTLR3_BASE_TREE_struct*) cqpAST.tree, ANTLR3_SIZE_HINT);
			treeWalker = cqpTreeWalkerNew(nodes);
	    	// Traverse the tree created by the parser
	    	r = treeWalker->query (treeWalker, *corp);
	    	//fs = walker.onepos (t, *corp);
			puts ("Stream gained.");
			if (r == NULL || r->peek_beg() >= r->final()) {
				puts ("Nothing found!");
			} else {
				printf ("stream: min=%li max=%li\n", r->rest_min(), r->rest_max());
				int maxlines = 5;
				RangeStream::Labels lll;
				int rescount = 0;
				do {
		    		rescount++;
	    			int from = r->peek_beg();
	    			int to = r->peek_end();
	    			printf ("%i[%i]: <", from, to - from);
		    		for (int i=from; i < to; i++) {
						printf ("%s ", a->pos2str (i));
					}
	    			printf ("> ");
		    		for (int i=to; i < to +5; i++) {
						printf ("%s ", a->pos2str (i));
					}
				    lll.clear();
	    			r->add_labels (lll);
				    for (RangeStream::Labels::iterator i = lll.begin(); i!= lll.end(); ++i) {
						printf ("<<%li:%li>>", (*i).first, (*i).second);
					}
	    			puts ("");
	    			//if (from  > 400) break;
				} while (r->next() && --maxlines);
    			if (r->peek_beg() < r->final()) {
					do {
						rescount++;
					} while (r->next()); 
				}
				printf ("rstream: finished %i\n", rescount);
    		}
	    	if (r != NULL) {
				delete r;
			}
	    	if (fs) {
				printf ("fstream: min=%li  max=%li\n", fs->rest_min(), fs->rest_max());
				int l = fs->final();
				int pos;
				while ((pos = fs->next()) < l) {
		    		printf ("%i: ", pos);
	    			for (int i=pos; i < pos + 5; i++) {
						printf ("%s ", a->pos2str (i));
					}
				    puts ("");
				}
				puts ("fstream: finished");
				delete fs;
	    	}
			nodes		->free(nodes);			nodes = NULL;
			treeWalker	->free(treeWalker);		treeWalker = NULL;
		}
	} catch (exception &e) {
			printf ("exception: %s\n", e.what());
	}
	
	parser		->free(parser);			parser = NULL;
	tokenStream	->free(tokenStream);	tokenStream = NULL;
	lexer		->free(lexer);			lexer = NULL;
	free(inputQuery);
    return 0;
}

_______________________________________________
antlr-dev mailing list
[email protected]
http://www.antlr.org/mailman/listinfo/antlr-dev

[antlr-dev] antlr C target regression - 3.1.2+?

Reply via email to