Hi,
I believe I've found a regression in the ANTLR C target. When I
compile and run the polydiff example with 3.1.1, the first tree printed is:
(* 2 (^ x 3)) + (^ x 5) + (* 4 x) + (* 10 x) + (* 8 x) + x + 2
However, with antlr 3.1.2+, I get:
( 2 (^ x 3)) + (^ x 5) + ( 4 x) + ( 10 x) + ( 8 x) + x + 2
(the '*'s are missing!) It seems that one cannot use imaginary tokens?
Also, I'm trying to convert my own antlr2 grammar into an antlr3 grammar,
which has turned into a nightmare. I currently have a semi-working grammar
(see attached; the input text could be e.g. [word="a"]) which, however,
produces no trees (cqpAST.tree = NULL, "No tree produced" in my code,
which is attached too), although it finishes without any errors. Any
idea what might cause that? I don't know whether this is somehow related to
the above-mentioned problem (I'm unable to try my grammar with 3.1.1 because
of another error which I can't explain, but which vanishes in 3.1.2+).
I have spent a disgusting amount of time on the move from antlr2 to antlr3,
and now it seems to have been totally wasted :(
Thank you very much in advance for any advice!
Milos Jakubicek
// Copyright (c) 1999-2008 Pavel Rychly
// Combined grammar "cqp": lexer and parser rules live in this one file.
grammar cqp;
// Generator options: build an AST (output = AST), emit code for the C target,
// type tree labels as pANTLR3_BASE_TREE and use a fixed lookahead of k = 2.
options
{
output = AST;
language = C;
ASTLabelType = pANTLR3_BASE_TREE;
k = 2;
}
// Imaginary token types: none of them is bound to input text.
// SEQ, REPOPT, ANYPOS, BEGSTRUCT, ENDSTRUCT and WHOLESTRUCT are used as node
// types in the rewrite rules below; the KW_* types are assigned to ATTR tokens
// with setType() once a semantic predicate has recognised the keyword text.
// OPT is declared but not referenced by any rule in this grammar.
tokens {
OPT;
REPOPT;
SEQ;
ANYPOS;
BEGSTRUCT;
ENDSTRUCT;
WHOLESTRUCT;
KW_MEET;
KW_UNION;
KW_WITHIN;
KW_MU;
}
// Includes copied into the generated code: finlib and project headers plus
// <cstdlib>.
@header {
#include <finlib/fsop.hh>
#include <cstdlib>
#include "parsop.hh"
#include "frsop.hh"
#include "corpus.hh"
}
// Replaces the per-rule error-catching code with an empty body.
// NOTE(review): with nothing here, a recognition error inside a rule is
// presumably neither reported nor recovered from, which could explain a parse
// that "finishes without errors" yet yields no tree -- confirm against the
// generated code for the C target.
@rulecatch { }
// Declarations added to the generated lexer. strip() is only declared here;
// it is called from the REGEXP rule's action.
@lexer::members {
char * strip(char *s, char c);
// defined in corp/corpconf.g
}
// Whitespace: a single space, tab, newline or carriage return. The action
// calls SKIP(), so no token is passed on to the parser.
WS_ : (' '
| '\t'
| '\n'
| '\r')
{ SKIP(); }
;
// NUMBER: one or more decimal digits.
NUMBER: ('0'..'9')+ ;
// NNUMBER: a '-' sign followed by one or more decimal digits.
NNUMBER: '-' ('0'..'9')+ ;
// ATTR: identifier -- a letter or '_' followed by letters, digits or '_'.
// Keywords (MU, within, meet, union) are also lexed as ATTR and recognised
// later by semantic predicates in the parser rules.
ATTR : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')* ;
// REGEXP: a double-quoted string; a backslash escapes the following character.
// The action passes the matched text and '"' to strip() and stores the result
// back into $text->chars.
// NOTE(review): whether writing to $text->chars changes the text of the token
// that is actually emitted depends on how the C target expands $text in a
// lexer action -- confirm.
REGEXP: '"' ('\\' . |~'"')* '"'
{ $text->chars = (pANTLR3_UINT8) strip((char *) $text->chars,
'"'); }
;
// Punctuation and operator tokens, one literal each.
LPAREN: '(';
RPAREN: ')';
LBRACKET: '[';
RBRACKET: ']';
LBRACE: '{';
RBRACE: '}';
STAR: '*';
PLUS: '+';
QUEST: '?';
BINOR: '|';
BINAND: '&';
DOT: '.';
COMMA: ',';
SEMI: ';';
COLON: ':';
EQ: '=';
// NEQ is the two-character sequence "!="; a lone '!' is NOT.
NEQ: '!' '=';
NOT: '!';
LSTRUCT:'<';
RSTRUCT:'>';
SLASH: '/';
//* TODO:
// * functions - f( ), bigram( ), ...
// * comparison control flags %c - case insensitive
//                            %d - diacritics insensitive (charScript?)
// * within NUMBER
//   within NUMBER STRUCT ?
//*
// Start rule: one complete query terminated by ';'.
//   - Either an ATTR whose text compares equal to "MU" followed by mupart
//     (the ATTR becomes the tree root and is retyped to KW_MU), or altern.
//   - An optional '&' globpart; the BINAND token becomes the root.
//   - Zero or more ATTR tokens whose text compares equal to "within", each
//     followed by withinpart; the ATTR becomes the root and is retyped to
//     KW_WITHIN.
//   - A mandatory SEMI, which is matched but left out of the tree ('!').
query
:
(m=ATTR^ {$m.text->compare($m.text, "MU") == 0}? mupart
{$m->setType($m, KW_MU);}
|altern
)
(BINAND^ globpart)?
(a=ATTR^ {$a.text->compare($a.text, "within") == 0}? withinpart
{$a->setType($a, KW_WITHIN);}
)*
SEMI!
;
// Global conditions: one or more globcond joined by '&'; the separating
// BINAND tokens are left out of the tree.
globpart
: globcond (BINAND! globcond)*
;
// One global condition of the form NUMBER.ATTR (=|!=) NUMBER.ATTR.
// The first DOT becomes the tree root; the second DOT is left out of the tree.
globcond
: NUMBER DOT^ ATTR (EQ|NEQ) NUMBER DOT! ATTR
;
// Argument of a "within" clause: an optional '!' followed by either a
// structure in angle brackets (the brackets are left out of the tree) or an
// alignedpart. The NUMBER alternatives are commented out.
withinpart
: (NOT)?
( LSTRUCT! structure RSTRUCT! //{/ a2}
//| NUMBER //{/ (a1, '')}
//| NUMBER ("<"! structure ">"!)? //{/ (a1, a3)}
| alignedpart
)
;
// A structure reference: an ATTR (the structure name) optionally followed by
// an attvallist.
structure
//: ATTR //{/ a1}
: ATTR (attvallist ) ? //{/ (a1, a2)}
;
// A bare attvallist terminated by ';' (SEMI is left out of the tree).
// Not referenced by any other rule in this grammar.
oneposonly
: attvallist SEMI!
;
// ATTR ':' altern, with COLON as the tree root.
alignedpart
: ATTR COLON^ altern // parallel alignment
;
//-------------------- meet/union query --------------------
// Operand of a meet/union query: either a parenthesised unionop or meetop
// (chosen by syntactic predicates; the parentheses are left out of the tree),
// or a single oneposition.
mupart
: LPAREN!
(
(unionop) => unionop
|
(meetop) => meetop
)
RPAREN!
|oneposition
;
// A possibly negative integer: NUMBER as-is, or an NNUMBER rewritten into a
// node of type NUMBER built from the NNUMBER token and its text.
integer
: NUMBER
| n=NNUMBER -> ^(NUMBER[$n,$n.text])
;
// "meet" operation: an ATTR whose text compares equal to "meet", two mupart
// operands and an optional pair of integers. The ATTR token is retyped to
// KW_MEET after the whole alternative has matched.
meetop
:
a=ATTR {$a.text->compare($a.text, "meet") == 0}? mupart mupart (integer
integer)?
{$a->setType ($a, KW_MEET);}
;
// "union" operation: an ATTR whose text compares equal to "union" followed by
// two mupart operands. The ATTR token is retyped to KW_UNION after the match.
unionop
:
a=ATTR {$a.text->compare($a.text, "union") == 0}? mupart mupart
{$a->setType ($a, KW_UNION);}
;
//-------------------- regular expression query --------------------
// Alternation: one or more seq joined by '|'; each BINOR becomes the root of
// the tree built so far.
altern
: seq (BINOR^ seq)*
;
// Sequence: one or more repetition elements collected under an imaginary SEQ
// root node.
seq
: (repetition)+ -> ^(SEQ repetition+)
;
// One element of a sequence. Two alternatives:
//   1. atomquery, optionally followed by repopt; when repopt is present the
//      result is rewritten to ^(REPOPT["REp"] $repetition).
//   2. A structure tag between '<' and '>':
//        <structure/>  ->  ^(WHOLESTRUCT["<str/>"] structure)
//        <structure>   ->  ^(BEGSTRUCT["<str>"])
//        </structure>  ->  ^(ENDSTRUCT["</str>"] structure)
// NOTE(review): $repetition refers to this rule's own tree built so far, and
// the atomquery before it has no explicit rewrite; also the repopt subtree is
// not mentioned in the rewrite, and the BEGSTRUCT rewrite drops `structure`.
// Confirm that each of these is intended.
repetition
: atomquery (
repopt -> ^(REPOPT["REp"] $repetition)
)?
| LSTRUCT
( structure
( SLASH -> ^(WHOLESTRUCT["<str/>"] structure)
| -> ^(BEGSTRUCT["<str>"])
)
| SLASH structure
-> ^(ENDSTRUCT["</str>"] structure)
)
RSTRUCT
;
// Disjunction of attribute conditions: attvaland terms joined by '|', with
// each BINOR as tree root.
attvallist
: attvaland (BINOR^ attvaland)*
;
// Conjunction of attribute conditions: attval terms joined by '&', with each
// BINAND as tree root.
attvaland
: attval (BINAND^ attval)*
;
// A single attribute condition:
//   ATTR = REGEXP or ATTR != REGEXP  (the operator becomes the root),
//   '!' attval                       (NOT becomes the root), or
//   a parenthesised attvallist       (the parentheses are left out of the tree).
attval
: ATTR (EQ^|NEQ^) REGEXP
| NOT^ attval
| LPAREN! attvallist RPAREN!
;
// Atomic query element:
//   a oneposition,
//   NUMBER ':' oneposition, rewritten to ^(LBRACKET["["] ^($c $n $p)), or
//   a parenthesised altern (the parentheses are left out of the tree).
atomquery
: oneposition
| n=NUMBER c=COLON p=oneposition
-> ^(LBRACKET["["] ^($c $n $p))
| LPAREN! altern RPAREN!
;
// A single corpus position:
//   '[' attvallist ']'  -- no rewrite is given for this branch,
//   '[' ']'             -- rewritten to ^(LBRACKET["["] ^(ANYPOS["[]"])), or
//   a bare REGEXP       -- rewritten to ^(LBRACKET["["] ^(EQ["="] ATTR["-"] REGEXP)),
//                          i.e. a condition on an attribute node with text "-".
oneposition
: LBRACKET (attvallist
| -> ^(LBRACKET["["] ^(ANYPOS["[]"]))
) RBRACKET
| REGEXP -> ^(LBRACKET["["] ^(EQ["="] ATTR["-"] REGEXP))
;
// Repetition operator. Every alternative is rewritten to a REPOPT["RO"] node
// with two NUMBER children:
//   *      ->  0  -1
//   +      ->  1  -1
//   ?      ->  0   1
//   {n}    ->  n   n   (second child is a new NUMBER node with n's text)
//   {n,}   ->  n  -1
//   {n,m}  ->  n   m
repopt!
: STAR -> ^(REPOPT["RO"] NUMBER["0"] NUMBER["-1"])
| PLUS -> ^(REPOPT["RO"] NUMBER["1"] NUMBER["-1"])
| QUEST -> ^(REPOPT["RO"] NUMBER["0"] NUMBER["1"])
| LBRACE n1=NUMBER
(COMMA
( n2=NUMBER
-> ^(REPOPT["RO"] $n1 $n2)
|
-> ^(REPOPT["RO"] $n1 NUMBER["-1"])
)
|
-> ^(REPOPT["RO"] $n1 NUMBER[$n1.text])
) RBRACE
;
// Copyright (c) 1999-2006 Pavel Rychly
#include <finlib/seek.hh>
#include <cstdio>
#include "cqpLexer.hpp"
#include "cqpParser.hpp"
#include "cqpTreeWalker.hpp"
#include "cqpeval.hh"
#include <iostream>
using namespace std;
/**
 * Command-line test driver for the cqp grammar.
 *
 * Reads one line (the query) from standard input, runs the generated cqp
 * lexer and parser over it, prints the resulting AST in LISP notation,
 * evaluates the tree with the cqp tree walker against a corpus and prints
 * the ranges that were found.
 *
 * Note that the grammar's `query` rule ends with SEMI, so the input line is
 * expected to be terminated by ';'.
 *
 * @param argc number of command-line arguments
 * @param argv argv[1], when present, names the corpus (default "desam")
 * @return always 0; allocation failures end the process through
 *         exit(ANTLR3_ERR_NOMEM)
 */
int ANTLR3_CDECL
main(int argc, char ** argv)
{
    const char *corpname = "desam";
    if (argc > 1) {
        corpname = argv[1];
    }

    // Original author's note: the buffer *must* be allocated dynamically or
    // statically, not on the stack. The input stream below is created in
    // place over this buffer.
    char *inputQuery = (char *) malloc(4096);
    if (inputQuery == NULL) {
        ANTLR3_FPRINTF(stderr, "Unable to allocate the query buffer\n");
        exit(ANTLR3_ERR_NOMEM);
    }

    // Every handle starts out NULL so that the cleanup code after the try
    // block can tell what was actually created, even when an exception cut
    // the processing short.
    pcqpLexer lexer = NULL;
    pcqpParser parser = NULL;
    cqpParser_query_return cqpAST;
    pcqpTreeWalker treeWalker = NULL;
    pANTLR3_INPUT_STREAM input = NULL;
    pANTLR3_COMMON_TOKEN_STREAM tokenStream = NULL;
    pANTLR3_COMMON_TREE_NODE_STREAM nodes = NULL;
    Corpus *corp = NULL;
    RangeStream *r = NULL;
    FastStream *fs = NULL;

    try {
        cin.getline(inputQuery, 4096);
        input = antlr3NewAsciiStringInPlaceStream ((pANTLR3_UINT8) inputQuery, (ANTLR3_UINT64) strlen(inputQuery), (pANTLR3_UINT8) "CQP Stream");
        if (input == NULL) {
            ANTLR3_FPRINTF(stderr, "Unable to set up input stream due to malloc() failure1\n");
            // Without an input stream there is nothing the lexer could read,
            // so stop here like the other allocation failures do.
            exit(ANTLR3_ERR_NOMEM);
        }
        lexer = cqpLexerNew(input);
        if (lexer == NULL) {
            ANTLR3_FPRINTF(stderr, "Unable to create the lexer due to malloc() failure1\n");
            exit(ANTLR3_ERR_NOMEM);
        }
        tokenStream = antlr3CommonTokenStreamSourceNew(ANTLR3_SIZE_HINT, TOKENSOURCE(lexer));
        if (tokenStream == NULL) {
            ANTLR3_FPRINTF(stderr, "Out of memory trying to allocate token stream\n");
            exit(ANTLR3_ERR_NOMEM);
        }
        parser = cqpParserNew(tokenStream);
        if (parser == NULL) {
            ANTLR3_FPRINTF(stderr, "Out of memory trying to allocate parser\n");
            exit(ANTLR3_ERR_NOMEM);
        }

        corp = new Corpus (corpname);
        PosAttr *a = corp->get_attr ("word");
        printf ("Corpus %s found.\n", corpname);

        // Parse the input expression
        cqpAST = parser->query(parser);
        //parser->oneposonly();
        if (parser->pParser->rec->state->errorCount > 0) {
            throw EvalQueryException ("error");
        } else if (!cqpAST.tree) {
            puts ("No tree produced!");
        } else {
            // Print the resulting tree out in LISP notation
            puts ((const char *) cqpAST.tree->toStringTree(cqpAST.tree)->chars);
            nodes = antlr3CommonTreeNodeStreamNewTree((ANTLR3_BASE_TREE_struct*) cqpAST.tree, ANTLR3_SIZE_HINT);
            treeWalker = cqpTreeWalkerNew(nodes);
            // Traverse the tree created by the parser
            r = treeWalker->query (treeWalker, *corp);
            //fs = walker.onepos (t, *corp);
            puts ("Stream gained.");
            if (r == NULL || r->peek_beg() >= r->final()) {
                puts ("Nothing found!");
            } else {
                printf ("stream: min=%li max=%li\n", r->rest_min(), r->rest_max());
                // Only the first `maxlines` results are printed in full; the
                // rest are merely counted.
                int maxlines = 5;
                RangeStream::Labels lll;
                int rescount = 0;
                do {
                    rescount++;
                    int from = r->peek_beg();
                    int to = r->peek_end();
                    printf ("%i[%i]: <", from, to - from);
                    for (int i=from; i < to; i++) {
                        printf ("%s ", a->pos2str (i));
                    }
                    printf ("> ");
                    // Five positions of right-hand context after the match.
                    for (int i=to; i < to +5; i++) {
                        printf ("%s ", a->pos2str (i));
                    }
                    lll.clear();
                    r->add_labels (lll);
                    for (RangeStream::Labels::iterator i = lll.begin(); i!= lll.end(); ++i) {
                        printf ("<<%li:%li>>", (*i).first, (*i).second);
                    }
                    puts ("");
                    //if (from > 400) break;
                } while (r->next() && --maxlines);
                // Count whatever is left without printing it.
                if (r->peek_beg() < r->final()) {
                    do {
                        rescount++;
                    } while (r->next());
                }
                printf ("rstream: finished %i\n", rescount);
            }
            // fs is only set by the commented-out onepos() call above, so
            // this branch is currently never taken.
            if (fs) {
                printf ("fstream: min=%li max=%li\n", fs->rest_min(), fs->rest_max());
                int l = fs->final();
                int pos;
                while ((pos = fs->next()) < l) {
                    printf ("%i: ", pos);
                    for (int i=pos; i < pos + 5; i++) {
                        printf ("%s ", a->pos2str (i));
                    }
                    puts ("");
                }
                puts ("fstream: finished");
            }
        }
    } catch (const exception &e) {
        printf ("exception: %s\n", e.what());
    }

    // Cleanup runs on both the normal and the exception path. The result
    // streams go first, then the tree walker machinery, then the corpus they
    // were evaluated against, and finally the parser chain and its input.
    delete r;  r = NULL;    // deleting a null pointer is a no-op
    delete fs; fs = NULL;
    if (nodes != NULL) {
        nodes ->free(nodes); nodes = NULL;
    }
    if (treeWalker != NULL) {
        treeWalker ->free(treeWalker); treeWalker = NULL;
    }
    delete corp; corp = NULL;
    if (parser != NULL) {
        parser ->free(parser); parser = NULL;
    }
    if (tokenStream != NULL) {
        tokenStream ->free(tokenStream); tokenStream = NULL;
    }
    if (lexer != NULL) {
        lexer ->free(lexer); lexer = NULL;
    }
    if (input != NULL) {
        // The stream was created in place over inputQuery, so the buffer
        // itself is released separately below.
        input ->close(input); input = NULL;
    }
    free(inputQuery);
    return 0;
}
_______________________________________________
antlr-dev mailing list
[email protected]
http://www.antlr.org/mailman/listinfo/antlr-dev