Revision: 1899
Author: jsuijs
Date: Fri Apr 9 08:24:41 2010
Log: Williams ANTLR files of 2010-02-19
http://code.google.com/p/jallib/source/detail?r=1899
Added:
/trunk/grammar
/trunk/grammar/jal.g
/trunk/grammar/jal.py
/trunk/grammar/jaltest.py
=======================================
--- /dev/null
+++ /trunk/grammar/jal.g Fri Apr 9 08:24:41 2010
@@ -0,0 +1,243 @@
+// bvwelch 23 jan, '10
+
+// you may prefer to view this file from the 'ANTLRWorks' GUI tool,
+// which is a single Java 'jar' file, from here: http://www.antlr.org/works/index.html
+
+// If you want to run the test program, jaltest.py, then you need to
+// generate the lexer and parser first, like this:
+// java -cp antlr-3.1.2.jar org.antlr.Tool jal.g
+
+// this first cut of JAL grammar was derived from an example found
+// here: http://www.antlr.org/wiki/display/ANTLR3/Example
+
+grammar jal;
+
+options {
+ language=Python;
+ output=AST;
+ ASTLabelType=CommonTree;
+}
+
+program : ( statement {print $statement.tree.toStringTree();} )+ ;
+
+statement : // one statement; every alternative below is separated by '|'
+ block_stmt | for_stmt | forever_stmt | if_stmt
+ | repeat_stmt | while_stmt | case_stmt
+ | var_def | const_def | alias_def
+ | proc_def | pseudo_proc_def
+ | func_def | pseudo_func_def
+ | 'return' expr
+ | 'assert' expr
+ | 'include' IDENTIFIER
+ | '_debug' STRING_LITERAL
+ | '_error' STRING_LITERAL
+ | '_warn' STRING_LITERAL
+ | IDENTIFIER '=' expr // plain assignment
+ ;
+
+// FIXME
+cexpr : constant
+ ;
+
+cexpr_list : '{' cexpr ( ',' cexpr )* '}' // brace-enclosed, comma-separated list of one or more cexpr
+ ;
+
+for_stmt : 'for' expr ( 'using' IDENTIFIER )* 'loop' // NOTE(review): '*' admits repeated 'using' clauses; '?' may be intended
+ statement+
+ ( 'exit' 'loop' )* // only accepted after the body statements, not between them
+ 'end' 'loop'
+ ;
+
+forever_stmt : 'forever' 'loop' // same body shape as for_stmt, without a header expression
+ statement+
+ ( 'exit' 'loop' )*
+ 'end' 'loop'
+ ;
+
+while_stmt : 'while' expr 'loop' // same body shape as for_stmt
+ statement+
+ ( 'exit' 'loop' )*
+ 'end' 'loop'
+ ;
+
+repeat_stmt : 'repeat' // closed by 'until' expr instead of 'end' 'loop'
+ statement+
+ ( 'exit' 'loop' )*
+ 'until' expr
+ ;
+
+if_stmt : 'if' expr 'then' statement+
+ ('elsif' expr 'then' statement+ )*
+ ('else' statement+ )* // NOTE(review): '*' admits several 'else' parts; '?' may be intended
+ 'end' 'if' // two separate tokens, matching the 'end' 'loop' / 'end' 'case' style of the other rules
+ ;
+
+case_stmt : 'case' expr 'of'
+ cexpr (',' cexpr)* ':' statement
+ ( cexpr (',' cexpr)* ':' statement )*
+ ('otherwise' statement)*
+ 'end' 'case'
+ ;
+
+block_stmt : 'block' statement+ 'end' 'block' ;
+
+proc_def : 'procedure' IDENTIFIER '(' proc_parm (',' proc_parm)* ')' 'is' // 'is' opens the body, as in pseudo_proc_def
+ statement+
+ 'end' 'procedure'
+ ;
+
+func_def : 'function' IDENTIFIER '(' proc_parm (',' proc_parm)* ')' 'return' type 'is' // return clause as in pseudo_func_def
+ statement+
+ 'end' 'function'
+ ;
+
+proc_parm : 'volatile'* type ('in' 'out' | 'in' | 'out') IDENTIFIER // direction keyword(s), then the parameter name
+ ;
+
+pseudo_proc_def : 'procedure' IDENTIFIER '\'' 'put' '(' type 'in' IDENTIFIER ')' 'is' // procedure name'put ( type in name ) is
+ statement+
+ 'end' 'procedure'
+ ;
+
+pseudo_func_def : 'function' IDENTIFIER '\'' 'get' 'return' type 'is'
+ statement+
+ 'end' 'function'
+ ;
+
+alias_def : 'alias' IDENTIFIER 'is' IDENTIFIER
+ ;
+
+const_def : 'const' vtype* IDENTIFIER ( '[' cexpr* ']' )* '='
+ ( cexpr | cexpr_list | IDENTIFIER | STRING_LITERAL )
+ ;
+
+var_def : var_decl1 var_decl2 (var_multi* | at_decl | is_decl | var_with_init) // one declaration, then extra names OR a placement/alias/initializer
+ ;
+
+var_multi : ',' var_decl2
+ ;
+
+var_with_init : '=' var_init
+ ;
+
+var_decl1 : 'var' 'volatile'* vtype
+ ;
+
+var_decl2 : IDENTIFIER ( '[' cexpr* ']' )*
+ ;
+
+vtype : type ('*' constant)*
+ ;
+
+at_decl : ('shared')* 'at' ( ( cexpr bitloc* ) | ( IDENTIFIER bitloc* ) | cexpr_list ) // target: constant, identifier (each with optional bitloc) or list
+ ;
+
+is_decl : 'is' IDENTIFIER
+ ;
+
+bitloc : ':' constant
+ ;
+
+//FIXME: this is wrong-- add proc/func calls to the expr handler instead
+proc_func_call : IDENTIFIER '(' IDENTIFIER* ')'
+ ;
+
+var_init : proc_func_call | cexpr | cexpr_list | STRING_LITERAL | CHARACTER_LITERAL | IDENTIFIER // right-hand side accepted by var_with_init
+ ;
+
+type : 'bit' | 'byte' | 'word' | 'dword'
+ | 'sbyte' | 'sword' | 'sdword'
+ ;
+
+PRAGMA
+ : 'pragma' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
+ ;
+
+// all below-- borrowed/derived from Antlr C.g and Python.g examples.
+
+//or_test : and_test ('|' and_test)*
+// ;
+//
+//and_test : not_test ('&' not_test)*
+// ;
+//
+//not_test : '!' not_test
+// | comparison
+// ;
+//
+//comparison: expr (comp_op expr)*
+// ;
+//
+//comp_op : '<' | '>' | '==' | '>=' | '<=' | '!='
+// ;
+//
+expr : xor_expr ('|' xor_expr)* // entry point of the expression rules; '|' binds loosest
+ ;
+
+xor_expr : and_expr ('^' and_expr)*
+ ;
+
+and_expr : shift_expr ('&' shift_expr)*
+ ;
+
+shift_expr : arith_expr (('<<'|'>>') arith_expr)*
+ ;
+
+arith_expr: term (('+'|'-') term)*
+ ;
+
+term : factor (('*' | '/' | '%' ) factor)*
+ ;
+
+factor : '+' factor // prefix operators recurse, so they may be stacked
+ | '-' factor
+ | '~' factor
+ | atom
+ ;
+
+atom : CHARACTER_LITERAL // operands; no parenthesised sub-expression or call alternative here
+ | STRING_LITERAL
+ | constant
+ | IDENTIFIER
+ ;
+
+IDENTIFIER : LETTER (LETTER|'0'..'9')* ;
+
+fragment
+LETTER : 'A'..'Z' | 'a'..'z' | '_' ;
+
+constant : BIN_LITERAL | HEX_LITERAL | OCTAL_LITERAL | DECIMAL_LITERAL ;
+
+BIN_LITERAL : '0' ('b'|'B') ('0' | '1' | '_')+ ;
+
+DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) ;
+
+HEX_LITERAL : '0' ('x'|'X') HexDigit+ ;
+
+OCTAL_LITERAL : '0' ('0'..'7')+ ;
+
+CHARACTER_LITERAL : '"' ( EscapeSequence | ~('"'|'\\') ) '"' // exactly one char or escape; the excluded char is the '"' delimiter itself
+ ;
+
+STRING_LITERAL : '"' ( EscapeSequence | ~('\\'|'"') )* '"'
+ ;
+
+fragment
+HexDigit : ('0'..'9'|'a'..'f'|'A'..'F'|'_') ;
+
+fragment EscapeSequence : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
+ | OctalEscape
+ ;
+
+fragment OctalEscape : '\\' ('0'..'3') ('0'..'7') ('0'..'7')
+ | '\\' ('0'..'7') ('0'..'7')
+ | '\\' ('0'..'7')
+ ;
+
+WS : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;}
+ ;
+
+LINE_COMMENT
+ : ('--' | ';') ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
+ ;
+
=======================================
--- /dev/null
+++ /trunk/grammar/jal.py Fri Apr 9 08:24:41 2010
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+
+# bw 9 Jan 2010
+#
+import os.path
+import optparse
+from plex import *
+
+resword = Str( "_debug", "_error", "_warn", "alias", "asm", "assembler",
+ "assert", "at", "bit", "block", "byte", "case", "const",
+ "defined", "dword", "else", "elsif", "end", "exit", "for",
"forever",
+ "function", "if", "in", "include", "is", "loop", "of", "out",
"pragma",
+ "procedure", "repeat", "return", "sbit", "sbyte", "sdword",
"start",
+ "suspend", "sword", "task", "then", "until", "using", "var",
"volatile",
+ "while", "word" )
+
+comment = Str("--", ";") + Rep(AnyBut('\n')) + Eol
+digit = Range("09") | Str('_')
+hexdigit = Range("09AFaf") | Str('_')
+hexnum = Str("0x") + Rep1(hexdigit)
+binarydigit = Range("01") | Str('_')
+binarynum = Str("0b") + Rep1(binarydigit)
+letter = Range("AZaz") | Str('_')
+name = Rep1(letter | digit )
+number = Rep1(digit)
+ps_get = name + Str("'get")
+ps_put = name + Str("'put")
+qstring = Str('"') + AnyBut('"') + Rep1(AnyBut('"')) + Str('"')
+qascii = Str('"') + AnyBut('"') + Str('"')
+space = Any(" \t\n")
+op2 = Str("<<", ">>", "<=", "==", "!=", ">=")
+
+def do_resword(token, word): # action paired with resword in lex_table
+ return word.lower() # presumably `word` is the matched text, so the keyword itself (lowercased) is the result -- confirm against plex
+
+lex_table = Lexicon([
+ (comment, 'comment'), # we may want to see the comments while
debugging
+ (qstring, 'qstring'),
+ (qascii, 'qascii'),
+ (resword, do_resword),
+ (NoCase(ps_put), 'ps_put'),
+ (NoCase(ps_get), 'ps_get'),
+ (NoCase(hexnum), 'hexnum'),
+ (NoCase(binarynum), 'binarynum'),
+ (number, 'number'),
+ (op2, 'op2'),
+ (name, 'id'),
+
+ # FIXME
+ (Any("%!|&.:^[],'(){}+-*/=<>\"\\"), TEXT),
+
+ (space , IGNORE),
+])
+
+# Future: if we want to show a traceback of nested includes.
+include_stack = []
+
+token_list = []
+include_path = []
+
+# FIXME. need better approach to dealing with
+# all the places the file might be found.
+def find_and_open(p_fname):
+
+ # Make an exhaustive list, but we'll stop
+ # with the first one we can open correctly.
+ files = [p_fname]
+ for path in include_path:
+ files.append(os.path.join(path, p_fname))
+
+ for fname in files:
+ try:
+ f = open(fname, "r")
+ except:
+ pass
+ else:
+ return fname, f
+
+ print "Open failed on file: " , p_fname
+ return p_fname, None
+
+def lex_include(cur_file, cur_lineno, inc_file): # lex inc_file in place of an include statement
+ print "do include: ", cur_file, cur_lineno, inc_file
+ include_stack.append( (cur_file, cur_lineno, inc_file) ) # record where the include came from while inc_file is lexed
+ # recursive call to lexan
+ lexan(inc_file)
+ include_stack.pop()
+
+def lexan(p_fname):
+ print "lexan file: ", p_fname
+ fname, f = find_and_open(p_fname)
+ if f is None:
+ return
+
+ scanner = Scanner(lex_table, f, fname)
+ while True:
+ token = scanner.read()
+ if not token[0]:
+ break
+ if token[0] == 'include':
+ cur_fname, cur_lineno, cur_pos = scanner.position()
+ token = scanner.read()
+ inc_file = token[1] + ".jal"
+ lex_include(cur_fname, cur_lineno, inc_file)
+ else:
+ #print "token = ", token
+ token_list.append( token )
+
+def parse():
+ print "parsing file"
+ for t in token_list:
+ # supress comments for now
+ if t[0] != 'comment':
+ print "token = ", t
+
+def compile(fname): # NOTE(review): shadows the builtin compile() inside this module
+ lexan(fname) # appends the file's tokens to token_list
+ parse() # prints the collected tokens
+
+def main():
+ p = optparse.OptionParser()
+ p.add_option("-s", action="append", dest="ipath")
+ opts, args = p.parse_args()
+ global include_path
+ include_path = opts.ipath
+
+ for fname in args:
+ compile(fname)
+
+if __name__ == '__main__':
+ main()
+
=======================================
--- /dev/null
+++ /trunk/grammar/jaltest.py Fri Apr 9 08:24:41 2010
@@ -0,0 +1,46 @@
+# bvwelch 23 jan, '10
+
+# this program tests the JAL grammar, and was derived from
+# an example found here: http://www.antlr.org/wiki/display/ANTLR3/Example
+
+import sys
+import antlr3
+import antlr3.tree
+
+# the lexer and parser are generated by Antlr, like this:
+# java -cp antlr-3.1.2.jar org.antlr.Tool jal.g
+
+from jalLexer import *
+from jalParser import *
+
+f = open('a.jal') # the input file name is hard-coded
+char_stream = antlr3.ANTLRInputStream(f)
+lexer = jalLexer(char_stream)
+tokens = antlr3.CommonTokenStream(lexer)
+
+#print "dumping our lexer tokens"
+#for x in tokens.getTokens():
+ #print x.getLine(), x.getText()
+#print "done with lexer tokens"
+#exit(1)
+
+print "setting up parser"
+parser = jalParser(tokens)
+print "calling parser"
+r = parser.program() # 'program' is the first rule of jal.g; it prints each statement's tree as it parses
+print "back from parser"
+#exit(1)
+
+# this is the root of the AST
+root = r.tree
+
+nodes = antlr3.tree.CommonTreeNodeStream(root)
+nodes.setTokenStream(tokens)
+
+print "dumping our parser/tree nodes"
+for x in nodes:
+ print x.toString()
+print "done with parser/tree nodes"
+
+exit()
+
--
You received this message because you are subscribed to the Google Groups
"jallib" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/jallib?hl=en.