Revision: 1899
Author: jsuijs
Date: Fri Apr  9 08:24:41 2010
Log: Williams Antlr files of 2010-02-19
http://code.google.com/p/jallib/source/detail?r=1899

Added:
 /trunk/grammar
 /trunk/grammar/jal.g
 /trunk/grammar/jal.py
 /trunk/grammar/jaltest.py

=======================================
--- /dev/null
+++ /trunk/grammar/jal.g        Fri Apr  9 08:24:41 2010
@@ -0,0 +1,243 @@
+// bvwelch 23 jan, '10
+
+// you may prefer to view this file from the 'ANTLRWorks' GUI tool,
+// which is a single Java 'jar' file, from here: http://www.antlr.org/works/index.html
+
+// If you want to run the test program, jaltest.py, then you need to
+// generate the lexer and parser first, like this:
+//    java -cp antlr-3.1.2.jar org.antlr.Tool jal.g
+
+// this first cut of JAL grammar was derived from an example found
+// here: http://www.antlr.org/wiki/display/ANTLR3/Example
+
+grammar jal;
+
+options {
+       language=Python;
+       output=AST;
+       ASTLabelType=CommonTree;
+}
+
+// Entry rule: one or more statements.  The embedded action prints each
+// statement's AST (via toStringTree) as soon as that statement is parsed.
+program : ( statement {print $statement.tree.toStringTree();} )+ ;
+
+// A single JAL statement: a compound statement, a definition, one of the
+// keyword statements ('return', 'assert', 'include', '_debug', '_error',
+// '_warn'), or a plain assignment.  Every alternative is separated by '|';
+// in particular 'forever_stmt' and 'if_stmt' are distinct alternatives.
+statement :
+        block_stmt | for_stmt | forever_stmt | if_stmt
+        | repeat_stmt | while_stmt | case_stmt
+        | var_def | const_def | alias_def
+        | proc_def | pseudo_proc_def
+        | func_def | pseudo_func_def
+        | 'return' expr
+        | 'assert' expr
+        | 'include' IDENTIFIER
+        | '_debug' STRING_LITERAL
+        | '_error' STRING_LITERAL
+        | '_warn' STRING_LITERAL
+        | IDENTIFIER '=' expr
+        ;
+
+// FIXME
+// Constant expression.  For now this accepts only a single literal
+// 'constant', not a computed expression.
+cexpr   :   constant
+        ;
+
+// Brace-enclosed, comma-separated list of one or more constant expressions.
+// The ( ',' cexpr ) group is repeated with '*' so the list may hold any
+// number of elements, not exactly two.
+cexpr_list : '{' cexpr ( ',' cexpr )* '}'
+       ;
+
+// Loop statements.  As written, each body is one or more statements and
+// 'exit' 'loop' is only accepted after the body, immediately before the
+// loop terminator.
+
+// 'for' <count expr> [ 'using' <loop variable> ] 'loop' ... 'end' 'loop'
+// (the 'using' clause is written with '*', so it may also repeat).
+for_stmt : 'for' expr ( 'using' IDENTIFIER )* 'loop'
+                statement+
+                ( 'exit' 'loop' )*
+            'end' 'loop'
+        ;
+
+// 'forever' 'loop' ... 'end' 'loop'
+forever_stmt : 'forever' 'loop'
+                statement+
+                ( 'exit' 'loop' )*
+            'end' 'loop'
+        ;
+
+// 'while' <condition expr> 'loop' ... 'end' 'loop'
+while_stmt : 'while' expr 'loop'
+                statement+
+                ( 'exit' 'loop' )*
+            'end' 'loop'
+        ;
+
+// 'repeat' ... 'until' <condition expr>
+repeat_stmt : 'repeat'
+                statement+
+                ( 'exit' 'loop' )*
+            'until' expr
+        ;
+
+// 'if' statement with any number of 'elsif' branches and 'else' parts.
+// 'end' and 'if' are separate tokens, as in the other 'end' ... rules: a
+// single 'end if' literal would only match the two words when they are
+// separated by exactly one space.
+if_stmt : 'if' expr 'then' statement+
+            ('elsif' expr 'then' statement+ )*
+            ('else' statement+ )*
+            'end' 'if'
+        ;
+
+// 'case' <expr> 'of' followed by one or more arms, each a comma-separated
+// list of constant expressions, a ':' and a single statement; then any
+// 'otherwise' arms (written with '*'), closed by 'end' 'case'.
+case_stmt : 'case' expr 'of'
+                cexpr (',' cexpr)* ':' statement
+                ( cexpr (',' cexpr)* ':' statement )*
+                ('otherwise' statement)*
+            'end' 'case'
+        ;
+
+// Explicit block: one or more statements between 'block' and 'end' 'block'.
+block_stmt : 'block' statement+ 'end' 'block' ;
+
+// Procedure definition:
+//   procedure NAME ( [ parm { , parm } ] ) is  statements  end procedure
+// The 'is' keyword introduces the body, consistent with pseudo_proc_def,
+// and the parameter list between the parentheses may be empty.
+proc_def : 'procedure' IDENTIFIER '(' ( proc_parm (',' proc_parm)* )? ')' 'is'
+                statement+
+            'end' 'procedure'
+    ;
+
+// Function definition:
+//   function NAME ( [ parm { , parm } ] ) return TYPE is
+//       statements
+//   end function
+// The 'return' type 'is' header tail is consistent with pseudo_func_def,
+// and the parameter list between the parentheses may be empty.
+func_def : 'function' IDENTIFIER '(' ( proc_parm (',' proc_parm)* )? ')'
+                'return' type 'is'
+                statement+
+            'end' 'function'
+    ;
+
+// One formal parameter: optional 'volatile', a type, a direction
+// ('in', 'out' or 'in' 'out') and the parameter's name, in the same
+// "type direction IDENTIFIER" order that pseudo_proc_def uses.
+proc_parm : 'volatile'* type ('in' 'out' | 'in' | 'out') IDENTIFIER
+    ;
+
+// Pseudo-variable setter: procedure NAME'put ( TYPE in IDENT ) is ... end procedure
+pseudo_proc_def : 'procedure' IDENTIFIER '\'' 'put' '(' type 'in' IDENTIFIER ')' 'is'
+                statement+
+            'end' 'procedure'
+    ;
+
+// Pseudo-variable getter: function NAME'get return TYPE is ... end function
+pseudo_func_def : 'function' IDENTIFIER '\'' 'get' 'return' type 'is'
+                statement+
+            'end' 'function'
+    ;
+
+// alias NEW_NAME is EXISTING_NAME
+alias_def : 'alias' IDENTIFIER 'is' IDENTIFIER
+        ;
+
+// Constant definition: 'const', optional type, name, optional '[...]'
+// array dimensions, then '=' and a constant expression, a brace list,
+// another identifier or a string literal.
+const_def : 'const' vtype* IDENTIFIER ( '[' cexpr* ']' )* '='
+            ( cexpr | cexpr_list | IDENTIFIER | STRING_LITERAL )
+        ;
+
+// Variable definition: the 'var' prefix and first name, followed by one of:
+// further comma-separated names, an 'at' placement, an 'is' alias, or an
+// '=' initializer.
+var_def : var_decl1 var_decl2 (var_multi* | at_decl | is_decl | var_with_init)
+        ;
+
+// An additional variable name in the same definition.
+var_multi : ',' var_decl2
+        ;
+
+// '=' followed by the initial value.
+var_with_init : '=' var_init
+        ;
+
+// 'var', optional 'volatile', then the variable's type.
+var_decl1 : 'var' 'volatile'* vtype
+        ;
+
+// Variable name with optional '[...]' array dimensions.
+var_decl2 : IDENTIFIER ( '[' cexpr* ']' )*
+        ;
+
+// A type, optionally followed by '*' and a constant.
+vtype   :   type ('*' constant)*
+        ;
+
+// Placement: optional 'shared', then 'at' and either a constant expression
+// or an identifier (each optionally followed by ':' bit locations), or a
+// brace list of constant expressions.
+at_decl : ('shared')* 'at' ( ( cexpr bitloc* ) | ( IDENTIFIER bitloc* ) | cexpr_list )
+        ;
+
+// 'is' followed by the name of an existing identifier.
+is_decl : 'is' IDENTIFIER
+        ;
+
+// ':' followed by a constant, used after an 'at' location.
+bitloc  : ':' constant
+        ;
+
+//FIXME: this is wrong-- add proc/func calls to the expr handler instead
+// Call syntax as currently accepted: a name followed by parentheses that
+// contain zero or more identifiers (no commas, no expressions).
+proc_func_call   : IDENTIFIER '(' IDENTIFIER* ')'
+        ;
+
+// Values allowed on the right-hand side of a variable initializer.
+var_init : proc_func_call | cexpr | cexpr_list | STRING_LITERAL | CHARACTER_LITERAL | IDENTIFIER
+        ;
+
+// Built-in type keywords accepted by this grammar.
+type    :       'bit' | 'byte' | 'word' | 'dword'
+        | 'sbyte' | 'sword' | 'sdword'
+        ;
+
+// A 'pragma' line is consumed whole, through its terminating newline, and
+// sent to the hidden channel, so the parser never sees it.
+PRAGMA
+    : 'pragma' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
+    ;
+
+// all below-- borrowed/derived from Antlr C.g and Python.g examples.
+
+//or_test : and_test ('|' and_test)*
+//        ;
+//
+//and_test : not_test ('&' not_test)*
+//         ;
+//
+//not_test : '!' not_test
+//         | comparison
+//         ;
+//
+//comparison: expr (comp_op expr)*
+//          ;
+//
+//comp_op : '<' | '>' | '==' | '>=' | '<=' | '!='
+//        ;
+//
+// Expression rules, one per precedence level.  Binding strength increases
+// going down: '|', '^', '&', shifts, '+' '-', '*' '/' '%', then the unary
+// prefix operators, then atoms.  Comparison and logical operators are not
+// handled yet (see the commented-out rules above).
+expr : xor_expr ('|' xor_expr)*
+     ;
+
+xor_expr : and_expr ('^' and_expr)*
+         ;
+
+and_expr : shift_expr ('&' shift_expr)*
+         ;
+
+shift_expr : arith_expr (('<<'|'>>') arith_expr)*
+           ;
+
+arith_expr: term (('+'|'-') term)*
+          ;
+
+term : factor (('*' | '/' | '%' ) factor)*
+     ;
+
+// Unary plus, minus and bitwise complement; may be nested.
+factor : '+' factor
+       | '-' factor
+       | '~' factor
+       | atom
+       ;
+
+// Operands: a literal or a bare identifier.
+atom   :       CHARACTER_LITERAL
+        |       STRING_LITERAL
+        |       constant
+       |       IDENTIFIER
+       ;
+
+// Identifier: a letter or underscore followed by letters, underscores
+// or digits.
+IDENTIFIER : LETTER (LETTER|'0'..'9')* ;
+
+fragment
+LETTER : 'A'..'Z' | 'a'..'z' | '_' ;
+
+// Parser rule grouping the four numeric literal tokens.
+constant :  BIN_LITERAL | HEX_LITERAL | OCTAL_LITERAL | DECIMAL_LITERAL ;
+
+// '0b' / '0B' followed by binary digits; '_' is accepted among the digits.
+BIN_LITERAL : '0' ('b'|'B') ('0' | '1' | '_')+ ;
+
+// A lone '0', or a non-zero digit followed by digits (no '_' allowed here).
+DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) ;
+
+// '0x' / '0X' followed by one or more HexDigit characters.
+HEX_LITERAL : '0' ('x'|'X') HexDigit+ ;
+
+// A leading '0' followed by one or more octal digits.
+OCTAL_LITERAL : '0' ('0'..'7')+ ;
+
+// Exactly one character (or one escape sequence) between double quotes.
+// The excluded characters are the closing '"' delimiter and a bare
+// backslash, mirroring STRING_LITERAL; excluding '\'' instead would let
+// the delimiter itself be taken as the character.
+CHARACTER_LITERAL :   '"' ( EscapeSequence | ~('"'|'\\') ) '"'
+    ;
+
+// Zero or more characters or escape sequences between double quotes.
+STRING_LITERAL :  '"' ( EscapeSequence | ~('\\'|'"') )* '"'
+    ;
+
+// Hexadecimal digit; '_' is also accepted.
+fragment
+HexDigit : ('0'..'9'|'a'..'f'|'A'..'F'|'_') ;
+
+// Backslash followed by one of b t n f r " ' \ , or an octal escape.
+fragment EscapeSequence :   '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
+    |   OctalEscape
+    ;
+
+// Backslash followed by one to three octal digits; the three-digit form
+// requires a first digit in 0..3.
+fragment OctalEscape :   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
+    |   '\\' ('0'..'7') ('0'..'7')
+    |   '\\' ('0'..'7')
+    ;
+
+// A single whitespace character (space, CR, tab, form feed or newline),
+// sent to the hidden channel.
+WS  :  (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;}
+    ;
+
+// Comment introduced by '--' or ';' and running through the end of the
+// line, sent to the hidden channel.  The rule requires a terminating '\n'.
+LINE_COMMENT
+    : ('--' | ';') ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
+    ;
+
=======================================
--- /dev/null
+++ /trunk/grammar/jal.py       Fri Apr  9 08:24:41 2010
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+
+# bw 9 Jan 2010
+#
+import os.path
+import optparse
+from plex import *
+
+resword = Str(  "_debug", "_error", "_warn", "alias", "asm", "assembler",
+                "assert", "at", "bit", "block", "byte", "case", "const",
+                "defined", "dword", "else", "elsif", "end", "exit", "for", 
"forever",
+                "function", "if", "in", "include", "is", "loop", "of", "out", 
"pragma",
+                "procedure", "repeat", "return", "sbit", "sbyte", "sdword", 
"start",
+                "suspend", "sword", "task", "then", "until", "using", "var", 
"volatile",
+                "while", "word" )
+
+# Building-block patterns for the lexicon below.
+
+# '--' or ';' followed by everything up to the end of the line.
+comment = Str("--", ";") + Rep(AnyBut('\n')) + Eol
+# NOTE: '_' counts as a digit here, so it may appear inside numbers.
+digit = Range("09") | Str('_')
+hexdigit = Range("09AFaf") | Str('_')
+hexnum = Str("0x") + Rep1(hexdigit)
+binarydigit = Range("01") | Str('_')
+binarynum = Str("0b") + Rep1(binarydigit)
+letter = Range("AZaz") | Str('_')
+# NOTE: as written a name may also begin with a digit.
+name = Rep1(letter | digit )
+number = Rep1(digit)
+# Pseudo-variable accessor names: <name>'get and <name>'put.
+ps_get = name + Str("'get")
+ps_put = name + Str("'put")
+# qstring needs at least two characters between the quotes; a single
+# quoted character is matched by qascii instead.
+qstring = Str('"') + AnyBut('"') + Rep1(AnyBut('"')) + Str('"')
+qascii = Str('"') + AnyBut('"') + Str('"')
+space = Any(" \t\n")
+# Two-character operators.
+op2 = Str("<<", ">>", "<=", "==", "!=", ">=")
+
+def do_resword(token, word):
+    return word.lower()
+
+# Token table: each entry pairs a pattern with the value (or action) that
+# is produced when it matches.
+# NOTE(review): Plex is documented to prefer the longest match and, on a
+# tie, the pattern listed first -- which is presumably why reserved words
+# are listed before plain names; confirm against the Plex documentation.
+lex_table = Lexicon([
+ (comment, 'comment'), # we may want to see the comments while debugging
+    (qstring, 'qstring'),
+    (qascii, 'qascii'),
+    # reserved words are reported as their own lower-cased text
+    (resword, do_resword),
+    (NoCase(ps_put), 'ps_put'),
+    (NoCase(ps_get), 'ps_get'),
+    (NoCase(hexnum), 'hexnum'),
+    (NoCase(binarynum), 'binarynum'),
+    (number, 'number'),
+    (op2, 'op2'),
+    (name, 'id'),
+
+    # FIXME
+    # single-character punctuation and operators
+    (Any("%!|&.:^[],'(){}+-*/=<>\"\\"),  TEXT),
+
+    (space , IGNORE),
+])
+
+# Future: if we want to show a traceback of nested includes.
+# Entries are (current file, current line number, included file) tuples.
+include_stack = []
+
+# Every token read from the main file and its includes, in reading order.
+token_list = []
+# Directories searched for files; replaced by main() from the -s options.
+include_path = []
+
+# FIXME. need better approach to dealing with
+# all the places the file might be found.
+def find_and_open(p_fname):
+
+    # Make an exhaustive list, but we'll stop
+    # with the first one we can open correctly.
+    files = [p_fname]
+    for path in include_path:
+        files.append(os.path.join(path, p_fname))
+
+    for fname in files:
+        try:
+            f = open(fname, "r")
+        except:
+            pass
+        else:
+            return fname, f
+
+    print "Open failed on file: " , p_fname
+    return p_fname, None
+
+def lex_include(cur_file, cur_lineno, inc_file):
+    print "do include: ", cur_file, cur_lineno, inc_file
+    include_stack.append( (cur_file, cur_lineno, inc_file) )
+    # recursive call to lexan
+    lexan(inc_file)
+    include_stack.pop()
+
+def lexan(p_fname):
+    print "lexan file: ", p_fname
+    fname, f = find_and_open(p_fname)
+    if f is None:
+        return
+
+    scanner = Scanner(lex_table, f, fname)
+    while True:
+        token = scanner.read()
+        if not token[0]:
+            break
+        if token[0] == 'include':
+            cur_fname, cur_lineno, cur_pos = scanner.position()
+            token = scanner.read()
+            inc_file = token[1] + ".jal"
+            lex_include(cur_fname, cur_lineno, inc_file)
+        else:
+            #print "token = ", token
+            token_list.append( token )
+
+def parse():
+    print "parsing file"
+    for t in token_list:
+        # supress comments for now
+        if t[0] != 'comment':
+            print "token = ", t
+
+def compile(fname):
+    """Lex fname (following its includes), then run parse() on the tokens.
+
+    NOTE: within this module the name shadows Python's built-in compile().
+    """
+    lexan(fname)
+    parse()
+
+def main():
+    p = optparse.OptionParser()
+    p.add_option("-s", action="append", dest="ipath")
+    opts, args = p.parse_args()
+    global include_path
+    include_path = opts.ipath
+
+    for fname in args:
+        compile(fname)
+
+if __name__ == '__main__':
+    main()
+
=======================================
--- /dev/null
+++ /trunk/grammar/jaltest.py   Fri Apr  9 08:24:41 2010
@@ -0,0 +1,46 @@
+# bvwelch 23 jan, '10
+
+# this program tests the JAL grammar, and was derived from
+# an example found here: http://www.antlr.org/wiki/display/ANTLR3/Example
+
+import sys
+import antlr3
+import antlr3.tree
+
+# the lexer and parser are generated by Antlr, like this:
+#    java -cp antlr-3.1.2.jar org.antlr.Tool jal.g
+
+from jalLexer import *
+from jalParser import *
+
+# Lexing stage: the input file name is fixed to 'a.jal' in the current
+# directory.
+f = open('a.jal')
+char_stream = antlr3.ANTLRInputStream(f)
+lexer = jalLexer(char_stream)
+tokens = antlr3.CommonTokenStream(lexer)
+
+# Debugging aid (disabled): dump the raw token stream and stop.
+#print "dumping our lexer tokens"
+#for x in tokens.getTokens():
+    #print x.getLine(), x.getText()
+#print "done with lexer tokens"
+#exit(1)
+
+# Parsing stage: 'program' is the grammar's entry rule.
+print "setting up parser"
+parser = jalParser(tokens)
+print "calling parser"
+r = parser.program()
+print "back from parser"
+#exit(1)
+
+# this is the root of the AST
+root = r.tree
+
+# Walk the tree as a flat node stream and print every node.
+nodes = antlr3.tree.CommonTreeNodeStream(root)
+nodes.setTokenStream(tokens)
+
+print "dumping our parser/tree nodes"
+for x in nodes:
+    print x.toString()
+print "done with parser/tree nodes"
+
+exit()
+

--
You received this message because you are subscribed to the Google Groups 
"jallib" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to 
[email protected].
For more options, visit this group at 
http://groups.google.com/group/jallib?hl=en.

Reply via email to