Repository: jena
Updated Branches:
  refs/heads/rat-checks cb46780bd -> 525988312


Add license


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/06339ad3
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/06339ad3
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/06339ad3

Branch: refs/heads/rat-checks
Commit: 06339ad30ce682cb5d123c5369eae396901e4954
Parents: cb46780
Author: Andy Seaborne <a...@apache.org>
Authored: Tue Oct 21 16:39:10 2014 +0100
Committer: Andy Seaborne <a...@apache.org>
Committed: Tue Oct 21 16:39:10 2014 +0100

----------------------------------------------------------------------
 jena-arq/Grammar/sse/grammar-sse |  16 +
 jena-arq/Grammar/sse/jj2html     | 922 +++++++++++++++++-----------------
 jena-arq/Grammar/sse/sse.jj      | 490 +++++++++---------
 jena-arq/Grammar/sse/tokens.inc  | 334 ++++++------
 4 files changed, 906 insertions(+), 856 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/06339ad3/jena-arq/Grammar/sse/grammar-sse
----------------------------------------------------------------------
diff --git a/jena-arq/Grammar/sse/grammar-sse b/jena-arq/Grammar/sse/grammar-sse
index 3502d78..a675f70 100644
--- a/jena-arq/Grammar/sse/grammar-sse
+++ b/jena-arq/Grammar/sse/grammar-sse
@@ -1,4 +1,20 @@
 #!/bin/bash
+## Licensed to the Apache Software Foundation (ASF) under one
+## or more contributor license agreements.  See the NOTICE file
+## distributed with this work for additional information
+## regarding copyright ownership.  The ASF licenses this file
+## to you under the Apache License, Version 2.0 (the
+## "License"); you may not use this file except in compliance
+## with the License.  You may obtain a copy of the License at
+##
+##     http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing, software
+## distributed under the License is distributed on an "AS IS" BASIS,
+## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+## See the License for the specific language governing permissions and
+## limitations under the License.
+
 # Parser builder
 
 GRAMMAR="${GRAMMAR:-"sse.jj"}"

http://git-wip-us.apache.org/repos/asf/jena/blob/06339ad3/jena-arq/Grammar/sse/jj2html
----------------------------------------------------------------------
diff --git a/jena-arq/Grammar/sse/jj2html b/jena-arq/Grammar/sse/jj2html
index 9451deb..0f83ee7 100644
--- a/jena-arq/Grammar/sse/jj2html
+++ b/jena-arq/Grammar/sse/jj2html
@@ -1,453 +1,469 @@
-#!/bin/perl
-# Grammar into HTML
-# Read in sparql.txt and the tokens.txt file
-
-## ToDo:
-## Check tokens exist and are used
-## Validate
-
-
-if ( $#ARGV != 1 )
-{
-    print STDERR "Usage: grammar.txt tokens.txt\n" ;
-    exit 1 ;
-}
-
-
-
-$/ = undef ;
-# Just table or full page.
-$TABLE = 1 ;
-
-$grammarFile = $ARGV[0] ;
-$tokensFile = $ARGV[1] ;
-
-## $grammarFile = 's.txt' ;
-## $tokensFile = 't.txt' ;
-
-
-$grammar = &readFile($grammarFile) ;
-$tokens = &readFile($tokensFile) ;
-
-$grammar =~ s!DOCUMENT START!! ;
-$grammar =~ s!NON-TERMINALS!! ;
-$grammar =~ s!DOCUMENT END!! ;
-
-$grammar =~ s!//.*!!g ;
-$grammar =~ s!\r!!g ;
-
-# remove leading whitespace
-$grammar =~ s!^[\n\s]*!\n! ;
-
-# Merge alts
-$grammar =~ s!\n\s*\|!\ |!g ;
-
-$tokens =~ s!//.*!!g ;
-$tokens =~ s!\r!!g ;
-
-
-## Grammar
-#print "GRAMMAR\n" ;
-
-@g = split(/\n\s*/, $grammar) ;
-
-@rules = () ;
-%ruleMap = () ;
-%tokenMap = () ;
-%inline = () ;
-
-# Grammar rules
-# Direct from "jjdoc -TEXT=true"
-
-for $g (@g)
-{
-    ($rulename, $rulebody) = split(/:=/,$g) ;
-
-    $rulename =~ s!^\s*!! ;
-    $rulename =~ s!\s*$!! ;
-
-    $rulebody =~ s!^\s*!! ;
-    $rulebody =~ s!\s*$!! ;
-    
-    # Remove outer brackets
-#    $rulebody =~ s!^\((.*)\)$!$1! ;
-
-    # Remove <> around tokens in grammar.
-    ## Now done very late (as &lt;&gt;) in fixups.
-    ## $rulebody =~ s/\<(\w+)\>/$1/g ;
-    # Leave in - so tokens distinguished from rules
-
-    next if $rulename eq '' ;
-    #next if $rulebody eq '' ;
-
-    # Skip the root rule.
-    next if ( $rulename eq 'CompilationUnit' ) ;
-
-    $rulebody = 'Perl 5 regular expression'
-       if ( $rulename eq 'PatternLiteral' ) ;
-
-    push @rules, $rulename ;
-    warn "Duplicate rule (grammar): $rulename\n" if defined($ruleMap{$rulename}) ;
-    $ruleMap{$rulename} = $rulebody ;
-
-##     print "----------\n" ;
-##     print $rulename,"\n" ;
-##     print $rulebody,"\n" ;
-
-}
-
-
-# Tokens
-# Produced by "jj2tokens"
-# Hand edited to indicate the inlines
-
-$tokens =~ s/\n+/\n/g ;
-$tokens =~ s/^\n// ;
-
-@t = split(/\n(?=\<|\[)/, $tokens) ;
-
-for $t (@t)
-{
-    ($tokenname,$tokenbody) = split(/::=/, $t) ;
-    $tokenname =~ s!^\s*!! ;
-    $tokenname =~ s!\s*$!! ;
-
-##     # remove <> around tokens
-## Do very late as a formatting fix up.
-##     $tokenname =~ s/^\<// ;
-##     $tokenname =~ s/\>$// ;
-
-    $tokenname =~ s/#// ;
-    
-    $tokenbody =~ s!^\s*!! ;
-    $tokenbody =~ s!\s*$!! ;
-    
-    # <> round tokens
-    # Remove at last minute.
-
-    # Remove outer ()
-    # $tokenbody =~ s!^\((.*)\)$!$1! ;
-
-    # Inline?
-    if ( $tokenname =~ /^\[\<\w*\>\]/ )
-    {
-       warn "Duplicate inline (token): $tokenname\n" if defined($inline{$tokenname}) ;
-       $tokenname =~ s/^\[//g ;
-       $tokenname =~ s/\]$//g ;
-       $tokenbody =~ s/"/'/g ; # '" -- But not literal " -- how?
-        $tokenbody =~ s/\<\>\'\{\}/\<\>\"\{\}/ ; # '" IRI fixup
-
-       $inline{$tokenname} = $tokenbody ;
-
-       #print "INLINE: ",$tokenname," => ",$tokenbody,"\n" ;
-    }
-    else
-    {
-       push @rules, $tokenname ;
-       warn "Duplicate rule (token): $tokenname\n" if defined($tokenMap{$tokenname}) ;
-       $ruleMap{$tokenname} = $tokenbody ; 
-    }
-}
-
-# Table
-
-if ( ! $TABLE )
-{
-    print "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" ;
-    print "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n" ;
-    print "    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" ;
-    print "\n" ;
-
-    print "<html>\n";
-    print "<head>\n";
-    print "<title>SPARQL Grammar</title>\n" ;
-    print "<style type=\"text/css\">\n" ;
-
- # .token inline
- # .ruleHead
- # .ruleBody
-
-    print <<'EOF' ;
-div.grammarTable table * { border-width: 0 ; }
-div.grammarTable table * tr { border: 1px solid black ; }
-
-.grammar     { text-align: left ; vertical-align: top ; }
-.token       { color: #3f3f5f; }
-.gRuleHead   { font-style: italic ; font-family: monospace ; }
-.gRuleBody   { font-family: monospace ; }
-.gRuleLabel  { font-family: monospace ; }
-EOF
-
-     print "</style>\n" ;
-     print "</head>\n";
-     print "<body>\n";
-
-    print "\n" ;
-}
-
-print "<div class=\"grammarTable\">\n" ;
-print "  <table><tbody>\n" ;
-
-$ruleNum = 0 ;
-
-for $r (@rules)
-{
-    $DEBUG = 0 ;
-    $ruleNum++ ;
-    $rulename = $r ;
-    
-    $rulebody = $ruleMap{$rulename} ;
-
-##    $DEBUG = 1 if ( $rulename =~ /Prolog/ ) ;
-
-    $rb = $rulebody ;
-
-    if ( $DEBUG )
-    {
-       print STDERR "\n" ;
-       print STDERR "Rule: $rulename\n" ; 
-       print STDERR "Body: $rulebody\n" ; 
-    }
-
-    ## Do before '||' substitution
-    # Not perfect - some fixups later.
-    #$rb =~ s%\|%\<br/\>\|%g ;
-
-    # Escape HTML chars before adding markup.
-    $rb = esc($rb) ;
-    
-    # Inlines
-    for $k (keys %inline)
-    {
-       $s = span('token', $inline{$k}) ;
-       $k = esc($k) ;
-       # Assumes escaped <> round tokens.
-       $k = quotemeta $k ;
-       $rb =~ s/$k/$s/g ;
-
-    }
-
-    if ( $DEBUG )
-    {
-       print STDERR "After inlining\n" ;
-       print STDERR $rb,"\n" ; ; 
-    }
-
-
-    # Add hrefs - issue if one is a substring of another \W helps.
-
-    for $k (keys %ruleMap)
-    {
-       $s = href("r-".$k,$k) ;
-
-       $k = esc($k) ;
-       $k = quotemeta $k ;
-
-##     if ( $DEBUG )
-##     {
-##       print STDERR "K:$k\n" ;  
-##     }
-
-
-       $rb =~ s/(?=\W)(\s*)$k(\s*)(?=\W)/$1$s$2/g ;
-       $rb =~ s/^$k(\s*)(?=\W)/$s$1/g ;
-       $rb =~ s/(?=\W)(\s*)$k$/$1$s/g ;
-       $rb =~ s/^$k$/$s/g ;
-    }
-    
-    if ( $DEBUG )
-    {
-       print STDERR "After hrefs\n" ;
-       print STDERR $rb,"\n" ; ; 
-    }
-
-    #exit if $ruleNum > 2 ;
-
-    $rn = anchor("r-".$rulename, $rulename) ;
-    $rn = fixupHead($rn) ;
-
-    print "\n" ;
-    print "<tr valign=\"baseline\">\n" ;
-    $rlabel = '[' . $ruleNum .  ']&nbsp;&nbsp;' ;
-
-    print "  <td>",code('gRuleLabel', $rlabel),"</td>\n" ;
-
-    #print "  <td>",span('gRuleHead', $rn),"</td>\n" ;
-    print "  <td>",code('gRuleHead',$rn),"</td>\n" ;
-
-    print "  <td>&nbsp;&nbsp;::=&nbsp;&nbsp;</td>\n" ;
-    
-    $rb = fixupRule($rulename, $rb) ;
-    print "  <td>",code('gRuleBody',$rb),"</td>\n" ;
-
-    print "</tr>\n" ;
-
-#    $rule{$rulename, $rulebody) ;
-#    print $rulename , "\n" ;
-}
-
-print "  </tbody></table>\n" ;
-print "</div>\n" ;
-
-if ( !$TABLE )
-{
-    print "\n" ;
-    print "</body>\n" ;
-    print "</html>\n" ;
-}
-
-sub readFile
-{
-    my $f = $_[0] ;
-    open(F, "$f") || die "$!"; 
-    my $s = <F> ;
-    return $s ;
-}
-
-sub esc
-{
-    my $s = $_[0] ;
-    $s =~ s/&/&amp;/g ; 
-    $s =~ s/</&lt;/g ; 
-    $s =~ s/>/&gt;/g ; 
-    return $s ;
-}
-
-sub span
-{
-    my $c = $_[0] ;
-    my $t = $_[1] ;
-    $t = esc($t) ;
-    my $s = '<span class="' . $c . '">' . $t . '</span>' ;
-    return $s ;
-}
-
-sub href
-{
-    my $a = $_[0] ;
-    my $t = $_[1] ;
-    $a = sane($a) ;
-    $t = esc($t) ;
-    my $s = '<a href="#' . $a . '">' . $t . '</a>' ;
-    return $s ;
-}
-
-sub anchor
-{
-    my $a = $_[0] ;
-    my $t = $_[1] ;
-    $a = sane($a) ;
-    $t = esc($t) ;
-    my $s = '<a id="' . $a . '" name="' . $a . '">' . $t . '</a>' ;
-    return $s ;
-}
-
-sub sane
-{
-   my $a = $_[0] ;
-   $a =~ s/\W//g ;
-   return $a ;
-}
-
-sub code
-{
-    my $c = $_[0] ;
-    my $t = $_[1] ;
-    return '<code class="' . $c . '">' . $t . '</code>' ;
-}
-
-sub fixupHead
-{
-    my $head = $_[0] ;
-    # Remove <> around tokens.
-    $head =~ s/&lt;(\w+)&gt;/$1/g ;
-    return $head ;
-}
-
-sub fixupRule
-{
-    my $head = $_[0] ;
-    my $body = $_[1] ;
-
-    # Remove unnecessary ()
-    $body =~ s/\(\s*([^()| ]*) \)/$1/g ;
-
-##     if ( $body =~ m!\(\s+(\<a[^>]*\>[^<>]*\</a\>)\s+\)! )
-##     {
-##     $b = $body ;
-##     print "================================\n" ;
-##     print STDERR "$b\n" ;
-##     print STDERR "--------\n" ;
-##     $b =~ s!\(\s+(\<a[^>]*\>[^<>]*\</a\>)\s+\)!$1!g ;
-##     $b =~ s!\(\s+(\<span[^>]*\>[^<>]*\</span\>)\s+\)!$1!g ;
-##     print STDERR "$b\n" ;
-##     print STDERR "=====\n" ;
-##     print STDERR "\n" ;
-##     }
-
-
-    # Remove outer matching () where there are no inner ()
-    $body =~ s/^\(\s+([^\(]*)\s+\)$/$1/ ;
-
-    # ( A )* => A* and for + and ? where A is a linked or spanned object
-    $body =~ s!\(\s+(\<a[^>]*\>[^<>]*\</a\>)\s+\)!$1!g ;
-    $body =~ s!\(\s+(\<span[^>]*\>[^<>]*\</span\>)\s+\)!$1!g ;
-
-    # There aren't any of these
-##    $body =~ s!\(\s+(\S*)\s+\)!$1!g ;
-
-    # Remove <> around tokens.
-    $body =~ s/&lt;(\w+)&gt;/$1/g ;
-
-    # Specials
-    # Split long bodies
-    if ( $head eq "CallExpression" ||
-        $head eq "UnaryExpression" ||
-        $head eq "<NCCHAR1p>" ||
-        $head eq "PatternElement" ||
-        $head eq "BuiltInCall" )
-    {
-       $body =~ s%\|%\<br/\>\|%g ;
-       $body =~ s/^\s+// ;
-       $body = "&nbsp;&nbsp;".$body ;
-    }
-
-    if ( $head eq "RelationalExpression" ||
-        $head eq "AdditiveExpression" ||
-        $head eq "MultiplicativeExpression" ||
-        $head eq "ConditionalOrExpression")
-    {
-       $body =~ s%\*\(%<br/>\(% ;
-    }
-
-    # These failed the outer () test because they have nested () in them
-    if (  $head eq "QueryPattern" ||
-          $head eq "OrderCondition" )
-    {
-       # Remove outer ()
-       $body =~ s/^\((.*)\)$/$1/ ;     
-    }
-
-    if (  $head eq "Query" )
-    {
-       $body =~ s! \(!<br/>\(! ;
-       $body =~ s!\) !\)<br/>! ;
-    }
-
-    if (  $head =~ m/(Select|Construct|Describe|Ask)Query/ )
-    {
-       # Put a line break before the DatasetClause
-       # <a href="#rDatasetClause">DatasetClause</a>
-       $c = '<a href="#rDatasetClause">DatasetClause</a>' ;
-       $c = quotemeta $c ;
-       # Expects the dataset clause to be unbracketted
-       $body =~ s!(\(\s*$c)!<br/>$1! ;
-    }
-
-    if ( $head eq "OrderCondition" )
-    {
-       $body =~ s!\)\s*\|\s*\(!\)<br/>\| \(! ;
-       $body = "  ".$body ;
-    }
-    return $body ;
-}
+#!/bin/perl
+## Licensed to the Apache Software Foundation (ASF) under one
+## or more contributor license agreements.  See the NOTICE file
+## distributed with this work for additional information
+## regarding copyright ownership.  The ASF licenses this file
+## to you under the Apache License, Version 2.0 (the
+## "License"); you may not use this file except in compliance
+## with the License.  You may obtain a copy of the License at
+##
+##     http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing, software
+## distributed under the License is distributed on an "AS IS" BASIS,
+## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+## See the License for the specific language governing permissions and
+## limitations under the License.
+
+# Grammar into HTML
+# Read in sparql.txt and the tokens.txt file
+
+## ToDo:
+## Check tokens exist and are used
+## Validate
+
+
+if ( $#ARGV != 1 )
+{
+    print STDERR "Usage: grammar.txt tokens.txt\n" ;
+    exit 1 ;
+}
+
+
+
+$/ = undef ;
+# Just table or full page.
+$TABLE = 1 ;
+
+$grammarFile = $ARGV[0] ;
+$tokensFile = $ARGV[1] ;
+
+## $grammarFile = 's.txt' ;
+## $tokensFile = 't.txt' ;
+
+
+$grammar = &readFile($grammarFile) ;
+$tokens = &readFile($tokensFile) ;
+
+$grammar =~ s!DOCUMENT START!! ;
+$grammar =~ s!NON-TERMINALS!! ;
+$grammar =~ s!DOCUMENT END!! ;
+
+$grammar =~ s!//.*!!g ;
+$grammar =~ s!\r!!g ;
+
+# remove leading whitespace
+$grammar =~ s!^[\n\s]*!\n! ;
+
+# Merge alts
+$grammar =~ s!\n\s*\|!\ |!g ;
+
+$tokens =~ s!//.*!!g ;
+$tokens =~ s!\r!!g ;
+
+
+## Grammar
+#print "GRAMMAR\n" ;
+
+@g = split(/\n\s*/, $grammar) ;
+
+@rules = () ;
+%ruleMap = () ;
+%tokenMap = () ;
+%inline = () ;
+
+# Grammar rules
+# Direct from "jjdoc -TEXT=true"
+
+for $g (@g)
+{
+    ($rulename, $rulebody) = split(/:=/,$g) ;
+
+    $rulename =~ s!^\s*!! ;
+    $rulename =~ s!\s*$!! ;
+
+    $rulebody =~ s!^\s*!! ;
+    $rulebody =~ s!\s*$!! ;
+    
+    # Remove outer brackets
+#    $rulebody =~ s!^\((.*)\)$!$1! ;
+
+    # Remove <> around tokens in grammar.
+    ## Now done very late (as &lt;&gt;) in fixups.
+    ## $rulebody =~ s/\<(\w+)\>/$1/g ;
+    # Leave in - so tokens distinguished from rules
+
+    next if $rulename eq '' ;
+    #next if $rulebody eq '' ;
+
+    # Skip the root rule.
+    next if ( $rulename eq 'CompilationUnit' ) ;
+
+    $rulebody = 'Perl 5 regular expression'
+       if ( $rulename eq 'PatternLiteral' ) ;
+
+    push @rules, $rulename ;
+    warn "Duplicate rule (grammar): $rulename\n" if defined($ruleMap{$rulename}) ;
+    $ruleMap{$rulename} = $rulebody ;
+
+##     print "----------\n" ;
+##     print $rulename,"\n" ;
+##     print $rulebody,"\n" ;
+
+}
+
+
+# Tokens
+# Produced by "jj2tokens"
+# Hand edited to indicate the inlines
+
+$tokens =~ s/\n+/\n/g ;
+$tokens =~ s/^\n// ;
+
+@t = split(/\n(?=\<|\[)/, $tokens) ;
+
+for $t (@t)
+{
+    ($tokenname,$tokenbody) = split(/::=/, $t) ;
+    $tokenname =~ s!^\s*!! ;
+    $tokenname =~ s!\s*$!! ;
+
+##     # remove <> around tokens
+## Do very late as a formatting fix up.
+##     $tokenname =~ s/^\<// ;
+##     $tokenname =~ s/\>$// ;
+
+    $tokenname =~ s/#// ;
+    
+    $tokenbody =~ s!^\s*!! ;
+    $tokenbody =~ s!\s*$!! ;
+    
+    # <> round tokens
+    # Remove at last minute.
+
+    # Remove outer ()
+    # $tokenbody =~ s!^\((.*)\)$!$1! ;
+
+    # Inline?
+    if ( $tokenname =~ /^\[\<\w*\>\]/ )
+    {
+       warn "Duplicate inline (token): $tokenname\n" if defined($inline{$tokenname}) ;
+       $tokenname =~ s/^\[//g ;
+       $tokenname =~ s/\]$//g ;
+       $tokenbody =~ s/"/'/g ; # '" -- But not literal " -- how?
+        $tokenbody =~ s/\<\>\'\{\}/\<\>\"\{\}/ ; # '" IRI fixup
+
+       $inline{$tokenname} = $tokenbody ;
+
+       #print "INLINE: ",$tokenname," => ",$tokenbody,"\n" ;
+    }
+    else
+    {
+       push @rules, $tokenname ;
+       warn "Duplicate rule (token): $tokenname\n" if defined($tokenMap{$tokenname}) ;
+       $ruleMap{$tokenname} = $tokenbody ; 
+    }
+}
+
+# Table
+
+if ( ! $TABLE )
+{
+    print "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" ;
+    print "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n" ;
+    print "    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" ;
+    print "\n" ;
+
+    print "<html>\n";
+    print "<head>\n";
+    print "<title>SPARQL Grammar</title>\n" ;
+    print "<style type=\"text/css\">\n" ;
+
+ # .token inline
+ # .ruleHead
+ # .ruleBody
+
+    print <<'EOF' ;
+div.grammarTable table * { border-width: 0 ; }
+div.grammarTable table * tr { border: 1px solid black ; }
+
+.grammar     { text-align: left ; vertical-align: top ; }
+.token       { color: #3f3f5f; }
+.gRuleHead   { font-style: italic ; font-family: monospace ; }
+.gRuleBody   { font-family: monospace ; }
+.gRuleLabel  { font-family: monospace ; }
+EOF
+
+     print "</style>\n" ;
+     print "</head>\n";
+     print "<body>\n";
+
+    print "\n" ;
+}
+
+print "<div class=\"grammarTable\">\n" ;
+print "  <table><tbody>\n" ;
+
+$ruleNum = 0 ;
+
+for $r (@rules)
+{
+    $DEBUG = 0 ;
+    $ruleNum++ ;
+    $rulename = $r ;
+    
+    $rulebody = $ruleMap{$rulename} ;
+
+##    $DEBUG = 1 if ( $rulename =~ /Prolog/ ) ;
+
+    $rb = $rulebody ;
+
+    if ( $DEBUG )
+    {
+       print STDERR "\n" ;
+       print STDERR "Rule: $rulename\n" ; 
+       print STDERR "Body: $rulebody\n" ; 
+    }
+
+    ## Do before '||' substitution
+    # Not perfect - some fixups later.
+    #$rb =~ s%\|%\<br/\>\|%g ;
+
+    # Escape HTML chars before adding markup.
+    $rb = esc($rb) ;
+    
+    # Inlines
+    for $k (keys %inline)
+    {
+       $s = span('token', $inline{$k}) ;
+       $k = esc($k) ;
+       # Assumes escaped <> round tokens.
+       $k = quotemeta $k ;
+       $rb =~ s/$k/$s/g ;
+
+    }
+
+    if ( $DEBUG )
+    {
+       print STDERR "After inlining\n" ;
+       print STDERR $rb,"\n" ; ; 
+    }
+
+
+    # Add hrefs - issue if one is a substring of another \W helps.
+
+    for $k (keys %ruleMap)
+    {
+       $s = href("r-".$k,$k) ;
+
+       $k = esc($k) ;
+       $k = quotemeta $k ;
+
+##     if ( $DEBUG )
+##     {
+##       print STDERR "K:$k\n" ;  
+##     }
+
+
+       $rb =~ s/(?=\W)(\s*)$k(\s*)(?=\W)/$1$s$2/g ;
+       $rb =~ s/^$k(\s*)(?=\W)/$s$1/g ;
+       $rb =~ s/(?=\W)(\s*)$k$/$1$s/g ;
+       $rb =~ s/^$k$/$s/g ;
+    }
+    
+    if ( $DEBUG )
+    {
+       print STDERR "After hrefs\n" ;
+       print STDERR $rb,"\n" ; ; 
+    }
+
+    #exit if $ruleNum > 2 ;
+
+    $rn = anchor("r-".$rulename, $rulename) ;
+    $rn = fixupHead($rn) ;
+
+    print "\n" ;
+    print "<tr valign=\"baseline\">\n" ;
+    $rlabel = '[' . $ruleNum .  ']&nbsp;&nbsp;' ;
+
+    print "  <td>",code('gRuleLabel', $rlabel),"</td>\n" ;
+
+    #print "  <td>",span('gRuleHead', $rn),"</td>\n" ;
+    print "  <td>",code('gRuleHead',$rn),"</td>\n" ;
+
+    print "  <td>&nbsp;&nbsp;::=&nbsp;&nbsp;</td>\n" ;
+    
+    $rb = fixupRule($rulename, $rb) ;
+    print "  <td>",code('gRuleBody',$rb),"</td>\n" ;
+
+    print "</tr>\n" ;
+
+#    $rule{$rulename, $rulebody) ;
+#    print $rulename , "\n" ;
+}
+
+print "  </tbody></table>\n" ;
+print "</div>\n" ;
+
+if ( !$TABLE )
+{
+    print "\n" ;
+    print "</body>\n" ;
+    print "</html>\n" ;
+}
+
+sub readFile
+{
+    my $f = $_[0] ;
+    open(F, "$f") || die "$!"; 
+    my $s = <F> ;
+    return $s ;
+}
+
+sub esc
+{
+    my $s = $_[0] ;
+    $s =~ s/&/&amp;/g ; 
+    $s =~ s/</&lt;/g ; 
+    $s =~ s/>/&gt;/g ; 
+    return $s ;
+}
+
+sub span
+{
+    my $c = $_[0] ;
+    my $t = $_[1] ;
+    $t = esc($t) ;
+    my $s = '<span class="' . $c . '">' . $t . '</span>' ;
+    return $s ;
+}
+
+sub href
+{
+    my $a = $_[0] ;
+    my $t = $_[1] ;
+    $a = sane($a) ;
+    $t = esc($t) ;
+    my $s = '<a href="#' . $a . '">' . $t . '</a>' ;
+    return $s ;
+}
+
+sub anchor
+{
+    my $a = $_[0] ;
+    my $t = $_[1] ;
+    $a = sane($a) ;
+    $t = esc($t) ;
+    my $s = '<a id="' . $a . '" name="' . $a . '">' . $t . '</a>' ;
+    return $s ;
+}
+
+sub sane
+{
+   my $a = $_[0] ;
+   $a =~ s/\W//g ;
+   return $a ;
+}
+
+sub code
+{
+    my $c = $_[0] ;
+    my $t = $_[1] ;
+    return '<code class="' . $c . '">' . $t . '</code>' ;
+}
+
+sub fixupHead
+{
+    my $head = $_[0] ;
+    # Remove <> around tokens.
+    $head =~ s/&lt;(\w+)&gt;/$1/g ;
+    return $head ;
+}
+
+sub fixupRule
+{
+    my $head = $_[0] ;
+    my $body = $_[1] ;
+
+    # Remove unnecessary ()
+    $body =~ s/\(\s*([^()| ]*) \)/$1/g ;
+
+##     if ( $body =~ m!\(\s+(\<a[^>]*\>[^<>]*\</a\>)\s+\)! )
+##     {
+##     $b = $body ;
+##     print "================================\n" ;
+##     print STDERR "$b\n" ;
+##     print STDERR "--------\n" ;
+##     $b =~ s!\(\s+(\<a[^>]*\>[^<>]*\</a\>)\s+\)!$1!g ;
+##     $b =~ s!\(\s+(\<span[^>]*\>[^<>]*\</span\>)\s+\)!$1!g ;
+##     print STDERR "$b\n" ;
+##     print STDERR "=====\n" ;
+##     print STDERR "\n" ;
+##     }
+
+
+    # Remove outer matching () where there are no inner ()
+    $body =~ s/^\(\s+([^\(]*)\s+\)$/$1/ ;
+
+    # ( A )* => A* and for + and ? where A is a linked or spanned object
+    $body =~ s!\(\s+(\<a[^>]*\>[^<>]*\</a\>)\s+\)!$1!g ;
+    $body =~ s!\(\s+(\<span[^>]*\>[^<>]*\</span\>)\s+\)!$1!g ;
+
+    # There aren't any of these
+##    $body =~ s!\(\s+(\S*)\s+\)!$1!g ;
+
+    # Remove <> around tokens.
+    $body =~ s/&lt;(\w+)&gt;/$1/g ;
+
+    # Specials
+    # Split long bodies
+    if ( $head eq "CallExpression" ||
+        $head eq "UnaryExpression" ||
+        $head eq "<NCCHAR1p>" ||
+        $head eq "PatternElement" ||
+        $head eq "BuiltInCall" )
+    {
+       $body =~ s%\|%\<br/\>\|%g ;
+       $body =~ s/^\s+// ;
+       $body = "&nbsp;&nbsp;".$body ;
+    }
+
+    if ( $head eq "RelationalExpression" ||
+        $head eq "AdditiveExpression" ||
+        $head eq "MultiplicativeExpression" ||
+        $head eq "ConditionalOrExpression")
+    {
+       $body =~ s%\*\(%<br/>\(% ;
+    }
+
+    # These failed the outer () test because they have nested () in them
+    if (  $head eq "QueryPattern" ||
+          $head eq "OrderCondition" )
+    {
+       # Remove outer ()
+       $body =~ s/^\((.*)\)$/$1/ ;     
+    }
+
+    if (  $head eq "Query" )
+    {
+       $body =~ s! \(!<br/>\(! ;
+       $body =~ s!\) !\)<br/>! ;
+    }
+
+    if (  $head =~ m/(Select|Construct|Describe|Ask)Query/ )
+    {
+       # Put a line break before the DatasetClause
+       # <a href="#rDatasetClause">DatasetClause</a>
+       $c = '<a href="#rDatasetClause">DatasetClause</a>' ;
+       $c = quotemeta $c ;
+       # Expects the dataset clause to be unbracketted
+       $body =~ s!(\(\s*$c)!<br/>$1! ;
+    }
+
+    if ( $head eq "OrderCondition" )
+    {
+       $body =~ s!\)\s*\|\s*\(!\)<br/>\| \(! ;
+       $body = "  ".$body ;
+    }
+    return $body ;
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/06339ad3/jena-arq/Grammar/sse/sse.jj
----------------------------------------------------------------------
diff --git a/jena-arq/Grammar/sse/sse.jj b/jena-arq/Grammar/sse/sse.jj
index f69e992..e04e907 100644
--- a/jena-arq/Grammar/sse/sse.jj
+++ b/jena-arq/Grammar/sse/sse.jj
@@ -1,245 +1,245 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-options
-{
-  // Use \ u escapes in streams AND use a reader for the query
-  // => get both raw and escaped unicode
-   JAVA_UNICODE_ESCAPE   = true ;
-   UNICODE_INPUT         = false ;
-
-  STATIC                = false ;
-//   DEBUG_PARSER          = true ;
-//   DEBUG_TOKEN_MANAGER   = true ;
-}
-
-PARSER_BEGIN(SSE_ParserCore)
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.hp.hpl.jena.sparql.sse.lang.parser ;
-
-import com.hp.hpl.jena.sparql.sse.lang.ParserSSEBase ;
-    
-
-public class SSE_ParserCore extends ParserSSEBase
-{
-}
-
-PARSER_END(SSE_ParserCore)
-
-// Now has explicit WS control in the grammar.
-// Policy - eat trailing WS
-
-// ---- Entry points : check for EOF.
-
-void parse() : { }
-{
-    { parseStart() ; }
-    (<WS>)*
-    TermOrList()
-    <EOF>
-    { parseFinish() ; }
-}
-
-void term() : { }
-{
-    { parseStart() ; }
-    Term()
-    <EOF>
-    { parseFinish() ; }
-}
-
-// ----
-
-void TermOrList() : { }
-{
-  ( Term() (<WS>)* |  List() )
-}
-
-void List() : { Token t ; }
-{
-    // The OP token must exclude these
-  ( t = <LPAREN> 
-    (<WS>)*
-    { listStart(t.beginLine, t.beginColumn) ; }
-    BareList() 
-    t = <RPAREN>
-    (<WS>)*
-    { listFinish(t.beginLine, t.beginColumn) ; }
-
-  | t = <LBRACKET> 
-    (<WS>)*
-    { listStart(t.beginLine, t.beginColumn) ; }
-    BareList() 
-    t = <RBRACKET>
-    (<WS>)*
-    { listFinish(t.beginLine, t.beginColumn) ; }  )
-}
-
-void BareList() : { }
-{
-  ( 
-     TermOrList()
-     // White space swallowed
-  )*
-}
-
-void Term() : { Token t ; }
-{
-    Symbol()
-  |
-    IRIref()
-  |
-    PrefixedName()
-  | 
-    Var()
-  |
-    Literal()
-  |
-    BlankNode()
-}
-
-
-void Symbol() : { Token t ; }
-{
-    t = <SYMBOL>
-    { emitSymbol(t.beginLine, t.beginColumn, t.image) ; }
-}
-
-void IRIref() : { Token t ; String s ; }
-{
-  t = <IRIref>
-    { 
-      s = t.image ;
-      s = stripQuotes(s) ;
-      s = unescapeStr(s, t.beginLine, t.beginColumn) ;
-      emitIRI(t.beginLine, t.beginColumn, s) ; } 
-}
-
-void PrefixedName() : { Token t ; }
-{
-  t = <PNAME>
-    { emitPName(t.beginLine, t.beginColumn, t.image) ; }
-}
-
-void Var() : { Token t ; }
-{
-    // VAR_NAMED:   "?"  and any legal SPARQL variable.
-    // VAR_NAMED2:  "?." and non-legal SPARQL variable (usually allocated)
-    // VAR_ANON:    "??" : Anon variables.
-
-
-    // Includes "?" as a variable which allocated one from ?0, ?1, ?2 
-    //    Legal SPARQL syntax.
-    // Includes "??" as a variable for anon non-distinguished variables.
-    // Includes non-distinguished variables as ??0
-    // Includes internal allocated variables as ?.0
-
-//  ( t = <VAR_NAMED> | t = <VAR_NAMED2> | t = <VAR_ANON> )
-  ( t = <VAR_NAMED> | t = <VAR_OTHER> )
-  { emitVar(t.beginLine, t.beginColumn, stripChars(t.image, 1)) ; }
-}
-
-void Literal() : { }
-{
- ( RDFLiteral()
- | NumericLiteral()
-// | BooleanLiteral() // Do as a symbol.
- )
-}
-
-void BlankNode() : { Token t ; }
-{
-  t = <BLANK_NODE_LABEL>
-    { emitBNode(t.beginLine, t.beginColumn, stripChars(t.image, 2)) ; }  
-//|
-//  t = <LBRACKET> <RBRACKET> { return emitBNode(t.beginLine, t.beginColumn) ; }
-//  t = <ANON> { return emitBNode(t.beginLine, t.beginColumn) ; }
-}
-
-void RDFLiteral() : { Token t = null ; int currLine ; int currColumn ;
-                      String lex ; String lang = null ;
-                      String dt_iri = null ; String dt_pn = null ; }
-{
-  ( t = <STRING_LITERAL1> { lex = stripQuotes(t.image) ; }
-  | t = <STRING_LITERAL2> { lex = stripQuotes(t.image) ; }
-  | t = <STRING_LITERAL_LONG1> { lex = stripQuotes3(t.image) ; }
-  | t = <STRING_LITERAL_LONG2> { lex = stripQuotes3(t.image) ; }
-  )
-  { currLine = t.beginLine ; currColumn = t.beginColumn ;
-    lex = unescapeStr(lex, currLine, currColumn) ;
-  }
-
-  // Optional lang tag and datatype.
-  (
-     t = <LANGTAG> { lang = stripChars(t.image, 1) ; }
-  |
-    <DATATYPE> 
-    ( t = <IRIref> { dt_iri  = stripQuotes(t.image) ; }
-    | t = <PNAME>  { dt_pn = t.image ; }
-    )
-  )?
-  { emitLiteral(currLine, currColumn, lex, lang, dt_iri, dt_pn) ; }
-} 
-
-void NumericLiteral() : { Token t ; }
-{
-  t = <INTEGER>
-   { emitLiteralInteger(t.beginLine, t.beginColumn, t.image) ; }
-| t = <DECIMAL>
-   { emitLiteralDecimal(t.beginLine, t.beginColumn, t.image) ; }
-| t = <DOUBLE>
-   { emitLiteralDouble(t.beginLine, t.beginColumn, t.image) ; }
-}
-
-// Symbol!
-// Node BooleanLiteral() : {}
-// {
-//   <TRUE> { return XSD_TRUE ; }
-//  |
-//   <FALSE> { return XSD_FALSE ; }
-// }
-
-// No whitespace skipping.
-#undef SKIP
-#include "tokens.inc"
-#include "copyright.inc"
-
-/*
-# Local Variables:
-# tab-width: 4
-# indent-tabs-mode: nil
-# comment-default-style: "//"
-# End:
-*/
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+options
+{
+  // Use \ u escapes in streams AND use a reader for the query
+  // => get both raw and escaped unicode
+   JAVA_UNICODE_ESCAPE   = true ;
+   UNICODE_INPUT         = false ;
+
+  STATIC                = false ;
+//   DEBUG_PARSER          = true ;
+//   DEBUG_TOKEN_MANAGER   = true ;
+}
+
+PARSER_BEGIN(SSE_ParserCore)
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.hp.hpl.jena.sparql.sse.lang.parser ;
+
+import com.hp.hpl.jena.sparql.sse.lang.ParserSSEBase ;
+// The class body is empty here; JavaCC generates the production methods into it.
+// NOTE(review): the emit*/list*/parse* helpers presumably come from ParserSSEBase.
+public class SSE_ParserCore extends ParserSSEBase
+{
+}
+
+PARSER_END(SSE_ParserCore)
+
+// Now has explicit WS control in the grammar.
+// Policy - eat trailing WS
+
+// ---- Entry points : each consumes the complete input and checks for EOF.
+
+// parse(): optional leading whitespace, then exactly one term or list.
+// Trailing whitespace is eaten inside TermOrList() / List().
+void parse() : { }
+{
+    { parseStart() ; }
+    ( <WS> )* TermOrList() <EOF>
+    { parseFinish() ; }
+}
+
+// term(): one term only; whitespace around it is eaten, as in parse().
+void term() : { }
+{
+    { parseStart() ; }
+    ( <WS> )* Term() ( <WS> )* <EOF>
+    { parseFinish() ; }
+}
+
+// ---- TermOrList(): one term (its trailing whitespace is eaten here) or a
+// list (List() eats the whitespace after its own closing bracket).
+void TermOrList() : { }
+{
+    ( Term() ( <WS> )* | List() )
+}
+
+void List() : { Token opener ; Token closer ; }
+{
+  // A list is (...) or [...]; the OP (symbol) token must exclude these brackets.
+  // Whitespace after a bracket is eaten before the callback runs; the
+  // callback receives the line/column of the bracket token itself.
+  (
+    opener = <LPAREN> ( <WS> )*
+    { listStart(opener.beginLine, opener.beginColumn) ; }
+    BareList()
+    closer = <RPAREN> ( <WS> )*
+    { listFinish(closer.beginLine, closer.beginColumn) ; }
+  |
+    opener = <LBRACKET> ( <WS> )*
+    { listStart(opener.beginLine, opener.beginColumn) ; }
+    BareList()
+    closer = <RBRACKET> ( <WS> )*
+    { listFinish(closer.beginLine, closer.beginColumn) ; }
+  )
+}
+
+// BareList(): the contents of a list, without the enclosing brackets:
+// zero or more terms or nested lists.
+void BareList() : { }
+{
+    // No explicit <WS> here: TermOrList() swallows trailing whitespace.
+    ( TermOrList() )*
+}
+
+void Term() : { }    // a single non-list item; no locals are needed
+{
+    Symbol()          // <SYMBOL>
+  |
+    IRIref()          // <IRIref>
+  |
+    PrefixedName()    // <PNAME>
+  |
+    Var()             // <VAR_NAMED> or <VAR_OTHER>
+  |
+    Literal()         // quoted strings and numbers
+  |
+    BlankNode()       // <BLANK_NODE_LABEL>
+}
+
+
+void Symbol() : { Token sym ; }
+{
+    // Report the symbol text unchanged, with its start position.
+    sym = <SYMBOL> { emitSymbol(sym.beginLine, sym.beginColumn, sym.image) ; }
+}
+
+void IRIref() : { Token tok ; String iri ; }
+{
+    // stripQuotes() presumably removes the enclosing < >; escapes come next.
+    tok = <IRIref>
+    {
+      iri = unescapeStr(stripQuotes(tok.image), tok.beginLine, tok.beginColumn) ;
+      emitIRI(tok.beginLine, tok.beginColumn, iri) ;
+    }
+}
+
+void PrefixedName() : { Token pname ; }
+{
+    // The prefixed name is passed on as written; it is not expanded here.
+    pname = <PNAME> { emitPName(pname.beginLine, pname.beginColumn, pname.image) ; }
+}
+
+void Var() : { Token v ; }
+{
+    // VAR_NAMED:  "?" optionally followed by a legal SPARQL variable name.
+    //             A bare "?" is legal SPARQL syntax; per the original note a
+    //             variable is then allocated from ?0, ?1, ?2 ...
+    // VAR_OTHER:  "?" followed by any other run of symbol characters, which
+    //             covers the forms that once had their own tokens:
+    //               "??"  anonymous, non-distinguished variables (e.g. ??0)
+    //               "?."  internally allocated variables (e.g. ?.0)
+    //
+    // Former rule, kept for reference:
+    //  ( t = <VAR_NAMED> | t = <VAR_NAMED2> | t = <VAR_ANON> )
+    //
+    // stripChars(image, 1) presumably drops the leading "?" before reporting.
+    ( v = <VAR_NAMED> | v = <VAR_OTHER> )
+    { emitVar(v.beginLine, v.beginColumn, stripChars(v.image, 1)) ; }
+}
+
+// Literal(): a quoted RDF literal or a number.
+void Literal() : { }
+{
+    // There is no BooleanLiteral() alternative: booleans are read as symbols.
+    (   RDFLiteral()
+    |   NumericLiteral() )
+}
+
+void BlankNode() : { Token t ; }
+{
+  // "_:" label: report the image after stripChars(t.image, 2).
+  t = <BLANK_NODE_LABEL> { emitBNode(t.beginLine, t.beginColumn, stripChars(t.image, 2)) ; }
+//|
+//  t = <LBRACKET> <RBRACKET> { return emitBNode(t.beginLine, t.beginColumn) ; }
+//  t = <ANON> { return emitBNode(t.beginLine, t.beginColumn) ; }
+}
+
+void RDFLiteral() : { Token tok = null ; int line ; int column ;
+                      String lex ; String lang = null ;
+                      String dt_iri = null ; String dt_pn = null ; }
+{
+  // Lexical form: stripQuotes() for the short string tokens, stripQuotes3()
+  // for the triple-quoted long ones; escapes are processed afterwards.
+  (
+    ( tok = <STRING_LITERAL1> | tok = <STRING_LITERAL2> )
+    { lex = stripQuotes(tok.image) ; }
+  |
+    ( tok = <STRING_LITERAL_LONG1> | tok = <STRING_LITERAL_LONG2> )
+    { lex = stripQuotes3(tok.image) ; }
+  )
+  { line = tok.beginLine ; column = tok.beginColumn ;
+    lex = unescapeStr(lex, line, column) ; }
+  // Optional language tag or datatype (never both).
+  // NOTE(review): the datatype IRI only has its delimiters stripped; unlike
+  // IRIref() it is not passed through unescapeStr() - confirm this is intended.
+  ( tok = <LANGTAG> { lang = stripChars(tok.image, 1) ; }
+  | <DATATYPE> ( tok = <IRIref> { dt_iri = stripQuotes(tok.image) ; }
+               | tok = <PNAME>  { dt_pn = tok.image ; } )
+  )?
+  { emitLiteral(line, column, lex, lang, dt_iri, dt_pn) ; }
+}
+
+void NumericLiteral() : { Token num ; }
+{
+    // The token kind picks the callback; the text is passed on unparsed.
+    (
+      num = <INTEGER> { emitLiteralInteger(num.beginLine, num.beginColumn, num.image) ; }
+    | num = <DECIMAL> { emitLiteralDecimal(num.beginLine, num.beginColumn, num.image) ; }
+    | num = <DOUBLE>  { emitLiteralDouble(num.beginLine, num.beginColumn, num.image) ; }
+    )
+}
+
+// Symbol!
+// Node BooleanLiteral() : {}
+// {
+//   <TRUE> { return XSD_TRUE ; }
+//  |
+//   <FALSE> { return XSD_FALSE ; }
+// }
+
+// No whitespace skipping.
+#undef SKIP
+#include "tokens.inc"
+#include "copyright.inc"
+
+/*
+# Local Variables:
+# tab-width: 4
+# indent-tabs-mode: nil
+# comment-default-style: "//"
+# End:
+*/

http://git-wip-us.apache.org/repos/asf/jena/blob/06339ad3/jena-arq/Grammar/sse/tokens.inc
----------------------------------------------------------------------
diff --git a/jena-arq/Grammar/sse/tokens.inc b/jena-arq/Grammar/sse/tokens.inc
index 4294188..95dfaae 100644
--- a/jena-arq/Grammar/sse/tokens.inc
+++ b/jena-arq/Grammar/sse/tokens.inc
@@ -1,158 +1,176 @@
-// Basic tokens for SPARQL / RDF terms.
-// SSE - SPARQL S-Expressions
-// Not the keywords.
-
-
-TOKEN: { <WS: " " | "\t" | "\n" | "\r" | "\f"> }
-
-#ifdef SKIP
-SKIP : { <WS> }    //" " | "\t" | "\n" | "\r" | "\f" }
-#endif
-
-SPECIAL_TOKEN :
-{
-    <SINGLE_LINE_COMMENT1: "#" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? > 
-|
-    // Lisp-style comments (makes Emacs lisp-mode more useful
-    <SINGLE_LINE_COMMENT2: ";" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? >
-}
-
-TOKEN:
-{
-   <IRIref: "<" (~[">","<","\u0000"-"\u0020"])* ">" >
-|  <PNAME: (<PN_PREFIX>)? ":" (<PN_LOCAL>)? >
-|  <BLANK_NODE_LABEL: "_:" (<PN_LOCAL>)? >  // Allows no label
-
-    // Co-ordinate with ARQConstants
-    // Named variable - allows no name
-|  <VAR_NAMED:   "?" (<VARNAME>)?>
-
-//     // Non-distinguished variable (BNode in SPARQL)
-// |  <VAR_NAMED2:   "?." (~[" " , "\t" , "\n" , "\r" , "\f",
-//                             "(", ")", "[", "]", "{", "}"])* >
-// 
-// |  <VAR_ANON:     "??" (~[ " " , "\t" , "\n" , "\r" , "\f",
-//                            "(", ")", "[", "]", "{", "}"])* >
-
-| <VAR_OTHER: "?" (<SYM>)+ >
-}
-
-TOKEN :
-{
-  < #DIGITS: (["0"-"9"])+>
-| < INTEGER: (["+","-"])? <DIGITS> >
-| < DECIMAL: (["+","-"])? ( <DIGITS> "." (<DIGITS>)* | "." <DIGITS> ) >
-| < DOUBLE:   // Required exponent.
-      ( (["+","-"])? 
-        (["0"-"9"])+ "." (["0"-"9"])* <EXPONENT>
-        | "." (["0"-"9"])+ (<EXPONENT>)
-        | (["0"-"9"])+ <EXPONENT>
-      )
-      >
-| < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
-| < #QUOTE_3D: "\"\"\"">
-| < #QUOTE_3S: "'''">
-| < ECHAR: "\\" ( "t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'") >
-| < STRING_LITERAL1: 
-      // Single quoted string
-      "'" ( (~["'","\\","\n","\r"]) | <ECHAR> )* "'" >
-| < STRING_LITERAL2:
-    // Double quoted string
-      "\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> )* "\"" >
-| < STRING_LITERAL_LONG1:
-     <QUOTE_3S> 
-      ( ("'" | "''")? (~["'","\\"] | <ECHAR> ))*
-     <QUOTE_3S> >
-
-| < STRING_LITERAL_LONG2: 
-     <QUOTE_3D> 
-      ( ("\"" | "\"\"")? (~["\"","\\"] | <ECHAR> ))*
-     <QUOTE_3D> >
-}
-
-TOKEN :
-{
-  < LPAREN:    "(" >
-| < RPAREN:    ")" >
-
-
-| < LBRACE:    "{" >
-| < RBRACE:    "}" >
-
-| < LBRACKET:  "[" >
-| < RBRACKET:  "]" >
-}
-
-// Specials for literals trailing parts
-// Otherwise include in Symbol() rule for when out of position.
-TOKEN :
-{
-  < DATATYPE: "^^" >
-|  <LANGTAG: <AT> (<A2Z>)+("-" (<A2ZN>)+)* > : DEFAULT
-| < #AT: "@">
-|  <#A2Z: ["a"-"z","A"-"Z"]>
-|  <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]>
-}
-
-TOKEN:
-{
-  // XML 1.1 NCNameStartChar without "_"
-  <#PN_CHARS_BASE:
-          ["A"-"Z"] | ["a"-"z"] |
-          ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] |
-          ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] |
-          ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] |
-          ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"] 
-          >
-          // [#x10000-#xEFFFF]
-|
-  <#PN_CHARS_U: <PN_CHARS_BASE> | "_" >
-|
-// No DOT
-  <#PN_CHARS: (<PN_CHARS_U> | "-" | ["0"-"9"] | "\u00B7" |
-              ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) >
-|
-  // No leading "_", no trailing ".", can have dot inside prefix name.
-  <#PN_PREFIX: <PN_CHARS_BASE> ((<PN_CHARS>|".")* <PN_CHARS>)?  >
-|
-  // With a leading "_", no dot at end of local name.
-  <#PN_LOCAL: (<PN_CHARS_U> | ["0"-"9"]) ((<PN_CHARS>|".")* <PN_CHARS>)?  >
-|
-  // NCNAME without "-" and ".", allowing leading digits.
-  <#VARNAME: ( <PN_CHARS_U> | ["0"-"9"] )
-             ( <PN_CHARS_U> | ["0"-"9"] | "\u00B7" |
-               ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* >
-
-
-}
-
-TOKEN:
-{
-  // Anything left that isn't structural
-  // LPAREN and RPAREN / LBRACKET/RBRACKET
-  // Quotes
-  <#SYM:  (~["(", ")", "[", "]", "'", "\"", " ", "\t","\n","\r","\f" ])>
-|
-  <#SYM1: (~["^", "@", 
-             "(", ")", "[", "]", "'", "\"", " ", "\t","\n","\r","\f" ])>
-| <#SYM_ESC: "\\" ( " " | "'" | "\"" ) >
-|
-  <SYMBOL: <SYM1> (<SYM>)*>
-}
-
-// Catch-all tokens.  Must be last.  
-// Any non-whitespace.  Causes a parser exception, rather than a
-// token manager error (with hidden line numbers).
-// Only bad IRIs (e.g. spaces) now give unhelpful parse errors.
-TOKEN:
-{
-  <#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ >
-}
-
-/*
-# Local Variables:
-# tab-width: 4
-# indent-tabs-mode: nil
-# comment-default-style: "//"
-# End:
-*/
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Basic tokens for SPARQL / RDF terms.
+// SSE - SPARQL S-Expressions
+// Not the keywords.
+
+
+TOKEN: { <WS: " " | "\t" | "\n" | "\r" | "\f"> }
+// Whitespace is an ordinary token unless the including grammar defines SKIP.
+#ifdef SKIP
+SKIP : { <WS> }    // i.e. " " | "\t" | "\n" | "\r" | "\f"
+#endif
+
+SPECIAL_TOKEN :    // comments run to end of line; the line ending itself is optional
+{
+    <SINGLE_LINE_COMMENT1: "#" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? > 
+|
+    // Lisp-style ";" comments (makes Emacs lisp-mode more useful).
+    <SINGLE_LINE_COMMENT2: ";" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? >
+}
+
+TOKEN:
+{
+   <IRIref: "<" (~[">","<","\u0000"-"\u0020"])* ">" >
+|  <PNAME: (<PN_PREFIX>)? ":" (<PN_LOCAL>)? >
+|  <BLANK_NODE_LABEL: "_:" (<PN_LOCAL>)? >  // Label is optional: a bare "_:" is allowed.
+
+    // Variables.  Co-ordinate with ARQConstants.
+    // Named variable; the name is optional, so a bare "?" is a VAR_NAMED.
+|  <VAR_NAMED:   "?" (<VARNAME>)?>
+// Earlier, separate tokens for the other variable forms (now VAR_OTHER):
+//     // Non-distinguished variable (BNode in SPARQL)
+// |  <VAR_NAMED2:   "?." (~[" " , "\t" , "\n" , "\r" , "\f",
+//                             "(", ")", "[", "]", "{", "}"])* >
+// 
+// |  <VAR_ANON:     "??" (~[ " " , "\t" , "\n" , "\r" , "\f",
+//                            "(", ")", "[", "]", "{", "}"])* >
+// Any other "?"-prefixed run of symbol characters, e.g. "??0" or "?.0".
+| <VAR_OTHER: "?" (<SYM>)+ >
+}
+
+TOKEN :
+{
+  < #DIGITS: (["0"-"9"])+>
+| < INTEGER: (["+","-"])? <DIGITS> >
+| < DECIMAL: (["+","-"])? ( <DIGITS> "." (<DIGITS>)* | "." <DIGITS> ) >
+| < DOUBLE:   // Required exponent.  The optional sign covers all three forms.
+      (["+","-"])?
+      ( (["0"-"9"])+ "." (["0"-"9"])* <EXPONENT>
+        | "." (["0"-"9"])+ (<EXPONENT>)
+        | (["0"-"9"])+ <EXPONENT>
+      )
+      >
+| < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
+| < #QUOTE_3D: "\"\"\"">
+| < #QUOTE_3S: "'''">
+| < ECHAR: "\\" ( "t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'") >
+| < STRING_LITERAL1:
+      // Single quoted string: no raw newline, backslash only as an <ECHAR>.
+      "'" ( (~["'","\\","\n","\r"]) | <ECHAR> )* "'" >
+| < STRING_LITERAL2:
+      // Double quoted string: same rules with the double quote as delimiter.
+      "\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> )* "\"" >
+| < STRING_LITERAL_LONG1:
+     <QUOTE_3S>
+      ( ("'" | "''")? (~["'","\\"] | <ECHAR> ))*
+     <QUOTE_3S> >
+      // Same shape as STRING_LITERAL_LONG1, using double quotes.
+| < STRING_LITERAL_LONG2:
+     <QUOTE_3D>
+      ( ("\"" | "\"\"")? (~["\"","\\"] | <ECHAR> ))*
+     <QUOTE_3D> >
+}
+
+TOKEN :
+{
+  < LPAREN:    "(" >
+| < RPAREN:    ")" >
+// NOTE(review): no production visible in sse.jj uses the brace tokens, and
+// SYM does not exclude braces, so a brace within a symbol stays in the SYMBOL.
+| < LBRACE:    "{" >
+| < RBRACE:    "}" >
+
+| < LBRACKET:  "[" >
+| < RBRACKET:  "]" >
+}
+
+// Specials for the trailing parts of a literal ("^^" datatype, "@" language tag).
+// Otherwise include in the Symbol() rule for when they are out of position.
+TOKEN :
+{
+  < DATATYPE: "^^" >
+| < LANGTAG:  <AT> (<A2Z>)+ ( "-" (<A2ZN>)+ )* > : DEFAULT
+| < #AT:      "@" >
+| < #A2Z:     ["a"-"z","A"-"Z"] >
+| < #A2ZN:    ["a"-"z","A"-"Z","0"-"9"] >
+}
+
+TOKEN:
+{
+  // XML 1.1 NCNameStartChar without "_"
+  <#PN_CHARS_BASE:
+          ["A"-"Z"] | ["a"-"z"] |
+          ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] |
+          ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] |
+          ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] |
+          ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"] 
+          >
+          // [#x10000-#xEFFFF] (supplementary characters) is not included above.
+|
+  <#PN_CHARS_U: <PN_CHARS_BASE> | "_" >
+|
+// Name characters.  No DOT here; PN_PREFIX and PN_LOCAL add "." themselves.
+  <#PN_CHARS: (<PN_CHARS_U> | "-" | ["0"-"9"] | "\u00B7" |
+              ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) >
+|
+  // No leading "_", no trailing ".", can have dot inside prefix name.
+  <#PN_PREFIX: <PN_CHARS_BASE> ((<PN_CHARS>|".")* <PN_CHARS>)?  >
+|
+  // May start with "_" or a digit; no dot at end of local name.
+  <#PN_LOCAL: (<PN_CHARS_U> | ["0"-"9"]) ((<PN_CHARS>|".")* <PN_CHARS>)?  >
+|
+  // NCNAME without "-" and ".", allowing leading digits.
+  <#VARNAME: ( <PN_CHARS_U> | ["0"-"9"] )
+             ( <PN_CHARS_U> | ["0"-"9"] | "\u00B7" |
+               ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* >
+
+
+}
+
+TOKEN:
+{
+  // SYM: any character that is not structural - i.e. not a parenthesis,
+  // square bracket, quote or whitespace.  SYM1 (first character of a symbol)
+  // additionally excludes "^" and "@".  SYM_ESC is not referenced in this file.
+  <#SYM:  (~["(", ")", "[", "]", "'", "\"", " ", "\t","\n","\r","\f" ])>
+|
+  <#SYM1: (~["^", "@", 
+             "(", ")", "[", "]", "'", "\"", " ", "\t","\n","\r","\f" ])>
+| <#SYM_ESC: "\\" ( " " | "'" | "\"" ) >
+|
+  <SYMBOL: <SYM1> (<SYM>)*>
+}
+
+// Catch-all token.  Must be last.  Any run of non-whitespace, intended to cause
+// a parser exception rather than a token manager error (with hidden line numbers).
+// Only bad IRIs (e.g. spaces) now give unhelpful parse errors.
+// NOTE(review): "#" makes UNKNOWN private, so it is never emitted as a token.
+TOKEN:
+{
+  <#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ >
+}
+
+/*
+# Local Variables:
+# tab-width: 4
+# indent-tabs-mode: nil
+# comment-default-style: "//"
+# End:
+*/

Reply via email to