Repository: jena Updated Branches: refs/heads/rat-checks cb46780bd -> 525988312
Add license Project: http://git-wip-us.apache.org/repos/asf/jena/repo Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/06339ad3 Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/06339ad3 Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/06339ad3 Branch: refs/heads/rat-checks Commit: 06339ad30ce682cb5d123c5369eae396901e4954 Parents: cb46780 Author: Andy Seaborne <a...@apache.org> Authored: Tue Oct 21 16:39:10 2014 +0100 Committer: Andy Seaborne <a...@apache.org> Committed: Tue Oct 21 16:39:10 2014 +0100 ---------------------------------------------------------------------- jena-arq/Grammar/sse/grammar-sse | 16 + jena-arq/Grammar/sse/jj2html | 922 +++++++++++++++++----------------- jena-arq/Grammar/sse/sse.jj | 490 +++++++++--------- jena-arq/Grammar/sse/tokens.inc | 334 ++++++------ 4 files changed, 906 insertions(+), 856 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/jena/blob/06339ad3/jena-arq/Grammar/sse/grammar-sse ---------------------------------------------------------------------- diff --git a/jena-arq/Grammar/sse/grammar-sse b/jena-arq/Grammar/sse/grammar-sse index 3502d78..a675f70 100644 --- a/jena-arq/Grammar/sse/grammar-sse +++ b/jena-arq/Grammar/sse/grammar-sse @@ -1,4 +1,20 @@ #!/bin/bash +## Licensed to the Apache Software Foundation (ASF) under one +## or more contributor license agreements. See the NOTICE file +## distributed with this work for additional information +## regarding copyright ownership. The ASF licenses this file +## to you under the Apache License, Version 2.0 (the +## "License"); you may not use this file except in compliance +## with the License. You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. + # Parser builder GRAMMAR="${GRAMMAR:-"sse.jj"}" http://git-wip-us.apache.org/repos/asf/jena/blob/06339ad3/jena-arq/Grammar/sse/jj2html ---------------------------------------------------------------------- diff --git a/jena-arq/Grammar/sse/jj2html b/jena-arq/Grammar/sse/jj2html index 9451deb..0f83ee7 100644 --- a/jena-arq/Grammar/sse/jj2html +++ b/jena-arq/Grammar/sse/jj2html @@ -1,453 +1,469 @@ -#!/bin/perl -# Grammar into HTML -# Read in sparql.txt and the tokens.txt file - -## ToDo: -## Check tokens exist and are used -## Validate - - -if ( $#ARGV != 1 ) -{ - print STDERR "Usage: grammar.txt tokens.txt\n" ; - exit 1 ; -} - - - -$/ = undef ; -# Just table or full page. -$TABLE = 1 ; - -$grammarFile = $ARGV[0] ; -$tokensFile = $ARGV[1] ; - -## $grammarFile = 's.txt' ; -## $tokensFile = 't.txt' ; - - -$grammar = &readFile($grammarFile) ; -$tokens = &readFile($tokensFile) ; - -$grammar =~ s!DOCUMENT START!! ; -$grammar =~ s!NON-TERMINALS!! ; -$grammar =~ s!DOCUMENT END!! ; - -$grammar =~ s!//.*!!g ; -$grammar =~ s!\r!!g ; - -# remove leading whitespace -$grammar =~ s!^[\n\s]*!\n! ; - -# Merge alts -$grammar =~ s!\n\s*\|!\ |!g ; - -$tokens =~ s!//.*!!g ; -$tokens =~ s!\r!!g ; - - -## Grammar -#print "GRAMMAR\n" ; - -@g = split(/\n\s*/, $grammar) ; - -@rules = () ; -%ruleMap = () ; -%tokenMap = () ; -%inline = () ; - -# Grammar rules -# Direct from "jjdoc -TEXT=true" - -for $g (@g) -{ - ($rulename, $rulebody) = split(/:=/,$g) ; - - $rulename =~ s!^\s*!! ; - $rulename =~ s!\s*$!! ; - - $rulebody =~ s!^\s*!! ; - $rulebody =~ s!\s*$!! ; - - # Remove outer brackets -# $rulebody =~ s!^\((.*)\)$!$1! ; - - # Remove <> around tokens in grammar. - ## Now done very late (as <>) in fixups. - ## $rulebody =~ s/\<(\w+)\>/$1/g ; - # Leave in - so tokens distinguished from rules - - next if $rulename eq '' ; - #next if $rulebody eq '' ; - - # Skip the root rule. - next if ( $rulename eq 'CompilationUnit' ) ; - - $rulebody = 'Perl 5 regular expression' - if ( $rulename eq 'PatternLiteral' ) ; - - push @rules, $rulename ; - warn "Duplicate rule (grammar): $rulename\n" if defined($ruleMap{$rulename}) ; - $ruleMap{$rulename} = $rulebody ; - -## print "----------\n" ; -## print $rulename,"\n" ; -## print $rulebody,"\n" ; - -} - - -# Tokens -# Produced by "jj2tokens" -# Hand edited to indicate the inlines - -$tokens =~ s/\n+/\n/g ; -$tokens =~ s/^\n// ; - -@t = split(/\n(?=\<|\[)/, $tokens) ; - -for $t (@t) -{ - ($tokenname,$tokenbody) = split(/::=/, $t) ; - $tokenname =~ s!^\s*!! ; - $tokenname =~ s!\s*$!! ; - -## # remove <> around tokens -## Do very late as a formatting fix up. -## $tokenname =~ s/^\<// ; -## $tokenname =~ s/\>$// ; - - $tokenname =~ s/#// ; - - $tokenbody =~ s!^\s*!! ; - $tokenbody =~ s!\s*$!! ; - - # <> round tokens - # Remove at last minute. - - # Remove outer () - # $tokenbody =~ s!^\((.*)\)$!$1! ; - - # Inline? - if ( $tokenname =~ /^\[\<\w*\>\]/ ) - { - warn "Duplicate inline (token): $tokenname\n" if defined($inline{$tokenname}) ; - $tokenname =~ s/^\[//g ; - $tokenname =~ s/\]$//g ; - $tokenbody =~ s/"/'/g ; # '" -- But not literal " -- how? - $tokenbody =~ s/\<\>\'\{\}/\<\>\"\{\}/ ; # '" IRI fixup - - $inline{$tokenname} = $tokenbody ; - - #print "INLINE: ",$tokenname," => ",$tokenbody,"\n" ; - } - else - { - push @rules, $tokenname ; - warn "Duplicate rule (token): $tokenname\n" if defined($tokenMap{$tokenname}) ; - $ruleMap{$tokenname} = $tokenbody ; - } -} - -# Table - -if ( ! $TABLE ) -{ - print "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" ; - print "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n" ; - print " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" ; - print "\n" ; - - print "<html>\n"; - print "<head>\n"; - print "<title>SPARQL Grammar</title>\n" ; - print "<style type=\"text/css\">\n" ; - - # .token inline - # .ruleHead - # .ruleBody - - print <<'EOF' ; -div.grammarTable table * { border-width: 0 ; } -div.grammarTable table * tr { border: 1px solid black ; } - -.grammar { text-align: left ; vertical-align: top ; } -.token { color: #3f3f5f; } -.gRuleHead { font-style: italic ; font-family: monospace ; } -.gRuleBody { font-family: monospace ; } -.gRuleLabel { font-family: monospace ; } -EOF - - print "</style>\n" ; - print "</head>\n"; - print "<body>\n"; - - print "\n" ; -} - -print "<div class=\"grammarTable\">\n" ; -print " <table><tbody>\n" ; - -$ruleNum = 0 ; - -for $r (@rules) -{ - $DEBUG = 0 ; - $ruleNum++ ; - $rulename = $r ; - - $rulebody = $ruleMap{$rulename} ; - -## $DEBUG = 1 if ( $rulename =~ /Prolog/ ) ; - - $rb = $rulebody ; - - if ( $DEBUG ) - { - print STDERR "\n" ; - print STDERR "Rule: $rulename\n" ; - print STDERR "Body: $rulebody\n" ; - } - - ## Do before '||' substitution - # Not perfect - some fixups later. - #$rb =~ s%\|%\<br/\>\|%g ; - - # Escape HTML chars before adding markup. - $rb = esc($rb) ; - - # Inlines - for $k (keys %inline) - { - $s = span('token', $inline{$k}) ; - $k = esc($k) ; - # Assumes escaped <> round tokens. - $k = quotemeta $k ; - $rb =~ s/$k/$s/g ; - - } - - if ( $DEBUG ) - { - print STDERR "After inlining\n" ; - print STDERR $rb,"\n" ; ; - } - - - # Add hrefs - issue if one is a substring of another \W helps. - - for $k (keys %ruleMap) - { - $s = href("r-".$k,$k) ; - - $k = esc($k) ; - $k = quotemeta $k ; - -## if ( $DEBUG ) -## { -## print STDERR "K:$k\n" ; -## } - - - $rb =~ s/(?=\W)(\s*)$k(\s*)(?=\W)/$1$s$2/g ; - $rb =~ s/^$k(\s*)(?=\W)/$s$1/g ; - $rb =~ s/(?=\W)(\s*)$k$/$1$s/g ; - $rb =~ s/^$k$/$s/g ; - } - - if ( $DEBUG ) - { - print STDERR "After hrefs\n" ; - print STDERR $rb,"\n" ; ; - } - - #exit if $ruleNum > 2 ; - - $rn = anchor("r-".$rulename, $rulename) ; - $rn = fixupHead($rn) ; - - print "\n" ; - print "<tr valign=\"baseline\">\n" ; - $rlabel = '[' . $ruleNum . '] ' ; - - print " <td>",code('gRuleLabel', $rlabel),"</td>\n" ; - - #print " <td>",span('gRuleHead', $rn),"</td>\n" ; - print " <td>",code('gRuleHead',$rn),"</td>\n" ; - - print " <td> ::= </td>\n" ; - - $rb = fixupRule($rulename, $rb) ; - print " <td>",code('gRuleBody',$rb),"</td>\n" ; - - print "</tr>\n" ; - -# $rule{$rulename, $rulebody) ; -# print $rulename , "\n" ; -} - -print " </tbody></table>\n" ; -print "</div>\n" ; - -if ( !$TABLE ) -{ - print "\n" ; - print "</body>\n" ; - print "</html>\n" ; -} - -sub readFile -{ - my $f = $_[0] ; - open(F, "$f") || die "$!"; - my $s = <F> ; - return $s ; -} - -sub esc -{ - my $s = $_[0] ; - $s =~ s/&/&/g ; - $s =~ s/</</g ; - $s =~ s/>/>/g ; - return $s ; -} - -sub span -{ - my $c = $_[0] ; - my $t = $_[1] ; - $t = esc($t) ; - my $s = '<span class="' . $c . '">' . $t . '</span>' ; - return $s ; -} - -sub href -{ - my $a = $_[0] ; - my $t = $_[1] ; - $a = sane($a) ; - $t = esc($t) ; - my $s = '<a href="#' . $a . '">' . $t . '</a>' ; - return $s ; -} - -sub anchor -{ - my $a = $_[0] ; - my $t = $_[1] ; - $a = sane($a) ; - $t = esc($t) ; - my $s = '<a id="' . $a . '" name="' . $a . '">' . $t . '</a>' ; - return $s ; -} - -sub sane -{ - my $a = $_[0] ; - $a =~ s/\W//g ; - return $a ; -} - -sub code -{ - my $c = $_[0] ; - my $t = $_[1] ; - return '<code class="' . $c . '">' . $t . '</code>' ; -} - -sub fixupHead -{ - my $head = $_[0] ; - # Remove <> around tokens. - $head =~ s/<(\w+)>/$1/g ; - return $head ; -} - -sub fixupRule -{ - my $head = $_[0] ; - my $body = $_[1] ; - - # Remove unnecessary () - $body =~ s/\(\s*([^()| ]*) \)/$1/g ; - -## if ( $body =~ m!\(\s+(\<a[^>]*\>[^<>]*\</a\>)\s+\)! ) -## { -## $b = $body ; -## print "================================\n" ; -## print STDERR "$b\n" ; -## print STDERR "--------\n" ; -## $b =~ s!\(\s+(\<a[^>]*\>[^<>]*\</a\>)\s+\)!$1!g ; -## $b =~ s!\(\s+(\<span[^>]*\>[^<>]*\</span\>)\s+\)!$1!g ; -## print STDERR "$b\n" ; -## print STDERR "=====\n" ; -## print STDERR "\n" ; -## } - - - # Remove outer matching () where there are no inner () - $body =~ s/^\(\s+([^\(]*)\s+\)$/$1/ ; - - # ( A )* => A* and for + and ? where A is a linked or spanned object - $body =~ s!\(\s+(\<a[^>]*\>[^<>]*\</a\>)\s+\)!$1!g ; - $body =~ s!\(\s+(\<span[^>]*\>[^<>]*\</span\>)\s+\)!$1!g ; - - # There aren't any of these -## $body =~ s!\(\s+(\S*)\s+\)!$1!g ; - - # Remove <> around tokens. - $body =~ s/<(\w+)>/$1/g ; - - # Specials - # Split long bodies - if ( $head eq "CallExpression" || - $head eq "UnaryExpression" || - $head eq "<NCCHAR1p>" || - $head eq "PatternElement" || - $head eq "BuiltInCall" ) - { - $body =~ s%\|%\<br/\>\|%g ; - $body =~ s/^\s+// ; - $body = " ".$body ; - } - - if ( $head eq "RelationalExpression" || - $head eq "AdditiveExpression" || - $head eq "MultiplicativeExpression" || - $head eq "ConditionalOrExpression") - { - $body =~ s%\*\(%<br/>\(% ; - } - - # These failed the outer () test because they have nested () in them - if ( $head eq "QueryPattern" || - $head eq "OrderCondition" ) - { - # Remove outer () - $body =~ s/^\((.*)\)$/$1/ ; - } - - if ( $head eq "Query" ) - { - $body =~ s! \(!<br/>\(! ; - $body =~ s!\) !\)<br/>! ; - } - - if ( $head =~ m/(Select|Construct|Describe|Ask)Query/ ) - { - # Put a line break before the DatasetClause - # <a href="#rDatasetClause">DatasetClause</a> - $c = '<a href="#rDatasetClause">DatasetClause</a>' ; - $c = quotemeta $c ; - # Expects the dataset clause to be unbracketted - $body =~ s!(\(\s*$c)!<br/>$1! ; - } - - if ( $head eq "OrderCondition" ) - { - $body =~ s!\)\s*\|\s*\(!\)<br/>\| \(! ; - $body = " ".$body ; - } - return $body ; -} +#!/bin/perl +## Licensed to the Apache Software Foundation (ASF) under one +## or more contributor license agreements. See the NOTICE file +## distributed with this work for additional information +## regarding copyright ownership. The ASF licenses this file +## to you under the Apache License, Version 2.0 (the +## "License"); you may not use this file except in compliance +## with the License. You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. + +# Grammar into HTML +# Read in sparql.txt and the tokens.txt file + +## ToDo: +## Check tokens exist and are used +## Validate + + +if ( $#ARGV != 1 ) +{ + print STDERR "Usage: grammar.txt tokens.txt\n" ; + exit 1 ; +} + + + +$/ = undef ; +# Just table or full page. +$TABLE = 1 ; + +$grammarFile = $ARGV[0] ; +$tokensFile = $ARGV[1] ; + +## $grammarFile = 's.txt' ; +## $tokensFile = 't.txt' ; + + +$grammar = &readFile($grammarFile) ; +$tokens = &readFile($tokensFile) ; + +$grammar =~ s!DOCUMENT START!! ; +$grammar =~ s!NON-TERMINALS!! ; +$grammar =~ s!DOCUMENT END!! ; + +$grammar =~ s!//.*!!g ; +$grammar =~ s!\r!!g ; + +# remove leading whitespace +$grammar =~ s!^[\n\s]*!\n! ; + +# Merge alts +$grammar =~ s!\n\s*\|!\ |!g ; + +$tokens =~ s!//.*!!g ; +$tokens =~ s!\r!!g ; + + +## Grammar +#print "GRAMMAR\n" ; + +@g = split(/\n\s*/, $grammar) ; + +@rules = () ; +%ruleMap = () ; +%tokenMap = () ; +%inline = () ; + +# Grammar rules +# Direct from "jjdoc -TEXT=true" + +for $g (@g) +{ + ($rulename, $rulebody) = split(/:=/,$g) ; + + $rulename =~ s!^\s*!! ; + $rulename =~ s!\s*$!! ; + + $rulebody =~ s!^\s*!! ; + $rulebody =~ s!\s*$!! ; + + # Remove outer brackets +# $rulebody =~ s!^\((.*)\)$!$1! ; + + # Remove <> around tokens in grammar. + ## Now done very late (as <>) in fixups. + ## $rulebody =~ s/\<(\w+)\>/$1/g ; + # Leave in - so tokens distinguished from rules + + next if $rulename eq '' ; + #next if $rulebody eq '' ; + + # Skip the root rule. + next if ( $rulename eq 'CompilationUnit' ) ; + + $rulebody = 'Perl 5 regular expression' + if ( $rulename eq 'PatternLiteral' ) ; + + push @rules, $rulename ; + warn "Duplicate rule (grammar): $rulename\n" if defined($ruleMap{$rulename}) ; + $ruleMap{$rulename} = $rulebody ; + +## print "----------\n" ; +## print $rulename,"\n" ; +## print $rulebody,"\n" ; + +} + + +# Tokens +# Produced by "jj2tokens" +# Hand edited to indicate the inlines + +$tokens =~ s/\n+/\n/g ; +$tokens =~ s/^\n// ; + +@t = split(/\n(?=\<|\[)/, $tokens) ; + +for $t (@t) +{ + ($tokenname,$tokenbody) = split(/::=/, $t) ; + $tokenname =~ s!^\s*!! ; + $tokenname =~ s!\s*$!! ; + +## # remove <> around tokens +## Do very late as a formatting fix up. +## $tokenname =~ s/^\<// ; +## $tokenname =~ s/\>$// ; + + $tokenname =~ s/#// ; + + $tokenbody =~ s!^\s*!! ; + $tokenbody =~ s!\s*$!! ; + + # <> round tokens + # Remove at last minute. + + # Remove outer () + # $tokenbody =~ s!^\((.*)\)$!$1! ; + + # Inline? + if ( $tokenname =~ /^\[\<\w*\>\]/ ) + { + warn "Duplicate inline (token): $tokenname\n" if defined($inline{$tokenname}) ; + $tokenname =~ s/^\[//g ; + $tokenname =~ s/\]$//g ; + $tokenbody =~ s/"/'/g ; # '" -- But not literal " -- how? + $tokenbody =~ s/\<\>\'\{\}/\<\>\"\{\}/ ; # '" IRI fixup + + $inline{$tokenname} = $tokenbody ; + + #print "INLINE: ",$tokenname," => ",$tokenbody,"\n" ; + } + else + { + push @rules, $tokenname ; + warn "Duplicate rule (token): $tokenname\n" if defined($tokenMap{$tokenname}) ; + $ruleMap{$tokenname} = $tokenbody ; + } +} + +# Table + +if ( ! $TABLE ) +{ + print "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" ; + print "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n" ; + print " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" ; + print "\n" ; + + print "<html>\n"; + print "<head>\n"; + print "<title>SPARQL Grammar</title>\n" ; + print "<style type=\"text/css\">\n" ; + + # .token inline + # .ruleHead + # .ruleBody + + print <<'EOF' ; +div.grammarTable table * { border-width: 0 ; } +div.grammarTable table * tr { border: 1px solid black ; } + +.grammar { text-align: left ; vertical-align: top ; } +.token { color: #3f3f5f; } +.gRuleHead { font-style: italic ; font-family: monospace ; } +.gRuleBody { font-family: monospace ; } +.gRuleLabel { font-family: monospace ; } +EOF + + print "</style>\n" ; + print "</head>\n"; + print "<body>\n"; + + print "\n" ; +} + +print "<div class=\"grammarTable\">\n" ; +print " <table><tbody>\n" ; + +$ruleNum = 0 ; + +for $r (@rules) +{ + $DEBUG = 0 ; + $ruleNum++ ; + $rulename = $r ; + + $rulebody = $ruleMap{$rulename} ; + +## $DEBUG = 1 if ( $rulename =~ /Prolog/ ) ; + + $rb = $rulebody ; + + if ( $DEBUG ) + { + print STDERR "\n" ; + print STDERR "Rule: $rulename\n" ; + print STDERR "Body: $rulebody\n" ; + } + + ## Do before '||' substitution + # Not perfect - some fixups later. + #$rb =~ s%\|%\<br/\>\|%g ; + + # Escape HTML chars before adding markup. + $rb = esc($rb) ; + + # Inlines + for $k (keys %inline) + { + $s = span('token', $inline{$k}) ; + $k = esc($k) ; + # Assumes escaped <> round tokens. + $k = quotemeta $k ; + $rb =~ s/$k/$s/g ; + + } + + if ( $DEBUG ) + { + print STDERR "After inlining\n" ; + print STDERR $rb,"\n" ; ; + } + + + # Add hrefs - issue if one is a substring of another \W helps. + + for $k (keys %ruleMap) + { + $s = href("r-".$k,$k) ; + + $k = esc($k) ; + $k = quotemeta $k ; + +## if ( $DEBUG ) +## { +## print STDERR "K:$k\n" ; +## } + + + $rb =~ s/(?=\W)(\s*)$k(\s*)(?=\W)/$1$s$2/g ; + $rb =~ s/^$k(\s*)(?=\W)/$s$1/g ; + $rb =~ s/(?=\W)(\s*)$k$/$1$s/g ; + $rb =~ s/^$k$/$s/g ; + } + + if ( $DEBUG ) + { + print STDERR "After hrefs\n" ; + print STDERR $rb,"\n" ; ; + } + + #exit if $ruleNum > 2 ; + + $rn = anchor("r-".$rulename, $rulename) ; + $rn = fixupHead($rn) ; + + print "\n" ; + print "<tr valign=\"baseline\">\n" ; + $rlabel = '[' . $ruleNum . '] ' ; + + print " <td>",code('gRuleLabel', $rlabel),"</td>\n" ; + + #print " <td>",span('gRuleHead', $rn),"</td>\n" ; + print " <td>",code('gRuleHead',$rn),"</td>\n" ; + + print " <td> ::= </td>\n" ; + + $rb = fixupRule($rulename, $rb) ; + print " <td>",code('gRuleBody',$rb),"</td>\n" ; + + print "</tr>\n" ; + +# $rule{$rulename, $rulebody) ; +# print $rulename , "\n" ; +} + +print " </tbody></table>\n" ; +print "</div>\n" ; + +if ( !$TABLE ) +{ + print "\n" ; + print "</body>\n" ; + print "</html>\n" ; +} + +sub readFile +{ + my $f = $_[0] ; + open(F, "$f") || die "$!"; + my $s = <F> ; + return $s ; +} + +sub esc +{ + my $s = $_[0] ; + $s =~ s/&/&/g ; + $s =~ s/</</g ; + $s =~ s/>/>/g ; + return $s ; +} + +sub span +{ + my $c = $_[0] ; + my $t = $_[1] ; + $t = esc($t) ; + my $s = '<span class="' . $c . '">' . $t . '</span>' ; + return $s ; +} + +sub href +{ + my $a = $_[0] ; + my $t = $_[1] ; + $a = sane($a) ; + $t = esc($t) ; + my $s = '<a href="#' . $a . '">' . $t . '</a>' ; + return $s ; +} + +sub anchor +{ + my $a = $_[0] ; + my $t = $_[1] ; + $a = sane($a) ; + $t = esc($t) ; + my $s = '<a id="' . $a . '" name="' . $a . '">' . $t . '</a>' ; + return $s ; +} + +sub sane +{ + my $a = $_[0] ; + $a =~ s/\W//g ; + return $a ; +} + +sub code +{ + my $c = $_[0] ; + my $t = $_[1] ; + return '<code class="' . $c . '">' . $t . '</code>' ; +} + +sub fixupHead +{ + my $head = $_[0] ; + # Remove <> around tokens. + $head =~ s/<(\w+)>/$1/g ; + return $head ; +} + +sub fixupRule +{ + my $head = $_[0] ; + my $body = $_[1] ; + + # Remove unnecessary () + $body =~ s/\(\s*([^()| ]*) \)/$1/g ; + +## if ( $body =~ m!\(\s+(\<a[^>]*\>[^<>]*\</a\>)\s+\)! ) +## { +## $b = $body ; +## print "================================\n" ; +## print STDERR "$b\n" ; +## print STDERR "--------\n" ; +## $b =~ s!\(\s+(\<a[^>]*\>[^<>]*\</a\>)\s+\)!$1!g ; +## $b =~ s!\(\s+(\<span[^>]*\>[^<>]*\</span\>)\s+\)!$1!g ; +## print STDERR "$b\n" ; +## print STDERR "=====\n" ; +## print STDERR "\n" ; +## } + + + # Remove outer matching () where there are no inner () + $body =~ s/^\(\s+([^\(]*)\s+\)$/$1/ ; + + # ( A )* => A* and for + and ? where A is a linked or spanned object + $body =~ s!\(\s+(\<a[^>]*\>[^<>]*\</a\>)\s+\)!$1!g ; + $body =~ s!\(\s+(\<span[^>]*\>[^<>]*\</span\>)\s+\)!$1!g ; + + # There aren't any of these +## $body =~ s!\(\s+(\S*)\s+\)!$1!g ; + + # Remove <> around tokens. + $body =~ s/<(\w+)>/$1/g ; + + # Specials + # Split long bodies + if ( $head eq "CallExpression" || + $head eq "UnaryExpression" || + $head eq "<NCCHAR1p>" || + $head eq "PatternElement" || + $head eq "BuiltInCall" ) + { + $body =~ s%\|%\<br/\>\|%g ; + $body =~ s/^\s+// ; + $body = " ".$body ; + } + + if ( $head eq "RelationalExpression" || + $head eq "AdditiveExpression" || + $head eq "MultiplicativeExpression" || + $head eq "ConditionalOrExpression") + { + $body =~ s%\*\(%<br/>\(% ; + } + + # These failed the outer () test because they have nested () in them + if ( $head eq "QueryPattern" || + $head eq "OrderCondition" ) + { + # Remove outer () + $body =~ s/^\((.*)\)$/$1/ ; + } + + if ( $head eq "Query" ) + { + $body =~ s! \(!<br/>\(! ; + $body =~ s!\) !\)<br/>! ; + } + + if ( $head =~ m/(Select|Construct|Describe|Ask)Query/ ) + { + # Put a line break before the DatasetClause + # <a href="#rDatasetClause">DatasetClause</a> + $c = '<a href="#rDatasetClause">DatasetClause</a>' ; + $c = quotemeta $c ; + # Expects the dataset clause to be unbracketted + $body =~ s!(\(\s*$c)!<br/>$1! ; + } + + if ( $head eq "OrderCondition" ) + { + $body =~ s!\)\s*\|\s*\(!\)<br/>\| \(! ; + $body = " ".$body ; + } + return $body ; +} http://git-wip-us.apache.org/repos/asf/jena/blob/06339ad3/jena-arq/Grammar/sse/sse.jj ---------------------------------------------------------------------- diff --git a/jena-arq/Grammar/sse/sse.jj b/jena-arq/Grammar/sse/sse.jj index f69e992..e04e907 100644 --- a/jena-arq/Grammar/sse/sse.jj +++ b/jena-arq/Grammar/sse/sse.jj @@ -1,245 +1,245 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -options -{ - // Use \ u escapes in streams AND use a reader for the query - // => get both raw and escaped unicode - JAVA_UNICODE_ESCAPE = true ; - UNICODE_INPUT = false ; - - STATIC = false ; -// DEBUG_PARSER = true ; -// DEBUG_TOKEN_MANAGER = true ; -} - -PARSER_BEGIN(SSE_ParserCore) -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.hp.hpl.jena.sparql.sse.lang.parser ; - -import com.hp.hpl.jena.sparql.sse.lang.ParserSSEBase ; - - -public class SSE_ParserCore extends ParserSSEBase -{ -} - -PARSER_END(SSE_ParserCore) - -// Now has explicit WS control in the grammar. -// Policy - eat trailing WS - -// ---- Entry points : check for EOF. - -void parse() : { } -{ - { parseStart() ; } - (<WS>)* - TermOrList() - <EOF> - { parseFinish() ; } -} - -void term() : { } -{ - { parseStart() ; } - Term() - <EOF> - { parseFinish() ; } -} - -// ---- - -void TermOrList() : { } -{ - ( Term() (<WS>)* | List() ) -} - -void List() : { Token t ; } -{ - // The OP token must exclude these - ( t = <LPAREN> - (<WS>)* - { listStart(t.beginLine, t.beginColumn) ; } - BareList() - t = <RPAREN> - (<WS>)* - { listFinish(t.beginLine, t.beginColumn) ; } - - | t = <LBRACKET> - (<WS>)* - { listStart(t.beginLine, t.beginColumn) ; } - BareList() - t = <RBRACKET> - (<WS>)* - { listFinish(t.beginLine, t.beginColumn) ; } ) -} - -void BareList() : { } -{ - ( - TermOrList() - // White space swallowed - )* -} - -void Term() : { Token t ; } -{ - Symbol() - | - IRIref() - | - PrefixedName() - | - Var() - | - Literal() - | - BlankNode() -} - - -void Symbol() : { Token t ; } -{ - t = <SYMBOL> - { emitSymbol(t.beginLine, t.beginColumn, t.image) ; } -} - -void IRIref() : { Token t ; String s ; } -{ - t = <IRIref> - { - s = t.image ; - s = stripQuotes(s) ; - s = unescapeStr(s, t.beginLine, t.beginColumn) ; - emitIRI(t.beginLine, t.beginColumn, s) ; } -} - -void PrefixedName() : { Token t ; } -{ - t = <PNAME> - { emitPName(t.beginLine, t.beginColumn, t.image) ; } -} - -void Var() : { Token t ; } -{ - // VAR_NAMED: "?" and any legal SPARQL variable. - // VAR_NAMED2: "?." and non-legal SPARQL variable (usually allocated) - // VAR_ANON: "??" : Anon variables. - - - // Includes "?" as a variable which allocated one from ?0, ?1, ?2 - // Legal SPARQL syntax. - // Includes "??" as a variable for anon non-distinguished variables. - // Includes non-distinguished variables as ??0 - // Includes internal allocated variables as ?.0 - -// ( t = <VAR_NAMED> | t = <VAR_NAMED2> | t = <VAR_ANON> ) - ( t = <VAR_NAMED> | t = <VAR_OTHER> ) - { emitVar(t.beginLine, t.beginColumn, stripChars(t.image, 1)) ; } -} - -void Literal() : { } -{ - ( RDFLiteral() - | NumericLiteral() -// | BooleanLiteral() // Do as a symbol. - ) -} - -void BlankNode() : { Token t ; } -{ - t = <BLANK_NODE_LABEL> - { emitBNode(t.beginLine, t.beginColumn, stripChars(t.image, 2)) ; } -//| -// t = <LBRACKET> <RBRACKET> { return emitBNode(t.beginLine, t.beginColumn) ; } -// t = <ANON> { return emitBNode(t.beginLine, t.beginColumn) ; } -} - -void RDFLiteral() : { Token t = null ; int currLine ; int currColumn ; - String lex ; String lang = null ; - String dt_iri = null ; String dt_pn = null ; } -{ - ( t = <STRING_LITERAL1> { lex = stripQuotes(t.image) ; } - | t = <STRING_LITERAL2> { lex = stripQuotes(t.image) ; } - | t = <STRING_LITERAL_LONG1> { lex = stripQuotes3(t.image) ; } - | t = <STRING_LITERAL_LONG2> { lex = stripQuotes3(t.image) ; } - ) - { currLine = t.beginLine ; currColumn = t.beginColumn ; - lex = unescapeStr(lex, currLine, currColumn) ; - } - - // Optional lang tag and datatype. - ( - t = <LANGTAG> { lang = stripChars(t.image, 1) ; } - | - <DATATYPE> - ( t = <IRIref> { dt_iri = stripQuotes(t.image) ; } - | t = <PNAME> { dt_pn = t.image ; } - ) - )? - { emitLiteral(currLine, currColumn, lex, lang, dt_iri, dt_pn) ; } -} - -void NumericLiteral() : { Token t ; } -{ - t = <INTEGER> - { emitLiteralInteger(t.beginLine, t.beginColumn, t.image) ; } -| t = <DECIMAL> - { emitLiteralDecimal(t.beginLine, t.beginColumn, t.image) ; } -| t = <DOUBLE> - { emitLiteralDouble(t.beginLine, t.beginColumn, t.image) ; } -} - -// Symbol! -// Node BooleanLiteral() : {} -// { -// <TRUE> { return XSD_TRUE ; } -// | -// <FALSE> { return XSD_FALSE ; } -// } - -// No whitespace skipping. -#undef SKIP -#include "tokens.inc" -#include "copyright.inc" - -/* -# Local Variables: -# tab-width: 4 -# indent-tabs-mode: nil -# comment-default-style: "//" -# End: -*/ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +options +{ + // Use \ u escapes in streams AND use a reader for the query + // => get both raw and escaped unicode + JAVA_UNICODE_ESCAPE = true ; + UNICODE_INPUT = false ; + + STATIC = false ; +// DEBUG_PARSER = true ; +// DEBUG_TOKEN_MANAGER = true ; +} + +PARSER_BEGIN(SSE_ParserCore) +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.hp.hpl.jena.sparql.sse.lang.parser ; + +import com.hp.hpl.jena.sparql.sse.lang.ParserSSEBase ; + + +public class SSE_ParserCore extends ParserSSEBase +{ +} + +PARSER_END(SSE_ParserCore) + +// Now has explicit WS control in the grammar. +// Policy - eat trailing WS + +// ---- Entry points : check for EOF. + +void parse() : { } +{ + { parseStart() ; } + (<WS>)* + TermOrList() + <EOF> + { parseFinish() ; } +} + +void term() : { } +{ + { parseStart() ; } + Term() + <EOF> + { parseFinish() ; } +} + +// ---- + +void TermOrList() : { } +{ + ( Term() (<WS>)* | List() ) +} + +void List() : { Token t ; } +{ + // The OP token must exclude these + ( t = <LPAREN> + (<WS>)* + { listStart(t.beginLine, t.beginColumn) ; } + BareList() + t = <RPAREN> + (<WS>)* + { listFinish(t.beginLine, t.beginColumn) ; } + + | t = <LBRACKET> + (<WS>)* + { listStart(t.beginLine, t.beginColumn) ; } + BareList() + t = <RBRACKET> + (<WS>)* + { listFinish(t.beginLine, t.beginColumn) ; } ) +} + +void BareList() : { } +{ + ( + TermOrList() + // White space swallowed + )* +} + +void Term() : { Token t ; } +{ + Symbol() + | + IRIref() + | + PrefixedName() + | + Var() + | + Literal() + | + BlankNode() +} + + +void Symbol() : { Token t ; } +{ + t = <SYMBOL> + { emitSymbol(t.beginLine, t.beginColumn, t.image) ; } +} + +void IRIref() : { Token t ; String s ; } +{ + t = <IRIref> + { + s = t.image ; + s = stripQuotes(s) ; + s = unescapeStr(s, t.beginLine, t.beginColumn) ; + emitIRI(t.beginLine, t.beginColumn, s) ; } +} + +void PrefixedName() : { Token t ; } +{ + t = <PNAME> + { emitPName(t.beginLine, t.beginColumn, t.image) ; } +} + +void Var() : { Token t ; } +{ + // VAR_NAMED: "?" and any legal SPARQL variable. + // VAR_NAMED2: "?." and non-legal SPARQL variable (usually allocated) + // VAR_ANON: "??" : Anon variables. + + + // Includes "?" as a variable which allocated one from ?0, ?1, ?2 + // Legal SPARQL syntax. + // Includes "??" as a variable for anon non-distinguished variables. + // Includes non-distinguished variables as ??0 + // Includes internal allocated variables as ?.0 + +// ( t = <VAR_NAMED> | t = <VAR_NAMED2> | t = <VAR_ANON> ) + ( t = <VAR_NAMED> | t = <VAR_OTHER> ) + { emitVar(t.beginLine, t.beginColumn, stripChars(t.image, 1)) ; } +} + +void Literal() : { } +{ + ( RDFLiteral() + | NumericLiteral() +// | BooleanLiteral() // Do as a symbol. + ) +} + +void BlankNode() : { Token t ; } +{ + t = <BLANK_NODE_LABEL> + { emitBNode(t.beginLine, t.beginColumn, stripChars(t.image, 2)) ; } +//| +// t = <LBRACKET> <RBRACKET> { return emitBNode(t.beginLine, t.beginColumn) ; } +// t = <ANON> { return emitBNode(t.beginLine, t.beginColumn) ; } +} + +void RDFLiteral() : { Token t = null ; int currLine ; int currColumn ; + String lex ; String lang = null ; + String dt_iri = null ; String dt_pn = null ; } +{ + ( t = <STRING_LITERAL1> { lex = stripQuotes(t.image) ; } + | t = <STRING_LITERAL2> { lex = stripQuotes(t.image) ; } + | t = <STRING_LITERAL_LONG1> { lex = stripQuotes3(t.image) ; } + | t = <STRING_LITERAL_LONG2> { lex = stripQuotes3(t.image) ; } + ) + { currLine = t.beginLine ; currColumn = t.beginColumn ; + lex = unescapeStr(lex, currLine, currColumn) ; + } + + // Optional lang tag and datatype. + ( + t = <LANGTAG> { lang = stripChars(t.image, 1) ; } + | + <DATATYPE> + ( t = <IRIref> { dt_iri = stripQuotes(t.image) ; } + | t = <PNAME> { dt_pn = t.image ; } + ) + )? + { emitLiteral(currLine, currColumn, lex, lang, dt_iri, dt_pn) ; } +} + +void NumericLiteral() : { Token t ; } +{ + t = <INTEGER> + { emitLiteralInteger(t.beginLine, t.beginColumn, t.image) ; } +| t = <DECIMAL> + { emitLiteralDecimal(t.beginLine, t.beginColumn, t.image) ; } +| t = <DOUBLE> + { emitLiteralDouble(t.beginLine, t.beginColumn, t.image) ; } +} + +// Symbol! +// Node BooleanLiteral() : {} +// { +// <TRUE> { return XSD_TRUE ; } +// | +// <FALSE> { return XSD_FALSE ; } +// } + +// No whitespace skipping. +#undef SKIP +#include "tokens.inc" +#include "copyright.inc" + +/* +# Local Variables: +# tab-width: 4 +# indent-tabs-mode: nil +# comment-default-style: "//" +# End: +*/ http://git-wip-us.apache.org/repos/asf/jena/blob/06339ad3/jena-arq/Grammar/sse/tokens.inc ---------------------------------------------------------------------- diff --git a/jena-arq/Grammar/sse/tokens.inc b/jena-arq/Grammar/sse/tokens.inc index 4294188..95dfaae 100644 --- a/jena-arq/Grammar/sse/tokens.inc +++ b/jena-arq/Grammar/sse/tokens.inc @@ -1,158 +1,176 @@ -// Basic tokens for SPARQL / RDF terms. -// SSE - SPARQL S-Expressions -// Not the keywords. - - -TOKEN: { <WS: " " | "\t" | "\n" | "\r" | "\f"> } - -#ifdef SKIP -SKIP : { <WS> } //" " | "\t" | "\n" | "\r" | "\f" } -#endif - -SPECIAL_TOKEN : -{ - <SINGLE_LINE_COMMENT1: "#" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? > -| - // Lisp-style comments (makes Emacs lisp-mode more useful - <SINGLE_LINE_COMMENT2: ";" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? > -} - -TOKEN: -{ - <IRIref: "<" (~[">","<","\u0000"-"\u0020"])* ">" > -| <PNAME: (<PN_PREFIX>)? ":" (<PN_LOCAL>)? > -| <BLANK_NODE_LABEL: "_:" (<PN_LOCAL>)? > // Allows no label - - // Co-ordinate with ARQConstants - // Named variable - allows no name -| <VAR_NAMED: "?" (<VARNAME>)?> - -// // Non-distinguished variable (BNode in SPARQL) -// | <VAR_NAMED2: "?." (~[" " , "\t" , "\n" , "\r" , "\f", -// "(", ")", "[", "]", "{", "}"])* > -// -// | <VAR_ANON: "??" (~[ " " , "\t" , "\n" , "\r" , "\f", -// "(", ")", "[", "]", "{", "}"])* > - -| <VAR_OTHER: "?" (<SYM>)+ > -} - -TOKEN : -{ - < #DIGITS: (["0"-"9"])+> -| < INTEGER: (["+","-"])? <DIGITS> > -| < DECIMAL: (["+","-"])? ( <DIGITS> "." (<DIGITS>)* | "." <DIGITS> ) > -| < DOUBLE: // Required exponent. - ( (["+","-"])? - (["0"-"9"])+ "." (["0"-"9"])* <EXPONENT> - | "." (["0"-"9"])+ (<EXPONENT>) - | (["0"-"9"])+ <EXPONENT> - ) - > -| < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ > -| < #QUOTE_3D: "\"\"\""> -| < #QUOTE_3S: "'''"> -| < ECHAR: "\\" ( "t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'") > -| < STRING_LITERAL1: - // Single quoted string - "'" ( (~["'","\\","\n","\r"]) | <ECHAR> )* "'" > -| < STRING_LITERAL2: - // Double quoted string - "\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> )* "\"" > -| < STRING_LITERAL_LONG1: - <QUOTE_3S> - ( ("'" | "''")? (~["'","\\"] | <ECHAR> ))* - <QUOTE_3S> > - -| < STRING_LITERAL_LONG2: - <QUOTE_3D> - ( ("\"" | "\"\"")? (~["\"","\\"] | <ECHAR> ))* - <QUOTE_3D> > -} - -TOKEN : -{ - < LPAREN: "(" > -| < RPAREN: ")" > - - -| < LBRACE: "{" > -| < RBRACE: "}" > - -| < LBRACKET: "[" > -| < RBRACKET: "]" > -} - -// Specials for literals trailing parts -// Otherwise include in Symbol() rule for when out of position. -TOKEN : -{ - < DATATYPE: "^^" > -| <LANGTAG: <AT> (<A2Z>)+("-" (<A2ZN>)+)* > : DEFAULT -| < #AT: "@"> -| <#A2Z: ["a"-"z","A"-"Z"]> -| <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]> -} - -TOKEN: -{ - // XML 1.1 NCNameStartChar without "_" - <#PN_CHARS_BASE: - ["A"-"Z"] | ["a"-"z"] | - ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] | - ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] | - ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] | - ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"] - > - // [#x10000-#xEFFFF] -| - <#PN_CHARS_U: <PN_CHARS_BASE> | "_" > -| -// No DOT - <#PN_CHARS: (<PN_CHARS_U> | "-" | ["0"-"9"] | "\u00B7" | - ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) > -| - // No leading "_", no trailing ".", can have dot inside prefix name. - <#PN_PREFIX: <PN_CHARS_BASE> ((<PN_CHARS>|".")* <PN_CHARS>)? > -| - // With a leading "_", no dot at end of local name. - <#PN_LOCAL: (<PN_CHARS_U> | ["0"-"9"]) ((<PN_CHARS>|".")* <PN_CHARS>)? > -| - // NCNAME without "-" and ".", allowing leading digits. - <#VARNAME: ( <PN_CHARS_U> | ["0"-"9"] ) - ( <PN_CHARS_U> | ["0"-"9"] | "\u00B7" | - ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* > - - -} - -TOKEN: -{ - // Anything left that isn't structural - // LPAREN and RPAREN / LBRACKET/RBRACKET - // Quotes - <#SYM: (~["(", ")", "[", "]", "'", "\"", " ", "\t","\n","\r","\f" ])> -| - <#SYM1: (~["^", "@", - "(", ")", "[", "]", "'", "\"", " ", "\t","\n","\r","\f" ])> -| <#SYM_ESC: "\\" ( " " | "'" | "\"" ) > -| - <SYMBOL: <SYM1> (<SYM>)*> -} - -// Catch-all tokens. Must be last. -// Any non-whitespace. Causes a parser exception, rather than a -// token manager error (with hidden line numbers). -// Only bad IRIs (e.g. spaces) now give unhelpful parse errors. -TOKEN: -{ - <#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ > -} - -/* -# Local Variables: -# tab-width: 4 -# indent-tabs-mode: nil -# comment-default-style: "//" -# End: -*/ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Basic tokens for SPARQL / RDF terms. +// SSE - SPARQL S-Expressions +// Not the keywords. + + +TOKEN: { <WS: " " | "\t" | "\n" | "\r" | "\f"> } + +#ifdef SKIP +SKIP : { <WS> } //" " | "\t" | "\n" | "\r" | "\f" } +#endif + +SPECIAL_TOKEN : +{ + <SINGLE_LINE_COMMENT1: "#" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? > +| + // Lisp-style comments (makes Emacs lisp-mode more useful + <SINGLE_LINE_COMMENT2: ";" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? > +} + +TOKEN: +{ + <IRIref: "<" (~[">","<","\u0000"-"\u0020"])* ">" > +| <PNAME: (<PN_PREFIX>)? ":" (<PN_LOCAL>)? > +| <BLANK_NODE_LABEL: "_:" (<PN_LOCAL>)? > // Allows no label + + // Co-ordinate with ARQConstants + // Named variable - allows no name +| <VAR_NAMED: "?" (<VARNAME>)?> + +// // Non-distinguished variable (BNode in SPARQL) +// | <VAR_NAMED2: "?." (~[" " , "\t" , "\n" , "\r" , "\f", +// "(", ")", "[", "]", "{", "}"])* > +// +// | <VAR_ANON: "??" (~[ " " , "\t" , "\n" , "\r" , "\f", +// "(", ")", "[", "]", "{", "}"])* > + +| <VAR_OTHER: "?" (<SYM>)+ > +} + +TOKEN : +{ + < #DIGITS: (["0"-"9"])+> +| < INTEGER: (["+","-"])? <DIGITS> > +| < DECIMAL: (["+","-"])? ( <DIGITS> "." (<DIGITS>)* | "." <DIGITS> ) > +| < DOUBLE: // Required exponent. + ( (["+","-"])? + (["0"-"9"])+ "." (["0"-"9"])* <EXPONENT> + | "." (["0"-"9"])+ (<EXPONENT>) + | (["0"-"9"])+ <EXPONENT> + ) + > +| < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ > +| < #QUOTE_3D: "\"\"\""> +| < #QUOTE_3S: "'''"> +| < ECHAR: "\\" ( "t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'") > +| < STRING_LITERAL1: + // Single quoted string + "'" ( (~["'","\\","\n","\r"]) | <ECHAR> )* "'" > +| < STRING_LITERAL2: + // Double quoted string + "\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> )* "\"" > +| < STRING_LITERAL_LONG1: + <QUOTE_3S> + ( ("'" | "''")? (~["'","\\"] | <ECHAR> ))* + <QUOTE_3S> > + +| < STRING_LITERAL_LONG2: + <QUOTE_3D> + ( ("\"" | "\"\"")? (~["\"","\\"] | <ECHAR> ))* + <QUOTE_3D> > +} + +TOKEN : +{ + < LPAREN: "(" > +| < RPAREN: ")" > + + +| < LBRACE: "{" > +| < RBRACE: "}" > + +| < LBRACKET: "[" > +| < RBRACKET: "]" > +} + +// Specials for literals trailing parts +// Otherwise include in Symbol() rule for when out of position. +TOKEN : +{ + < DATATYPE: "^^" > +| <LANGTAG: <AT> (<A2Z>)+("-" (<A2ZN>)+)* > : DEFAULT +| < #AT: "@"> +| <#A2Z: ["a"-"z","A"-"Z"]> +| <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]> +} + +TOKEN: +{ + // XML 1.1 NCNameStartChar without "_" + <#PN_CHARS_BASE: + ["A"-"Z"] | ["a"-"z"] | + ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] | + ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] | + ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] | + ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"] + > + // [#x10000-#xEFFFF] +| + <#PN_CHARS_U: <PN_CHARS_BASE> | "_" > +| +// No DOT + <#PN_CHARS: (<PN_CHARS_U> | "-" | ["0"-"9"] | "\u00B7" | + ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) > +| + // No leading "_", no trailing ".", can have dot inside prefix name. + <#PN_PREFIX: <PN_CHARS_BASE> ((<PN_CHARS>|".")* <PN_CHARS>)? > +| + // With a leading "_", no dot at end of local name. + <#PN_LOCAL: (<PN_CHARS_U> | ["0"-"9"]) ((<PN_CHARS>|".")* <PN_CHARS>)? > +| + // NCNAME without "-" and ".", allowing leading digits. + <#VARNAME: ( <PN_CHARS_U> | ["0"-"9"] ) + ( <PN_CHARS_U> | ["0"-"9"] | "\u00B7" | + ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* > + + +} + +TOKEN: +{ + // Anything left that isn't structural + // LPAREN and RPAREN / LBRACKET/RBRACKET + // Quotes + <#SYM: (~["(", ")", "[", "]", "'", "\"", " ", "\t","\n","\r","\f" ])> +| + <#SYM1: (~["^", "@", + "(", ")", "[", "]", "'", "\"", " ", "\t","\n","\r","\f" ])> +| <#SYM_ESC: "\\" ( " " | "'" | "\"" ) > +| + <SYMBOL: <SYM1> (<SYM>)*> +} + +// Catch-all tokens. Must be last. +// Any non-whitespace. Causes a parser exception, rather than a +// token manager error (with hidden line numbers). +// Only bad IRIs (e.g. spaces) now give unhelpful parse errors. +TOKEN: +{ + <#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ > +} + +/* +# Local Variables: +# tab-width: 4 +# indent-tabs-mode: nil +# comment-default-style: "//" +# End: +*/