JENA-810 : Handle BOM in the javacc file for the SPARQL 1.1 grammar. Project: http://git-wip-us.apache.org/repos/asf/jena/repo Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/d1c3b58d Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/d1c3b58d Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/d1c3b58d
Branch: refs/heads/master Commit: d1c3b58d4a3bc468cdda20108ca6afdf58ae5198 Parents: 9e6d45b Author: Andy Seaborne <a...@apache.org> Authored: Sat Nov 1 17:02:18 2014 +0000 Committer: Andy Seaborne <a...@apache.org> Committed: Sat Nov 1 17:02:18 2014 +0000 ---------------------------------------------------------------------- jena-arq/Grammar/.gitignore | 4 + jena-arq/Grammar/arq.jj | 48 +- jena-arq/Grammar/grammar | 9 +- jena-arq/Grammar/master.jj | 24 +- jena-arq/Grammar/sparql_11.jj | 41 +- .../hpl/jena/sparql/lang/ParserARQUpdate.java | 5 +- .../jena/sparql/lang/ParserSPARQL11Update.java | 28 +- .../hp/hpl/jena/sparql/lang/SPARQLParser.java | 3 - .../hp/hpl/jena/sparql/lang/UpdateParser.java | 18 +- .../hp/hpl/jena/sparql/lang/arq/ARQParser.java | 630 +++--- .../sparql/lang/arq/ARQParserConstants.java | 399 ++-- .../sparql/lang/arq/ARQParserTokenManager.java | 1863 +++++++++--------- .../jena/sparql/lang/arq/ParseException.java | 28 +- .../hpl/jena/sparql/lang/arq/TokenMgrError.java | 2 +- .../sparql/lang/sparql_11/ParseException.java | 28 +- .../sparql/lang/sparql_11/SPARQLParser11.java | 372 ++-- .../lang/sparql_11/SPARQLParser11Constants.java | 385 ++-- .../sparql_11/SPARQLParser11TokenManager.java | 1805 ++++++++--------- .../sparql/lang/sparql_11/TokenMgrError.java | 2 +- 19 files changed, 2903 insertions(+), 2791 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/Grammar/.gitignore ---------------------------------------------------------------------- diff --git a/jena-arq/Grammar/.gitignore b/jena-arq/Grammar/.gitignore new file mode 100644 index 0000000..b64aedc --- /dev/null +++ b/jena-arq/Grammar/.gitignore @@ -0,0 +1,4 @@ +# Intermediate files +arq.txt +sparql_11.txt +Q.arq http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/Grammar/arq.jj ---------------------------------------------------------------------- diff --git a/jena-arq/Grammar/arq.jj b/jena-arq/Grammar/arq.jj index e0cdbb0..10c6419 100644 --- a/jena-arq/Grammar/arq.jj +++ b/jena-arq/Grammar/arq.jj @@ -1,3 +1,35 @@ +/* Copyright (C) 1991-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +/* This header is separate from features.h so that the compiler can + include it implicitly at the start of every compilation. It must + not itself include <features.h> or any other header that includes + <features.h> because the implicit include comes before any feature + test macros that may be defined in a source file before it first + explicitly includes a system header. GCC knows the name of this + header in order to preinclude it. */ +/* glibc's intent is to support the IEC 559 math functionality, real + and complex. If the GCC (4.9 and later) predefined macros + specifying compiler intent are available, use them to determine + whether the overall intent is to support these features; otherwise, + presume an older compiler has intent to support these features and + define these macros by default. */ +/* wchar_t uses ISO/IEC 10646 (2nd ed., published 2011-03-15) / + Unicode 6.0. */ +/* We do not support C11 <threads.h>. */ /** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -15,7 +47,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - // ARQ/SPARQL 1.1 Grammar - native syntax for the query engine options { @@ -73,6 +104,7 @@ PARSER_END(ARQParser) // Query only entry point void QueryUnit(): { } { + ByteOrderMark() { startQuery() ; } Query() <EOF> { finishQuery() ; } @@ -89,11 +121,16 @@ void Query() : { } } void UpdateUnit() : {} { + ByteOrderMark() { startUpdateRequest() ; } Update() <EOF> { finishUpdateRequest() ; } } +void ByteOrderMark() : {} +{ + (<BOM>)? +} void Prologue() : {} { ( BaseDecl() | PrefixDecl() )* @@ -987,9 +1024,10 @@ void ObjectPath(Node s, Node p, Path path, TripleCollector acc): { Node o ; } // -------- Paths Path PathUnit() : { Path p ; } { - p = Path() - <EOF> - { return p ; } + ByteOrderMark() + p = Path() + <EOF> + { return p ; } } // Weakest outermost Path Path() : { Path p ; } @@ -1684,6 +1722,8 @@ TOKEN: { | // Whitespace or comment. <#WSC: <WS> | <SINGLE_LINE_COMMENT> > +| + <BOM: "\uFEFF"> } // Main tokens */ TOKEN: http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/Grammar/grammar ---------------------------------------------------------------------- diff --git a/jena-arq/Grammar/grammar b/jena-arq/Grammar/grammar index c4f86c6..fbb3b89 100755 --- a/jena-arq/Grammar/grammar +++ b/jena-arq/Grammar/grammar @@ -35,7 +35,7 @@ function grammar (cd "$DIR" ; rm -f TokenMgrError.java ParseException.java Token.java JavaCharStream.java ) echo "---- Process grammar -- $1" - javacc -OUTPUT_DIRECTORY=$DIR -JDK_VERSION=1.5 "${FILE}" + javacc -OUTPUT_DIRECTORY=$DIR -JDK_VERSION=1.7 "${FILE}" RC=$? [ "$RC" = 0 ] || return @@ -93,11 +93,12 @@ function grammar -e 's/public String getMessage/@Override public String getMessage/' < $F > F mv F $F -## echo "---- Fixing Java warnings in ${CLASS} ..." -## F="$DIR/${CLASS}.java" + echo "---- Fixing Java warnings in ${CLASS} ..." + F="$DIR/${CLASS}.java" + sed -e 's/public class /\n@SuppressWarnings("all")\npublic class /' < $F > F ## sed -e 's/for (java.util.Iterator/for (java.util.Iterator<int[]>/' \ ## -e 's/(int\[\])//' < $F > F -## mv F $F + mv F $F echo "---- Done" } http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/Grammar/master.jj ---------------------------------------------------------------------- diff --git a/jena-arq/Grammar/master.jj b/jena-arq/Grammar/master.jj index ac5242d..6b958e1 100644 --- a/jena-arq/Grammar/master.jj +++ b/jena-arq/Grammar/master.jj @@ -110,6 +110,7 @@ PARSER_END(CLASS) // Query only entry point void QueryUnit(): { } { + ByteOrderMark() { startQuery() ; } Query() <EOF> { finishQuery() ; } @@ -129,6 +130,7 @@ void Query() : { } #ifdef UPDATE void UpdateUnit() : {} { + ByteOrderMark() { startUpdateRequest() ; } Update() <EOF> @@ -136,6 +138,10 @@ void UpdateUnit() : {} } #endif +void ByteOrderMark() : {} +{ + (<BOM>)? +} void Prologue() : {} { @@ -1194,9 +1200,10 @@ void ObjectPath(Node s, Node p, Path path, TripleCollector acc): { Node o ; } #ifdef ARQ Path PathUnit() : { Path p ; } { - p = Path() - <EOF> - { return p ; } + ByteOrderMark() + p = Path() + <EOF> + { return p ; } } #endif @@ -1935,6 +1942,15 @@ Expr Aggregate() : { Aggregator agg = null ; String sep = null ; #endif <RPAREN> { agg = AggregatorFactory.createGroupConcat(distinct, expr, sep, ordered) ; } + +#if 0 + /* Temporary syntax*/ + | t = <AGG> + { String iri ; } + iri = iri() a = ExpressionList() + { agg = AggregatorFactory.create(iri, a) ; } +#endif + ) { @@ -2106,6 +2122,8 @@ TOKEN: { | // Whitespace or comment. <#WSC: <WS> | <SINGLE_LINE_COMMENT> > +| + <BOM: "\uFEFF"> } // Main tokens */ http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/Grammar/sparql_11.jj ---------------------------------------------------------------------- diff --git a/jena-arq/Grammar/sparql_11.jj b/jena-arq/Grammar/sparql_11.jj index 783b1e9..2bdd531 100644 --- a/jena-arq/Grammar/sparql_11.jj +++ b/jena-arq/Grammar/sparql_11.jj @@ -1,3 +1,35 @@ +/* Copyright (C) 1991-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +/* This header is separate from features.h so that the compiler can + include it implicitly at the start of every compilation. It must + not itself include <features.h> or any other header that includes + <features.h> because the implicit include comes before any feature + test macros that may be defined in a source file before it first + explicitly includes a system header. GCC knows the name of this + header in order to preinclude it. */ +/* glibc's intent is to support the IEC 559 math functionality, real + and complex. If the GCC (4.9 and later) predefined macros + specifying compiler intent are available, use them to determine + whether the overall intent is to support these features; otherwise, + presume an older compiler has intent to support these features and + define these macros by default. */ +/* wchar_t uses ISO/IEC 10646 (2nd ed., published 2011-03-15) / + Unicode 6.0. */ +/* We do not support C11 <threads.h>. */ /** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -15,7 +47,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - // ARQ/SPARQL 1.1 Grammar - native syntax for the query engine options { @@ -73,6 +104,7 @@ PARSER_END(SPARQLParser11) // Query only entry point void QueryUnit(): { } { + ByteOrderMark() { startQuery() ; } Query() <EOF> { finishQuery() ; } @@ -89,11 +121,16 @@ void Query() : { } } void UpdateUnit() : {} { + ByteOrderMark() { startUpdateRequest() ; } Update() <EOF> { finishUpdateRequest() ; } } +void ByteOrderMark() : {} +{ + (<BOM>)? +} void Prologue() : {} { ( BaseDecl() | PrefixDecl() )* @@ -1556,6 +1593,8 @@ TOKEN: { | // Whitespace or comment. <#WSC: <WS> | <SINGLE_LINE_COMMENT> > +| + <BOM: "\uFEFF"> } // Main tokens */ TOKEN: http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserARQUpdate.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserARQUpdate.java b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserARQUpdate.java index 2269976..2490445 100644 --- a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserARQUpdate.java +++ b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserARQUpdate.java @@ -22,7 +22,6 @@ import java.io.FileReader ; import java.io.Reader ; import java.io.StringReader ; -import org.apache.jena.atlas.io.PeekReader ; import org.slf4j.LoggerFactory ; import com.hp.hpl.jena.query.QueryException ; @@ -41,9 +40,9 @@ public class ParserARQUpdate extends UpdateParser } @Override - protected void parse$(UpdateSink sink, PeekReader pr) + protected void parse$(UpdateSink sink, Reader r) { - _parse(sink, pr) ; + _parse(sink, r) ; } /** Use with care - Reader must be UTF-8 */ http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserSPARQL11Update.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserSPARQL11Update.java b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserSPARQL11Update.java index 777e527..6d7ce29 100644 --- a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserSPARQL11Update.java +++ b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserSPARQL11Update.java @@ -19,11 +19,9 @@ package com.hp.hpl.jena.sparql.lang; import java.io.FileReader ; -import java.io.InputStream ; import java.io.Reader ; import java.io.StringReader ; -import org.apache.jena.atlas.io.PeekReader ; import org.apache.jena.atlas.logging.Log ; import org.slf4j.LoggerFactory ; @@ -33,40 +31,28 @@ import com.hp.hpl.jena.shared.JenaException ; import com.hp.hpl.jena.sparql.lang.sparql_11.SPARQLParser11 ; import com.hp.hpl.jena.sparql.modify.UpdateSink ; import com.hp.hpl.jena.update.UpdateException ; -import com.hp.hpl.jena.util.FileUtils ; public class ParserSPARQL11Update extends UpdateParser { @Override - protected void parse$(UpdateSink sink, String updateString) - { + protected void parse$(UpdateSink sink, String updateString) { Reader r = new StringReader(updateString) ; _parse(sink, r) ; } - - @Override - protected void parse$(UpdateSink sink, PeekReader pr) - { - _parse(sink, pr) ; - } - + @Override - public void parse(UpdateSink sink, InputStream in) - { - Reader r = FileUtils.asBufferedUTF8(in) ; + protected void parse$(UpdateSink sink, Reader r) { _parse(sink, r) ; } - public void parse(UpdateSink sink, Reader r) - { + public void parse(UpdateSink sink, Reader r) { if ( r instanceof FileReader ) LoggerFactory.getLogger(this.getClass()).warn("FileReader passed to ParserSPARQL11Update.parse - use a FileInputStream") ; _parse(sink, r) ; } - private void _parse(UpdateSink sink, Reader r) - { + private void _parse(UpdateSink sink, Reader r) { SPARQLParser11 parser = null ; try { parser = new SPARQLParser11(r) ; @@ -77,8 +63,8 @@ public class ParserSPARQL11Update extends UpdateParser { throw new QueryParseException(ex.getMessage(), ex.currentToken.beginLine, - ex.currentToken.beginColumn - ) ; } + ex.currentToken.beginColumn) ; + } catch (com.hp.hpl.jena.sparql.lang.sparql_11.TokenMgrError tErr) { // Last valid token : not the same as token error message - but this should not happen http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/SPARQLParser.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/SPARQLParser.java b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/SPARQLParser.java index 3995551..aa98e01 100644 --- a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/SPARQLParser.java +++ b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/SPARQLParser.java @@ -31,9 +31,6 @@ public abstract class SPARQLParser { public final Query parse(Query query, String queryString) throws QueryParseException { - // Sort out BOM - if ( queryString.startsWith("\uFEFF") ) - queryString = queryString.substring(1) ; return parse$(query, queryString) ; } http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/UpdateParser.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/UpdateParser.java b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/UpdateParser.java index ee8e416..cb1fefb 100644 --- a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/UpdateParser.java +++ b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/UpdateParser.java @@ -19,12 +19,12 @@ package com.hp.hpl.jena.sparql.lang; import java.io.InputStream ; - -import org.apache.jena.atlas.io.PeekReader ; +import java.io.Reader ; import com.hp.hpl.jena.query.QueryParseException ; import com.hp.hpl.jena.query.Syntax ; import com.hp.hpl.jena.sparql.modify.UpdateSink ; +import com.hp.hpl.jena.util.FileUtils ; /** This class provides the root of lower level access to all the parsers. * Each subclass hides the details of the per-language exception handlers and other @@ -36,23 +36,19 @@ public abstract class UpdateParser { public final void parse(UpdateSink sink, String updateString) throws QueryParseException { - // Sort out BOM - if ( updateString.startsWith("\uFEFF") ) - updateString = updateString.substring(1) ; parse$(sink, updateString) ; } protected abstract void parse$(UpdateSink sink, String updateString) throws QueryParseException ; - public void parse(UpdateSink sink, InputStream input) throws QueryParseException + public final void parse(UpdateSink sink, InputStream input) throws QueryParseException { - // :-( Wrap in something that we can use to look for a BOM. - // ?? Move BOM processing to grammar and reverse this. - PeekReader pr = PeekReader.makeUTF8(input) ; - parse$(sink, pr) ; + // BOM processing moved to the grammar. + Reader r = FileUtils.asBufferedUTF8(input) ; + parse$(sink, r) ; } - protected abstract void parse$(UpdateSink sink, PeekReader pr) throws QueryParseException ; + protected abstract void parse$(UpdateSink sink, Reader r) throws QueryParseException ; public static boolean canParse(Syntax syntaxURI) {