JENA-810 : Handle BOM in the javacc file for the SPARQL 1.1 grammar.

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/d1c3b58d
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/d1c3b58d
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/d1c3b58d

Branch: refs/heads/master
Commit: d1c3b58d4a3bc468cdda20108ca6afdf58ae5198
Parents: 9e6d45b
Author: Andy Seaborne <a...@apache.org>
Authored: Sat Nov 1 17:02:18 2014 +0000
Committer: Andy Seaborne <a...@apache.org>
Committed: Sat Nov 1 17:02:18 2014 +0000

----------------------------------------------------------------------
 jena-arq/Grammar/.gitignore                     |    4 +
 jena-arq/Grammar/arq.jj                         |   48 +-
 jena-arq/Grammar/grammar                        |    9 +-
 jena-arq/Grammar/master.jj                      |   24 +-
 jena-arq/Grammar/sparql_11.jj                   |   41 +-
 .../hpl/jena/sparql/lang/ParserARQUpdate.java   |    5 +-
 .../jena/sparql/lang/ParserSPARQL11Update.java  |   28 +-
 .../hp/hpl/jena/sparql/lang/SPARQLParser.java   |    3 -
 .../hp/hpl/jena/sparql/lang/UpdateParser.java   |   18 +-
 .../hp/hpl/jena/sparql/lang/arq/ARQParser.java  |  630 +++---
 .../sparql/lang/arq/ARQParserConstants.java     |  399 ++--
 .../sparql/lang/arq/ARQParserTokenManager.java  | 1863 +++++++++---------
 .../jena/sparql/lang/arq/ParseException.java    |   28 +-
 .../hpl/jena/sparql/lang/arq/TokenMgrError.java |    2 +-
 .../sparql/lang/sparql_11/ParseException.java   |   28 +-
 .../sparql/lang/sparql_11/SPARQLParser11.java   |  372 ++--
 .../lang/sparql_11/SPARQLParser11Constants.java |  385 ++--
 .../sparql_11/SPARQLParser11TokenManager.java   | 1805 ++++++++---------
 .../sparql/lang/sparql_11/TokenMgrError.java    |    2 +-
 19 files changed, 2903 insertions(+), 2791 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/Grammar/.gitignore
----------------------------------------------------------------------
diff --git a/jena-arq/Grammar/.gitignore b/jena-arq/Grammar/.gitignore
new file mode 100644
index 0000000..b64aedc
--- /dev/null
+++ b/jena-arq/Grammar/.gitignore
@@ -0,0 +1,4 @@
+# Intermediate files
+arq.txt
+sparql_11.txt
+Q.arq

http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/Grammar/arq.jj
----------------------------------------------------------------------
diff --git a/jena-arq/Grammar/arq.jj b/jena-arq/Grammar/arq.jj
index e0cdbb0..10c6419 100644
--- a/jena-arq/Grammar/arq.jj
+++ b/jena-arq/Grammar/arq.jj
@@ -1,3 +1,35 @@
+/* Copyright (C) 1991-2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+/* This header is separate from features.h so that the compiler can
+   include it implicitly at the start of every compilation.  It must
+   not itself include <features.h> or any other header that includes
+   <features.h> because the implicit include comes before any feature
+   test macros that may be defined in a source file before it first
+   explicitly includes a system header.  GCC knows the name of this
+   header in order to preinclude it.  */
+/* glibc's intent is to support the IEC 559 math functionality, real
+   and complex.  If the GCC (4.9 and later) predefined macros
+   specifying compiler intent are available, use them to determine
+   whether the overall intent is to support these features; otherwise,
+   presume an older compiler has intent to support these features and
+   define these macros by default.  */
+/* wchar_t uses ISO/IEC 10646 (2nd ed., published 2011-03-15) /
+   Unicode 6.0.  */
+/* We do not support C11 <threads.h>.  */
 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
@@ -15,7 +47,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 // ARQ/SPARQL 1.1 Grammar - native syntax for the query engine
 options
 {
@@ -73,6 +104,7 @@ PARSER_END(ARQParser)
 // Query only entry point
 void QueryUnit(): { }
 {
+  ByteOrderMark()
   { startQuery() ; }
   Query() <EOF>
   { finishQuery() ; }
@@ -89,11 +121,16 @@ void Query() : { }
 }
 void UpdateUnit() : {}
 {
+  ByteOrderMark()
   { startUpdateRequest() ; }
   Update()
   <EOF>
   { finishUpdateRequest() ; }
 }
+void ByteOrderMark() : {}
+{
+   (<BOM>)?
+}
 void Prologue() : {}
 {
   ( BaseDecl() | PrefixDecl() )*
@@ -987,9 +1024,10 @@ void ObjectPath(Node s, Node p, Path path, TripleCollector acc): { Node o ; }
 // -------- Paths
 Path PathUnit() : { Path p ; }
 {
-    p = Path()
-    <EOF>
-    { return p ; }
+  ByteOrderMark()
+  p = Path()
+  <EOF>
+  { return p ; }
 }
 // Weakest outermost
 Path Path() : { Path p ; }
@@ -1684,6 +1722,8 @@ TOKEN: {
 |
   // Whitespace or comment.
   <#WSC: <WS> | <SINGLE_LINE_COMMENT> >
+|
+  <BOM: "\uFEFF">
 }
 // Main tokens */
 TOKEN:

http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/Grammar/grammar
----------------------------------------------------------------------
diff --git a/jena-arq/Grammar/grammar b/jena-arq/Grammar/grammar
index c4f86c6..fbb3b89 100755
--- a/jena-arq/Grammar/grammar
+++ b/jena-arq/Grammar/grammar
@@ -35,7 +35,7 @@ function grammar
     (cd "$DIR" ; rm -f TokenMgrError.java ParseException.java Token.java JavaCharStream.java )
 
     echo "---- Process grammar -- $1"
-    javacc -OUTPUT_DIRECTORY=$DIR  -JDK_VERSION=1.5 "${FILE}"
+    javacc -OUTPUT_DIRECTORY=$DIR  -JDK_VERSION=1.7 "${FILE}"
     RC=$?
 
     [ "$RC" = 0 ] || return
@@ -93,11 +93,12 @@ function grammar
        -e 's/public String getMessage/@Override public String getMessage/' < $F > F
     mv F $F
 
-##     echo "---- Fixing Java warnings in ${CLASS} ..."
-##     F="$DIR/${CLASS}.java"
+    echo "---- Fixing Java warnings in ${CLASS} ..."
+    F="$DIR/${CLASS}.java"
+    sed -e 's/public class /\n@SuppressWarnings("all")\npublic class /' < $F > F
 ##     sed -e 's/for (java.util.Iterator/for (java.util.Iterator<int[]>/' \
 ##     -e 's/(int\[\])//' < $F > F
-##     mv F $F
+    mv F $F
 
     echo "---- Done"
 }

http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/Grammar/master.jj
----------------------------------------------------------------------
diff --git a/jena-arq/Grammar/master.jj b/jena-arq/Grammar/master.jj
index ac5242d..6b958e1 100644
--- a/jena-arq/Grammar/master.jj
+++ b/jena-arq/Grammar/master.jj
@@ -110,6 +110,7 @@ PARSER_END(CLASS)
 // Query only entry point
 void QueryUnit(): { }
 {
+  ByteOrderMark()
   { startQuery() ; }
   Query() <EOF>
   { finishQuery() ; }
@@ -129,6 +130,7 @@ void Query() : { }
 #ifdef UPDATE
 void UpdateUnit() : {}
 {
+  ByteOrderMark()
   { startUpdateRequest() ; }
   Update()
   <EOF>
@@ -136,6 +138,10 @@ void UpdateUnit() : {}
 }
 #endif
 
+void ByteOrderMark() : {}
+{
+   (<BOM>)?
+}
 
 void Prologue() : {}
 {
@@ -1194,9 +1200,10 @@ void ObjectPath(Node s, Node p, Path path, TripleCollector acc): { Node o ; }
 #ifdef ARQ
 Path PathUnit() : { Path p ; }
 {
-    p = Path()
-    <EOF>
-    { return p ; }
+  ByteOrderMark()
+  p = Path()
+  <EOF>
+  { return p ; }
 }
 #endif
 
@@ -1935,6 +1942,15 @@ Expr Aggregate() : { Aggregator agg = null ; String sep = null ;
 #endif
     <RPAREN>
     { agg = AggregatorFactory.createGroupConcat(distinct, expr, sep, ordered) ; }
+
+#if 0
+    /* Temporary syntax*/
+    | t = <AGG>
+          { String iri ; }
+          iri = iri() a = ExpressionList()
+      { agg = AggregatorFactory.create(iri, a) ; }
+#endif
+
    )
 
    {
@@ -2106,6 +2122,8 @@ TOKEN: {
 |
   // Whitespace or comment.
   <#WSC: <WS> | <SINGLE_LINE_COMMENT> >
+|
+  <BOM:    "\uFEFF">
 }
 
 // Main tokens */

http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/Grammar/sparql_11.jj
----------------------------------------------------------------------
diff --git a/jena-arq/Grammar/sparql_11.jj b/jena-arq/Grammar/sparql_11.jj
index 783b1e9..2bdd531 100644
--- a/jena-arq/Grammar/sparql_11.jj
+++ b/jena-arq/Grammar/sparql_11.jj
@@ -1,3 +1,35 @@
+/* Copyright (C) 1991-2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+/* This header is separate from features.h so that the compiler can
+   include it implicitly at the start of every compilation.  It must
+   not itself include <features.h> or any other header that includes
+   <features.h> because the implicit include comes before any feature
+   test macros that may be defined in a source file before it first
+   explicitly includes a system header.  GCC knows the name of this
+   header in order to preinclude it.  */
+/* glibc's intent is to support the IEC 559 math functionality, real
+   and complex.  If the GCC (4.9 and later) predefined macros
+   specifying compiler intent are available, use them to determine
+   whether the overall intent is to support these features; otherwise,
+   presume an older compiler has intent to support these features and
+   define these macros by default.  */
+/* wchar_t uses ISO/IEC 10646 (2nd ed., published 2011-03-15) /
+   Unicode 6.0.  */
+/* We do not support C11 <threads.h>.  */
 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
@@ -15,7 +47,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 // ARQ/SPARQL 1.1 Grammar - native syntax for the query engine
 options
 {
@@ -73,6 +104,7 @@ PARSER_END(SPARQLParser11)
 // Query only entry point
 void QueryUnit(): { }
 {
+  ByteOrderMark()
   { startQuery() ; }
   Query() <EOF>
   { finishQuery() ; }
@@ -89,11 +121,16 @@ void Query() : { }
 }
 void UpdateUnit() : {}
 {
+  ByteOrderMark()
   { startUpdateRequest() ; }
   Update()
   <EOF>
   { finishUpdateRequest() ; }
 }
+void ByteOrderMark() : {}
+{
+   (<BOM>)?
+}
 void Prologue() : {}
 {
   ( BaseDecl() | PrefixDecl() )*
@@ -1556,6 +1593,8 @@ TOKEN: {
 |
   // Whitespace or comment.
   <#WSC: <WS> | <SINGLE_LINE_COMMENT> >
+|
+  <BOM: "\uFEFF">
 }
 // Main tokens */
 TOKEN:

http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserARQUpdate.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserARQUpdate.java b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserARQUpdate.java
index 2269976..2490445 100644
--- a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserARQUpdate.java
+++ b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserARQUpdate.java
@@ -22,7 +22,6 @@ import java.io.FileReader ;
 import java.io.Reader ;
 import java.io.StringReader ;
 
-import org.apache.jena.atlas.io.PeekReader ;
 import org.slf4j.LoggerFactory ;
 
 import com.hp.hpl.jena.query.QueryException ;
@@ -41,9 +40,9 @@ public class ParserARQUpdate extends UpdateParser
     }
     
     @Override
-    protected void parse$(UpdateSink sink, PeekReader pr)
+    protected void parse$(UpdateSink sink, Reader r)
     {
-        _parse(sink, pr) ;
+        _parse(sink, r) ;
     }
 
     /** Use with care - Reader must be UTF-8 */ 

http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserSPARQL11Update.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserSPARQL11Update.java b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserSPARQL11Update.java
index 777e527..6d7ce29 100644
--- a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserSPARQL11Update.java
+++ b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/ParserSPARQL11Update.java
@@ -19,11 +19,9 @@
 package com.hp.hpl.jena.sparql.lang;
 
 import java.io.FileReader ;
-import java.io.InputStream ;
 import java.io.Reader ;
 import java.io.StringReader ;
 
-import org.apache.jena.atlas.io.PeekReader ;
 import org.apache.jena.atlas.logging.Log ;
 import org.slf4j.LoggerFactory ;
 
@@ -33,40 +31,28 @@ import com.hp.hpl.jena.shared.JenaException ;
 import com.hp.hpl.jena.sparql.lang.sparql_11.SPARQLParser11 ;
 import com.hp.hpl.jena.sparql.modify.UpdateSink ;
 import com.hp.hpl.jena.update.UpdateException ;
-import com.hp.hpl.jena.util.FileUtils ;
 
 
 public class ParserSPARQL11Update extends UpdateParser
 {
     @Override
-    protected void parse$(UpdateSink sink, String updateString)
-    {
+    protected void parse$(UpdateSink sink, String updateString) {
         Reader r = new StringReader(updateString) ;
         _parse(sink, r) ;
     }
-    
-    @Override
-    protected void parse$(UpdateSink sink, PeekReader pr)
-    {
-        _parse(sink, pr) ;
-    }
-    
+
     @Override
-    public void parse(UpdateSink sink, InputStream in)
-    {
-        Reader r = FileUtils.asBufferedUTF8(in) ;
+    protected void parse$(UpdateSink sink, Reader r) {
         _parse(sink, r) ;
     }
 
-    public void parse(UpdateSink sink, Reader r)
-    {
+    public void parse(UpdateSink sink, Reader r) {
         if ( r instanceof FileReader )
             LoggerFactory.getLogger(this.getClass()).warn("FileReader passed to ParserSPARQL11Update.parse - use a FileInputStream") ;
         _parse(sink, r) ;
     }
     
-    private void _parse(UpdateSink sink, Reader r)
-    {
+    private void _parse(UpdateSink sink, Reader r) {
         SPARQLParser11 parser = null ;
         try {
             parser = new SPARQLParser11(r) ;
@@ -77,8 +63,8 @@ public class ParserSPARQL11Update extends UpdateParser
         { 
             throw new QueryParseException(ex.getMessage(),
                                           ex.currentToken.beginLine,
-                                          ex.currentToken.beginColumn
-            ) ; }
+                                          ex.currentToken.beginColumn) ;
+        }
         catch (com.hp.hpl.jena.sparql.lang.sparql_11.TokenMgrError tErr)
         {
             // Last valid token : not the same as token error message - but this should not happen

http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/SPARQLParser.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/SPARQLParser.java b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/SPARQLParser.java
index 3995551..aa98e01 100644
--- a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/SPARQLParser.java
+++ b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/SPARQLParser.java
@@ -31,9 +31,6 @@ public abstract class SPARQLParser
 {
     public final Query parse(Query query, String queryString) throws QueryParseException
     {
-        // Sort out BOM
-        if ( queryString.startsWith("\uFEFF") )
-            queryString = queryString.substring(1) ;
         return parse$(query, queryString) ;
     }
     

http://git-wip-us.apache.org/repos/asf/jena/blob/d1c3b58d/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/UpdateParser.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/UpdateParser.java b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/UpdateParser.java
index ee8e416..cb1fefb 100644
--- a/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/UpdateParser.java
+++ b/jena-arq/src/main/java/com/hp/hpl/jena/sparql/lang/UpdateParser.java
@@ -19,12 +19,12 @@
 package com.hp.hpl.jena.sparql.lang;
 
 import java.io.InputStream ;
-
-import org.apache.jena.atlas.io.PeekReader ;
+import java.io.Reader ;
 
 import com.hp.hpl.jena.query.QueryParseException ;
 import com.hp.hpl.jena.query.Syntax ;
 import com.hp.hpl.jena.sparql.modify.UpdateSink ;
+import com.hp.hpl.jena.util.FileUtils ;
 
 /** This class provides the root of lower level access to all the parsers.
  *  Each subclass hides the details of the per-language exception handlers and other
@@ -36,23 +36,19 @@ public abstract class UpdateParser
 {
     public final void parse(UpdateSink sink, String updateString) throws QueryParseException
     {
-        // Sort out BOM
-        if ( updateString.startsWith("\uFEFF") )
-            updateString = updateString.substring(1) ;
         parse$(sink, updateString) ;
     }
 
     protected abstract void parse$(UpdateSink sink, String updateString) throws QueryParseException ;
 
-    public void parse(UpdateSink sink, InputStream input) throws QueryParseException
+    public final void parse(UpdateSink sink, InputStream input) throws QueryParseException
     {
-        // :-( Wrap in something that we can use to look for a BOM.
-        // ?? Move BOM processing to grammar and reverse this.
-        PeekReader pr = PeekReader.makeUTF8(input) ;
-        parse$(sink, pr) ;
+        // BOM processing moved to the grammar.
+        Reader r = FileUtils.asBufferedUTF8(input) ;
+        parse$(sink, r) ;
     }
     
-    protected abstract void parse$(UpdateSink sink, PeekReader pr) throws QueryParseException ;
+    protected abstract void parse$(UpdateSink sink, Reader r) throws QueryParseException ;
 
     public static boolean canParse(Syntax syntaxURI)
     {

Reply via email to