This is an automated email from the ASF dual-hosted git repository.

amanin pushed a commit to branch refactor/sql-store
in repository https://gitbox.apache.org/repos/asf/sis.git

commit 36b597e5f0f7b023a29b22a14e10f0ed17795ca6
Author: Alexis Manin <[email protected]>
AuthorDate: Thu Sep 26 14:31:37 2019 +0200

    feat(SQLStore): add partial support for distinct operator on SQL query
---
 .../sis/internal/sql/feature/QueryBuilder.java     |   3 +
 .../sis/internal/sql/feature/QueryFeatureSet.java  | 130 +++++++++++++++------
 .../apache/sis/internal/sql/feature/StreamSQL.java |  21 ++--
 .../org/apache/sis/storage/sql/SQLStoreTest.java   |  54 ++++++---
 4 files changed, 145 insertions(+), 63 deletions(-)

diff --git 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryBuilder.java
 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryBuilder.java
index 24ac4f3..68d798e 100644
--- 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryBuilder.java
+++ 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryBuilder.java
@@ -1,5 +1,8 @@
 package org.apache.sis.internal.sql.feature;
 
+/**
+ * API to allow an overridden SQL Stream to delegate a set of intermediate 
operations to native driver.
+ */
 interface QueryBuilder {
 
     QueryBuilder limit(long limit);
diff --git 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
index b590c4b..9d027a4 100644
--- 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
+++ 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
@@ -20,6 +20,15 @@ import org.apache.sis.internal.storage.AbstractFeatureSet;
 import org.apache.sis.storage.DataStoreException;
 import org.apache.sis.util.collection.BackingStoreException;
 
+/**
+ * Stores the SQL query given at build time, and executes it when calling {@link 
#features(boolean) data stream}. Note that
+ * {@link #getType() data type} is defined by analyzing sql statement 
metadata. Note that the user query can be modified
+ * before execution to adapt various parameters overridable at fetch time as 
offset and limit through
+ * {@link Stream#skip(long)} and {@link Stream#limit(long)}.
+ *
+ * Note that this component models the query result as closely as possible, so the built 
data type will be simple feature type (no
+ * association).
+ */
 public class QueryFeatureSet extends AbstractFeatureSet {
 
     /**
@@ -35,34 +44,111 @@ public class QueryFeatureSet extends AbstractFeatureSet {
     private static final Pattern OFFSET_PATTERN = 
Pattern.compile("OFFSET\\s+([^\\s]+)(?:\\s+ROWS?)?", Pattern.CASE_INSENSITIVE);
 
     /**
+     * Check for a selection of distinct rows.
+     */
+    private static final Pattern DISTINCT_PATTERN = 
Pattern.compile("^\\s*SELECT\\s+DISTINCT", Pattern.CASE_INSENSITIVE);
+
+    /**
      * Keep builder to allow native limit and offset through stream operation.
      */
     private final SQLBuilder queryBuilder;
 
+    /**
+     * SQL database handler. Used to open new connections at query time.
+     */
     private final DataSource source;
 
+    /**
+     * Component in charge of conversion from SQL entry to Feature entity. 
Also provides output data type.
+     */
     private final FeatureAdapter adapter;
 
+    /**
+     * Offset and limit defined in user query, if any. If none is found, or we 
cannot safely determine their values (not
+     * specified as a literal but as a variable name), values will be set to 
-1.
+     *
+     * @implNote  BE CAREFUL ! We use these fields for optimisations. We 
remove values from user query if safely
+     * identified, and add them only at query time, with additional 
skip/offset defined through stream or java query.
+     */
+    private final long originOffset, originLimit;
+
+    /**
+     * A flag indicating that we've safely identified a {@literal DISTINCT} 
keyword in the user query.
+     */
+    private final boolean distinct;
+
+    /**
+     * Same as {@link #QueryFeatureSet(SQLBuilder, DataSource, Connection)}, 
except query is provided by a fixed text
+     * instead of a builder.
+     */
     public QueryFeatureSet(String query, DataSource source, Connection conn) 
throws SQLException {
         this(fromQuery(query, conn), source, conn);
     }
 
+    /**
+     * Create a new feature set whose data is provided by given user query. 
Note that query will be compiled now, but
+     * only executed when {@link #features(boolean) acquiring a data stream}.
+     *
+     * @param queryBuilder Contains user-defined SQL query. It must contain a 
valid and complete SQL statement, because
+     *                     it will be compiled at built time to define feature 
type. Note that we will make a copy of
+     *                     it, so any modification done after this feature set 
is created won't have any effect on it.
+     * @param source A database pointer we'll keep, to create new connections 
when {@link #features(boolean) fetching data}.
+     * @param conn Serves for compiling user query, thus creating output data 
type. The connection is not kept nor
+     *             re-used after constructor, so you can close it immediately 
after. We require it, so we do not force
+     *             opening a new connection if user already has one ready on 
its end.
+     * @throws SQLException If input query compiling or analysis of its 
metadata fails.
+     */
     public QueryFeatureSet(SQLBuilder queryBuilder, DataSource source, 
Connection conn) throws SQLException {
         this(queryBuilder, new Analyzer(source, conn.getMetaData(), null, 
null), source, conn);
     }
 
-    public QueryFeatureSet(SQLBuilder queryBuilder, Analyzer analyzer, 
DataSource source, Connection conn) throws SQLException {
+
+    /**
+     * See {@link #QueryFeatureSet(SQLBuilder, DataSource, Connection)} for 
details.
+     *
+     * @param analyzer SIS sql analyzer, used for query metadata analysis. Not 
nullable. If you do not have any, you
+     *                 can use {@link #QueryFeatureSet(SQLBuilder, DataSource, 
Connection) another constructor}.
+     */
+    QueryFeatureSet(SQLBuilder queryBuilder, Analyzer analyzer, DataSource 
source, Connection conn) throws SQLException {
         super(analyzer.listeners);
-        // Defensive copy
-        this.queryBuilder = new SQLBuilder(queryBuilder);
-        this.queryBuilder.append(queryBuilder.toString());
         this.source = source;
 
-        final String sql = queryBuilder.toString();
+        String sql = queryBuilder.toString();
         try (PreparedStatement statement = conn.prepareStatement(sql)) {
             final SQLTypeSpecification spec = analyzer.create(statement, sql, 
null);
             adapter = analyzer.buildAdapter(spec);
         }
+
+        /* We will now try to parse offset and limit from input query. If we 
encounter unsupported/ambiguous case,
+         * we will fallback to pure java management of additional limit and 
offset.
+         * If we successfully retrieve offset and limit, we'll modify user 
query to take account of additional
+         * parameters given later.
+         */
+        long tmpOffset = 0, tmpLimit = 0;
+        try {
+            Matcher matcher = OFFSET_PATTERN.matcher(sql);
+            if (matcher.find()) tmpOffset = Long.parseLong(matcher.group(1));
+            if (matcher.find()) throw new UnsupportedOperationException("More 
than one offset in the query.");
+            sql = matcher.replaceFirst("");
+
+            matcher = LIMIT_PATTERN.matcher(sql);
+            if (matcher.find()) tmpLimit = Long.parseLong(matcher.group(1));
+            if (matcher.find()) throw new UnsupportedOperationException("More 
than one limit in the query.");
+            sql = matcher.replaceFirst("");
+        } catch (RuntimeException e) {
+            sql = source.toString();
+            tmpOffset = -1;
+            tmpLimit = -1;
+        }
+
+        distinct = DISTINCT_PATTERN.matcher(sql).find();
+
+        originOffset = tmpOffset;
+        originLimit = tmpLimit;
+
+        // Defensive copy
+        this.queryBuilder = new SQLBuilder(queryBuilder);
+        this.queryBuilder.append(sql);
     }
 
     /**
@@ -99,7 +185,7 @@ public class QueryFeatureSet extends AbstractFeatureSet {
     }
 
     @Override
-    public Stream<Feature> features(boolean parallel) throws 
DataStoreException {
+    public Stream<Feature> features(boolean parallel) {
         return new StreamSQL(new QueryAdapter(queryBuilder), source);
     }
 
@@ -107,39 +193,12 @@ public class QueryFeatureSet extends AbstractFeatureSet {
 
         private final SQLBuilder source;
 
-        private final long originOffset, originLimit;
         private long additionalOffset, additionalLimit;
 
         QueryAdapter(SQLBuilder source) {
-            /* We will now try to parse offset and limit from input query. If 
we encounter unsupported/ambiguous case,
-             * we will fallback to pure java management of additional limit 
and offset.
-             * If we successfully retrieve offset and limit, we'll modify user 
query to take account of additional
-             * parameters given later.
-             */
-            long tmpOffset = 0, tmpLimit = 0;
-            String sql = source.toString();
-            try {
-                Matcher matcher = OFFSET_PATTERN.matcher(sql);
-                if (matcher.find()) tmpOffset = 
Long.parseLong(matcher.group(1));
-                if (matcher.find()) throw new 
UnsupportedOperationException("More than one offset in the query.");
-                sql = matcher.replaceFirst("");
-
-                matcher = LIMIT_PATTERN.matcher(sql);
-                if (matcher.find()) tmpLimit = 
Long.parseLong(matcher.group(1));
-                if (matcher.find()) throw new 
UnsupportedOperationException("More than one limit in the query.");
-                sql = matcher.replaceFirst("");
-            } catch (RuntimeException e) {
-                sql = source.toString();
-                tmpOffset = -1;
-                tmpLimit = -1;
-            }
-
-            originOffset = tmpOffset;
-            originLimit = tmpLimit;
-
             // defensive copy
             this.source = new SQLBuilder(source);
-            this.source.append(sql);
+            this.source.append(source.toString());
         }
 
         @Override
@@ -156,6 +215,7 @@ public class QueryFeatureSet extends AbstractFeatureSet {
 
         @Override
         public QueryBuilder distinct(boolean activate) {
+            if (distinct == activate) return this;
             throw new UnsupportedOperationException("Not supported yet: 
modifying user query"); // "Alexis Manin (Geomatys)" on 24/09/2019
         }
 
@@ -250,7 +310,7 @@ public class QueryFeatureSet extends AbstractFeatureSet {
         public long estimateSize() {
             // TODO: economic size estimation ? A count query seems overkill 
for the aim of this API. Howver, we could
             // analyze user query in search for a limit value.
-            return Long.MAX_VALUE;
+            return originLimit > 0? originLimit : Long.MAX_VALUE;
         }
 
         @Override
diff --git 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java
 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java
index e1b1de6..925da4e 100644
--- 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java
+++ 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java
@@ -125,8 +125,13 @@ class StreamSQL extends StreamDecoration<Feature> {
 
     @Override
     public Stream<Feature> distinct() {
-        queryAdapter.distinct();
-        return this;
+        try {
+            queryAdapter.distinct();
+            return this;
+        } catch (UnsupportedOperationException e) {
+            // TODO: emit warning
+            return super.distinct();
+        }
     }
 
     @Override
@@ -239,12 +244,6 @@ class StreamSQL extends StreamDecoration<Feature> {
         }
 
         @Override
-        public Stream<O> distinct() {
-            source = source.distinct();
-            return this;
-        }
-
-        @Override
         public Stream<O> limit(long maxSize) {
             source = source.limit(maxSize);
             return this;
@@ -335,12 +334,6 @@ class StreamSQL extends StreamDecoration<Feature> {
         }
 
         @Override
-        public DoubleStream distinct() {
-            source = source.distinct();
-            return this;
-        }
-
-        @Override
         public DoubleStream limit(long maxSize) {
             source = source.limit(maxSize);
             return this;
diff --git 
a/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java
 
b/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java
index 9eb41aa..2929194 100644
--- 
a/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java
+++ 
b/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java
@@ -17,6 +17,7 @@
 package org.apache.sis.storage.sql;
 
 import java.sql.Connection;
+import java.sql.SQLException;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
@@ -175,8 +176,8 @@ public final strictfp class SQLStoreTest extends TestCase {
 
     private void verifyQueries(DataSource source) throws Exception {
         verifyFetchCityTableAsQuery(source);
-
         verifyLimitOffsetAndColumnSelectionFromQuery(source);
+        verifyDistinctQuery(source);
     }
 
     private void verifyFetchCityTableAsQuery(DataSource source) throws 
Exception {
@@ -263,7 +264,11 @@ public final strictfp class SQLStoreTest extends TestCase {
      * @param source Database connection provider.
      */
     private void verifyLimitOffsetAndColumnSelectionFromQuery(final DataSource 
source) throws Exception {
-        final String query = "SELECT \"english_name\" as \"title\" FROM 
features.\"Parks\" ORDER BY \"english_name\" ASC OFFSET 2 ROWS FETCH NEXT 3 
ROWS ONLY";
+        // Ensure multiline text is accepted
+        final String query = "SELECT \"english_name\" as \"title\" \n\r" +
+                "FROM features.\"Parks\" \n" +
+                "ORDER BY \"english_name\" ASC \n" +
+                "OFFSET 2 ROWS FETCH NEXT 3 ROWS ONLY";
         final QueryFeatureSet qfs;
         try (Connection conn = source.getConnection()) {
             qfs = new QueryFeatureSet(query, source, conn);
@@ -307,18 +312,39 @@ public final strictfp class SQLStoreTest extends TestCase 
{
         assertArrayEquals("Only second third name should be returned", new 
String[]{"Shinjuku Gyoen"}, parkNames);
     }
 
-/**
-* Checks that operations stacked on feature stream are well executed. This 
test focus on mapping and peeking
-* actions overloaded by sql streams. We'd like to test skip and limit 
operations too, but ignore it for now,
-* because ordering of results matters for such a test.
-*
-* @implNote Most of stream operations used here are meaningless. We just want 
to ensure that the pipeline does not
-* skip any operation.
-*
-* @param cities The feature set to read from. We expect a feature set 
containing all cities defined for the test
-*               class.
-* @throws DataStoreException Let's propagate any error raised by input feature 
set.
-*/
+    /**
+     * Check that a {@link Stream#distinct()} gives coherent results. For now, 
no optimisation is done to delegate it to
+     * database, but this test allows for non-regression test, so when an 
optimisation is done, we'll immediately test
+     * its validity.
+     */
+    private void verifyDistinctQuery(DataSource source) throws SQLException {
+        // Simple single-line query selecting a column with duplicate values
+        final String query = "SELECT \"country\" FROM features.\"Parks\" ORDER 
BY \"country\"";
+        final QueryFeatureSet qfs;
+        try (Connection conn = source.getConnection()) {
+            qfs = new QueryFeatureSet(query, source, conn);
+        }
+
+        final Object[] expected = qfs.features(false)
+                .distinct() 
+                .map(f -> f.getPropertyValue("country"))
+                .toArray();
+
+        assertArrayEquals("Distinct country names, sorted in ascending order", 
new String[]{"CAN", "FRA", "JPN"}, expected);
+    }
+
+    /**
+    * Checks that operations stacked on feature stream are well executed. This 
test focus on mapping and peeking
+    * actions overloaded by sql streams. We'd like to test skip and limit 
operations too, but ignore it for now,
+    * because ordering of results matters for such a test.
+    *
+    * @implNote Most of stream operations used here are meaningless. We just 
want to ensure that the pipeline does not
+    * skip any operation.
+    *
+    * @param cities The feature set to read from. We expect a feature set 
containing all cities defined for the test
+    *               class.
+    * @throws DataStoreException Propagated from any error raised by the input 
feature set.
+    */
     private static void verifyStreamOperations(final FeatureSet cities) throws 
DataStoreException {
         try (Stream<Feature> features = cities.features(false)) {
             final AtomicInteger peekCount = new AtomicInteger();

Reply via email to