This is an automated email from the ASF dual-hosted git repository. amanin pushed a commit to branch refactor/sql-store in repository https://gitbox.apache.org/repos/asf/sis.git
commit 36b597e5f0f7b023a29b22a14e10f0ed17795ca6 Author: Alexis Manin <[email protected]> AuthorDate: Thu Sep 26 14:31:37 2019 +0200 feat(SQLStore): add partial support for distinct operator on SQL query --- .../sis/internal/sql/feature/QueryBuilder.java | 3 + .../sis/internal/sql/feature/QueryFeatureSet.java | 130 +++++++++++++++------ .../apache/sis/internal/sql/feature/StreamSQL.java | 21 ++-- .../org/apache/sis/storage/sql/SQLStoreTest.java | 54 ++++++--- 4 files changed, 145 insertions(+), 63 deletions(-) diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryBuilder.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryBuilder.java index 24ac4f3..68d798e 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryBuilder.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryBuilder.java @@ -1,5 +1,8 @@ package org.apache.sis.internal.sql.feature; +/** + * API to allow overrided SQL Stream to delegate a set of intermediate operations to native driver. + */ interface QueryBuilder { QueryBuilder limit(long limit); diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java index b590c4b..9d027a4 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java @@ -20,6 +20,15 @@ import org.apache.sis.internal.storage.AbstractFeatureSet; import org.apache.sis.storage.DataStoreException; import org.apache.sis.util.collection.BackingStoreException; +/** + * Stores SQL query given at built time, and execute it when calling {@link #features(boolean) data stream}. Note that + * {@link #getType() data type} is defined by analyzing sql statement metadata. Note that user query can be modified at + * before execution to adapt various parameters overridable at fetch time as offset and limit through + * {@link Stream#skip(long)} and {@link Stream#limit(long)}. + * + * Note that this component models query result as close as possible, so built data type will be simple feature type (no + * association). + */ public class QueryFeatureSet extends AbstractFeatureSet { /** @@ -35,34 +44,111 @@ public class QueryFeatureSet extends AbstractFeatureSet { private static final Pattern OFFSET_PATTERN = Pattern.compile("OFFSET\\s+([^\\s]+)(?:\\s+ROWS?)?", Pattern.CASE_INSENSITIVE); /** + * Check for a selection of distinct rows. + */ + private static final Pattern DISTINCT_PATTERN = Pattern.compile("^\\s*SELECT\\s+DISTINCT", Pattern.CASE_INSENSITIVE); + + /** * Keep builder to allow native limit and offset through stream operation. */ private final SQLBuilder queryBuilder; + /** + * SQL database handler. Used to open new connections at query time. + */ private final DataSource source; + /** + * Component in charge of conversion from SQL entry to Feature entity. Also provides output data type. + */ private final FeatureAdapter adapter; + /** + * Offset and limit defined in user query, if any. If none is found, or we cannot determine safely their value (not + * specified as a literal but as a variable name), values will be set to -1. + * + * @implNote BE CAREFUL ! We use these fields for optimisations. We remove values from user query if safely + * identified, and add them only at query time, with additional skip/offset defined through stream or java query. + */ + private final long originOffset, originLimit; + + /** + * A flag indicating that we've safely identified a {@literal DISTINCT} keyword in the user query. + */ + private final boolean distinct; + + /** + * Same as {@link #QueryFeatureSet(SQLBuilder, DataSource, Connection)}, except query is provided by a fixed text + * instead of a builder. + */ public QueryFeatureSet(String query, DataSource source, Connection conn) throws SQLException { this(fromQuery(query, conn), source, conn); } + /** + * Create a new feature set whose data is provided by given user query. Note that query will be compiled now, but + * only executed when {@link #features(boolean) acquiring a data stream}. + * + * @param queryBuilder Contains user-defined SQL query. It must contains a valid and complete SQL statement, because + * it will be compiled at built time to define feature type. Note that we will make a copy of + * it, so any modification done after this feature set is created won't have any effect on it. + * @param source A database pointer we'll keep, to create new connections when {@link #features(boolean) fetching data}. + * @param conn Serves for compiling user query, thus creating output data type. The connection is not kept nor + * re-used after constructor, so you can close it immediately after. We require it, so we do not force + * opening a new connection if user already has one ready on its end. + * @throws SQLException If input query compiling or analysis of its metadata fails. + */ public QueryFeatureSet(SQLBuilder queryBuilder, DataSource source, Connection conn) throws SQLException { this(queryBuilder, new Analyzer(source, conn.getMetaData(), null, null), source, conn); } - public QueryFeatureSet(SQLBuilder queryBuilder, Analyzer analyzer, DataSource source, Connection conn) throws SQLException { + + /** + * See {@link #QueryFeatureSet(SQLBuilder, DataSource, Connection)} for details. + * + * @param analyzer SIS sql analyzer, used for query metadata analysis. Not nullable. If you do not have any, you + * can use {@link #QueryFeatureSet(SQLBuilder, DataSource, Connection) another constructor}. + */ + QueryFeatureSet(SQLBuilder queryBuilder, Analyzer analyzer, DataSource source, Connection conn) throws SQLException { super(analyzer.listeners); - // Defensive copy - this.queryBuilder = new SQLBuilder(queryBuilder); - this.queryBuilder.append(queryBuilder.toString()); this.source = source; - final String sql = queryBuilder.toString(); + String sql = queryBuilder.toString(); try (PreparedStatement statement = conn.prepareStatement(sql)) { final SQLTypeSpecification spec = analyzer.create(statement, sql, null); adapter = analyzer.buildAdapter(spec); } + + /* We will now try to parse offset and limit from input query. If we encounter unsupported/ambiguous case, + * we will fallback to pure java management of additional limit and offset. + * If we successfully retrieve offset and limit, we'll modify user query to take account of additional + * parameters given later. + */ + long tmpOffset = 0, tmpLimit = 0; + try { + Matcher matcher = OFFSET_PATTERN.matcher(sql); + if (matcher.find()) tmpOffset = Long.parseLong(matcher.group(1)); + if (matcher.find()) throw new UnsupportedOperationException("More than one offset in the query."); + sql = matcher.replaceFirst(""); + + matcher = LIMIT_PATTERN.matcher(sql); + if (matcher.find()) tmpLimit = Long.parseLong(matcher.group(1)); + if (matcher.find()) throw new UnsupportedOperationException("More than one limit in the query."); + sql = matcher.replaceFirst(""); + } catch (RuntimeException e) { + sql = source.toString(); + tmpOffset = -1; + tmpLimit = -1; + } + + distinct = DISTINCT_PATTERN.matcher(sql).find(); + + originOffset = tmpOffset; + originLimit = tmpLimit; + + // Defensive copy + this.queryBuilder = new SQLBuilder(queryBuilder); + this.queryBuilder.append(sql); } /** @@ -99,7 +185,7 @@ public class QueryFeatureSet extends AbstractFeatureSet { } @Override - public Stream<Feature> features(boolean parallel) throws DataStoreException { + public Stream<Feature> features(boolean parallel) { return new StreamSQL(new QueryAdapter(queryBuilder), source); } @@ -107,39 +193,12 @@ public class QueryFeatureSet extends AbstractFeatureSet { private final SQLBuilder source; - private final long originOffset, originLimit; private long additionalOffset, additionalLimit; QueryAdapter(SQLBuilder source) { - /* We will now try to parse offset and limit from input query. If we encounter unsupported/ambiguous case, - * we will fallback to pure java management of additional limit and offset. - * If we successfully retrieve offset and limit, we'll modify user query to take account of additional - * parameters given later. - */ - long tmpOffset = 0, tmpLimit = 0; - String sql = source.toString(); - try { - Matcher matcher = OFFSET_PATTERN.matcher(sql); - if (matcher.find()) tmpOffset = Long.parseLong(matcher.group(1)); - if (matcher.find()) throw new UnsupportedOperationException("More than one offset in the query."); - sql = matcher.replaceFirst(""); - - matcher = LIMIT_PATTERN.matcher(sql); - if (matcher.find()) tmpLimit = Long.parseLong(matcher.group(1)); - if (matcher.find()) throw new UnsupportedOperationException("More than one limit in the query."); - sql = matcher.replaceFirst(""); - } catch (RuntimeException e) { - sql = source.toString(); - tmpOffset = -1; - tmpLimit = -1; - } - - originOffset = tmpOffset; - originLimit = tmpLimit; - // defensive copy this.source = new SQLBuilder(source); - this.source.append(sql); + this.source.append(source.toString()); } @Override @@ -156,6 +215,7 @@ public class QueryFeatureSet extends AbstractFeatureSet { @Override public QueryBuilder distinct(boolean activate) { + if (distinct == activate) return this; throw new UnsupportedOperationException("Not supported yet: modifying user query"); // "Alexis Manin (Geomatys)" on 24/09/2019 } @@ -250,7 +310,7 @@ public class QueryFeatureSet extends AbstractFeatureSet { public long estimateSize() { // TODO: economic size estimation ? A count query seems overkill for the aim of this API. Howver, we could // analyze user query in search for a limit value. - return Long.MAX_VALUE; + return originLimit > 0? originLimit : Long.MAX_VALUE; } @Override diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java index e1b1de6..925da4e 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java @@ -125,8 +125,13 @@ class StreamSQL extends StreamDecoration<Feature> { @Override public Stream<Feature> distinct() { - queryAdapter.distinct(); - return this; + try { + queryAdapter.distinct(); + return this; + } catch (UnsupportedOperationException e) { + // TODO: emit warning + return super.distinct(); + } } @Override @@ -239,12 +244,6 @@ class StreamSQL extends StreamDecoration<Feature> { } @Override - public Stream<O> distinct() { - source = source.distinct(); - return this; - } - - @Override public Stream<O> limit(long maxSize) { source = source.limit(maxSize); return this; @@ -335,12 +334,6 @@ class StreamSQL extends StreamDecoration<Feature> { } @Override - public DoubleStream distinct() { - source = source.distinct(); - return this; - } - - @Override public DoubleStream limit(long maxSize) { source = source.limit(maxSize); return this; diff --git a/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java b/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java index 9eb41aa..2929194 100644 --- a/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java +++ b/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java @@ -17,6 +17,7 @@ package org.apache.sis.storage.sql; import java.sql.Connection; +import java.sql.SQLException; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; @@ -175,8 +176,8 @@ public final strictfp class SQLStoreTest extends TestCase { private void verifyQueries(DataSource source) throws Exception { verifyFetchCityTableAsQuery(source); - verifyLimitOffsetAndColumnSelectionFromQuery(source); + verifyDistinctQuery(source); } private void verifyFetchCityTableAsQuery(DataSource source) throws Exception { @@ -263,7 +264,11 @@ public final strictfp class SQLStoreTest extends TestCase { * @param source Database connection provider. */ private void verifyLimitOffsetAndColumnSelectionFromQuery(final DataSource source) throws Exception { - final String query = "SELECT \"english_name\" as \"title\" FROM features.\"Parks\" ORDER BY \"english_name\" ASC OFFSET 2 ROWS FETCH NEXT 3 ROWS ONLY"; + // Ensure multiline text is accepted + final String query = "SELECT \"english_name\" as \"title\" \n\r" + + "FROM features.\"Parks\" \n" + + "ORDER BY \"english_name\" ASC \n" + + "OFFSET 2 ROWS FETCH NEXT 3 ROWS ONLY"; final QueryFeatureSet qfs; try (Connection conn = source.getConnection()) { qfs = new QueryFeatureSet(query, source, conn); @@ -307,18 +312,39 @@ public final strictfp class SQLStoreTest extends TestCase { assertArrayEquals("Only second third name should be returned", new String[]{"Shinjuku Gyoen"}, parkNames); } -/** -* Checks that operations stacked on feature stream are well executed. This test focus on mapping and peeking -* actions overloaded by sql streams. We'd like to test skip and limit operations too, but ignore it for now, -* because ordering of results matters for such a test. -* -* @implNote Most of stream operations used here are meaningless. We just want to ensure that the pipeline does not -* skip any operation. -* -* @param cities The feature set to read from. We expect a feature set containing all cities defined for the test -* class. -* @throws DataStoreException Let's propagate any error raised by input feature set. -*/ + /** + * Check that a {@link Stream#distinct()} gives coherent results. For now, no optimisation is done to delegate it to + * database, but this test allows for non-regression test, so when an optimisation is done, we'll immediately test + * its validity. + */ + private void verifyDistinctQuery(DataSource source) throws SQLException { + // Ensure multiline text is accepted + final String query = "SELECT \"country\" FROM features.\"Parks\" ORDER BY \"country\""; + final QueryFeatureSet qfs; + try (Connection conn = source.getConnection()) { + qfs = new QueryFeatureSet(query, source, conn); + } + + final Object[] expected = qfs.features(false) + .distinct() + .map(f -> f.getPropertyValue("country")) + .toArray(); + + assertArrayEquals("Distinct country names, sorted in ascending order", new String[]{"CAN", "FRA", "JPN"}, expected); + } + + /** + * Checks that operations stacked on feature stream are well executed. This test focus on mapping and peeking + * actions overloaded by sql streams. We'd like to test skip and limit operations too, but ignore it for now, + * because ordering of results matters for such a test. + * + * @implNote Most of stream operations used here are meaningless. We just want to ensure that the pipeline does not + * skip any operation. + * + * @param cities The feature set to read from. We expect a feature set containing all cities defined for the test + * class. + * @throws DataStoreException Let's propagate any error raised by input feature set. + */ private static void verifyStreamOperations(final FeatureSet cities) throws DataStoreException { try (Stream<Feature> features = cities.features(false)) { final AtomicInteger peekCount = new AtomicInteger();
