This is an automated email from the ASF dual-hosted git repository. amanin pushed a commit to branch refactor/sql-store in repository https://gitbox.apache.org/repos/asf/sis.git
commit 1f851ea91be28a212f3165e854316a1f00ea9e73 Author: Alexis Manin <[email protected]> AuthorDate: Wed Sep 25 18:54:59 2019 +0200 WIP(SQLStore): working on query feature set: proper management of offset limit. --- .../apache/sis/internal/sql/feature/Analyzer.java | 2 +- .../apache/sis/internal/sql/feature/Features.java | 9 +- .../sis/internal/sql/feature/QueryFeatureSet.java | 97 ++++++++++-- .../apache/sis/internal/sql/feature/StreamSQL.java | 15 +- .../org/apache/sis/storage/sql/SQLStoreTest.java | 164 ++++++++++++++++++--- .../org/apache/sis/storage/sql/Features.sql | 4 +- 6 files changed, 255 insertions(+), 36 deletions(-) diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java index 4183659..ff5f0a7 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java @@ -334,7 +334,7 @@ final class Analyzer { public FeatureAdapter buildAdapter(final SQLTypeSpecification spec) throws SQLException { final FeatureTypeBuilder builder = new FeatureTypeBuilder(nameFactory, functions.library, locale); - builder.setName(spec.getName() == null ? Names.createGenericName("sis", ":", UUID.randomUUID().toString()) : spec.getName()); + builder.setName(spec.getName() == null ? 
Names.createGenericName("sis", ":", UUID.randomUUID().toString()) : spec.getName()); builder.setDefinition(spec.getDefinition()); final String geomCol = spec.getPrimaryGeometryColumn().orElse(""); final List pkCols = spec.getPK().map(PrimaryKey::getColumns).orElse(Collections.EMPTY_LIST); diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Features.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Features.java index c057ddf..2950311 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Features.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Features.java @@ -306,13 +306,16 @@ final class Features implements Spliterator<Feature> { /** * If a limit or an offset is appended, a space will be added beforehand to the given builder. + * + * @implNote We use ANSI notation to get best possible compatibility with possible drivers. + * * @param toEdit The builder to add offset and limit to. * @param offset The offset to use. If zero or negative, it will be ignored. * @param limit the value for limit parameter. If zero or negative, it will be ignored. 
*/ - private static void addOffsetLimit(final SQLBuilder toEdit, final long offset, final long limit) { - if (limit > 0) toEdit.append(" LIMIT ").append(limit); - if (offset > 0) toEdit.append(" OFFSET ").append(offset); + static void addOffsetLimit(final SQLBuilder toEdit, final long offset, final long limit) { + if (offset > 0) toEdit.append(" OFFSET ").append(offset).append(" ROWS"); + if (limit > 0) toEdit.append(" FETCH NEXT ").append(limit).append(" ROWS ONLY"); } /** diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java index 8441341..b590c4b 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java @@ -6,6 +6,8 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.Spliterator; import java.util.function.Consumer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Stream; import java.util.stream.StreamSupport; import javax.sql.DataSource; @@ -21,6 +23,18 @@ import org.apache.sis.util.collection.BackingStoreException; public class QueryFeatureSet extends AbstractFeatureSet { /** + * A regex searching for ANSI or PostgreSQL way of defining max number of rows to return. For details, see + * <a href="https://www.postgresql.org/docs/current/sql-select.html#SQL-LIMIT">PostgreSQL LIMIT documentation</a>. + * Documentation states that value could be a reference to a variable name, so we do not search for a digit. + */ + private static final Pattern LIMIT_PATTERN = Pattern.compile("(?:FETCH|LIMIT)(?:\\s+(?:FIRST|NEXT))?\\s+([^\\s]+)(?:\\s+ROWS?)?(?:\\s+ONLY)?", Pattern.CASE_INSENSITIVE); + /** + * Search for ANSI or PostgreSQL way of defining a number of rows to skip when returning results. 
For details, see + * <a href="https://www.postgresql.org/docs/current/sql-select.html#SQL-LIMIT">PostgreSQL LIMIT documentation</a>. + */ + private static final Pattern OFFSET_PATTERN = Pattern.compile("OFFSET\\s+([^\\s]+)(?:\\s+ROWS?)?", Pattern.CASE_INSENSITIVE); + + /** * Keep builder to allow native limit and offset through stream operation. */ private final SQLBuilder queryBuilder; @@ -29,13 +43,19 @@ public class QueryFeatureSet extends AbstractFeatureSet { private final FeatureAdapter adapter; + public QueryFeatureSet(String query, DataSource source, Connection conn) throws SQLException { + this(fromQuery(query, conn), source, conn); + } + public QueryFeatureSet(SQLBuilder queryBuilder, DataSource source, Connection conn) throws SQLException { this(queryBuilder, new Analyzer(source, conn.getMetaData(), null, null), source, conn); } public QueryFeatureSet(SQLBuilder queryBuilder, Analyzer analyzer, DataSource source, Connection conn) throws SQLException { super(analyzer.listeners); - this.queryBuilder = queryBuilder; + // Defensive copy + this.queryBuilder = new SQLBuilder(queryBuilder); + this.queryBuilder.append(queryBuilder.toString()); this.source = source; final String sql = queryBuilder.toString(); @@ -85,20 +105,53 @@ public class QueryFeatureSet extends AbstractFeatureSet { private class QueryAdapter implements QueryBuilder { - SQLBuilder source; + private final SQLBuilder source; + + private final long originOffset, originLimit; + private long additionalOffset, additionalLimit; QueryAdapter(SQLBuilder source) { - this.source = source; + /* We will now try to parse offset and limit from input query. If we encounter unsupported/ambiguous case, + * we will fallback to pure java management of additional limit and offset. + * If we successfully retrieve offset and limit, we'll modify user query to take account of additional + * parameters given later. 
+ */ + long tmpOffset = 0, tmpLimit = 0; + String sql = source.toString(); + try { + Matcher matcher = OFFSET_PATTERN.matcher(sql); + if (matcher.find()) tmpOffset = Long.parseLong(matcher.group(1)); + if (matcher.find()) throw new UnsupportedOperationException("More than one offset in the query."); + sql = matcher.replaceFirst(""); + + matcher = LIMIT_PATTERN.matcher(sql); + if (matcher.find()) tmpLimit = Long.parseLong(matcher.group(1)); + if (matcher.find()) throw new UnsupportedOperationException("More than one limit in the query."); + sql = matcher.replaceFirst(""); + } catch (RuntimeException e) { + sql = source.toString(); + tmpOffset = -1; + tmpLimit = -1; + } + + originOffset = tmpOffset; + originLimit = tmpLimit; + + // defensive copy + this.source = new SQLBuilder(source); + this.source.append(sql); } @Override public QueryBuilder limit(long limit) { - throw new UnsupportedOperationException("Not supported yet: modifying user query"); // "Alexis Manin (Geomatys)" on 24/09/2019 + additionalLimit = limit; + return this; } @Override public QueryBuilder offset(long offset) { - throw new UnsupportedOperationException("Not supported yet: modifying user query"); // "Alexis Manin (Geomatys)" on 24/09/2019 + additionalOffset = offset; + return this; } @Override @@ -108,8 +161,23 @@ public class QueryFeatureSet extends AbstractFeatureSet { @Override public Connector select(ColumnRef... 
columns) { - if (columns == null || columns.length < 1) - return new PreparedQueryConnector(source.toString()); + if (columns == null || columns.length < 1) { + long javaOffset = 0, nativeOffset = 0, javaLimit = 0, nativeLimit = 0; + if (originOffset < 0) { + javaOffset = this.additionalOffset; + } else if (originOffset > 0 || additionalOffset > 0) { + nativeOffset = originOffset + additionalOffset; + } + + if (originLimit < 0) { + javaLimit = this.additionalLimit; + } else if (originLimit > 0 || additionalLimit > 0) { + nativeLimit = Math.min(originLimit, additionalLimit); + } + + Features.addOffsetLimit(source, nativeOffset, nativeLimit); + return new PreparedQueryConnector(source.toString(), javaOffset, javaLimit); + } throw new UnsupportedOperationException("Not supported yet: modifying user query"); // "Alexis Manin (Geomatys)" on 24/09/2019 } } @@ -117,9 +185,12 @@ public class QueryFeatureSet extends AbstractFeatureSet { private class PreparedQueryConnector implements Connector { final String sql; + private long additionalOffset, additionalLimit; - private PreparedQueryConnector(String sql) { + private PreparedQueryConnector(String sql, long additionalOffset, long additionalLimit) { this.sql = sql; + this.additionalOffset = additionalOffset; + this.additionalLimit = additionalLimit; } @Override @@ -127,7 +198,10 @@ public class QueryFeatureSet extends AbstractFeatureSet { final PreparedStatement statement = connection.prepareStatement(sql, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); final ResultSet result = statement.executeQuery(); - final Stream<Feature> stream = StreamSupport.stream(new ResultSpliterator(result), false); + Stream<Feature> stream = StreamSupport.stream(new ResultSpliterator(result), false); + if (additionalLimit > 0) stream = stream.limit(additionalLimit); + if (additionalOffset > 0) stream = stream.skip(additionalOffset); + return stream.onClose(() -> { try ( final AutoCloseable rc = result::close; @@ -185,4 +259,9 @@ public 
class QueryFeatureSet extends AbstractFeatureSet { return Spliterator.IMMUTABLE | Spliterator.NONNULL; } } + + private static SQLBuilder fromQuery(final String query, final Connection conn) throws SQLException { + return new SQLBuilder(conn.getMetaData(), true) + .append(query); + } } diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java index f80cc0d..e1b1de6 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/StreamSQL.java @@ -49,6 +49,7 @@ import org.apache.sis.util.collection.BackingStoreException; import org.apache.sis.util.logging.Logging; import static org.apache.sis.util.ArgumentChecks.ensureNonNull; +import static org.apache.sis.util.ArgumentChecks.ensurePositive; /** * Manages query lifecycle and optimizations. Operations like {@link #count()}, {@link #distinct()}, {@link #skip(long)} @@ -143,23 +144,29 @@ class StreamSQL extends StreamDecoration<Feature> { @Override public Stream<Feature> limit(long maxSize) { + ensurePositive("Limit", maxSize); if (limit < 1) limit = maxSize; else limit = Math.min(limit, maxSize); - queryAdapter.limit(limit); return this; } @Override public Stream<Feature> skip(long n) { + ensurePositive("Offset", n); offset += n; - queryAdapter.offset(offset); return this; } + private Connector select() { + queryAdapter.offset(offset); + queryAdapter.limit(limit); + return queryAdapter.select(); + } + @Override public long count() { // Avoid opening a connection if sql text cannot be evaluated. 
- final String sql = queryAdapter.select().estimateStatement(true); + final String sql = select().estimateStatement(true); try (Connection conn = source.getConnection()) { try (Statement st = conn.createStatement(); ResultSet rs = st.executeQuery(sql)) { @@ -180,7 +187,7 @@ class StreamSQL extends StreamDecoration<Feature> { .peek(connectionRef::set) .flatMap(conn -> { try { - return queryAdapter.select().connect(conn); + return select().connect(conn); } catch (SQLException | DataStoreException e) { throw new BackingStoreException(e); } diff --git a/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java b/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java index ef5d769..9eb41aa 100644 --- a/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java +++ b/storage/sis-sqlstore/src/test/java/org/apache/sis/storage/sql/SQLStoreTest.java @@ -21,16 +21,23 @@ import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.Map; +import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; +import java.util.stream.Collectors; import java.util.stream.Stream; +import javax.sql.DataSource; import org.opengis.feature.AttributeType; import org.opengis.feature.Feature; import org.opengis.feature.FeatureAssociationRole; import org.opengis.feature.FeatureType; import org.opengis.feature.PropertyType; +import org.opengis.util.GenericName; +import org.apache.sis.internal.feature.AttributeConvention; import org.apache.sis.internal.metadata.sql.SQLBuilder; import org.apache.sis.internal.sql.feature.QueryFeatureSet; import org.apache.sis.storage.DataStoreException; @@ -48,6 +55,8 @@ import static org.apache.sis.test.Assert.assertNotNull; import static org.apache.sis.test.Assert.assertSame; import static org.apache.sis.test.Assert.assertTrue; import static 
org.apache.sis.test.Assert.fail; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertFalse; // Branch-dependent imports @@ -155,7 +164,7 @@ public final strictfp class SQLStoreTest extends TestCase { // Now, we'll check that overloaded stream operations are functionally stable, even stacked. verifyStreamOperations(cities); - verifyQueries(tmp); + verifyQueries(tmp.source); } } assertEquals(Integer.valueOf(2), countryCount.remove("CAN")); @@ -164,31 +173,152 @@ public final strictfp class SQLStoreTest extends TestCase { assertTrue (countryCount.isEmpty()); } - private void verifyQueries(TestDatabase source) throws Exception { - final QueryFeatureSet qfs; - try (Connection conn = source.source.getConnection()) { + private void verifyQueries(DataSource source) throws Exception { + verifyFetchCityTableAsQuery(source); + + verifyLimitOffsetAndColumnSelectionFromQuery(source); + } + + private void verifyFetchCityTableAsQuery(DataSource source) throws Exception { + final QueryFeatureSet allCities; + final QueryFeatureSet canadaCities; + try (Connection conn = source.getConnection()) { final SQLBuilder builder = new SQLBuilder(conn.getMetaData(), false) - .append("SELECT * FROM ").appendIdentifier("features", "Parks"); - qfs = new QueryFeatureSet(builder, source.source, conn); + .append("SELECT * FROM ").appendIdentifier("features", "Cities"); + allCities = new QueryFeatureSet(builder, source, conn); + /* By re-using the same builder, we ensure a defensive copy is done at feature set creation, avoiding + * potential concurrent or security issue due to afterward modification of the query. 
+ */ + builder.append(" WHERE ").appendIdentifier("country").append("='CAN'"); + canadaCities = new QueryFeatureSet(builder, source, conn); } - final FeatureType type = qfs.getType(); - System.out.println(type); - qfs.features(false).forEach(System.out::println); + final HashMap<String, Class> expectedAttrs = new HashMap<>(); + expectedAttrs.put("country", String.class); + expectedAttrs.put("native_name", String.class); + expectedAttrs.put("english_name", String.class); + expectedAttrs.put("population", Integer.class); + + checkQueryType(expectedAttrs, allCities.getType()); + checkQueryType(expectedAttrs, canadaCities.getType()); + + Set<Map<String, Object>> expectedResults = new HashSet<>(); + expectedResults.add(city("CAN", "Montréal", "Montreal", 1704694)); + expectedResults.add(city("CAN", "Québec", "Quebec", 531902)); + + Set<Map<String, Object>> result = canadaCities.features(false) + .map(SQLStoreTest::asMap) + .collect(Collectors.toSet()); + assertEquals("Query result is not consistent with expected one", expectedResults, result); + + expectedResults.add(city("FRA", "Paris", "Paris", 2206488)); + expectedResults.add(city("JPN", "東京", "Tōkyō", 13622267)); + + result = allCities.features(false) + .map(SQLStoreTest::asMap) + .collect(Collectors.toSet()); + assertEquals("Query result is not consistent with expected one", expectedResults, result); } + private static Map<String, Object> city(String country, String nativeName, String enName, int population) { + final Map<String, Object> result = new HashMap<>(); + result.put("country", country); + result.put("native_name", nativeName); + result.put("english_name", enName); + result.put("population", population); + return result; + } + /** - * Checks that operations stacked on feature stream are well executed. This test focus on mapping and peeking - * actions overloaded by sql streams. We'd like to test skip and limit operations too, but ignore it for now, - * because ordering of results matters for such a test. 
+ * Differs from {@link #verifyFeatureType(FeatureType, String[], Object[])} because it checks attribute names and value classes through a map instead of parallel arrays. + * @param expectedAttrs + * @param target + */ + private static void checkQueryType(final Map<String, Class> expectedAttrs, final FeatureType target) { + final Collection<? extends PropertyType> props = target.getProperties(true); + assertEquals("Number of attributes", expectedAttrs.size(), props.size()); + for (PropertyType p : props) { + assertTrue("Query type should contain only attributes", p instanceof AttributeType); + final String pName = p.getName().toString(); + final Class expectedClass = expectedAttrs.get(pName); + assertNotNull("Unexpected property: "+pName, expectedClass); + assertEquals("Unexpected type for property: "+pName, expectedClass, ((AttributeType)p).getValueClass()); + } + } + + private static Map<String, Object> asMap(final Feature source) { + return source.getType().getProperties(true).stream() + .map(PropertyType::getName) + .map(GenericName::toString) + .collect(Collectors.toMap(n->n, source::getPropertyValue)); + } + + /** + * Test limit and offset. The logic is: if user provided an offset, stream {@link Stream#skip(long) skip operator} + * does NOT override it, but stacks on it (which is logical: the feature set provides the user-defined result, and the + * stream navigates through it). * - * @implNote Most of stream operations used here are meaningless. We just want to ensure that the pipeline does not - * skip any operation. + * Moreover, we also check filtering of columns and label usage. * - * @param cities The feature set to read from. We expect a feature set containing all cities defined for the test - * class. - * @throws DataStoreException Let's propagate any error raised by input feature set. 
*/ + private void verifyLimitOffsetAndColumnSelectionFromQuery(final DataSource source) throws Exception { + final String query = "SELECT \"english_name\" as \"title\" FROM features.\"Parks\" ORDER BY \"english_name\" ASC OFFSET 2 ROWS FETCH NEXT 3 ROWS ONLY"; + final QueryFeatureSet qfs; + try (Connection conn = source.getConnection()) { + qfs = new QueryFeatureSet(query, source, conn); + } + + final FeatureType type = qfs.getType(); + final Iterator<? extends PropertyType> props = type.getProperties(true).iterator(); + assertTrue("Built feature set has at least one property", props.hasNext()); + final AttributeType attr = (AttributeType) props.next(); + assertEquals("Property name should be label defined in query", "title", attr.getName().toString()); + assertEquals("Attribute should be a string", String.class, attr.getValueClass()); + assertTrue("Column should be nullable.", attr.getMinimumOccurs() == 0); + final Object precision = attr.characteristics().get(AttributeConvention.MAXIMAL_LENGTH_CHARACTERISTIC.toString()); + assertNotNull("Length constraint should be visible from feature type", precision); + assertEquals("Column length constraint should be visible from attribute type.", 20, ((AttributeType)precision).getDefaultValue()); + assertFalse("Built feature type should have exactly one attribute.", props.hasNext()); + + Function<Stream<Feature>, String[]> getNames = in -> in + .map(f -> f.getPropertyValue("title").toString()) + .toArray(size -> new String[size]); + + String[] parkNames = getNames.apply( + qfs.features(false) + // Get third row in the table, as query starts on second one, and we want to skip one entry from there + .skip(1) + // Tries to increase limit. The test will ensure it's not possible. 
+ .limit(4) + ); + + assertArrayEquals( + "Should get fourth and fifth park names from ascending order", + new String[]{"Tuileries Garden", "Yoyogi-kōen"}, + parkNames + ); + + parkNames = getNames.apply(qfs.features(false) + .skip(0) + .limit(1) + ); + + assertArrayEquals("Only the third name should be returned", new String[]{"Shinjuku Gyoen"}, parkNames); + } + +/** +* Checks that operations stacked on feature stream are well executed. This test focuses on mapping and peeking +* actions overloaded by sql streams. We'd like to test skip and limit operations too, but ignore it for now, +* because ordering of results matters for such a test. 
+*/ private static void verifyStreamOperations(final FeatureSet cities) throws DataStoreException { try (Stream<Feature> features = cities.features(false)) { final AtomicInteger peekCount = new AtomicInteger(); diff --git a/storage/sis-sqlstore/src/test/resources/org/apache/sis/storage/sql/Features.sql b/storage/sis-sqlstore/src/test/resources/org/apache/sis/storage/sql/Features.sql index 148c076..27e4d65 100644 --- a/storage/sis-sqlstore/src/test/resources/org/apache/sis/storage/sql/Features.sql +++ b/storage/sis-sqlstore/src/test/resources/org/apache/sis/storage/sql/Features.sql @@ -65,5 +65,5 @@ INSERT INTO features."Parks" ("country", "city", "native_name", "english_name") ('CAN', 'Montréal', 'Mont Royal', 'Mount Royal'), ('FRA', 'Paris', 'Jardin des Tuileries', 'Tuileries Garden'), ('FRA', 'Paris', 'Jardin du Luxembourg', 'Luxembourg Garden'), - ('JPN', '東京', '代々木公園', 'Yoyogi-kōen'), - ('JPN', '東京', '新宿御苑', 'Shinjuku Gyoen'); + ('JPN', '東京', '代々木公園', 'Yoyogi-kōen'), + ('JPN', '東京', '新宿御苑', 'Shinjuku Gyoen');
