This is an automated email from the ASF dual-hosted git repository. amanin pushed a commit to branch refactor/sql-store in repository https://gitbox.apache.org/repos/asf/sis.git
commit 65aeda1248ce5eb666c9480bab3dac5339a2d395 Author: Alexis Manin <[email protected]> AuthorDate: Fri Sep 27 11:07:43 2019 +0200 WIP(SQLStore): improve benchmark and add javadoc --- .../apache/sis/internal/sql/feature/Analyzer.java | 26 ++++----- .../sis/internal/sql/feature/PrimaryKey.java | 12 +++-- .../sis/internal/sql/feature/QueryFeatureSet.java | 44 ++++++++++++++- .../sql/feature/QuerySpliteratorsBench.java | 12 ++++- .../internal/sql/feature/SQLTypeSpecification.java | 62 ++++++++++++++++++++-- .../sis/internal/sql/feature/package-info.java | 5 ++ 6 files changed, 137 insertions(+), 24 deletions(-) diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java index 6ac8fd7..9348d01 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java @@ -22,6 +22,7 @@ import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.*; +import java.util.function.Supplier; import java.util.logging.Level; import java.util.logging.LogRecord; import javax.sql.DataSource; @@ -140,6 +141,7 @@ final class Analyzer { * The namespace created with {@link #catalog} and {@link #schema}. */ private transient NameSpace namespace; + public static final Supplier<GenericName> RANDOME_NAME = () -> Names.createGenericName("sis", ":", UUID.randomUUID().toString()); /** * Creates a new analyzer for the database described by given metadata. @@ -334,9 +336,9 @@ final class Analyzer { public FeatureAdapter buildAdapter(final SQLTypeSpecification spec) throws SQLException { final FeatureTypeBuilder builder = new FeatureTypeBuilder(nameFactory, functions.library, locale); - builder.setName(spec.getName() == null ? Names.createGenericName("sis", ":", UUID.randomUUID().toString()) : spec.getName()); - builder.setDefinition(spec.getDefinition()); - final String geomCol = spec.getPrimaryGeometryColumn().orElse(""); + builder.setName(spec.getName().orElseGet(RANDOME_NAME)); + spec.getDefinition().ifPresent(builder::setDefinition); + final String geomCol = spec.getPrimaryGeometryColumn().map(ColumnRef::getAttributeName).orElse(""); final List pkCols = spec.getPK().map(PrimaryKey::getColumns).orElse(Collections.EMPTY_LIST); List<PropertyMapper> attributes = new ArrayList<>(); // JDBC column indices are 1 based. @@ -470,15 +472,15 @@ final class Analyzer { } @Override - public GenericName getName() { - return id.getName(Analyzer.this); + public Optional<GenericName> getName() { + return Optional.of(id.getName(Analyzer.this)); } /** * The remarks are opportunistically stored in id.freeText if known by the caller. */ @Override - public String getDefinition() throws SQLException { + public Optional<String> getDefinition() throws SQLException { String remarks = id.freeText; if (id instanceof Relation) { try (ResultSet reflect = metadata.getTables(id.catalog, schemaEsc, tableEsc, null)) { @@ -493,7 +495,7 @@ final class Analyzer { } } } - return remarks; + return Optional.ofNullable(remarks); } @Override @@ -551,7 +553,7 @@ final class Analyzer { } @Override - public Optional<String> getPrimaryGeometryColumn() { + public Optional<ColumnRef> getPrimaryGeometryColumn() { return Optional.empty(); //throw new UnsupportedOperationException("Not supported yet"); // "Alexis Manin (Geomatys)" on 20/09/2019 } @@ -589,13 +591,13 @@ final class Analyzer { } @Override - public GenericName getName() throws SQLException { - return name; + public Optional<GenericName> getName() throws SQLException { + return Optional.of(name); } @Override - public String getDefinition() throws SQLException { - return query; + public Optional<String> getDefinition() throws SQLException { + return Optional.of(query); } @Override diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/PrimaryKey.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/PrimaryKey.java index 1e68fd3..e220abc 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/PrimaryKey.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/PrimaryKey.java @@ -10,6 +10,9 @@ import org.apache.sis.util.ArgumentChecks; /** * Represents SQL primary key constraint. Main information is columns composing the key. * + * @implNote For now, only list of columns composing the key are returned. However, in the future it would be possible + * to add other information, as a value type to describe how to expose primary key value. + * * @author "Alexis Manin (Geomatys)" */ interface PrimaryKey { @@ -20,13 +23,16 @@ interface PrimaryKey { return Optional.of(new Composite(cols)); } - //Class<T> getViewType(); + /** + * + * @return List of column names composing the key. Should neither be null nor empty. + */ List<String> getColumns(); class Simple implements PrimaryKey { final String column; - public Simple(String column) { + Simple(String column) { this.column = column; } @@ -40,7 +46,7 @@ interface PrimaryKey { */ private final List<String> columns; - public Composite(List<String> columns) { + Composite(List<String> columns) { ArgumentChecks.ensureNonEmpty("Primary key column names", columns); this.columns = Collections.unmodifiableList(new ArrayList<>(columns)); } diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java index fdf518d..ffbdc8b 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java @@ -83,7 +83,15 @@ public class QueryFeatureSet extends AbstractFeatureSet { * batch loading of results. */ boolean allowBatchLoading = true; + /** + * Profiling variable. Define the fraction (0 none, 1 all) of a single fetch (as defined by {@link ResultSet#getFetchSize()} + * that {@link PrefetchSpliterator} will load in one go. + */ float fetchRatio = 0.5f; + /** + * Profiling variable, serves to define {{@link PreparedStatement#setFetchSize(int)} SQL result fetch size}. + */ + int fetchSize = 100; /** * Same as {@link #QueryFeatureSet(SQLBuilder, DataSource, Connection)}, except query is provided by a fixed text @@ -268,7 +276,7 @@ public class QueryFeatureSet extends AbstractFeatureSet { @Override public Stream<Feature> connect(Connection connection) throws SQLException, DataStoreException { final PreparedStatement statement = connection.prepareStatement(sql, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); - statement.setFetchSize(100); + statement.setFetchSize(fetchSize); final ResultSet result = statement.executeQuery(); final int fetchSize = result.getFetchSize(); final boolean withPrefetch = !allowBatchLoading || (fetchSize < 1 || fetchSize >= Integer.MAX_VALUE); @@ -297,6 +305,17 @@ public class QueryFeatureSet extends AbstractFeatureSet { } } + /** + * Base class for loading SQL query result loading through {@link Spliterator} API. Concrete implementations comes + * in two experimental flavors : + * <ul> + * <li>Sequential streaming: {@link ResultSpliterator}</li> + * <li>Parallelizable batch streaming: {@link PrefetchSpliterator}</li> + * </ul> + * + * {@link QuerySpliteratorsBench A benchmark is available} to compare both implementations, which could be useful in + * the future to determine which implementation to priorize. For now results does not show much difference. + */ private abstract class QuerySpliterator implements java.util.Spliterator<Feature> { final ResultSet result; @@ -347,12 +366,26 @@ public class QueryFeatureSet extends AbstractFeatureSet { .append(query); } + /** + * An attempt to optimize feature loading through batching and potential parallelization. For now, it looks like + * there's not much improvement regarding to naive streaming approach. IMHO, two improvements would really impact + * performance positively if done: + * <ul> + * <li>Optimisation of batch loading through {@link FeatureAdapter#prefetch(int, ResultSet)}</li> + * <li>Better splitting balance, as stated by {@link Spliterator#trySplit()}</li> + * </ul> + */ private final class PrefetchSpliterator extends QuerySpliterator { final int fetchSize; int idx; List<Feature> chunk; + /** + * According to {@link Spliterator#trySplit()} documentation, the original size estimation must be reduced after + * split to remain consistent. + */ + long splittedAmount = 0; private PrefetchSpliterator(ResultSet result) throws SQLException { this(result, 0.5f); @@ -374,14 +407,21 @@ public class QueryFeatureSet extends AbstractFeatureSet { public Spliterator<Feature> trySplit() { if (!ensureChunkAvailable()) return null; + final List<Feature> remainingChunk = chunk.subList(idx, chunk.size()); + splittedAmount += remainingChunk.size(); final Spliterator<Feature> chunkSpliterator = idx == 0 ? - chunk.spliterator() : chunk.subList(idx, chunk.size()).spliterator(); + chunk.spliterator() : remainingChunk.spliterator(); chunk = null; idx = 0; return chunkSpliterator; } @Override + public long estimateSize() { + return super.estimateSize() - splittedAmount; + } + + @Override public int characteristics() { return super.characteristics() | Spliterator.CONCURRENT; } diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QuerySpliteratorsBench.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QuerySpliteratorsBench.java index f2c62ef..bb519d9 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QuerySpliteratorsBench.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QuerySpliteratorsBench.java @@ -25,8 +25,8 @@ import org.openjdk.jmh.annotations.Warmup; import static org.apache.sis.util.ArgumentChecks.ensureStrictlyPositive; @Fork(value = 2, jvmArgs = {"-server", "-Xmx2g"} ) -@Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 5, time = 5, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 2, time = 4, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 2, time = 4, timeUnit = TimeUnit.SECONDS) public class QuerySpliteratorsBench { @State(Scope.Benchmark) @@ -41,6 +41,12 @@ public class QuerySpliteratorsBench { @Param({"true", "false"}) boolean prefetch; + @Param({"10", "100", "1000"}) + int fetchSize; + + @Param({"0.5", "1", "2"}) + float fetchRatio; + EmbeddedDataSource db; QueryFeatureSet fs; @@ -78,6 +84,8 @@ public class QuerySpliteratorsBench { fs = new QueryFeatureSet("SELECT * FROM TEST", db, c); fs.allowBatchLoading = prefetch; + fs.fetchSize = fetchSize; + fs.fetchRatio = fetchRatio; } } diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/SQLTypeSpecification.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/SQLTypeSpecification.java index 0e35edf..e54823d 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/SQLTypeSpecification.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/SQLTypeSpecification.java @@ -1,35 +1,87 @@ package org.apache.sis.internal.sql.feature; +import java.sql.PreparedStatement; +import java.sql.ResultSet; import java.sql.SQLException; import java.util.List; import java.util.Optional; import org.opengis.util.GenericName; +import org.apache.sis.feature.builder.FeatureTypeBuilder; +import org.apache.sis.internal.feature.AttributeConvention; import org.apache.sis.storage.DataStoreContentException; +/** + * Defines an application schema inferred from an SQL database (query, table, etc.). Implementations will be used by + * {@link Analyzer} to create an {@link FeatureAdapter adaptation layer to the feature model}. Default implementations + * can be retrieved for tables and queries respectively through {@link Analyzer#create(TableReference, TableReference)} + * and {@link Analyzer#create(PreparedStatement, String, GenericName)} methods. + */ interface SQLTypeSpecification { /** + * Identifying name for the application schema. It is strongly recommended to be present, for SIS engine to be + * capable to create insightful models. However, in corner cases where no proper names could be provided, an empty + * value is allowed. * - * @return Name for the feature type to build. Nullable. + * @implNote SIS {@link FeatureTypeBuilder feature type builder} <em>requires</em> a name, and current + * {@link Analyzer#buildAdapter(SQLTypeSpecification) analysis implementation} will create a random UUID if + * necessary. + * + * @return Name for the feature type to build. * @throws SQLException If an error occurs while retrieving information from database. */ - GenericName getName() throws SQLException; + Optional<GenericName> getName() throws SQLException; /** + * Gives an optional description of the application schema.This information is not necessary for any kind of + * computation, but allows to give end-user global information about the schema (s)he's manipulating. * - * @return A succint description of the data source. Nullable. + * @return A brief description of the data source. * @throws SQLException If an error occurs while retrieving information from database. */ - String getDefinition() throws SQLException; + Optional<String> getDefinition() throws SQLException; + /** + * Primary key definition of source schema. Can be empty if no primary key is defined (Example: query definition). + * + * @return Primary key definition if any, otherwise an empty shell. + * @throws SQLException If an error occurs while exchanging information with underlying database. + */ Optional<PrimaryKey> getPK() throws SQLException; + /** + * + * @return Ordered list of columns in application schema. Order is important, and will be relied upon to retrieve + * {@link ResultSet#getObject(int) result values by index}. + */ List<SQLColumn> getColumns(); + /** + * + * @return All identified relations based on a foreign key in <em>current</em> application schema (1..1 or n..1). + * Corresponds to {@link Relation.Direction#IMPORT}. Can be empty but not null. + * + * @throws SQLException If an error occurs while exchanging information with underlying database. + */ List<Relation> getImports() throws SQLException; + /** + * + * @return All identified relations based on foreign key located in <em>another</em> application schema (1..n). + * Corresponds to {@link Relation.Direction#EXPORT}. Can be empty but not null. + * @throws SQLException If an error occurs while exchanging information with underlying database. + * @throws DataStoreContentException If a schema problem is encountered. + */ List<Relation> getExports() throws SQLException, DataStoreContentException; - default Optional<String> getPrimaryGeometryColumn() {return Optional.empty();} + /** + * In case target schema contains geographic information, this serves to identify without ambiguity which column + * contains what could be considered main geolocation (as stated by {@link AttributeConvention#GEOMETRY_PROPERTY}). + * This is a very important information in case application schema contains multiple geometric fields. + * + * @return The name of the column/attribute to be considered as main geometry information, or an empty shell if + * unknown. + */ + default Optional<ColumnRef> getPrimaryGeometryColumn() {return Optional.empty();} } diff --git a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/package-info.java b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/package-info.java index 2c198a8..19c3151 100644 --- a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/package-info.java +++ b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/package-info.java @@ -25,8 +25,13 @@ * This package is for internal use by SIS only. Classes in this package * may change in incompatible ways in any future version without notice. * + * @implNote Feature type analysis is done through {@link org.apache.sis.internal.sql.feature.Analyzer} class. + * It relies on internal {@link org.apache.sis.internal.sql.feature.SQLTypeSpecification} API to fetch SQL schema + * information, and build {@link org.apache.sis.internal.sql.feature.FeatureAdapter an adapter to feature model from it}. + * * @author Johann Sorel (Geomatys) * @author Martin Desruisseaux (Geomatys) + * @author Alexis Manin (Geomatys) * @version 1.0 * @since 1.0 * @module
