[sis] 19/45: WIP(SQLStore): improve benchmark and add javadoc

amanin Tue, 12 Nov 2019 08:45:01 -0800

This is an automated email from the ASF dual-hosted git repository.

amanin pushed a commit to branch refactor/sql-store
in repository https://gitbox.apache.org/repos/asf/sis.git


commit 65aeda1248ce5eb666c9480bab3dac5339a2d395
Author: Alexis Manin <[email protected]>
AuthorDate: Fri Sep 27 11:07:43 2019 +0200

    WIP(SQLStore): improve benchmark and add javadoc
---
 .../apache/sis/internal/sql/feature/Analyzer.java  | 26 ++++-----
 .../sis/internal/sql/feature/PrimaryKey.java       | 12 +++--
 .../sis/internal/sql/feature/QueryFeatureSet.java  | 44 ++++++++++++++-
 .../sql/feature/QuerySpliteratorsBench.java        | 12 ++++-
 .../internal/sql/feature/SQLTypeSpecification.java | 62 ++++++++++++++++++++--
 .../sis/internal/sql/feature/package-info.java     |  5 ++
 6 files changed, 137 insertions(+), 24 deletions(-)

diff --git 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java
 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java
index 6ac8fd7..9348d01 100644
--- 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java
+++ 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/Analyzer.java
@@ -22,6 +22,7 @@ import java.sql.ResultSet;
 import java.sql.ResultSetMetaData;
 import java.sql.SQLException;
 import java.util.*;
+import java.util.function.Supplier;
 import java.util.logging.Level;
 import java.util.logging.LogRecord;
 import javax.sql.DataSource;
@@ -140,6 +141,7 @@ final class Analyzer {
      * The namespace created with {@link #catalog} and {@link #schema}.
      */
     private transient NameSpace namespace;
+    public static final Supplier<GenericName> RANDOME_NAME = () -> 
Names.createGenericName("sis", ":", UUID.randomUUID().toString());
 
     /**
      * Creates a new analyzer for the database described by given metadata.
@@ -334,9 +336,9 @@ final class Analyzer {
 
     public FeatureAdapter buildAdapter(final SQLTypeSpecification spec) throws 
SQLException {
         final FeatureTypeBuilder builder = new FeatureTypeBuilder(nameFactory, 
functions.library, locale);
-        builder.setName(spec.getName() == null ? 
Names.createGenericName("sis", ":", UUID.randomUUID().toString()) : 
spec.getName());
-        builder.setDefinition(spec.getDefinition());
-        final String geomCol = spec.getPrimaryGeometryColumn().orElse("");
+        builder.setName(spec.getName().orElseGet(RANDOME_NAME));
+        spec.getDefinition().ifPresent(builder::setDefinition);
+        final String geomCol = 
spec.getPrimaryGeometryColumn().map(ColumnRef::getAttributeName).orElse("");
         final List pkCols = 
spec.getPK().map(PrimaryKey::getColumns).orElse(Collections.EMPTY_LIST);
         List<PropertyMapper> attributes = new ArrayList<>();
         // JDBC column indices are 1 based.
@@ -470,15 +472,15 @@ final class Analyzer {
         }
 
         @Override
-        public GenericName getName() {
-            return id.getName(Analyzer.this);
+        public Optional<GenericName> getName() {
+            return Optional.of(id.getName(Analyzer.this));
         }
 
         /**
          * The remarks are opportunistically stored in id.freeText if known by 
the caller.
          */
         @Override
-        public String getDefinition() throws SQLException {
+        public Optional<String> getDefinition() throws SQLException {
             String remarks = id.freeText;
             if (id instanceof Relation) {
                 try (ResultSet reflect = metadata.getTables(id.catalog, 
schemaEsc, tableEsc, null)) {
@@ -493,7 +495,7 @@ final class Analyzer {
                     }
                 }
             }
-            return remarks;
+            return Optional.ofNullable(remarks);
         }
 
         @Override
@@ -551,7 +553,7 @@ final class Analyzer {
         }
 
         @Override
-        public Optional<String> getPrimaryGeometryColumn() {
+        public Optional<ColumnRef> getPrimaryGeometryColumn() {
             return Optional.empty();
             //throw new UnsupportedOperationException("Not supported yet"); // 
"Alexis Manin (Geomatys)" on 20/09/2019
         }
@@ -589,13 +591,13 @@ final class Analyzer {
         }
 
         @Override
-        public GenericName getName() throws SQLException {
-            return name;
+        public Optional<GenericName> getName() throws SQLException {
+            return Optional.of(name);
         }
 
         @Override
-        public String getDefinition() throws SQLException {
-            return query;
+        public Optional<String> getDefinition() throws SQLException {
+            return Optional.of(query);
         }
 
         @Override
diff --git 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/PrimaryKey.java
 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/PrimaryKey.java
index 1e68fd3..e220abc 100644
--- 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/PrimaryKey.java
+++ 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/PrimaryKey.java
@@ -10,6 +10,9 @@ import org.apache.sis.util.ArgumentChecks;
 /**
  * Represents SQL primary key constraint. Main information is columns 
composing the key.
  *
+ * @implNote For now, only list of columns composing the key are returned. 
However, in the future it would be possible
+ * to add other information, as a value type to describe how to expose primary 
key value.
+ *
  * @author "Alexis Manin (Geomatys)"
  */
 interface PrimaryKey {
@@ -20,13 +23,16 @@ interface PrimaryKey {
         return Optional.of(new Composite(cols));
     }
 
-    //Class<T> getViewType();
+    /**
+     *
+     * @return List of column names composing the key. Should neither be null 
nor empty.
+     */
     List<String> getColumns();
 
     class Simple implements PrimaryKey {
         final String column;
 
-        public Simple(String column) {
+        Simple(String column) {
             this.column = column;
         }
 
@@ -40,7 +46,7 @@ interface PrimaryKey {
          */
         private final List<String> columns;
 
-        public Composite(List<String> columns) {
+        Composite(List<String> columns) {
             ArgumentChecks.ensureNonEmpty("Primary key column names", columns);
             this.columns = Collections.unmodifiableList(new 
ArrayList<>(columns));
         }
diff --git 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
index fdf518d..ffbdc8b 100644
--- 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
+++ 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QueryFeatureSet.java
@@ -83,7 +83,15 @@ public class QueryFeatureSet extends AbstractFeatureSet {
      * batch loading of results.
      */
     boolean allowBatchLoading = true;
+    /**
+     * Profiling variable. Define the fraction (0 none, 1 all) of a single 
fetch (as defined by {@link ResultSet#getFetchSize()}
+     * that {@link PrefetchSpliterator} will load in one go.
+     */
     float fetchRatio = 0.5f;
+    /**
+     * Profiling variable, serves to define {{@link 
PreparedStatement#setFetchSize(int)} SQL result fetch size}.
+     */
+    int fetchSize = 100;
 
     /**
      * Same as {@link #QueryFeatureSet(SQLBuilder, DataSource, Connection)}, 
except query is provided by a fixed text
@@ -268,7 +276,7 @@ public class QueryFeatureSet extends AbstractFeatureSet {
         @Override
         public Stream<Feature> connect(Connection connection) throws 
SQLException, DataStoreException {
             final PreparedStatement statement = 
connection.prepareStatement(sql, ResultSet.TYPE_FORWARD_ONLY, 
ResultSet.CONCUR_READ_ONLY);
-            statement.setFetchSize(100);
+            statement.setFetchSize(fetchSize);
             final ResultSet result = statement.executeQuery();
             final int fetchSize = result.getFetchSize();
             final boolean withPrefetch = !allowBatchLoading || (fetchSize < 1 
|| fetchSize >= Integer.MAX_VALUE);
@@ -297,6 +305,17 @@ public class QueryFeatureSet extends AbstractFeatureSet {
         }
     }
 
+    /**
+     * Base class for loading SQL query result loading through {@link 
Spliterator} API. Concrete implementations comes
+     * in two experimental flavors :
+     * <ul>
+     *     <li>Sequential streaming: {@link ResultSpliterator}</li>
+     *     <li>Parallelizable batch  streaming: {@link 
PrefetchSpliterator}</li>
+     * </ul>
+     *
+     * {@link QuerySpliteratorsBench A benchmark is available} to compare both 
implementations, which could be useful in
+     * the future to determine which implementation to priorize. For now 
results does not show much difference.
+     */
     private abstract class QuerySpliterator  implements 
java.util.Spliterator<Feature> {
 
         final ResultSet result;
@@ -347,12 +366,26 @@ public class QueryFeatureSet extends AbstractFeatureSet {
                 .append(query);
     }
 
+    /**
+     * An attempt to optimize feature loading through batching and potential 
parallelization. For now, it looks like
+     * there's not much improvement regarding to naive streaming approach. 
IMHO, two improvements would really impact
+     * performance positively if done:
+     * <ul>
+     *     <li>Optimisation of batch loading through {@link 
FeatureAdapter#prefetch(int, ResultSet)}</li>
+     *     <li>Better splitting balance, as stated by {@link 
Spliterator#trySplit()}</li>
+     * </ul>
+     */
     private final class PrefetchSpliterator extends QuerySpliterator {
 
         final int fetchSize;
 
         int idx;
         List<Feature> chunk;
+        /**
+         * According to {@link Spliterator#trySplit()} documentation, the 
original size estimation must be reduced after
+         * split to remain consistent.
+         */
+        long splittedAmount = 0;
 
         private PrefetchSpliterator(ResultSet result) throws SQLException {
             this(result, 0.5f);
@@ -374,14 +407,21 @@ public class QueryFeatureSet extends AbstractFeatureSet {
 
         public Spliterator<Feature> trySplit() {
             if (!ensureChunkAvailable()) return null;
+            final List<Feature> remainingChunk = chunk.subList(idx, 
chunk.size());
+            splittedAmount += remainingChunk.size();
             final Spliterator<Feature> chunkSpliterator = idx == 0 ?
-                    chunk.spliterator() : chunk.subList(idx, 
chunk.size()).spliterator();
+                    chunk.spliterator() : remainingChunk.spliterator();
             chunk = null;
             idx = 0;
             return chunkSpliterator;
         }
 
         @Override
+        public long estimateSize() {
+            return super.estimateSize() - splittedAmount;
+        }
+
+        @Override
         public int characteristics() {
             return super.characteristics() | Spliterator.CONCURRENT;
         }
diff --git 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QuerySpliteratorsBench.java
 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QuerySpliteratorsBench.java
index f2c62ef..bb519d9 100644
--- 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QuerySpliteratorsBench.java
+++ 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/QuerySpliteratorsBench.java
@@ -25,8 +25,8 @@ import org.openjdk.jmh.annotations.Warmup;
 import static org.apache.sis.util.ArgumentChecks.ensureStrictlyPositive;
 
 @Fork(value = 2, jvmArgs = {"-server", "-Xmx2g"} )
-@Warmup(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS)
-@Measurement(iterations = 5, time = 5, timeUnit = TimeUnit.SECONDS)
+@Warmup(iterations = 2, time = 4, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 2, time = 4, timeUnit = TimeUnit.SECONDS)
 public class QuerySpliteratorsBench {
 
     @State(Scope.Benchmark)
@@ -41,6 +41,12 @@ public class QuerySpliteratorsBench {
         @Param({"true", "false"})
         boolean prefetch;
 
+        @Param({"10", "100", "1000"})
+        int fetchSize;
+
+        @Param({"0.5", "1", "2"})
+        float fetchRatio;
+
         EmbeddedDataSource db;
         QueryFeatureSet fs;
 
@@ -78,6 +84,8 @@ public class QuerySpliteratorsBench {
 
                 fs = new QueryFeatureSet("SELECT * FROM TEST", db, c);
                 fs.allowBatchLoading = prefetch;
+                fs.fetchSize = fetchSize;
+                fs.fetchRatio = fetchRatio;
             }
         }
 
diff --git 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/SQLTypeSpecification.java
 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/SQLTypeSpecification.java
index 0e35edf..e54823d 100644
--- 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/SQLTypeSpecification.java
+++ 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/SQLTypeSpecification.java
@@ -1,35 +1,87 @@
 package org.apache.sis.internal.sql.feature;
 
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.util.List;
 import java.util.Optional;
 
 import org.opengis.util.GenericName;
 
+import org.apache.sis.feature.builder.FeatureTypeBuilder;
+import org.apache.sis.internal.feature.AttributeConvention;
 import org.apache.sis.storage.DataStoreContentException;
 
+/**
+ * Defines an application schema inferred from an SQL database (query, table, 
etc.). Implementations will be used by
+ * {@link Analyzer} to create an {@link FeatureAdapter adaptation layer to the 
feature model}. Default implementations
+ * can be retrieved for tables and queries respectively through {@link 
Analyzer#create(TableReference, TableReference)}
+ * and {@link Analyzer#create(PreparedStatement, String, GenericName)} methods.
+ */
 interface SQLTypeSpecification {
     /**
+     * Identifying name for the application schema. It is strongly recommended 
to be present, for SIS engine to be
+     * capable to create insightful models. However, in corner cases where no 
proper names could be provided, an empty
+     * value is allowed.
      *
-     * @return Name for the feature type to build. Nullable.
+     * @implNote SIS {@link FeatureTypeBuilder feature type builder} 
<em>requires</em> a name, and current
+     * {@link Analyzer#buildAdapter(SQLTypeSpecification) analysis 
implementation} will create a random UUID if
+     * necessary.
+     *
+     * @return Name for the feature type to build.
      * @throws SQLException If an error occurs while retrieving information 
from database.
      */
-    GenericName getName() throws SQLException;
+    Optional<GenericName> getName() throws SQLException;
 
     /**
+     * Gives an optional description of the application schema.This 
information is not necessary for any kind of
+     * computation, but allows to give end-user global information about the 
schema (s)he's manipulating.
      *
-     * @return A succint description of the data source. Nullable.
+     * @return A brief description of the data source.
      * @throws SQLException If an error occurs while retrieving information 
from database.
      */
-    String getDefinition() throws SQLException;
+    Optional<String> getDefinition() throws SQLException;
 
+    /**
+     * Primary key definition of source schema. Can be empty if no primary key 
is defined (Example: query definition).
+     *
+     * @return Primary key definition if any, otherwise an empty shell.
+     * @throws SQLException If an error occurs while exchanging information 
with underlying database.
+     */
     Optional<PrimaryKey> getPK() throws SQLException;
 
+    /**
+     *
+     * @return Ordered list of columns in application schema. Order is 
important, and will be relied upon to retrieve
+     *  {@link ResultSet#getObject(int) result values by index}.
+     */
     List<SQLColumn> getColumns();
 
+    /**
+     *
+     * @return All identified relations based on a foreign key in 
<em>current</em> application schema (1..1 or n..1).
+     * Corresponds to {@link Relation.Direction#IMPORT}. Can be empty but not 
null.
+     *
+     * @throws SQLException If an error occurs while exchanging information 
with underlying database.
+     */
     List<Relation> getImports() throws SQLException;
 
+    /**
+     *
+     * @return All identified relations based on foreign key located in 
<em>another</em> application schema (1..n).
+     * Corresponds to {@link Relation.Direction#EXPORT}. Can be empty but not 
null.
+     * @throws SQLException If an error occurs while exchanging information 
with underlying database.
+     * @throws DataStoreContentException If a schema problem is encountered.
+     */
     List<Relation> getExports() throws SQLException, DataStoreContentException;
 
-    default Optional<String> getPrimaryGeometryColumn() {return 
Optional.empty();}
+    /**
+     * In case target schema contains geographic information, this serves to 
identify without ambiguity which column
+     * contains what could be considered main geolocation (as stated by {@link 
AttributeConvention#GEOMETRY_PROPERTY}).
+     * This is a very important information in case application schema 
contains multiple geometric fields.
+     *
+     * @return The name of the column/attribute to be considered as main 
geometry information, or an empty shell if
+     * unknown.
+     */
+    default Optional<ColumnRef> getPrimaryGeometryColumn() {return 
Optional.empty();}
 }
diff --git 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/package-info.java
 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/package-info.java
index 2c198a8..19c3151 100644
--- 
a/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/package-info.java
+++ 
b/storage/sis-sqlstore/src/main/java/org/apache/sis/internal/sql/feature/package-info.java
@@ -25,8 +25,13 @@
  * This package is for internal use by SIS only. Classes in this package
  * may change in incompatible ways in any future version without notice.
  *
+ * @implNote Feature type analysis is done through {@link 
org.apache.sis.internal.sql.feature.Analyzer} class.
+ * It relies on internal {@link 
org.apache.sis.internal.sql.feature.SQLTypeSpecification} API to fetch SQL 
schema
+ * information, and build {@link 
org.apache.sis.internal.sql.feature.FeatureAdapter an adapter to feature model 
from it}.
+ *
  * @author  Johann Sorel (Geomatys)
  * @author  Martin Desruisseaux (Geomatys)
+ * @author  Alexis Manin (Geomatys)
  * @version 1.0
  * @since   1.0
  * @module

[sis] 19/45: WIP(SQLStore): improve benchmark and add javadoc

Reply via email to