This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new f1737f93a5 ARROW-17003: [Java][Docs] Document arrow-jdbc adapter (#13543)
f1737f93a5 is described below

commit f1737f93a5f9f420740f5e948a114c95c47427b0
Author: David Li <[email protected]>
AuthorDate: Wed Jul 13 08:01:45 2022 -0400

    ARROW-17003: [Java][Docs] Document arrow-jdbc adapter (#13543)
    
    Add a basic documentation page for the arrow-jdbc adapter.
    
    I would also like to add a Cookbook page, and then cross-link the two pages, as a follow-up.
    
    Authored-by: David Li <[email protected]>
    Signed-off-by: David Li <[email protected]>
---
 docs/Makefile                                      |  11 +-
 docs/source/conf.py                                |   2 +-
 docs/source/developers/java/building.rst           |  31 ++--
 docs/source/java/index.rst                         |   1 +
 docs/source/java/jdbc.rst                          | 174 +++++++++++++++++++++
 .../adapter/jdbc/JdbcToArrowConfigBuilder.java     |  35 +++++
 6 files changed, 237 insertions(+), 17 deletions(-)

diff --git a/docs/Makefile b/docs/Makefile
index 2511d22e44..ded2b73899 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -256,14 +256,19 @@ dummy:
 python:
        $(SPHINXBUILD) -b html $(SPHINXOPTS) -c $(SOURCEDIR) $(SOURCEDIR)/python $(BUILDDIR)/html/python
        @echo
-       @echo "Build finished. The HTML files are in $(BUILLDIR)/html/python"
+       @echo "Build finished. The HTML files are in $(BUILDDIR)/html/python"
 
 java_tutorial:
        $(SPHINXBUILD) -b html $(SPHINXOPTS) -c $(SOURCEDIR) $(SOURCEDIR)/java $(BUILDDIR)/html/tutorial/java
        @echo
-       @echo "Build finished. The HTML files are in $(BUILLDIR)/html/tutorial/java"
+       @echo "Build finished. The HTML files are in $(BUILDDIR)/html/tutorial/java"
 
 java_dev:
        $(SPHINXBUILD) -b html $(SPHINXOPTS) -c $(SOURCEDIR) $(SOURCEDIR)/developers/java $(BUILDDIR)/html/developers/java
        @echo
-       @echo "Build finished. The HTML files are in $(BUILLDIR)/html/developers/java"
\ No newline at end of file
+       @echo "Build finished. The HTML files are in $(BUILDDIR)/html/developers/java"
+
+java:
+       $(SPHINXBUILD) -b html $(SPHINXOPTS) -c $(SOURCEDIR) $(SOURCEDIR)/java $(BUILDDIR)/html/java
+       @echo
+       @echo "Build finished. The HTML files are in $(BUILDDIR)/html/java"
diff --git a/docs/source/conf.py b/docs/source/conf.py
index ea583aa8ce..42994b0f3b 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -172,7 +172,7 @@ if "+" in release:
 #
 # This is also used if you do content translation via gettext catalogs.
 # Usually you set "language" from the command line for these cases.
-language = None
+language = "en"
 
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
diff --git a/docs/source/developers/java/building.rst b/docs/source/developers/java/building.rst
index e50142d285..b7b9cb9213 100644
--- a/docs/source/developers/java/building.rst
+++ b/docs/source/developers/java/building.rst
@@ -186,6 +186,9 @@ Arrow repository, and update the following settings:
   Settings > Build, Execution, Deployment > Compiler > Java Compiler and disable
   "Use '--release' option for cross-compilation (Java 9 and later)". Otherwise
   you will get an error like "package sun.misc does not exist".
+* You may want to disable error-prone entirely if it gives spurious
+  warnings (disable both error-prone profiles in the Maven tool window
+  and "Reload All Maven Projects").
 * If using IntelliJ's Maven integration to build, you may need to change
   ``<fork>`` to ``false`` in the pom.xml files due to an `IntelliJ bug
   <https://youtrack.jetbrains.com/issue/IDEA-278903>`__.
@@ -196,23 +199,25 @@ IntelliJ Maven integration instead of with IntelliJ directly.
 Common Errors
 =============
 
-1. If the build cannot find dependencies, with errors like these:
-    - Could NOT find Boost (missing: Boost_INCLUDE_DIR system filesystem)
-    - Could NOT find Lz4 (missing: LZ4_LIB)
-    - Could NOT find zstd (missing: ZSTD_LIB)
+* When working with the JNI code: if the C++ build cannot find dependencies, with errors like these:
 
-    Download the dependencies at build time (More details in the `Dependency Resolution`_):
+  .. code-block::
 
-    .. code-block::
+     Could NOT find Boost (missing: Boost_INCLUDE_DIR system filesystem)
+     Could NOT find Lz4 (missing: LZ4_LIB)
+     Could NOT find zstd (missing: ZSTD_LIB)
 
-        -Dre2_SOURCE=BUNDLED \
-        -DBoost_SOURCE=BUNDLED \
-        -Dutf8proc_SOURCE=BUNDLED \
-        -DSnappy_SOURCE=BUNDLED \
-        -DORC_SOURCE=BUNDLED \
-        -DZLIB_SOURCE=BUNDLED
+  Specify that the dependencies should be downloaded at build time (more details at `Dependency Resolution`_):
+
+  .. code-block::
+
+     -Dre2_SOURCE=BUNDLED \
+     -DBoost_SOURCE=BUNDLED \
+     -Dutf8proc_SOURCE=BUNDLED \
+     -DSnappy_SOURCE=BUNDLED \
+     -DORC_SOURCE=BUNDLED \
+     -DZLIB_SOURCE=BUNDLED
 
 .. _Archery: https://github.com/apache/arrow/blob/master/dev/archery/README.md
 .. _Dependency Resolution: https://arrow.apache.org/docs/developers/cpp/building.html#individual-dependency-resolution
 .. _C++ shared libraries: https://arrow.apache.org/docs/cpp/build_system.html
-.. _TestArrowBuf.java: https://github.com/apache/arrow/blob/master/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java#L130:L147
diff --git a/docs/source/java/index.rst b/docs/source/java/index.rst
index b05988626b..ea08364858 100644
--- a/docs/source/java/index.rst
+++ b/docs/source/java/index.rst
@@ -35,4 +35,5 @@ on the Arrow format and other language bindings see the :doc:`parent documentati
    flight
    dataset
    cdata
+   jdbc
    Reference (javadoc) <reference/index>
diff --git a/docs/source/java/jdbc.rst b/docs/source/java/jdbc.rst
new file mode 100644
index 0000000000..da63351601
--- /dev/null
+++ b/docs/source/java/jdbc.rst
@@ -0,0 +1,174 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+==================
+Arrow JDBC Adapter
+==================
+
+The Arrow JDBC Adapter assists with working with JDBC and Arrow
+data. Currently, it supports reading JDBC ResultSets into Arrow
+VectorSchemaRoots.
+
+ResultSet to VectorSchemaRoot Conversion
+========================================
+
+This can be accessed via the JdbcToArrow class. The resulting
+ArrowVectorIterator will convert a ResultSet to Arrow data in batches
+of rows.
+
+.. code-block:: java
+
+   try (ArrowVectorIterator it = JdbcToArrow.sqlToArrowVectorIterator(resultSet, allocator)) {
+     while (it.hasNext()) {
+       VectorSchemaRoot root = it.next();
+       // Consume the root…
+     }
+   }
+
+The batch size and type mapping can both be customized:
+
+.. code-block:: java
+
+   JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, /*calendar=*/null)
+       .setReuseVectorSchemaRoot(reuseVectorSchemaRoot)
+       .setJdbcToArrowTypeConverter((jdbcFieldInfo -> {
+         switch (jdbcFieldInfo.getJdbcType()) {
+           case Types.BIGINT:
+             // Assume actual value range is SMALLINT
+             return new ArrowType.Int(16, true);
+           default:
+             return null;
+         }
+       }))
+       .build();
+   try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) {
+     while (iter.hasNext()) {
+       VectorSchemaRoot root = iter.next();
+       // Consume the root…
+     }
+   }
+
+The JDBC type can be explicitly specified, which is useful since JDBC
+drivers can give spurious type information. For example, the Postgres
+driver has been observed to use Decimal types with scale and precision
+0; these cases can be handled by specifying the type explicitly before
+reading. Also, some JDBC drivers may return BigDecimal values with
+inconsistent scale. A RoundingMode can be set to handle these cases:
+
+.. code-block:: java
+
+   Map<Integer, JdbcFieldInfo> mapping = new HashMap<>();
+   mapping.put(1, new JdbcFieldInfo(Types.DECIMAL, 20, 7));
+   JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, /*calendar=*/null)
+       .setBigDecimalRoundingMode(RoundingMode.UNNECESSARY)
+       .setExplicitTypesByColumnIndex(mapping)
+       .build();
+   try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) {
+     while (iter.hasNext()) {
+       VectorSchemaRoot root = iter.next();
+       // Consume the root…
+     }
+   }
+
+Currently, it is not possible to define a custom type conversion for a
+supported or unsupported type.
+
+Type Mapping
+------------
+
+The JDBC to Arrow type mapping can be obtained at runtime from
+`JdbcToArrowUtils.getArrowTypeFromJdbcType`_.
+
+.. _JdbcToArrowUtils.getArrowTypeFromJdbcType: https://arrow.apache.org/docs/java/reference/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.html#getArrowTypeFromJdbcType-org.apache.arrow.adapter.jdbc.JdbcFieldInfo-java.util.Calendar-
+
++--------------------+--------------------+-------+
+| JDBC Type          | Arrow Type         | Notes |
++====================+====================+=======+
+| ARRAY              | List               | \(1)  |
++--------------------+--------------------+-------+
+| BIGINT             | Int64              |       |
++--------------------+--------------------+-------+
+| BINARY             | Binary             |       |
++--------------------+--------------------+-------+
+| BIT                | Bool               |       |
++--------------------+--------------------+-------+
+| BLOB               | Binary             |       |
++--------------------+--------------------+-------+
+| BOOLEAN            | Bool               |       |
++--------------------+--------------------+-------+
+| CHAR               | Utf8               |       |
++--------------------+--------------------+-------+
+| CLOB               | Utf8               |       |
++--------------------+--------------------+-------+
+| DATE               | Date32             |       |
++--------------------+--------------------+-------+
+| DECIMAL            | Decimal128         | \(2)  |
++--------------------+--------------------+-------+
+| DOUBLE             | Double             |       |
++--------------------+--------------------+-------+
+| FLOAT              | Float              |       |
++--------------------+--------------------+-------+
+| INTEGER            | Int32              |       |
++--------------------+--------------------+-------+
+| LONGVARBINARY      | Binary             |       |
++--------------------+--------------------+-------+
+| LONGNVARCHAR       | Utf8               |       |
++--------------------+--------------------+-------+
+| LONGVARCHAR        | Utf8               |       |
++--------------------+--------------------+-------+
+| NCHAR              | Utf8               |       |
++--------------------+--------------------+-------+
+| NULL               | Null               |       |
++--------------------+--------------------+-------+
+| NUMERIC            | Decimal128         |       |
++--------------------+--------------------+-------+
+| NVARCHAR           | Utf8               |       |
++--------------------+--------------------+-------+
+| REAL               | Float              |       |
++--------------------+--------------------+-------+
+| SMALLINT           | Int16              |       |
++--------------------+--------------------+-------+
+| STRUCT             | Struct             | \(3)  |
++--------------------+--------------------+-------+
+| TIME               | Time32[ms]         |       |
++--------------------+--------------------+-------+
+| TIMESTAMP          | Timestamp[ms]      | \(4)  |
++--------------------+--------------------+-------+
+| TINYINT            | Int8               |       |
++--------------------+--------------------+-------+
+| VARBINARY          | Binary             |       |
++--------------------+--------------------+-------+
+| VARCHAR            | Utf8               |       |
++--------------------+--------------------+-------+
+
+* \(1) The list value type must be explicitly configured and cannot be
+  inferred. Use `setArraySubTypeByColumnIndexMap`_ or
+  `setArraySubTypeByColumnNameMap`_.
+* \(2) By default, the scale of decimal values must match the scale in
+  the type exactly; precision is allowed to be any value greater than
+  or equal to the type precision.  If there is a mismatch, by default, an
+  exception will be thrown.  This can be configured by setting a
+  different RoundingMode with setBigDecimalRoundingMode.
+* \(3) Not fully supported: while the type conversion is defined, the
+  value conversion is not. See ARROW-17006_.
+* \(4) If a Calendar is provided, then the timestamp will have the
+  timezone of the calendar, else it will be a timestamp without
+  timezone.
+
+.. _setArraySubTypeByColumnIndexMap: https://arrow.apache.org/docs/java/reference/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.html#setArraySubTypeByColumnIndexMap-java.util.Map-
+.. _setArraySubTypeByColumnNameMap: https://arrow.apache.org/docs/java/reference/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.html#setArraySubTypeByColumnNameMap-java.util.Map-
+.. _ARROW-17006: https://issues.apache.org/jira/browse/ARROW-17006
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
index 2d963cfac4..5618087669 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
@@ -170,32 +170,67 @@ public class JdbcToArrowConfigBuilder {
     return this;
   }
 
+  /**
+   * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for column types.
+   * <p>
+   * This can be useful to override type information from JDBC drivers that provide incomplete type info,
+   * e.g. DECIMAL with precision = scale = 0.
+   * <p>
+   * The column index is 1-based, to match the JDBC column index.
+   * @param map The mapping.
+   */
   public JdbcToArrowConfigBuilder setExplicitTypesByColumnIndex(Map<Integer, JdbcFieldInfo> map) {
     this.explicitTypesByColumnIndex = map;
     return this;
   }
 
+  /**
+   * Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for column types.
+   * <p>
+   * This can be useful to override type information from JDBC drivers that provide incomplete type info,
+   * e.g. DECIMAL with precision = scale = 0.
+   * @param map The mapping.
+   */
   public JdbcToArrowConfigBuilder setExplicitTypesByColumnName(Map<String, JdbcFieldInfo> map) {
     this.explicitTypesByColumnName = map;
     return this;
   }
 
+  /**
+   * Set the target number of rows to convert at once.
+   * <p>
+   * Use {@link JdbcToArrowConfig#NO_LIMIT_BATCH_SIZE} to read all rows at once.
+   */
   public JdbcToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) {
     this.targetBatchSize = targetBatchSize;
     return this;
   }
 
+  /**
+   * Set the function used to convert JDBC types to Arrow types.
+   * <p>
+   * Defaults to wrapping {@link JdbcToArrowUtils#getArrowTypeFromJdbcType(JdbcFieldInfo, Calendar)}.
+   */
   public JdbcToArrowConfigBuilder setJdbcToArrowTypeConverter(
       Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter) {
     this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter;
     return this;
   }
 
+  /**
+   * Set whether to use the same {@link org.apache.arrow.vector.VectorSchemaRoot} instance on each iteration,
+   * or to allocate a new one.
+   */
   public JdbcToArrowConfigBuilder setReuseVectorSchemaRoot(boolean reuseVectorSchemaRoot) {
     this.reuseVectorSchemaRoot = reuseVectorSchemaRoot;
     return this;
   }
 
+  /**
+   * Set the rounding mode used when the scale of the actual value does not match the declared scale.
+   * <p>
+   * By default, an error is raised in such cases.
+   */
   public JdbcToArrowConfigBuilder setBigDecimalRoundingMode(RoundingMode bigDecimalRoundingMode) {
     this.bigDecimalRoundingMode = bigDecimalRoundingMode;
     return this;

Reply via email to