This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new a227e50c01 ARROW-16600: [Java] Configurable RoundingMode to handle 
inconsistent scale in BigDecimals (#13433)
a227e50c01 is described below

commit a227e50c0102898ff594d2da0479819272ecbfdd
Author: Todd Farmer <[email protected]>
AuthorDate: Mon Jun 27 09:27:16 2022 -0600

    ARROW-16600: [Java] Configurable RoundingMode to handle inconsistent scale 
in BigDecimals (#13433)
    
    Under certain conditions, JDBC vendors may return ResultSets where the 
scale of BigDecimal values differs by row. Existing logic required exact 
matching of every row to the established scale for the column (target vector), 
and threw UnsupportedOperationException when there was a mismatch, aborting 
ResultSet processing.  This change enables configuration of a 
java.math.RoundingMode to be applied in any required scale conversion, and 
should enable both full-fidelity truncation (trimming  [...]
    
    Note that this is implemented as a global configuration option - it will 
apply to all BigDecimal columns in a ResultSet being processed using the 
supplied config. It's possible to provide per-column control over this 
behavior, but I assessed that to complicate configuration for little benefit. 
Please indicate if this decision should be reevaluated.
    
    
    Authored-by: Todd Farmer <[email protected]>
    Signed-off-by: David Li <[email protected]>
---
 .../arrow/adapter/jdbc/JdbcToArrowConfig.java      | 35 ++++++++++--
 .../adapter/jdbc/JdbcToArrowConfigBuilder.java     | 11 +++-
 .../arrow/adapter/jdbc/JdbcToArrowUtils.java       |  4 +-
 .../adapter/jdbc/consumer/DecimalConsumer.java     | 64 ++++++++++++++++++----
 .../arrow/adapter/jdbc/h2/JdbcToArrowTest.java     | 22 +++++++-
 5 files changed, 116 insertions(+), 20 deletions(-)

diff --git 
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
 
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
index d1c21621a2..b475ee046b 100644
--- 
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
+++ 
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
@@ -17,6 +17,7 @@
 
 package org.apache.arrow.adapter.jdbc;
 
+import java.math.RoundingMode;
 import java.util.Calendar;
 import java.util.Map;
 import java.util.function.Function;
@@ -59,6 +60,7 @@ public final class JdbcToArrowConfig {
   private final Map<String, JdbcFieldInfo> arraySubTypesByColumnName;
   private final Map<Integer, JdbcFieldInfo> explicitTypesByColumnIndex;
   private final Map<String, JdbcFieldInfo> explicitTypesByColumnName;
+  private final RoundingMode bigDecimalRoundingMode;
   /**
    * The maximum rowCount to read each time when partially convert data.
    * Default value is 1024 and -1 means disable partial read.
@@ -87,7 +89,20 @@ public final class JdbcToArrowConfig {
         /* reuse vector schema root */ false,
         /* array sub-types by column index */ null,
         /* array sub-types by column name */ null,
-        DEFAULT_TARGET_BATCH_SIZE, null);
+        DEFAULT_TARGET_BATCH_SIZE, null, null);
+  }
+
+  JdbcToArrowConfig(
+          BufferAllocator allocator,
+          Calendar calendar,
+          boolean includeMetadata,
+          boolean reuseVectorSchemaRoot,
+          Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex,
+          Map<String, JdbcFieldInfo> arraySubTypesByColumnName,
+          int targetBatchSize,
+          Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter) {
+    this(allocator, calendar, includeMetadata, reuseVectorSchemaRoot, 
arraySubTypesByColumnIndex,
+        arraySubTypesByColumnName, targetBatchSize, jdbcToArrowTypeConverter, 
null);
   }
 
   /**
@@ -101,6 +116,7 @@ public final class JdbcToArrowConfig {
    * @param reuseVectorSchemaRoot Whether to reuse the vector schema root for 
each data load.
    * @param arraySubTypesByColumnIndex The type of the JDBC array at the 
column index (1-based).
    * @param arraySubTypesByColumnName  The type of the JDBC array at the 
column name.
+   * @param targetBatchSize The target batch size to be used in preallocation 
of the resulting vectors.
    * @param jdbcToArrowTypeConverter The function that maps JDBC field type 
information to arrow type. If set to null,
    *                                 the default mapping will be used, which 
is defined as:
    *  <ul>
@@ -132,6 +148,9 @@ public final class JdbcToArrowConfig {
    *    <li>STRUCT --> ArrowType.Struct</li>
    *    <li>NULL --> ArrowType.Null</li>
    *  </ul>
+   * @param bigDecimalRoundingMode The java.math.RoundingMode to be used in 
coercion of a BigDecimal from a
+   *                               ResultSet having a scale which does not 
match that of the target vector. Use null
+   *                               (default value) to require strict scale 
matching.
    */
   JdbcToArrowConfig(
       BufferAllocator allocator,
@@ -141,7 +160,8 @@ public final class JdbcToArrowConfig {
       Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex,
       Map<String, JdbcFieldInfo> arraySubTypesByColumnName,
       int targetBatchSize,
-      Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter) {
+      Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter,
+      RoundingMode bigDecimalRoundingMode) {
 
     this(
         allocator,
@@ -153,7 +173,8 @@ public final class JdbcToArrowConfig {
         targetBatchSize,
         jdbcToArrowTypeConverter,
         null,
-        null);
+        null,
+        bigDecimalRoundingMode);
   }
 
   JdbcToArrowConfig(
@@ -166,7 +187,8 @@ public final class JdbcToArrowConfig {
       int targetBatchSize,
       Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter,
       Map<Integer, JdbcFieldInfo> explicitTypesByColumnIndex,
-      Map<String, JdbcFieldInfo> explicitTypesByColumnName) {
+      Map<String, JdbcFieldInfo> explicitTypesByColumnName,
+      RoundingMode bigDecimalRoundingMode) {
     Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
     this.allocator = allocator;
     this.calendar = calendar;
@@ -177,6 +199,7 @@ public final class JdbcToArrowConfig {
     this.targetBatchSize = targetBatchSize;
     this.explicitTypesByColumnIndex = explicitTypesByColumnIndex;
     this.explicitTypesByColumnName = explicitTypesByColumnName;
+    this.bigDecimalRoundingMode = bigDecimalRoundingMode;
 
     // set up type converter
     this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter != null ? 
jdbcToArrowTypeConverter :
@@ -288,4 +311,8 @@ public final class JdbcToArrowConfig {
       return explicitTypesByColumnName.get(name);
     }
   }
+
+  public RoundingMode getBigDecimalRoundingMode() {
+    return bigDecimalRoundingMode;
+  }
 }
diff --git 
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
 
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
index 2f1f91ca1c..2d963cfac4 100644
--- 
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
+++ 
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
@@ -19,6 +19,7 @@ package org.apache.arrow.adapter.jdbc;
 
 import static 
org.apache.arrow.adapter.jdbc.JdbcToArrowConfig.DEFAULT_TARGET_BATCH_SIZE;
 
+import java.math.RoundingMode;
 import java.util.Calendar;
 import java.util.Map;
 import java.util.function.Function;
@@ -41,6 +42,7 @@ public class JdbcToArrowConfigBuilder {
   private Map<String, JdbcFieldInfo> explicitTypesByColumnName;
   private int targetBatchSize;
   private Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter;
+  private RoundingMode bigDecimalRoundingMode;
 
   /**
    * Default constructor for the <code>JdbcToArrowConfigBuilder}</code>.
@@ -56,6 +58,7 @@ public class JdbcToArrowConfigBuilder {
     this.arraySubTypesByColumnName = null;
     this.explicitTypesByColumnIndex = null;
     this.explicitTypesByColumnName = null;
+    this.bigDecimalRoundingMode = null;
   }
 
   /**
@@ -193,6 +196,11 @@ public class JdbcToArrowConfigBuilder {
     return this;
   }
 
+  public JdbcToArrowConfigBuilder setBigDecimalRoundingMode(RoundingMode 
bigDecimalRoundingMode) {
+    this.bigDecimalRoundingMode = bigDecimalRoundingMode;
+    return this;
+  }
+
   /**
    * This builds the {@link JdbcToArrowConfig} from the provided
    * {@link BufferAllocator} and {@link Calendar}.
@@ -211,6 +219,7 @@ public class JdbcToArrowConfigBuilder {
         targetBatchSize,
         jdbcToArrowTypeConverter,
         explicitTypesByColumnIndex,
-        explicitTypesByColumnName);
+        explicitTypesByColumnName,
+        bigDecimalRoundingMode);
   }
 }
diff --git 
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
 
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
index a918afaf05..4bb6558e06 100644
--- 
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
+++ 
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
@@ -21,6 +21,7 @@ import static 
org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE;
 import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE;
 
 import java.io.IOException;
+import java.math.RoundingMode;
 import java.sql.Date;
 import java.sql.ParameterMetaData;
 import java.sql.ResultSet;
@@ -427,7 +428,8 @@ public class JdbcToArrowUtils {
             return null;
         }
       case Decimal:
-        return DecimalConsumer.createConsumer((DecimalVector) vector, 
columnIndex, nullable);
+        final RoundingMode bigDecimalRoundingMode = 
config.getBigDecimalRoundingMode();
+        return DecimalConsumer.createConsumer((DecimalVector) vector, 
columnIndex, nullable, bigDecimalRoundingMode);
       case FloatingPoint:
         switch (((ArrowType.FloatingPoint) arrowType).getPrecision()) {
           case SINGLE:
diff --git 
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java
 
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java
index 4498fdeccb..bed96dda8b 100644
--- 
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java
+++ 
b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java
@@ -18,6 +18,7 @@
 package org.apache.arrow.adapter.jdbc.consumer;
 
 import java.math.BigDecimal;
+import java.math.RoundingMode;
 import java.sql.ResultSet;
 import java.sql.SQLException;
 
@@ -27,29 +28,68 @@ import org.apache.arrow.vector.DecimalVector;
  * Consumer which consume decimal type values from {@link ResultSet}.
  * Write the data to {@link org.apache.arrow.vector.DecimalVector}.
  */
-public class DecimalConsumer {
+public abstract class DecimalConsumer extends BaseConsumer<DecimalVector> {
+  private final RoundingMode bigDecimalRoundingMode;
+  private final int scale;
+
+  /**
+   * Constructs a new consumer.
+   *
+   * @param vector the underlying vector for the consumer.
+   * @param index  the column id for the consumer.
+   */
+  public DecimalConsumer(DecimalVector vector, int index) {
+    this(vector, index, null);
+  }
+
+  /**
+   * Constructs a new consumer, with optional coercibility.
+   * @param vector the underlying vector for the consumer.
+   * @param index the column index for the consumer.
+   * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the 
BigDecimal scale does not match that
+   *                               of the target vector.  Set to null to 
retain strict matching behavior (scale of
+   *                               source and target vector must match 
exactly).
+   */
+  public DecimalConsumer(DecimalVector vector, int index, RoundingMode 
bigDecimalRoundingMode) {
+    super(vector, index);
+    this.bigDecimalRoundingMode = bigDecimalRoundingMode;
+    this.scale = vector.getScale();
+  }
 
   /**
    * Creates a consumer for {@link DecimalVector}.
    */
-  public static JdbcConsumer<DecimalVector> createConsumer(DecimalVector 
vector, int index, boolean nullable) {
+  public static JdbcConsumer<DecimalVector> createConsumer(
+          DecimalVector vector,
+          int index,
+          boolean nullable,
+          RoundingMode bigDecimalRoundingMode
+  ) {
     if (nullable) {
-      return new NullableDecimalConsumer(vector, index);
+      return new NullableDecimalConsumer(vector, index, 
bigDecimalRoundingMode);
     } else {
-      return new NonNullableDecimalConsumer(vector, index);
+      return new NonNullableDecimalConsumer(vector, index, 
bigDecimalRoundingMode);
+    }
+  }
+
+  protected void set(BigDecimal value) {
+    if (bigDecimalRoundingMode != null && value.scale() != scale) {
+      value = value.setScale(scale, bigDecimalRoundingMode);
     }
+    vector.set(currentIndex, value);
   }
 
+
   /**
    * Consumer for nullable decimal.
    */
-  static class NullableDecimalConsumer extends BaseConsumer<DecimalVector> {
+  static class NullableDecimalConsumer extends DecimalConsumer {
 
     /**
      * Instantiate a DecimalConsumer.
      */
-    public NullableDecimalConsumer(DecimalVector vector, int index) {
-      super(vector, index);
+    public NullableDecimalConsumer(DecimalVector vector, int index, 
RoundingMode bigDecimalRoundingMode) {
+      super(vector, index, bigDecimalRoundingMode);
     }
 
     @Override
@@ -58,7 +98,7 @@ public class DecimalConsumer {
       if (!resultSet.wasNull()) {
         // for fixed width vectors, we have allocated enough memory 
proactively,
         // so there is no need to call the setSafe method here.
-        vector.set(currentIndex, value);
+        set(value);
       }
       currentIndex++;
     }
@@ -67,13 +107,13 @@ public class DecimalConsumer {
   /**
    * Consumer for non-nullable decimal.
    */
-  static class NonNullableDecimalConsumer extends BaseConsumer<DecimalVector> {
+  static class NonNullableDecimalConsumer extends DecimalConsumer {
 
     /**
      * Instantiate a DecimalConsumer.
      */
-    public NonNullableDecimalConsumer(DecimalVector vector, int index) {
-      super(vector, index);
+    public NonNullableDecimalConsumer(DecimalVector vector, int index, 
RoundingMode bigDecimalRoundingMode) {
+      super(vector, index, bigDecimalRoundingMode);
     }
 
     @Override
@@ -81,7 +121,7 @@ public class DecimalConsumer {
       BigDecimal value = resultSet.getBigDecimal(columnIndexInResultSet);
       // for fixed width vectors, we have allocated enough memory proactively,
       // so there is no need to call the setSafe method here.
-      vector.set(currentIndex, value);
+      set(value);
       currentIndex++;
     }
   }
diff --git 
a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
 
b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
index 9b3076a05f..6e88d543b6 100644
--- 
a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
+++ 
b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
@@ -50,6 +50,7 @@ import static org.junit.Assert.fail;
 
 import java.io.IOException;
 import java.math.BigDecimal;
+import java.math.RoundingMode;
 import java.sql.ResultSet;
 import java.sql.ResultSetMetaData;
 import java.sql.SQLException;
@@ -428,11 +429,28 @@ public class JdbcToArrowTest extends 
AbstractJdbcToArrowTest {
       ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, 
config);
       while (iter.hasNext()) {
         iter.next();
-        fail("This is expected to fail due to ARROW-16600");
+        fail("This is expected to fail due to inconsistent BigDecimal scales, 
while strict matching is enabled.");
       }
       iter.close();
     } catch (Exception ex) {
-      // Once ARROW-16600 is implemented, this should no longer fail.
+      // Expected to fail due to default strict scale matching of ResultSet 
and target vector BigDecimal.
+    }
+    // Reuse same ResultSet, with RoundingMode.UNNECESSARY set to coerce 
BigDecimal scale as needed:
+    config = new JdbcToArrowConfigBuilder(
+            allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata 
*/ false)
+            .setReuseVectorSchemaRoot(reuseVectorSchemaRoot)
+            .setExplicitTypesByColumnIndex(explicitMapping)
+            .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP)
+            .setBigDecimalRoundingMode(RoundingMode.UNNECESSARY)
+            .build();
+    try {
+      ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, 
config);
+      while (iter.hasNext()) {
+        iter.next();
+      }
+      iter.close();
+    } catch (Exception ex) {
+      fail("BigDecminal scale failed to coerce as expected.");
     }
   }
 

Reply via email to