This is an automated email from the ASF dual-hosted git repository.

csy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new abab96dd6 ORC-1683: Fix `instanceof` of BinaryStatisticsImpl merge 
method
abab96dd6 is described below

commit abab96dd6b908d0ea6cf6d522cc7f8b095b8e4ec
Author: sychen <[email protected]>
AuthorDate: Fri Aug 23 12:25:11 2024 +0800

    ORC-1683: Fix `instanceof` of BinaryStatisticsImpl merge method
    
    ### What changes were proposed in this pull request?
    This PR aims to fix `instanceof` of `BinaryStatisticsImpl` merge method.
    
    ### Why are the changes needed?
    In [ORC-1542](https://issues.apache.org/jira/browse/ORC-1542), we modified 
part of instanceof, but `BinaryStatisticsImpl` was not modified because the 
merge method was written differently.
    
    However, the current code is instanceof `BinaryColumnStatistics` and then 
explicitly cast `BinaryStatisticsImpl`, so it should be replaced by the new 
instanceof writing method (`Pattern Matching for instanceof`).
    
    ```java
    if (other instanceof BinaryColumnStatistics) {
            BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
    ```
    
    ### How was this patch tested?
    GA
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #1883 from cxzl25/ORC-1683.
    
    Authored-by: sychen <[email protected]>
    Signed-off-by: Shaoyun Chen <[email protected]>
---
 .../org/apache/orc/impl/ColumnStatisticsImpl.java  | 23 +++++++++++-
 .../test/org/apache/orc/TestColumnStatistics.java  | 43 ++++++++++++++++++++++
 2 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java 
b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
index c5e13cc3c..d6147050e 100644
--- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -102,6 +102,8 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
     public void merge(ColumnStatisticsImpl other) {
       if (other instanceof BooleanStatisticsImpl bkt) {
         trueCount += bkt.trueCount;
+      } else if (!(other instanceof BooleanColumnStatistics)) {
+        throw new IllegalArgumentException("Incompatible merging of boolean 
column statistics");
       } else {
         if (isStatsExists() && trueCount != 0) {
           throw new IllegalArgumentException("Incompatible merging of boolean 
column statistics");
@@ -222,6 +224,8 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
           }
         }
         sum += otherColl.sum;
+      } else if (!(other instanceof CollectionColumnStatistics)) {
+        throw new IllegalArgumentException("Incompatible merging of collection 
column statistics");
       } else {
         if (isStatsExists()) {
           throw new IllegalArgumentException("Incompatible merging of 
collection column statistics");
@@ -397,6 +401,8 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
             overflow = true;
           }
         }
+      } else if (!(other instanceof IntegerColumnStatistics)) {
+        throw new IllegalArgumentException("Incompatible merging of integer 
column statistics");
       } else {
         if (isStatsExists() && hasMinimum) {
           throw new IllegalArgumentException("Incompatible merging of integer 
column statistics");
@@ -560,6 +566,8 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
           }
         }
         sum += dbl.sum;
+      } else if (!(other instanceof DoubleColumnStatistics)) {
+        throw new IllegalArgumentException("Incompatible merging of double 
column statistics");
       } else {
         if (isStatsExists() && hasMinimum) {
           throw new IllegalArgumentException("Incompatible merging of double 
column statistics");
@@ -763,6 +771,8 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
           }
         }
         sum += str.sum;
+      } else if (!(other instanceof StringColumnStatistics)) {
+        throw new IllegalArgumentException("Incompatible merging of string 
column statistics");
       } else {
         if (isStatsExists()) {
           throw new IllegalArgumentException("Incompatible merging of string 
column statistics");
@@ -993,9 +1003,10 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
 
     @Override
     public void merge(ColumnStatisticsImpl other) {
-      if (other instanceof BinaryColumnStatistics) {
-        BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
+      if (other instanceof BinaryStatisticsImpl bin) {
         sum += bin.sum;
+      } else if (!(other instanceof BinaryColumnStatistics)) {
+        throw new IllegalArgumentException("Incompatible merging of binary 
column statistics");
       } else {
         if (isStatsExists() && sum != 0) {
           throw new IllegalArgumentException("Incompatible merging of binary 
column statistics");
@@ -1128,6 +1139,8 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
             sum.mutateAdd(dec.sum);
           }
         }
+      } else if (!(other instanceof DecimalColumnStatistics)) {
+        throw new IllegalArgumentException("Incompatible merging of decimal 
column statistics");
       } else {
         if (isStatsExists() && minimum != null) {
           throw new IllegalArgumentException("Incompatible merging of decimal 
column statistics");
@@ -1321,6 +1334,8 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
             hasSum = false;
           }
         }
+      } else if (!(other instanceof DecimalColumnStatistics)) {
+        throw new IllegalArgumentException("Incompatible merging of decimal 
column statistics");
       } else {
         if (other.getNumberOfValues() != 0) {
           throw new IllegalArgumentException("Incompatible merging of decimal 
column statistics");
@@ -1486,6 +1501,8 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
       if (other instanceof DateStatisticsImpl dateStats) {
         minimum = Math.min(minimum, dateStats.minimum);
         maximum = Math.max(maximum, dateStats.maximum);
+      } else if (!(other instanceof DateColumnStatistics)) {
+        throw new IllegalArgumentException("Incompatible merging of date 
column statistics");
       } else {
         if (isStatsExists() && count != 0) {
           throw new IllegalArgumentException("Incompatible merging of date 
column statistics");
@@ -1698,6 +1715,8 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
             maximum = timestampStats.maximum;
           }
         }
+      } else if (!(other instanceof TimestampColumnStatistics)) {
+        throw new IllegalArgumentException("Incompatible merging of timestamp 
column statistics");
       } else {
         if (isStatsExists() && count != 0) {
           throw new IllegalArgumentException("Incompatible merging of 
timestamp column statistics");
diff --git a/java/core/src/test/org/apache/orc/TestColumnStatistics.java 
b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
index 2ef96e5f5..ddcbcdc1a 100644
--- a/java/core/src/test/org/apache/orc/TestColumnStatistics.java
+++ b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
@@ -27,6 +27,7 @@ import 
org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.orc.impl.ColumnStatisticsImpl;
 import org.junit.jupiter.api.BeforeEach;
@@ -44,6 +45,7 @@ import java.util.TimeZone;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 /**
@@ -699,6 +701,47 @@ public class TestColumnStatistics {
         "Incorrect minimum value");
   }
 
+  @Test
+  public void testBinaryMerge() {
+    TypeDescription schema = TypeDescription.createBinary();
+
+    ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+    ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
+    stats1.increment(3);
+    stats1.updateBinary(new 
BytesWritable("bob".getBytes(StandardCharsets.UTF_8)));
+    stats1.updateBinary(new 
BytesWritable("david".getBytes(StandardCharsets.UTF_8)));
+    stats1.updateBinary(new 
BytesWritable("charles".getBytes(StandardCharsets.UTF_8)));
+    stats2.increment(2);
+    stats2.updateBinary(new 
BytesWritable("anne".getBytes(StandardCharsets.UTF_8)));
+    stats2.updateBinary(new 
BytesWritable("abcdef".getBytes(StandardCharsets.UTF_8)));
+
+    assertEquals(15, ((BinaryColumnStatistics) stats1).getSum());
+    assertEquals(10, ((BinaryColumnStatistics) stats2).getSum());
+
+    stats1.merge(stats2);
+
+    assertEquals(25, ((BinaryColumnStatistics) stats1).getSum());
+  }
+
+  @Test
+  public void testMergeIncompatible() {
+    TypeDescription stringSchema = TypeDescription.createString();
+    ColumnStatisticsImpl stringStats = 
ColumnStatisticsImpl.create(stringSchema);
+
+    TypeDescription doubleSchema = TypeDescription.createDouble();
+    ColumnStatisticsImpl doubleStats = 
ColumnStatisticsImpl.create(doubleSchema);
+
+    stringStats.increment(3);
+    stringStats.updateString(new Text("bob"));
+    stringStats.updateString(new Text("david"));
+    stringStats.updateString(new Text("charles"));
+
+    assertThrows(IllegalArgumentException.class, () -> {
+      doubleStats.merge(stringStats);
+    });
+
+    assertEquals(0, ((DoubleColumnStatistics) 
doubleStats).getNumberOfValues());
+  }
 
   Path workDir = new Path(System.getProperty("test.tmp.dir",
       "target" + File.separator + "test" + File.separator + "tmp"));

Reply via email to