This is an automated email from the ASF dual-hosted git repository.
csy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new abab96dd6 ORC-1683: Fix `instanceof` of BinaryStatisticsImpl merge
method
abab96dd6 is described below
commit abab96dd6b908d0ea6cf6d522cc7f8b095b8e4ec
Author: sychen <[email protected]>
AuthorDate: Fri Aug 23 12:25:11 2024 +0800
ORC-1683: Fix `instanceof` of BinaryStatisticsImpl merge method
### What changes were proposed in this pull request?
This PR aims to fix `instanceof` of `BinaryStatisticsImpl` merge method.
### Why are the changes needed?
In [ORC-1542](https://issues.apache.org/jira/browse/ORC-1542), we modified
part of instanceof, but `BinaryStatisticsImpl` was not modified because the
merge method was written differently.
However, the current code is instanceof `BinaryColumnStatistics` and then
explicitly cast `BinaryStatisticsImpl`, so it should be replaced by the new
instanceof writing method (`Pattern Matching for instanceof`).
```java
if (other instanceof BinaryColumnStatistics) {
BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
```
### How was this patch tested?
GA
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #1883 from cxzl25/ORC-1683.
Authored-by: sychen <[email protected]>
Signed-off-by: Shaoyun Chen <[email protected]>
---
.../org/apache/orc/impl/ColumnStatisticsImpl.java | 23 +++++++++++-
.../test/org/apache/orc/TestColumnStatistics.java | 43 ++++++++++++++++++++++
2 files changed, 64 insertions(+), 2 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
index c5e13cc3c..d6147050e 100644
--- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -102,6 +102,8 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
public void merge(ColumnStatisticsImpl other) {
if (other instanceof BooleanStatisticsImpl bkt) {
trueCount += bkt.trueCount;
+ } else if (!(other instanceof BooleanColumnStatistics)) {
+ throw new IllegalArgumentException("Incompatible merging of boolean
column statistics");
} else {
if (isStatsExists() && trueCount != 0) {
throw new IllegalArgumentException("Incompatible merging of boolean
column statistics");
@@ -222,6 +224,8 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
}
}
sum += otherColl.sum;
+ } else if (!(other instanceof CollectionColumnStatistics)) {
+ throw new IllegalArgumentException("Incompatible merging of collection
column statistics");
} else {
if (isStatsExists()) {
throw new IllegalArgumentException("Incompatible merging of
collection column statistics");
@@ -397,6 +401,8 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
overflow = true;
}
}
+ } else if (!(other instanceof IntegerColumnStatistics)) {
+ throw new IllegalArgumentException("Incompatible merging of integer
column statistics");
} else {
if (isStatsExists() && hasMinimum) {
throw new IllegalArgumentException("Incompatible merging of integer
column statistics");
@@ -560,6 +566,8 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
}
}
sum += dbl.sum;
+ } else if (!(other instanceof DoubleColumnStatistics)) {
+ throw new IllegalArgumentException("Incompatible merging of double
column statistics");
} else {
if (isStatsExists() && hasMinimum) {
throw new IllegalArgumentException("Incompatible merging of double
column statistics");
@@ -763,6 +771,8 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
}
}
sum += str.sum;
+ } else if (!(other instanceof StringColumnStatistics)) {
+ throw new IllegalArgumentException("Incompatible merging of string
column statistics");
} else {
if (isStatsExists()) {
throw new IllegalArgumentException("Incompatible merging of string
column statistics");
@@ -993,9 +1003,10 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
@Override
public void merge(ColumnStatisticsImpl other) {
- if (other instanceof BinaryColumnStatistics) {
- BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
+ if (other instanceof BinaryStatisticsImpl bin) {
sum += bin.sum;
+ } else if (!(other instanceof BinaryColumnStatistics)) {
+ throw new IllegalArgumentException("Incompatible merging of binary
column statistics");
} else {
if (isStatsExists() && sum != 0) {
throw new IllegalArgumentException("Incompatible merging of binary
column statistics");
@@ -1128,6 +1139,8 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
sum.mutateAdd(dec.sum);
}
}
+ } else if (!(other instanceof DecimalColumnStatistics)) {
+ throw new IllegalArgumentException("Incompatible merging of decimal
column statistics");
} else {
if (isStatsExists() && minimum != null) {
throw new IllegalArgumentException("Incompatible merging of decimal
column statistics");
@@ -1321,6 +1334,8 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
hasSum = false;
}
}
+ } else if (!(other instanceof DecimalColumnStatistics)) {
+ throw new IllegalArgumentException("Incompatible merging of decimal
column statistics");
} else {
if (other.getNumberOfValues() != 0) {
throw new IllegalArgumentException("Incompatible merging of decimal
column statistics");
@@ -1486,6 +1501,8 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
if (other instanceof DateStatisticsImpl dateStats) {
minimum = Math.min(minimum, dateStats.minimum);
maximum = Math.max(maximum, dateStats.maximum);
+ } else if (!(other instanceof DateColumnStatistics)) {
+ throw new IllegalArgumentException("Incompatible merging of date
column statistics");
} else {
if (isStatsExists() && count != 0) {
throw new IllegalArgumentException("Incompatible merging of date
column statistics");
@@ -1698,6 +1715,8 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
maximum = timestampStats.maximum;
}
}
+ } else if (!(other instanceof TimestampColumnStatistics)) {
+ throw new IllegalArgumentException("Incompatible merging of timestamp
column statistics");
} else {
if (isStatsExists() && count != 0) {
throw new IllegalArgumentException("Incompatible merging of
timestamp column statistics");
diff --git a/java/core/src/test/org/apache/orc/TestColumnStatistics.java
b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
index 2ef96e5f5..ddcbcdc1a 100644
--- a/java/core/src/test/org/apache/orc/TestColumnStatistics.java
+++ b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
@@ -27,6 +27,7 @@ import
org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.orc.impl.ColumnStatisticsImpl;
import org.junit.jupiter.api.BeforeEach;
@@ -44,6 +45,7 @@ import java.util.TimeZone;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
@@ -699,6 +701,47 @@ public class TestColumnStatistics {
"Incorrect minimum value");
}
+ @Test
+ public void testBinaryMerge() {
+ TypeDescription schema = TypeDescription.createBinary();
+
+ ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+ ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
+ stats1.increment(3);
+ stats1.updateBinary(new
BytesWritable("bob".getBytes(StandardCharsets.UTF_8)));
+ stats1.updateBinary(new
BytesWritable("david".getBytes(StandardCharsets.UTF_8)));
+ stats1.updateBinary(new
BytesWritable("charles".getBytes(StandardCharsets.UTF_8)));
+ stats2.increment(2);
+ stats2.updateBinary(new
BytesWritable("anne".getBytes(StandardCharsets.UTF_8)));
+ stats2.updateBinary(new
BytesWritable("abcdef".getBytes(StandardCharsets.UTF_8)));
+
+ assertEquals(15, ((BinaryColumnStatistics) stats1).getSum());
+ assertEquals(10, ((BinaryColumnStatistics) stats2).getSum());
+
+ stats1.merge(stats2);
+
+ assertEquals(25, ((BinaryColumnStatistics) stats1).getSum());
+ }
+
+ @Test
+ public void testMergeIncompatible() {
+ TypeDescription stringSchema = TypeDescription.createString();
+ ColumnStatisticsImpl stringStats =
ColumnStatisticsImpl.create(stringSchema);
+
+ TypeDescription doubleSchema = TypeDescription.createDouble();
+ ColumnStatisticsImpl doubleStats =
ColumnStatisticsImpl.create(doubleSchema);
+
+ stringStats.increment(3);
+ stringStats.updateString(new Text("bob"));
+ stringStats.updateString(new Text("david"));
+ stringStats.updateString(new Text("charles"));
+
+ assertThrows(IllegalArgumentException.class, () -> {
+ doubleStats.merge(stringStats);
+ });
+
+ assertEquals(0, ((DoubleColumnStatistics)
doubleStats).getNumberOfValues());
+ }
Path workDir = new Path(System.getProperty("test.tmp.dir",
"target" + File.separator + "test" + File.separator + "tmp"));