GitHub user omalley commented on a diff in the pull request:
https://github.com/apache/orc/pull/299#discussion_r208398032
--- Diff: java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
---
@@ -584,16 +630,40 @@ public void merge(ColumnStatisticsImpl other) {
if (str.minimum != null) {
maximum = new Text(str.getMaximum());
minimum = new Text(str.getMinimum());
- } else {
+ }
+ /* str.minimum == null when lower bound set */
+ else if (str.isLowerBoundSet) {
+ minimum = new Text(str.getLowerBound());
+ isLowerBoundSet = true;
+
+ /* check for upper bound before setting max */
+ if (str.isUpperBoundSet) {
+ maximum = new Text(str.getUpperBound());
+ isUpperBoundSet = true;
+ } else {
+ maximum = new Text(str.getMaximum());
+ }
+ }
+ else {
/* both are empty */
maximum = minimum = null;
}
} else if (str.minimum != null) {
if (minimum.compareTo(str.minimum) > 0) {
- minimum = new Text(str.getMinimum());
+ if(str.isLowerBoundSet) {
+ minimum = new Text(str.getLowerBound());
+ isLowerBoundSet = true;
+ } else {
+ minimum = new Text(str.getMinimum());
--- End diff --
You could simplify this as:
minimum = new Text(str.minimum);
isLowerBoundSet = str.isLowerBoundSet;
---