This is an automated email from the ASF dual-hosted git repository. ayushsaxena pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
commit 5862ec66438538fbcf2c54c4f3b3f9cf107ed697 Author: Ayush Saxena <[email protected]> AuthorDate: Sat Feb 4 19:36:40 2023 +0530 Revert "HIVE-27000: Improve the modularity of the *ColumnStatsMerger classes (#3997). (Alessandro Solimando, reviewed by Ayush Saxena)" This reverts commit 35b151876fc87cc4a6b757b9172dad557e13904c. Reverting to correct the commit message --- .../aggr/DateColumnStatsAggregator.java | 10 +- .../aggr/DecimalColumnStatsAggregator.java | 10 +- .../aggr/DoubleColumnStatsAggregator.java | 10 +- .../aggr/LongColumnStatsAggregator.java | 10 +- .../aggr/TimestampColumnStatsAggregator.java | 10 +- .../columnstats/merge/BinaryColumnStatsMerger.java | 9 +- .../merge/BooleanColumnStatsMerger.java | 5 +- .../columnstats/merge/ColumnStatsMerger.java | 62 +---- .../merge/ColumnStatsMergerFactory.java | 6 +- .../columnstats/merge/DateColumnStatsMerger.java | 94 +++---- .../merge/DecimalColumnStatsMerger.java | 95 +++---- .../columnstats/merge/DoubleColumnStatsMerger.java | 94 +++---- .../columnstats/merge/LongColumnStatsMerger.java | 94 +++---- .../columnstats/merge/StringColumnStatsMerger.java | 36 +-- .../merge/TimestampColumnStatsMerger.java | 94 +++---- .../hive/metastore/utils/MetaStoreServerUtils.java | 2 +- .../merge/BinaryColumnStatsMergerTest.java | 64 ----- .../merge/BooleanColumnStatsMergerTest.java | 64 ----- .../columnstats/merge/ColumnStatsMergerTest.java | 119 +-------- .../merge/DateColumnStatsMergerTest.java | 242 ++++-------------- .../merge/DecimalColumnStatsMergerTest.java | 272 +++++++++++---------- .../merge/DoubleColumnStatsMergerTest.java | 240 ------------------ .../merge/LongColumnStatsMergerTest.java | 240 ------------------ .../merge/TimestampColumnStatsMergerTest.java | 241 ------------------ 24 files changed, 495 insertions(+), 1628 deletions(-) diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java index 211bd2e597f..9318a05596c 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java @@ -112,13 +112,11 @@ public class DateColumnStatsAggregator extends ColumnStatsAggregator implements if (aggregateData == null) { aggregateData = newData.deepCopy(); } else { - aggregateData.setLowValue(merger.mergeLowValue( - merger.getLowValue(aggregateData), merger.getLowValue(newData))); - aggregateData.setHighValue(merger.mergeHighValue( - merger.getHighValue(aggregateData), merger.getHighValue(newData))); + merger.setLowValue(aggregateData, newData); + merger.setHighValue(aggregateData, newData); - aggregateData.setNumNulls(merger.mergeNumNulls(aggregateData.getNumNulls(), newData.getNumNulls())); - aggregateData.setNumDVs(merger.mergeNumDVs(aggregateData.getNumDVs(), newData.getNumDVs())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } } if (areAllNDVEstimatorsMergeable && ndvEstimator != null) { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java index 0854ca41395..7d7e6251c64 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java @@ -115,13 +115,11 @@ public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implemen if (aggregateData == null) { aggregateData = newData.deepCopy(); } else { - aggregateData.setLowValue(merger.mergeLowValue( - merger.getLowValue(aggregateData), merger.getLowValue(newData))); - aggregateData.setHighValue(merger.mergeHighValue( - merger.getHighValue(aggregateData), merger.getHighValue(newData))); + merger.setLowValue(aggregateData, newData); + merger.setHighValue(aggregateData, newData); - aggregateData.setNumNulls(merger.mergeNumNulls(aggregateData.getNumNulls(), newData.getNumNulls())); - aggregateData.setNumDVs(merger.mergeNumDVs(aggregateData.getNumDVs(), newData.getNumDVs())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } } if (areAllNDVEstimatorsMergeable && ndvEstimator != null) { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java index 878a6e18082..2ce2c7281aa 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java @@ -110,13 +110,11 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement if (aggregateData == null) { aggregateData = newData.deepCopy(); } else { - aggregateData.setLowValue(merger.mergeLowValue( - merger.getLowValue(aggregateData), merger.getLowValue(newData))); - aggregateData.setHighValue(merger.mergeHighValue( - merger.getHighValue(aggregateData), merger.getHighValue(newData))); + merger.setLowValue(aggregateData, newData); + merger.setHighValue(aggregateData, newData); - aggregateData.setNumNulls(merger.mergeNumNulls(aggregateData.getNumNulls(), newData.getNumNulls())); - aggregateData.setNumDVs(merger.mergeNumDVs(aggregateData.getNumDVs(), newData.getNumDVs())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } } if (areAllNDVEstimatorsMergeable && ndvEstimator != null) { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java index 87077cbf7e0..d2999054a53 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java @@ -109,13 +109,11 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements if (aggregateData == null) { aggregateData = newData.deepCopy(); } else { - aggregateData.setLowValue(merger.mergeLowValue( - merger.getLowValue(aggregateData), merger.getLowValue(newData))); - aggregateData.setHighValue(merger.mergeHighValue( - merger.getHighValue(aggregateData), merger.getHighValue(newData))); + merger.setLowValue(aggregateData, newData); + merger.setHighValue(aggregateData, newData); - aggregateData.setNumNulls(merger.mergeNumNulls(aggregateData.getNumNulls(), newData.getNumNulls())); - aggregateData.setNumDVs(merger.mergeNumDVs(aggregateData.getNumDVs(), newData.getNumDVs())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } } if (areAllNDVEstimatorsMergeable && ndvEstimator != null) { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/TimestampColumnStatsAggregator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/TimestampColumnStatsAggregator.java index 5e8e51c8900..5962792cb5b 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/TimestampColumnStatsAggregator.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/TimestampColumnStatsAggregator.java @@ -113,13 +113,11 @@ public class TimestampColumnStatsAggregator extends ColumnStatsAggregator implem if (aggregateData == null) { aggregateData = newData.deepCopy(); } else { - aggregateData.setLowValue(merger.mergeLowValue( - merger.getLowValue(aggregateData), merger.getLowValue(newData))); - aggregateData.setHighValue(merger.mergeHighValue( - merger.getHighValue(aggregateData), merger.getHighValue(newData))); + merger.setLowValue(aggregateData, newData); + merger.setHighValue(aggregateData, newData); - aggregateData.setNumNulls(merger.mergeNumNulls(aggregateData.getNumNulls(), newData.getNumNulls())); - aggregateData.setNumDVs(merger.mergeNumDVs(aggregateData.getNumDVs(), newData.getNumDVs())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } } if (areAllNDVEstimatorsMergeable && ndvEstimator != null) { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java index 0ab43a6dcc3..1756db81e8e 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java @@ -24,7 +24,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class BinaryColumnStatsMerger extends ColumnStatsMerger<byte []> { +public class BinaryColumnStatsMerger extends ColumnStatsMerger { private static final Logger LOG = LoggerFactory.getLogger(BinaryColumnStatsMerger.class); @@ -34,9 +34,8 @@ public class BinaryColumnStatsMerger extends ColumnStatsMerger<byte []> { BinaryColumnStatsData aggregateData = aggregateColStats.getStatsData().getBinaryStats(); BinaryColumnStatsData newData = newColStats.getStatsData().getBinaryStats(); - - aggregateData.setMaxColLen(mergeMaxColLen(aggregateData.getMaxColLen(), newData.getMaxColLen())); - aggregateData.setAvgColLen(mergeAvgColLen(aggregateData.getAvgColLen(), newData.getAvgColLen())); - aggregateData.setNumNulls(mergeNumNulls(aggregateData.getNumNulls(), newData.getNumNulls())); + aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); + aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java index 86639ca2072..e33573c4840 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java @@ -24,7 +24,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class BooleanColumnStatsMerger extends ColumnStatsMerger<Boolean> { +public class BooleanColumnStatsMerger extends ColumnStatsMerger { private static final Logger LOG = LoggerFactory.getLogger(BooleanColumnStatsMerger.class); @@ -34,9 +34,8 @@ public class BooleanColumnStatsMerger extends ColumnStatsMerger<Boolean> { BooleanColumnStatsData aggregateData = aggregateColStats.getStatsData().getBooleanStats(); BooleanColumnStatsData newData = newColStats.getStatsData().getBooleanStats(); - aggregateData.setNumTrues(aggregateData.getNumTrues() + newData.getNumTrues()); aggregateData.setNumFalses(aggregateData.getNumFalses() + newData.getNumFalses()); - aggregateData.setNumNulls(mergeNumNulls(aggregateData.getNumNulls(), newData.getNumNulls())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMerger.java index 218757c8e18..8d4da8a5ab3 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMerger.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMerger.java @@ -20,15 +20,11 @@ package org.apache.hadoop.hive.metastore.columnstats.merge; import org.apache.hadoop.hive.common.histogram.KllHistogramEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.List; -import java.util.stream.Collectors; - -public abstract class ColumnStatsMerger<T> { +public abstract class ColumnStatsMerger { private static final Logger LOG = LoggerFactory.getLogger(ColumnStatsMerger.class); @@ -50,60 +46,4 @@ public abstract class ColumnStatsMerger<T> { } return oldEst; } - - protected long mergeNumDistinctValueEstimator(String columnName, List<NumDistinctValueEstimator> estimators, - long oldNumDVs, long newNumDVs) { - if (estimators == null || estimators.size() != 2) { - throw new IllegalArgumentException("NDV estimators list must be set and contain exactly two elements, " + - "found " + (estimators == null ? "null" : - estimators.stream().map(NumDistinctValueEstimator::toString).collect(Collectors.joining(", ")))); - } - - NumDistinctValueEstimator oldEst = estimators.get(0); - NumDistinctValueEstimator newEst = estimators.get(1); - if (oldEst == null && newEst == null) { - return mergeNumDVs(oldNumDVs, newNumDVs); - } - - if (oldEst == null) { - estimators.set(0, newEst); - return mergeNumDVs(oldNumDVs, newEst.estimateNumDistinctValues()); - } - - final long ndv; - if (oldEst.canMerge(newEst)) { - oldEst.mergeEstimators(newEst); - ndv = oldEst.estimateNumDistinctValues(); - return ndv; - } else { - ndv = mergeNumDVs(oldNumDVs, newNumDVs); - } - LOG.debug("Use bitvector to merge column {}'s ndvs of {} and {} to be {}", columnName, - oldNumDVs, newNumDVs, ndv); - return ndv; - } - - public T mergeLowValue(T oldValue, T newValue) { - throw new UnsupportedOperationException("This operation is not supported"); - } - - public T mergeHighValue(T oldValue, T newValue) { - throw new UnsupportedOperationException("This operation is not supported"); - } - - public long mergeNumDVs(long oldValue, long newValue) { - return Math.max(oldValue, newValue); - } - - public long mergeNumNulls(long oldValue, long newValue) { - return oldValue + newValue; - } - - public long mergeMaxColLen(long oldValue, long newValue) { - return Math.max(oldValue, newValue); - } - - public double mergeAvgColLen(double oldValue, double newValue) { - return Math.max(oldValue, newValue); - } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java index 273c9a69929..04a264942be 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java @@ -50,10 +50,10 @@ public class ColumnStatsMergerFactory { * two different types or if they are of an unknown type * @throws NullPointerException if statistics object is {@code null} */ - public static ColumnStatsMerger<?> getColumnStatsMerger(final ColumnStatisticsObj statsObjNew, + public static ColumnStatsMerger getColumnStatsMerger(final ColumnStatisticsObj statsObjNew, final ColumnStatisticsObj statsObjOld) { - Objects.requireNonNull(statsObjNew, "Column 1 statistics cannot be null"); - Objects.requireNonNull(statsObjOld, "Column 2 statistics cannot be null"); + Objects.requireNonNull(statsObjNew, "Column 1 statistcs cannot be null"); + Objects.requireNonNull(statsObjOld, "Column 2 statistcs cannot be null"); final _Fields typeNew = statsObjNew.getStatsData().getSetField(); final _Fields typeOld = statsObjOld.getStatsData().getSetField(); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java index 2f51af81f1d..12c50354843 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java @@ -32,10 +32,7 @@ import com.google.common.base.MoreObjects; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Arrays; -import java.util.List; - -public class DateColumnStatsMerger extends ColumnStatsMerger<Date> { +public class DateColumnStatsMerger extends ColumnStatsMerger { private static final Logger LOG = LoggerFactory.getLogger(DateColumnStatsMerger.class); @@ -46,57 +43,64 @@ public class DateColumnStatsMerger extends ColumnStatsMerger<Date> { DateColumnStatsDataInspector aggregateData = dateInspectorFromStats(aggregateColStats); DateColumnStatsDataInspector newData = dateInspectorFromStats(newColStats); - Date lowValue = mergeLowValue(getLowValue(aggregateData), getLowValue(newData)); - if (lowValue != null) { - aggregateData.setLowValue(lowValue); - } - Date highValue = mergeHighValue(getHighValue(aggregateData), getHighValue(newData)); - if (highValue != null) { - aggregateData.setHighValue(highValue); + setLowValue(aggregateData, newData); + setHighValue(aggregateData, newData); + + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); + NumDistinctValueEstimator newEst = newData.getNdvEstimator(); + final long ndv; + if (oldEst.canMerge(newEst)) { + oldEst.mergeEstimators(newEst); + ndv = oldEst.estimateNumDistinctValues(); + aggregateData.setNdvEstimator(oldEst); + } else { + ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); + } + LOG.debug("Use bitvector to merge column {}'s ndvs of {} and {} to be {}", aggregateColStats.getColName(), + aggregateData.getNumDVs(), newData.getNumDVs(), ndv); + aggregateData.setNumDVs(ndv); } - aggregateData.setNumNulls(mergeNumNulls(aggregateData.getNumNulls(), newData.getNumNulls())); - - NumDistinctValueEstimator oldNDVEst = aggregateData.getNdvEstimator(); - NumDistinctValueEstimator newNDVEst = newData.getNdvEstimator(); - List<NumDistinctValueEstimator> ndvEstimatorsList = Arrays.asList(oldNDVEst, newNDVEst); - aggregateData.setNumDVs(mergeNumDistinctValueEstimator(aggregateColStats.getColName(), - ndvEstimatorsList, aggregateData.getNumDVs(), newData.getNumDVs())); - aggregateData.setNdvEstimator(ndvEstimatorsList.get(0)); - KllHistogramEstimator oldKllEst = aggregateData.getHistogramEstimator(); - KllHistogramEstimator newKllEst = newData.getHistogramEstimator(); - aggregateData.setHistogramEstimator(mergeHistogramEstimator(aggregateColStats.getColName(), oldKllEst, newKllEst)); + KllHistogramEstimator oldEst = aggregateData.getHistogramEstimator(); + KllHistogramEstimator newEst = newData.getHistogramEstimator(); + aggregateData.setHistogramEstimator(mergeHistogramEstimator(aggregateColStats.getColName(), oldEst, newEst)); aggregateColStats.getStatsData().setDateStats(aggregateData); } - public Date getLowValue(DateColumnStatsDataInspector data) { - return data.isSetLowValue() ? data.getLowValue() : null; - } + public void setLowValue(DateColumnStatsDataInspector aggregateData, DateColumnStatsDataInspector newData) { + final Date aggregateLowValue = aggregateData.getLowValue(); + final Date newLowValue = newData.getLowValue(); + + final Date mergedLowValue; + if (!aggregateData.isSetLowValue() && !newData.isSetLowValue()) { + return; + } else if (aggregateData.isSetLowValue() && newData.isSetLowValue()) { + mergedLowValue = ObjectUtils.min(aggregateLowValue, newLowValue); + } else { + mergedLowValue = MoreObjects.firstNonNull(aggregateLowValue, newLowValue); + } - public Date getHighValue(DateColumnStatsDataInspector data) { - return data.isSetHighValue() ? data.getHighValue() : null; + aggregateData.setLowValue(mergedLowValue); } - @Override - public Date mergeLowValue(Date oldValue, Date newValue) { - if (oldValue != null && newValue != null) { - return ObjectUtils.min(oldValue, newValue); + public void setHighValue(DateColumnStatsDataInspector aggregateData, DateColumnStatsDataInspector newData) { + final Date aggregateHighValue = aggregateData.getHighValue(); + final Date newHighValue = newData.getHighValue(); + + final Date mergedHighValue; + if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) { + return; + } else if (aggregateData.isSetHighValue() && newData.isSetHighValue()) { + mergedHighValue = ObjectUtils.max(newHighValue, aggregateHighValue); + } else { + mergedHighValue = MoreObjects.firstNonNull(aggregateHighValue, newHighValue); } - if (oldValue != null || newValue != null) { - return MoreObjects.firstNonNull(oldValue, newValue); - } - return null; - } - @Override - public Date mergeHighValue(Date oldValue, Date newValue) { - if (oldValue != null && newValue != null) { - return ObjectUtils.max(oldValue, newValue); - } - if (oldValue != null || newValue != null) { - return MoreObjects.firstNonNull(oldValue, newValue); - } - return null; + aggregateData.setHighValue(mergedHighValue); } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java index 523f848ba44..e3737f7a2d5 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMerger.java @@ -33,10 +33,7 @@ import org.apache.commons.lang3.ObjectUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Arrays; -import java.util.List; - -public class DecimalColumnStatsMerger extends ColumnStatsMerger<Decimal> { +public class DecimalColumnStatsMerger extends ColumnStatsMerger { private static final Logger LOG = LoggerFactory.getLogger(DecimalColumnStatsMerger.class); @@ -47,57 +44,65 @@ public class DecimalColumnStatsMerger extends ColumnStatsMerger<Decimal> { DecimalColumnStatsDataInspector aggregateData = decimalInspectorFromStats(aggregateColStats); DecimalColumnStatsDataInspector newData = decimalInspectorFromStats(newColStats); - Decimal lowValue = mergeLowValue(getLowValue(aggregateData), getLowValue(newData)); - if (lowValue != null) { - aggregateData.setLowValue(lowValue); - } - Decimal highValue = mergeHighValue(getHighValue(aggregateData), getHighValue(newData)); - if (highValue != null) { - aggregateData.setHighValue(highValue); + setLowValue(aggregateData, newData); + setHighValue(aggregateData, newData); + + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + + if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); + NumDistinctValueEstimator newEst = newData.getNdvEstimator(); + final long ndv; + if (oldEst.canMerge(newEst)) { + oldEst.mergeEstimators(newEst); + ndv = oldEst.estimateNumDistinctValues(); + aggregateData.setNdvEstimator(oldEst); + } else { + ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); + } + LOG.debug("Use bitvector to merge column {}'s ndvs of {} and {} to be {}", aggregateColStats.getColName(), + aggregateData.getNumDVs(), newData.getNumDVs(), ndv); + aggregateData.setNumDVs(ndv); } - aggregateData.setNumNulls(mergeNumNulls(aggregateData.getNumNulls(), newData.getNumNulls())); - - NumDistinctValueEstimator oldNDVEst = aggregateData.getNdvEstimator(); - NumDistinctValueEstimator newNDVEst = newData.getNdvEstimator(); - List<NumDistinctValueEstimator> ndvEstimatorsList = Arrays.asList(oldNDVEst, newNDVEst); - aggregateData.setNumDVs(mergeNumDistinctValueEstimator(aggregateColStats.getColName(), - ndvEstimatorsList, aggregateData.getNumDVs(), newData.getNumDVs())); - aggregateData.setNdvEstimator(ndvEstimatorsList.get(0)); - KllHistogramEstimator oldKllEst = aggregateData.getHistogramEstimator(); - KllHistogramEstimator newKllEst = newData.getHistogramEstimator(); - aggregateData.setHistogramEstimator(mergeHistogramEstimator(aggregateColStats.getColName(), oldKllEst, newKllEst)); + KllHistogramEstimator oldEst = aggregateData.getHistogramEstimator(); + KllHistogramEstimator newEst = newData.getHistogramEstimator(); + aggregateData.setHistogramEstimator(mergeHistogramEstimator(aggregateColStats.getColName(), oldEst, newEst)); aggregateColStats.getStatsData().setDecimalStats(aggregateData); } - public Decimal getLowValue(DecimalColumnStatsDataInspector data) { - return data.isSetLowValue() ? data.getLowValue() : null; - } + public void setLowValue(DecimalColumnStatsDataInspector aggregateData, DecimalColumnStatsDataInspector newData) { + final Decimal aggregateLowValue = aggregateData.getLowValue(); + final Decimal newLowValue = newData.getLowValue(); + + final Decimal mergedLowValue; + if (!aggregateData.isSetLowValue() && !newData.isSetLowValue()) { + return; + } else if (aggregateData.isSetLowValue() && newData.isSetLowValue()) { + mergedLowValue = ObjectUtils.min(newLowValue, aggregateLowValue); + } else { + mergedLowValue = MoreObjects.firstNonNull(aggregateLowValue, newLowValue); + } - public Decimal getHighValue(DecimalColumnStatsDataInspector data) { - return data.isSetHighValue() ? data.getHighValue() : null; + aggregateData.setLowValue(mergedLowValue); } - @Override - public Decimal mergeLowValue(Decimal oldValue, Decimal newValue) { - if (oldValue != null && newValue != null) { - return ObjectUtils.min(oldValue, newValue); + public void setHighValue(DecimalColumnStatsDataInspector aggregateData, DecimalColumnStatsDataInspector newData) { + final Decimal aggregateHighValue = aggregateData.getHighValue(); + final Decimal newHighValue = newData.getHighValue(); + + final Decimal mergedHighValue; + if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) { + return; + } else if (aggregateData.isSetHighValue() && newData.isSetHighValue()) { + mergedHighValue = ObjectUtils.max(aggregateHighValue, newHighValue); + } else { + mergedHighValue = MoreObjects.firstNonNull(aggregateHighValue, newHighValue); } - if (oldValue != null || newValue != null) { - return MoreObjects.firstNonNull(oldValue, newValue); - } - return null; - } - @Override - public Decimal mergeHighValue(Decimal oldValue, Decimal newValue) { - if (oldValue != null && newValue != null) { - return ObjectUtils.max(oldValue, newValue); - } - if (oldValue != null || newValue != null) { - return MoreObjects.firstNonNull(oldValue, newValue); - } - return null; + aggregateData.setHighValue(mergedHighValue); } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java index da3e6ead04e..ff552b14329 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMerger.java @@ -26,12 +26,9 @@ import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataI import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Arrays; -import java.util.List; - import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.doubleInspectorFromStats; -public class DoubleColumnStatsMerger extends ColumnStatsMerger<Double> { +public class DoubleColumnStatsMerger extends ColumnStatsMerger { private static final Logger LOG = LoggerFactory.getLogger(DoubleColumnStatsMerger.class); @@ -41,58 +38,63 @@ public class DoubleColumnStatsMerger extends ColumnStatsMerger<Double> { DoubleColumnStatsDataInspector aggregateData = doubleInspectorFromStats(aggregateColStats); DoubleColumnStatsDataInspector newData = doubleInspectorFromStats(newColStats); - - Double lowValue = mergeLowValue(getLowValue(aggregateData), getLowValue(newData)); - if (lowValue != null) { - aggregateData.setLowValue(lowValue); - } - Double highValue = mergeHighValue(getHighValue(aggregateData), getHighValue(newData)); - if (highValue != null) { - aggregateData.setHighValue(highValue); + setLowValue(aggregateData, newData); + setHighValue(aggregateData, newData); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); + NumDistinctValueEstimator newEst = newData.getNdvEstimator(); + long ndv; + if (oldEst.canMerge(newEst)) { + oldEst.mergeEstimators(newEst); + ndv = oldEst.estimateNumDistinctValues(); + aggregateData.setNdvEstimator(oldEst); + } else { + ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); + } + LOG.debug("Use bitvector to merge column {}'s ndvs of {} and {} to be {}", + aggregateColStats.getColName(), aggregateData.getNumDVs(), newData.getNumDVs(), ndv); + aggregateData.setNumDVs(ndv); } - aggregateData.setNumNulls(mergeNumNulls(aggregateData.getNumNulls(), newData.getNumNulls())); - - NumDistinctValueEstimator oldNDVEst = aggregateData.getNdvEstimator(); - NumDistinctValueEstimator newNDVEst = newData.getNdvEstimator(); - List<NumDistinctValueEstimator> ndvEstimatorsList = Arrays.asList(oldNDVEst, newNDVEst); - aggregateData.setNumDVs(mergeNumDistinctValueEstimator(aggregateColStats.getColName(), - ndvEstimatorsList, aggregateData.getNumDVs(), newData.getNumDVs())); - aggregateData.setNdvEstimator(ndvEstimatorsList.get(0)); - KllHistogramEstimator oldKllEst = aggregateData.getHistogramEstimator(); - KllHistogramEstimator newKllEst = newData.getHistogramEstimator(); - aggregateData.setHistogramEstimator(mergeHistogramEstimator(aggregateColStats.getColName(), oldKllEst, newKllEst)); + KllHistogramEstimator oldEst = aggregateData.getHistogramEstimator(); + KllHistogramEstimator newEst = newData.getHistogramEstimator(); + aggregateData.setHistogramEstimator(mergeHistogramEstimator(aggregateColStats.getColName(), oldEst, newEst)); aggregateColStats.getStatsData().setDoubleStats(aggregateData); } - public Double getLowValue(DoubleColumnStatsDataInspector data) { - return data.isSetLowValue() ? data.getLowValue() : null; - } + public void setLowValue(DoubleColumnStatsDataInspector aggregateData, DoubleColumnStatsDataInspector newData) { + final double lowValue; + + if (aggregateData.isSetLowValue() && newData.isSetLowValue()) { + lowValue = Math.min(aggregateData.getLowValue(), newData.getLowValue()); + } else if (aggregateData.isSetLowValue()) { + lowValue = aggregateData.getLowValue(); + } else if (newData.isSetLowValue()) { + lowValue = newData.getLowValue(); + } else { + return; + } - public Double getHighValue(DoubleColumnStatsDataInspector data) { - return data.isSetHighValue() ? data.getHighValue() : null; + aggregateData.setLowValue(lowValue); } - @Override - public Double mergeLowValue(Double oldValue, Double newValue) { - if (oldValue != null && newValue != null) { - return Math.min(oldValue, newValue); - } else if (oldValue != null) { - return oldValue; + public void setHighValue(DoubleColumnStatsDataInspector aggregateData, DoubleColumnStatsDataInspector newData) { + final double highValue; + + if (aggregateData.isSetHighValue() && newData.isSetHighValue()) { + highValue = Math.max(aggregateData.getHighValue(), newData.getHighValue()); + } else if (aggregateData.isSetHighValue()) { + highValue = aggregateData.getHighValue(); + } else if (newData.isSetHighValue()) { + highValue = newData.getHighValue(); + } else { + return; } - // it can be null - return newValue; - } - @Override - public Double mergeHighValue(Double oldValue, Double newValue) { - if (oldValue != null && newValue != null) { - return Math.max(oldValue, newValue); - } else if (oldValue != null) { - return oldValue; - } - // it can be null - return newValue; + aggregateData.setHighValue(highValue); } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java index ee2753820d8..9cd4ba72c5d 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMerger.java @@ -26,12 +26,9 @@ import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataIns import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Arrays; -import java.util.List; - import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.longInspectorFromStats; -public class LongColumnStatsMerger extends ColumnStatsMerger<Long> { +public class LongColumnStatsMerger extends ColumnStatsMerger { private static final Logger LOG = LoggerFactory.getLogger(LongColumnStatsMerger.class); @@ -41,58 +38,63 @@ public class LongColumnStatsMerger extends ColumnStatsMerger<Long> { LongColumnStatsDataInspector aggregateData = longInspectorFromStats(aggregateColStats); LongColumnStatsDataInspector newData = longInspectorFromStats(newColStats); - - Long lowValue = mergeLowValue(getLowValue(aggregateData), getLowValue(newData)); - if (lowValue != null) { - aggregateData.setLowValue(lowValue); - } - Long highValue = mergeHighValue(getHighValue(aggregateData), getHighValue(newData)); - if (highValue != null) { - aggregateData.setHighValue(highValue); + setLowValue(aggregateData, newData); + setHighValue(aggregateData, newData); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); + NumDistinctValueEstimator newEst = newData.getNdvEstimator(); + final long ndv; + if (oldEst.canMerge(newEst)) { + oldEst.mergeEstimators(newEst); + ndv = oldEst.estimateNumDistinctValues(); + aggregateData.setNdvEstimator(oldEst); + } else { + ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); + } + LOG.debug("Use bitvector to merge column {}'s ndvs of {} and {} to be {}", aggregateColStats.getColName(), + aggregateData.getNumDVs(), newData.getNumDVs(), ndv); + aggregateData.setNumDVs(ndv); } - aggregateData.setNumNulls(mergeNumNulls(aggregateData.getNumNulls(), newData.getNumNulls())); - NumDistinctValueEstimator oldNDVEst = aggregateData.getNdvEstimator(); - NumDistinctValueEstimator newNDVEst = newData.getNdvEstimator(); - List<NumDistinctValueEstimator> ndvEstimatorsList = Arrays.asList(oldNDVEst, newNDVEst); - aggregateData.setNumDVs(mergeNumDistinctValueEstimator(aggregateColStats.getColName(), - ndvEstimatorsList, aggregateData.getNumDVs(), newData.getNumDVs())); - aggregateData.setNdvEstimator(ndvEstimatorsList.get(0)); - - KllHistogramEstimator oldKllEst = aggregateData.getHistogramEstimator(); - KllHistogramEstimator newKllEst = newData.getHistogramEstimator(); - aggregateData.setHistogramEstimator(mergeHistogramEstimator(aggregateColStats.getColName(), oldKllEst, newKllEst)); + KllHistogramEstimator oldEst = aggregateData.getHistogramEstimator(); + KllHistogramEstimator newEst = newData.getHistogramEstimator(); + aggregateData.setHistogramEstimator(mergeHistogramEstimator(aggregateColStats.getColName(), oldEst, newEst)); aggregateColStats.getStatsData().setLongStats(aggregateData); } - public Long getLowValue(LongColumnStatsDataInspector data) { - return data.isSetLowValue() ? data.getLowValue() : null; - } + public void setLowValue(LongColumnStatsDataInspector aggregateData, LongColumnStatsDataInspector newData) { + final long lowValue; + + if (aggregateData.isSetLowValue() && newData.isSetLowValue()) { + lowValue = Math.min(aggregateData.getLowValue(), newData.getLowValue()); + } else if (aggregateData.isSetLowValue()) { + lowValue = aggregateData.getLowValue(); + } else if (newData.isSetLowValue()) { + lowValue = newData.getLowValue(); + } else { + return; + } - public Long getHighValue(LongColumnStatsDataInspector data) { - return data.isSetHighValue() ? data.getHighValue() : null; + aggregateData.setLowValue(lowValue); } - @Override - public Long mergeLowValue(Long oldValue, Long newValue) { - if (oldValue != null && newValue != null) { - return Math.min(oldValue, newValue); - } else if (oldValue != null) { - return oldValue; + public void setHighValue(LongColumnStatsDataInspector aggregateData, LongColumnStatsDataInspector newData) { + final long highValue; + + if (aggregateData.isSetHighValue() && newData.isSetHighValue()) { + highValue = Math.max(aggregateData.getHighValue(), newData.getHighValue()); + } else if (aggregateData.isSetHighValue()) { + highValue = aggregateData.getHighValue(); + } else if (newData.isSetHighValue()) { + highValue = newData.getHighValue(); + } else { + return; } - // it can be null - return newValue; - } - @Override - public Long mergeHighValue(Long oldValue, Long newValue) { - if (oldValue != null && newValue != null) { - return Math.max(oldValue, newValue); - } else if (oldValue != null) { - return oldValue; - } - // it can be null - return newValue; + aggregateData.setHighValue(highValue); } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java index 591c53437fa..7bd5b72802e 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/StringColumnStatsMerger.java @@ -25,12 +25,9 @@ import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataI import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Arrays; -import java.util.List; - import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.stringInspectorFromStats; -public class StringColumnStatsMerger extends ColumnStatsMerger<String> { +public class StringColumnStatsMerger extends ColumnStatsMerger { private static final Logger LOG = LoggerFactory.getLogger(StringColumnStatsMerger.class); @@ -40,17 +37,26 @@ public class StringColumnStatsMerger extends ColumnStatsMerger<String> { StringColumnStatsDataInspector aggregateData = stringInspectorFromStats(aggregateColStats); StringColumnStatsDataInspector newData = stringInspectorFromStats(newColStats); - - aggregateData.setMaxColLen(mergeMaxColLen(aggregateData.getMaxColLen(), newData.getMaxColLen())); - aggregateData.setAvgColLen(mergeAvgColLen(aggregateData.getAvgColLen(), newData.getAvgColLen())); - aggregateData.setNumNulls(mergeNumNulls(aggregateData.getNumNulls(), newData.getNumNulls())); - - NumDistinctValueEstimator oldNDVEst = aggregateData.getNdvEstimator(); - NumDistinctValueEstimator newNDVEst = newData.getNdvEstimator(); - List<NumDistinctValueEstimator> ndvEstimatorsList = Arrays.asList(oldNDVEst, newNDVEst); - aggregateData.setNumDVs(mergeNumDistinctValueEstimator(aggregateColStats.getColName(), - ndvEstimatorsList, aggregateData.getNumDVs(), newData.getNumDVs())); - aggregateData.setNdvEstimator(ndvEstimatorsList.get(0)); + aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); + aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); + NumDistinctValueEstimator newEst = newData.getNdvEstimator(); + final long ndv; + if (oldEst.canMerge(newEst)) { + oldEst.mergeEstimators(newEst); + ndv = oldEst.estimateNumDistinctValues(); + aggregateData.setNdvEstimator(oldEst); + } else { + ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); + } + LOG.debug("Use bitvector to merge column {}'s ndvs of {} and {} to be {}", aggregateColStats.getColName(), + aggregateData.getNumDVs(), newData.getNumDVs(), ndv); + aggregateData.setNumDVs(ndv); + } aggregateColStats.getStatsData().setStringStats(aggregateData); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMerger.java index 7198d909a9d..7ecdc2139ad 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMerger.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMerger.java @@ -31,10 +31,7 @@ import com.google.common.base.MoreObjects; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Arrays; -import java.util.List; - -public class TimestampColumnStatsMerger extends ColumnStatsMerger<Timestamp> { +public class TimestampColumnStatsMerger extends ColumnStatsMerger { private static final Logger LOG = LoggerFactory.getLogger(TimestampColumnStatsMerger.class); @@ -45,57 +42,64 @@ public class TimestampColumnStatsMerger extends ColumnStatsMerger<Timestamp> { TimestampColumnStatsDataInspector aggregateData = timestampInspectorFromStats(aggregateColStats); TimestampColumnStatsDataInspector newData = timestampInspectorFromStats(newColStats); - Timestamp lowValue = mergeLowValue(getLowValue(aggregateData), getLowValue(newData)); - if (lowValue != null) { - aggregateData.setLowValue(lowValue); - } - Timestamp highValue = mergeHighValue(getHighValue(aggregateData), getHighValue(newData)); - if (highValue != null) { - aggregateData.setHighValue(highValue); + setLowValue(aggregateData, newData); + setHighValue(aggregateData, newData); + + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); + NumDistinctValueEstimator newEst = newData.getNdvEstimator(); + final long ndv; + if (oldEst.canMerge(newEst)) { + oldEst.mergeEstimators(newEst); + ndv = oldEst.estimateNumDistinctValues(); + aggregateData.setNdvEstimator(oldEst); + } else { + ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); + } + LOG.debug("Use bitvector to merge column {}'s ndvs of {} and {} to be {}", aggregateColStats.getColName(), + aggregateData.getNumDVs(), newData.getNumDVs(), ndv); + aggregateData.setNumDVs(ndv); } - aggregateData.setNumNulls(mergeNumNulls(aggregateData.getNumNulls(), newData.getNumNulls())); - - NumDistinctValueEstimator oldNDVEst = aggregateData.getNdvEstimator(); - NumDistinctValueEstimator newNDVEst = newData.getNdvEstimator(); - List<NumDistinctValueEstimator> ndvEstimatorsList = Arrays.asList(oldNDVEst, newNDVEst); - aggregateData.setNumDVs(mergeNumDistinctValueEstimator(aggregateColStats.getColName(), - ndvEstimatorsList, aggregateData.getNumDVs(), newData.getNumDVs())); - aggregateData.setNdvEstimator(ndvEstimatorsList.get(0)); - KllHistogramEstimator oldKllEst = aggregateData.getHistogramEstimator(); - KllHistogramEstimator newKllEst = newData.getHistogramEstimator(); - aggregateData.setHistogramEstimator(mergeHistogramEstimator(aggregateColStats.getColName(), oldKllEst, newKllEst)); + KllHistogramEstimator oldEst = aggregateData.getHistogramEstimator(); + KllHistogramEstimator newEst = newData.getHistogramEstimator(); + aggregateData.setHistogramEstimator(mergeHistogramEstimator(aggregateColStats.getColName(), oldEst, newEst)); aggregateColStats.getStatsData().setTimestampStats(aggregateData); } - public Timestamp getLowValue(TimestampColumnStatsDataInspector data) { - return data.isSetLowValue() ? data.getLowValue() : null; - } + public void setLowValue(TimestampColumnStatsDataInspector aggregateData, TimestampColumnStatsDataInspector newData) { + final Timestamp aggregateLowValue = aggregateData.getLowValue(); + final Timestamp newLowValue = newData.getLowValue(); + + final Timestamp mergedLowValue; + if (!aggregateData.isSetLowValue() && !newData.isSetLowValue()) { + return; + } else if (aggregateData.isSetLowValue() && newData.isSetLowValue()) { + mergedLowValue = ObjectUtils.min(newLowValue, aggregateLowValue); + } else { + mergedLowValue = MoreObjects.firstNonNull(aggregateLowValue, newLowValue); + } - public Timestamp getHighValue(TimestampColumnStatsDataInspector data) { - return data.isSetHighValue() ? data.getHighValue() : null; + aggregateData.setLowValue(mergedLowValue); } - @Override - public Timestamp mergeLowValue(Timestamp oldValue, Timestamp newValue) { - if (oldValue != null && newValue != null) { - return ObjectUtils.min(oldValue, newValue); + public void setHighValue(TimestampColumnStatsDataInspector aggregateData, TimestampColumnStatsDataInspector newData) { + final Timestamp aggregateHighValue = aggregateData.getHighValue(); + final Timestamp newHighValue = newData.getHighValue(); + + final Timestamp mergedHighValue; + if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) { + return; + } else if (aggregateData.isSetHighValue() && newData.isSetHighValue()) { + mergedHighValue = ObjectUtils.max(aggregateHighValue, newHighValue); + } else { + mergedHighValue = MoreObjects.firstNonNull(aggregateHighValue, newHighValue); } - if (oldValue != null || newValue != null) { - return MoreObjects.firstNonNull(oldValue, newValue); - } - return null; - } - @Override - public Timestamp mergeHighValue(Timestamp oldValue, Timestamp newValue) { - if (oldValue != null && newValue != null) { - return ObjectUtils.max(oldValue, newValue); - } - if (oldValue != null || newValue != null) { - return MoreObjects.firstNonNull(oldValue, newValue); - } - return null; + aggregateData.setHighValue(mergedHighValue); } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java index d487752f1a3..5e3a3311294 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java @@ -763,7 +763,7 @@ public class MetaStoreServerUtils { assert (statsObjNew.getStatsData().getSetField() == statsObjOld.getStatsData() .getSetField()); // If statsObjOld is found, we can merge. - ColumnStatsMerger<?> merger = ColumnStatsMergerFactory.getColumnStatsMerger(statsObjNew, + ColumnStatsMerger merger = ColumnStatsMergerFactory.getColumnStatsMerger(statsObjNew, statsObjOld); merger.merge(statsObjNew, statsObjOld); } diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMergerTest.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMergerTest.java deleted file mode 100644 index 0b49f5a172c..00000000000 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMergerTest.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hadoop.hive.metastore.columnstats.merge; - -import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.columnstats.ColStatsBuilder; -import org.junit.Test; -import org.junit.experimental.categories.Category; - -import static org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerTest.createColumnStatisticsObj; -import static org.junit.Assert.assertEquals; - -@Category(MetastoreUnitTest.class) -public class BinaryColumnStatsMergerTest { - private static final BinaryColumnStatsMerger MERGER = new BinaryColumnStatsMerger(); - - @Test - public void testMergeNonNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(byte[].class) - .avgColLen(3) - .maxColLen(2) - .numNulls(2) - .build()); - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(byte[].class) - .avgColLen(2) - .maxColLen(3) - .numNulls(3) - .build()); - MERGER.merge(aggrObj, newObj); - - newObj = createColumnStatisticsObj(new ColStatsBuilder<>(byte[].class) - .avgColLen(3) - .maxColLen(3) - .numNulls(1) - .build()); - MERGER.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(byte[].class) - .avgColLen(3) - .maxColLen(3) - .numNulls(6) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } -} diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMergerTest.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMergerTest.java deleted file mode 100644 index 03eac017a65..00000000000 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMergerTest.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hadoop.hive.metastore.columnstats.merge; - -import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.columnstats.ColStatsBuilder; -import org.junit.Test; -import org.junit.experimental.categories.Category; - -import static org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerTest.createColumnStatisticsObj; -import static org.junit.Assert.assertEquals; - -@Category(MetastoreUnitTest.class) -public class BooleanColumnStatsMergerTest { - private static final BooleanColumnStatsMerger MERGER = new BooleanColumnStatsMerger(); - - @Test - public void testMergeNonNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(Boolean.class) - .numFalses(1) - .numTrues(2) - .numNulls(2) - .build()); - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(Boolean.class) - .numFalses(1) - .numTrues(2) - .numNulls(3) - .build()); - MERGER.merge(aggrObj, newObj); - - newObj = createColumnStatisticsObj(new ColStatsBuilder<>(Boolean.class) - .numFalses(1) - .numTrues(1) - .numNulls(1) - .build()); - MERGER.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(Boolean.class) - .numFalses(3) - .numTrues(5) - .numNulls(6) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } -} diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerTest.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerTest.java index 7386782b981..30798fc875c 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerTest.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerTest.java @@ -23,72 +23,24 @@ import com.google.common.primitives.Longs; import org.apache.datasketches.kll.KllFloatsSketch; import org.apache.hadoop.hive.common.histogram.KllHistogramEstimator; import org.apache.hadoop.hive.common.histogram.KllHistogramEstimatorFactory; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.metastore.StatisticsTestUtils; import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.junit.Assert; import org.junit.Test; import org.junit.experimental.categories.Category; -import java.util.Arrays; -import java.util.List; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; - @Category(MetastoreUnitTest.class) public class ColumnStatsMergerTest { // the implementation we use does not matter, as we only tests methods from the parent plan here - private final static ColumnStatsMerger<?> MERGER = new DateColumnStatsMerger(); - - private final static List<ColumnStatsMerger<?>> MERGERS = Arrays.asList( - new BinaryColumnStatsMerger(), - new BooleanColumnStatsMerger(), - new DateColumnStatsMerger(), - new DecimalColumnStatsMerger(), - new DoubleColumnStatsMerger(), - new LongColumnStatsMerger(), - new StringColumnStatsMerger(), - new TimestampColumnStatsMerger() - ); + private final static ColumnStatsMerger MERGER = new DateColumnStatsMerger(); private final static long[] VALUES_1 = { 1, 2 }; private final static long[] VALUES_2 = { 1, 3 }; - private final static HyperLogLog HLL_1 = StatisticsTestUtils.createHll(VALUES_1); - private final static HyperLogLog HLL_2 = StatisticsTestUtils.createHll(VALUES_2); - private final static KllFloatsSketch KLL_1 = StatisticsTestUtils.createKll(VALUES_1); private final static KllFloatsSketch KLL_2 = StatisticsTestUtils.createKll(VALUES_2); - @Test - public void testMergeNumDVs() { - assertEquals(3, MERGER.mergeNumDVs(1, 3)); - assertEquals(3, MERGER.mergeNumDVs(3, 1)); - } - - @Test - public void testMergeNumNulls() { - assertEquals(4, MERGER.mergeNumNulls(1, 3)); - assertEquals(4, MERGER.mergeNumNulls(3, 1)); - } - - @Test - public void testMergeMaxColLen() { - assertEquals(3, MERGER.mergeMaxColLen(1, 3)); - assertEquals(3, MERGER.mergeMaxColLen(3, 1)); - } - - @Test - public void testMergeAvgColLen() { - assertEquals(3, MERGER.mergeAvgColLen(1, 3), Double.MIN_VALUE); - assertEquals(3, MERGER.mergeAvgColLen(3, 1), Double.MIN_VALUE); - } - @Test public void testMergeNonNullHistogramEstimators() { KllHistogramEstimator estimator1 = @@ -102,7 +54,7 @@ public class ColumnStatsMergerTest { KllHistogramEstimator expectedEstimator = KllHistogramEstimatorFactory.getKllHistogramEstimator(expectedKll.toByteArray()); - assertEquals(expectedEstimator.getSketch().toString(), computedEstimator.getSketch().toString()); + Assert.assertEquals(expectedEstimator.getSketch().toString(), computedEstimator.getSketch().toString()); } @Test @@ -112,7 +64,7 @@ public class ColumnStatsMergerTest { KllHistogramEstimator computedEstimator = MERGER.mergeHistogramEstimator("", null, estimator2); - assertEquals(estimator2.getSketch().toString(), computedEstimator.getSketch().toString()); + Assert.assertEquals(estimator2.getSketch().toString(), computedEstimator.getSketch().toString()); } @Test @@ -122,70 +74,11 @@ public class ColumnStatsMergerTest { KllHistogramEstimator computedEstimator = MERGER.mergeHistogramEstimator("", estimator1, null); - assertEquals(estimator1.getSketch().toString(), computedEstimator.getSketch().toString()); + Assert.assertEquals(estimator1.getSketch().toString(), computedEstimator.getSketch().toString()); } @Test public void testMergeNullHistogramEstimators() { - assertNull(MERGER.mergeHistogramEstimator("", null, null)); - } - - @Test - public void testMergeNonNullNDVEstimators() { - NumDistinctValueEstimator estimator1 = - NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(HLL_1.serialize()); - NumDistinctValueEstimator estimator2 = - NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(HLL_2.serialize()); - - for (ColumnStatsMerger<?> MERGER : MERGERS) { - long computedNDV = MERGER.mergeNumDistinctValueEstimator( - "", Arrays.asList(estimator1, estimator2), 2, 2); - assertEquals(3, computedNDV); - } - } - - @Test - public void testMergeNDVEstimatorsFirstNull() { - NumDistinctValueEstimator estimator2 = - NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(HLL_2.serialize()); - - for (ColumnStatsMerger<?> MERGER : MERGERS) { - List<NumDistinctValueEstimator> estimatorList = Arrays.asList(null, estimator2); - long computedNDV = MERGER.mergeNumDistinctValueEstimator("", estimatorList, 1, 2); - - assertEquals(estimator2, estimatorList.get(0)); - assertEquals(2, computedNDV); - } - } - - @Test - public void testMergeNDVEstimatorsSecondNull() { - NumDistinctValueEstimator estimator1 = - NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(HLL_1.serialize()); - - for (ColumnStatsMerger<?> MERGER : MERGERS) { - List<NumDistinctValueEstimator> estimatorList = Arrays.asList(estimator1, null); - long computedNDV = MERGER.mergeNumDistinctValueEstimator("", estimatorList, 2, 1); - - assertEquals(Arrays.asList(estimator1, null), estimatorList); - assertEquals(2, computedNDV); - } - } - - @Test - public void testMergeNullNDVEstimators() { - List<NumDistinctValueEstimator> estimatorList = Arrays.asList(null, null); - - for (ColumnStatsMerger<?> MERGER : MERGERS) { - long computedNDV = MERGER.mergeNumDistinctValueEstimator("", estimatorList, 1, 2); - assertEquals(2, computedNDV); - assertEquals(Arrays.asList(null, null), estimatorList); - } - } - - protected static ColumnStatisticsObj createColumnStatisticsObj(ColumnStatisticsData columnStatisticsData) { - ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj(); - columnStatisticsObj.setStatsData(columnStatisticsData); - return columnStatisticsObj; + Assert.assertNull(MERGER.mergeHistogramEstimator("", null, null)); } } diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMergerTest.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMergerTest.java index e4ff7698fba..e41339d84b1 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMergerTest.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMergerTest.java @@ -23,15 +23,11 @@ import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Date; -import org.apache.hadoop.hive.metastore.columnstats.ColStatsBuilder; import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; +import org.junit.Assert; import org.junit.Test; import org.junit.experimental.categories.Category; -import static org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerTest.createColumnStatisticsObj; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; - @Category(MetastoreUnitTest.class) public class DateColumnStatsMergerTest { @@ -39,210 +35,74 @@ public class DateColumnStatsMergerTest { private static final Date DATE_2 = new Date(2); private static final Date DATE_3 = new Date(3); - private static final DateColumnStatsDataInspector DATA_1 = new DateColumnStatsDataInspector(); - private static final DateColumnStatsDataInspector DATA_2 = new DateColumnStatsDataInspector(); - private static final DateColumnStatsDataInspector DATA_3 = new DateColumnStatsDataInspector(); - - static { - DATA_1.setLowValue(DATE_1); - DATA_1.setHighValue(DATE_1); - DATA_2.setLowValue(DATE_2); - DATA_2.setHighValue(DATE_2); - DATA_3.setLowValue(DATE_3); - DATA_3.setHighValue(DATE_3); - } - - private final DateColumnStatsMerger merger = new DateColumnStatsMerger(); + private ColumnStatsMerger merger = new DateColumnStatsMerger(); @Test - public void testMergeNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(Date.class) - .low(null) - .high(null) - .numNulls(1) - .numDVs(0) - .build()); - merger.merge(aggrObj, aggrObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(Date.class) - .low(null) - .high(null) - .numNulls(2) - .numDVs(0) - .build(); - - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } + public void testMergeNullMinMaxValues() { + ColumnStatisticsObj old = new ColumnStatisticsObj(); + createData(old, null, null); - @Test - public void testMergeNullWithNonNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(Date.class) - .low(null) - .high(null) - .numNulls(0) - .numDVs(0) - .build()); - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(Date.class) - .low(DATE_1) - .high(DATE_3) - .numNulls(4) - .numDVs(2) - .hll(DATE_1.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch()) - .kll(DATE_1.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch()) - .build()); - merger.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(Date.class) - .low(DATE_1) - .high(DATE_3) - .numNulls(4) - .numDVs(2) - .hll(DATE_1.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch()) - .kll(DATE_1.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch()) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } + merger.merge(old, old); - @Test - public void testMergeNonNullWithNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(Date.class) - .low(DATE_1) - .high(DATE_3) - .numNulls(4) - .numDVs(2) - .hll(DATE_1.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch()) - .kll(DATE_1.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch()) - .build()); - - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(Date.class) - .low(null) - .high(null) - .numNulls(2) - .numDVs(0) - .build()); - merger.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(Date.class) - .low(DATE_1) - .high(DATE_3) - .numNulls(6) - .numDVs(2) - .hll(DATE_1.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch()) - .kll(DATE_1.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch()) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } - - @Test - public void testMergeNonNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(Date.class) - .low(DATE_2) - .high(DATE_2) - .numNulls(2) - .numDVs(1) - .hll(DATE_2.getDaysSinceEpoch()) - .kll(DATE_2.getDaysSinceEpoch()) - .build()); - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(Date.class) - .low(DATE_3) - .high(DATE_3) - .numNulls(3) - .numDVs(1) - .hll(DATE_3.getDaysSinceEpoch()) - .kll(DATE_3.getDaysSinceEpoch()) - .build()); - merger.merge(aggrObj, newObj); - - newObj = createColumnStatisticsObj(new ColStatsBuilder<>(Date.class) - .low(DATE_1) - .high(DATE_1) - .numNulls(1) - .numDVs(1) - .hll(DATE_1.getDaysSinceEpoch(), DATE_1.getDaysSinceEpoch()) - .kll(DATE_1.getDaysSinceEpoch(), DATE_1.getDaysSinceEpoch()) - .build()); - merger.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(Date.class) - .low(DATE_1) - .high(DATE_3) - .numNulls(6) - .numDVs(3) - .hll(DATE_2.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch(), - DATE_1.getDaysSinceEpoch(), DATE_1.getDaysSinceEpoch()) - .kll(DATE_2.getDaysSinceEpoch(), DATE_3.getDaysSinceEpoch(), - DATE_1.getDaysSinceEpoch(), DATE_1.getDaysSinceEpoch()) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); + Assert.assertNull(old.getStatsData().getDateStats().getLowValue()); + Assert.assertNull(old.getStatsData().getDateStats().getHighValue()); } @Test - public void testCompareSimple() { - DateColumnStatsDataInspector data1 = new DateColumnStatsDataInspector(DATA_1); - DateColumnStatsDataInspector data2 = new DateColumnStatsDataInspector(DATA_2); - assertEquals(DATE_2, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); - } + public void testMergeNulls() { + ColumnStatisticsObj oldObj = new ColumnStatisticsObj(); + createData(oldObj, null, null); - @Test - public void testCompareSimpleFlipped() { - DateColumnStatsDataInspector data1 = new DateColumnStatsDataInspector(DATA_2); - DateColumnStatsDataInspector data2 = new DateColumnStatsDataInspector(DATA_1); - assertEquals(DATE_2, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); - } + ColumnStatisticsObj newObj; - @Test - public void testCompareSimpleReversed() { - DateColumnStatsDataInspector data1 = new DateColumnStatsDataInspector(DATA_1); - DateColumnStatsDataInspector data2 = new DateColumnStatsDataInspector(DATA_2); - assertEquals(DATE_1, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); - } + newObj = new ColumnStatisticsObj(); + createData(newObj, null, null); + merger.merge(oldObj, newObj); - @Test - public void testCompareSimpleFlippedReversed() { - DateColumnStatsDataInspector data1 = new DateColumnStatsDataInspector(DATA_2); - DateColumnStatsDataInspector data2 = new DateColumnStatsDataInspector(DATA_1); - assertEquals(DATE_1, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); - } + Assert.assertEquals(null, oldObj.getStatsData().getDateStats().getLowValue()); + Assert.assertEquals(null, oldObj.getStatsData().getDateStats().getHighValue()); - @Test - public void testCompareNullsMin() { - DateColumnStatsDataInspector data1 = new DateColumnStatsDataInspector(); - DateColumnStatsDataInspector data2 = new DateColumnStatsDataInspector(); - assertNull(merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); - } + newObj = new ColumnStatisticsObj(); + createData(newObj, DATE_1, DATE_3); + merger.merge(oldObj, newObj); - @Test - public void testCompareNullsMax() { - DateColumnStatsDataInspector data1 = new DateColumnStatsDataInspector(); - DateColumnStatsDataInspector data2 = new DateColumnStatsDataInspector(); - assertNull(merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); - } + newObj = new ColumnStatisticsObj(); + createData(newObj, null, null); + merger.merge(oldObj, newObj); - @Test - public void testCompareFirstNullMin() { - DateColumnStatsDataInspector data1 = new DateColumnStatsDataInspector(); - DateColumnStatsDataInspector data2 = new DateColumnStatsDataInspector(DATA_1); - assertEquals(DATE_1, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); + Assert.assertEquals(DATE_1, oldObj.getStatsData().getDateStats().getLowValue()); + Assert.assertEquals(DATE_3, oldObj.getStatsData().getDateStats().getHighValue()); } @Test - public void testCompareSecondNullMin() { - DateColumnStatsDataInspector data1 = new DateColumnStatsDataInspector(DATA_1); - DateColumnStatsDataInspector data2 = new DateColumnStatsDataInspector(); - assertEquals(DATE_1, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); - } + public void testMergeNonNullAndNullLowerValuesNewIsNull() { + ColumnStatisticsObj oldObj = new ColumnStatisticsObj(); + createData(oldObj, DATE_2, DATE_2); - @Test - public void testCompareFirstNullMax() { - DateColumnStatsDataInspector data1 = new DateColumnStatsDataInspector(DATA_1); - DateColumnStatsDataInspector data2 = new DateColumnStatsDataInspector(); - assertEquals(DATE_1, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); + ColumnStatisticsObj newObj; + + newObj = new ColumnStatisticsObj(); + createData(newObj, DATE_3, DATE_3); + merger.merge(oldObj, newObj); + + newObj = new ColumnStatisticsObj(); + createData(newObj, DATE_1, DATE_1); + merger.merge(oldObj, newObj); + + Assert.assertEquals(DATE_1, oldObj.getStatsData().getDateStats().getLowValue()); + Assert.assertEquals(DATE_3, oldObj.getStatsData().getDateStats().getHighValue()); } - @Test - public void testCompareSecondNullMax() { - DateColumnStatsDataInspector data1 = new DateColumnStatsDataInspector(); - DateColumnStatsDataInspector data2 = new DateColumnStatsDataInspector(DATA_1); - assertEquals(DATE_1, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); + private DateColumnStatsDataInspector createData(ColumnStatisticsObj objNulls, Date lowValue, + Date highValue) { + ColumnStatisticsData statisticsData = new ColumnStatisticsData(); + DateColumnStatsDataInspector data = new DateColumnStatsDataInspector(); + + statisticsData.setDateStats(data); + objNulls.setStatsData(statisticsData); + + data.setLowValue(lowValue); + data.setHighValue(highValue); + return data; } } diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java index 7e19cbfcad3..a9d55eadf04 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DecimalColumnStatsMergerTest.java @@ -24,21 +24,14 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Decimal; import org.apache.hadoop.hive.metastore.api.utils.DecimalUtils; -import org.apache.hadoop.hive.metastore.columnstats.ColStatsBuilder; import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; +import org.junit.Assert; import org.junit.Test; import org.junit.experimental.categories.Category; -import static org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerTest.createColumnStatisticsObj; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; - @Category(MetastoreUnitTest.class) public class DecimalColumnStatsMergerTest { - private static final Decimal DECIMAL_1 = DecimalUtils.getDecimal(1, 0); private static final Decimal DECIMAL_3 = DecimalUtils.getDecimal(3, 0); private static final Decimal DECIMAL_5 = DecimalUtils.getDecimal(5, 0); private static final Decimal DECIMAL_20 = DecimalUtils.getDecimal(2, 1); @@ -56,212 +49,231 @@ public class DecimalColumnStatsMergerTest { DATA_20.setHighValue(DECIMAL_20); } - private final DecimalColumnStatsMerger merger = new DecimalColumnStatsMerger(); + private DecimalColumnStatsMerger merger = new DecimalColumnStatsMerger(); + + @Test + public void testMergeNullMinMaxValues() { + ColumnStatisticsObj objNulls = new ColumnStatisticsObj(); + createData(objNulls, null, null); + + merger.merge(objNulls, objNulls); + + Assert.assertNull(objNulls.getStatsData().getDecimalStats().getLowValue()); + Assert.assertNull(objNulls.getStatsData().getDecimalStats().getHighValue()); + } + + @Test + public void testMergeNonNullAndNullLowerValuesOldIsNull() { + ColumnStatisticsObj oldObj = new ColumnStatisticsObj(); + createData(oldObj, null, null); + + ColumnStatisticsObj newObj = new ColumnStatisticsObj(); + createData(newObj, DECIMAL_3, null); + + merger.merge(oldObj, newObj); + + Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getLowValue()); + } + + @Test + public void testMergeNonNullAndNullLowerValuesNewIsNull() { + ColumnStatisticsObj oldObj = new ColumnStatisticsObj(); + createData(oldObj, DECIMAL_3, null); + + ColumnStatisticsObj newObj = new ColumnStatisticsObj(); + createData(newObj, null, null); + + merger.merge(oldObj, newObj); + + Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getLowValue()); + } + + @Test + public void testMergeNonNullAndNullHigherValuesOldIsNull() { + ColumnStatisticsObj oldObj = new ColumnStatisticsObj(); + createData(oldObj, null, null); + + ColumnStatisticsObj newObj = new ColumnStatisticsObj(); + createData(newObj, null, DECIMAL_3); + + merger.merge(oldObj, newObj); + + Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getHighValue()); + } + + @Test + public void testMergeNonNullAndNullHigherValuesNewIsNull() { + ColumnStatisticsObj oldObj = new ColumnStatisticsObj(); + createData(oldObj, null, DECIMAL_3); + + ColumnStatisticsObj newObj = new ColumnStatisticsObj(); + createData(newObj, null, null); + + merger.merge(oldObj, newObj); + + Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getHighValue()); + } @Test - public void testMergeNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(Decimal.class) - .low(null) - .high(null) - .numNulls(1) - .numDVs(0) - .build()); - merger.merge(aggrObj, aggrObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(Decimal.class) - .low(null) - .high(null) - .numNulls(2) - .numDVs(0) - .build(); - - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); + public void testMergeLowValuesFirstWins() { + ColumnStatisticsObj oldObj = new ColumnStatisticsObj(); + createData(oldObj, DECIMAL_3, null); + + ColumnStatisticsObj newObj = new ColumnStatisticsObj(); + createData(newObj, DECIMAL_5, null); + + merger.merge(oldObj, newObj); + + Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getLowValue()); } @Test - public void testMergeNullWithNonNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(Decimal.class) - .low(null) - .high(null) - .numNulls(0) - .numDVs(0) - .build()); - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(Decimal.class) - .low(DECIMAL_1) - .high(DECIMAL_3) - .numNulls(4) - .numDVs(2) - .hll(1, 3, 3) - .kll(1, 3, 3) - .build()); - merger.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(Decimal.class) - .low(DECIMAL_1) - .high(DECIMAL_3) - .numNulls(4) - .numDVs(2) - .hll(1, 3, 3) - .kll(1, 3, 3) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); + public void testMergeLowValuesSecondWins() { + ColumnStatisticsObj oldObj = new ColumnStatisticsObj(); + createData(oldObj, DECIMAL_5, null); + + ColumnStatisticsObj newObj = new ColumnStatisticsObj(); + createData(newObj, DECIMAL_3, null); + + merger.merge(oldObj, newObj); + + Assert.assertEquals(DECIMAL_3, oldObj.getStatsData().getDecimalStats().getLowValue()); } @Test - public void testMergeNonNullWithNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(Decimal.class) - .low(DECIMAL_1) - .high(DECIMAL_3) - .numNulls(4) - .numDVs(2) - .hll(1, 3, 3) - .kll(1, 3, 3) - .build()); - - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(Decimal.class) - .low(null) - .high(null) - .numNulls(2) - .numDVs(0) - .build()); - merger.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(Decimal.class) - .low(DECIMAL_1) - .high(DECIMAL_3) - .numNulls(6) - .numDVs(2) - .hll(1, 3, 3) - .kll(1, 3, 3) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); + public void testMergeHighValuesFirstWins() { + ColumnStatisticsObj oldObj = new ColumnStatisticsObj(); + createData(oldObj, null, DECIMAL_5); + + ColumnStatisticsObj newObj = new ColumnStatisticsObj(); + createData(newObj, null, DECIMAL_3); + + merger.merge(oldObj, newObj); + + Assert.assertEquals(DECIMAL_5, oldObj.getStatsData().getDecimalStats().getHighValue()); } @Test - public void testMergeNonNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(Decimal.class) - .low(DECIMAL_1) - .high(DECIMAL_1) - .numNulls(2) - .numDVs(1) - .hll(2) - .kll(2) - .build()); - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(Decimal.class) - .low(DECIMAL_3) - .high(DECIMAL_3) - .numNulls(3) - .numDVs(1) - .hll(3) - .kll(3) - .build()); - merger.merge(aggrObj, newObj); - - newObj = createColumnStatisticsObj(new ColStatsBuilder<>(Decimal.class) - .low(DECIMAL_1) - .high(DECIMAL_1) - .numNulls(1) - .numDVs(1) - .hll(1, 1) - .kll(1, 1) - .build()); - merger.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(Decimal.class) - .low(DECIMAL_1) - .high(DECIMAL_3) - .numNulls(6) - .numDVs(3) - .hll(2, 3, 1, 1) - .kll(2, 3, 1, 1) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); + public void testMergeHighValuesSecondWins() { + ColumnStatisticsObj oldObj = new ColumnStatisticsObj(); + createData(oldObj, null, DECIMAL_3); + + ColumnStatisticsObj newObj = new ColumnStatisticsObj(); + createData(newObj, null, DECIMAL_5); + + merger.merge(oldObj, newObj); + + Assert.assertEquals(DECIMAL_5, oldObj.getStatsData().getDecimalStats().getHighValue()); } @Test public void testDecimalCompareEqual() { - assertTrue(DECIMAL_3.equals(DECIMAL_3)); + Assert.assertTrue(DECIMAL_3.equals(DECIMAL_3)); } @Test public void testDecimalCompareDoesntEqual() { - assertFalse(DECIMAL_3.equals(DECIMAL_5)); + Assert.assertTrue(!DECIMAL_3.equals(DECIMAL_5)); } @Test public void testCompareSimple() { DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_3); DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_5); - assertEquals(DECIMAL_5, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); + merger.setHighValue(data1, data2); + Assert.assertEquals(DECIMAL_5, data1.getHighValue()); } @Test public void testCompareSimpleFlipped() { DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_5); DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_3); - assertEquals(DECIMAL_5, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); + merger.setHighValue(data1, data2); + Assert.assertEquals(DECIMAL_5, data1.getHighValue()); } @Test public void testCompareSimpleReversed() { DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_3); DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_5); - assertEquals(DECIMAL_3, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); + merger.setLowValue(data1, data2); + Assert.assertEquals(DECIMAL_3, data1.getLowValue()); } @Test public void testCompareSimpleFlippedReversed() { DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_5); DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_3); - assertEquals(DECIMAL_3, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); + merger.setLowValue(data1, data2); + Assert.assertEquals(DECIMAL_3, data1.getLowValue()); } @Test public void testCompareUnscaledValue() { DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_3); DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_20); - assertEquals(DECIMAL_20, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); + merger.setHighValue(data1, data2); + Assert.assertEquals(DECIMAL_20, data1.getHighValue()); } @Test public void testCompareNullsMin() { DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(); DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(); - assertNull(merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); + merger.setLowValue(data1, data2); + Assert.assertNull(data1.getLowValue()); } @Test public void testCompareNullsMax() { DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(); DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(); - assertNull(merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); + merger.setHighValue(data1, data2); + Assert.assertNull(data1.getHighValue()); } @Test public void testCompareFirstNullMin() { DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(); DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_3); - assertEquals(DECIMAL_3, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); + merger.setLowValue(data1, data2); + Assert.assertEquals(DECIMAL_3, data1.getLowValue()); } @Test public void testCompareSecondNullMin() { DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_3); DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(); - assertEquals(DECIMAL_3, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); + merger.setLowValue(data1, data2); + Assert.assertEquals(DECIMAL_3, data1.getLowValue()); } @Test public void testCompareFirstNullMax() { DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(DATA_3); DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(); - assertEquals(DECIMAL_3, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); + merger.setHighValue(data1, data2); + Assert.assertEquals(DECIMAL_3, data1.getHighValue()); } @Test public void testCompareSecondNullMax() { DecimalColumnStatsDataInspector data1 = new DecimalColumnStatsDataInspector(); DecimalColumnStatsDataInspector data2 = new DecimalColumnStatsDataInspector(DATA_3); - assertEquals(DECIMAL_3, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); + merger.setHighValue(data1, data2); + Assert.assertEquals(DECIMAL_3, data1.getHighValue()); + } + + private DecimalColumnStatsDataInspector createData(ColumnStatisticsObj objNulls, Decimal lowValue, + Decimal highValue) { + ColumnStatisticsData statisticsData = new ColumnStatisticsData(); + DecimalColumnStatsDataInspector data = new DecimalColumnStatsDataInspector(); + + statisticsData.setDecimalStats(data); + objNulls.setStatsData(statisticsData); + + data.setLowValue(lowValue); + data.setHighValue(highValue); + return data; } } diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMergerTest.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMergerTest.java deleted file mode 100644 index 9ecc960771b..00000000000 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/DoubleColumnStatsMergerTest.java +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hadoop.hive.metastore.columnstats.merge; - -import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.columnstats.ColStatsBuilder; -import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; -import org.junit.Test; -import org.junit.experimental.categories.Category; - -import static org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerTest.createColumnStatisticsObj; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; - -@Category(MetastoreUnitTest.class) -public class DoubleColumnStatsMergerTest { - private final DoubleColumnStatsMerger merger = new DoubleColumnStatsMerger(); - - private static final DoubleColumnStatsDataInspector DATA_1 = new DoubleColumnStatsDataInspector(); - private static final DoubleColumnStatsDataInspector DATA_2 = new DoubleColumnStatsDataInspector(); - private static final DoubleColumnStatsDataInspector DATA_3 = new DoubleColumnStatsDataInspector(); - - static { - DATA_1.setLowValue(1d); - DATA_1.setHighValue(1d); - DATA_2.setLowValue(2d); - DATA_2.setHighValue(2d); - DATA_3.setLowValue(3d); - DATA_3.setHighValue(3d); - } - - @Test - public void testMergeNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(double.class) - .low(null) - .high(null) - .numNulls(1) - .numDVs(0) - .build()); - merger.merge(aggrObj, aggrObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(double.class) - .low(null) - .high(null) - .numNulls(2) - .numDVs(0) - .build(); - - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } - - @Test - public void testMergeNullWithNonNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(double.class) - .low(null) - .high(null) - .numNulls(0) - .numDVs(0) - .build()); - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(double.class) - .low(1d) - .high(3d) - .numNulls(4) - .numDVs(2) - .hll(1d, 3d, 3d) - .kll(1d, 3d, 3d) - .build()); - merger.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(double.class) - .low(1d) - .high(3d) - .numNulls(4) - .numDVs(2) - .hll(1d, 3d, 3d) - .kll(1d, 3d, 3d) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } - - @Test - public void testMergeNonNullWithNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(double.class) - .low(1d) - .high(3d) - .numNulls(4) - .numDVs(2) - .hll(1d, 3d, 3d) - .kll(1d, 3d, 3d) - .build()); - - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(double.class) - .low(null) - .high(null) - .numNulls(2) - .numDVs(0) - .build()); - merger.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(double.class) - .low(1d) - .high(3d) - .numNulls(6) - .numDVs(2) - .hll(1d, 3d, 3d) - .kll(1d, 3d, 3d) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } - - @Test - public void testMergeNonNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(double.class) - .low(2d) - .high(2d) - .numNulls(2) - .numDVs(1) - .hll(2d) - .kll(2d) - .build()); - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(double.class) - .low(3d) - .high(3d) - .numNulls(3) - .numDVs(1) - .hll(3d) - .kll(3d) - .build()); - merger.merge(aggrObj, newObj); - - newObj = createColumnStatisticsObj(new ColStatsBuilder<>(double.class) - .low(1d) - .high(1d) - .numNulls(1) - .numDVs(1) - .hll(1d, 1d) - .kll(1d, 1d) - .build()); - merger.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(double.class) - .low(1d) - .high(3d) - .numNulls(6) - .numDVs(3) - .hll(2d, 3d, 1d, 1d) - .kll(2d, 3d, 1d, 1d) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } - - @Test - public void testCompareSimple() { - DoubleColumnStatsDataInspector data1 = new DoubleColumnStatsDataInspector(DATA_1); - DoubleColumnStatsDataInspector data2 = new DoubleColumnStatsDataInspector(DATA_2); - assertEquals(2, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2)), Double.MIN_VALUE); - } - - @Test - public void testCompareSimpleFlipped() { - DoubleColumnStatsDataInspector data1 = new DoubleColumnStatsDataInspector(DATA_2); - DoubleColumnStatsDataInspector data2 = new DoubleColumnStatsDataInspector(DATA_1); - assertEquals(2, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2)), Double.MIN_VALUE); - } - - @Test - public void testCompareSimpleReversed() { - DoubleColumnStatsDataInspector data1 = new DoubleColumnStatsDataInspector(DATA_1); - DoubleColumnStatsDataInspector data2 = new DoubleColumnStatsDataInspector(DATA_2); - assertEquals(1, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2)), Double.MIN_VALUE); - } - - @Test - public void testCompareSimpleFlippedReversed() { - DoubleColumnStatsDataInspector data1 = new DoubleColumnStatsDataInspector(DATA_2); - DoubleColumnStatsDataInspector data2 = new DoubleColumnStatsDataInspector(DATA_1); - assertEquals(1, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2)), Double.MIN_VALUE); - } - - @Test - public void testCompareNullsMin() { - DoubleColumnStatsDataInspector data1 = new DoubleColumnStatsDataInspector(); - DoubleColumnStatsDataInspector data2 = new DoubleColumnStatsDataInspector(); - assertNull(merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); - } - - @Test - public void testCompareNullsMax() { - DoubleColumnStatsDataInspector data1 = new DoubleColumnStatsDataInspector(); - DoubleColumnStatsDataInspector data2 = new DoubleColumnStatsDataInspector(); - assertNull(merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); - } - - @Test - public void testCompareFirstNullMin() { - DoubleColumnStatsDataInspector data1 = new DoubleColumnStatsDataInspector(); - DoubleColumnStatsDataInspector data2 = new DoubleColumnStatsDataInspector(DATA_1); - assertEquals(1, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2)), Double.MIN_VALUE); - } - - @Test - public void testCompareSecondNullMin() { - DoubleColumnStatsDataInspector data1 = new DoubleColumnStatsDataInspector(DATA_1); - DoubleColumnStatsDataInspector data2 = new DoubleColumnStatsDataInspector(); - assertEquals(1, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2)), Double.MIN_VALUE); - } - - @Test - public void testCompareFirstNullMax() { - DoubleColumnStatsDataInspector data1 = new DoubleColumnStatsDataInspector(DATA_1); - DoubleColumnStatsDataInspector data2 = new DoubleColumnStatsDataInspector(); - assertEquals(1, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2)), Double.MIN_VALUE); - } - - @Test - public void testCompareSecondNullMax() { - DoubleColumnStatsDataInspector data1 = new DoubleColumnStatsDataInspector(); - DoubleColumnStatsDataInspector data2 = new DoubleColumnStatsDataInspector(DATA_1); - assertEquals(1, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2)), Double.MIN_VALUE); - } -} diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMergerTest.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMergerTest.java deleted file mode 100644 index 54a9574c918..00000000000 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/LongColumnStatsMergerTest.java +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hadoop.hive.metastore.columnstats.merge; - -import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.columnstats.ColStatsBuilder; -import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; -import org.junit.Test; -import org.junit.experimental.categories.Category; - -import static org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerTest.createColumnStatisticsObj; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; - -@Category(MetastoreUnitTest.class) -public class LongColumnStatsMergerTest { - private final LongColumnStatsMerger merger = new LongColumnStatsMerger(); - - private static final LongColumnStatsDataInspector DATA_1 = new LongColumnStatsDataInspector(); - private static final LongColumnStatsDataInspector DATA_2 = new LongColumnStatsDataInspector(); - private static final LongColumnStatsDataInspector DATA_3 = new LongColumnStatsDataInspector(); - - static { - DATA_1.setLowValue(1); - DATA_1.setHighValue(1); - DATA_2.setLowValue(2); - DATA_2.setHighValue(2); - DATA_3.setLowValue(3); - DATA_3.setHighValue(3); - } - - @Test - public void testMergeNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(long.class) - .low(null) - .high(null) - .numNulls(1) - .numDVs(0) - .build()); - merger.merge(aggrObj, aggrObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(long.class) - .low(null) - .high(null) - .numNulls(2) - .numDVs(0) - .build(); - - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } - - @Test - public void testMergeNullWithNonNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(long.class) - .low(null) - .high(null) - .numNulls(0) - .numDVs(0) - .build()); - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(long.class) - .low(1L) - .high(3L) - .numNulls(4) - .numDVs(2) - .hll(1, 3, 3) - .kll(1, 3, 3) - .build()); - merger.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(long.class) - .low(1L) - .high(3L) - .numNulls(4) - .numDVs(2) - .hll(1, 3, 3) - .kll(1, 3, 3) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } - - @Test - public void testMergeNonNullWithNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(long.class) - .low(1L) - .high(3L) - .numNulls(4) - .numDVs(2) - .hll(1, 3, 3) - .kll(1, 3, 3) - .build()); - - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(long.class) - .low(null) - .high(null) - .numNulls(2) - .numDVs(0) - .build()); - merger.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(long.class) - .low(1L) - .high(3L) - .numNulls(6) - .numDVs(2) - .hll(1, 3, 3) - .kll(1, 3, 3) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } - - @Test - public void testMergeNonNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(long.class) - .low(2L) - .high(2L) - .numNulls(2) - .numDVs(1) - .hll(2L) - .kll(2L) - .build()); - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(long.class) - .low(3L) - .high(3L) - .numNulls(3) - .numDVs(1) - .hll(3L) - .kll(3L) - .build()); - merger.merge(aggrObj, newObj); - - newObj = createColumnStatisticsObj(new ColStatsBuilder<>(long.class) - .low(1L) - .high(1L) - .numNulls(1) - .numDVs(1) - .hll(1L, 1L) - .kll(1L, 1L) - .build()); - merger.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(long.class) - .low(1L) - .high(3L) - .numNulls(6) - .numDVs(3) - .hll(2L, 3L, 1L, 1L) - .kll(2L, 3L, 1L, 1L) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } - - @Test - public void testCompareSimple() { - LongColumnStatsDataInspector data1 = new LongColumnStatsDataInspector(DATA_1); - LongColumnStatsDataInspector data2 = new LongColumnStatsDataInspector(DATA_2); - assertEquals(2, (long) merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); - } - - @Test - public void testCompareSimpleFlipped() { - LongColumnStatsDataInspector data1 = new LongColumnStatsDataInspector(DATA_2); - LongColumnStatsDataInspector data2 = new LongColumnStatsDataInspector(DATA_1); - assertEquals(2, (long) merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); - } - - @Test - public void testCompareSimpleReversed() { - LongColumnStatsDataInspector data1 = new LongColumnStatsDataInspector(DATA_1); - LongColumnStatsDataInspector data2 = new LongColumnStatsDataInspector(DATA_2); - assertEquals(1, (long) merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); - } - - @Test - public void testCompareSimpleFlippedReversed() { - LongColumnStatsDataInspector data1 = new LongColumnStatsDataInspector(DATA_2); - LongColumnStatsDataInspector data2 = new LongColumnStatsDataInspector(DATA_1); - assertEquals(1, (long) merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); - } - - @Test - public void testCompareNullsMin() { - LongColumnStatsDataInspector data1 = new LongColumnStatsDataInspector(); - LongColumnStatsDataInspector data2 = new LongColumnStatsDataInspector(); - assertNull(merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); - } - - @Test - public void testCompareNullsMax() { - LongColumnStatsDataInspector data1 = new LongColumnStatsDataInspector(); - LongColumnStatsDataInspector data2 = new LongColumnStatsDataInspector(); - assertNull(merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); - } - - @Test - public void testCompareFirstNullMin() { - LongColumnStatsDataInspector data1 = new LongColumnStatsDataInspector(); - LongColumnStatsDataInspector data2 = new LongColumnStatsDataInspector(DATA_1); - assertEquals(1, (long) merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); - } - - @Test - public void testCompareSecondNullMin() { - LongColumnStatsDataInspector data1 = new LongColumnStatsDataInspector(DATA_1); - LongColumnStatsDataInspector data2 = new LongColumnStatsDataInspector(); - assertEquals(1, (long) merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); - } - - @Test - public void testCompareFirstNullMax() { - LongColumnStatsDataInspector data1 = new LongColumnStatsDataInspector(DATA_1); - LongColumnStatsDataInspector data2 = new LongColumnStatsDataInspector(); - assertEquals(1, (long) merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); - } - - @Test - public void testCompareSecondNullMax() { - LongColumnStatsDataInspector data1 = new LongColumnStatsDataInspector(); - LongColumnStatsDataInspector data2 = new LongColumnStatsDataInspector(DATA_1); - assertEquals(1, (long) merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); - } -} diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMergerTest.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMergerTest.java deleted file mode 100644 index 875a8c1d456..00000000000 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMergerTest.java +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hadoop.hive.metastore.columnstats.merge; - -import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.Timestamp; -import org.apache.hadoop.hive.metastore.columnstats.ColStatsBuilder; -import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector; -import org.junit.Test; -import org.junit.experimental.categories.Category; - -import static org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerTest.createColumnStatisticsObj; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; - -@Category(MetastoreUnitTest.class) -public class TimestampColumnStatsMergerTest { - - private static final Timestamp TS_1 = new Timestamp(1); - private static final Timestamp TS_2 = new Timestamp(2); - private static final Timestamp TS_3 = new Timestamp(3); - - private static final TimestampColumnStatsDataInspector DATA_1 = new TimestampColumnStatsDataInspector(); - private static final TimestampColumnStatsDataInspector DATA_2 = new TimestampColumnStatsDataInspector(); - private static final TimestampColumnStatsDataInspector DATA_3 = new TimestampColumnStatsDataInspector(); - - static { - DATA_1.setLowValue(TS_1); - DATA_1.setHighValue(TS_1); - DATA_2.setLowValue(TS_2); - DATA_2.setHighValue(TS_2); - DATA_3.setLowValue(TS_3); - DATA_3.setHighValue(TS_3); - } - - private final TimestampColumnStatsMerger merger = new TimestampColumnStatsMerger(); - - @Test - public void testMergeNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(Timestamp.class) - .low(null) - .high(null) - .numNulls(1) - .numDVs(0) - .build()); - merger.merge(aggrObj, aggrObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(Timestamp.class) - .low(null) - .high(null) - .numNulls(2) - .numDVs(0) - .build(); - - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } - - @Test - public void testMergeNullWithNonNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(Timestamp.class) - .low(null) - .high(null) - .numNulls(0) - .numDVs(0) - .build()); - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(Timestamp.class) - .low(TS_1) - .high(TS_3) - .numNulls(4) - .numDVs(2) - .hll(TS_1.getSecondsSinceEpoch(), TS_3.getSecondsSinceEpoch(), TS_3.getSecondsSinceEpoch()) - .kll(TS_1.getSecondsSinceEpoch(), TS_3.getSecondsSinceEpoch(), TS_3.getSecondsSinceEpoch()) - .build()); - - merger.merge(aggrObj, newObj); - - assertEquals(newObj.getStatsData(), aggrObj.getStatsData()); - } - - @Test - public void testMergeNonNullWithNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(Timestamp.class) - .low(TS_1) - .high(TS_3) - .numNulls(4) - .numDVs(2) - .hll(TS_1.getSecondsSinceEpoch(), TS_3.getSecondsSinceEpoch(), TS_3.getSecondsSinceEpoch()) - .kll(TS_1.getSecondsSinceEpoch(), TS_3.getSecondsSinceEpoch(), TS_3.getSecondsSinceEpoch()) - .build()); - - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(Timestamp.class) - .low(null) - .high(null) - .numNulls(2) - .numDVs(0) - .build()); - merger.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(Timestamp.class) - .low(TS_1) - .high(TS_3) - .numNulls(6) - .numDVs(2) - .hll(TS_1.getSecondsSinceEpoch(), TS_3.getSecondsSinceEpoch(), TS_3.getSecondsSinceEpoch()) - .kll(TS_1.getSecondsSinceEpoch(), TS_3.getSecondsSinceEpoch(), TS_3.getSecondsSinceEpoch()) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } - - @Test - public void testMergeNonNullValues() { - ColumnStatisticsObj aggrObj = createColumnStatisticsObj(new ColStatsBuilder<>(Timestamp.class) - .low(TS_2) - .high(TS_2) - .numNulls(2) - .numDVs(1) - .hll(TS_2.getSecondsSinceEpoch()) - .kll(TS_2.getSecondsSinceEpoch()) - .build()); - ColumnStatisticsObj newObj = createColumnStatisticsObj(new ColStatsBuilder<>(Timestamp.class) - .low(TS_3) - .high(TS_3) - .numNulls(3) - .numDVs(1) - .hll(TS_3.getSecondsSinceEpoch()) - .kll(TS_3.getSecondsSinceEpoch()) - .build()); - merger.merge(aggrObj, newObj); - - newObj = createColumnStatisticsObj(new ColStatsBuilder<>(Timestamp.class) - .low(TS_1) - .high(TS_1) - .numNulls(1) - .numDVs(1) - .hll(TS_1.getSecondsSinceEpoch(), TS_1.getSecondsSinceEpoch()) - .kll(TS_1.getSecondsSinceEpoch(), TS_1.getSecondsSinceEpoch()) - .build()); - merger.merge(aggrObj, newObj); - - ColumnStatisticsData expectedColumnStatisticsData = new ColStatsBuilder<>(Timestamp.class) - .low(TS_1) - .high(TS_3) - .numNulls(6) - .numDVs(3) - .hll(TS_2.getSecondsSinceEpoch(), TS_3.getSecondsSinceEpoch(), - TS_1.getSecondsSinceEpoch(), TS_1.getSecondsSinceEpoch()) - .kll(TS_2.getSecondsSinceEpoch(), TS_3.getSecondsSinceEpoch(), - TS_1.getSecondsSinceEpoch(), TS_1.getSecondsSinceEpoch()) - .build(); - assertEquals(expectedColumnStatisticsData, aggrObj.getStatsData()); - } - - @Test - public void testCompareSimple() { - TimestampColumnStatsDataInspector data1 = new TimestampColumnStatsDataInspector(DATA_1); - TimestampColumnStatsDataInspector data2 = new TimestampColumnStatsDataInspector(DATA_2); - assertEquals(TS_2, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); - } - - @Test - public void testCompareSimpleFlipped() { - TimestampColumnStatsDataInspector data1 = new TimestampColumnStatsDataInspector(DATA_2); - TimestampColumnStatsDataInspector data2 = new TimestampColumnStatsDataInspector(DATA_1); - assertEquals(TS_2, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); - } - - @Test - public void testCompareSimpleReversed() { - TimestampColumnStatsDataInspector data1 = new TimestampColumnStatsDataInspector(DATA_1); - TimestampColumnStatsDataInspector data2 = new TimestampColumnStatsDataInspector(DATA_2); - assertEquals(TS_1, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); - } - - @Test - public void testCompareSimpleFlippedReversed() { - TimestampColumnStatsDataInspector data1 = new TimestampColumnStatsDataInspector(DATA_2); - TimestampColumnStatsDataInspector data2 = new TimestampColumnStatsDataInspector(DATA_1); - assertEquals(TS_1, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); - } - - @Test - public void testCompareNullsMin() { - TimestampColumnStatsDataInspector data1 = new TimestampColumnStatsDataInspector(); - TimestampColumnStatsDataInspector data2 = new TimestampColumnStatsDataInspector(); - assertNull(merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); - } - - @Test - public void testCompareNullsMax() { - TimestampColumnStatsDataInspector data1 = new TimestampColumnStatsDataInspector(); - TimestampColumnStatsDataInspector data2 = new TimestampColumnStatsDataInspector(); - assertNull(merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); - } - - @Test - public void testCompareFirstNullMin() { - TimestampColumnStatsDataInspector data1 = new TimestampColumnStatsDataInspector(); - TimestampColumnStatsDataInspector data2 = new TimestampColumnStatsDataInspector(DATA_1); - assertEquals(TS_1, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); - } - - @Test - public void testCompareSecondNullMin() { - TimestampColumnStatsDataInspector data1 = new TimestampColumnStatsDataInspector(DATA_1); - TimestampColumnStatsDataInspector data2 = new TimestampColumnStatsDataInspector(); - assertEquals(TS_1, merger.mergeLowValue(merger.getLowValue(data1), merger.getLowValue(data2))); - } - - @Test - public void testCompareFirstNullMax() { - TimestampColumnStatsDataInspector data1 = new TimestampColumnStatsDataInspector(DATA_1); - TimestampColumnStatsDataInspector data2 = new TimestampColumnStatsDataInspector(); - assertEquals(TS_1, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); - } - - @Test - public void testCompareSecondNullMax() { - TimestampColumnStatsDataInspector data1 = new TimestampColumnStatsDataInspector(); - TimestampColumnStatsDataInspector data2 = new TimestampColumnStatsDataInspector(DATA_1); - assertEquals(TS_1, merger.mergeHighValue(merger.getHighValue(data1), merger.getHighValue(data2))); - } -}
