http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java deleted file mode 100644 index ac7e8e3..0000000 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java +++ /dev/null @@ -1,375 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hadoop.hive.metastore.columnstats.aggr; - -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.StatObjectConverter; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.utils.DecimalUtils; -import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implements - IExtrapolatePartStatus { - - private static final Logger LOG = LoggerFactory.getLogger(DecimalColumnStatsAggregator.class); - - @Override - public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWithSourceInfo, - List<String> partNames, boolean areAllPartsFound) throws MetaException { - ColumnStatisticsObj statsObj = null; - String colType = null; - String colName = null; - // check if all the ColumnStatisticsObjs contain stats and all the ndv are - // bitvectors - boolean doAllPartitionContainStats = partNames.size() == colStatsWithSourceInfo.size(); - NumDistinctValueEstimator ndvEstimator = null; - for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { - ColumnStatisticsObj cso = csp.getColStatsObj(); - if (statsObj == null) { - colName = cso.getColName(); - 
colType = cso.getColType(); - statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, - cso.getStatsData().getSetField()); - LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName, - doAllPartitionContainStats); - } - DecimalColumnStatsDataInspector decimalColumnStatsData = - (DecimalColumnStatsDataInspector) cso.getStatsData().getDecimalStats(); - if (decimalColumnStatsData.getNdvEstimator() == null) { - ndvEstimator = null; - break; - } else { - // check if all of the bit vectors can merge - NumDistinctValueEstimator estimator = decimalColumnStatsData.getNdvEstimator(); - if (ndvEstimator == null) { - ndvEstimator = estimator; - } else { - if (ndvEstimator.canMerge(estimator)) { - continue; - } else { - ndvEstimator = null; - break; - } - } - } - } - if (ndvEstimator != null) { - ndvEstimator = NumDistinctValueEstimatorFactory - .getEmptyNumDistinctValueEstimator(ndvEstimator); - } - LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); - ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); - if (doAllPartitionContainStats || colStatsWithSourceInfo.size() < 2) { - DecimalColumnStatsDataInspector aggregateData = null; - long lowerBound = 0; - long higherBound = 0; - double densityAvgSum = 0.0; - for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { - ColumnStatisticsObj cso = csp.getColStatsObj(); - DecimalColumnStatsDataInspector newData = - (DecimalColumnStatsDataInspector) cso.getStatsData().getDecimalStats(); - lowerBound = Math.max(lowerBound, newData.getNumDVs()); - higherBound += newData.getNumDVs(); - densityAvgSum += (MetaStoreUtils.decimalToDouble(newData.getHighValue()) - MetaStoreUtils - .decimalToDouble(newData.getLowValue())) / newData.getNumDVs(); - if (ndvEstimator != null) { - ndvEstimator.mergeEstimators(newData.getNdvEstimator()); - } - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - if (MetaStoreUtils.decimalToDouble(aggregateData.getLowValue()) < MetaStoreUtils - .decimalToDouble(newData.getLowValue())) { - aggregateData.setLowValue(aggregateData.getLowValue()); - } else { - aggregateData.setLowValue(newData.getLowValue()); - } - if (MetaStoreUtils.decimalToDouble(aggregateData.getHighValue()) > MetaStoreUtils - .decimalToDouble(newData.getHighValue())) { - aggregateData.setHighValue(aggregateData.getHighValue()); - } else { - aggregateData.setHighValue(newData.getHighValue()); - } - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); - } - } - if (ndvEstimator != null) { - // if all the ColumnStatisticsObjs contain bitvectors, we do not need to - // use uniform distribution assumption because we can merge bitvectors - // to get a good estimation. - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - } else { - long estimation; - if (useDensityFunctionForNDVEstimation) { - // We have estimation, lowerbound and higherbound. We use estimation - // if it is between lowerbound and higherbound. 
- double densityAvg = densityAvgSum / partNames.size(); - estimation = (long) ((MetaStoreUtils.decimalToDouble(aggregateData.getHighValue()) - MetaStoreUtils - .decimalToDouble(aggregateData.getLowValue())) / densityAvg); - if (estimation < lowerBound) { - estimation = lowerBound; - } else if (estimation > higherBound) { - estimation = higherBound; - } - } else { - estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); - } - aggregateData.setNumDVs(estimation); - } - columnStatisticsData.setDecimalStats(aggregateData); - } else { - // we need extrapolation - LOG.debug("start extrapolation for " + colName); - Map<String, Integer> indexMap = new HashMap<>(); - for (int index = 0; index < partNames.size(); index++) { - indexMap.put(partNames.get(index), index); - } - Map<String, Double> adjustedIndexMap = new HashMap<>(); - Map<String, ColumnStatisticsData> adjustedStatsMap = new HashMap<>(); - // while we scan the css, we also get the densityAvg, lowerbound and - // higerbound when useDensityFunctionForNDVEstimation is true. - double densityAvgSum = 0.0; - if (ndvEstimator == null) { - // if not every partition uses bitvector for ndv, we just fall back to - // the traditional extrapolation methods. - for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { - ColumnStatisticsObj cso = csp.getColStatsObj(); - String partName = csp.getPartName(); - DecimalColumnStatsData newData = cso.getStatsData().getDecimalStats(); - if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (MetaStoreUtils.decimalToDouble(newData.getHighValue()) - MetaStoreUtils - .decimalToDouble(newData.getLowValue())) / newData.getNumDVs(); - } - adjustedIndexMap.put(partName, (double) indexMap.get(partName)); - adjustedStatsMap.put(partName, cso.getStatsData()); - } - } else { - // we first merge all the adjacent bitvectors that we could merge and - // derive new partition names and index. - StringBuilder pseudoPartName = new StringBuilder(); - double pseudoIndexSum = 0; - int length = 0; - int curIndex = -1; - DecimalColumnStatsDataInspector aggregateData = null; - for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { - ColumnStatisticsObj cso = csp.getColStatsObj(); - String partName = csp.getPartName(); - DecimalColumnStatsDataInspector newData = - (DecimalColumnStatsDataInspector) cso.getStatsData().getDecimalStats(); - // newData.isSetBitVectors() should be true for sure because we - // already checked it before. - if (indexMap.get(partName) != curIndex) { - // There is bitvector, but it is not adjacent to the previous ones. 
- if (length > 0) { - // we have to set ndv - adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - ColumnStatisticsData csd = new ColumnStatisticsData(); - csd.setDecimalStats(aggregateData); - adjustedStatsMap.put(pseudoPartName.toString(), csd); - if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (MetaStoreUtils.decimalToDouble(aggregateData.getHighValue()) - MetaStoreUtils - .decimalToDouble(aggregateData.getLowValue())) / aggregateData.getNumDVs(); - } - // reset everything - pseudoPartName = new StringBuilder(); - pseudoIndexSum = 0; - length = 0; - ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator); - } - aggregateData = null; - } - curIndex = indexMap.get(partName); - pseudoPartName.append(partName); - pseudoIndexSum += curIndex; - length++; - curIndex++; - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - if (MetaStoreUtils.decimalToDouble(aggregateData.getLowValue()) < MetaStoreUtils - .decimalToDouble(newData.getLowValue())) { - aggregateData.setLowValue(aggregateData.getLowValue()); - } else { - aggregateData.setLowValue(newData.getLowValue()); - } - if (MetaStoreUtils.decimalToDouble(aggregateData.getHighValue()) > MetaStoreUtils - .decimalToDouble(newData.getHighValue())) { - aggregateData.setHighValue(aggregateData.getHighValue()); - } else { - aggregateData.setHighValue(newData.getHighValue()); - } - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - } - ndvEstimator.mergeEstimators(newData.getNdvEstimator()); - } - if (length > 0) { - // we have to set ndv - adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - ColumnStatisticsData csd = new ColumnStatisticsData(); - csd.setDecimalStats(aggregateData); - adjustedStatsMap.put(pseudoPartName.toString(), csd); - if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (MetaStoreUtils.decimalToDouble(aggregateData.getHighValue()) - MetaStoreUtils - .decimalToDouble(aggregateData.getLowValue())) / aggregateData.getNumDVs(); - } - } - } - extrapolate(columnStatisticsData, partNames.size(), colStatsWithSourceInfo.size(), - adjustedIndexMap, adjustedStatsMap, densityAvgSum / adjustedStatsMap.size()); - } - LOG.debug( - "Ndv estimatation for {} is {} # of partitions requested: {} # of partitions found: {}", - colName, columnStatisticsData.getDecimalStats().getNumDVs(), partNames.size(), - colStatsWithSourceInfo.size()); - statsObj.setStatsData(columnStatisticsData); - return statsObj; - } - - @Override - public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, - int numPartsWithStats, Map<String, Double> adjustedIndexMap, - Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) { - int rightBorderInd = numParts; - DecimalColumnStatsDataInspector extrapolateDecimalData = new DecimalColumnStatsDataInspector(); - Map<String, DecimalColumnStatsData> extractedAdjustedStatsMap = new HashMap<>(); - for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) { - extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getDecimalStats()); - } - List<Map.Entry<String, DecimalColumnStatsData>> list = new LinkedList<>( - extractedAdjustedStatsMap.entrySet()); - // get the lowValue - Collections.sort(list, new Comparator<Map.Entry<String, DecimalColumnStatsData>>() { - @Override - 
public int compare(Map.Entry<String, DecimalColumnStatsData> o1, - Map.Entry<String, DecimalColumnStatsData> o2) { - return o1.getValue().getLowValue().compareTo(o2.getValue().getLowValue()); - } - }); - double minInd = adjustedIndexMap.get(list.get(0).getKey()); - double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - double lowValue = 0; - double min = MetaStoreUtils.decimalToDouble(list.get(0).getValue().getLowValue()); - double max = MetaStoreUtils.decimalToDouble(list.get(list.size() - 1).getValue().getLowValue()); - if (minInd == maxInd) { - lowValue = min; - } else if (minInd < maxInd) { - // left border is the min - lowValue = (max - (max - min) * maxInd / (maxInd - minInd)); - } else { - // right border is the min - lowValue = (max - (max - min) * (rightBorderInd - maxInd) / (minInd - maxInd)); - } - - // get the highValue - Collections.sort(list, new Comparator<Map.Entry<String, DecimalColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, DecimalColumnStatsData> o1, - Map.Entry<String, DecimalColumnStatsData> o2) { - return o1.getValue().getHighValue().compareTo(o2.getValue().getHighValue()); - } - }); - minInd = adjustedIndexMap.get(list.get(0).getKey()); - maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - double highValue = 0; - min = MetaStoreUtils.decimalToDouble(list.get(0).getValue().getHighValue()); - max = MetaStoreUtils.decimalToDouble(list.get(list.size() - 1).getValue().getHighValue()); - if (minInd == maxInd) { - highValue = min; - } else if (minInd < maxInd) { - // right border is the max - highValue = (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - highValue = (min + (max - min) * minInd / (minInd - maxInd)); - } - - // get the #nulls - long numNulls = 0; - for (Map.Entry<String, DecimalColumnStatsData> entry : extractedAdjustedStatsMap.entrySet()) { - numNulls += entry.getValue().getNumNulls(); - } - // we scale up sumNulls based on the number of partitions - numNulls = numNulls * numParts / numPartsWithStats; - - // get the ndv - long ndv = 0; - long ndvMin = 0; - long ndvMax = 0; - Collections.sort(list, new Comparator<Map.Entry<String, DecimalColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, DecimalColumnStatsData> o1, - Map.Entry<String, DecimalColumnStatsData> o2) { - return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs()); - } - }); - long lowerBound = list.get(list.size() - 1).getValue().getNumDVs(); - long higherBound = 0; - for (Map.Entry<String, DecimalColumnStatsData> entry : list) { - higherBound += entry.getValue().getNumDVs(); - } - if (useDensityFunctionForNDVEstimation && densityAvg != 0.0) { - ndv = (long) ((highValue - lowValue) / densityAvg); - if (ndv < lowerBound) { - ndv = lowerBound; - } else if (ndv > higherBound) { - ndv = higherBound; - } - } else { - minInd = adjustedIndexMap.get(list.get(0).getKey()); - maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - ndvMin = list.get(0).getValue().getNumDVs(); - ndvMax = list.get(list.size() - 1).getValue().getNumDVs(); - if (minInd == maxInd) { - ndv = ndvMin; - } else if (minInd < maxInd) { - // right border is the max - ndv = (long) (ndvMin + (ndvMax - ndvMin) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - ndv = (long) (ndvMin + (ndvMax - ndvMin) * minInd / (minInd - maxInd)); - } - } - extrapolateDecimalData.setLowValue(DecimalUtils.createThriftDecimal(String - 
.valueOf(lowValue))); - extrapolateDecimalData.setHighValue(DecimalUtils.createThriftDecimal(String - .valueOf(highValue))); - extrapolateDecimalData.setNumNulls(numNulls); - extrapolateDecimalData.setNumDVs(ndv); - extrapolateData.setDecimalStats(extrapolateDecimalData); - } -}
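The aggregators removed in this change all share one NDV fallback: when the per-partition bitvectors cannot be merged, the aggregate NDV is either derived from the average density ((highValue - lowValue) / numDVs, averaged over the partitions) and clamped to the interval [max of per-partition NDVs, sum of per-partition NDVs], or linearly interpolated between those two bounds by ndvTuner. A minimal, self-contained sketch of that fallback follows; the class and method names here are illustrative only, not part of the metastore API:

import java.util.List;

// Standalone sketch of the NDV fallback; illustrative names only.
public class NdvEstimationSketch {

  // Density-based estimate: ndv ~ (high - low) / avg(range_i / ndv_i),
  // clamped to [lowerBound, higherBound].
  static long densityEstimate(double low, double high, List<double[]> partStats) {
    long lowerBound = 0;   // max of the per-partition NDVs
    long higherBound = 0;  // sum of the per-partition NDVs
    double densityAvgSum = 0.0;
    for (double[] p : partStats) { // p = {low, high, ndv}
      long ndv = (long) p[2];
      lowerBound = Math.max(lowerBound, ndv);
      higherBound += ndv;
      densityAvgSum += (p[1] - p[0]) / ndv;
    }
    double densityAvg = densityAvgSum / partStats.size();
    long estimation = (long) ((high - low) / densityAvg);
    // use the density estimate only when it falls between the bounds
    return Math.max(lowerBound, Math.min(higherBound, estimation));
  }

  // Tuner-based estimate: linear interpolation between the bounds.
  // ndvTuner = 0 assumes every partition holds the same values;
  // ndvTuner = 1 assumes the partitions' value sets are disjoint.
  static long tunerEstimate(long lowerBound, long higherBound, double ndvTuner) {
    return (long) (lowerBound + (higherBound - lowerBound) * ndvTuner);
  }

  public static void main(String[] args) {
    List<double[]> parts = List.of(
        new double[] {0, 100, 50},    // partition 1: range 100, ndv 50
        new double[] {50, 150, 50});  // partition 2: range 100, ndv 50
    System.out.println(densityEstimate(0, 150, parts)); // 75, inside [50, 100]
    System.out.println(tunerEstimate(50, 100, 0.5));    // 75
  }
}

The two bounds encode the extreme assumptions that the partitions share all values (lower bound) or none (higher bound); the density estimate is trusted only when it lands between them.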
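The extrapolate() implementations likewise share a single geometric idea: among the partitions that do have stats, take the ones holding the smallest and largest value of a statistic, then extend that linear trend across the whole partition index range [0, numParts). A simplified sketch of the highValue case, again under the caveat that the names are illustrative:

// Standalone sketch of the index-based extrapolation used by the
// extrapolate() methods for highValue; illustrative names only.
public class ExtrapolationSketch {

  // min/max are the smallest and largest observed highValue among the
  // partitions with stats; minInd/maxInd are their (adjusted) partition
  // indices; rightBorder is the total number of partitions.
  static double extrapolateHigh(double min, double minInd,
                                double max, double maxInd, int rightBorder) {
    if (minInd == maxInd) {
      return min; // a single data point, nothing to extend
    } else if (minInd < maxInd) {
      // values grow with the partition index: project onto the right border
      return min + (max - min) * (rightBorder - minInd) / (maxInd - minInd);
    } else {
      // values shrink with the partition index: project onto the left border
      return min + (max - min) * minInd / (minInd - maxInd);
    }
  }

  public static void main(String[] args) {
    // highValue 20.0 observed at partition index 2 and 70.0 at index 7,
    // out of 10 partitions: the rising trend is extended to index 10.
    System.out.println(extrapolateHigh(20.0, 2, 70.0, 7, 10)); // 100.0
  }
}

When the trend rises toward higher partition indices, the maximum is projected onto the right border (numParts); when it falls, onto the left border (index 0). The same formula, with the borders swapped, produces lowValue, avgColLen, maxColLen, and the non-density NDV estimate in the files below.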
http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
deleted file mode 100644
index ece77dd..0000000
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hadoop.hive.metastore.columnstats.aggr;
-
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
-import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
-import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector;
-import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implements
-    IExtrapolatePartStatus {
-
-  private static final Logger LOG = LoggerFactory.getLogger(DoubleColumnStatsAggregator.class);
-
-  @Override
-  public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWithSourceInfo,
-      List<String> partNames, boolean areAllPartsFound) throws MetaException {
-    ColumnStatisticsObj statsObj = null;
-    String colType = null;
-    String colName = null;
-    // check if all the ColumnStatisticsObjs contain stats and all the ndv are
-    // bitvectors
-    boolean doAllPartitionContainStats = partNames.size() == colStatsWithSourceInfo.size();
-    NumDistinctValueEstimator ndvEstimator = null;
-    for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
-      ColumnStatisticsObj cso = csp.getColStatsObj();
-      if (statsObj == null) {
-        colName = cso.getColName();
-        colType = cso.getColType();
-        statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType,
-            cso.getStatsData().getSetField());
-        LOG.trace("doAllPartitionContainStats for column: {} 
is: {}", colName, - doAllPartitionContainStats); - } - DoubleColumnStatsDataInspector doubleColumnStatsData = - (DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats(); - if (doubleColumnStatsData.getNdvEstimator() == null) { - ndvEstimator = null; - break; - } else { - // check if all of the bit vectors can merge - NumDistinctValueEstimator estimator = doubleColumnStatsData.getNdvEstimator(); - if (ndvEstimator == null) { - ndvEstimator = estimator; - } else { - if (ndvEstimator.canMerge(estimator)) { - continue; - } else { - ndvEstimator = null; - break; - } - } - } - } - if (ndvEstimator != null) { - ndvEstimator = NumDistinctValueEstimatorFactory - .getEmptyNumDistinctValueEstimator(ndvEstimator); - } - LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); - ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); - if (doAllPartitionContainStats || colStatsWithSourceInfo.size() < 2) { - DoubleColumnStatsDataInspector aggregateData = null; - long lowerBound = 0; - long higherBound = 0; - double densityAvgSum = 0.0; - for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { - ColumnStatisticsObj cso = csp.getColStatsObj(); - DoubleColumnStatsDataInspector newData = - (DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats(); - lowerBound = Math.max(lowerBound, newData.getNumDVs()); - higherBound += newData.getNumDVs(); - densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); - if (ndvEstimator != null) { - ndvEstimator.mergeEstimators(newData.getNdvEstimator()); - } - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); - aggregateData - .setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); - } - } - if (ndvEstimator != null) { - // if all the ColumnStatisticsObjs contain bitvectors, we do not need to - // use uniform distribution assumption because we can merge bitvectors - // to get a good estimation. - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - } else { - long estimation; - if (useDensityFunctionForNDVEstimation) { - // We have estimation, lowerbound and higherbound. We use estimation - // if it is between lowerbound and higherbound. - double densityAvg = densityAvgSum / partNames.size(); - estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg); - if (estimation < lowerBound) { - estimation = lowerBound; - } else if (estimation > higherBound) { - estimation = higherBound; - } - } else { - estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); - } - aggregateData.setNumDVs(estimation); - } - columnStatisticsData.setDoubleStats(aggregateData); - } else { - // we need extrapolation - LOG.debug("start extrapolation for " + colName); - Map<String, Integer> indexMap = new HashMap<>(); - for (int index = 0; index < partNames.size(); index++) { - indexMap.put(partNames.get(index), index); - } - Map<String, Double> adjustedIndexMap = new HashMap<>(); - Map<String, ColumnStatisticsData> adjustedStatsMap = new HashMap<>(); - // while we scan the css, we also get the densityAvg, lowerbound and - // higerbound when useDensityFunctionForNDVEstimation is true. 
- double densityAvgSum = 0.0; - if (ndvEstimator == null) { - // if not every partition uses bitvector for ndv, we just fall back to - // the traditional extrapolation methods. - for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { - ColumnStatisticsObj cso = csp.getColStatsObj(); - String partName = csp.getPartName(); - DoubleColumnStatsData newData = cso.getStatsData().getDoubleStats(); - if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); - } - adjustedIndexMap.put(partName, (double) indexMap.get(partName)); - adjustedStatsMap.put(partName, cso.getStatsData()); - } - } else { - // we first merge all the adjacent bitvectors that we could merge and - // derive new partition names and index. - StringBuilder pseudoPartName = new StringBuilder(); - double pseudoIndexSum = 0; - int length = 0; - int curIndex = -1; - DoubleColumnStatsData aggregateData = null; - for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { - ColumnStatisticsObj cso = csp.getColStatsObj(); - String partName = csp.getPartName(); - DoubleColumnStatsDataInspector newData = - (DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats(); - // newData.isSetBitVectors() should be true for sure because we - // already checked it before. - if (indexMap.get(partName) != curIndex) { - // There is bitvector, but it is not adjacent to the previous ones. - if (length > 0) { - // we have to set ndv - adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - ColumnStatisticsData csd = new ColumnStatisticsData(); - csd.setDoubleStats(aggregateData); - adjustedStatsMap.put(pseudoPartName.toString(), csd); - if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (aggregateData.getHighValue() - aggregateData.getLowValue()) / aggregateData.getNumDVs(); - } - // reset everything - pseudoPartName = new StringBuilder(); - pseudoIndexSum = 0; - length = 0; - ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator); - } - aggregateData = null; - } - curIndex = indexMap.get(partName); - pseudoPartName.append(partName); - pseudoIndexSum += curIndex; - length++; - curIndex++; - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); - aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), - newData.getHighValue())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - } - ndvEstimator.mergeEstimators(newData.getNdvEstimator()); - } - if (length > 0) { - // we have to set ndv - adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - ColumnStatisticsData csd = new ColumnStatisticsData(); - csd.setDoubleStats(aggregateData); - adjustedStatsMap.put(pseudoPartName.toString(), csd); - if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (aggregateData.getHighValue() - aggregateData.getLowValue()) / aggregateData.getNumDVs(); - } - } - } - extrapolate(columnStatisticsData, partNames.size(), colStatsWithSourceInfo.size(), - adjustedIndexMap, adjustedStatsMap, densityAvgSum / adjustedStatsMap.size()); - } - LOG.debug( - "Ndv estimatation for {} is {}. # of partitions requested: {}. 
# of partitions found: {}", - colName, columnStatisticsData.getDoubleStats().getNumDVs(), partNames.size(), - colStatsWithSourceInfo.size()); - statsObj.setStatsData(columnStatisticsData); - return statsObj; - } - - @Override - public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, - int numPartsWithStats, Map<String, Double> adjustedIndexMap, - Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) { - int rightBorderInd = numParts; - DoubleColumnStatsDataInspector extrapolateDoubleData = new DoubleColumnStatsDataInspector(); - Map<String, DoubleColumnStatsData> extractedAdjustedStatsMap = new HashMap<>(); - for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) { - extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getDoubleStats()); - } - List<Map.Entry<String, DoubleColumnStatsData>> list = new LinkedList<>( - extractedAdjustedStatsMap.entrySet()); - // get the lowValue - Collections.sort(list, new Comparator<Map.Entry<String, DoubleColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, DoubleColumnStatsData> o1, - Map.Entry<String, DoubleColumnStatsData> o2) { - return Double.compare(o1.getValue().getLowValue(), o2.getValue().getLowValue()); - } - }); - double minInd = adjustedIndexMap.get(list.get(0).getKey()); - double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - double lowValue = 0; - double min = list.get(0).getValue().getLowValue(); - double max = list.get(list.size() - 1).getValue().getLowValue(); - if (minInd == maxInd) { - lowValue = min; - } else if (minInd < maxInd) { - // left border is the min - lowValue = (max - (max - min) * maxInd / (maxInd - minInd)); - } else { - // right border is the min - lowValue = (max - (max - min) * (rightBorderInd - maxInd) / (minInd - maxInd)); - } - - // get the highValue - Collections.sort(list, new Comparator<Map.Entry<String, DoubleColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, DoubleColumnStatsData> o1, - Map.Entry<String, DoubleColumnStatsData> o2) { - return Double.compare(o1.getValue().getHighValue(), o2.getValue().getHighValue()); - } - }); - minInd = adjustedIndexMap.get(list.get(0).getKey()); - maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - double highValue = 0; - min = list.get(0).getValue().getHighValue(); - max = list.get(list.size() - 1).getValue().getHighValue(); - if (minInd == maxInd) { - highValue = min; - } else if (minInd < maxInd) { - // right border is the max - highValue = (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - highValue = (min + (max - min) * minInd / (minInd - maxInd)); - } - - // get the #nulls - long numNulls = 0; - for (Map.Entry<String, DoubleColumnStatsData> entry : extractedAdjustedStatsMap.entrySet()) { - numNulls += entry.getValue().getNumNulls(); - } - // we scale up sumNulls based on the number of partitions - numNulls = numNulls * numParts / numPartsWithStats; - - // get the ndv - long ndv = 0; - long ndvMin = 0; - long ndvMax = 0; - Collections.sort(list, new Comparator<Map.Entry<String, DoubleColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, DoubleColumnStatsData> o1, - Map.Entry<String, DoubleColumnStatsData> o2) { - return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs()); - } - }); - long lowerBound = list.get(list.size() - 1).getValue().getNumDVs(); - long higherBound = 0; - for (Map.Entry<String, DoubleColumnStatsData> entry : 
list) { - higherBound += entry.getValue().getNumDVs(); - } - if (useDensityFunctionForNDVEstimation && densityAvg != 0.0) { - ndv = (long) ((highValue - lowValue) / densityAvg); - if (ndv < lowerBound) { - ndv = lowerBound; - } else if (ndv > higherBound) { - ndv = higherBound; - } - } else { - minInd = adjustedIndexMap.get(list.get(0).getKey()); - maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - ndvMin = list.get(0).getValue().getNumDVs(); - ndvMax = list.get(list.size() - 1).getValue().getNumDVs(); - if (minInd == maxInd) { - ndv = ndvMin; - } else if (minInd < maxInd) { - // right border is the max - ndv = (long) (ndvMin + (ndvMax - ndvMin) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - ndv = (long) (ndvMin + (ndvMax - ndvMin) * minInd / (minInd - maxInd)); - } - } - extrapolateDoubleData.setLowValue(lowValue); - extrapolateDoubleData.setHighValue(highValue); - extrapolateDoubleData.setNumNulls(numNulls); - extrapolateDoubleData.setNumDVs(ndv); - extrapolateData.setDoubleStats(extrapolateDoubleData); - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/IExtrapolatePartStatus.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/IExtrapolatePartStatus.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/IExtrapolatePartStatus.java deleted file mode 100644 index 98a121b..0000000 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/IExtrapolatePartStatus.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.metastore.columnstats.aggr; - -import java.util.Map; - -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; - -public interface IExtrapolatePartStatus { - // The following function will extrapolate the stats when the column stats of - // some partitions are missing. - /** - * @param extrapolateData - * it will carry back the specific stats, e.g., DOUBLE_STATS or - * LONG_STATS - * @param numParts - * the total number of partitions - * @param numPartsWithStats - * the number of partitions that have stats - * @param adjustedIndexMap - * the partition name to index map - * @param adjustedStatsMap - * the partition name to its stats map - * @param densityAvg - * the average of ndv density, which is useful when - * useDensityFunctionForNDVEstimation is true. 
- */ - void extrapolate(ColumnStatisticsData extrapolateData, int numParts, - int numPartsWithStats, Map<String, Double> adjustedIndexMap, - Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg); - -} http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java deleted file mode 100644 index e6823d3..0000000 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java +++ /dev/null @@ -1,348 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hadoop.hive.metastore.columnstats.aggr; - -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.ColumnStatistics; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class LongColumnStatsAggregator extends ColumnStatsAggregator implements - IExtrapolatePartStatus { - - private static final Logger LOG = LoggerFactory.getLogger(LongColumnStatsAggregator.class); - - @Override - public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWithSourceInfo, - List<String> partNames, boolean areAllPartsFound) throws MetaException { - ColumnStatisticsObj statsObj = null; - String colType = null; - String colName = null; - // check if all the ColumnStatisticsObjs contain stats and all the ndv are - // bitvectors - boolean doAllPartitionContainStats = partNames.size() == colStatsWithSourceInfo.size(); - NumDistinctValueEstimator ndvEstimator = null; - for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { - ColumnStatisticsObj cso = csp.getColStatsObj(); - if (statsObj == null) { - colName = cso.getColName(); - colType = cso.getColType(); - statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, - cso.getStatsData().getSetField()); - LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName, - doAllPartitionContainStats); - } - LongColumnStatsDataInspector longColumnStatsData = - (LongColumnStatsDataInspector) cso.getStatsData().getLongStats(); - if (longColumnStatsData.getNdvEstimator() == null) { - ndvEstimator = null; - break; - } else { - // check if all of the bit vectors can merge - NumDistinctValueEstimator estimator = longColumnStatsData.getNdvEstimator(); - if (ndvEstimator == null) { - ndvEstimator = estimator; - } else { - if (ndvEstimator.canMerge(estimator)) { - continue; - } else { - ndvEstimator = null; - break; - } - } - } - } - if (ndvEstimator != null) { - ndvEstimator = NumDistinctValueEstimatorFactory - .getEmptyNumDistinctValueEstimator(ndvEstimator); - } - LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); - ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); - if (doAllPartitionContainStats || colStatsWithSourceInfo.size() < 2) { - LongColumnStatsDataInspector aggregateData = null; - long lowerBound = 0; - long higherBound = 0; - double densityAvgSum = 0.0; - for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { - ColumnStatisticsObj cso = csp.getColStatsObj(); - LongColumnStatsDataInspector newData = - (LongColumnStatsDataInspector) cso.getStatsData().getLongStats(); - lowerBound = Math.max(lowerBound, newData.getNumDVs()); - higherBound += newData.getNumDVs(); - densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); - if (ndvEstimator != null) 
{ - ndvEstimator.mergeEstimators(newData.getNdvEstimator()); - } - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); - aggregateData - .setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); - } - } - if (ndvEstimator != null) { - // if all the ColumnStatisticsObjs contain bitvectors, we do not need to - // use uniform distribution assumption because we can merge bitvectors - // to get a good estimation. - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - } else { - long estimation; - if (useDensityFunctionForNDVEstimation) { - // We have estimation, lowerbound and higherbound. We use estimation - // if it is between lowerbound and higherbound. - double densityAvg = densityAvgSum / partNames.size(); - estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg); - if (estimation < lowerBound) { - estimation = lowerBound; - } else if (estimation > higherBound) { - estimation = higherBound; - } - } else { - estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); - } - aggregateData.setNumDVs(estimation); - } - columnStatisticsData.setLongStats(aggregateData); - } else { - // we need extrapolation - LOG.debug("start extrapolation for " + colName); - - Map<String, Integer> indexMap = new HashMap<>(); - for (int index = 0; index < partNames.size(); index++) { - indexMap.put(partNames.get(index), index); - } - Map<String, Double> adjustedIndexMap = new HashMap<>(); - Map<String, ColumnStatisticsData> adjustedStatsMap = new HashMap<>(); - // while we scan the css, we also get the densityAvg, lowerbound and - // higerbound when useDensityFunctionForNDVEstimation is true. - double densityAvgSum = 0.0; - if (ndvEstimator == null) { - // if not every partition uses bitvector for ndv, we just fall back to - // the traditional extrapolation methods. - for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { - ColumnStatisticsObj cso = csp.getColStatsObj(); - String partName = csp.getPartName(); - LongColumnStatsData newData = cso.getStatsData().getLongStats(); - if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); - } - adjustedIndexMap.put(partName, (double) indexMap.get(partName)); - adjustedStatsMap.put(partName, cso.getStatsData()); - } - } else { - // we first merge all the adjacent bitvectors that we could merge and - // derive new partition names and index. - StringBuilder pseudoPartName = new StringBuilder(); - double pseudoIndexSum = 0; - int length = 0; - int curIndex = -1; - LongColumnStatsDataInspector aggregateData = null; - for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { - ColumnStatisticsObj cso = csp.getColStatsObj(); - String partName = csp.getPartName(); - LongColumnStatsDataInspector newData = - (LongColumnStatsDataInspector) cso.getStatsData().getLongStats(); - // newData.isSetBitVectors() should be true for sure because we - // already checked it before. - if (indexMap.get(partName) != curIndex) { - // There is bitvector, but it is not adjacent to the previous ones. 
- if (length > 0) { - // we have to set ndv - adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - ColumnStatisticsData csd = new ColumnStatisticsData(); - csd.setLongStats(aggregateData); - adjustedStatsMap.put(pseudoPartName.toString(), csd); - if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (aggregateData.getHighValue() - aggregateData.getLowValue()) / aggregateData.getNumDVs(); - } - // reset everything - pseudoPartName = new StringBuilder(); - pseudoIndexSum = 0; - length = 0; - ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator); - } - aggregateData = null; - } - curIndex = indexMap.get(partName); - pseudoPartName.append(partName); - pseudoIndexSum += curIndex; - length++; - curIndex++; - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); - aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), - newData.getHighValue())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - } - ndvEstimator.mergeEstimators(newData.getNdvEstimator()); - } - if (length > 0) { - // we have to set ndv - adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - ColumnStatisticsData csd = new ColumnStatisticsData(); - csd.setLongStats(aggregateData); - adjustedStatsMap.put(pseudoPartName.toString(), csd); - if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (aggregateData.getHighValue() - aggregateData.getLowValue()) / aggregateData.getNumDVs(); - } - } - } - extrapolate(columnStatisticsData, partNames.size(), colStatsWithSourceInfo.size(), - adjustedIndexMap, adjustedStatsMap, densityAvgSum / adjustedStatsMap.size()); - } - LOG.debug( - "Ndv estimatation for {} is {} # of partitions requested: {} # of partitions found: {}", - colName, columnStatisticsData.getLongStats().getNumDVs(), partNames.size(), - colStatsWithSourceInfo.size()); - statsObj.setStatsData(columnStatisticsData); - return statsObj; - } - - @Override - public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, - int numPartsWithStats, Map<String, Double> adjustedIndexMap, - Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) { - int rightBorderInd = numParts; - LongColumnStatsDataInspector extrapolateLongData = new LongColumnStatsDataInspector(); - Map<String, LongColumnStatsData> extractedAdjustedStatsMap = new HashMap<>(); - for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) { - extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getLongStats()); - } - List<Map.Entry<String, LongColumnStatsData>> list = new LinkedList<>( - extractedAdjustedStatsMap.entrySet()); - // get the lowValue - Collections.sort(list, new Comparator<Map.Entry<String, LongColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, LongColumnStatsData> o1, - Map.Entry<String, LongColumnStatsData> o2) { - return Long.compare(o1.getValue().getLowValue(), o2.getValue().getLowValue()); - } - }); - double minInd = adjustedIndexMap.get(list.get(0).getKey()); - double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - long lowValue = 0; - long min = list.get(0).getValue().getLowValue(); - long max = list.get(list.size() - 1).getValue().getLowValue(); - if (minInd == 
maxInd) { - lowValue = min; - } else if (minInd < maxInd) { - // left border is the min - lowValue = (long) (max - (max - min) * maxInd / (maxInd - minInd)); - } else { - // right border is the min - lowValue = (long) (max - (max - min) * (rightBorderInd - maxInd) / (minInd - maxInd)); - } - - // get the highValue - Collections.sort(list, new Comparator<Map.Entry<String, LongColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, LongColumnStatsData> o1, - Map.Entry<String, LongColumnStatsData> o2) { - return Long.compare(o1.getValue().getHighValue(), o2.getValue().getHighValue()); - } - }); - minInd = adjustedIndexMap.get(list.get(0).getKey()); - maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - long highValue = 0; - min = list.get(0).getValue().getHighValue(); - max = list.get(list.size() - 1).getValue().getHighValue(); - if (minInd == maxInd) { - highValue = min; - } else if (minInd < maxInd) { - // right border is the max - highValue = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - highValue = (long) (min + (max - min) * minInd / (minInd - maxInd)); - } - - // get the #nulls - long numNulls = 0; - for (Map.Entry<String, LongColumnStatsData> entry : extractedAdjustedStatsMap.entrySet()) { - numNulls += entry.getValue().getNumNulls(); - } - // we scale up sumNulls based on the number of partitions - numNulls = numNulls * numParts / numPartsWithStats; - - // get the ndv - long ndv = 0; - Collections.sort(list, new Comparator<Map.Entry<String, LongColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, LongColumnStatsData> o1, - Map.Entry<String, LongColumnStatsData> o2) { - return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs()); - } - }); - long lowerBound = list.get(list.size() - 1).getValue().getNumDVs(); - long higherBound = 0; - for (Map.Entry<String, LongColumnStatsData> entry : list) { - higherBound += entry.getValue().getNumDVs(); - } - if (useDensityFunctionForNDVEstimation && densityAvg != 0.0) { - ndv = (long) ((highValue - lowValue) / densityAvg); - if (ndv < lowerBound) { - ndv = lowerBound; - } else if (ndv > higherBound) { - ndv = higherBound; - } - } else { - minInd = adjustedIndexMap.get(list.get(0).getKey()); - maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - min = list.get(0).getValue().getNumDVs(); - max = list.get(list.size() - 1).getValue().getNumDVs(); - if (minInd == maxInd) { - ndv = min; - } else if (minInd < maxInd) { - // right border is the max - ndv = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - ndv = (long) (min + (max - min) * minInd / (minInd - maxInd)); - } - } - extrapolateLongData.setLowValue(lowValue); - extrapolateLongData.setHighValue(highValue); - extrapolateLongData.setNumNulls(numNulls); - extrapolateLongData.setNumDVs(ndv); - extrapolateData.setLongStats(extrapolateLongData); - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
deleted file mode 100644
index 9537647..0000000
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hadoop.hive.metastore.columnstats.aggr;
-
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
-import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
-import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
-import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
-import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
-import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class StringColumnStatsAggregator extends ColumnStatsAggregator implements
-    IExtrapolatePartStatus {
-
-  private static final Logger LOG = LoggerFactory.getLogger(StringColumnStatsAggregator.class);
-
-  @Override
-  public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWithSourceInfo,
-      List<String> partNames, boolean areAllPartsFound) throws MetaException {
-    ColumnStatisticsObj statsObj = null;
-    String colType = null;
-    String colName = null;
-    // check if all the ColumnStatisticsObjs contain stats and all the ndv are
-    // bitvectors
-    boolean doAllPartitionContainStats = partNames.size() == colStatsWithSourceInfo.size();
-    NumDistinctValueEstimator ndvEstimator = null;
-    for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
-      ColumnStatisticsObj cso = csp.getColStatsObj();
-      if (statsObj == null) {
-        colName = cso.getColName();
-        colType = cso.getColType();
-        statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType,
-            cso.getStatsData().getSetField());
-        LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName,
-            doAllPartitionContainStats);
-      }
-      StringColumnStatsDataInspector stringColumnStatsData =
-          (StringColumnStatsDataInspector) cso.getStatsData().getStringStats();
-      if (stringColumnStatsData.getNdvEstimator() == null) {
-        ndvEstimator = null;
-        break;
-      } else {
-        // check if all of the bit vectors can merge -
NumDistinctValueEstimator estimator = stringColumnStatsData.getNdvEstimator(); - if (ndvEstimator == null) { - ndvEstimator = estimator; - } else { - if (ndvEstimator.canMerge(estimator)) { - continue; - } else { - ndvEstimator = null; - break; - } - } - } - } - if (ndvEstimator != null) { - ndvEstimator = NumDistinctValueEstimatorFactory - .getEmptyNumDistinctValueEstimator(ndvEstimator); - } - LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); - ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); - if (doAllPartitionContainStats || colStatsWithSourceInfo.size() < 2) { - StringColumnStatsDataInspector aggregateData = null; - for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { - ColumnStatisticsObj cso = csp.getColStatsObj(); - StringColumnStatsDataInspector newData = - (StringColumnStatsDataInspector) cso.getStatsData().getStringStats(); - if (ndvEstimator != null) { - ndvEstimator.mergeEstimators(newData.getNdvEstimator()); - } - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - aggregateData - .setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); - aggregateData - .setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); - } - } - if (ndvEstimator != null) { - // if all the ColumnStatisticsObjs contain bitvectors, we do not need to - // use uniform distribution assumption because we can merge bitvectors - // to get a good estimation. - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - } else { - // aggregateData already has the ndv of the max of all - } - columnStatisticsData.setStringStats(aggregateData); - } else { - // we need extrapolation - LOG.debug("start extrapolation for " + colName); - - Map<String, Integer> indexMap = new HashMap<>(); - for (int index = 0; index < partNames.size(); index++) { - indexMap.put(partNames.get(index), index); - } - Map<String, Double> adjustedIndexMap = new HashMap<>(); - Map<String, ColumnStatisticsData> adjustedStatsMap = new HashMap<>(); - if (ndvEstimator == null) { - // if not every partition uses bitvector for ndv, we just fall back to - // the traditional extrapolation methods. - for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { - ColumnStatisticsObj cso = csp.getColStatsObj(); - String partName = csp.getPartName(); - adjustedIndexMap.put(partName, (double) indexMap.get(partName)); - adjustedStatsMap.put(partName, cso.getStatsData()); - } - } else { - // we first merge all the adjacent bitvectors that we could merge and - // derive new partition names and index. - StringBuilder pseudoPartName = new StringBuilder(); - double pseudoIndexSum = 0; - int length = 0; - int curIndex = -1; - StringColumnStatsDataInspector aggregateData = null; - for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { - ColumnStatisticsObj cso = csp.getColStatsObj(); - String partName = csp.getPartName(); - StringColumnStatsDataInspector newData = - (StringColumnStatsDataInspector) cso.getStatsData().getStringStats(); - // newData.isSetBitVectors() should be true for sure because we - // already checked it before. - if (indexMap.get(partName) != curIndex) { - // There is bitvector, but it is not adjacent to the previous ones. 
- if (length > 0) { - // we have to set ndv - adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - ColumnStatisticsData csd = new ColumnStatisticsData(); - csd.setStringStats(aggregateData); - adjustedStatsMap.put(pseudoPartName.toString(), csd); - // reset everything - pseudoPartName = new StringBuilder(); - pseudoIndexSum = 0; - length = 0; - ndvEstimator = NumDistinctValueEstimatorFactory - .getEmptyNumDistinctValueEstimator(ndvEstimator); - } - aggregateData = null; - } - curIndex = indexMap.get(partName); - pseudoPartName.append(partName); - pseudoIndexSum += curIndex; - length++; - curIndex++; - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), - newData.getAvgColLen())); - aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), - newData.getMaxColLen())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - } - ndvEstimator.mergeEstimators(newData.getNdvEstimator()); - } - if (length > 0) { - // we have to set ndv - adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - ColumnStatisticsData csd = new ColumnStatisticsData(); - csd.setStringStats(aggregateData); - adjustedStatsMap.put(pseudoPartName.toString(), csd); - } - } - extrapolate(columnStatisticsData, partNames.size(), colStatsWithSourceInfo.size(), - adjustedIndexMap, adjustedStatsMap, -1); - } - LOG.debug( - "Ndv estimation for {} is {} # of partitions requested: {} # of partitions found: {}", - colName, columnStatisticsData.getStringStats().getNumDVs(), partNames.size(), - colStatsWithSourceInfo.size()); - statsObj.setStatsData(columnStatisticsData); - return statsObj; - } - - @Override - public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, - int numPartsWithStats, Map<String, Double> adjustedIndexMap, - Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) { - int rightBorderInd = numParts; - StringColumnStatsDataInspector extrapolateStringData = new StringColumnStatsDataInspector(); - Map<String, StringColumnStatsData> extractedAdjustedStatsMap = new HashMap<>(); - for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) { - extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getStringStats()); - } - List<Map.Entry<String, StringColumnStatsData>> list = new LinkedList<>( - extractedAdjustedStatsMap.entrySet()); - // get the avgLen - Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, StringColumnStatsData> o1, - Map.Entry<String, StringColumnStatsData> o2) { - return Double.compare(o1.getValue().getAvgColLen(), o2.getValue().getAvgColLen()); - } - }); - double minInd = adjustedIndexMap.get(list.get(0).getKey()); - double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - double avgColLen = 0; - double min = list.get(0).getValue().getAvgColLen(); - double max = list.get(list.size() - 1).getValue().getAvgColLen(); - if (minInd == maxInd) { - avgColLen = min; - } else if (minInd < maxInd) { - // right border is the max - avgColLen = (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - avgColLen = (min + (max - min) * minInd / (minInd - maxInd)); - } - - // get the maxLen -
Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, StringColumnStatsData> o1, - Map.Entry<String, StringColumnStatsData> o2) { - return Long.compare(o1.getValue().getMaxColLen(), o2.getValue().getMaxColLen()); - } - }); - minInd = adjustedIndexMap.get(list.get(0).getKey()); - maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - double maxColLen = 0; - min = list.get(0).getValue().getMaxColLen(); - max = list.get(list.size() - 1).getValue().getMaxColLen(); - if (minInd == maxInd) { - maxColLen = min; - } else if (minInd < maxInd) { - // right border is the max - maxColLen = (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - maxColLen = (min + (max - min) * minInd / (minInd - maxInd)); - } - - // get the #nulls - long numNulls = 0; - for (Map.Entry<String, StringColumnStatsData> entry : extractedAdjustedStatsMap.entrySet()) { - numNulls += entry.getValue().getNumNulls(); - } - // we scale up numNulls based on the number of partitions - numNulls = numNulls * numParts / numPartsWithStats; - - // get the ndv - long ndv = 0; - Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, StringColumnStatsData> o1, - Map.Entry<String, StringColumnStatsData> o2) { - return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs()); - } - }); - minInd = adjustedIndexMap.get(list.get(0).getKey()); - maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - min = list.get(0).getValue().getNumDVs(); - max = list.get(list.size() - 1).getValue().getNumDVs(); - if (minInd == maxInd) { - ndv = (long) min; - } else if (minInd < maxInd) { - // right border is the max - ndv = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - ndv = (long) (min + (max - min) * minInd / (minInd - maxInd)); - } - extrapolateStringData.setAvgColLen(avgColLen); - extrapolateStringData.setMaxColLen((long) maxColLen); - extrapolateStringData.setNumNulls(numNulls); - extrapolateStringData.setNumDVs(ndv); - extrapolateData.setStringStats(extrapolateStringData); - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java deleted file mode 100644 index f6eacbc..0000000 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.metastore.columnstats.cache; - -import java.nio.ByteBuffer; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; - -@SuppressWarnings("serial") -public class DateColumnStatsDataInspector extends DateColumnStatsData { - - private NumDistinctValueEstimator ndvEstimator; - - public DateColumnStatsDataInspector() { - super(); - } - - public DateColumnStatsDataInspector(long numNulls, long numDVs) { - super(numNulls, numDVs); - } - - public DateColumnStatsDataInspector(DateColumnStatsDataInspector other) { - super(other); - if (other.ndvEstimator != null) { - super.setBitVectors(other.ndvEstimator.serialize()); - } - } - - @Override - public DateColumnStatsDataInspector deepCopy() { - return new DateColumnStatsDataInspector(this); - } - - @Override - public byte[] getBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.getBitVectors(); - } - - @Override - public ByteBuffer bufferForBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.bufferForBitVectors(); - } - - @Override - public void setBitVectors(byte[] bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void setBitVectors(ByteBuffer bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void unsetBitVectors() { - super.unsetBitVectors(); - this.ndvEstimator = null; - } - - @Override - public boolean isSetBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.isSetBitVectors(); - } - - @Override - public void setBitVectorsIsSet(boolean value) { - if (ndvEstimator != null) { - updateBitVectors(); - } - super.setBitVectorsIsSet(value); - } - - public NumDistinctValueEstimator getNdvEstimator() { - if (isSetBitVectors() && getBitVectors().length != 0) { - updateNdvEstimator(); - } - return ndvEstimator; - } - - public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) { - super.unsetBitVectors(); - this.ndvEstimator = ndvEstimator; - } - - private void updateBitVectors() { - super.setBitVectors(ndvEstimator.serialize()); - this.ndvEstimator = null; - } - - private void updateNdvEstimator() { - this.ndvEstimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(super.getBitVectors()); - super.unsetBitVectors(); - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java deleted file mode 100644 index e2427f3..0000000
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.metastore.columnstats.cache; - -import java.nio.ByteBuffer; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; - -@SuppressWarnings("serial") -public class DecimalColumnStatsDataInspector extends DecimalColumnStatsData { - - private NumDistinctValueEstimator ndvEstimator; - - public DecimalColumnStatsDataInspector() { - super(); - } - - public DecimalColumnStatsDataInspector(long numNulls, long numDVs) { - super(numNulls, numDVs); - } - - public DecimalColumnStatsDataInspector(DecimalColumnStatsDataInspector other) { - super(other); - if (other.ndvEstimator != null) { - super.setBitVectors(other.ndvEstimator.serialize()); - } - } - - @Override - public DecimalColumnStatsDataInspector deepCopy() { - return new DecimalColumnStatsDataInspector(this); - } - - @Override - public byte[] getBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.getBitVectors(); - } - - @Override - public ByteBuffer bufferForBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.bufferForBitVectors(); - } - - @Override - public void setBitVectors(byte[] bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void setBitVectors(ByteBuffer bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void unsetBitVectors() { - super.unsetBitVectors(); - this.ndvEstimator = null; - } - - @Override - public boolean isSetBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.isSetBitVectors(); - } - - @Override - public void setBitVectorsIsSet(boolean value) { - if (ndvEstimator != null) { - updateBitVectors(); - } - super.setBitVectorsIsSet(value); - } - - public NumDistinctValueEstimator getNdvEstimator() { - if (isSetBitVectors() && getBitVectors().length != 0) { - updateNdvEstimator(); - } - return ndvEstimator; - } - - public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) { - super.unsetBitVectors(); - this.ndvEstimator = ndvEstimator; - } - - private void updateBitVectors() { - super.setBitVectors(ndvEstimator.serialize()); - this.ndvEstimator = null; - } - - private void updateNdvEstimator() { - this.ndvEstimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(super.getBitVectors()); - super.unsetBitVectors(); - } - -}
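Each of the *ColumnStatsDataInspector classes deleted in this commit (Date, Decimal, Double, Long, and the String variant used by StringColumnStatsAggregator above) wraps a Thrift-generated *ColumnStatsData struct and keeps at most one live representation of the NDV sketch: either the serialized bitVectors bytes held by the superclass, or a deserialized in-memory NumDistinctValueEstimator, converting lazily whenever the other form is requested. A minimal sketch of that exchange, with a hypothetical Estimator interface standing in for NumDistinctValueEstimator and a plain field standing in for the Thrift superclass:

public class LazyNdvHolder {
  /** Hypothetical stand-in for NumDistinctValueEstimator. */
  interface Estimator {
    byte[] serialize();
  }

  private byte[] bitVectors;      // serialized form (what Thrift would persist)
  private Estimator ndvEstimator; // in-memory form; at most one of the two is live

  // Reader path: a live estimator is flushed to bytes on demand, then dropped,
  // mirroring updateBitVectors() in the inspector classes above.
  public byte[] getBitVectors() {
    if (ndvEstimator != null) {
      bitVectors = ndvEstimator.serialize();
      ndvEstimator = null;
    }
    return bitVectors;
  }

  // Writer path: installing raw bytes invalidates any live estimator.
  public void setBitVectors(byte[] bytes) {
    this.bitVectors = bytes;
    this.ndvEstimator = null;
  }

  // Installing an estimator discards the serialized form until the next read,
  // mirroring setNdvEstimator(), which calls unsetBitVectors().
  public void setNdvEstimator(Estimator estimator) {
    this.bitVectors = null;
    this.ndvEstimator = estimator;
  }

  public static void main(String[] args) {
    LazyNdvHolder holder = new LazyNdvHolder();
    holder.setNdvEstimator(() -> new byte[] {1, 2, 3}); // lambda implements Estimator
    System.out.println(holder.getBitVectors().length);  // prints 3; estimator now flushed
  }
}

The invariant that only one representation is live at a time is also what makes the copy-constructor guard in these classes subtle: when the source object holds an in-memory estimator, its serialized form is unset, so the copy must serialize other.ndvEstimator explicitly.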
http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java deleted file mode 100644 index 7ce7127..0000000 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.metastore.columnstats.cache; - -import java.nio.ByteBuffer; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; - -@SuppressWarnings("serial") -public class DoubleColumnStatsDataInspector extends DoubleColumnStatsData { - - private NumDistinctValueEstimator ndvEstimator; - - public DoubleColumnStatsDataInspector() { - super(); - } - - public DoubleColumnStatsDataInspector(long numNulls, long numDVs) { - super(numNulls, numDVs); - } - - public DoubleColumnStatsDataInspector(DoubleColumnStatsDataInspector other) { - super(other); - if (other.ndvEstimator != null) { - super.setBitVectors(other.ndvEstimator.serialize()); - } - } - - @Override - public DoubleColumnStatsDataInspector deepCopy() { - return new DoubleColumnStatsDataInspector(this); - } - - @Override - public byte[] getBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.getBitVectors(); - } - - @Override - public ByteBuffer bufferForBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.bufferForBitVectors(); - } - - @Override - public void setBitVectors(byte[] bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void setBitVectors(ByteBuffer bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void unsetBitVectors() { - super.unsetBitVectors(); - this.ndvEstimator = null; - } - - @Override - public boolean isSetBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.isSetBitVectors(); - } - - @Override - public void setBitVectorsIsSet(boolean value) { - if (ndvEstimator != null) { - updateBitVectors(); - } - super.setBitVectorsIsSet(value); - } - - public NumDistinctValueEstimator
getNdvEstimator() { - if (isSetBitVectors() && getBitVectors().length != 0) { - updateNdvEstimator(); - } - return ndvEstimator; - } - - public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) { - super.unsetBitVectors(); - this.ndvEstimator = ndvEstimator; - } - - private void updateBitVectors() { - super.setBitVectors(ndvEstimator.serialize()); - this.ndvEstimator = null; - } - - private void updateNdvEstimator() { - this.ndvEstimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(super.getBitVectors()); - super.unsetBitVectors(); - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java deleted file mode 100644 index faf314b..0000000 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.metastore.columnstats.cache; - -import java.nio.ByteBuffer; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; - -@SuppressWarnings("serial") -public class LongColumnStatsDataInspector extends LongColumnStatsData { - - private NumDistinctValueEstimator ndvEstimator; - - public LongColumnStatsDataInspector() { - super(); - } - - public LongColumnStatsDataInspector(long numNulls, long numDVs) { - super(numNulls, numDVs); - } - - public LongColumnStatsDataInspector(LongColumnStatsDataInspector other) { - super(other); - if (other.ndvEstimator != null) { - super.setBitVectors(other.ndvEstimator.serialize()); - } - } - - @Override - public LongColumnStatsDataInspector deepCopy() { - return new LongColumnStatsDataInspector(this); - } - - @Override - public byte[] getBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.getBitVectors(); - } - - @Override - public ByteBuffer bufferForBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.bufferForBitVectors(); - } - - @Override - public void setBitVectors(byte[] bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void setBitVectors(ByteBuffer bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void unsetBitVectors() { - super.unsetBitVectors(); - this.ndvEstimator = null; - } - - @Override - public boolean isSetBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.isSetBitVectors(); - } - - @Override - public void setBitVectorsIsSet(boolean value) { - if (ndvEstimator != null) { - updateBitVectors(); - } - super.setBitVectorsIsSet(value); - } - - public NumDistinctValueEstimator getNdvEstimator() { - if (isSetBitVectors() && getBitVectors().length != 0) { - updateNdvEstimator(); - } - return ndvEstimator; - } - - public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) { - super.unsetBitVectors(); - this.ndvEstimator = ndvEstimator; - } - - private void updateBitVectors() { - super.setBitVectors(ndvEstimator.serialize()); - this.ndvEstimator = null; - } - - private void updateNdvEstimator() { - this.ndvEstimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(super.getBitVectors()); - super.unsetBitVectors(); - } - -}
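The extrapolate() method removed above illustrates the projection rule these aggregators share: partitions with stats are sorted by the statistic of interest, the smallest and largest observed values are paired with the adjusted partition indices where they occur, and the min-to-max trend is projected linearly out to the border of the full partition range (numNulls is handled differently, scaled by numParts / numPartsWithStats). A standalone sketch of that rule, with illustrative names that are not part of the Hive API:

public final class ExtrapolationSketch {

  // min/max: smallest and largest observed values of the statistic;
  // minInd/maxInd: adjusted partition indices where they were observed;
  // numParts: total number of partitions, i.e. the right border of the range.
  static double extrapolateStat(double min, double max,
                                double minInd, double maxInd, int numParts) {
    if (minInd == maxInd) {
      return min; // statistic is constant across partitions, nothing to project
    } else if (minInd < maxInd) {
      // the statistic grows toward the right border: project to index numParts
      return min + (max - min) * (numParts - minInd) / (maxInd - minInd);
    } else {
      // the statistic grows toward the left border: project to index 0
      return min + (max - min) * minInd / (minInd - maxInd);
    }
  }

  public static void main(String[] args) {
    // e.g. avgColLen 4.0 observed at partition 0 and 10.0 at partition 5,
    // extrapolated to 10 partitions total: 4 + 6 * 10 / 5 = 16.0
    System.out.println(extrapolateStat(4.0, 10.0, 0.0, 5.0, 10));
  }
}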