http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java deleted file mode 100644 index bb4a725..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java +++ /dev/null @@ -1,349 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hadoop.hive.metastore.columnstats.aggr; - -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.ColumnStatistics; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implements - IExtrapolatePartStatus { - - private static final Logger LOG = LoggerFactory.getLogger(LongColumnStatsAggregator.class); - - @Override - public ColumnStatisticsObj aggregate(String colName, List<String> partNames, - List<ColumnStatistics> css) throws MetaException { - ColumnStatisticsObj statsObj = null; - - // check if all the ColumnStatisticsObjs contain stats and all the ndv are - // bitvectors - boolean doAllPartitionContainStats = partNames.size() == css.size(); - LOG.debug("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats); - NumDistinctValueEstimator ndvEstimator = null; - String colType = null; - for (ColumnStatistics cs : css) { - if (cs.getStatsObjSize() != 1) { - throw new MetaException( - "The number of columns should be exactly one in aggrStats, but found " - + cs.getStatsObjSize()); - } - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - if (statsObj == null) { - colType = cso.getColType(); - statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso - 
.getStatsData().getSetField()); - } - DoubleColumnStatsDataInspector doubleColumnStatsData = - (DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats(); - if (doubleColumnStatsData.getNdvEstimator() == null) { - ndvEstimator = null; - break; - } else { - // check if all of the bit vectors can merge - NumDistinctValueEstimator estimator = doubleColumnStatsData.getNdvEstimator(); - if (ndvEstimator == null) { - ndvEstimator = estimator; - } else { - if (ndvEstimator.canMerge(estimator)) { - continue; - } else { - ndvEstimator = null; - break; - } - } - } - } - if (ndvEstimator != null) { - ndvEstimator = NumDistinctValueEstimatorFactory - .getEmptyNumDistinctValueEstimator(ndvEstimator); - } - LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); - ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); - if (doAllPartitionContainStats || css.size() < 2) { - DoubleColumnStatsDataInspector aggregateData = null; - long lowerBound = 0; - long higherBound = 0; - double densityAvgSum = 0.0; - for (ColumnStatistics cs : css) { - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - DoubleColumnStatsDataInspector newData = - (DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats(); - lowerBound = Math.max(lowerBound, newData.getNumDVs()); - higherBound += newData.getNumDVs(); - densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); - if (ndvEstimator != null) { - ndvEstimator.mergeEstimators(newData.getNdvEstimator()); - } - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); - aggregateData - .setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); - } 
- } - if (ndvEstimator != null) { - // if all the ColumnStatisticsObjs contain bitvectors, we do not need to - // use uniform distribution assumption because we can merge bitvectors - // to get a good estimation. - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - } else { - long estimation; - if (useDensityFunctionForNDVEstimation) { - // We have estimation, lowerbound and higherbound. We use estimation - // if it is between lowerbound and higherbound. - double densityAvg = densityAvgSum / partNames.size(); - estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg); - if (estimation < lowerBound) { - estimation = lowerBound; - } else if (estimation > higherBound) { - estimation = higherBound; - } - } else { - estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); - } - aggregateData.setNumDVs(estimation); - } - columnStatisticsData.setDoubleStats(aggregateData); - } else { - // we need extrapolation - LOG.debug("start extrapolation for " + colName); - Map<String, Integer> indexMap = new HashMap<String, Integer>(); - for (int index = 0; index < partNames.size(); index++) { - indexMap.put(partNames.get(index), index); - } - Map<String, Double> adjustedIndexMap = new HashMap<String, Double>(); - Map<String, ColumnStatisticsData> adjustedStatsMap = new HashMap<String, ColumnStatisticsData>(); - // while we scan the css, we also get the densityAvg, lowerbound and - // higerbound when useDensityFunctionForNDVEstimation is true. - double densityAvgSum = 0.0; - if (ndvEstimator == null) { - // if not every partition uses bitvector for ndv, we just fall back to - // the traditional extrapolation methods. 
- for (ColumnStatistics cs : css) { - String partName = cs.getStatsDesc().getPartName(); - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - DoubleColumnStatsData newData = cso.getStatsData().getDoubleStats(); - if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); - } - adjustedIndexMap.put(partName, (double) indexMap.get(partName)); - adjustedStatsMap.put(partName, cso.getStatsData()); - } - } else { - // we first merge all the adjacent bitvectors that we could merge and - // derive new partition names and index. - StringBuilder pseudoPartName = new StringBuilder(); - double pseudoIndexSum = 0; - int length = 0; - int curIndex = -1; - DoubleColumnStatsData aggregateData = null; - for (ColumnStatistics cs : css) { - String partName = cs.getStatsDesc().getPartName(); - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - DoubleColumnStatsDataInspector newData = - (DoubleColumnStatsDataInspector) cso.getStatsData().getDoubleStats(); - // newData.isSetBitVectors() should be true for sure because we - // already checked it before. - if (indexMap.get(partName) != curIndex) { - // There is bitvector, but it is not adjacent to the previous ones. 
- if (length > 0) { - // we have to set ndv - adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - ColumnStatisticsData csd = new ColumnStatisticsData(); - csd.setDoubleStats(aggregateData); - adjustedStatsMap.put(pseudoPartName.toString(), csd); - if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (aggregateData.getHighValue() - aggregateData.getLowValue()) / aggregateData.getNumDVs(); - } - // reset everything - pseudoPartName = new StringBuilder(); - pseudoIndexSum = 0; - length = 0; - ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator); - } - aggregateData = null; - } - curIndex = indexMap.get(partName); - pseudoPartName.append(partName); - pseudoIndexSum += curIndex; - length++; - curIndex++; - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); - aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), - newData.getHighValue())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - } - ndvEstimator.mergeEstimators(newData.getNdvEstimator()); - } - if (length > 0) { - // we have to set ndv - adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - ColumnStatisticsData csd = new ColumnStatisticsData(); - csd.setDoubleStats(aggregateData); - adjustedStatsMap.put(pseudoPartName.toString(), csd); - if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (aggregateData.getHighValue() - aggregateData.getLowValue()) / aggregateData.getNumDVs(); - } - } - } - extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap, - adjustedStatsMap, densityAvgSum / adjustedStatsMap.size()); - } - LOG.debug("Ndv estimatation for {} is {}. # of partitions requested: {}. 
# of partitions found: {}", colName, - columnStatisticsData.getDoubleStats().getNumDVs(),partNames.size(), css.size()); - statsObj.setStatsData(columnStatisticsData); - return statsObj; - } - - @Override - public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, - int numPartsWithStats, Map<String, Double> adjustedIndexMap, - Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) { - int rightBorderInd = numParts; - DoubleColumnStatsDataInspector extrapolateDoubleData = new DoubleColumnStatsDataInspector(); - Map<String, DoubleColumnStatsData> extractedAdjustedStatsMap = new HashMap<>(); - for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) { - extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getDoubleStats()); - } - List<Map.Entry<String, DoubleColumnStatsData>> list = new LinkedList<Map.Entry<String, DoubleColumnStatsData>>( - extractedAdjustedStatsMap.entrySet()); - // get the lowValue - Collections.sort(list, new Comparator<Map.Entry<String, DoubleColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, DoubleColumnStatsData> o1, - Map.Entry<String, DoubleColumnStatsData> o2) { - return Double.compare(o1.getValue().getLowValue(), o2.getValue().getLowValue()); - } - }); - double minInd = adjustedIndexMap.get(list.get(0).getKey()); - double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - double lowValue = 0; - double min = list.get(0).getValue().getLowValue(); - double max = list.get(list.size() - 1).getValue().getLowValue(); - if (minInd == maxInd) { - lowValue = min; - } else if (minInd < maxInd) { - // left border is the min - lowValue = (max - (max - min) * maxInd / (maxInd - minInd)); - } else { - // right border is the min - lowValue = (max - (max - min) * (rightBorderInd - maxInd) / (minInd - maxInd)); - } - - // get the highValue - Collections.sort(list, new Comparator<Map.Entry<String, DoubleColumnStatsData>>() { - @Override - public int 
compare(Map.Entry<String, DoubleColumnStatsData> o1, - Map.Entry<String, DoubleColumnStatsData> o2) { - return Double.compare(o1.getValue().getHighValue(), o2.getValue().getHighValue()); - } - }); - minInd = adjustedIndexMap.get(list.get(0).getKey()); - maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - double highValue = 0; - min = list.get(0).getValue().getHighValue(); - max = list.get(list.size() - 1).getValue().getHighValue(); - if (minInd == maxInd) { - highValue = min; - } else if (minInd < maxInd) { - // right border is the max - highValue = (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - highValue = (min + (max - min) * minInd / (minInd - maxInd)); - } - - // get the #nulls - long numNulls = 0; - for (Map.Entry<String, DoubleColumnStatsData> entry : extractedAdjustedStatsMap.entrySet()) { - numNulls += entry.getValue().getNumNulls(); - } - // we scale up sumNulls based on the number of partitions - numNulls = numNulls * numParts / numPartsWithStats; - - // get the ndv - long ndv = 0; - long ndvMin = 0; - long ndvMax = 0; - Collections.sort(list, new Comparator<Map.Entry<String, DoubleColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, DoubleColumnStatsData> o1, - Map.Entry<String, DoubleColumnStatsData> o2) { - return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs()); - } - }); - long lowerBound = list.get(list.size() - 1).getValue().getNumDVs(); - long higherBound = 0; - for (Map.Entry<String, DoubleColumnStatsData> entry : list) { - higherBound += entry.getValue().getNumDVs(); - } - if (useDensityFunctionForNDVEstimation && densityAvg != 0.0) { - ndv = (long) ((highValue - lowValue) / densityAvg); - if (ndv < lowerBound) { - ndv = lowerBound; - } else if (ndv > higherBound) { - ndv = higherBound; - } - } else { - minInd = adjustedIndexMap.get(list.get(0).getKey()); - maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - 
ndvMin = list.get(0).getValue().getNumDVs(); - ndvMax = list.get(list.size() - 1).getValue().getNumDVs(); - if (minInd == maxInd) { - ndv = ndvMin; - } else if (minInd < maxInd) { - // right border is the max - ndv = (long) (ndvMin + (ndvMax - ndvMin) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - ndv = (long) (ndvMin + (ndvMax - ndvMin) * minInd / (minInd - maxInd)); - } - } - extrapolateDoubleData.setLowValue(lowValue); - extrapolateDoubleData.setHighValue(highValue); - extrapolateDoubleData.setNumNulls(numNulls); - extrapolateDoubleData.setNumDVs(ndv); - extrapolateData.setDoubleStats(extrapolateDoubleData); - } - -}
http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/IExtrapolatePartStatus.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/IExtrapolatePartStatus.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/IExtrapolatePartStatus.java deleted file mode 100644 index acf679e..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/IExtrapolatePartStatus.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.metastore.columnstats.aggr; - -import java.util.Map; - -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; - -public interface IExtrapolatePartStatus { - // The following function will extrapolate the stats when the column stats of - // some partitions are missing. 
- /** - * @param extrapolateData - * it will carry back the specific stats, e.g., DOUBLE_STATS or - * LONG_STATS - * @param numParts - * the total number of partitions - * @param numPartsWithStats - * the number of partitions that have stats - * @param adjustedIndexMap - * the partition name to index map - * @param adjustedStatsMap - * the partition name to its stats map - * @param densityAvg - * the average of ndv density, which is useful when - * useDensityFunctionForNDVEstimation is true. - */ - public abstract void extrapolate(ColumnStatisticsData extrapolateData, int numParts, - int numPartsWithStats, Map<String, Double> adjustedIndexMap, - Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg); - -} http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java deleted file mode 100644 index 5b1145e..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java +++ /dev/null @@ -1,348 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hadoop.hive.metastore.columnstats.aggr; - -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.ColumnStatistics; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class LongColumnStatsAggregator extends ColumnStatsAggregator implements - IExtrapolatePartStatus { - - private static final Logger LOG = LoggerFactory.getLogger(LongColumnStatsAggregator.class); - - @Override - public ColumnStatisticsObj aggregate(String colName, List<String> partNames, - List<ColumnStatistics> css) throws MetaException { - ColumnStatisticsObj statsObj = null; - - // check if all the ColumnStatisticsObjs contain stats and all the ndv are - // bitvectors - boolean doAllPartitionContainStats = partNames.size() == css.size(); - LOG.debug("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats); - NumDistinctValueEstimator ndvEstimator = null; - String colType = 
null; - for (ColumnStatistics cs : css) { - if (cs.getStatsObjSize() != 1) { - throw new MetaException( - "The number of columns should be exactly one in aggrStats, but found " - + cs.getStatsObjSize()); - } - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - if (statsObj == null) { - colType = cso.getColType(); - statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso - .getStatsData().getSetField()); - } - LongColumnStatsDataInspector longColumnStatsData = - (LongColumnStatsDataInspector) cso.getStatsData().getLongStats(); - if (longColumnStatsData.getNdvEstimator() == null) { - ndvEstimator = null; - break; - } else { - // check if all of the bit vectors can merge - NumDistinctValueEstimator estimator = longColumnStatsData.getNdvEstimator(); - if (ndvEstimator == null) { - ndvEstimator = estimator; - } else { - if (ndvEstimator.canMerge(estimator)) { - continue; - } else { - ndvEstimator = null; - break; - } - } - } - } - if (ndvEstimator != null) { - ndvEstimator = NumDistinctValueEstimatorFactory - .getEmptyNumDistinctValueEstimator(ndvEstimator); - } - LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); - ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); - if (doAllPartitionContainStats || css.size() < 2) { - LongColumnStatsDataInspector aggregateData = null; - long lowerBound = 0; - long higherBound = 0; - double densityAvgSum = 0.0; - for (ColumnStatistics cs : css) { - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - LongColumnStatsDataInspector newData = - (LongColumnStatsDataInspector) cso.getStatsData().getLongStats(); - lowerBound = Math.max(lowerBound, newData.getNumDVs()); - higherBound += newData.getNumDVs(); - densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); - if (ndvEstimator != null) { - ndvEstimator.mergeEstimators(newData.getNdvEstimator()); - } - if (aggregateData == null) { - aggregateData 
= newData.deepCopy(); - } else { - aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); - aggregateData - .setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); - } - } - if (ndvEstimator != null) { - // if all the ColumnStatisticsObjs contain bitvectors, we do not need to - // use uniform distribution assumption because we can merge bitvectors - // to get a good estimation. - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - } else { - long estimation; - if (useDensityFunctionForNDVEstimation) { - // We have estimation, lowerbound and higherbound. We use estimation - // if it is between lowerbound and higherbound. - double densityAvg = densityAvgSum / partNames.size(); - estimation = (long) ((aggregateData.getHighValue() - aggregateData.getLowValue()) / densityAvg); - if (estimation < lowerBound) { - estimation = lowerBound; - } else if (estimation > higherBound) { - estimation = higherBound; - } - } else { - estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); - } - aggregateData.setNumDVs(estimation); - } - columnStatisticsData.setLongStats(aggregateData); - } else { - // we need extrapolation - LOG.debug("start extrapolation for " + colName); - - Map<String, Integer> indexMap = new HashMap<String, Integer>(); - for (int index = 0; index < partNames.size(); index++) { - indexMap.put(partNames.get(index), index); - } - Map<String, Double> adjustedIndexMap = new HashMap<String, Double>(); - Map<String, ColumnStatisticsData> adjustedStatsMap = new HashMap<String, ColumnStatisticsData>(); - // while we scan the css, we also get the densityAvg, lowerbound and - // higerbound when useDensityFunctionForNDVEstimation is true. 
- double densityAvgSum = 0.0; - if (ndvEstimator == null) { - // if not every partition uses bitvector for ndv, we just fall back to - // the traditional extrapolation methods. - for (ColumnStatistics cs : css) { - String partName = cs.getStatsDesc().getPartName(); - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - LongColumnStatsData newData = cso.getStatsData().getLongStats(); - if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); - } - adjustedIndexMap.put(partName, (double) indexMap.get(partName)); - adjustedStatsMap.put(partName, cso.getStatsData()); - } - } else { - // we first merge all the adjacent bitvectors that we could merge and - // derive new partition names and index. - StringBuilder pseudoPartName = new StringBuilder(); - double pseudoIndexSum = 0; - int length = 0; - int curIndex = -1; - LongColumnStatsDataInspector aggregateData = null; - for (ColumnStatistics cs : css) { - String partName = cs.getStatsDesc().getPartName(); - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - LongColumnStatsDataInspector newData = - (LongColumnStatsDataInspector) cso.getStatsData().getLongStats(); - // newData.isSetBitVectors() should be true for sure because we - // already checked it before. - if (indexMap.get(partName) != curIndex) { - // There is bitvector, but it is not adjacent to the previous ones. 
- if (length > 0) { - // we have to set ndv - adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - ColumnStatisticsData csd = new ColumnStatisticsData(); - csd.setLongStats(aggregateData); - adjustedStatsMap.put(pseudoPartName.toString(), csd); - if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (aggregateData.getHighValue() - aggregateData.getLowValue()) / aggregateData.getNumDVs(); - } - // reset everything - pseudoPartName = new StringBuilder(); - pseudoIndexSum = 0; - length = 0; - ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator); - } - aggregateData = null; - } - curIndex = indexMap.get(partName); - pseudoPartName.append(partName); - pseudoIndexSum += curIndex; - length++; - curIndex++; - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); - aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), - newData.getHighValue())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - } - ndvEstimator.mergeEstimators(newData.getNdvEstimator()); - } - if (length > 0) { - // we have to set ndv - adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - ColumnStatisticsData csd = new ColumnStatisticsData(); - csd.setLongStats(aggregateData); - adjustedStatsMap.put(pseudoPartName.toString(), csd); - if (useDensityFunctionForNDVEstimation) { - densityAvgSum += (aggregateData.getHighValue() - aggregateData.getLowValue()) / aggregateData.getNumDVs(); - } - } - } - extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap, - adjustedStatsMap, densityAvgSum / adjustedStatsMap.size()); - } - LOG.debug("Ndv estimatation for {} is {} # of partitions requested: {} # of 
partitions found: {}", colName, - columnStatisticsData.getLongStats().getNumDVs(),partNames.size(), css.size()); - statsObj.setStatsData(columnStatisticsData); - return statsObj; - } - - @Override - public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, - int numPartsWithStats, Map<String, Double> adjustedIndexMap, - Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) { - int rightBorderInd = numParts; - LongColumnStatsDataInspector extrapolateLongData = new LongColumnStatsDataInspector(); - Map<String, LongColumnStatsData> extractedAdjustedStatsMap = new HashMap<>(); - for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) { - extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getLongStats()); - } - List<Map.Entry<String, LongColumnStatsData>> list = new LinkedList<Map.Entry<String, LongColumnStatsData>>( - extractedAdjustedStatsMap.entrySet()); - // get the lowValue - Collections.sort(list, new Comparator<Map.Entry<String, LongColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, LongColumnStatsData> o1, - Map.Entry<String, LongColumnStatsData> o2) { - return Long.compare(o1.getValue().getLowValue(), o2.getValue().getLowValue()); - } - }); - double minInd = adjustedIndexMap.get(list.get(0).getKey()); - double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - long lowValue = 0; - long min = list.get(0).getValue().getLowValue(); - long max = list.get(list.size() - 1).getValue().getLowValue(); - if (minInd == maxInd) { - lowValue = min; - } else if (minInd < maxInd) { - // left border is the min - lowValue = (long) (max - (max - min) * maxInd / (maxInd - minInd)); - } else { - // right border is the min - lowValue = (long) (max - (max - min) * (rightBorderInd - maxInd) / (minInd - maxInd)); - } - - // get the highValue - Collections.sort(list, new Comparator<Map.Entry<String, LongColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, 
LongColumnStatsData> o1, - Map.Entry<String, LongColumnStatsData> o2) { - return Long.compare(o1.getValue().getHighValue(), o2.getValue().getHighValue()); - } - }); - minInd = adjustedIndexMap.get(list.get(0).getKey()); - maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - long highValue = 0; - min = list.get(0).getValue().getHighValue(); - max = list.get(list.size() - 1).getValue().getHighValue(); - if (minInd == maxInd) { - highValue = min; - } else if (minInd < maxInd) { - // right border is the max - highValue = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - highValue = (long) (min + (max - min) * minInd / (minInd - maxInd)); - } - - // get the #nulls - long numNulls = 0; - for (Map.Entry<String, LongColumnStatsData> entry : extractedAdjustedStatsMap.entrySet()) { - numNulls += entry.getValue().getNumNulls(); - } - // we scale up sumNulls based on the number of partitions - numNulls = numNulls * numParts / numPartsWithStats; - - // get the ndv - long ndv = 0; - Collections.sort(list, new Comparator<Map.Entry<String, LongColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, LongColumnStatsData> o1, - Map.Entry<String, LongColumnStatsData> o2) { - return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs()); - } - }); - long lowerBound = list.get(list.size() - 1).getValue().getNumDVs(); - long higherBound = 0; - for (Map.Entry<String, LongColumnStatsData> entry : list) { - higherBound += entry.getValue().getNumDVs(); - } - if (useDensityFunctionForNDVEstimation && densityAvg != 0.0) { - ndv = (long) ((highValue - lowValue) / densityAvg); - if (ndv < lowerBound) { - ndv = lowerBound; - } else if (ndv > higherBound) { - ndv = higherBound; - } - } else { - minInd = adjustedIndexMap.get(list.get(0).getKey()); - maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - min = list.get(0).getValue().getNumDVs(); - max = list.get(list.size() - 
1).getValue().getNumDVs(); - if (minInd == maxInd) { - ndv = min; - } else if (minInd < maxInd) { - // right border is the max - ndv = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - ndv = (long) (min + (max - min) * minInd / (minInd - maxInd)); - } - } - extrapolateLongData.setLowValue(lowValue); - extrapolateLongData.setHighValue(highValue); - extrapolateLongData.setNumNulls(numNulls); - extrapolateLongData.setNumDVs(ndv); - extrapolateData.setLongStats(extrapolateLongData); - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java deleted file mode 100644 index 1b29f92..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java +++ /dev/null @@ -1,305 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hadoop.hive.metastore.columnstats.aggr; - -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.ColumnStatistics; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; -import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class StringColumnStatsAggregator extends ColumnStatsAggregator implements - IExtrapolatePartStatus { - - private static final Logger LOG = LoggerFactory.getLogger(LongColumnStatsAggregator.class); - - @Override - public ColumnStatisticsObj aggregate(String colName, List<String> partNames, - List<ColumnStatistics> css) throws MetaException { - ColumnStatisticsObj statsObj = null; - - // check if all the ColumnStatisticsObjs contain stats and all the ndv are - // bitvectors. Only when both of the conditions are true, we merge bit - // vectors. Otherwise, just use the maximum function. 
- boolean doAllPartitionContainStats = partNames.size() == css.size(); - LOG.debug("doAllPartitionContainStats for " + colName + " is " + doAllPartitionContainStats); - NumDistinctValueEstimator ndvEstimator = null; - String colType = null; - for (ColumnStatistics cs : css) { - if (cs.getStatsObjSize() != 1) { - throw new MetaException( - "The number of columns should be exactly one in aggrStats, but found " - + cs.getStatsObjSize()); - } - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - if (statsObj == null) { - colType = cso.getColType(); - statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso - .getStatsData().getSetField()); - } - StringColumnStatsDataInspector stringColumnStatsData = - (StringColumnStatsDataInspector) cso.getStatsData().getStringStats(); - if (stringColumnStatsData.getNdvEstimator() == null) { - ndvEstimator = null; - break; - } else { - // check if all of the bit vectors can merge - NumDistinctValueEstimator estimator = stringColumnStatsData.getNdvEstimator(); - if (ndvEstimator == null) { - ndvEstimator = estimator; - } else { - if (ndvEstimator.canMerge(estimator)) { - continue; - } else { - ndvEstimator = null; - break; - } - } - } - } - if (ndvEstimator != null) { - ndvEstimator = NumDistinctValueEstimatorFactory - .getEmptyNumDistinctValueEstimator(ndvEstimator); - } - LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); - ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); - if (doAllPartitionContainStats || css.size() < 2) { - StringColumnStatsDataInspector aggregateData = null; - for (ColumnStatistics cs : css) { - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - StringColumnStatsDataInspector newData = - (StringColumnStatsDataInspector) cso.getStatsData().getStringStats(); - if (ndvEstimator != null) { - ndvEstimator.mergeEstimators(newData.getNdvEstimator()); - } - if (aggregateData == null) { - aggregateData = 
newData.deepCopy(); - } else { - aggregateData - .setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); - aggregateData - .setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); - } - } - if (ndvEstimator != null) { - // if all the ColumnStatisticsObjs contain bitvectors, we do not need to - // use uniform distribution assumption because we can merge bitvectors - // to get a good estimation. - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - } else { - // aggregateData already has the ndv of the max of all - } - columnStatisticsData.setStringStats(aggregateData); - } else { - // we need extrapolation - LOG.debug("start extrapolation for " + colName); - - Map<String, Integer> indexMap = new HashMap<String, Integer>(); - for (int index = 0; index < partNames.size(); index++) { - indexMap.put(partNames.get(index), index); - } - Map<String, Double> adjustedIndexMap = new HashMap<String, Double>(); - Map<String, ColumnStatisticsData> adjustedStatsMap = new HashMap<String, ColumnStatisticsData>(); - if (ndvEstimator == null) { - // if not every partition uses bitvector for ndv, we just fall back to - // the traditional extrapolation methods. - for (ColumnStatistics cs : css) { - String partName = cs.getStatsDesc().getPartName(); - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - adjustedIndexMap.put(partName, (double) indexMap.get(partName)); - adjustedStatsMap.put(partName, cso.getStatsData()); - } - } else { - // we first merge all the adjacent bitvectors that we could merge and - // derive new partition names and index. 
- StringBuilder pseudoPartName = new StringBuilder(); - double pseudoIndexSum = 0; - int length = 0; - int curIndex = -1; - StringColumnStatsDataInspector aggregateData = null; - for (ColumnStatistics cs : css) { - String partName = cs.getStatsDesc().getPartName(); - ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); - StringColumnStatsDataInspector newData = - (StringColumnStatsDataInspector) cso.getStatsData().getStringStats(); - // newData.isSetBitVectors() should be true for sure because we - // already checked it before. - if (indexMap.get(partName) != curIndex) { - // There is bitvector, but it is not adjacent to the previous ones. - if (length > 0) { - // we have to set ndv - adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - ColumnStatisticsData csd = new ColumnStatisticsData(); - csd.setStringStats(aggregateData); - adjustedStatsMap.put(pseudoPartName.toString(), csd); - // reset everything - pseudoPartName = new StringBuilder(); - pseudoIndexSum = 0; - length = 0; - ndvEstimator = NumDistinctValueEstimatorFactory - .getEmptyNumDistinctValueEstimator(ndvEstimator); - } - aggregateData = null; - } - curIndex = indexMap.get(partName); - pseudoPartName.append(partName); - pseudoIndexSum += curIndex; - length++; - curIndex++; - if (aggregateData == null) { - aggregateData = newData.deepCopy(); - } else { - aggregateData.setAvgColLen(Math.min(aggregateData.getAvgColLen(), - newData.getAvgColLen())); - aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), - newData.getMaxColLen())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - } - ndvEstimator.mergeEstimators(newData.getNdvEstimator()); - } - if (length > 0) { - // we have to set ndv - adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); - aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); - ColumnStatisticsData csd = new 
ColumnStatisticsData(); - csd.setStringStats(aggregateData); - adjustedStatsMap.put(pseudoPartName.toString(), csd); - } - } - extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap, - adjustedStatsMap, -1); - } - LOG.debug("Ndv estimatation for {} is {} # of partitions requested: {} # of partitions found: {}", colName, - columnStatisticsData.getStringStats().getNumDVs(),partNames.size(), css.size()); - statsObj.setStatsData(columnStatisticsData); - return statsObj; - } - - @Override - public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, - int numPartsWithStats, Map<String, Double> adjustedIndexMap, - Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) { - int rightBorderInd = numParts; - StringColumnStatsDataInspector extrapolateStringData = new StringColumnStatsDataInspector(); - Map<String, StringColumnStatsData> extractedAdjustedStatsMap = new HashMap<>(); - for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) { - extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getStringStats()); - } - List<Map.Entry<String, StringColumnStatsData>> list = new LinkedList<Map.Entry<String, StringColumnStatsData>>( - extractedAdjustedStatsMap.entrySet()); - // get the avgLen - Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, StringColumnStatsData> o1, - Map.Entry<String, StringColumnStatsData> o2) { - return Double.compare(o1.getValue().getAvgColLen(), o2.getValue().getAvgColLen()); - } - }); - double minInd = adjustedIndexMap.get(list.get(0).getKey()); - double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - double avgColLen = 0; - double min = list.get(0).getValue().getAvgColLen(); - double max = list.get(list.size() - 1).getValue().getAvgColLen(); - if (minInd == maxInd) { - avgColLen = min; - } else if (minInd < maxInd) { - // right border is the max - avgColLen 
= (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - avgColLen = (min + (max - min) * minInd / (minInd - maxInd)); - } - - // get the maxLen - Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, StringColumnStatsData> o1, - Map.Entry<String, StringColumnStatsData> o2) { - return Long.compare(o1.getValue().getMaxColLen(), o2.getValue().getMaxColLen()); - } - }); - minInd = adjustedIndexMap.get(list.get(0).getKey()); - maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - double maxColLen = 0; - min = list.get(0).getValue().getAvgColLen(); - max = list.get(list.size() - 1).getValue().getAvgColLen(); - if (minInd == maxInd) { - maxColLen = min; - } else if (minInd < maxInd) { - // right border is the max - maxColLen = (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - maxColLen = (min + (max - min) * minInd / (minInd - maxInd)); - } - - // get the #nulls - long numNulls = 0; - for (Map.Entry<String, StringColumnStatsData> entry : extractedAdjustedStatsMap.entrySet()) { - numNulls += entry.getValue().getNumNulls(); - } - // we scale up sumNulls based on the number of partitions - numNulls = numNulls * numParts / numPartsWithStats; - - // get the ndv - long ndv = 0; - Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() { - @Override - public int compare(Map.Entry<String, StringColumnStatsData> o1, - Map.Entry<String, StringColumnStatsData> o2) { - return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs()); - } - }); - minInd = adjustedIndexMap.get(list.get(0).getKey()); - maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); - min = list.get(0).getValue().getNumDVs(); - max = list.get(list.size() - 1).getValue().getNumDVs(); - if (minInd == maxInd) { - ndv = (long) min; - } else if (minInd < maxInd) { 
- // right border is the max - ndv = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); - } else { - // left border is the max - ndv = (long) (min + (max - min) * minInd / (minInd - maxInd)); - } - extrapolateStringData.setAvgColLen(avgColLen); - extrapolateStringData.setMaxColLen((long) maxColLen); - extrapolateStringData.setNumNulls(numNulls); - extrapolateStringData.setNumDVs(ndv); - extrapolateData.setStringStats(extrapolateStringData); - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java deleted file mode 100644 index 937ebf2..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DateColumnStatsDataInspector.java +++ /dev/null @@ -1,124 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.metastore.columnstats.cache; - -import java.nio.ByteBuffer; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; - -@SuppressWarnings("serial") -public class DateColumnStatsDataInspector extends DateColumnStatsData { - - private NumDistinctValueEstimator ndvEstimator; - - public DateColumnStatsDataInspector() { - super(); - } - - public DateColumnStatsDataInspector(long numNulls, long numDVs) { - super(numNulls, numDVs); - } - - public DateColumnStatsDataInspector(DateColumnStatsDataInspector other) { - super(other); - if (other.ndvEstimator != null) { - super.setBitVectors(ndvEstimator.serialize()); - } - } - - @Override - public DateColumnStatsDataInspector deepCopy() { - return new DateColumnStatsDataInspector(this); - } - - @Override - public byte[] getBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.getBitVectors(); - } - - @Override - public ByteBuffer bufferForBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.bufferForBitVectors(); - } - - @Override - public void setBitVectors(byte[] bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void setBitVectors(ByteBuffer bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void unsetBitVectors() { - super.unsetBitVectors(); - this.ndvEstimator = null; - } - - @Override - public boolean isSetBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.isSetBitVectors(); - } - - @Override - public void setBitVectorsIsSet(boolean value) { - if (ndvEstimator != null) { - updateBitVectors(); - } - super.setBitVectorsIsSet(value); - } - - public NumDistinctValueEstimator getNdvEstimator() { - if (isSetBitVectors() && 
getBitVectors().length != 0) { - updateNdvEstimator(); - } - return ndvEstimator; - } - - public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) { - super.unsetBitVectors(); - this.ndvEstimator = ndvEstimator; - } - - private void updateBitVectors() { - super.setBitVectors(ndvEstimator.serialize()); - this.ndvEstimator = null; - } - - private void updateNdvEstimator() { - this.ndvEstimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(super.getBitVectors()); - super.unsetBitVectors(); - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java deleted file mode 100644 index 586b5d8..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DecimalColumnStatsDataInspector.java +++ /dev/null @@ -1,124 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.metastore.columnstats.cache; - -import java.nio.ByteBuffer; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; - -@SuppressWarnings("serial") -public class DecimalColumnStatsDataInspector extends DecimalColumnStatsData { - - private NumDistinctValueEstimator ndvEstimator; - - public DecimalColumnStatsDataInspector() { - super(); - } - - public DecimalColumnStatsDataInspector(long numNulls, long numDVs) { - super(numNulls, numDVs); - } - - public DecimalColumnStatsDataInspector(DecimalColumnStatsDataInspector other) { - super(other); - if (other.ndvEstimator != null) { - super.setBitVectors(ndvEstimator.serialize()); - } - } - - @Override - public DecimalColumnStatsDataInspector deepCopy() { - return new DecimalColumnStatsDataInspector(this); - } - - @Override - public byte[] getBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.getBitVectors(); - } - - @Override - public ByteBuffer bufferForBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.bufferForBitVectors(); - } - - @Override - public void setBitVectors(byte[] bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void setBitVectors(ByteBuffer bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void unsetBitVectors() { - super.unsetBitVectors(); - this.ndvEstimator = null; - } - - @Override - public boolean isSetBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.isSetBitVectors(); - } - - @Override - public void setBitVectorsIsSet(boolean value) { - if (ndvEstimator != null) { - updateBitVectors(); - } - 
super.setBitVectorsIsSet(value); - } - - public NumDistinctValueEstimator getNdvEstimator() { - if (isSetBitVectors() && getBitVectors().length != 0) { - updateNdvEstimator(); - } - return ndvEstimator; - } - - public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) { - super.unsetBitVectors(); - this.ndvEstimator = ndvEstimator; - } - - private void updateBitVectors() { - super.setBitVectors(ndvEstimator.serialize()); - this.ndvEstimator = null; - } - - private void updateNdvEstimator() { - this.ndvEstimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(super.getBitVectors()); - super.unsetBitVectors(); - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java deleted file mode 100644 index 3609ddd..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/DoubleColumnStatsDataInspector.java +++ /dev/null @@ -1,124 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.metastore.columnstats.cache; - -import java.nio.ByteBuffer; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; - -@SuppressWarnings("serial") -public class DoubleColumnStatsDataInspector extends DoubleColumnStatsData { - - private NumDistinctValueEstimator ndvEstimator; - - public DoubleColumnStatsDataInspector() { - super(); - } - - public DoubleColumnStatsDataInspector(long numNulls, long numDVs) { - super(numNulls, numDVs); - } - - public DoubleColumnStatsDataInspector(DoubleColumnStatsDataInspector other) { - super(other); - if (other.ndvEstimator != null) { - super.setBitVectors(ndvEstimator.serialize()); - } - } - - @Override - public DoubleColumnStatsDataInspector deepCopy() { - return new DoubleColumnStatsDataInspector(this); - } - - @Override - public byte[] getBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.getBitVectors(); - } - - @Override - public ByteBuffer bufferForBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.bufferForBitVectors(); - } - - @Override - public void setBitVectors(byte[] bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void setBitVectors(ByteBuffer bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void unsetBitVectors() { - 
super.unsetBitVectors(); - this.ndvEstimator = null; - } - - @Override - public boolean isSetBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.isSetBitVectors(); - } - - @Override - public void setBitVectorsIsSet(boolean value) { - if (ndvEstimator != null) { - updateBitVectors(); - } - super.setBitVectorsIsSet(value); - } - - public NumDistinctValueEstimator getNdvEstimator() { - if (isSetBitVectors() && getBitVectors().length != 0) { - updateNdvEstimator(); - } - return ndvEstimator; - } - - public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) { - super.unsetBitVectors(); - this.ndvEstimator = ndvEstimator; - } - - private void updateBitVectors() { - super.setBitVectors(ndvEstimator.serialize()); - this.ndvEstimator = null; - } - - private void updateNdvEstimator() { - this.ndvEstimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(super.getBitVectors()); - super.unsetBitVectors(); - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java deleted file mode 100644 index 5632d91..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/LongColumnStatsDataInspector.java +++ /dev/null @@ -1,124 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.metastore.columnstats.cache; - -import java.nio.ByteBuffer; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; - -@SuppressWarnings("serial") -public class LongColumnStatsDataInspector extends LongColumnStatsData { - - private NumDistinctValueEstimator ndvEstimator; - - public LongColumnStatsDataInspector() { - super(); - } - - public LongColumnStatsDataInspector(long numNulls, long numDVs) { - super(numNulls, numDVs); - } - - public LongColumnStatsDataInspector(LongColumnStatsDataInspector other) { - super(other); - if (other.ndvEstimator != null) { - super.setBitVectors(ndvEstimator.serialize()); - } - } - - @Override - public LongColumnStatsDataInspector deepCopy() { - return new LongColumnStatsDataInspector(this); - } - - @Override - public byte[] getBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.getBitVectors(); - } - - @Override - public ByteBuffer bufferForBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.bufferForBitVectors(); - } - - @Override - public void setBitVectors(byte[] bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void setBitVectors(ByteBuffer 
bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void unsetBitVectors() { - super.unsetBitVectors(); - this.ndvEstimator = null; - } - - @Override - public boolean isSetBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.isSetBitVectors(); - } - - @Override - public void setBitVectorsIsSet(boolean value) { - if (ndvEstimator != null) { - updateBitVectors(); - } - super.setBitVectorsIsSet(value); - } - - public NumDistinctValueEstimator getNdvEstimator() { - if (isSetBitVectors() && getBitVectors().length != 0) { - updateNdvEstimator(); - } - return ndvEstimator; - } - - public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) { - super.unsetBitVectors(); - this.ndvEstimator = ndvEstimator; - } - - private void updateBitVectors() { - super.setBitVectors(ndvEstimator.serialize()); - this.ndvEstimator = null; - } - - private void updateNdvEstimator() { - this.ndvEstimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(super.getBitVectors()); - super.unsetBitVectors(); - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/StringColumnStatsDataInspector.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/StringColumnStatsDataInspector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/StringColumnStatsDataInspector.java deleted file mode 100644 index 2db037b..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/cache/StringColumnStatsDataInspector.java +++ /dev/null @@ -1,125 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.metastore.columnstats.cache; - -import java.nio.ByteBuffer; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; -import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; - -@SuppressWarnings("serial") -public class StringColumnStatsDataInspector extends StringColumnStatsData { - - private NumDistinctValueEstimator ndvEstimator; - - public StringColumnStatsDataInspector() { - super(); - } - - public StringColumnStatsDataInspector(long maxColLen, double avgColLen, - long numNulls, long numDVs) { - super(maxColLen, avgColLen, numNulls, numDVs); - } - - public StringColumnStatsDataInspector(StringColumnStatsDataInspector other) { - super(other); - if (other.ndvEstimator != null) { - super.setBitVectors(ndvEstimator.serialize()); - } - } - - @Override - public StringColumnStatsDataInspector deepCopy() { - return new StringColumnStatsDataInspector(this); - } - - @Override - public byte[] getBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.getBitVectors(); - } - - @Override - public ByteBuffer bufferForBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.bufferForBitVectors(); - } - - @Override - public void setBitVectors(byte[] bitVectors) { - super.setBitVectors(bitVectors); - 
this.ndvEstimator = null; - } - - @Override - public void setBitVectors(ByteBuffer bitVectors) { - super.setBitVectors(bitVectors); - this.ndvEstimator = null; - } - - @Override - public void unsetBitVectors() { - super.unsetBitVectors(); - this.ndvEstimator = null; - } - - @Override - public boolean isSetBitVectors() { - if (ndvEstimator != null) { - updateBitVectors(); - } - return super.isSetBitVectors(); - } - - @Override - public void setBitVectorsIsSet(boolean value) { - if (ndvEstimator != null) { - updateBitVectors(); - } - super.setBitVectorsIsSet(value); - } - - public NumDistinctValueEstimator getNdvEstimator() { - if (isSetBitVectors() && getBitVectors().length != 0) { - updateNdvEstimator(); - } - return ndvEstimator; - } - - public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) { - super.unsetBitVectors(); - this.ndvEstimator = ndvEstimator; - } - - private void updateBitVectors() { - super.setBitVectors(ndvEstimator.serialize()); - this.ndvEstimator = null; - } - - private void updateNdvEstimator() { - this.ndvEstimator = NumDistinctValueEstimatorFactory - .getNumDistinctValueEstimator(super.getBitVectors()); - super.unsetBitVectors(); - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java deleted file mode 100644 index 4c2d1bc..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BinaryColumnStatsMerger.java +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hadoop.hive.metastore.columnstats.merge; - -import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; - -public class BinaryColumnStatsMerger extends ColumnStatsMerger { - - @Override - public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { - BinaryColumnStatsData aggregateData = aggregateColStats.getStatsData().getBinaryStats(); - BinaryColumnStatsData newData = newColStats.getStatsData().getBinaryStats(); - aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); - aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java deleted file mode 100644 
index 8e50153..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/BooleanColumnStatsMerger.java +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hadoop.hive.metastore.columnstats.merge; - -import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; - -public class BooleanColumnStatsMerger extends ColumnStatsMerger { - - @Override - public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { - BooleanColumnStatsData aggregateData = aggregateColStats.getStatsData().getBooleanStats(); - BooleanColumnStatsData newData = newColStats.getStatsData().getBooleanStats(); - aggregateData.setNumTrues(aggregateData.getNumTrues() + newData.getNumTrues()); - aggregateData.setNumFalses(aggregateData.getNumFalses() + newData.getNumFalses()); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMerger.java 
package org.apache.hadoop.hive.metastore.columnstats.merge;

import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base type for per-column-type statistics mergers. Implementations fold the
 * statistics of {@code newColStats} into {@code aggregateColStats} in place;
 * instances are obtained via {@code ColumnStatsMergerFactory}.
 */
public abstract class ColumnStatsMerger {
  // Shared logger for all merger subclasses.
  protected final Logger LOG = LoggerFactory.getLogger(ColumnStatsMerger.class.getName());

  /**
   * Merges the new statistics into the aggregate, mutating the aggregate.
   *
   * @param aggregateColStats running aggregate; updated in place
   * @param newColStats stats to fold in; must carry the same stats type
   */
  public abstract void merge(ColumnStatisticsObj aggregateColStats,
      ColumnStatisticsObj newColStats);
}
- */ - -package org.apache.hadoop.hive.metastore.columnstats.merge; - -import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; -import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; -import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; -import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; -import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; -import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; - -public class ColumnStatsMergerFactory { - - private ColumnStatsMergerFactory() { - } - - public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsObjNew, - ColumnStatisticsObj statsObjOld) { - ColumnStatsMerger agg; - _Fields typeNew = statsObjNew.getStatsData().getSetField(); - _Fields typeOld = statsObjOld.getStatsData().getSetField(); - // make sure that they have the same type - typeNew = typeNew == typeOld ? 
typeNew : null; - switch (typeNew) { - case BOOLEAN_STATS: - agg = new BooleanColumnStatsMerger(); - break; - case LONG_STATS: { - agg = new LongColumnStatsMerger(); - break; - } - case DOUBLE_STATS: { - agg = new DoubleColumnStatsMerger(); - break; - } - case STRING_STATS: { - agg = new StringColumnStatsMerger(); - break; - } - case BINARY_STATS: - agg = new BinaryColumnStatsMerger(); - break; - case DECIMAL_STATS: { - agg = new DecimalColumnStatsMerger(); - break; - } - case DATE_STATS: { - agg = new DateColumnStatsMerger(); - break; - } - default: - throw new IllegalArgumentException("Unknown stats type " + typeNew.toString()); - } - return agg; - } - - public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) { - ColumnStatisticsObj cso = new ColumnStatisticsObj(); - ColumnStatisticsData csd = new ColumnStatisticsData(); - cso.setColName(colName); - cso.setColType(colType); - switch (type) { - case BOOLEAN_STATS: - csd.setBooleanStats(new BooleanColumnStatsData()); - break; - - case LONG_STATS: - csd.setLongStats(new LongColumnStatsDataInspector()); - break; - - case DOUBLE_STATS: - csd.setDoubleStats(new DoubleColumnStatsDataInspector()); - break; - - case STRING_STATS: - csd.setStringStats(new StringColumnStatsDataInspector()); - break; - - case BINARY_STATS: - csd.setBinaryStats(new BinaryColumnStatsData()); - break; - - case DECIMAL_STATS: - csd.setDecimalStats(new DecimalColumnStatsDataInspector()); - break; - - case DATE_STATS: - csd.setDateStats(new DateColumnStatsDataInspector()); - break; - - default: - throw new IllegalArgumentException("Unknown stats type"); - } - - cso.setStatsData(csd); - return cso; - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java ---------------------------------------------------------------------- diff --git 
a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java deleted file mode 100644 index e783d3c..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/DateColumnStatsMerger.java +++ /dev/null @@ -1,59 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hadoop.hive.metastore.columnstats.merge; - -import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.Date; -import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; - -public class DateColumnStatsMerger extends ColumnStatsMerger { - @Override - public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { - DateColumnStatsDataInspector aggregateData = - (DateColumnStatsDataInspector) aggregateColStats.getStatsData().getDateStats(); - DateColumnStatsDataInspector newData = - (DateColumnStatsDataInspector) newColStats.getStatsData().getDateStats(); - Date lowValue = aggregateData.getLowValue().compareTo(newData.getLowValue()) < 0 ? aggregateData - .getLowValue() : newData.getLowValue(); - aggregateData.setLowValue(lowValue); - Date highValue = aggregateData.getHighValue().compareTo(newData.getHighValue()) >= 0 ? aggregateData - .getHighValue() : newData.getHighValue(); - aggregateData.setHighValue(highValue); - aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); - if (aggregateData.getNdvEstimator() == null || newData.getNdvEstimator() == null) { - aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); - } else { - NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator(); - NumDistinctValueEstimator newEst = newData.getNdvEstimator(); - long ndv = -1; - if (oldEst.canMerge(newEst)) { - oldEst.mergeEstimators(newEst); - ndv = oldEst.estimateNumDistinctValues(); - aggregateData.setNdvEstimator(oldEst); - } else { - ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs()); - } - LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " - + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); - aggregateData.setNumDVs(ndv); - } - } -}
