http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLRegister.java
----------------------------------------------------------------------
diff --git 
a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLRegister.java 
b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLRegister.java
deleted file mode 100644
index eefc60f..0000000
--- a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLRegister.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.common.ndv.hll;
-
-public interface HLLRegister {
-
-  /**
-   * Specify a hashcode to add to hyperloglog register.
-   * @param hashcode
-   *          - hashcode to add
-   * @return true if register value is updated else false
-   */
-  public boolean add(long hashcode);
-
-  /**
-   * Instead of specifying hashcode, this interface can be used to directly
-   * specify the register index and register value. This interface is useful
-   * when reconstructing hyperloglog from a serialized representation where its
-   * not possible to regenerate the hashcode.
-   * @param idx
-   *          - register index
-   * @param value
-   *          - register value
-   * @return true if register value is updated else false
-   */
-  public boolean set(int idx, byte value);
-
-  /**
-   * Merge hyperloglog registers of the same type (SPARSE or DENSE register)
-   * @param reg
-   *          - register to be merged
-   */
-  public void merge(HLLRegister reg);
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java
----------------------------------------------------------------------
diff --git 
a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java 
b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java
deleted file mode 100644
index a4a5ba9..0000000
--- 
a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java
+++ /dev/null
@@ -1,260 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.common.ndv.hll;
-
-import java.util.Map;
-import java.util.TreeMap;
-
-public class HLLSparseRegister implements HLLRegister {
-
-  private TreeMap<Integer,Byte> sparseMap;
-
-  // for a better insertion performance values are added to temporary unsorted
-  // list which will be merged to sparse map after a threshold
-  private int[] tempList;
-  private int tempListIdx;
-
-  // number of register bits
-  private final int p;
-
-  // new number of register bits for higher accuracy
-  private final int pPrime;
-
-  // number of bits to store the number of zero runs
-  private final int qPrime;
-
-  // masks for quicker extraction of p, pPrime, qPrime values
-  private final int mask;
-  private final int pPrimeMask;
-  private final int qPrimeMask;
-
-  public HLLSparseRegister(int p, int pp, int qp) {
-    this.p = p;
-    this.sparseMap = new TreeMap<Integer,Byte>();
-    this.tempList = new int[HLLConstants.TEMP_LIST_DEFAULT_SIZE];
-    this.tempListIdx = 0;
-    this.pPrime = pp;
-    this.qPrime = qp;
-    this.mask = ((1 << pPrime) - 1) ^ ((1 << p) - 1);
-    this.pPrimeMask = ((1 << pPrime) - 1);
-    this.qPrimeMask = (1 << qPrime) - 1;
-  }
-
-  public boolean add(long hashcode) {
-    boolean updated = false;
-
-    // fill the temp list before merging to sparse map
-    if (tempListIdx < tempList.length) {
-      int encodedHash = encodeHash(hashcode);
-      tempList[tempListIdx++] = encodedHash;
-      updated = true;
-    } else {
-      updated = mergeTempListToSparseMap();
-    }
-
-    return updated;
-  }
-
-  /**
-   * Adds temp list to sparse map. The key for sparse map entry is the register
-   * index determined by pPrime and value is the number of trailing zeroes.
-   * @return
-   */
-  private boolean mergeTempListToSparseMap() {
-    boolean updated = false;
-    for (int i = 0; i < tempListIdx; i++) {
-      int encodedHash = tempList[i];
-      int key = encodedHash & pPrimeMask;
-      byte value = (byte) (encodedHash >>> pPrime);
-      byte nr = 0;
-      // if MSB is set to 1 then next qPrime MSB bits contains the value of
-      // number of zeroes.
-      // if MSB is set to 0 then number of zeroes is contained within pPrime - 
p
-      // bits.
-      if (encodedHash < 0) {
-        nr = (byte) (value & qPrimeMask);
-      } else {
-        nr = (byte) (Integer.numberOfTrailingZeros(encodedHash >>> p) + 1);
-      }
-      updated = set(key, nr);
-    }
-
-    // reset temp list index
-    tempListIdx = 0;
-    return updated;
-  }
-
-  /**
-   * <pre>
-   * <b>Input:</b> 64 bit hashcode
-   * 
-   * |---------w-------------| |------------p'----------|
-   * 10101101.......1010101010 10101010101 01010101010101
-   *                                       |------p-----|
-   *                                       
-   * <b>Output:</b> 32 bit int
-   * 
-   * |b| |-q'-|  |------------p'----------|
-   *  1  010101  01010101010 10101010101010
-   *                         |------p-----|
-   *                    
-   * 
-   * The default values of p', q' and b are 25, 6, 1 (total 32 bits) 
respectively.
-   * This function will return an int encoded in the following format
-   * 
-   * p  - LSB p bits represent the register index
-   * p' - LSB p' bits are used for increased accuracy in estimation
-   * q' - q' bits after p' are left as such from the hashcode if b = 0 else
-   *      q' bits encodes the longest trailing zero runs from in (w-p) input 
bits
-   * b  - 0 if longest trailing zero run is contained within (p'-p) bits
-   *      1 if longest trailing zero run is computeed from (w-p) input bits and
-   *      its value is stored in q' bits
-   * </pre>
-   * @param hashcode
-   * @return
-   */
-  public int encodeHash(long hashcode) {
-    // x = p' - p
-    int x = (int) (hashcode & mask);
-    if (x == 0) {
-      // more bits should be considered for finding q (longest zero runs)
-      // set MSB to 1
-      int ntr = Long.numberOfTrailingZeros(hashcode >> p) + 1;
-      long newHashCode = hashcode & pPrimeMask;
-      newHashCode |= ntr << pPrime;
-      newHashCode |= 0x80000000;
-      return (int) newHashCode;
-    } else {
-      // q is contained within p' - p
-      // set MSB to 0
-      return (int) (hashcode & 0x7FFFFFFF);
-    }
-  }
-
-  public int getSize() {
-
-    // merge temp list before getting the size of sparse map
-    if (tempListIdx != 0) {
-      mergeTempListToSparseMap();
-    }
-    return sparseMap.size();
-  }
-
-  public void merge(HLLRegister hllRegister) {
-    if (hllRegister instanceof HLLSparseRegister) {
-      HLLSparseRegister hsr = (HLLSparseRegister) hllRegister;
-
-      // retain only the largest value for a register index
-      for (Map.Entry<Integer, Byte> entry : hsr.getSparseMap().entrySet()) {
-        int key = entry.getKey();
-        byte value = entry.getValue();
-        set(key, value);
-      }
-    } else {
-      throw new IllegalArgumentException("Specified register not instance of 
HLLSparseRegister");
-    }
-  }
-
-  public boolean set(int key, byte value) {
-    boolean updated = false;
-
-    // retain only the largest value for a register index
-    if (sparseMap.containsKey(key)) {
-      byte containedVal = sparseMap.get(key);
-      if (value > containedVal) {
-        sparseMap.put(key, value);
-        updated = true;
-      }
-    } else {
-      sparseMap.put(key, value);
-      updated = true;
-    }
-    return updated;
-  }
-
-  public TreeMap<Integer,Byte> getSparseMap() {
-    return sparseMap;
-  }
-
-  public TreeMap<Integer,Byte> getMergedSparseMap() {
-    if (tempListIdx != 0) {
-      mergeTempListToSparseMap();
-    }
-    return sparseMap;
-  }
-
-  public int getP() {
-    return p;
-  }
-
-  public int getPPrime() {
-    return pPrime;
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder sb = new StringBuilder();
-    sb.append("HLLSparseRegister - ");
-    sb.append("p: ");
-    sb.append(p);
-    sb.append(" pPrime: ");
-    sb.append(pPrime);
-    sb.append(" qPrime: ");
-    sb.append(qPrime);
-    return sb.toString();
-  }
-
-  public String toExtendedString() {
-    return toString() + " register: " + sparseMap.toString();
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (!(obj instanceof HLLSparseRegister)) {
-      return false;
-    }
-    HLLSparseRegister other = (HLLSparseRegister) obj;
-    boolean result = p == other.p && pPrime == other.pPrime && qPrime == 
other.qPrime
-        && tempListIdx == other.tempListIdx;
-    if (result) {
-      for (int i = 0; i < tempListIdx; i++) {
-        if (tempList[i] != other.tempList[i]) {
-          return false;
-        }
-      }
-
-      result = result && sparseMap.equals(other.sparseMap);
-    }
-    return result;
-  }
-
-  @Override
-  public int hashCode() {
-    int hashcode = 0;
-    hashcode += 31 * p;
-    hashcode += 31 * pPrime;
-    hashcode += 31 * qPrime;
-    for (int i = 0; i < tempListIdx; i++) {
-      hashcode += 31 * tempList[tempListIdx];
-    }
-    hashcode += sparseMap.hashCode();
-    return hashcode;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
----------------------------------------------------------------------
diff --git 
a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java 
b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
deleted file mode 100644
index b80a0ac..0000000
--- a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
+++ /dev/null
@@ -1,634 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.common.ndv.hll;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.ByteBuffer;
-import java.nio.charset.Charset;
-import java.util.Map;
-import java.util.TreeMap;
-
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.util.JavaDataModel;
-import org.apache.hive.common.util.Murmur3;
-
-/**
- * <pre>
- * This is an implementation of the following variants of hyperloglog (HLL)
- * algorithm 
- * Original  - Original HLL algorithm from Flajolet et. al from
- *             http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
- * HLLNoBias - Google's implementation of bias correction based on lookup table
- *             
http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
- * HLL++     - Google's implementation of HLL++ algorithm that uses SPARSE 
registers
- *             
http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
- * 
- * Following are the constructor parameters that determines which algorithm is
- * used
- * <b>numRegisterIndexBits</b> - number of LSB hashcode bits to be used as 
register index.
- *                        <i>Default is 14</i>. min = 4 and max = 16
- * <b>numHashBits</b> - number of bits for hashcode. <i>Default is 64</i>. min 
= 32 and max = 128
- * <b>encoding</b> - Type of encoding to use (SPARSE or DENSE). The algorithm 
automatically
- *            switches to DENSE beyond a threshold. <i>Default: SPARSE</i>
- * <b>enableBitPacking</b> - To enable bit packing or not. Bit packing 
improves compression
- *                    at the cost of more CPU cycles. <i>Default: true</i>
- * <b>noBias</b> - Use Google's bias table lookup for short range bias 
correction.
- *          Enabling this will highly improve the estimation accuracy for short
- *          range values. <i>Default: true</i>
- *
- * </pre>
- */
-public class HyperLogLog implements NumDistinctValueEstimator {
-  private final static int DEFAULT_HASH_BITS = 64;
-  private final static long HASH64_ZERO = Murmur3.hash64(new byte[] {0});
-  private final static long HASH64_ONE = Murmur3.hash64(new byte[] {1});
-  private final static ByteBuffer SHORT_BUFFER = 
ByteBuffer.allocate(Short.BYTES);
-  private final static ByteBuffer INT_BUFFER = 
ByteBuffer.allocate(Integer.BYTES);
-  private final static ByteBuffer LONG_BUFFER = 
ByteBuffer.allocate(Long.BYTES);
-
-  public enum EncodingType {
-    SPARSE, DENSE
-  }
-
-  // number of bits to address registers
-  private final int p;
-
-  // number of registers - 2^p
-  private final int m;
-
-  // refer paper
-  private float alphaMM;
-
-  // enable/disable bias correction using table lookup
-  private final boolean noBias;
-
-  // enable/disable bitpacking
-  private final boolean bitPacking;
-
-  // Not making it configurable for perf reasons (avoid checks)
-  private final int chosenHashBits = DEFAULT_HASH_BITS;
-
-  private HLLDenseRegister denseRegister;
-  private HLLSparseRegister sparseRegister;
-
-  // counts are cached to avoid repeated complex computation. If register value
-  // is updated the count will be computed again.
-  private long cachedCount;
-  private boolean invalidateCount;
-
-  private EncodingType encoding;
-
-  // threshold to switch from SPARSE to DENSE encoding
-  private int encodingSwitchThreshold;
-
-  private HyperLogLog(HyperLogLogBuilder hllBuilder) {
-    if (hllBuilder.numRegisterIndexBits < HLLConstants.MIN_P_VALUE
-        || hllBuilder.numRegisterIndexBits > HLLConstants.MAX_P_VALUE) {
-      throw new IllegalArgumentException("p value should be between " + 
HLLConstants.MIN_P_VALUE
-          + " to " + HLLConstants.MAX_P_VALUE);
-    }
-    this.p = hllBuilder.numRegisterIndexBits;
-    this.m = 1 << p;
-    this.noBias = hllBuilder.noBias;
-    this.bitPacking = hllBuilder.bitPacking;
-
-    // the threshold should be less than 12K bytes for p = 14.
-    // The reason to divide by 5 is, in sparse mode after serialization the
-    // entriesin sparse map are compressed, and delta encoded as varints. The
-    // worst case size of varints are 5 bytes. Hence, 12K/5 ~= 2400 entries in
-    // sparse map.
-    if (bitPacking) {
-      this.encodingSwitchThreshold = ((m * 6) / 8) / 5;
-    } else {
-      // if bitpacking is disabled, all register values takes 8 bits and hence
-      // we can be more flexible with the threshold. For p=14, 16K/5 = 3200
-      // entries in sparse map can be allowed.
-      this.encodingSwitchThreshold = m / 3;
-    }
-
-    // initializeAlpha(DEFAULT_HASH_BITS);
-    // alphaMM value for 128 bits hash seems to perform better for default 64 
hash bits
-    this.alphaMM = 0.7213f / (1 + 1.079f / m);
-    // For efficiency alpha is multiplied by m^2
-    this.alphaMM = this.alphaMM * m * m;
-
-    this.cachedCount = -1;
-    this.invalidateCount = false;
-    this.encoding = hllBuilder.encoding;
-    if (encoding.equals(EncodingType.SPARSE)) {
-      this.sparseRegister = new HLLSparseRegister(p, 
HLLConstants.P_PRIME_VALUE,
-          HLLConstants.Q_PRIME_VALUE);
-      this.denseRegister = null;
-    } else {
-      this.sparseRegister = null;
-      this.denseRegister = new HLLDenseRegister(p, bitPacking);
-    }
-  }
-
-  public static HyperLogLogBuilder builder() {
-    return new HyperLogLogBuilder();
-  }
-
-  public static class HyperLogLogBuilder {
-    private int numRegisterIndexBits = 14;
-    private EncodingType encoding = EncodingType.SPARSE;
-    private boolean bitPacking = true;
-    private boolean noBias = true;
-
-    public HyperLogLogBuilder() {
-    }
-
-    public HyperLogLogBuilder setNumRegisterIndexBits(int b) {
-      this.numRegisterIndexBits = b;
-      return this;
-    }
-
-    public HyperLogLogBuilder setEncoding(EncodingType enc) {
-      this.encoding = enc;
-      return this;
-    }
-
-    public HyperLogLogBuilder enableBitPacking(boolean b) {
-      this.bitPacking = b;
-      return this;
-    }
-
-    public HyperLogLogBuilder enableNoBias(boolean nb) {
-      this.noBias = nb;
-      return this;
-    }
-
-    public HyperLogLog build() {
-      return new HyperLogLog(this);
-    }
-  }
-
-  // see paper for alpha initialization.
-  private void initializeAlpha(final int hashBits) {
-    if (hashBits <= 16) {
-      alphaMM = 0.673f;
-    } else if (hashBits <= 32) {
-      alphaMM = 0.697f;
-    } else if (hashBits <= 64) {
-      alphaMM = 0.709f;
-    } else {
-      alphaMM = 0.7213f / (float) (1 + 1.079f / m);
-    }
-
-    // For efficiency alpha is multiplied by m^2
-    alphaMM = alphaMM * m * m;
-  }
-
-  public void addBoolean(boolean val) {
-    add(val ? HASH64_ONE : HASH64_ZERO);
-  }
-
-  public void addByte(byte val) {
-    add(Murmur3.hash64(new byte[] {val}));
-  }
-
-  public void addBytes(byte[] val) {
-    add(Murmur3.hash64(val));
-  }
-
-  public void addShort(short val) {
-    SHORT_BUFFER.putShort(0, val);
-    add(Murmur3.hash64(SHORT_BUFFER.array()));
-  }
-
-  public void addInt(int val) {
-    INT_BUFFER.putInt(0, val);
-    add(Murmur3.hash64(INT_BUFFER.array()));
-  }
-
-  public void addLong(long val) {
-    LONG_BUFFER.putLong(0, val);
-    add(Murmur3.hash64(LONG_BUFFER.array()));
-  }
-
-  public void addFloat(float val) {
-    INT_BUFFER.putFloat(0, val);
-    add(Murmur3.hash64(INT_BUFFER.array()));
-  }
-
-  public void addDouble(double val) {
-    LONG_BUFFER.putDouble(0, val);
-    add(Murmur3.hash64(LONG_BUFFER.array()));
-  }
-
-  public void addChar(char val) {
-    SHORT_BUFFER.putChar(0, val);
-    add(Murmur3.hash64(SHORT_BUFFER.array()));
-  }
-
-  /**
-   * Java's default charset will be used for strings.
-   * @param val
-   *          - input string
-   */
-  public void addString(String val) {
-    add(Murmur3.hash64(val.getBytes()));
-  }
-
-  public void addString(String val, Charset charset) {
-    add(Murmur3.hash64(val.getBytes(charset)));
-  }
-
-  public void add(long hashcode) {
-    if (encoding.equals(EncodingType.SPARSE)) {
-      if (sparseRegister.add(hashcode)) {
-        invalidateCount = true;
-      }
-
-      // if size of sparse map excess the threshold convert the sparse map to
-      // dense register and switch to DENSE encoding
-      if (sparseRegister.getSize() > encodingSwitchThreshold) {
-        encoding = EncodingType.DENSE;
-        denseRegister = sparseToDenseRegister(sparseRegister);
-        sparseRegister = null;
-        invalidateCount = true;
-      }
-    } else {
-      if (denseRegister.add(hashcode)) {
-        invalidateCount = true;
-      }
-    }
-  }
-
-  public long estimateNumDistinctValues() {
-    // FMSketch treats the ndv of all nulls as 1 but hll treates the ndv as 0.
-    // In order to get rid of divide by 0 problem, we follow FMSketch
-    return count() > 0 ? count() : 1;
-  }
-
-  public long count() {
-    // compute count only if the register values are updated else return the
-    // cached count
-    if (invalidateCount || cachedCount < 0) {
-      if (encoding.equals(EncodingType.SPARSE)) {
-
-        // if encoding is still SPARSE use linear counting with increase
-        // accuracy (as we use pPrime bits for register index)
-        int mPrime = 1 << sparseRegister.getPPrime();
-        cachedCount = linearCount(mPrime, mPrime - sparseRegister.getSize());
-      } else {
-
-        // for DENSE encoding, use bias table lookup for HLLNoBias algorithm
-        // else fallback to HLLOriginal algorithm
-        double sum = denseRegister.getSumInversePow2();
-        long numZeros = denseRegister.getNumZeroes();
-
-        // cardinality estimate from normalized bias corrected harmonic mean on
-        // the registers
-        cachedCount = (long) (alphaMM * (1.0 / sum));
-        long pow = (long) Math.pow(2, chosenHashBits);
-
-        // when bias correction is enabled
-        if (noBias) {
-          cachedCount = cachedCount <= 5 * m ? (cachedCount - 
estimateBias(cachedCount))
-              : cachedCount;
-          long h = cachedCount;
-          if (numZeros != 0) {
-            h = linearCount(m, numZeros);
-          }
-
-          if (h < getThreshold()) {
-            cachedCount = h;
-          }
-        } else {
-          // HLL algorithm shows stronger bias for values in (2.5 * m) range.
-          // To compensate for this short range bias, linear counting is used
-          // for values before this short range. The original paper also says
-          // similar bias is seen for long range values due to hash collisions
-          // in range >1/30*(2^32). For the default case, we do not have to
-          // worry about this long range bias as the paper used 32-bit hashing
-          // and we use 64-bit hashing as default. 2^64 values are too high to
-          // observe long range bias (hash collisions).
-          if (cachedCount <= 2.5 * m) {
-
-            // for short range use linear counting
-            if (numZeros != 0) {
-              cachedCount = linearCount(m, numZeros);
-            }
-          } else if (chosenHashBits < 64 && cachedCount > (0.033333 * pow)) {
-
-            // long range bias for 32-bit hashcodes
-            if (cachedCount > (1 / 30) * pow) {
-              cachedCount = (long) (-pow * Math.log(1.0 - (double) cachedCount 
/ (double) pow));
-            }
-          }
-        }
-      }
-      invalidateCount = false;
-    }
-
-    return cachedCount;
-  }
-
-  private long getThreshold() {
-    return (long) (HLLConstants.thresholdData[p - 4] + 0.5);
-  }
-
-  /**
-   * Estimate bias from lookup table
-   * @param count
-   *          - cardinality before bias correction
-   * @return cardinality after bias correction
-   */
-  private long estimateBias(long count) {
-    double[] rawEstForP = HLLConstants.rawEstimateData[p - 4];
-
-    // compute distance and store it in sorted map
-    TreeMap<Double,Integer> estIndexMap = new TreeMap<>();
-    double distance = 0;
-    for (int i = 0; i < rawEstForP.length; i++) {
-      distance = Math.pow(count - rawEstForP[i], 2);
-      estIndexMap.put(distance, i);
-    }
-
-    // take top-k closest neighbors and compute the bias corrected cardinality
-    long result = 0;
-    double[] biasForP = HLLConstants.biasData[p - 4];
-    double biasSum = 0;
-    int kNeighbors = HLLConstants.K_NEAREST_NEIGHBOR;
-    for (Map.Entry<Double, Integer> entry : estIndexMap.entrySet()) {
-      biasSum += biasForP[entry.getValue()];
-      kNeighbors--;
-      if (kNeighbors <= 0) {
-        break;
-      }
-    }
-
-    // 0.5 added for rounding off
-    result = (long) ((biasSum / HLLConstants.K_NEAREST_NEIGHBOR) + 0.5);
-    return result;
-  }
-
-  public void setCount(long count) {
-    this.cachedCount = count;
-    this.invalidateCount = true;
-  }
-
-  private long linearCount(int mVal, long numZeros) {
-    return (long) (Math.round(mVal * Math.log(mVal / ((double) numZeros))));
-  }
-
-  // refer paper
-  public double getStandardError() {
-    return 1.04 / Math.sqrt(m);
-  }
-
-  public HLLDenseRegister getHLLDenseRegister() {
-    return denseRegister;
-  }
-
-  public HLLSparseRegister getHLLSparseRegister() {
-    return sparseRegister;
-  }
-
-  /**
-   * Reconstruct sparse map from serialized integer list
-   * @param reg
-   *          - uncompressed and delta decoded integer list
-   */
-  public void setHLLSparseRegister(int[] reg) {
-    for (int i : reg) {
-      int key = i >>> HLLConstants.Q_PRIME_VALUE;
-      byte value = (byte) (i & 0x3f);
-      sparseRegister.set(key, value);
-    }
-  }
-
-  /**
-   * Reconstruct dense registers from byte array
-   * @param reg
-   *          - unpacked byte array
-   */
-  public void setHLLDenseRegister(byte[] reg) {
-    int i = 0;
-    for (byte b : reg) {
-      denseRegister.set(i, b);
-      i++;
-    }
-  }
-
-  /**
-   * Merge the specified hyperloglog to the current one. Encoding switches
-   * automatically after merge if the encoding switch threshold is exceeded.
-   * @param hll
-   *          - hyperloglog to be merged
-   * @throws IllegalArgumentException
-   */
-  public void merge(HyperLogLog hll) {
-    if (p != hll.p || chosenHashBits != hll.chosenHashBits) {
-      throw new IllegalArgumentException(
-          "HyperLogLog cannot be merged as either p or hashbits are different. 
Current: "
-              + toString() + " Provided: " + hll.toString());
-    }
-
-    EncodingType otherEncoding = hll.getEncoding();
-
-    if (encoding.equals(EncodingType.SPARSE) && 
otherEncoding.equals(EncodingType.SPARSE)) {
-      sparseRegister.merge(hll.getHLLSparseRegister());
-      // if after merge the sparse switching threshold is exceeded then change
-      // to dense encoding
-      if (sparseRegister.getSize() > encodingSwitchThreshold) {
-        encoding = EncodingType.DENSE;
-        denseRegister = sparseToDenseRegister(sparseRegister);
-        sparseRegister = null;
-      }
-    } else if (encoding.equals(EncodingType.DENSE) && 
otherEncoding.equals(EncodingType.DENSE)) {
-      denseRegister.merge(hll.getHLLDenseRegister());
-    } else if (encoding.equals(EncodingType.SPARSE) && 
otherEncoding.equals(EncodingType.DENSE)) {
-      denseRegister = sparseToDenseRegister(sparseRegister);
-      denseRegister.merge(hll.getHLLDenseRegister());
-      sparseRegister = null;
-      encoding = EncodingType.DENSE;
-    } else if (encoding.equals(EncodingType.DENSE) && 
otherEncoding.equals(EncodingType.SPARSE)) {
-      HLLDenseRegister otherDenseRegister = 
sparseToDenseRegister(hll.getHLLSparseRegister());
-      denseRegister.merge(otherDenseRegister);
-    }
-
-    invalidateCount = true;
-  }
-
-  /**
-   * Converts sparse to dense hll register
-   * @param sparseRegister
-   *          - sparse register to be converted
-   * @return converted dense register
-   */
-  private HLLDenseRegister sparseToDenseRegister(HLLSparseRegister 
sparseRegister) {
-    if (sparseRegister == null) {
-      return null;
-    }
-    int p = sparseRegister.getP();
-    int pMask = (1 << p) - 1;
-    HLLDenseRegister result = new HLLDenseRegister(p, bitPacking);
-    for (Map.Entry<Integer, Byte> entry : 
sparseRegister.getSparseMap().entrySet()) {
-      int key = entry.getKey();
-      int idx = key & pMask;
-      result.set(idx, entry.getValue());
-    }
-    return result;
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder sb = new StringBuilder();
-    sb.append("Encoding: ");
-    sb.append(encoding);
-    sb.append(", p: ");
-    sb.append(p);
-    sb.append(", estimatedCardinality: ");
-    sb.append(estimateNumDistinctValues());
-    return sb.toString();
-  }
-
-  public String toStringExtended() {
-    if (encoding.equals(EncodingType.DENSE)) {
-      return toString() + ", " + denseRegister.toExtendedString();
-    } else if (encoding.equals(EncodingType.SPARSE)) {
-      return toString() + ", " + sparseRegister.toExtendedString();
-    }
-
-    return toString();
-  }
-
-  public int getNumRegisterIndexBits() {
-    return p;
-  }
-
-  public EncodingType getEncoding() {
-    return encoding;
-  }
-
-  public void setEncoding(EncodingType encoding) {
-    this.encoding = encoding;
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (!(obj instanceof HyperLogLog)) {
-      return false;
-    }
-
-    HyperLogLog other = (HyperLogLog) obj;
-    long count = estimateNumDistinctValues();
-    long otherCount = other.estimateNumDistinctValues();
-    boolean result = p == other.p && chosenHashBits == other.chosenHashBits
-        && encoding.equals(other.encoding) && count == otherCount;
-    if (encoding.equals(EncodingType.DENSE)) {
-      result = result && denseRegister.equals(other.getHLLDenseRegister());
-    }
-
-    if (encoding.equals(EncodingType.SPARSE)) {
-      result = result && sparseRegister.equals(other.getHLLSparseRegister());
-    }
-    return result;
-  }
-
-  @Override
-  public int hashCode() {
-    int hashcode = 0;
-    hashcode += 31 * p;
-    hashcode += 31 * chosenHashBits;
-    hashcode += encoding.hashCode();
-    hashcode += 31 * estimateNumDistinctValues();
-    if (encoding.equals(EncodingType.DENSE)) {
-      hashcode += 31 * denseRegister.hashCode();
-    }
-
-    if (encoding.equals(EncodingType.SPARSE)) {
-      hashcode += 31 * sparseRegister.hashCode();
-    }
-    return hashcode;
-  }
-
-  @Override
-  public void reset() {
-  }
-
-  @Override
-  public byte[] serialize() {
-    ByteArrayOutputStream bos = new ByteArrayOutputStream();
-    // write bytes to bos ...
-    try {
-      HyperLogLogUtils.serializeHLL(bos, this);
-      byte[] result = bos.toByteArray();
-      bos.close();
-      return result;
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  @Override
-  public NumDistinctValueEstimator deserialize(byte[] buf) {
-    InputStream is = new ByteArrayInputStream(buf);
-    try {
-      HyperLogLog result = HyperLogLogUtils.deserializeHLL(is);
-      is.close();
-      return result;
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  @Override
-  public void addToEstimator(long v) {
-    addLong(v);
-  }
-
-  @Override
-  public void addToEstimator(String s) {
-    addString(s);
-  }
-
-  @Override
-  public void addToEstimator(double d) {
-    addDouble(d);
-  }
-
-  @Override
-  public void addToEstimator(HiveDecimal decimal) {
-    addDouble(decimal.doubleValue());
-  }
-
-  @Override
-  public void mergeEstimators(NumDistinctValueEstimator o) {
-    merge((HyperLogLog) o);
-  }
-
-  @Override
-  public int lengthFor(JavaDataModel model) {
-    // 5 is the head, 1<<p means the number of bytes for register
-    return (5 + (1 << p));
-  }
-
-  @Override
-  public boolean canMerge(NumDistinctValueEstimator o) {
-    return o instanceof HyperLogLog;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java
----------------------------------------------------------------------
diff --git 
a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java 
b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java
deleted file mode 100644
index 2d82bd0..0000000
--- 
a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java
+++ /dev/null
@@ -1,392 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.common.ndv.hll;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Arrays;
-import java.util.Map;
-import java.util.TreeMap;
-
-import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog.EncodingType;
-
-/**
- * HyperLogLog serialization utilities.
- */
-public class HyperLogLogUtils {
-
-  public static final byte[] MAGIC = new byte[] { 'H', 'L', 'L' };
-
-  /**
-   * HyperLogLog is serialized using the following format
-   * 
-   * <pre>
-   * |-4 byte-|------varlong----|varint (optional)|----------|  
-   * ---------------------------------------------------------
-   * | header | estimated-count | register-length | register |
-   * ---------------------------------------------------------
-   * 
-   * <b>4 byte header</b> is encoded like below
-   * 3 bytes - HLL magic string to identify serialized stream
-   * 4 bits  - p (number of bits to be used as register index)
-   * 1       - spare bit (not used)
-   * 3 bits  - encoding (000 - sparse, 001..110 - n bit packing, 111 - no bit 
packing)
-   * 
-   * Followed by header are 3 fields that are required for reconstruction
-   * of hyperloglog
-   * Estimated count - variable length long to store last computed estimated 
count.
-   *                   This is just for quick lookup without deserializing 
registers
-   * Register length - number of entries in the register (required only for 
-   *                   for sparse representation. For bit-packing, the register
-   *                   length can be found from p)
-   * </pre>
-   * @param out
-   *          - output stream to write to
-   * @param hll
-   *          - hyperloglog that needs to be serialized
-   * @throws IOException
-   */
-  public static void serializeHLL(OutputStream out, HyperLogLog hll) throws 
IOException {
-
-    // write header
-    out.write(MAGIC);
-    int fourthByte = 0;
-    int p = hll.getNumRegisterIndexBits();
-    fourthByte = (p & 0xff) << 4;
-
-    int bitWidth = 0;
-    EncodingType enc = hll.getEncoding();
-
-    // determine bit width for bitpacking and encode it in header
-    if (enc.equals(EncodingType.DENSE)) {
-      int lzr = hll.getHLLDenseRegister().getMaxRegisterValue();
-      bitWidth = getBitWidth(lzr);
-
-      // the max value of number of zeroes for 64 bit hash can be encoded using
-      // only 6 bits. So we will disable bit packing for any values >6
-      if (bitWidth > 6) {
-        fourthByte |= 7;
-        bitWidth = 8;
-      } else {
-        fourthByte |= (bitWidth & 7);
-      }
-    }
-
-    // write fourth byte of header
-    out.write(fourthByte);
-
-    // write estimated count
-    long estCount = hll.estimateNumDistinctValues();
-    writeVulong(out, estCount);
-
-    // serialize dense/sparse registers. Dense registers are bitpacked whereas
-    // sparse registers are delta and variable length encoded
-    if (enc.equals(EncodingType.DENSE)) {
-      byte[] register = hll.getHLLDenseRegister().getRegister();
-      bitpackHLLRegister(out, register, bitWidth);
-    } else if (enc.equals(EncodingType.SPARSE)) {
-      TreeMap<Integer, Byte> sparseMap = 
hll.getHLLSparseRegister().getSparseMap();
-
-      // write the number of elements in sparse map (required for
-      // reconstruction)
-      writeVulong(out, sparseMap.size());
-
-      // compute deltas and write the values as varints
-      int prev = 0;
-      for (Map.Entry<Integer, Byte> entry : sparseMap.entrySet()) {
-        if (prev == 0) {
-          prev = (entry.getKey() << HLLConstants.Q_PRIME_VALUE) | 
entry.getValue();
-          writeVulong(out, prev);
-        } else {
-          int curr = (entry.getKey() << HLLConstants.Q_PRIME_VALUE) | 
entry.getValue();
-          int delta = curr - prev;
-          writeVulong(out, delta);
-          prev = curr;
-        }
-      }
-    }
-  }
-
-  /**
-   * Refer serializeHLL() for format of serialization. This funtions
-   * deserializes the serialized hyperloglogs
-   * @param in
-   *          - input stream
-   * @return deserialized hyperloglog
-   * @throws IOException
-   */
-  public static HyperLogLog deserializeHLL(InputStream in) throws IOException {
-    checkMagicString(in);
-    int fourthByte = in.read() & 0xff;
-    int p = fourthByte >>> 4;
-
-    // read type of encoding
-    int enc = fourthByte & 7;
-    EncodingType encoding = null;
-    int bitSize = 0;
-    if (enc == 0) {
-      encoding = EncodingType.SPARSE;
-    } else if (enc > 0 && enc < 7) {
-      bitSize = enc;
-      encoding = EncodingType.DENSE;
-    } else {
-      // bit packing disabled
-      bitSize = 8;
-      encoding = EncodingType.DENSE;
-    }
-
-    // estimated count
-    long estCount = readVulong(in);
-
-    HyperLogLog result = null;
-    if (encoding.equals(EncodingType.SPARSE)) {
-      result = HyperLogLog.builder().setNumRegisterIndexBits(p)
-          .setEncoding(EncodingType.SPARSE).build();
-      int numRegisterEntries = (int) readVulong(in);
-      int[] reg = new int[numRegisterEntries];
-      int prev = 0;
-
-      // reconstruct the sparse map from delta encoded and varint input stream
-      if (numRegisterEntries > 0) {
-        prev = (int) readVulong(in);
-        reg[0] = prev;
-      }
-      int delta = 0;
-      int curr = 0;
-      for (int i = 1; i < numRegisterEntries; i++) {
-        delta = (int) readVulong(in);
-        curr = prev + delta;
-        reg[i] = curr;
-        prev = curr;
-      }
-      result.setHLLSparseRegister(reg);
-    } else {
-
-      // explicitly disable bit packing
-      if (bitSize == 8) {
-        result = HyperLogLog.builder().setNumRegisterIndexBits(p)
-            .setEncoding(EncodingType.DENSE).enableBitPacking(false).build();
-      } else {
-        result = HyperLogLog.builder().setNumRegisterIndexBits(p)
-            .setEncoding(EncodingType.DENSE).enableBitPacking(true).build();
-      }
-      int m = 1 << p;
-      byte[] register = unpackHLLRegister(in, m, bitSize);
-      result.setHLLDenseRegister(register);
-    }
-
-    result.setCount(estCount);
-
-    return result;
-  }
-
-  private static void bitpackHLLRegister(OutputStream out, byte[] register, 
int bitWidth)
-      throws IOException {
-    int bitsLeft = 8;
-    byte current = 0;
-
-    if (bitWidth == 8) {
-      fastPathWrite(out, register);
-      return;
-    }
-
-    // write the blob
-    for (byte value : register) {
-      int bitsToWrite = bitWidth;
-      while (bitsToWrite > bitsLeft) {
-        // add the bits to the bottom of the current word
-        current |= value >>> (bitsToWrite - bitsLeft);
-        // subtract out the bits we just added
-        bitsToWrite -= bitsLeft;
-        // zero out the bits above bitsToWrite
-        value &= (1 << bitsToWrite) - 1;
-        out.write(current);
-        current = 0;
-        bitsLeft = 8;
-      }
-      bitsLeft -= bitsToWrite;
-      current |= value << bitsLeft;
-      if (bitsLeft == 0) {
-        out.write(current);
-        current = 0;
-        bitsLeft = 8;
-      }
-    }
-
-    out.flush();
-  }
-
-  private static void fastPathWrite(OutputStream out, byte[] register) throws 
IOException {
-    for (byte b : register) {
-      out.write(b);
-    }
-  }
-
-  /**
-   * Unpack the bitpacked HyperLogLog register.
-   * @param in
-   *          - input stream
-   * @param length
-   *          - serialized length
-   * @return unpacked HLL register
-   * @throws IOException
-   */
-  private static byte[] unpackHLLRegister(InputStream in, int length, int 
bitSize)
-      throws IOException {
-    int mask = (1 << bitSize) - 1;
-    int bitsLeft = 8;
-
-    if (bitSize == 8) {
-      return fastPathRead(in, length);
-    }
-
-    byte current = (byte) (0xff & in.read());
-
-    byte[] output = new byte[length];
-    for (int i = 0; i < output.length; i++) {
-      byte result = 0;
-      int bitsLeftToRead = bitSize;
-      while (bitsLeftToRead > bitsLeft) {
-        result <<= bitsLeft;
-        result |= current & ((1 << bitsLeft) - 1);
-        bitsLeftToRead -= bitsLeft;
-        current = (byte) (0xff & in.read());
-        bitsLeft = 8;
-      }
-      if (bitsLeftToRead > 0) {
-        result <<= bitsLeftToRead;
-        bitsLeft -= bitsLeftToRead;
-        result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1);
-      }
-      output[i] = (byte) (result & mask);
-    }
-    return output;
-  }
-
-  private static byte[] fastPathRead(InputStream in, int length) throws 
IOException {
-    byte[] result = new byte[length];
-    for (int i = 0; i < length; i++) {
-      result[i] = (byte) in.read();
-    }
-    return result;
-  }
-
-  /**
-   * Get estimated cardinality without deserializing HLL
-   * @param in
-   *          - serialized HLL
-   * @return - cardinality
-   * @throws IOException
-   */
-  public static long getEstimatedCountFromSerializedHLL(InputStream in) throws 
IOException {
-    checkMagicString(in);
-    in.read();
-    return readVulong(in);
-  }
-
-  /**
-   * Check if the specified input stream is actually a HLL stream
-   * @param in
-   *          - input stream
-   * @throws IOException
-   */
-  private static void checkMagicString(InputStream in) throws IOException {
-    byte[] magic = new byte[3];
-    magic[0] = (byte) in.read();
-    magic[1] = (byte) in.read();
-    magic[2] = (byte) in.read();
-
-    if (!Arrays.equals(magic, MAGIC)) {
-      throw new IllegalArgumentException("The input stream is not a 
HyperLogLog stream.");
-    }
-  }
-
-  /**
-   * Minimum bits required to encode the specified value
-   * @param val
-   *          - input value
-   * @return
-   */
-  private static int getBitWidth(int val) {
-    int count = 0;
-    while (val != 0) {
-      count++;
-      val = (byte) (val >>> 1);
-    }
-    return count;
-  }
-
-  /**
-   * Return relative error between actual and estimated cardinality
-   * @param actualCount
-   *          - actual count
-   * @param estimatedCount
-   *          - estimated count
-   * @return relative error
-   */
-  public static float getRelativeError(long actualCount, long estimatedCount) {
-    float err = (1.0f - ((float) estimatedCount / (float) actualCount)) * 
100.0f;
-    return err;
-  }
-
-  /**
-   * Write variable length encoded longs to output stream
-   * @param output
-   *          - out stream
-   * @param value
-   *          - long
-   * @throws IOException
-   */
-  private static void writeVulong(OutputStream output, long value) throws 
IOException {
-    while (true) {
-      if ((value & ~0x7f) == 0) {
-        output.write((byte) value);
-        return;
-      } else {
-        output.write((byte) (0x80 | (value & 0x7f)));
-        value >>>= 7;
-      }
-    }
-  }
-
-  /**
-   * Read variable length encoded longs from input stream
-   * @param in
-   *          - input stream
-   * @return decoded long value
-   * @throws IOException
-   */
-  private static long readVulong(InputStream in) throws IOException {
-    long result = 0;
-    long b;
-    int offset = 0;
-    do {
-      b = in.read();
-      if (b == -1) {
-        throw new EOFException("Reading Vulong past EOF");
-      }
-      result |= (0x7f & b) << offset;
-      offset += 7;
-    } while (b >= 0x80);
-    return result;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java
----------------------------------------------------------------------
diff --git 
a/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java 
b/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java
deleted file mode 100644
index 792b862..0000000
--- a/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java
+++ /dev/null
@@ -1,110 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.common;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-
-import java.util.HashMap;
-import java.util.Map;
-
-import org.junit.Test;
-
-import com.google.common.collect.Lists;
-public class TestStatsSetupConst {
-
-  @Test
-  public void testSetBasicStatsState_missesUpgrade() {
-    Map<String, String> params=new HashMap<>();
-    params.put(StatsSetupConst.COLUMN_STATS_ACCURATE, "FALSE");
-    StatsSetupConst.setBasicStatsState(params, String.valueOf(true));
-    
assertEquals("{\"BASIC_STATS\":\"true\"}",params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
-  }
-
-  @Test
-  public void setColumnStatsState_camelcase() {
-    Map<String, String> params=new HashMap<>();
-    StatsSetupConst.setColumnStatsState(params, Lists.newArrayList("Foo"));
-    String val1 = params.get(StatsSetupConst.COLUMN_STATS_ACCURATE);
-    StatsSetupConst.setColumnStatsState(params, Lists.newArrayList("Foo"));
-    String val2 = params.get(StatsSetupConst.COLUMN_STATS_ACCURATE);
-    assertEquals(val1, val2);
-  }
-
-  @Test
-  public void testSetBasicStatsState_none() {
-    Map<String, String> params=new HashMap<>();
-    StatsSetupConst.setBasicStatsState(params, String.valueOf(true));
-    
assertEquals("{\"BASIC_STATS\":\"true\"}",params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
-  }
-
-  @Test
-  public void testSetBasicStatsState_falseIsAbsent() {
-    Map<String, String> params=new HashMap<>();
-    StatsSetupConst.setBasicStatsState(params, String.valueOf(true));
-    StatsSetupConst.setBasicStatsState(params, String.valueOf(false));
-    assertNull(params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
-  }
-
-  // earlier implementation have quoted boolean values...so the new 
implementation should preserve this
-  @Test
-  public void testStatColumnEntriesCompat() {
-    Map<String, String> params0=new HashMap<>();
-    StatsSetupConst.setBasicStatsState(params0, String.valueOf(true));
-    StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("Foo"));
-
-    
assertEquals("{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"Foo\":\"true\"}}",params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
-  }
-
-  @Test
-  public void testColumnEntries_orderIndependence() {
-    Map<String, String> params0=new HashMap<>();
-    StatsSetupConst.setBasicStatsState(params0, String.valueOf(true));
-    StatsSetupConst.setColumnStatsState(params0, 
Lists.newArrayList("Foo","Bar"));
-    Map<String, String> params1=new HashMap<>();
-    StatsSetupConst.setColumnStatsState(params1, 
Lists.newArrayList("Bar","Foo"));
-    StatsSetupConst.setBasicStatsState(params1, String.valueOf(true));
-
-    
assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
-  }
-
-  @Test
-  public void testColumnEntries_orderIndependence2() {
-    Map<String, String> params0=new HashMap<>();
-    // in case jackson is able to deserialize...it may use a different 
implementation for the map - which may not preserve order
-    StatsSetupConst.setBasicStatsState(params0, String.valueOf(true));
-    StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("year"));
-    StatsSetupConst.setColumnStatsState(params0, 
Lists.newArrayList("year","month"));
-    Map<String, String> params1=new HashMap<>();
-    StatsSetupConst.setColumnStatsState(params1, 
Lists.newArrayList("month","year"));
-    StatsSetupConst.setBasicStatsState(params1, String.valueOf(true));
-
-    System.out.println(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
-    
assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
-  }
-
-  // FIXME: current objective is to keep the previous outputs...but this is 
possibly bad..
-  @Test
-  public void testColumnEntries_areKept_whenBasicIsAbsent() {
-    Map<String, String> params=new HashMap<>();
-    StatsSetupConst.setBasicStatsState(params, String.valueOf(false));
-    StatsSetupConst.setColumnStatsState(params, Lists.newArrayList("Foo"));
-    
assertEquals("{\"COLUMN_STATS\":{\"Foo\":\"true\"}}",params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java
----------------------------------------------------------------------
diff --git 
a/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java
 
b/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java
deleted file mode 100644
index 377f9c7..0000000
--- 
a/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.common.ndv.fm;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertArrayEquals;
-
-import java.io.IOException;
-
-import javolution.util.FastBitSet;
-
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
-import org.junit.Test;
-
-public class TestFMSketchSerialization {
-
-  private FastBitSet[] deserialize(String s, int numBitVectors) {
-    FastBitSet[] b = new FastBitSet[numBitVectors];
-    for (int j = 0; j < numBitVectors; j++) {
-      b[j] = new FastBitSet(FMSketch.BIT_VECTOR_SIZE);
-      b[j].clear();
-    }
-
-    int vectorIndex = 0;
-
-    /*
-     * Parse input string to obtain the indexes that are set in the bitvector.
-     * When a toString() is called on a FastBitSet object to serialize it, the
-     * serialization adds { and } to the beginning and end of the return 
String.
-     * Skip "{", "}", ",", " " in the input string.
-     */
-    for (int i = 1; i < s.length() - 1;) {
-      char c = s.charAt(i);
-      i = i + 1;
-
-      // Move on to the next bit vector
-      if (c == '}') {
-        vectorIndex = vectorIndex + 1;
-      }
-
-      // Encountered a numeric value; Extract out the entire number
-      if (c >= '0' && c <= '9') {
-        String t = new String();
-        t = t + c;
-        c = s.charAt(i);
-        i = i + 1;
-
-        while (c != ',' && c != '}') {
-          t = t + c;
-          c = s.charAt(i);
-          i = i + 1;
-        }
-
-        int bitIndex = Integer.parseInt(t);
-        assert (bitIndex >= 0);
-        assert (vectorIndex < numBitVectors);
-        b[vectorIndex].set(bitIndex);
-        if (c == '}') {
-          vectorIndex = vectorIndex + 1;
-        }
-      }
-    }
-    return b;
-  }
-
-  @Test
-  public void testSerDe() throws IOException {
-    String bitVectors = "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 
3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}";
-    FastBitSet[] fastBitSet = deserialize(bitVectors, 16);
-    FMSketch sketch = new FMSketch(16);
-    for (int i = 0; i < 16; i++) {
-      sketch.setBitVector(fastBitSet[i], i);
-    }
-    assertEquals(sketch.estimateNumDistinctValues(), 3);
-    byte[] buf = sketch.serialize();
-    FMSketch newSketch = (FMSketch) NumDistinctValueEstimatorFactory
-        .getNumDistinctValueEstimator(buf);
-    sketch.equals(newSketch);
-    assertEquals(newSketch.estimateNumDistinctValues(), 3);
-    assertArrayEquals(newSketch.serialize(), buf);
-  }
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHLLNoBias.java
----------------------------------------------------------------------
diff --git 
a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHLLNoBias.java 
b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHLLNoBias.java
deleted file mode 100644
index 30f5ca3..0000000
--- a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHLLNoBias.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.common.ndv.hll;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Random;
-import java.util.Set;
-
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-@RunWith(value = Parameterized.class)
-public class TestHLLNoBias {
-
-  // 1.5% tolerance for long range bias (when no bias enabled) and 5% when (no
-  // bias is disabled) and
-  // 0.5% for short range bias
-  private float noBiaslongRangeTolerance = 2.0f;
-  private float biasedlongRangeTolerance = 5.0f;
-  private float shortRangeTolerance = 0.5f;
-
-  private int size;
-
-  public TestHLLNoBias(int n) {
-    this.size = n;
-  }
-
-  @Parameters
-  public static Collection<Object[]> data() {
-    Object[][] data = new Object[][] { { 30000 }, { 41000 }, { 50000 }, { 
60000 }, { 75000 },
-        { 80000 }, { 81920 } };
-    return Arrays.asList(data);
-  }
-
-  @Test
-  public void testHLLAdd() {
-    Random rand = new Random(size);
-    HyperLogLog hll = HyperLogLog.builder().build();
-    int size = 100;
-    for (int i = 0; i < size; i++) {
-      hll.addLong(rand.nextLong());
-    }
-    double threshold = size > 40000 ? noBiaslongRangeTolerance : 
shortRangeTolerance;
-    double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-  }
-
-  @Test
-  public void testHLLAddHalfDistinct() {
-    Random rand = new Random(size);
-    HyperLogLog hll = HyperLogLog.builder().build();
-    int unique = size / 2;
-    Set<Long> hashset = new HashSet<Long>();
-    for (int i = 0; i < size; i++) {
-      long val = rand.nextInt(unique);
-      hashset.add(val);
-      hll.addLong(val);
-    }
-    double threshold = size > 40000 ? noBiaslongRangeTolerance : 
shortRangeTolerance;
-    double delta = threshold * hashset.size() / 100;
-    assertEquals((double) hashset.size(), (double) hll.count(), delta);
-  }
-
-  @Test
-  public void testHLLNoBiasDisabled() {
-    Random rand = new Random(size);
-    HyperLogLog hll = HyperLogLog.builder().enableNoBias(false).build();
-    int size = 100;
-    for (int i = 0; i < size; i++) {
-      hll.addLong(rand.nextLong());
-    }
-    double threshold = size > 40000 ? biasedlongRangeTolerance : 
shortRangeTolerance;
-    double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-  }
-
-  @Test
-  public void testHLLNoBiasDisabledHalfDistinct() {
-    Random rand = new Random(size);
-    HyperLogLog hll = HyperLogLog.builder().enableNoBias(false).build();
-    int unique = size / 2;
-    Set<Long> hashset = new HashSet<Long>();
-    for (int i = 0; i < size; i++) {
-      long val = rand.nextInt(unique);
-      hashset.add(val);
-      hll.addLong(val);
-    }
-    double threshold = size > 40000 ? biasedlongRangeTolerance : 
shortRangeTolerance;
-    double delta = threshold * hashset.size() / 100;
-    assertEquals((double) hashset.size(), (double) hll.count(), delta);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHLLSerialization.java
----------------------------------------------------------------------
diff --git 
a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHLLSerialization.java
 
b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHLLSerialization.java
deleted file mode 100644
index b4b8df1..0000000
--- 
a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHLLSerialization.java
+++ /dev/null
@@ -1,267 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.common.ndv.hll;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Random;
-import java.util.Set;
-
-import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog.EncodingType;
-import org.junit.After;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-@RunWith(value = Parameterized.class)
-public class TestHLLSerialization {
-
-  private int size;
-  private File testFile;
-  private static final String pathPrefix = ".";
-  private static final int SEED = 100;
-  // 5% tolerance for long range bias and 2.5% for short range bias
-  private float longRangeTolerance = 5.0f;
-  private float shortRangeTolerance = 2.5f;
-
-  public TestHLLSerialization(int n) {
-    this.size = n;
-    this.testFile = new File(pathPrefix + testCaseName.getMethodName() + "_" + 
size + ".hll");
-  }
-
-  @Parameters
-  public static Collection<Object[]> data() {
-    Object[][] data = new Object[][] { { 2 }, { 10 }, { 100 }, { 1000 }, { 
2000 }, { 3000 },
-        { 5000 }, { 6000 }, { 10000 }, { 100000 }, { 1000000 } };
-    return Arrays.asList(data);
-  }
-
-  @After
-  public void close() {
-    if (testFile.exists()) {
-      testFile.delete();
-    }
-  }
-
-  @Rule
-  public TestName testCaseName = new TestName();
-
-  @Test
-  public void testHLLSparseSerialization() throws IOException {
-    HyperLogLog hll = 
HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
-    Random rand = new Random(SEED);
-    for (int i = 0; i < size; i++) {
-      hll.addLong(rand.nextLong());
-    }
-    FileOutputStream fos = new FileOutputStream(testFile);
-    DataOutputStream out = new DataOutputStream(fos);
-    HyperLogLogUtils.serializeHLL(out, hll);
-    FileInputStream fis = new FileInputStream(testFile);
-    DataInputStream in = new DataInputStream(fis);
-    HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
-    assertEquals(hll, deserializedHLL);
-    assertEquals(hll.toString(), deserializedHLL.toString());
-    assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
-    assertEquals(hll.hashCode(), deserializedHLL.hashCode());
-    assertEquals(hll.count(), deserializedHLL.count());
-  }
-
-  @Test
-  public void testHLLSparseSerializationHalfDistinct() throws IOException {
-    HyperLogLog hll = 
HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
-    Random rand = new Random(SEED);
-    Set<Integer> hashset = new HashSet<Integer>();
-    for (int i = 0; i < size; i++) {
-      int val = rand.nextInt(size / 2);
-      hll.addLong(val);
-      hashset.add(val);
-    }
-    FileOutputStream fos = new FileOutputStream(testFile);
-    DataOutputStream out = new DataOutputStream(fos);
-    HyperLogLogUtils.serializeHLL(out, hll);
-    double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
-    double delta = threshold * hashset.size() / 100;
-    FileInputStream fis = new FileInputStream(testFile);
-    DataInputStream in = new DataInputStream(fis);
-    HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
-    assertEquals(hll, deserializedHLL);
-    assertEquals(hll.toString(), deserializedHLL.toString());
-    assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
-    assertEquals(hll.hashCode(), deserializedHLL.hashCode());
-    assertEquals(hll.count(), deserializedHLL.count());
-    assertEquals(hashset.size(), hll.count(), delta);
-    assertEquals(hashset.size(), deserializedHLL.count(), delta);
-  }
-
-  @Test
-  public void testHLLSparseNoBitPacking() throws IOException {
-    HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.SPARSE)
-        .enableBitPacking(false).build();
-    Random rand = new Random(SEED);
-    for (int i = 0; i < size; i++) {
-      hll.addLong(rand.nextLong());
-    }
-    FileOutputStream fos = new FileOutputStream(testFile);
-    DataOutputStream out = new DataOutputStream(fos);
-    HyperLogLogUtils.serializeHLL(out, hll);
-    FileInputStream fis = new FileInputStream(testFile);
-    DataInputStream in = new DataInputStream(fis);
-    HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
-    assertEquals(hll, deserializedHLL);
-    assertEquals(hll.toString(), deserializedHLL.toString());
-    assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
-    assertEquals(hll.hashCode(), deserializedHLL.hashCode());
-    assertEquals(hll.count(), deserializedHLL.count());
-  }
-
-  @Test
-  public void testHLLSparseNoBitPackingHalfDistinct() throws IOException {
-    HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.SPARSE)
-        .enableBitPacking(false).build();
-    Random rand = new Random(SEED);
-    Set<Integer> hashset = new HashSet<Integer>();
-    for (int i = 0; i < size; i++) {
-      int val = rand.nextInt(size / 2);
-      hll.addLong(val);
-      hashset.add(val);
-    }
-    FileOutputStream fos = new FileOutputStream(testFile);
-    DataOutputStream out = new DataOutputStream(fos);
-    HyperLogLogUtils.serializeHLL(out, hll);
-    double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
-    double delta = threshold * hashset.size() / 100;
-    FileInputStream fis = new FileInputStream(testFile);
-    DataInputStream in = new DataInputStream(fis);
-    HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
-    assertEquals(hll, deserializedHLL);
-    assertEquals(hll.toString(), deserializedHLL.toString());
-    assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
-    assertEquals(hll.hashCode(), deserializedHLL.hashCode());
-    assertEquals(hll.count(), deserializedHLL.count());
-    assertEquals(hashset.size(), hll.count(), delta);
-    assertEquals(hashset.size(), deserializedHLL.count(), delta);
-  }
-
-  @Test
-  public void testHLLDenseSerialization() throws IOException {
-    HyperLogLog hll = 
HyperLogLog.builder().setEncoding(EncodingType.DENSE).build();
-    Random rand = new Random(SEED);
-    for (int i = 0; i < size; i++) {
-      hll.addLong(rand.nextLong());
-    }
-    FileOutputStream fos = new FileOutputStream(testFile);
-    DataOutputStream out = new DataOutputStream(fos);
-    HyperLogLogUtils.serializeHLL(out, hll);
-    FileInputStream fis = new FileInputStream(testFile);
-    DataInputStream in = new DataInputStream(fis);
-    HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
-    assertEquals(hll, deserializedHLL);
-    assertEquals(hll.toString(), deserializedHLL.toString());
-    assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
-    assertEquals(hll.hashCode(), deserializedHLL.hashCode());
-    assertEquals(hll.count(), deserializedHLL.count());
-  }
-
-  @Test
-  public void testHLLDenseSerializationHalfDistinct() throws IOException {
-    HyperLogLog hll = 
HyperLogLog.builder().setEncoding(EncodingType.DENSE).build();
-    Random rand = new Random(SEED);
-    Set<Integer> hashset = new HashSet<Integer>();
-    for (int i = 0; i < size; i++) {
-      int val = rand.nextInt(size / 2);
-      hll.addLong(val);
-      hashset.add(val);
-    }
-    FileOutputStream fos = new FileOutputStream(testFile);
-    DataOutputStream out = new DataOutputStream(fos);
-    HyperLogLogUtils.serializeHLL(out, hll);
-    double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
-    double delta = threshold * hashset.size() / 100;
-    FileInputStream fis = new FileInputStream(testFile);
-    DataInputStream in = new DataInputStream(fis);
-    HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
-    assertEquals(hll, deserializedHLL);
-    assertEquals(hll.toString(), deserializedHLL.toString());
-    assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
-    assertEquals(hll.hashCode(), deserializedHLL.hashCode());
-    assertEquals(hll.count(), deserializedHLL.count());
-    assertEquals(hashset.size(), hll.count(), delta);
-    assertEquals(hashset.size(), deserializedHLL.count(), delta);
-  }
-
-  @Test
-  public void testHLLDenseNoBitPacking() throws IOException {
-    HyperLogLog hll = 
HyperLogLog.builder().setEncoding(EncodingType.DENSE).enableBitPacking(false)
-        .build();
-    Random rand = new Random(SEED);
-    for (int i = 0; i < size; i++) {
-      hll.addLong(rand.nextLong());
-    }
-    FileOutputStream fos = new FileOutputStream(testFile);
-    DataOutputStream out = new DataOutputStream(fos);
-    HyperLogLogUtils.serializeHLL(out, hll);
-    FileInputStream fis = new FileInputStream(testFile);
-    DataInputStream in = new DataInputStream(fis);
-    HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
-    assertEquals(hll, deserializedHLL);
-    assertEquals(hll.toString(), deserializedHLL.toString());
-    assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
-    assertEquals(hll.hashCode(), deserializedHLL.hashCode());
-    assertEquals(hll.count(), deserializedHLL.count());
-  }
-
-  @Test
-  public void testHLLDenseNoBitPackingHalfDistinct() throws IOException {
-    HyperLogLog hll = 
HyperLogLog.builder().setEncoding(EncodingType.DENSE).enableBitPacking(false)
-        .build();
-    Random rand = new Random(SEED);
-    Set<Integer> hashset = new HashSet<Integer>();
-    for (int i = 0; i < size; i++) {
-      int val = rand.nextInt(size / 2);
-      hll.addLong(val);
-      hashset.add(val);
-    }
-    FileOutputStream fos = new FileOutputStream(testFile);
-    DataOutputStream out = new DataOutputStream(fos);
-    HyperLogLogUtils.serializeHLL(out, hll);
-    double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
-    double delta = threshold * hashset.size() / 100;
-    FileInputStream fis = new FileInputStream(testFile);
-    DataInputStream in = new DataInputStream(fis);
-    HyperLogLog deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
-    assertEquals(hll, deserializedHLL);
-    assertEquals(hll.toString(), deserializedHLL.toString());
-    assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
-    assertEquals(hll.hashCode(), deserializedHLL.hashCode());
-    assertEquals(hll.count(), deserializedHLL.count());
-    assertEquals(hashset.size(), hll.count(), delta);
-    assertEquals(hashset.size(), deserializedHLL.count(), delta);
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
----------------------------------------------------------------------
diff --git 
a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java 
b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
deleted file mode 100644
index 635073f..0000000
--- a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
+++ /dev/null
@@ -1,227 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.common.ndv.hll;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog.EncodingType;
-import org.junit.Test;
-
-public class TestHyperLogLog {
-  // 5% tolerance for estimated count
-  private float longRangeTolerance = 5.0f;
-  private float shortRangeTolerance = 2.0f;
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testHLLDenseMerge() {
-    HyperLogLog hll = 
HyperLogLog.builder().setEncoding(EncodingType.DENSE).build();
-    HyperLogLog hll2 = 
HyperLogLog.builder().setEncoding(EncodingType.DENSE).build();
-    HyperLogLog hll3 = 
HyperLogLog.builder().setEncoding(EncodingType.DENSE).build();
-    HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16)
-        .setEncoding(EncodingType.DENSE).build();
-    int size = 1000;
-    for (int i = 0; i < size; i++) {
-      hll.addLong(i);
-      hll2.addLong(size + i);
-      hll3.addLong(2 * size + i);
-    }
-    double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
-    double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-    assertEquals((double) size, (double) hll2.count(), delta);
-
-    // merge
-    hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
-    assertEquals(EncodingType.DENSE, hll.getEncoding());
-
-    // merge should update registers and hence the count
-    hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
-    assertEquals(EncodingType.DENSE, hll.getEncoding());
-
-    // new merge
-    hll.merge(hll3);
-    assertEquals((double) 3 * size, (double) hll.count(), delta);
-    assertEquals(EncodingType.DENSE, hll.getEncoding());
-
-    // invalid merge -- register set size doesn't match
-    hll.merge(hll4);
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testHLLSparseMerge() {
-    HyperLogLog hll = 
HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
-    HyperLogLog hll2 = 
HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
-    HyperLogLog hll3 = 
HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
-    HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16)
-        .setEncoding(EncodingType.SPARSE).build();
-    int size = 500;
-    for (int i = 0; i < size; i++) {
-      hll.addLong(i);
-      hll2.addLong(size + i);
-      hll3.addLong(2 * size + i);
-    }
-    double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
-    double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-    assertEquals((double) size, (double) hll2.count(), delta);
-
-    // merge
-    hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
-    assertEquals(EncodingType.SPARSE, hll.getEncoding());
-
-    // merge should update registers and hence the count
-    hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
-    assertEquals(EncodingType.SPARSE, hll.getEncoding());
-
-    // new merge
-    hll.merge(hll3);
-    assertEquals((double) 3 * size, (double) hll.count(), delta);
-    assertEquals(EncodingType.SPARSE, hll.getEncoding());
-
-    // invalid merge -- register set size doesn't match
-    hll.merge(hll4);
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testHLLSparseDenseMerge() {
-    HyperLogLog hll = 
HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
-    HyperLogLog hll2 = 
HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
-    HyperLogLog hll3 = 
HyperLogLog.builder().setEncoding(EncodingType.DENSE).build();
-    HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16)
-        .setEncoding(EncodingType.DENSE).build();
-    int size = 1000;
-    for (int i = 0; i < size; i++) {
-      hll.addLong(i);
-      hll2.addLong(size + i);
-      hll3.addLong(2 * size + i);
-    }
-    double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
-    double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-    assertEquals((double) size, (double) hll2.count(), delta);
-
-    // sparse-sparse merge
-    hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
-    assertEquals(EncodingType.SPARSE, hll.getEncoding());
-
-    // merge should update registers and hence the count
-    hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
-    assertEquals(EncodingType.SPARSE, hll.getEncoding());
-
-    // sparse-dense merge
-    hll.merge(hll3);
-    assertEquals((double) 3 * size, (double) hll.count(), delta);
-    assertEquals(EncodingType.DENSE, hll.getEncoding());
-
-    // invalid merge -- register set size doesn't match
-    hll.merge(hll4);
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testHLLDenseSparseMerge() {
-    HyperLogLog hll = 
HyperLogLog.builder().setEncoding(EncodingType.DENSE).build();
-    HyperLogLog hll2 = 
HyperLogLog.builder().setEncoding(EncodingType.DENSE).build();
-    HyperLogLog hll3 = 
HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
-    HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16)
-        .setEncoding(EncodingType.SPARSE).build();
-    int size = 1000;
-    for (int i = 0; i < size; i++) {
-      hll.addLong(i);
-      hll2.addLong(size + i);
-      hll3.addLong(2 * size + i);
-    }
-    double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
-    double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-    assertEquals((double) size, (double) hll2.count(), delta);
-
-    // sparse-sparse merge
-    hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
-    assertEquals(EncodingType.DENSE, hll.getEncoding());
-
-    // merge should update registers and hence the count
-    hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
-    assertEquals(EncodingType.DENSE, hll.getEncoding());
-
-    // sparse-dense merge
-    hll.merge(hll3);
-    assertEquals((double) 3 * size, (double) hll.count(), delta);
-    assertEquals(EncodingType.DENSE, hll.getEncoding());
-
-    // invalid merge -- register set size doesn't match
-    hll.merge(hll4);
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testHLLSparseOverflowMerge() {
-    HyperLogLog hll = 
HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
-    HyperLogLog hll2 = 
HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
-    HyperLogLog hll3 = 
HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
-    HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16)
-        .setEncoding(EncodingType.SPARSE).build();
-    int size = 1000;
-    for (int i = 0; i < size; i++) {
-      hll.addLong(i);
-      hll2.addLong(size + i);
-      hll3.addLong(2 * size + i);
-    }
-    double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
-    double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-    assertEquals((double) size, (double) hll2.count(), delta);
-
-    // sparse-sparse merge
-    hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
-    assertEquals(EncodingType.SPARSE, hll.getEncoding());
-
-    // merge should update registers and hence the count
-    hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
-    assertEquals(EncodingType.SPARSE, hll.getEncoding());
-
-    // sparse-sparse overload to dense
-    hll.merge(hll3);
-    assertEquals((double) 3 * size, (double) hll.count(), delta);
-    assertEquals(EncodingType.DENSE, hll.getEncoding());
-
-    // invalid merge -- register set size doesn't match
-    hll.merge(hll4);
-  }
-
-  @Test
-  public void testHLLSparseMoreRegisterBits() {
-    HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.SPARSE)
-        .setNumRegisterIndexBits(16).build();
-    int size = 1000;
-    for (int i = 0; i < size; i++) {
-      hll.addLong(i);
-    }
-    double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
-    double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLogDense.java
----------------------------------------------------------------------
diff --git 
a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLogDense.java
 
b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLogDense.java
deleted file mode 100644
index 00fd785..0000000
--- 
a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLogDense.java
+++ /dev/null
@@ -1,82 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.common.ndv.hll;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Random;
-import java.util.Set;
-
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-@RunWith(value = Parameterized.class)
-public class TestHyperLogLogDense {
-
-  // 5% tolerance for long range bias and 3% for short range bias
-  private float longRangeTolerance = 5.0f;
-  private float shortRangeTolerance = 3.0f;
-
-  private int size;
-
-  public TestHyperLogLogDense(int n) {
-    this.size = n;
-  }
-
-  @Parameters
-  public static Collection<Object[]> data() {
-    Object[][] data = new Object[][] { { 2 }, { 10 }, { 100 }, { 1000 }, { 
10000 }, { 100000 },
-        { 1000000 } };
-    return Arrays.asList(data);
-  }
-
-  @Test
-  public void testHLLAdd() {
-    Random rand = new Random(size);
-    HyperLogLog hll = 
HyperLogLog.builder().setEncoding(HyperLogLog.EncodingType.DENSE).build();
-    int size = 100;
-    for (int i = 0; i < size; i++) {
-      hll.addLong(rand.nextLong());
-    }
-    double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
-    double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-  }
-
-  @Test
-  public void testHLLAddHalfDistinct() {
-    Random rand = new Random(size);
-    HyperLogLog hll = 
HyperLogLog.builder().setEncoding(HyperLogLog.EncodingType.DENSE).build();
-    int unique = size / 2;
-    Set<Long> hashset = new HashSet<Long>();
-    for (int i = 0; i < size; i++) {
-      long val = rand.nextInt(unique);
-      hashset.add(val);
-      hll.addLong(val);
-    }
-    double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
-    double delta = threshold * hashset.size() / 100;
-    assertEquals((double) hashset.size(), (double) hll.count(), delta);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/133d3c47/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLogSparse.java
----------------------------------------------------------------------
diff --git 
a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLogSparse.java
 
b/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLogSparse.java
deleted file mode 100644
index cfa5886..0000000
--- 
a/common/src/test/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLogSparse.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.common.ndv.hll;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Random;
-import java.util.Set;
-
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-@RunWith(value = Parameterized.class)
-public class TestHyperLogLogSparse {
-
-  // 5% tolerance for long range bias and 1% for short range bias
-  private float longRangeTolerance = 5.0f;
-  private float shortRangeTolerance = 1.0f;
-
-  private int size;
-
-  public TestHyperLogLogSparse(int n) {
-    this.size = n;
-  }
-
-  @Parameters
-  public static Collection<Object[]> data() {
-    Object[][] data = new Object[][] { { 2 }, { 10 }, { 100 }, { 1000 }, { 
10000 }, { 100000 },
-        { 1000000 } };
-    return Arrays.asList(data);
-  }
-
-  @Test
-  public void testHLLAdd() {
-    Random rand = new Random(size);
-    HyperLogLog hll = HyperLogLog.builder().build();
-    int size = 100;
-    for (int i = 0; i < size; i++) {
-      hll.addLong(rand.nextLong());
-    }
-    double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
-    double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-  }
-
-  @Test
-  public void testHLLAddHalfDistinct() {
-    Random rand = new Random(size);
-    HyperLogLog hll = HyperLogLog.builder().build();
-    int unique = size / 2;
-    Set<Long> hashset = new HashSet<Long>();
-    for (int i = 0; i < size; i++) {
-      long val = rand.nextInt(unique);
-      hashset.add(val);
-      hll.addLong(val);
-    }
-    double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
-    double delta = threshold * hashset.size() / 100;
-    assertEquals((double) hashset.size(), (double) hll.count(), delta);
-  }
-}

Reply via email to