Repository: orc Updated Branches: refs/heads/master 3c30fe85b -> 3283d2381
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java ---------------------------------------------------------------------- diff --git a/java/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java b/java/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java new file mode 100644 index 0000000..bb0b8f2 --- /dev/null +++ b/java/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java @@ -0,0 +1,309 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.common.util; + +import java.util.Arrays; +import java.util.List; + +import static com.google.common.base.Preconditions.checkArgument; + +/** + * BloomFilter is a probabilistic data structure for set membership check. BloomFilters are + * highly space efficient when compared to using a HashSet. Because of the probabilistic nature of + * bloom filter false positive (element not present in bloom filter but test() says true) are + * possible but false negatives are not possible (if element is present then test() will never + * say false). 
The false positive probability is configurable (default: 5%) depending on which + * storage requirement may increase or decrease. Lower the false positive probability greater + * is the space requirement. + * Bloom filters are sensitive to number of elements that will be inserted in the bloom filter. + * During the creation of bloom filter expected number of entries must be specified. If the number + * of insertions exceed the specified initial number of entries then false positive probability will + * increase accordingly. + * + * Internally, this implementation of bloom filter uses Murmur3 fast non-cryptographic hash + * algorithm. Although Murmur2 is slightly faster than Murmur3 in Java, it suffers from hash + * collisions for specific sequence of repeating bytes. Check the following link for more info + * https://code.google.com/p/smhasher/wiki/MurmurHash2Flaw + */ +public class BloomFilter { + public static final double DEFAULT_FPP = 0.05; + protected BitSet bitSet; + protected int numBits; + protected int numHashFunctions; + + public BloomFilter() { + } + + public BloomFilter(long expectedEntries) { + this(expectedEntries, DEFAULT_FPP); + } + + public BloomFilter(long expectedEntries, double fpp) { + checkArgument(expectedEntries > 0, "expectedEntries should be > 0"); + checkArgument(fpp > 0.0 && fpp < 1.0, "False positive probability should be > 0.0 & < 1.0"); + int nb = optimalNumOfBits(expectedEntries, fpp); + // make 'm' multiple of 64 + this.numBits = nb + (Long.SIZE - (nb % Long.SIZE)); + this.numHashFunctions = optimalNumOfHashFunctions(expectedEntries, numBits); + this.bitSet = new BitSet(numBits); + } + + /** + * A constructor to support rebuilding the BloomFilter from a serialized representation. 
+ * @param bits + * @param numBits + * @param numFuncs + */ + public BloomFilter(List<Long> bits, int numBits, int numFuncs) { + super(); + long[] copied = new long[bits.size()]; + for (int i = 0; i < bits.size(); i++) copied[i] = bits.get(i); + bitSet = new BitSet(copied); + this.numBits = numBits; + numHashFunctions = numFuncs; + } + + static int optimalNumOfHashFunctions(long n, long m) { + return Math.max(1, (int) Math.round((double) m / n * Math.log(2))); + } + + static int optimalNumOfBits(long n, double p) { + return (int) (-n * Math.log(p) / (Math.log(2) * Math.log(2))); + } + + public void add(byte[] val) { + if (val == null) { + addBytes(val, -1, -1); + } else { + addBytes(val, 0, val.length); + } + } + + public void addBytes(byte[] val, int offset, int length) { + // We use the trick mentioned in "Less Hashing, Same Performance: Building a Better Bloom Filter" + // by Kirsch et.al. From abstract 'only two hash functions are necessary to effectively + // implement a Bloom filter without any loss in the asymptotic false positive probability' + + // Lets split up 64-bit hashcode into two 32-bit hash codes and employ the technique mentioned + // in the above paper + long hash64 = val == null ? 
Murmur3.NULL_HASHCODE : + Murmur3.hash64(val, offset, length); + addHash(hash64); + } + + private void addHash(long hash64) { + int hash1 = (int) hash64; + int hash2 = (int) (hash64 >>> 32); + + for (int i = 1; i <= numHashFunctions; i++) { + int combinedHash = hash1 + (i * hash2); + // hashcode should be positive, flip all the bits if it's negative + if (combinedHash < 0) { + combinedHash = ~combinedHash; + } + int pos = combinedHash % numBits; + bitSet.set(pos); + } + } + + public void addString(String val) { + if (val == null) { + add(null); + } else { + add(val.getBytes()); + } + } + + public void addLong(long val) { + addHash(getLongHash(val)); + } + + public void addDouble(double val) { + addLong(Double.doubleToLongBits(val)); + } + + public boolean test(byte[] val) { + if (val == null) { + return testBytes(val, -1, -1); + } + return testBytes(val, 0, val.length); + } + + public boolean testBytes(byte[] val, int offset, int length) { + long hash64 = val == null ? Murmur3.NULL_HASHCODE : + Murmur3.hash64(val, offset, length); + return testHash(hash64); + } + + private boolean testHash(long hash64) { + int hash1 = (int) hash64; + int hash2 = (int) (hash64 >>> 32); + + for (int i = 1; i <= numHashFunctions; i++) { + int combinedHash = hash1 + (i * hash2); + // hashcode should be positive, flip all the bits if it's negative + if (combinedHash < 0) { + combinedHash = ~combinedHash; + } + int pos = combinedHash % numBits; + if (!bitSet.get(pos)) { + return false; + } + } + return true; + } + + public boolean testString(String val) { + if (val == null) { + return test(null); + } else { + return test(val.getBytes()); + } + } + + public boolean testLong(long val) { + return testHash(getLongHash(val)); + } + + // Thomas Wang's integer hash function + // http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm + private long getLongHash(long key) { + key = (~key) + (key << 21); // key = (key << 21) - key - 1; + key = key ^ (key >> 
24); + key = (key + (key << 3)) + (key << 8); // key * 265 + key = key ^ (key >> 14); + key = (key + (key << 2)) + (key << 4); // key * 21 + key = key ^ (key >> 28); + key = key + (key << 31); + return key; + } + + public boolean testDouble(double val) { + return testLong(Double.doubleToLongBits(val)); + } + + public long sizeInBytes() { + return getBitSize() / 8; + } + + public int getBitSize() { + return bitSet.getData().length * Long.SIZE; + } + + public int getNumHashFunctions() { + return numHashFunctions; + } + + public long[] getBitSet() { + return bitSet.getData(); + } + + @Override + public String toString() { + return "m: " + numBits + " k: " + numHashFunctions; + } + + /** + * Merge the specified bloom filter with current bloom filter. + * + * @param that - bloom filter to merge + */ + public void merge(BloomFilter that) { + if (this != that && this.numBits == that.numBits && this.numHashFunctions == that.numHashFunctions) { + this.bitSet.putAll(that.bitSet); + } else { + throw new IllegalArgumentException("BloomFilters are not compatible for merging." + + " this - " + this.toString() + " that - " + that.toString()); + } + } + + public void reset() { + this.bitSet.clear(); + } + + /** + * Bare metal bit set implementation. For performance reasons, this implementation does not check + * for index bounds nor expand the bit set size if the specified index is greater than the size. + */ + public class BitSet { + private final long[] data; + + public BitSet(long bits) { + this(new long[(int) Math.ceil((double) bits / (double) Long.SIZE)]); + } + + /** + * Deserialize long array as bit set. + * + * @param data - bit array + */ + public BitSet(long[] data) { + assert data.length > 0 : "data length is zero!"; + this.data = data; + } + + /** + * Sets the bit at specified index. + * + * @param index - position + */ + public void set(int index) { + data[index >>> 6] |= (1L << index); + } + + /** + * Returns true if the bit is set in the specified index. 
+ * + * @param index - position + * @return - value at the bit position + */ + public boolean get(int index) { + return (data[index >>> 6] & (1L << index)) != 0; + } + + /** + * Number of bits + */ + public long bitSize() { + return (long) data.length * Long.SIZE; + } + + public long[] getData() { + return data; + } + + /** + * Combines the two BitArrays using bitwise OR. + */ + public void putAll(BitSet array) { + assert data.length == array.data.length : + "BitArrays must be of equal length (" + data.length + "!= " + array.data.length + ")"; + for (int i = 0; i < data.length; i++) { + data[i] |= array.data[i]; + } + } + + /** + * Clear the bit set. + */ + public void clear() { + Arrays.fill(data, 0); + } + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hive/common/util/IntervalDayTimeUtils.java ---------------------------------------------------------------------- diff --git a/java/storage-api/src/java/org/apache/hive/common/util/IntervalDayTimeUtils.java b/java/storage-api/src/java/org/apache/hive/common/util/IntervalDayTimeUtils.java new file mode 100644 index 0000000..727c1e6 --- /dev/null +++ b/java/storage-api/src/java/org/apache/hive/common/util/IntervalDayTimeUtils.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.common.util; + +import java.math.BigDecimal; +import java.text.SimpleDateFormat; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; + + +/** + * DateUtils. Thread-safe class + * + */ +public class IntervalDayTimeUtils { + + private static final ThreadLocal<SimpleDateFormat> dateFormatLocal = new ThreadLocal<SimpleDateFormat>() { + @Override + protected SimpleDateFormat initialValue() { + return new SimpleDateFormat("yyyy-MM-dd"); + } + }; + + public static SimpleDateFormat getDateFormat() { + return dateFormatLocal.get(); + } + + public static final int NANOS_PER_SEC = 1000000000; + public static final BigDecimal MAX_INT_BD = new BigDecimal(Integer.MAX_VALUE); + public static final BigDecimal NANOS_PER_SEC_BD = new BigDecimal(NANOS_PER_SEC); + + public static int parseNumericValueWithRange(String fieldName, + String strVal, int minValue, int maxValue) throws IllegalArgumentException { + int result = 0; + if (strVal != null) { + result = Integer.parseInt(strVal); + if (result < minValue || result > maxValue) { + throw new IllegalArgumentException(String.format("%s value %d outside range [%d, %d]", + fieldName, result, minValue, maxValue)); + } + } + return result; + } + + public static long getIntervalDayTimeTotalNanos(HiveIntervalDayTime intervalDayTime) { + return intervalDayTime.getTotalSeconds() * NANOS_PER_SEC + intervalDayTime.getNanos(); + } + + public static void setIntervalDayTimeTotalNanos(HiveIntervalDayTime intervalDayTime, + long totalNanos) { + intervalDayTime.set(totalNanos / NANOS_PER_SEC, (int) (totalNanos % NANOS_PER_SEC)); + } + + public static long getIntervalDayTimeTotalSecondsFromTotalNanos(long totalNanos) { + return totalNanos / NANOS_PER_SEC; + } + + public static int getIntervalDayTimeNanosFromTotalNanos(long totalNanos) { + return (int) (totalNanos % NANOS_PER_SEC); + } +} 
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hive/common/util/Murmur3.java ---------------------------------------------------------------------- diff --git a/java/storage-api/src/java/org/apache/hive/common/util/Murmur3.java b/java/storage-api/src/java/org/apache/hive/common/util/Murmur3.java new file mode 100644 index 0000000..88c3514 --- /dev/null +++ b/java/storage-api/src/java/org/apache/hive/common/util/Murmur3.java @@ -0,0 +1,335 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.common.util; + +/** + * Murmur3 is successor to Murmur2 fast non-crytographic hash algorithms. + * + * Murmur3 32 and 128 bit variants. + * 32-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#94 + * 128-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#255 + * + * This is a public domain code with no copyrights. + * From homepage of MurmurHash (https://code.google.com/p/smhasher/), + * "All MurmurHash versions are public domain software, and the author disclaims all copyright + * to their code." 
+ */ +public class Murmur3 { + // from 64-bit linear congruential generator + public static final long NULL_HASHCODE = 2862933555777941757L; + + // Constants for 32 bit variant + private static final int C1_32 = 0xcc9e2d51; + private static final int C2_32 = 0x1b873593; + private static final int R1_32 = 15; + private static final int R2_32 = 13; + private static final int M_32 = 5; + private static final int N_32 = 0xe6546b64; + + // Constants for 128 bit variant + private static final long C1 = 0x87c37b91114253d5L; + private static final long C2 = 0x4cf5ad432745937fL; + private static final int R1 = 31; + private static final int R2 = 27; + private static final int R3 = 33; + private static final int M = 5; + private static final int N1 = 0x52dce729; + private static final int N2 = 0x38495ab5; + + private static final int DEFAULT_SEED = 104729; + + /** + * Murmur3 32-bit variant. + * + * @param data - input byte array + * @return - hashcode + */ + public static int hash32(byte[] data) { + return hash32(data, data.length, DEFAULT_SEED); + } + + /** + * Murmur3 32-bit variant. + * + * @param data - input byte array + * @param length - length of array + * @param seed - seed. 
(default 0) + * @return - hashcode + */ + public static int hash32(byte[] data, int length, int seed) { + int hash = seed; + final int nblocks = length >> 2; + + // body + for (int i = 0; i < nblocks; i++) { + int i_4 = i << 2; + int k = (data[i_4] & 0xff) + | ((data[i_4 + 1] & 0xff) << 8) + | ((data[i_4 + 2] & 0xff) << 16) + | ((data[i_4 + 3] & 0xff) << 24); + + // mix functions + k *= C1_32; + k = Integer.rotateLeft(k, R1_32); + k *= C2_32; + hash ^= k; + hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32; + } + + // tail + int idx = nblocks << 2; + int k1 = 0; + switch (length - idx) { + case 3: + k1 ^= data[idx + 2] << 16; + case 2: + k1 ^= data[idx + 1] << 8; + case 1: + k1 ^= data[idx]; + + // mix functions + k1 *= C1_32; + k1 = Integer.rotateLeft(k1, R1_32); + k1 *= C2_32; + hash ^= k1; + } + + // finalization + hash ^= length; + hash ^= (hash >>> 16); + hash *= 0x85ebca6b; + hash ^= (hash >>> 13); + hash *= 0xc2b2ae35; + hash ^= (hash >>> 16); + + return hash; + } + + /** + * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit variant. + * + * @param data - input byte array + * @return - hashcode + */ + public static long hash64(byte[] data) { + return hash64(data, 0, data.length, DEFAULT_SEED); + } + + public static long hash64(byte[] data, int offset, int length) { + return hash64(data, offset, length, DEFAULT_SEED); + } + + /** + * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit variant. + * + * @param data - input byte array + * @param length - length of array + * @param seed - seed. 
(default is 0) + * @return - hashcode + */ + public static long hash64(byte[] data, int offset, int length, int seed) { + long hash = seed; + final int nblocks = length >> 3; + + // body + for (int i = 0; i < nblocks; i++) { + final int i8 = i << 3; + long k = ((long) data[offset + i8] & 0xff) + | (((long) data[offset + i8 + 1] & 0xff) << 8) + | (((long) data[offset + i8 + 2] & 0xff) << 16) + | (((long) data[offset + i8 + 3] & 0xff) << 24) + | (((long) data[offset + i8 + 4] & 0xff) << 32) + | (((long) data[offset + i8 + 5] & 0xff) << 40) + | (((long) data[offset + i8 + 6] & 0xff) << 48) + | (((long) data[offset + i8 + 7] & 0xff) << 56); + + // mix functions + k *= C1; + k = Long.rotateLeft(k, R1); + k *= C2; + hash ^= k; + hash = Long.rotateLeft(hash, R2) * M + N1; + } + + // tail + long k1 = 0; + int tailStart = nblocks << 3; + switch (length - tailStart) { + case 7: + k1 ^= ((long) data[offset + tailStart + 6] & 0xff) << 48; + case 6: + k1 ^= ((long) data[offset + tailStart + 5] & 0xff) << 40; + case 5: + k1 ^= ((long) data[offset + tailStart + 4] & 0xff) << 32; + case 4: + k1 ^= ((long) data[offset + tailStart + 3] & 0xff) << 24; + case 3: + k1 ^= ((long) data[offset + tailStart + 2] & 0xff) << 16; + case 2: + k1 ^= ((long) data[offset + tailStart + 1] & 0xff) << 8; + case 1: + k1 ^= ((long) data[offset + tailStart] & 0xff); + k1 *= C1; + k1 = Long.rotateLeft(k1, R1); + k1 *= C2; + hash ^= k1; + } + + // finalization + hash ^= length; + hash = fmix64(hash); + + return hash; + } + + /** + * Murmur3 128-bit variant. + * + * @param data - input byte array + * @return - hashcode (2 longs) + */ + public static long[] hash128(byte[] data) { + return hash128(data, 0, data.length, DEFAULT_SEED); + } + + /** + * Murmur3 128-bit variant. + * + * @param data - input byte array + * @param offset - the first element of array + * @param length - length of array + * @param seed - seed. 
(default is 0) + * @return - hashcode (2 longs) + */ + public static long[] hash128(byte[] data, int offset, int length, int seed) { + long h1 = seed; + long h2 = seed; + final int nblocks = length >> 4; + + // body + for (int i = 0; i < nblocks; i++) { + final int i16 = i << 4; + long k1 = ((long) data[offset + i16] & 0xff) + | (((long) data[offset + i16 + 1] & 0xff) << 8) + | (((long) data[offset + i16 + 2] & 0xff) << 16) + | (((long) data[offset + i16 + 3] & 0xff) << 24) + | (((long) data[offset + i16 + 4] & 0xff) << 32) + | (((long) data[offset + i16 + 5] & 0xff) << 40) + | (((long) data[offset + i16 + 6] & 0xff) << 48) + | (((long) data[offset + i16 + 7] & 0xff) << 56); + + long k2 = ((long) data[offset + i16 + 8] & 0xff) + | (((long) data[offset + i16 + 9] & 0xff) << 8) + | (((long) data[offset + i16 + 10] & 0xff) << 16) + | (((long) data[offset + i16 + 11] & 0xff) << 24) + | (((long) data[offset + i16 + 12] & 0xff) << 32) + | (((long) data[offset + i16 + 13] & 0xff) << 40) + | (((long) data[offset + i16 + 14] & 0xff) << 48) + | (((long) data[offset + i16 + 15] & 0xff) << 56); + + // mix functions for k1 + k1 *= C1; + k1 = Long.rotateLeft(k1, R1); + k1 *= C2; + h1 ^= k1; + h1 = Long.rotateLeft(h1, R2); + h1 += h2; + h1 = h1 * M + N1; + + // mix functions for k2 + k2 *= C2; + k2 = Long.rotateLeft(k2, R3); + k2 *= C1; + h2 ^= k2; + h2 = Long.rotateLeft(h2, R1); + h2 += h1; + h2 = h2 * M + N2; + } + + // tail + long k1 = 0; + long k2 = 0; + int tailStart = nblocks << 4; + switch (length - tailStart) { + case 15: + k2 ^= (long) (data[offset + tailStart + 14] & 0xff) << 48; + case 14: + k2 ^= (long) (data[offset + tailStart + 13] & 0xff) << 40; + case 13: + k2 ^= (long) (data[offset + tailStart + 12] & 0xff) << 32; + case 12: + k2 ^= (long) (data[offset + tailStart + 11] & 0xff) << 24; + case 11: + k2 ^= (long) (data[offset + tailStart + 10] & 0xff) << 16; + case 10: + k2 ^= (long) (data[offset + tailStart + 9] & 0xff) << 8; + case 9: + k2 ^= (long) (data[offset + 
tailStart + 8] & 0xff); + k2 *= C2; + k2 = Long.rotateLeft(k2, R3); + k2 *= C1; + h2 ^= k2; + + case 8: + k1 ^= (long) (data[offset + tailStart + 7] & 0xff) << 56; + case 7: + k1 ^= (long) (data[offset + tailStart + 6] & 0xff) << 48; + case 6: + k1 ^= (long) (data[offset + tailStart + 5] & 0xff) << 40; + case 5: + k1 ^= (long) (data[offset + tailStart + 4] & 0xff) << 32; + case 4: + k1 ^= (long) (data[offset + tailStart + 3] & 0xff) << 24; + case 3: + k1 ^= (long) (data[offset + tailStart + 2] & 0xff) << 16; + case 2: + k1 ^= (long) (data[offset + tailStart + 1] & 0xff) << 8; + case 1: + k1 ^= (long) (data[offset + tailStart] & 0xff); + k1 *= C1; + k1 = Long.rotateLeft(k1, R1); + k1 *= C2; + h1 ^= k1; + } + + // finalization + h1 ^= length; + h2 ^= length; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + h2 += h1; + + return new long[]{h1, h2}; + } + + private static long fmix64(long h) { + h ^= (h >>> 33); + h *= 0xff51afd7ed558ccdL; + h ^= (h >>> 33); + h *= 0xc4ceb9fe1a85ec53L; + h ^= (h >>> 33); + return h; + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestListColumnVector.java ---------------------------------------------------------------------- diff --git a/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestListColumnVector.java b/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestListColumnVector.java new file mode 100644 index 0000000..395d8f5 --- /dev/null +++ b/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestListColumnVector.java @@ -0,0 +1,200 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.junit.Test; + +import java.util.Arrays; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * Test for ListColumnVector + */ +public class TestListColumnVector { + + @Test + public void testFlatten() throws Exception { + LongColumnVector col1 = new LongColumnVector(10); + ListColumnVector vector = new ListColumnVector(10, col1); + vector.init(); + + // TEST - repeating NULL & no selection + col1.isRepeating = true; + vector.isRepeating = true; + vector.noNulls = false; + vector.isNull[0] = true; + vector.childCount = 0; + for(int i=0; i < 10; ++i) { + col1.vector[i] = i + 3; + vector.offsets[i] = i; + vector.lengths[i] = 10 + i; + } + vector.flatten(false, null, 10); + // make sure the vector was flattened + assertFalse(vector.isRepeating); + assertFalse(vector.noNulls); + // child isn't flattened, because parent is repeating null + assertTrue(col1.isRepeating); + assertTrue(col1.noNulls); + for(int i=0; i < 10; ++i) { + assertTrue("isNull at " + i, vector.isNull[i]); + } + for(int i=0; i < 10; ++i) { + StringBuilder buf = new StringBuilder(); + vector.stringifyValue(buf, i); + assertEquals("null", buf.toString()); + } + vector.unFlatten(); + assertTrue(col1.isRepeating); + assertTrue(vector.isRepeating); + + // TEST - repeating 
NULL & selection + Arrays.fill(vector.isNull, 1, 10, false); + int[] sel = new int[]{3, 5, 7}; + vector.flatten(true, sel, 3); + for(int i=1; i < 10; i++) { + assertEquals("failure at " + i, + i == 3 || i == 5 || i == 7, vector.isNull[i]); + } + vector.unFlatten(); + + // TEST - repeating non-NULL & no-selection + vector.noNulls = true; + vector.isRepeating = true; + vector.offsets[0] = 0; + vector.lengths[0] = 3; + vector.childCount = 3; + vector.flatten(false, null, 10); + // make sure the vector was flattened + assertFalse(vector.isRepeating); + assertFalse(vector.noNulls); + assertFalse(col1.isRepeating); + assertFalse(col1.noNulls); + for(int i=0; i < 10; ++i) { + assertEquals("offset at " + i, 0, vector.offsets[i]); + assertEquals("length at " + i, 3, vector.lengths[i]); + } + for(int i=0; i < 10; ++i) { + StringBuilder buf = new StringBuilder(); + vector.stringifyValue(buf, i); + assertEquals("[3, 3, 3]", buf.toString()); + } + vector.unFlatten(); + assertTrue(col1.isRepeating); + assertTrue(col1.noNulls); + assertTrue(vector.isRepeating); + assertTrue(vector.noNulls); + + // TEST - repeating non-NULL & selection + Arrays.fill(vector.offsets, 1, 10, -1); + Arrays.fill(vector.lengths, 1, 10, -1); + Arrays.fill(col1.vector, 1, 10, -1); + vector.flatten(true, sel, 3); + for(int i=1; i < 10; i++) { + if (i == 3 || i == 5 || i == 7) { + assertEquals("failure at " + i, 0, vector.offsets[i]); + assertEquals("failure at " + i, 3, vector.lengths[i]); + } else { + assertEquals("failure at " + i, -1, vector.offsets[i]); + assertEquals("failure at " + i, -1, vector.lengths[i]); + } + } + for(int i=0; i < 3; ++i) { + assertEquals("failure at " + i, 3, col1.vector[i]); + } + for(int i=3; i < 10; ++i) { + assertEquals("failure at " + i, -1, col1.vector[i]); + } + vector.unFlatten(); + + // TEST - reset + vector.reset(); + assertFalse(col1.isRepeating); + assertTrue(col1.noNulls); + assertFalse(vector.isRepeating); + assertTrue(vector.noNulls); + assertEquals(0, 
vector.childCount); + } + + @Test + public void testSet() throws Exception { + LongColumnVector input1 = new LongColumnVector(10); + ListColumnVector input = new ListColumnVector(10, input1); + input.init(); + LongColumnVector output1 = new LongColumnVector(30); + ListColumnVector output = new ListColumnVector(10, output1); + output.init(); + input.noNulls = false; + input.isNull[6] = true; + input.childCount = 11; + Arrays.fill(output1.vector, -1); + for(int i=0; i < 10; ++i) { + input1.vector[i] = 10 * i; + input.offsets[i] = i; + input.lengths[i] = 2; + output.offsets[i] = i + 2; + output.lengths[i] = 3; + } + output.childCount = 30; + + // copy a null + output.setElement(3, 6, input); + assertEquals(30, output.childCount); + StringBuilder buf = new StringBuilder(); + output.stringifyValue(buf, 3); + assertEquals("null", buf.toString()); + + // copy a value + output.setElement(3, 5, input); + assertEquals(30, output.offsets[3]); + assertEquals(2, output.lengths[3]); + assertEquals(32, output.childCount); + buf = new StringBuilder(); + output.stringifyValue(buf, 3); + assertEquals("[50, 60]", buf.toString()); + + // overwrite a value + output.setElement(3, 4, input); + assertEquals(34, output.childCount); + assertEquals(34, output1.vector.length); + assertEquals(50, output1.vector[30]); + assertEquals(60, output1.vector[31]); + buf = new StringBuilder(); + output.stringifyValue(buf, 3); + assertEquals("[40, 50]", buf.toString()); + + input.reset(); + assertEquals(false, input1.isRepeating); + assertEquals(true, input.noNulls); + output.reset(); + assertEquals(0, output.childCount); + + input.isRepeating = true; + input.offsets[0] = 0; + input.lengths[0] = 10; + output.setElement(2, 7, input); + assertEquals(10, output.childCount); + buf = new StringBuilder(); + output.stringifyValue(buf, 2); + assertEquals("[0, 10, 20, 30, 40, 50, 60, 70, 80, 90]", buf.toString()); + } +} 
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestMapColumnVector.java ---------------------------------------------------------------------- diff --git a/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestMapColumnVector.java b/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestMapColumnVector.java new file mode 100644 index 0000000..c77c286 --- /dev/null +++ b/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestMapColumnVector.java @@ -0,0 +1,224 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

package org.apache.hadoop.hive.ql.exec.vector;

import org.junit.Test;

import java.util.Arrays;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

/**
 * Test for MapColumnVector.
 *
 * Covers flatten()/unFlatten() under the four combinations of
 * (repeating null / repeating non-null) x (no selection / selection vector),
 * plus setElement() copy semantics and reset().
 */
public class TestMapColumnVector {

  @Test
  public void testFlatten() throws Exception {
    // Map of long keys (col1) to double values (col2), 10 rows.
    LongColumnVector col1 = new LongColumnVector(10);
    DoubleColumnVector col2 = new DoubleColumnVector(10);
    MapColumnVector vector = new MapColumnVector(10, col1, col2);
    vector.init();

    // TEST - repeating NULL & no selection
    col1.isRepeating = true;
    vector.isRepeating = true;
    vector.noNulls = false;
    vector.isNull[0] = true;
    vector.childCount = 0;
    for(int i=0; i < 10; ++i) {
      col1.vector[i] = i + 3;
      col2.vector[i] = i * 10;
      vector.offsets[i] = i;
      vector.lengths[i] = 10 + i;
    }
    vector.flatten(false, null, 10);
    // make sure the vector was flattened
    assertFalse(vector.isRepeating);
    assertFalse(vector.noNulls);
    // child isn't flattened, because parent is repeating null
    assertTrue(col1.isRepeating);
    assertTrue(col1.noNulls);
    // every row reads as null because row 0 (the repeated row) is null
    for(int i=0; i < 10; ++i) {
      assertTrue("isNull at " + i, vector.isNull[i]);
    }
    for(int i=0; i < 10; ++i) {
      StringBuilder buf = new StringBuilder();
      vector.stringifyValue(buf, i);
      assertEquals("null", buf.toString());
    }
    // unFlatten must restore the repeating state of parent and child
    vector.unFlatten();
    assertTrue(col1.isRepeating);
    assertTrue(vector.isRepeating);

    // TEST - repeating NULL & selection
    Arrays.fill(vector.isNull, 1, 10, false);
    int[] sel = new int[]{3, 5, 7};
    vector.flatten(true, sel, 3);
    // only the selected rows pick up the repeated null
    for(int i=1; i < 10; i++) {
      assertEquals("failure at " + i,
          i == 3 || i == 5 || i == 7, vector.isNull[i]);
    }
    vector.unFlatten();

    // TEST - repeating non-NULL & no-selection
    vector.noNulls = true;
    vector.isRepeating = true;
    vector.offsets[0] = 0;
    vector.lengths[0] = 3;
    vector.childCount = 3;
    vector.flatten(false, null, 10);
    // make sure the vector was flattened
    assertFalse(vector.isRepeating);
    assertFalse(vector.noNulls);
    assertFalse(col1.isRepeating);
    assertFalse(col1.noNulls);
    assertFalse(col2.isRepeating);
    assertFalse(col2.noNulls);
    // row 0's offset/length are propagated to every row
    for(int i=0; i < 10; ++i) {
      assertEquals("offset at " + i, 0, vector.offsets[i]);
      assertEquals("length at " + i, 3, vector.lengths[i]);
    }
    // keys all read the flattened repeated value col1.vector[0] (= 3);
    // values come from col2 rows 0..2
    for(int i=0; i < 10; ++i) {
      StringBuilder buf = new StringBuilder();
      vector.stringifyValue(buf, i);
      assertEquals("[{\"key\": 3, \"value\": 0.0}," +
          " {\"key\": 3, \"value\": 10.0}," +
          " {\"key\": 3, \"value\": 20.0}]", buf.toString());
    }
    vector.unFlatten();
    assertTrue(col1.isRepeating);
    assertTrue(col1.noNulls);
    assertTrue(vector.isRepeating);
    // col2 was never repeating, so unFlatten leaves it flat
    assertFalse(col2.isRepeating);
    assertTrue(col2.noNulls);
    assertTrue(vector.noNulls);

    // TEST - repeating non-NULL & selection
    Arrays.fill(vector.offsets, 1, 10, -1);
    Arrays.fill(vector.lengths, 1, 10, -1);
    Arrays.fill(col1.vector, 1, 10, -1);
    vector.flatten(true, sel, 3);
    // only the selected rows receive the repeated offset/length;
    // the -1 sentinels elsewhere must be untouched
    for(int i=1; i < 10; i++) {
      if (i == 3 || i == 5 || i == 7) {
        assertEquals("failure at " + i, 0, vector.offsets[i]);
        assertEquals("failure at " + i, 3, vector.lengths[i]);
      } else {
        assertEquals("failure at " + i, -1, vector.offsets[i]);
        assertEquals("failure at " + i, -1, vector.lengths[i]);
      }
    }
    // the key child is flattened only over its 3 referenced rows
    for(int i=0; i < 3; ++i) {
      assertEquals("failure at " + i, 3, col1.vector[i]);
    }
    for(int i=3; i < 10; ++i) {
      assertEquals("failure at " + i, -1, col1.vector[i]);
    }
    vector.unFlatten();

    // TEST - reset: clears repeating/null state on parent and children
    vector.reset();
    assertFalse(col1.isRepeating);
    assertTrue(col1.noNulls);
    assertFalse(col2.isRepeating);
    assertTrue(col2.noNulls);
    assertFalse(vector.isRepeating);
    assertTrue(vector.noNulls);
    assertEquals(0, vector.childCount);
  }

  @Test
  public void testSet() throws Exception {
    // source map: 10 rows, each of length 2 starting at its own row index
    LongColumnVector input1 = new LongColumnVector(10);
    DoubleColumnVector input2 = new DoubleColumnVector(10);
    MapColumnVector input = new MapColumnVector(10, input1, input2);
    input.init();
    // destination children sized 30 so copies append past the initial data
    LongColumnVector output1 = new LongColumnVector(30);
    DoubleColumnVector output2 = new DoubleColumnVector(30);
    MapColumnVector output = new MapColumnVector(10, output1, output2);
    output.init();
    input.noNulls = false;
    input.isNull[6] = true;
    input.childCount = 11;
    Arrays.fill(output1.vector, -1);
    for(int i=0; i < 10; ++i) {
      input1.vector[i] = 10 * i;
      input2.vector[i] = 100 * i;
      input.offsets[i] = i;
      input.lengths[i] = 2;
      output.offsets[i] = i + 2;
      output.lengths[i] = 3;
    }
    output.childCount = 30;

    // copy a null: no child entries are appended
    output.setElement(3, 6, input);
    assertEquals(30, output.childCount);
    StringBuilder buf = new StringBuilder();
    output.stringifyValue(buf, 3);
    assertEquals("null", buf.toString());

    // copy a value: entries are appended at the old childCount (30)
    output.setElement(3, 5, input);
    assertEquals(30, output.offsets[3]);
    assertEquals(2, output.lengths[3]);
    assertEquals(32, output.childCount);
    buf = new StringBuilder();
    output.stringifyValue(buf, 3);
    assertEquals("[{\"key\": 50, \"value\": 500.0}," +
        " {\"key\": 60, \"value\": 600.0}]", buf.toString());

    // overwrite a value: appends again (to 34) and grows the child vector
    output.setElement(3, 4, input);
    assertEquals(34, output.childCount);
    assertEquals(34, output1.vector.length);
    // the previously copied entries are still present at rows 30/31
    assertEquals(50, output1.vector[30]);
    assertEquals(60, output1.vector[31]);
    buf = new StringBuilder();
    output.stringifyValue(buf, 3);
    assertEquals("[{\"key\": 40, \"value\": 400.0}," +
        " {\"key\": 50, \"value\": 500.0}]", buf.toString());

    input.reset();
    assertEquals(false, input1.isRepeating);
    assertEquals(true, input.noNulls);
    output.reset();
    assertEquals(0, output.childCount);

    // copying from a repeating source uses row 0's offset/length
    input.isRepeating = true;
    input.offsets[0] = 0;
    input.lengths[0] = 10;
    output.setElement(2, 7, input);
    assertEquals(10, output.childCount);
    buf = new StringBuilder();
    output.stringifyValue(buf, 2);
    assertEquals("[{\"key\": 0, \"value\": 0.0}," +
        " {\"key\": 10, \"value\": 100.0}," +
        " {\"key\": 20, \"value\": 200.0}," +
        " {\"key\": 30, \"value\": 300.0}," +
        " {\"key\": 40, \"value\": 400.0}," +
        " {\"key\": 50, \"value\": 500.0}," +
        " {\"key\": 60, \"value\": 600.0}," +
        " {\"key\": 70, \"value\": 700.0}," +
        " {\"key\": 80, \"value\": 800.0}," +
        " {\"key\": 90, \"value\": 900.0}]", buf.toString());
  }
}
http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java b/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java
new file mode 100644
index 0000000..41b4b65
--- /dev/null
+++ b/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java
@@ -0,0 +1,95 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * Test for StructColumnVector + */ +public class TestStructColumnVector { + + @Test + public void testFlatten() throws Exception { + LongColumnVector col1 = new LongColumnVector(10); + LongColumnVector col2 = new LongColumnVector(10); + StructColumnVector vector = new StructColumnVector(10, col1, col2); + vector.init(); + col1.isRepeating = true; + for(int i=0; i < 10; ++i) { + col1.vector[i] = i; + col2.vector[i] = 2 * i; + } + vector.flatten(false, null, 10); + assertFalse(col1.isRepeating); + for(int i=0; i < 10; ++i) { + assertEquals("col1 at " + i, 0, col1.vector[i]); + assertEquals("col2 at " + i, 2 * i, col2.vector[i]); + } + vector.unFlatten(); + assertTrue(col1.isRepeating); + for(int i=0; i < 10; ++i) { + StringBuilder buf = new StringBuilder(); + vector.stringifyValue(buf, i); + assertEquals("[0, " + (2 * i) + "]", buf.toString()); + } + vector.reset(); + assertFalse(col1.isRepeating); + } + + @Test + public void testSet() throws Exception { + LongColumnVector input1 = new LongColumnVector(10); + LongColumnVector input2 = new LongColumnVector(10); + StructColumnVector input = new StructColumnVector(10, input1, input2); + input.init(); + LongColumnVector output1 = new LongColumnVector(10); + LongColumnVector output2 = new LongColumnVector(10); + StructColumnVector output = new StructColumnVector(10, output1, output2); + output.init(); + input1.isRepeating = true; + input2.noNulls = false; + input2.isNull[5] = true; + input.noNulls = false; + input.isNull[6] = true; + for(int i=0; i < 10; ++i) { + input1.vector[i] = i + 1; + input2.vector[i] = i + 2; + } + output.setElement(3, 6, input); + StringBuilder buf = new StringBuilder(); + output.stringifyValue(buf, 3); + assertEquals("null", buf.toString()); + output.setElement(3, 5, 
input); + buf = new StringBuilder(); + output.stringifyValue(buf, 3); + assertEquals("[1, null]", buf.toString()); + output.setElement(3, 4, input); + buf = new StringBuilder(); + output.stringifyValue(buf, 3); + assertEquals("[1, 6]", buf.toString()); + input.reset(); + assertEquals(false, input1.isRepeating); + assertEquals(true, input.noNulls); + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java ---------------------------------------------------------------------- diff --git a/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java b/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java new file mode 100644 index 0000000..6e5d5c8 --- /dev/null +++ b/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.junit.Test; + +import java.io.PrintWriter; +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.sql.Timestamp; +import java.util.Date; +import java.util.Random; + +import org.apache.hadoop.hive.common.type.RandomTypeUtil; + +import static org.junit.Assert.*; + +/** + * Test for ListColumnVector + */ +public class TestTimestampColumnVector { + + private static int TEST_COUNT = 5000; + + private static int fake = 0; + + @Test + public void testSaveAndRetrieve() throws Exception { + + Random r = new Random(1234); + TimestampColumnVector timestampColVector = new TimestampColumnVector(); + Timestamp[] randTimestamps = new Timestamp[VectorizedRowBatch.DEFAULT_SIZE]; + + for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { + Timestamp randTimestamp = RandomTypeUtil.getRandTimestamp(r); + randTimestamps[i] = randTimestamp; + timestampColVector.set(i, randTimestamp); + } + for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { + Timestamp retrievedTimestamp = timestampColVector.asScratchTimestamp(i); + Timestamp randTimestamp = randTimestamps[i]; + if (!retrievedTimestamp.equals(randTimestamp)) { + assertTrue(false); + } + } + } + + @Test + public void testTimestampCompare() throws Exception { + Random r = new Random(1234); + TimestampColumnVector timestampColVector = new TimestampColumnVector(); + Timestamp[] randTimestamps = new Timestamp[VectorizedRowBatch.DEFAULT_SIZE]; + Timestamp[] candTimestamps = new Timestamp[VectorizedRowBatch.DEFAULT_SIZE]; + int[] compareToLeftRights = new int[VectorizedRowBatch.DEFAULT_SIZE]; + int[] compareToRightLefts = new int[VectorizedRowBatch.DEFAULT_SIZE]; + + for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { + Timestamp randTimestamp = RandomTypeUtil.getRandTimestamp(r); + randTimestamps[i] = randTimestamp; + timestampColVector.set(i, randTimestamp); + Timestamp candTimestamp = RandomTypeUtil.getRandTimestamp(r); + 
candTimestamps[i] = candTimestamp; + compareToLeftRights[i] = candTimestamp.compareTo(randTimestamp); + compareToRightLefts[i] = randTimestamp.compareTo(candTimestamp); + } + + for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { + Timestamp retrievedTimestamp = timestampColVector.asScratchTimestamp(i); + Timestamp randTimestamp = randTimestamps[i]; + if (!retrievedTimestamp.equals(randTimestamp)) { + assertTrue(false); + } + Timestamp candTimestamp = candTimestamps[i]; + int compareToLeftRight = timestampColVector.compareTo(candTimestamp, i); + if (compareToLeftRight != compareToLeftRights[i]) { + assertTrue(false); + } + int compareToRightLeft = timestampColVector.compareTo(i, candTimestamp); + if (compareToRightLeft != compareToRightLefts[i]) { + assertTrue(false); + } + } + } + + /* + @Test + public void testGenerate() throws Exception { + PrintWriter writer = new PrintWriter("/Users/you/timestamps.txt"); + Random r = new Random(18485); + for (int i = 0; i < 25; i++) { + Timestamp randTimestamp = RandomTypeUtil.getRandTimestamp(r); + writer.println(randTimestamp.toString()); + } + for (int i = 0; i < 25; i++) { + Timestamp randTimestamp = RandomTypeUtil.getRandTimestamp(r, 1965, 2025); + writer.println(randTimestamp.toString()); + } + writer.close(); + } + */ +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestUnionColumnVector.java ---------------------------------------------------------------------- diff --git a/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestUnionColumnVector.java b/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestUnionColumnVector.java new file mode 100644 index 0000000..c378cd4 --- /dev/null +++ b/java/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestUnionColumnVector.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * Test for StructColumnVector + */ +public class TestUnionColumnVector { + + @Test + public void testFlatten() throws Exception { + LongColumnVector col1 = new LongColumnVector(10); + LongColumnVector col2 = new LongColumnVector(10); + UnionColumnVector vector = new UnionColumnVector(10, col1, col2); + vector.init(); + col1.isRepeating = true; + for(int i=0; i < 10; ++i) { + vector.tags[i] = i % 2; + col1.vector[i] = i; + col2.vector[i] = 2 * i; + } + vector.flatten(false, null, 10); + assertFalse(col1.isRepeating); + for(int i=0; i < 10; ++i) { + assertEquals(i % 2, vector.tags[i]); + assertEquals("col1 at " + i, 0, col1.vector[i]); + assertEquals("col2 at " + i, 2 * i, col2.vector[i]); + } + vector.unFlatten(); + assertTrue(col1.isRepeating); + for(int i=0; i < 10; ++i) { + StringBuilder buf = new StringBuilder(); + vector.stringifyValue(buf, i); + assertEquals("{\"tag\": " + (i % 2) + ", \"value\": " + + (i % 2 == 0 ? 
0 : 2 * i) + "}", buf.toString()); + } + vector.reset(); + assertFalse(col1.isRepeating); + } + + @Test + public void testSet() throws Exception { + LongColumnVector input1 = new LongColumnVector(10); + LongColumnVector input2 = new LongColumnVector(10); + UnionColumnVector input = new UnionColumnVector(10, input1, input2); + input.init(); + LongColumnVector output1 = new LongColumnVector(10); + LongColumnVector output2 = new LongColumnVector(10); + UnionColumnVector output = new UnionColumnVector(10, output1, output2); + output.init(); + input1.isRepeating = true; + for(int i=0; i < 10; ++i) { + input.tags[i] = i % 2; + input1.vector[i] = i + 1; + input2.vector[i] = i + 2; + } + output.setElement(3, 4, input); + StringBuilder buf = new StringBuilder(); + output.stringifyValue(buf, 3); + assertEquals("{\"tag\": 0, \"value\": 1}", buf.toString()); + input.noNulls = false; + input.isNull[5] = true; + output.setElement(3, 5, input); + buf = new StringBuilder(); + output.stringifyValue(buf, 3); + assertEquals("null", buf.toString()); + input.reset(); + assertEquals(false, input1.isRepeating); + assertEquals(true, input.noNulls); + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java ---------------------------------------------------------------------- diff --git a/java/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java b/java/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java new file mode 100644 index 0000000..5facc7c --- /dev/null +++ b/java/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java @@ -0,0 +1,224 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.common.util; + +import static org.junit.Assert.assertEquals; + +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hashing; + +import org.junit.Test; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Arrays; +import java.util.Random; + +/** + * Tests for Murmur3 variants. + */ +public class TestMurmur3 { + + @Test + public void testHashCodesM3_32_string() { + String key = "test"; + int seed = 123; + HashFunction hf = Hashing.murmur3_32(seed); + int hc1 = hf.hashBytes(key.getBytes()).asInt(); + int hc2 = Murmur3.hash32(key.getBytes(), key.getBytes().length, seed); + assertEquals(hc1, hc2); + + key = "testkey"; + hc1 = hf.hashBytes(key.getBytes()).asInt(); + hc2 = Murmur3.hash32(key.getBytes(), key.getBytes().length, seed); + assertEquals(hc1, hc2); + } + + @Test + public void testHashCodesM3_32_ints() { + int seed = 123; + Random rand = new Random(seed); + HashFunction hf = Hashing.murmur3_32(seed); + for (int i = 0; i < 1000; i++) { + int val = rand.nextInt(); + byte[] data = ByteBuffer.allocate(4).putInt(val).array(); + int hc1 = hf.hashBytes(data).asInt(); + int hc2 = Murmur3.hash32(data, data.length, seed); + assertEquals(hc1, hc2); + } + } + + @Test + public void testHashCodesM3_32_longs() { + int seed = 123; + Random rand = new Random(seed); + HashFunction hf = Hashing.murmur3_32(seed); + for (int i = 
0; i < 1000; i++) { + long val = rand.nextLong(); + byte[] data = ByteBuffer.allocate(8).putLong(val).array(); + int hc1 = hf.hashBytes(data).asInt(); + int hc2 = Murmur3.hash32(data, data.length, seed); + assertEquals(hc1, hc2); + } + } + + @Test + public void testHashCodesM3_32_double() { + int seed = 123; + Random rand = new Random(seed); + HashFunction hf = Hashing.murmur3_32(seed); + for (int i = 0; i < 1000; i++) { + double val = rand.nextDouble(); + byte[] data = ByteBuffer.allocate(8).putDouble(val).array(); + int hc1 = hf.hashBytes(data).asInt(); + int hc2 = Murmur3.hash32(data, data.length, seed); + assertEquals(hc1, hc2); + } + } + + @Test + public void testHashCodesM3_128_string() { + String key = "test"; + int seed = 123; + HashFunction hf = Hashing.murmur3_128(seed); + // guava stores the hashcodes in little endian order + ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN); + buf.put(hf.hashBytes(key.getBytes()).asBytes()); + buf.flip(); + long gl1 = buf.getLong(); + long gl2 = buf.getLong(8); + long[] hc = Murmur3.hash128(key.getBytes(), 0, key.getBytes().length, seed); + long m1 = hc[0]; + long m2 = hc[1]; + assertEquals(gl1, m1); + assertEquals(gl2, m2); + + key = "testkey128_testkey128"; + buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN); + buf.put(hf.hashBytes(key.getBytes()).asBytes()); + buf.flip(); + gl1 = buf.getLong(); + gl2 = buf.getLong(8); + byte[] keyBytes = key.getBytes(); + hc = Murmur3.hash128(keyBytes, 0, keyBytes.length, seed); + m1 = hc[0]; + m2 = hc[1]; + assertEquals(gl1, m1); + assertEquals(gl2, m2); + + byte[] offsetKeyBytes = new byte[keyBytes.length + 35]; + Arrays.fill(offsetKeyBytes, (byte) -1); + System.arraycopy(keyBytes, 0, offsetKeyBytes, 35, keyBytes.length); + hc = Murmur3.hash128(offsetKeyBytes, 35, keyBytes.length, seed); + assertEquals(gl1, hc[0]); + assertEquals(gl2, hc[1]); + } + + @Test + public void testHashCodeM3_64() { + byte[] origin = ("It was the best of times, it was the 
worst of times," + + " it was the age of wisdom, it was the age of foolishness," + + " it was the epoch of belief, it was the epoch of incredulity," + + " it was the season of Light, it was the season of Darkness," + + " it was the spring of hope, it was the winter of despair," + + " we had everything before us, we had nothing before us," + + " we were all going direct to Heaven," + + " we were all going direct the other way.").getBytes(); + long hash = Murmur3.hash64(origin, 0, origin.length); + assertEquals(305830725663368540L, hash); + + byte[] originOffset = new byte[origin.length + 150]; + Arrays.fill(originOffset, (byte) 123); + System.arraycopy(origin, 0, originOffset, 150, origin.length); + hash = Murmur3.hash64(originOffset, 150, origin.length); + assertEquals(305830725663368540L, hash); + } + + @Test + public void testHashCodesM3_128_ints() { + int seed = 123; + Random rand = new Random(seed); + HashFunction hf = Hashing.murmur3_128(seed); + for (int i = 0; i < 1000; i++) { + int val = rand.nextInt(); + byte[] data = ByteBuffer.allocate(4).putInt(val).array(); + // guava stores the hashcodes in little endian order + ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN); + buf.put(hf.hashBytes(data).asBytes()); + buf.flip(); + long gl1 = buf.getLong(); + long gl2 = buf.getLong(8); + long[] hc = Murmur3.hash128(data, 0, data.length, seed); + long m1 = hc[0]; + long m2 = hc[1]; + assertEquals(gl1, m1); + assertEquals(gl2, m2); + + byte[] offsetData = new byte[data.length + 50]; + System.arraycopy(data, 0, offsetData, 50, data.length); + hc = Murmur3.hash128(offsetData, 50, data.length, seed); + assertEquals(gl1, hc[0]); + assertEquals(gl2, hc[1]); + } + } + + @Test + public void testHashCodesM3_128_longs() { + int seed = 123; + Random rand = new Random(seed); + HashFunction hf = Hashing.murmur3_128(seed); + for (int i = 0; i < 1000; i++) { + long val = rand.nextLong(); + byte[] data = ByteBuffer.allocate(8).putLong(val).array(); + // guava 
stores the hashcodes in little endian order + ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN); + buf.put(hf.hashBytes(data).asBytes()); + buf.flip(); + long gl1 = buf.getLong(); + long gl2 = buf.getLong(8); + long[] hc = Murmur3.hash128(data, 0, data.length, seed); + long m1 = hc[0]; + long m2 = hc[1]; + assertEquals(gl1, m1); + assertEquals(gl2, m2); + } + } + + @Test + public void testHashCodesM3_128_double() { + int seed = 123; + Random rand = new Random(seed); + HashFunction hf = Hashing.murmur3_128(seed); + for (int i = 0; i < 1000; i++) { + double val = rand.nextDouble(); + byte[] data = ByteBuffer.allocate(8).putDouble(val).array(); + // guava stores the hashcodes in little endian order + ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN); + buf.put(hf.hashBytes(data).asBytes()); + buf.flip(); + long gl1 = buf.getLong(); + long gl2 = buf.getLong(8); + long[] hc = Murmur3.hash128(data, 0, data.length, seed); + long m1 = hc[0]; + long m2 = hc[1]; + assertEquals(gl1, m1); + assertEquals(gl2, m2); + } + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/proto/orc_proto.proto ---------------------------------------------------------------------- diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto index 2e00566..6b7e597 100644 --- a/proto/orc_proto.proto +++ b/proto/orc_proto.proto @@ -18,7 +18,7 @@ package orc.proto; -option java_package = "org.apache.hadoop.hive.ql.io.orc"; +option java_package = "org.apache.orc"; message IntegerStatistics { optional sint64 minimum = 1; @@ -215,6 +215,9 @@ message PostScript { // Version of the writer: // 0 (or missing) = original // 1 = HIVE-8732 fixed + // 2 = HIVE-4243 fixed + // 3 = HIVE-12055 fixed + // 4 = HIVE-13083 fixed optional uint32 writerVersion = 6; // Leave this last in the record optional string magic = 8000;
