http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
----------------------------------------------------------------------
diff --git 
a/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
new file mode 100644
index 0000000..90817a5
--- /dev/null
+++ 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
@@ -0,0 +1,354 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+/**
+ * String expression evaluation helper functions.
+ */
+public class StringExpr {
+
+  /* Compare two strings from two byte arrays each
+   * with their own start position and length.
+   * Use lexicographic unsigned byte value order.
+   * This is what's used for UTF-8 sort order.
+   * Return negative value if arg1 < arg2, 0 if arg1 = arg2,
+   * positive if arg1 > arg2.
+   */
+  public static int compare(byte[] arg1, int start1, int len1, byte[] arg2, 
int start2, int len2) {
+    for (int i = 0; i < len1 && i < len2; i++) {
+      // Note the "& 0xff" is just a way to convert unsigned bytes to signed 
integer.
+      int b1 = arg1[i + start1] & 0xff;
+      int b2 = arg2[i + start2] & 0xff;
+      if (b1 != b2) {
+        return b1 - b2;
+      }
+    }
+    return len1 - len2;
+  }
+
+  /* Determine if two strings are equal from two byte arrays each
+   * with their own start position and length.
+   * Use lexicographic unsigned byte value order.
+   * This is what's used for UTF-8 sort order.
+   */
+  public static boolean equal(byte[] arg1, final int start1, final int len1,
+      byte[] arg2, final int start2, final int len2) {
+    if (len1 != len2) {
+      return false;
+    }
+    if (len1 == 0) {
+      return true;
+    }
+
+    // do bounds check for OOB exception
+    if (arg1[start1] != arg2[start2]
+        || arg1[start1 + len1 - 1] != arg2[start2 + len2 - 1]) {
+      return false;
+    }
+
+    if (len1 == len2) {
+      // prove invariant to the compiler: len1 = len2
+      // all array access between (start1, start1+len1) 
+      // and (start2, start2+len2) are valid
+      // no more OOB exceptions are possible
+      final int step = 8;
+      final int remainder = len1 % step;
+      final int wlen = len1 - remainder;
+      // suffix first
+      for (int i = wlen; i < len1; i++) {
+        if (arg1[start1 + i] != arg2[start2 + i]) {
+          return false;
+        }
+      }
+      // SIMD loop
+      for (int i = 0; i < wlen; i += step) {
+        final int s1 = start1 + i;
+        final int s2 = start2 + i;
+        boolean neq = false;
+        for (int j = 0; j < step; j++) {
+          neq = (arg1[s1 + j] != arg2[s2 + j]) || neq;
+        }
+        if (neq) {
+          return false;
+        }
+      }
+    }
+
+    return true;
+  }
+
+  public static int characterCount(byte[] bytes) {
+    int end = bytes.length;
+
+    // count characters
+    int j = 0;
+    int charCount = 0;
+    while(j < end) {
+      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
+      if ((bytes[j] & 0xc0) != 0x80) {
+        ++charCount;
+      }
+      j++;
+    }
+    return charCount;
+  }
+
+  public static int characterCount(byte[] bytes, int start, int length) {
+    int end = start + length;
+
+    // count characters
+    int j = start;
+    int charCount = 0;
+    while(j < end) {
+      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
+      if ((bytes[j] & 0xc0) != 0x80) {
+        ++charCount;
+      }
+      j++;
+    }
+    return charCount;
+  }
+
+  // A setVal with the same function signature as rightTrim, leftTrim, 
truncate, etc, below.
+  // Useful for class generation via templates.
+  public static void assign(BytesColumnVector outV, int i, byte[] bytes, int 
start, int length) {
+    // set output vector
+    outV.setVal(i, bytes, start, length);
+  }
+
+  /*
+   * Right trim a slice of a byte array and return the new byte length.
+   */
+  public static int rightTrim(byte[] bytes, int start, int length) {
+    // skip trailing blank characters
+    int j = start + length - 1;
+    while(j >= start && bytes[j] == 0x20) {
+      j--;
+    }
+
+    return (j - start) + 1;
+  }
+
+  /*
+   * Right trim a slice of a byte array and place the result into element i of 
a vector.
+   */
+  public static void rightTrim(BytesColumnVector outV, int i, byte[] bytes, 
int start, int length) {
+    // skip trailing blank characters
+    int j = start + length - 1;
+    while(j >= start && bytes[j] == 0x20) {
+      j--;
+    }
+
+    // set output vector
+    outV.setVal(i, bytes, start, (j - start) + 1);
+  }
+
+  /*
+   * Truncate a slice of a byte array to a maximum number of characters and
+   * return the new byte length.
+   */
+  public static int truncate(byte[] bytes, int start, int length, int 
maxLength) {
+    int end = start + length;
+
+    // count characters forward
+    int j = start;
+    int charCount = 0;
+    while(j < end) {
+      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
+      if ((bytes[j] & 0xc0) != 0x80) {
+        if (charCount == maxLength) {
+          break;
+        }
+        ++charCount;
+      }
+      j++;
+    }
+    return (j - start);
+  }
+
+  /*
+   * Truncate a slice of a byte array to a maximum number of characters and
+   * place the result into element i of a vector.
+   */
+  public static void truncate(BytesColumnVector outV, int i, byte[] bytes, int 
start, int length, int maxLength) {
+    int end = start + length;
+
+    // count characters forward
+    int j = start;
+    int charCount = 0;
+    while(j < end) {
+      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
+      if ((bytes[j] & 0xc0) != 0x80) {
+        if (charCount == maxLength) {
+          break;
+        }
+        ++charCount;
+      }
+      j++;
+    }
+
+    // set output vector
+    outV.setVal(i, bytes, start, (j - start));
+  }
+
+  /*
+   * Truncate a byte array to a maximum number of characters and
+   * return a byte array with only truncated bytes.
+   */
+  public static byte[] truncateScalar(byte[] bytes, int maxLength) {
+    int end = bytes.length;
+
+    // count characters forward
+    int j = 0;
+    int charCount = 0;
+    while(j < end) {
+      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
+      if ((bytes[j] & 0xc0) != 0x80) {
+        if (charCount == maxLength) {
+          break;
+        }
+        ++charCount;
+      }
+      j++;
+    }
+    if (j == end) {
+      return bytes;
+    } else {
+      return Arrays.copyOf(bytes, j);
+    }
+  }
+
+  /*
+   * Right trim and truncate a slice of a byte array to a maximum number of 
characters and
+   * return the new byte length.
+   */
+  public static int rightTrimAndTruncate(byte[] bytes, int start, int length, 
int maxLength) {
+    int end = start + length;
+
+    // count characters forward and watch for final run of pads
+    int j = start;
+    int charCount = 0;
+    int padRunStart = -1;
+    while(j < end) {
+      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
+      if ((bytes[j] & 0xc0) != 0x80) {
+        if (charCount == maxLength) {
+          break;
+        }
+        if (bytes[j] == 0x20) {
+          if (padRunStart == -1) {
+            padRunStart = j;
+          }
+        } else {
+          padRunStart = -1;
+        }
+        ++charCount;
+      } else {
+        padRunStart = -1;
+      }
+      j++;
+    }
+    if (padRunStart != -1) {
+      return (padRunStart - start);
+    } else {
+      return (j - start);
+    }
+  }
+
+  /*
+   * Right trim and truncate a slice of a byte array to a maximum number of 
characters and
+   * place the result into element i of a vector.
+   */
+  public static void rightTrimAndTruncate(BytesColumnVector outV, int i, 
byte[] bytes, int start, int length, int maxLength) {
+    int end = start + length;
+
+    // count characters forward and watch for final run of pads
+    int j = start;
+    int charCount = 0;
+    int padRunStart = -1;
+    while(j < end) {
+      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
+      if ((bytes[j] & 0xc0) != 0x80) {
+        if (charCount == maxLength) {
+          break;
+        }
+        if (bytes[j] == 0x20) {
+          if (padRunStart == -1) {
+            padRunStart = j;
+          }
+        } else {
+          padRunStart = -1;
+        }
+        ++charCount;
+      } else {
+        padRunStart = -1;
+      }
+      j++;
+    }
+    // set output vector
+    if (padRunStart != -1) {
+      outV.setVal(i, bytes, start, (padRunStart - start));
+    } else {
+      outV.setVal(i, bytes, start, (j - start) );
+    }
+  }
+
+  /*
+   * Right trim and truncate a byte array to a maximum number of characters and
+   * return a byte array with only the trimmed and truncated bytes.
+   */
+  public static byte[] rightTrimAndTruncateScalar(byte[] bytes, int maxLength) 
{
+    int end = bytes.length;
+
+    // count characters forward and watch for final run of pads
+    int j = 0;
+    int charCount = 0;
+    int padRunStart = -1;
+    while(j < end) {
+      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
+      if ((bytes[j] & 0xc0) != 0x80) {
+        if (charCount == maxLength) {
+          break;
+        }
+        if (bytes[j] == 0x20) {
+          if (padRunStart == -1) {
+            padRunStart = j;
+          }
+        } else {
+          padRunStart = -1;
+        }
+        ++charCount;
+      } else {
+        padRunStart = -1;
+      }
+      j++;
+    }
+    if (padRunStart != -1) {
+      return Arrays.copyOf(bytes, padRunStart);
+    } else if (j == end) {
+      return bytes;
+    } else {
+      return Arrays.copyOf(bytes, j);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java
----------------------------------------------------------------------
diff --git 
a/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java
 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java
new file mode 100644
index 0000000..443083d
--- /dev/null
+++ 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.sarg;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * The inner representation of the SearchArgument. Most users should not
+ * need this interface, it is only for file formats that need to translate
+ * the SearchArgument into an internal form.
+ */
+public class ExpressionTree {
+  public enum Operator {OR, AND, NOT, LEAF, CONSTANT}
+  private final Operator operator;
+  private final List<ExpressionTree> children;
+  private int leaf;
+  private final SearchArgument.TruthValue constant;
+
+  ExpressionTree() {
+    operator = null;
+    children = null;
+    leaf = 0;
+    constant = null;
+  }
+
+  ExpressionTree(Operator op, ExpressionTree... kids) {
+    operator = op;
+    children = new ArrayList<ExpressionTree>();
+    leaf = -1;
+    this.constant = null;
+    Collections.addAll(children, kids);
+  }
+
+  ExpressionTree(int leaf) {
+    operator = Operator.LEAF;
+    children = null;
+    this.leaf = leaf;
+    this.constant = null;
+  }
+
+  ExpressionTree(SearchArgument.TruthValue constant) {
+    operator = Operator.CONSTANT;
+    children = null;
+    this.leaf = -1;
+    this.constant = constant;
+  }
+
+  ExpressionTree(ExpressionTree other) {
+    this.operator = other.operator;
+    if (other.children == null) {
+      this.children = null;
+    } else {
+      this.children = new ArrayList<ExpressionTree>();
+      for(ExpressionTree child: other.children) {
+        children.add(new ExpressionTree(child));
+      }
+    }
+    this.leaf = other.leaf;
+    this.constant = other.constant;
+  }
+
+  public SearchArgument.TruthValue evaluate(SearchArgument.TruthValue[] leaves
+                                            ) {
+    SearchArgument.TruthValue result = null;
+    switch (operator) {
+      case OR:
+        for(ExpressionTree child: children) {
+          result = child.evaluate(leaves).or(result);
+        }
+        return result;
+      case AND:
+        for(ExpressionTree child: children) {
+          result = child.evaluate(leaves).and(result);
+        }
+        return result;
+      case NOT:
+        return children.get(0).evaluate(leaves).not();
+      case LEAF:
+        return leaves[leaf];
+      case CONSTANT:
+        return constant;
+      default:
+        throw new IllegalStateException("Unknown operator: " + operator);
+    }
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder buffer = new StringBuilder();
+    switch (operator) {
+      case OR:
+        buffer.append("(or");
+        for(ExpressionTree child: children) {
+          buffer.append(' ');
+          buffer.append(child.toString());
+        }
+        buffer.append(')');
+        break;
+      case AND:
+        buffer.append("(and");
+        for(ExpressionTree child: children) {
+          buffer.append(' ');
+          buffer.append(child.toString());
+        }
+        buffer.append(')');
+        break;
+      case NOT:
+        buffer.append("(not ");
+        buffer.append(children.get(0));
+        buffer.append(')');
+        break;
+      case LEAF:
+        buffer.append("leaf-");
+        buffer.append(leaf);
+        break;
+      case CONSTANT:
+        buffer.append(constant);
+        break;
+    }
+    return buffer.toString();
+  }
+
+  public Operator getOperator() {
+    return operator;
+  }
+
+  public List<ExpressionTree> getChildren() {
+    return children;
+  }
+
+  public SearchArgument.TruthValue getConstant() {
+    return constant;
+  }
+
+  public int getLeaf() {
+    return leaf;
+  }
+
+  public void setLeaf(int leaf) {
+    this.leaf = leaf;
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
----------------------------------------------------------------------
diff --git 
a/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
new file mode 100644
index 0000000..469a3da
--- /dev/null
+++ 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.sarg;
+
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.List;
+
+/**
+ * The primitive predicates that form a SearchArgument.
+ */
+public interface PredicateLeaf {
+
+  /**
+   * The possible operators for predicates. To get the opposites, construct
+   * an expression with a not operator.
+   */
+  public static enum Operator {
+    EQUALS,
+    NULL_SAFE_EQUALS,
+    LESS_THAN,
+    LESS_THAN_EQUALS,
+    IN,
+    BETWEEN,
+    IS_NULL
+  }
+
+  /**
+   * The possible types for sargs.
+   */
+  public static enum Type {
+    LONG(Long.class),      // all of the integer types
+    FLOAT(Double.class),   // float and double
+    STRING(String.class),  // string, char, varchar
+    DATE(Date.class),
+    DECIMAL(HiveDecimalWritable.class),
+    TIMESTAMP(Timestamp.class),
+    BOOLEAN(Boolean.class);
+
+    private final Class cls;
+    Type(Class cls) {
+      this.cls = cls;
+    }
+
+    /**
+     * For all SARG leaves, the values must be the matching class.
+     * @return the value class
+     */
+    public Class getValueClass() {
+      return cls;
+    }
+  }
+
+  /**
+   * Get the operator for the leaf.
+   */
+  public Operator getOperator();
+
+  /**
+   * Get the type of the column and literal by the file format.
+   */
+  public Type getType();
+
+  /**
+   * Get the simple column name.
+   * @return the column name
+   */
+  public String getColumnName();
+
+  /**
+   * Get the literal half of the predicate leaf. Adapt the original type for 
what orc needs
+   *
+   * @return an Integer, Long, Double, or String
+   */
+  public Object getLiteral();
+
+  /**
+   * For operators with multiple literals (IN and BETWEEN), get the literals.
+   *
+   * @return the list of literals (Integer, Longs, Doubles, or Strings)
+   *
+   */
+  public List<Object> getLiteralList();
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
----------------------------------------------------------------------
diff --git 
a/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
new file mode 100644
index 0000000..d70b3b0
--- /dev/null
+++ 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
@@ -0,0 +1,287 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.sarg;
+
+import java.util.List;
+
+/**
+ * Primary interface for <a href="http://en.wikipedia.org/wiki/Sargable";>
+ *   SearchArgument</a>, which are the subset of predicates
+ * that can be pushed down to the RecordReader. Each SearchArgument consists
+ * of a series of SearchClauses that must each be true for the row to be
+ * accepted by the filter.
+ *
+ * This requires that the filter be normalized into conjunctive normal form
+ * (<a href="http://en.wikipedia.org/wiki/Conjunctive_normal_form";>CNF</a>).
+ */
+public interface SearchArgument {
+
+  /**
+   * The potential result sets of logical operations.
+   */
+  public static enum TruthValue {
+    YES, NO, NULL, YES_NULL, NO_NULL, YES_NO, YES_NO_NULL;
+
+    /**
+     * Compute logical or between the two values.
+     * @param right the other argument or null
+     * @return the result
+     */
+    public TruthValue or(TruthValue right) {
+      if (right == null || right == this) {
+        return this;
+      }
+      if (right == YES || this == YES) {
+        return YES;
+      }
+      if (right == YES_NULL || this == YES_NULL) {
+        return YES_NULL;
+      }
+      if (right == NO) {
+        return this;
+      }
+      if (this == NO) {
+        return right;
+      }
+      if (this == NULL) {
+        if (right == NO_NULL) {
+          return NULL;
+        } else {
+          return YES_NULL;
+        }
+      }
+      if (right == NULL) {
+        if (this == NO_NULL) {
+          return NULL;
+        } else {
+          return YES_NULL;
+        }
+      }
+      return YES_NO_NULL;
+    }
+
+    /**
+     * Compute logical AND between the two values.
+     * @param right the other argument or null
+     * @return the result
+     */
+    public TruthValue and(TruthValue right) {
+      if (right == null || right == this) {
+        return this;
+      }
+      if (right == NO || this == NO) {
+        return NO;
+      }
+      if (right == NO_NULL || this == NO_NULL) {
+        return NO_NULL;
+      }
+      if (right == YES) {
+        return this;
+      }
+      if (this == YES) {
+        return right;
+      }
+      if (this == NULL) {
+        if (right == YES_NULL) {
+          return NULL;
+        } else {
+          return NO_NULL;
+        }
+      }
+      if (right == NULL) {
+        if (this == YES_NULL) {
+          return NULL;
+        } else {
+          return NO_NULL;
+        }
+      }
+      return YES_NO_NULL;
+    }
+
+    public TruthValue not() {
+      switch (this) {
+        case NO:
+          return YES;
+        case YES:
+          return NO;
+        case NULL:
+        case YES_NO:
+        case YES_NO_NULL:
+          return this;
+        case NO_NULL:
+          return YES_NULL;
+        case YES_NULL:
+          return NO_NULL;
+        default:
+          throw new IllegalArgumentException("Unknown value: " + this);
+      }
+    }
+
+    /**
+     * Does the RecordReader need to include this set of records?
+     * @return true unless none of the rows qualify
+     */
+    public boolean isNeeded() {
+      switch (this) {
+        case NO:
+        case NULL:
+        case NO_NULL:
+          return false;
+        default:
+          return true;
+      }
+    }
+  }
+
+  /**
+   * Get the leaf predicates that are required to evaluate the predicate. The
+   * list will have the duplicates removed.
+   * @return the list of leaf predicates
+   */
+  public List<PredicateLeaf> getLeaves();
+
+  /**
+   * Get the expression tree. This should only needed for file formats that
+   * need to translate the expression to an internal form.
+   */
+  public ExpressionTree getExpression();
+
+  /**
+   * Evaluate the entire predicate based on the values for the leaf predicates.
+   * @param leaves the value of each leaf predicate
+   * @return the value of hte entire predicate
+   */
+  public TruthValue evaluate(TruthValue[] leaves);
+
+  /**
+   * A builder object for contexts outside of Hive where it isn't easy to
+   * get a ExprNodeDesc. The user must call startOr, startAnd, or startNot
+   * before adding any leaves.
+   */
+  public interface Builder {
+
+    /**
+     * Start building an or operation and push it on the stack.
+     * @return this
+     */
+    public Builder startOr();
+
+    /**
+     * Start building an and operation and push it on the stack.
+     * @return this
+     */
+    public Builder startAnd();
+
+    /**
+     * Start building a not operation and push it on the stack.
+     * @return this
+     */
+    public Builder startNot();
+
+    /**
+     * Finish the current operation and pop it off of the stack. Each start
+     * call must have a matching end.
+     * @return this
+     */
+    public Builder end();
+
+    /**
+     * Add a less than leaf to the current item on the stack.
+     * @param column the name of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    public Builder lessThan(String column, PredicateLeaf.Type type,
+                            Object literal);
+
+    /**
+     * Add a less than equals leaf to the current item on the stack.
+     * @param column the name of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    public Builder lessThanEquals(String column, PredicateLeaf.Type type,
+                                  Object literal);
+
+    /**
+     * Add an equals leaf to the current item on the stack.
+     * @param column the name of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    public Builder equals(String column, PredicateLeaf.Type type,
+                          Object literal);
+
+    /**
+     * Add a null safe equals leaf to the current item on the stack.
+     * @param column the name of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    public Builder nullSafeEquals(String column, PredicateLeaf.Type type,
+                                  Object literal);
+
+    /**
+     * Add an in leaf to the current item on the stack.
+     * @param column the name of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    public Builder in(String column, PredicateLeaf.Type type,
+                      Object... literal);
+
+    /**
+     * Add an is null leaf to the current item on the stack.
+     * @param column the name of the column
+     * @param type the type of the expression
+     * @return this
+     */
+    public Builder isNull(String column, PredicateLeaf.Type type);
+
+    /**
+     * Add a between leaf to the current item on the stack.
+     * @param column the name of the column
+     * @param type the type of the expression
+     * @param lower the literal
+     * @param upper the literal
+     * @return this
+     */
+    public Builder between(String column, PredicateLeaf.Type type,
+                           Object lower, Object upper);
+
+    /**
+     * Add a truth value to the expression.
+     * @param truth
+     * @return this
+     */
+    public Builder literal(TruthValue truth);
+
+    /**
+     * Build and return the SearchArgument that has been defined. All of the
+     * starts must have been ended before this call.
+     * @return the new SearchArgument
+     */
+    public SearchArgument build();
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
----------------------------------------------------------------------
diff --git 
a/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
new file mode 100644
index 0000000..8fda95c
--- /dev/null
+++ 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.sarg;
+
+/**
+ * A factory for creating SearchArguments, as well as modifying those created 
by this factory.
+ */
+public class SearchArgumentFactory {
+  public static SearchArgument.Builder newBuilder() {
+    return new SearchArgumentImpl.BuilderImpl();
+  }
+  public static void setPredicateLeafColumn(PredicateLeaf leaf, String 
newName) {
+    SearchArgumentImpl.PredicateLeafImpl.setColumnName(leaf, newName);
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
----------------------------------------------------------------------
diff --git 
a/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
new file mode 100644
index 0000000..10d8c51
--- /dev/null
+++ 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
@@ -0,0 +1,703 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.sarg;
+
+import java.sql.Timestamp;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.Set;
+
+/**
+ * The implementation of SearchArguments. Visible for testing only.
+ */
+public final class SearchArgumentImpl implements SearchArgument {
+
+  public static final class PredicateLeafImpl implements PredicateLeaf {
+    private final Operator operator;
+    private final Type type;
+    private String columnName;
+    private final Object literal;
+    private final List<Object> literalList;
+
+    // Used by kryo
+    @SuppressWarnings("unused")
+    PredicateLeafImpl() {
+      operator = null;
+      type = null;
+      columnName = null;
+      literal = null;
+      literalList = null;
+    }
+
+    public PredicateLeafImpl(Operator operator,
+                             Type type,
+                             String columnName,
+                             Object literal,
+                             List<Object> literalList) {
+      this.operator = operator;
+      this.type = type;
+      this.columnName = columnName;
+      this.literal = literal;
+      if (literal != null) {
+        if (literal.getClass() != type.getValueClass()) {
+          throw new IllegalArgumentException("Wrong value class " +
+              literal.getClass().getName() + " for " + type + "." + operator +
+              " leaf");
+        }
+      }
+      this.literalList = literalList;
+      if (literalList != null) {
+        Class valueCls = type.getValueClass();
+        for(Object lit: literalList) {
+          if (lit != null && lit.getClass() != valueCls) {
+            throw new IllegalArgumentException("Wrong value class item " +
+                lit.getClass().getName() + " for " + type + "." + operator +
+                " leaf");
+          }
+        }
+      }
+    }
+
+    @Override
+    public Operator getOperator() {
+      return operator;
+    }
+
+    @Override
+    public Type getType(){
+      return type;
+    }
+
+    @Override
+    public String getColumnName() {
+      return columnName;
+    }
+
+    @Override
+    public Object getLiteral() {
+      // To get around a kryo 2.22 bug while deserialize a Timestamp into Date
+      // (https://github.com/EsotericSoftware/kryo/issues/88)
+      // When we see a Date, convert back into Timestamp
+      if (literal instanceof java.util.Date) {
+        return new Timestamp(((java.util.Date)literal).getTime());
+      }
+      return literal;
+    }
+
+    @Override
+    public List<Object> getLiteralList() {
+      return literalList;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buffer = new StringBuilder();
+      buffer.append('(');
+      buffer.append(operator);
+      buffer.append(' ');
+      buffer.append(columnName);
+      if (literal != null) {
+        buffer.append(' ');
+        buffer.append(literal);
+      } else if (literalList != null) {
+        for(Object lit: literalList) {
+          buffer.append(' ');
+          buffer.append(lit == null ? "null" : lit.toString());
+        }
+      }
+      buffer.append(')');
+      return buffer.toString();
+    }
+
+    private static boolean isEqual(Object left, Object right) {
+
+      return left == right ||
+          (left != null && right != null && left.equals(right));
+    }
+
+    @Override
+    public boolean equals(Object other) {
+      if (other == null || other.getClass() != getClass()) {
+        return false;
+      } else if (other == this) {
+        return true;
+      } else {
+        PredicateLeafImpl o = (PredicateLeafImpl) other;
+        return operator == o.operator &&
+            type == o.type &&
+            columnName.equals(o.columnName) &&
+            isEqual(literal, o.literal) &&
+            isEqual(literalList, o.literalList);
+      }
+    }
+
+    @Override
+    public int hashCode() {
+      return operator.hashCode() +
+             type.hashCode() * 17 +
+             columnName.hashCode() * 3 * 17+
+             (literal == null ? 0 : literal.hashCode()) * 101 * 3 * 17 +
+             (literalList == null ? 0 : literalList.hashCode()) *
+                 103 * 101 * 3 * 17;
+    }
+
+    public static void setColumnName(PredicateLeaf leaf, String newName) {
+      assert leaf instanceof PredicateLeafImpl;
+      ((PredicateLeafImpl)leaf).columnName = newName;
+    }
+  }
+
  // Distinct leaves of the expression, indexed by the ids stored in the tree.
  private final List<PredicateLeaf> leaves;
  // Root of the normalized expression tree; null evaluates to YES.
  private final ExpressionTree expression;

  /**
   * Create a search argument from a normalized expression tree and its
   * leaves.  Instances are produced by BuilderImpl.build().
   */
  SearchArgumentImpl(ExpressionTree expression, List<PredicateLeaf> leaves) {
    this.expression = expression;
    this.leaves = leaves;
  }

  // Used by kryo
  @SuppressWarnings("unused")
  SearchArgumentImpl() {
    leaves = null;
    expression = null;
  }

  @Override
  public List<PredicateLeaf> getLeaves() {
    return leaves;
  }

  @Override
  public TruthValue evaluate(TruthValue[] leaves) {
    // A null expression tree always evaluates to YES.
    return expression == null ? TruthValue.YES : expression.evaluate(leaves);
  }

  @Override
  public ExpressionTree getExpression() {
    return expression;
  }

  @Override
  public String toString() {
    // Format: "leaf-0 = (...), leaf-1 = (...), expr = <tree>"
    StringBuilder buffer = new StringBuilder();
    for(int i=0; i < leaves.size(); ++i) {
      buffer.append("leaf-");
      buffer.append(i);
      buffer.append(" = ");
      buffer.append(leaves.get(i).toString());
      buffer.append(", ");
    }
    buffer.append("expr = ");
    buffer.append(expression);
    return buffer.toString();
  }
+
+  static class BuilderImpl implements Builder {
+
+    // max threshold for CNF conversion. having >8 elements in andList will be
+    // converted to maybe
+    private static final int CNF_COMBINATIONS_THRESHOLD = 256;
+
+    private final Deque<ExpressionTree> currentTree =
+        new ArrayDeque<ExpressionTree>();
+    private final Map<PredicateLeaf, Integer> leaves =
+        new HashMap<PredicateLeaf, Integer>();
+    private final ExpressionTree root =
+        new ExpressionTree(ExpressionTree.Operator.AND);
+    {
+      currentTree.add(root);
+    }
+
+    @Override
+    public Builder startOr() {
+      ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.OR);
+      currentTree.getFirst().getChildren().add(node);
+      currentTree.addFirst(node);
+      return this;
+    }
+
+    @Override
+    public Builder startAnd() {
+      ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.AND);
+      currentTree.getFirst().getChildren().add(node);
+      currentTree.addFirst(node);
+      return this;
+    }
+
+    @Override
+    public Builder startNot() {
+      ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.NOT);
+      currentTree.getFirst().getChildren().add(node);
+      currentTree.addFirst(node);
+      return this;
+    }
+
+    @Override
+    public Builder end() {
+      ExpressionTree current = currentTree.removeFirst();
+      if (current.getChildren().size() == 0) {
+        throw new IllegalArgumentException("Can't create expression " + root +
+            " with no children.");
+      }
+      if (current.getOperator() == ExpressionTree.Operator.NOT &&
+          current.getChildren().size() != 1) {
+        throw new IllegalArgumentException("Can't create not expression " +
+            current + " with more than 1 child.");
+      }
+      return this;
+    }
+
+    private int addLeaf(PredicateLeaf leaf) {
+      Integer result = leaves.get(leaf);
+      if (result == null) {
+        int id = leaves.size();
+        leaves.put(leaf, id);
+        return id;
+      } else {
+        return result;
+      }
+    }
+
+    @Override
+    public Builder lessThan(String column, PredicateLeaf.Type type,
+                            Object literal) {
+      ExpressionTree parent = currentTree.getFirst();
+      if (column == null || literal == null) {
+        parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
+      } else {
+        PredicateLeaf leaf =
+            new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN,
+                type, column, literal, null);
+        parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
+      }
+      return this;
+    }
+
+    @Override
+    public Builder lessThanEquals(String column, PredicateLeaf.Type type,
+                                  Object literal) {
+      ExpressionTree parent = currentTree.getFirst();
+      if (column == null || literal == null) {
+        parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
+      } else {
+        PredicateLeaf leaf =
+            new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN_EQUALS,
+                type, column, literal, null);
+        parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
+      }
+      return this;
+    }
+
+    @Override
+    public Builder equals(String column, PredicateLeaf.Type type,
+                          Object literal) {
+      ExpressionTree parent = currentTree.getFirst();
+      if (column == null || literal == null) {
+        parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
+      } else {
+        PredicateLeaf leaf =
+            new PredicateLeafImpl(PredicateLeaf.Operator.EQUALS,
+                type, column, literal, null);
+        parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
+      }
+      return this;
+    }
+
+    @Override
+    public Builder nullSafeEquals(String column, PredicateLeaf.Type type,
+                                  Object literal) {
+      ExpressionTree parent = currentTree.getFirst();
+      if (column == null || literal == null) {
+        parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
+      } else {
+        PredicateLeaf leaf =
+            new PredicateLeafImpl(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
+                type, column, literal, null);
+        parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
+      }
+      return this;
+    }
+
+    @Override
+    public Builder in(String column, PredicateLeaf.Type type,
+                      Object... literal) {
+      ExpressionTree parent = currentTree.getFirst();
+      if (column  == null || literal == null) {
+        parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
+      } else {
+        if (literal.length == 0) {
+          throw new IllegalArgumentException("Can't create in expression with "
+              + "no arguments");
+        }
+        List<Object> argList = new ArrayList<Object>();
+        argList.addAll(Arrays.asList(literal));
+
+        PredicateLeaf leaf =
+            new PredicateLeafImpl(PredicateLeaf.Operator.IN,
+                type, column, null, argList);
+        parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
+      }
+      return this;
+    }
+
+    @Override
+    public Builder isNull(String column, PredicateLeaf.Type type) {
+      ExpressionTree parent = currentTree.getFirst();
+      if (column == null) {
+        parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
+      } else {
+        PredicateLeaf leaf =
+            new PredicateLeafImpl(PredicateLeaf.Operator.IS_NULL,
+                type, column, null, null);
+        parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
+      }
+      return this;
+    }
+
+    @Override
+    public Builder between(String column, PredicateLeaf.Type type, Object 
lower,
+                           Object upper) {
+      ExpressionTree parent = currentTree.getFirst();
+      if (column == null || lower == null || upper == null) {
+        parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
+      } else {
+        List<Object> argList = new ArrayList<Object>();
+        argList.add(lower);
+        argList.add(upper);
+        PredicateLeaf leaf =
+            new PredicateLeafImpl(PredicateLeaf.Operator.BETWEEN,
+                type, column, null, argList);
+        parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
+      }
+      return this;
+    }
+
+    @Override
+    public Builder literal(TruthValue truth) {
+      ExpressionTree parent = currentTree.getFirst();
+      parent.getChildren().add(new ExpressionTree(truth));
+      return this;
+    }
+
+    /**
+     * Recursively explore the tree to find the leaves that are still reachable
+     * after optimizations.
+     * @param tree the node to check next
+     * @param next the next available leaf id
+     * @param leafReorder
+     * @return the next available leaf id
+     */
+    static int compactLeaves(ExpressionTree tree, int next, int[] leafReorder) 
{
+      if (tree.getOperator() == ExpressionTree.Operator.LEAF) {
+        int oldLeaf = tree.getLeaf();
+        if (leafReorder[oldLeaf] == -1) {
+          leafReorder[oldLeaf] = next++;
+        }
+      } else if (tree.getChildren() != null){
+        for(ExpressionTree child: tree.getChildren()) {
+          next = compactLeaves(child, next, leafReorder);
+        }
+      }
+      return next;
+    }
+
+    /**
+     * Rewrite expression tree to update the leaves.
+     * @param root the root of the tree to fix
+     * @param leafReorder a map from old leaf ids to new leaf ids
+     * @return the fixed root
+     */
+    static ExpressionTree rewriteLeaves(ExpressionTree root,
+                              int[] leafReorder) {
+      // The leaves could be shared in the tree. Use Set to remove the 
duplicates.
+      Set<ExpressionTree> leaves = new HashSet<ExpressionTree>();
+      Queue<ExpressionTree> nodes = new LinkedList<ExpressionTree>();
+      nodes.add(root);
+
+      while(!nodes.isEmpty()) {
+        ExpressionTree node = nodes.remove();
+        if (node.getOperator() == ExpressionTree.Operator.LEAF) {
+          leaves.add(node);
+        } else {
+          if (node.getChildren() != null){
+            nodes.addAll(node.getChildren());
+          }
+        }
+      }
+
+      // Update the leaf in place
+      for(ExpressionTree leaf : leaves) {
+        leaf.setLeaf(leafReorder[leaf.getLeaf()]);
+      }
+
+      return root;
+    }
+
+    @Override
+    public SearchArgument build() {
+      if (currentTree.size() != 1) {
+        throw new IllegalArgumentException("Failed to end " +
+            currentTree.size() + " operations.");
+      }
+      ExpressionTree optimized = pushDownNot(root);
+      optimized = foldMaybe(optimized);
+      optimized = flatten(optimized);
+      optimized = convertToCNF(optimized);
+      optimized = flatten(optimized);
+      int leafReorder[] = new int[leaves.size()];
+      Arrays.fill(leafReorder, -1);
+      int newLeafCount = compactLeaves(optimized, 0, leafReorder);
+      optimized = rewriteLeaves(optimized, leafReorder);
+      ArrayList<PredicateLeaf> leafList = new ArrayList<>(newLeafCount);
+      // expand list to correct size
+      for(int i=0; i < newLeafCount; ++i) {
+        leafList.add(null);
+      }
+      // build the new list
+      for(Map.Entry<PredicateLeaf, Integer> elem: leaves.entrySet()) {
+        int newLoc = leafReorder[elem.getValue()];
+        if (newLoc != -1) {
+          leafList.set(newLoc, elem.getKey());
+        }
+      }
+      return new SearchArgumentImpl(optimized, leafList);
+    }
+
+    /**
+     * Push the negations all the way to just before the leaves. Also remove
+     * double negatives.
+     * @param root the expression to normalize
+     * @return the normalized expression, which may share some or all of the
+     * nodes of the original expression.
+     */
+    static ExpressionTree pushDownNot(ExpressionTree root) {
+      if (root.getOperator() == ExpressionTree.Operator.NOT) {
+        ExpressionTree child = root.getChildren().get(0);
+        switch (child.getOperator()) {
+          case NOT:
+            return pushDownNot(child.getChildren().get(0));
+          case CONSTANT:
+            return  new ExpressionTree(child.getConstant().not());
+          case AND:
+            root = new ExpressionTree(ExpressionTree.Operator.OR);
+            for(ExpressionTree kid: child.getChildren()) {
+              root.getChildren().add(pushDownNot(new
+                  ExpressionTree(ExpressionTree.Operator.NOT, kid)));
+            }
+            break;
+          case OR:
+            root = new ExpressionTree(ExpressionTree.Operator.AND);
+            for(ExpressionTree kid: child.getChildren()) {
+              root.getChildren().add(pushDownNot(new ExpressionTree
+                  (ExpressionTree.Operator.NOT, kid)));
+            }
+            break;
+          // for leaf, we don't do anything
+          default:
+            break;
+        }
+      } else if (root.getChildren() != null) {
+        // iterate through children and push down not for each one
+        for(int i=0; i < root.getChildren().size(); ++i) {
+          root.getChildren().set(i, pushDownNot(root.getChildren().get(i)));
+        }
+      }
+      return root;
+    }
+
+    /**
+     * Remove MAYBE values from the expression. If they are in an AND operator,
+     * they are dropped. If they are in an OR operator, they kill their parent.
+     * This assumes that pushDownNot has already been called.
+     * @param expr The expression to clean up
+     * @return The cleaned up expression
+     */
+    static ExpressionTree foldMaybe(ExpressionTree expr) {
+      if (expr.getChildren() != null) {
+        for(int i=0; i < expr.getChildren().size(); ++i) {
+          ExpressionTree child = foldMaybe(expr.getChildren().get(i));
+          if (child.getConstant() == TruthValue.YES_NO_NULL) {
+            switch (expr.getOperator()) {
+              case AND:
+                expr.getChildren().remove(i);
+                i -= 1;
+                break;
+              case OR:
+                // a maybe will kill the or condition
+                return child;
+              default:
+                throw new IllegalStateException("Got a maybe as child of " +
+                    expr);
+            }
+          } else {
+            expr.getChildren().set(i, child);
+          }
+        }
+        if (expr.getChildren().isEmpty()) {
+          return new ExpressionTree(TruthValue.YES_NO_NULL);
+        }
+      }
+      return expr;
+    }
+
+    /**
+     * Converts multi-level ands and ors into single level ones.
+     * @param root the expression to flatten
+     * @return the flattened expression, which will always be root with
+     *   potentially modified children.
+     */
+    static ExpressionTree flatten(ExpressionTree root) {
+      if (root.getChildren() != null) {
+        // iterate through the index, so that if we add more children,
+        // they don't get re-visited
+        for(int i=0; i < root.getChildren().size(); ++i) {
+          ExpressionTree child = flatten(root.getChildren().get(i));
+          // do we need to flatten?
+          if (child.getOperator() == root.getOperator() &&
+              child.getOperator() != ExpressionTree.Operator.NOT) {
+            boolean first = true;
+            for(ExpressionTree grandkid: child.getChildren()) {
+              // for the first grandkid replace the original parent
+              if (first) {
+                first = false;
+                root.getChildren().set(i, grandkid);
+              } else {
+                root.getChildren().add(++i, grandkid);
+              }
+            }
+          } else {
+            root.getChildren().set(i, child);
+          }
+        }
+        // if we have a singleton AND or OR, just return the child
+        if ((root.getOperator() == ExpressionTree.Operator.OR ||
+            root.getOperator() == ExpressionTree.Operator.AND) &&
+            root.getChildren().size() == 1) {
+          return root.getChildren().get(0);
+        }
+      }
+      return root;
+    }
+
+    /**
+     * Generate all combinations of items on the andList. For each item on the
+     * andList, it generates all combinations of one child from each and
+     * expression. Thus, (and a b) (and c d) will be expanded to: (or a c)
+     * (or a d) (or b c) (or b d). If there are items on the nonAndList, they
+     * are added to each or expression.
+     * @param result a list to put the results onto
+     * @param andList a list of and expressions
+     * @param nonAndList a list of non-and expressions
+     */
+    private static void generateAllCombinations(List<ExpressionTree> result,
+                                                List<ExpressionTree> andList,
+                                                List<ExpressionTree> nonAndList
+    ) {
+      List<ExpressionTree> kids = andList.get(0).getChildren();
+      if (result.isEmpty()) {
+        for(ExpressionTree kid: kids) {
+          ExpressionTree or = new ExpressionTree(ExpressionTree.Operator.OR);
+          result.add(or);
+          for(ExpressionTree node: nonAndList) {
+            or.getChildren().add(new ExpressionTree(node));
+          }
+          or.getChildren().add(kid);
+        }
+      } else {
+        List<ExpressionTree> work = new ArrayList<ExpressionTree>(result);
+        result.clear();
+        for(ExpressionTree kid: kids) {
+          for(ExpressionTree or: work) {
+            ExpressionTree copy = new ExpressionTree(or);
+            copy.getChildren().add(kid);
+            result.add(copy);
+          }
+        }
+      }
+      if (andList.size() > 1) {
+        generateAllCombinations(result, andList.subList(1, andList.size()),
+            nonAndList);
+      }
+    }
+
+    /**
+     * Convert an expression so that the top level operator is AND with OR
+     * operators under it. This routine assumes that all of the NOT operators
+     * have been pushed to the leaves via pushdDownNot.
+     * @param root the expression
+     * @return the normalized expression
+     */
+    static ExpressionTree convertToCNF(ExpressionTree root) {
+      if (root.getChildren() != null) {
+        // convert all of the children to CNF
+        int size = root.getChildren().size();
+        for(int i=0; i < size; ++i) {
+          root.getChildren().set(i, convertToCNF(root.getChildren().get(i)));
+        }
+        if (root.getOperator() == ExpressionTree.Operator.OR) {
+          // a list of leaves that weren't under AND expressions
+          List<ExpressionTree> nonAndList = new ArrayList<ExpressionTree>();
+          // a list of AND expressions that we need to distribute
+          List<ExpressionTree> andList = new ArrayList<ExpressionTree>();
+          for(ExpressionTree child: root.getChildren()) {
+            if (child.getOperator() == ExpressionTree.Operator.AND) {
+              andList.add(child);
+            } else if (child.getOperator() == ExpressionTree.Operator.OR) {
+              // pull apart the kids of the OR expression
+              for(ExpressionTree grandkid: child.getChildren()) {
+                nonAndList.add(grandkid);
+              }
+            } else {
+              nonAndList.add(child);
+            }
+          }
+          if (!andList.isEmpty()) {
+            if (checkCombinationsThreshold(andList)) {
+              root = new ExpressionTree(ExpressionTree.Operator.AND);
+              generateAllCombinations(root.getChildren(), andList, nonAndList);
+            } else {
+              root = new ExpressionTree(TruthValue.YES_NO_NULL);
+            }
+          }
+        }
+      }
+      return root;
+    }
+
+    private static boolean checkCombinationsThreshold(List<ExpressionTree> 
andList) {
+      int numComb = 1;
+      for (ExpressionTree tree : andList) {
+        numComb *= tree.getChildren().size();
+        if (numComb > CNF_COMBINATIONS_THRESHOLD) {
+          return false;
+        }
+      }
+      return true;
+    }
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
----------------------------------------------------------------------
diff --git 
a/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
new file mode 100644
index 0000000..151f30d
--- /dev/null
+++ 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java
@@ -0,0 +1,335 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.util;
+
+/**
+ * Estimation of memory footprint of object
+ */
+public enum JavaDataModel {
+
  // Model for a 32-bit JVM: all values are precomputed byte counts derived
  // from the JAVA32_* constants declared below.
  JAVA32 {
    @Override
    public int object() {
      return JAVA32_OBJECT;
    }

    @Override
    public int array() {
      return JAVA32_ARRAY;
    }

    @Override
    public int ref() {
      return JAVA32_REF;
    }

    @Override
    public int hashMap(int entry) {
      // base  = JAVA32_OBJECT + PRIMITIVES1 * 4 + JAVA32_FIELDREF * 3 + JAVA32_ARRAY;
      // entry = JAVA32_OBJECT + JAVA32_FIELDREF + PRIMITIVES1
      return hashMapBase() + hashMapEntry() * entry;
    }

    @Override
    public int hashMapBase() {
      return 64;
    }

    @Override
    public int hashMapEntry() {
      return 24;
    }

    @Override
    public int hashSet(int entry) {
      // hashMap += JAVA32_OBJECT
      return hashSetBase() + hashSetEntry() * entry;
    }

    @Override
    public int hashSetBase() {
      return 80;
    }

    @Override
    public int hashSetEntry() {
      return 24;
    }

    @Override
    public int linkedHashMap(int entry) {
      // hashMap += JAVA32_FIELDREF + PRIMITIVES1
      // hashMap.entry += JAVA32_FIELDREF * 2
      return 72 + 32 * entry;
    }

    @Override
    public int linkedList(int entry) {
      // base  = JAVA32_OBJECT + PRIMITIVES1 * 2 + JAVA32_FIELDREF;
      // entry = JAVA32_OBJECT + JAVA32_FIELDREF * 2
      return linkedListBase() + linkedListEntry() * entry;
    }

    @Override
    public int linkedListBase() {
      return 28;
    }

    @Override
    public int linkedListEntry() {
      return 24;
    }

    @Override
    public int arrayList() {
      // JAVA32_OBJECT + PRIMITIVES1 * 2 + JAVA32_ARRAY;
      return 44;
    }

    @Override
    public int memoryAlign() {
      return 8;
    }
  }, JAVA64 {
    // Model for a 64-bit JVM: values derived from the JAVA64_* constants.
    @Override
    public int object() {
      return JAVA64_OBJECT;
    }

    @Override
    public int array() {
      return JAVA64_ARRAY;
    }

    @Override
    public int ref() {
      return JAVA64_REF;
    }

    @Override
    public int hashMap(int entry) {
      // base  = JAVA64_OBJECT + PRIMITIVES1 * 4 + JAVA64_FIELDREF * 3 + JAVA64_ARRAY;
      // entry = JAVA64_OBJECT + JAVA64_FIELDREF + PRIMITIVES1
      return hashMapBase() + hashMapEntry() * entry;
    }

    @Override
    public int hashMapBase() {
      return 112;
    }

    @Override
    public int hashMapEntry() {
      return 44;
    }

    @Override
    public int hashSet(int entry) {
      // hashMap += JAVA64_OBJECT
      return hashSetBase() + hashSetEntry() * entry;
    }

    @Override
    public int hashSetBase() {
      return 144;
    }

    @Override
    public int hashSetEntry() {
      return 44;
    }

    @Override
    public int linkedHashMap(int entry) {
      // hashMap += JAVA64_FIELDREF + PRIMITIVES1
      // hashMap.entry += JAVA64_FIELDREF * 2
      return 128 + 60 * entry;
    }

    @Override
    public int linkedList(int entry) {
      // base  = JAVA64_OBJECT + PRIMITIVES1 * 2 + JAVA64_FIELDREF;
      // entry = JAVA64_OBJECT + JAVA64_FIELDREF * 2
      return linkedListBase() + linkedListEntry() * entry;
    }

    @Override
    public int linkedListBase() {
      return 48;
    }

    @Override
    public int linkedListEntry() {
      return 48;
    }

    @Override
    public int arrayList() {
      // JAVA64_OBJECT + PRIMITIVES1 * 2 + JAVA64_ARRAY;
      return 80;
    }

    @Override
    public int memoryAlign() {
      return 8;
    }
  };
+
  // Per-model estimated sizes, all in bytes.
  public abstract int object();
  public abstract int array();
  public abstract int ref();
  public abstract int hashMap(int entry);
  public abstract int hashMapBase();
  public abstract int hashMapEntry();
  public abstract int hashSetBase();
  public abstract int hashSetEntry();
  public abstract int hashSet(int entry);
  public abstract int linkedHashMap(int entry);
  public abstract int linkedListBase();
  public abstract int linkedListEntry();
  public abstract int linkedList(int entry);
  public abstract int arrayList();
  public abstract int memoryAlign();

  // ascii string
  public int lengthFor(String string) {
    return lengthForStringOfLength(string.length());
  }

  public int lengthForRandom() {
    // boolean + double + AtomicLong
    return object() + primitive1() + primitive2() + object() + primitive2();
  }

  // Size of a 4-byte primitive (void, boolean, byte, short, int, float).
  public int primitive1() {
    return PRIMITIVES1;
  }
  // Size of an 8-byte primitive (long, double).
  public int primitive2() {
    return PRIMITIVES2;
  }

  // Round value up to the next multiple of align; align must be a power of 2.
  public static int alignUp(int value, int align) {
    return (value + align - 1) & ~(align - 1);
  }

  public static final int JAVA32_META = 12;
  public static final int JAVA32_ARRAY_META = 16;
  public static final int JAVA32_REF = 4;
  public static final int JAVA32_OBJECT = 16;   // JAVA32_META + JAVA32_REF
  public static final int JAVA32_ARRAY = 20;    // JAVA32_ARRAY_META + JAVA32_REF

  public static final int JAVA64_META = 24;
  public static final int JAVA64_ARRAY_META = 32;
  public static final int JAVA64_REF = 8;
  public static final int JAVA64_OBJECT = 32;   // JAVA64_META + JAVA64_REF
  public static final int JAVA64_ARRAY = 40;    // JAVA64_ARRAY_META + JAVA64_REF

  public static final int PRIMITIVES1 = 4;      // void, boolean, byte, short, int, float
  public static final int PRIMITIVES2 = 8;      // long, double

  public static final int PRIMITIVE_BYTE = 1;    // byte

  // Cached result of get(); assigned on first call.
  private static JavaDataModel current;

  /**
   * Detect the data model of the running JVM from the
   * "sun.arch.data.model" system property, caching the result.
   * Defaults to JAVA64 when the property is absent or unreadable.
   */
  public static JavaDataModel get() {
    if (current != null) {
      return current;
    }
    try {
      String props = System.getProperty("sun.arch.data.model");
      if ("32".equals(props)) {
        return current = JAVA32;
      }
    } catch (Exception e) {
      // ignore
    }
    // TODO: separate model is needed for compressedOops, which can be guessed from memory size.
    return current = JAVA64;
  }

  // On 64-bit models, round size up to the next multiple of 8 bytes;
  // 32-bit sizes are returned unchanged.
  public static int round(int size) {
    JavaDataModel model = get();
    if (model == JAVA32 || size % 8 == 0) {
      return size;
    }
    return ((size + 8) >> 3) << 3;
  }
+
  // Array object overhead plus the payload ('primitiveSize' bytes per
  // element), rounded up to the model's allocation alignment.
  private int lengthForPrimitiveArrayOfSize(int primitiveSize, int length) {
    return alignUp(array() + primitiveSize*length, memoryAlign());
  }
+
  /** Estimated size of a byte[length]. */
  public int lengthForByteArrayOfSize(int length) {
    return lengthForPrimitiveArrayOfSize(PRIMITIVE_BYTE, length);
  }
  /** Estimated size of an Object[length] (reference slots only, not the referents). */
  public int lengthForObjectArrayOfSize(int length) {
    return lengthForPrimitiveArrayOfSize(ref(), length);
  }
  /** Estimated size of a long[length]. */
  public int lengthForLongArrayOfSize(int length) {
    return lengthForPrimitiveArrayOfSize(primitive2(), length);
  }
  /** Estimated size of a double[length]. */
  public int lengthForDoubleArrayOfSize(int length) {
    return lengthForPrimitiveArrayOfSize(primitive2(), length);
  }
  /** Estimated size of an int[length]. */
  public int lengthForIntArrayOfSize(int length) {
    return lengthForPrimitiveArrayOfSize(primitive1(), length);
  }
  /** Estimated size of a boolean[length] (one byte per element). */
  public int lengthForBooleanArrayOfSize(int length) {
    return lengthForPrimitiveArrayOfSize(PRIMITIVE_BYTE, length);
  }
  /**
   * Estimated size of a Timestamp[length].
   * NOTE(review): charges the size of each Timestamp object per slot but not
   * the reference slots themselves -- presumably a deliberate approximation;
   * confirm. Same applies to the Date and Decimal variants below.
   */
  public int lengthForTimestampArrayOfSize(int length) {
    return lengthForPrimitiveArrayOfSize(lengthOfTimestamp(), length);
  }
  /** Estimated size of a Date[length] (see note on the Timestamp variant). */
  public int lengthForDateArrayOfSize(int length) {
    return lengthForPrimitiveArrayOfSize(lengthOfDate(), length);
  }
  /** Estimated size of a decimal[length] (see note on the Timestamp variant). */
  public int lengthForDecimalArrayOfSize(int length) {
    return lengthForPrimitiveArrayOfSize(lengthOfDecimal(), length);
  }
+
  /** Estimated heap size of a BigDecimal-backed decimal (HiveDecimal payload). */
  public int lengthOfDecimal() {
    // object overhead + 8 bytes for intCompact + 4 bytes for precision
    // + 4 bytes for scale + size of BigInteger
    return object() + 2 * primitive2() + lengthOfBigInteger();
  }

  /** Estimated heap size of the BigInteger inside a decimal (38-digit max). */
  private int lengthOfBigInteger() {
    // object overhead + 4 bytes for bitCount + 4 bytes for bitLength
    // + 4 bytes for firstNonzeroByteNum + 4 bytes for firstNonzeroIntNum +
    // + 4 bytes for lowestSetBit + 5 bytes for size of magnitude (since max precision
    // is only 38 for HiveDecimal) + 7 bytes of padding (since java memory allocations
    // are 8 byte aligned)
    return object() + 4 * primitive2();
  }
+
  /** Estimated heap size of a java.sql.Timestamp. */
  public int lengthOfTimestamp() {
    // object overhead + 4 bytes for int (nanos) + 4 bytes of padding
    return object() + primitive2();
  }

  /** Estimated heap size of a java.sql.Date, including its internal cdate. */
  public int lengthOfDate() {
    // object overhead + 8 bytes for long (fastTime) + 16 bytes for cdate
    return object() + 3 * primitive2();
  }
+
  /**
   * Estimated heap size of a String of the given length: the String object,
   * three int fields (hash/offset/count in pre-Java-7u6 layouts -- presumably),
   * the char[] header, and the payload.
   * NOTE(review): charges only one byte per character (strLen, not 2 * strLen)
   * -- looks like a deliberate low/ASCII estimate; confirm.
   */
  public int lengthForStringOfLength(int strLen) {
    return object() + primitive1() * 3 + array() + strLen;
  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/TimestampUtils.java
----------------------------------------------------------------------
diff --git 
a/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/TimestampUtils.java 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/TimestampUtils.java
new file mode 100644
index 0000000..189ead5
--- /dev/null
+++ 
b/java/storage-api/src/java/org/apache/hadoop/hive/ql/util/TimestampUtils.java
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.util;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+
+import java.math.BigDecimal;
+import java.sql.Timestamp;
+
+/**
+ * Utitilities for Timestamps and the relevant conversions.
+ */
+public class TimestampUtils {
+  public static final BigDecimal BILLION_BIG_DECIMAL = 
BigDecimal.valueOf(1000000000);
+
+  /**
+   * Convert the timestamp to a double measured in seconds.
+   * @return double representation of the timestamp, accurate to nanoseconds
+   */
+  public static double getDouble(Timestamp ts) {
+    long seconds = millisToSeconds(ts.getTime());
+    return seconds + ((double) ts.getNanos()) / 1000000000;
+  }
+
+  public static Timestamp doubleToTimestamp(double f) {
+    long seconds = (long) f;
+
+    // We must ensure the exactness of the double's fractional portion.
+    // 0.6 as the fraction part will be converted to 0.59999... and
+    // significantly reduce the savings from binary serialization
+    BigDecimal bd;
+    try {
+      bd = new BigDecimal(String.valueOf(f));
+    } catch (NumberFormatException nfe) {
+      return null;
+    }
+    bd = bd.subtract(new BigDecimal(seconds)).multiply(new 
BigDecimal(1000000000));
+    int nanos = bd.intValue();
+
+    // Convert to millis
+    long millis = seconds * 1000;
+    if (nanos < 0) {
+      millis -= 1000;
+      nanos += 1000000000;
+    }
+    Timestamp t = new Timestamp(millis);
+
+    // Set remaining fractional portion to nanos
+    t.setNanos(nanos);
+    return t;
+  }
+
+  public static Timestamp decimalToTimestamp(HiveDecimal d) {
+    BigDecimal nanoInstant = d.bigDecimalValue().multiply(BILLION_BIG_DECIMAL);
+    int nanos = nanoInstant.remainder(BILLION_BIG_DECIMAL).intValue();
+    if (nanos < 0) {
+      nanos += 1000000000;
+    }
+    long seconds =
+        nanoInstant.subtract(new 
BigDecimal(nanos)).divide(BILLION_BIG_DECIMAL).longValue();
+    Timestamp t = new Timestamp(seconds * 1000);
+    t.setNanos(nanos);
+
+    return t;
+  }
+
+  /**
+   * Rounds the number of milliseconds relative to the epoch down to the 
nearest whole number of
+   * seconds. 500 would round to 0, -500 would round to -1.
+   */
+  public static long millisToSeconds(long millis) {
+    if (millis >= 0) {
+      return millis / 1000;
+    } else {
+      return (millis - 999) / 1000;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
----------------------------------------------------------------------
diff --git 
a/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java 
b/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
new file mode 100644
index 0000000..dd2b1d9
--- /dev/null
+++ 
b/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/DateWritable.java
@@ -0,0 +1,179 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.sql.Date;
+import java.util.Calendar;
+import java.util.TimeZone;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableUtils;
+
+
+/**
+ * DateWritable
+ * Writable equivalent of java.sql.Date.
+ *
+ * Dates are of the format
+ *    YYYY-MM-DD
+ *
+ */
+public class DateWritable implements WritableComparable<DateWritable> {
+
+  private static final long MILLIS_PER_DAY = TimeUnit.DAYS.toMillis(1);
+
+  // Local time zone.
+  // Java TimeZone has no mention of thread safety. Use thread local instance 
to be safe.
+  private static final ThreadLocal<TimeZone> LOCAL_TIMEZONE = new 
ThreadLocal<TimeZone>() {
+    @Override
+    protected TimeZone initialValue() {
+      return Calendar.getInstance().getTimeZone();
+    }
+  };
+
+  // Internal representation is an integer representing day offset from our 
epoch value 1970-01-01
+  private int daysSinceEpoch = 0;
+
+  /* Constructors */
+  public DateWritable() {
+  }
+
+  public DateWritable(DateWritable d) {
+    set(d);
+  }
+
+  public DateWritable(Date d) {
+    set(d);
+  }
+
+  public DateWritable(int d) {
+    set(d);
+  }
+
+  /**
+   * Set the DateWritable based on the days since epoch date.
+   * @param d integer value representing days since epoch date
+   */
+  public void set(int d) {
+    daysSinceEpoch = d;
+  }
+
+  /**
+   * Set the DateWritable based on the year/month/day of the date in the local 
timezone.
+   * @param d Date value
+   */
+  public void set(Date d) {
+    if (d == null) {
+      daysSinceEpoch = 0;
+      return;
+    }
+
+    set(dateToDays(d));
+  }
+
+  public void set(DateWritable d) {
+    set(d.daysSinceEpoch);
+  }
+
+  /**
+   *
+   * @return Date value corresponding to the date in the local time zone
+   */
+  public Date get() {
+    return new Date(daysToMillis(daysSinceEpoch));
+  }
+
+  public int getDays() {
+    return daysSinceEpoch;
+  }
+
+  /**
+   *
+   * @return time in seconds corresponding to this DateWritable
+   */
+  public long getTimeInSeconds() {
+    return get().getTime() / 1000;
+  }
+
+  public static Date timeToDate(long l) {
+    return new Date(l * 1000);
+  }
+
+  public static long daysToMillis(int d) {
+    // Convert from day offset to ms in UTC, then apply local timezone offset.
+    long millisUtc = d * MILLIS_PER_DAY;
+    long tmp =  millisUtc - LOCAL_TIMEZONE.get().getOffset(millisUtc);
+    // Between millisUtc and tmp, the time zone offset may have changed due to 
DST.
+    // Look up the offset again.
+    return millisUtc - LOCAL_TIMEZONE.get().getOffset(tmp);
+  }
+
+  public static int millisToDays(long millisLocal) {
+    long millisUtc = millisLocal + LOCAL_TIMEZONE.get().getOffset(millisLocal);
+    int days;
+    if (millisUtc >= 0L) {
+      days = (int) (millisUtc / MILLIS_PER_DAY);
+    } else {
+      days = (int) ((millisUtc - 86399999) / MILLIS_PER_DAY);
+    }
+    return days;
+  }
+
+  public static int dateToDays(Date d) {
+    // convert to equivalent time in UTC, then get day offset
+    long millisLocal = d.getTime();
+    return millisToDays(millisLocal);
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    daysSinceEpoch = WritableUtils.readVInt(in);
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    WritableUtils.writeVInt(out, daysSinceEpoch);
+  }
+
+  @Override
+  public int compareTo(DateWritable d) {
+    return daysSinceEpoch - d.daysSinceEpoch;
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof DateWritable)) {
+      return false;
+    }
+    return compareTo((DateWritable) o) == 0;
+  }
+
+  @Override
+  public String toString() {
+    return get().toString();
+  }
+
+  @Override
+  public int hashCode() {
+    return daysSinceEpoch;
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3283d238/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
----------------------------------------------------------------------
diff --git 
a/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
 
b/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
new file mode 100644
index 0000000..41452da
--- /dev/null
+++ 
b/java/storage-api/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java
@@ -0,0 +1,170 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.math.BigInteger;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableUtils;
+
/**
 * Writable for HiveDecimal values. The value is stored as the big-endian
 * two's-complement bytes of its unscaled value (BigInteger.toByteArray form)
 * together with an int scale.
 */
public class HiveDecimalWritable implements WritableComparable<HiveDecimalWritable> {

  // Unscaled value in BigInteger.toByteArray() form; may alias a caller's
  // array -- see set(byte[], int).
  private byte[] internalStorage = new byte[0];
  // Number of fractional digits implied by internalStorage.
  private int scale;

  public HiveDecimalWritable() {
  }

  public HiveDecimalWritable(String value) {
    set(HiveDecimal.create(value));
  }

  public HiveDecimalWritable(byte[] bytes, int scale) {
    set(bytes, scale);
  }

  public HiveDecimalWritable(HiveDecimalWritable writable) {
    set(writable.getHiveDecimal());
  }

  public HiveDecimalWritable(HiveDecimal value) {
    set(value);
  }

  public HiveDecimalWritable(long value) {
    set((HiveDecimal.create(value)));
  }

  /** Replaces the stored value with the given decimal. */
  public void set(HiveDecimal value) {
    set(value.unscaledValue().toByteArray(), value.scale());
  }

  /** Replaces the stored value, first constraining it to the given precision/scale. */
  public void set(HiveDecimal value, int maxPrecision, int maxScale) {
    set(HiveDecimal.enforcePrecisionScale(value, maxPrecision, maxScale));
  }

  public void set(HiveDecimalWritable writable) {
    set(writable.getHiveDecimal());
  }

  /**
   * Replaces the stored value with the given unscaled bytes and scale.
   * NOTE(review): the array is stored without copying, so the caller must
   * not mutate it afterwards.
   */
  public void set(byte[] bytes, int scale) {
    this.internalStorage = bytes;
    this.scale = scale;
  }

  /** Materializes the stored bytes/scale as a HiveDecimal. */
  public HiveDecimal getHiveDecimal() {
    return HiveDecimal.create(new BigInteger(internalStorage), scale);
  }

  /**
   * Get a HiveDecimal instance from the writable and constraint it with maximum precision/scale.
   *
   * @param maxPrecision maximum precision
   * @param maxScale maximum scale
   * @return HiveDecimal instance
   */
  public HiveDecimal getHiveDecimal(int maxPrecision, int maxScale) {
     return HiveDecimal.enforcePrecisionScale(HiveDecimal.
             create(new BigInteger(internalStorage), scale),
         maxPrecision, maxScale);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    scale = WritableUtils.readVInt(in);
    int byteArrayLen = WritableUtils.readVInt(in);
    // Reuse the existing buffer when it is already exactly the right size.
    if (internalStorage.length != byteArrayLen) {
      internalStorage = new byte[byteArrayLen];
    }
    in.readFully(internalStorage);
  }

  @Override
  public void write(DataOutput out) throws IOException {
    WritableUtils.writeVInt(out, scale);
    WritableUtils.writeVInt(out, internalStorage.length);
    out.write(internalStorage);
  }

  @Override
  public int compareTo(HiveDecimalWritable that) {
    return getHiveDecimal().compareTo(that.getHiveDecimal());
  }

  @Override
  public String toString() {
    return getHiveDecimal().toString();
  }

  @Override
  public boolean equals(Object other) {
    if (this == other) {
      return true;
    }
    if (other == null || getClass() != other.getClass()) {
      return false;
    }
    HiveDecimalWritable bdw = (HiveDecimalWritable) other;

    // 'equals' and 'compareTo' are not compatible with HiveDecimals. We want
    // compareTo which returns true iff the numbers are equal (e.g.: 3.14 is
    // the same as 3.140). 'Equals' returns true iff equal and the same scale
    // is set in the decimals (e.g.: 3.14 is not the same as 3.140)
    return getHiveDecimal().compareTo(bdw.getHiveDecimal()) == 0;
  }

  @Override
  public int hashCode() {
    // NOTE(review): equals() above treats values equal under compareTo
    // (3.14 vs 3.140); this hash is consistent with that only if
    // HiveDecimal normalizes trailing zeros before hashing -- confirm
    // against HiveDecimal's implementation.
    return getHiveDecimal().hashCode();
  }

  /* (non-Javadoc)
   * In order to update a Decimal128 fast (w/o allocation) we need to expose access to the
   * internal storage bytes and scale.
   * @return the live internal byte array (not a copy)
   */
  public byte[] getInternalStorage() {
    return internalStorage;
  }

  /* (non-Javadoc)
   * In order to update a Decimal128 fast (w/o allocation) we need to expose access to the
   * internal storage bytes and scale.
   */
  public int getScale() {
    return scale;
  }

  /**
   * Applies precision/scale constraints to a writable. Returns null for a
   * null input or when the value cannot be represented within the limits.
   */
  public static
  HiveDecimalWritable enforcePrecisionScale(HiveDecimalWritable writable,
                                            int precision, int scale) {
    if (writable == null) {
      return null;
    }

    HiveDecimal dec =
        HiveDecimal.enforcePrecisionScale(writable.getHiveDecimal(), precision,
            scale);
    return dec == null ? null : new HiveDecimalWritable(dec);
  }
}

Reply via email to