Author: prasanthj
Date: Fri Dec 19 19:22:20 2014
New Revision: 1646836
URL: http://svn.apache.org/r1646836
Log:
HIVE-9166: Place an upper bound for SARG CNF conversion (Prasanth Jayachandran
reviewed by Owen O'Malley)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java?rev=1646836&r1=1646835&r2=1646836&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
Fri Dec 19 19:22:20 2014
@@ -22,16 +22,6 @@ import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
-import java.math.BigDecimal;
-import java.sql.Timestamp;
-import java.util.ArrayDeque;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Deque;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
@@ -64,6 +54,16 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import java.math.BigDecimal;
+import java.sql.Timestamp;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
import parquet.filter2.predicate.FilterApi;
import parquet.filter2.predicate.FilterPredicate;
@@ -415,6 +415,8 @@ final class SearchArgumentImpl implement
}
static class ExpressionBuilder {
+ // max threshold for CNF conversion. having >8 elements in andList will be
converted to maybe
+ private static final int CNF_COMBINATIONS_THRESHOLD = 256;
private final List<PredicateLeaf> leaves = new ArrayList<PredicateLeaf>();
/**
@@ -844,14 +846,29 @@ final class SearchArgumentImpl implement
}
}
if (!andList.isEmpty()) {
- root = new ExpressionTree(ExpressionTree.Operator.AND);
- generateAllCombinations(root.children, andList, nonAndList);
+ if (checkCombinationsThreshold(andList)) {
+ root = new ExpressionTree(ExpressionTree.Operator.AND);
+ generateAllCombinations(root.children, andList, nonAndList);
+ } else {
+ root = new ExpressionTree(TruthValue.YES_NO_NULL);
+ }
}
}
}
return root;
}
+ private static boolean checkCombinationsThreshold(List<ExpressionTree>
andList) {
+ int numComb = 1;
+ for (ExpressionTree tree : andList) {
+ numComb *= tree.children.size();
+ if (numComb > CNF_COMBINATIONS_THRESHOLD) {
+ return false;
+ }
+ }
+ return true;
+ }
+
/**
* Converts multi-level ands and ors into single level ones.
* @param root the expression to flatten
Modified:
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java?rev=1646836&r1=1646835&r2=1646836&view=diff
==============================================================================
---
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
(original)
+++
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
Fri Dec 19 19:22:20 2014
@@ -18,7 +18,12 @@
package org.apache.hadoop.hive.ql.io.sarg;
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertNull;
+import static junit.framework.Assert.assertTrue;
+
import com.google.common.collect.Sets;
+
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
@@ -27,8 +32,7 @@ import org.apache.hadoop.hive.ql.io.sarg
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionTree;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.junit.*;
-import parquet.filter2.predicate.FilterPredicate;
+import org.junit.Test;
import java.beans.XMLDecoder;
import java.io.ByteArrayInputStream;
@@ -37,9 +41,7 @@ import java.math.BigDecimal;
import java.util.List;
import java.util.Set;
-import static junit.framework.Assert.assertEquals;
-import static junit.framework.Assert.assertNull;
-import static junit.framework.Assert.assertTrue;
+import parquet.filter2.predicate.FilterPredicate;
/**
* These test the SARG implementation.
@@ -175,6 +177,17 @@ public class TestSearchArgumentImpl {
assertEquals("(and leaf-1)",
ExpressionBuilder.foldMaybe(and(or(leaf(2),
constant(TruthValue.YES_NO_NULL)), leaf(1))).toString());
+ assertEquals("(and leaf-100)", ExpressionBuilder.foldMaybe(
+ ExpressionBuilder.convertToCNF(and(leaf(100),
+ or(and(leaf(0), leaf(1)),
+ and(leaf(2), leaf(3)),
+ and(leaf(4), leaf(5)),
+ and(leaf(6), leaf(7)),
+ and(leaf(8), leaf(9)),
+ and(leaf(10), leaf(11)),
+ and(leaf(12), leaf(13)),
+ and(leaf(14), leaf(15)),
+ and(leaf(16), leaf(17)))))).toString());
}
@Test
@@ -236,6 +249,25 @@ public class TestSearchArgumentImpl {
and(leaf(3), leaf(4), leaf(5)),
and(leaf(6), leaf(7)),
leaf(8))).toString());
+ assertEquals("YES_NO_NULL", ExpressionBuilder.convertToCNF(or(and(leaf(0),
leaf(1)),
+ and(leaf(2), leaf(3)),
+ and(leaf(4), leaf(5)),
+ and(leaf(6), leaf(7)),
+ and(leaf(8), leaf(9)),
+ and(leaf(10), leaf(11)),
+ and(leaf(12), leaf(13)),
+ and(leaf(14), leaf(15)),
+ and(leaf(16), leaf(17)))).toString());
+ assertEquals("(and leaf-100 YES_NO_NULL)",
ExpressionBuilder.convertToCNF(and(leaf(100),
+ or(and(leaf(0), leaf(1)),
+ and(leaf(2), leaf(3)),
+ and(leaf(4), leaf(5)),
+ and(leaf(6), leaf(7)),
+ and(leaf(8), leaf(9)),
+ and(leaf(10), leaf(11)),
+ and(leaf(12), leaf(13)),
+ and(leaf(14), leaf(15)),
+ and(leaf(16), leaf(17))))).toString());
assertNoSharedNodes(ExpressionBuilder.convertToCNF(or(and(leaf(0),
leaf(1), leaf(2)),
and(leaf(3), leaf(4), leaf(5)),
and(leaf(6), leaf(7)),