Author: prasanthj
Date: Fri Dec 19 19:22:20 2014
New Revision: 1646836

URL: http://svn.apache.org/r1646836
Log:
HIVE-9166: Place an upper bound for SARG CNF conversion (Prasanth Jayachandran 
reviewed by Owen O'Malley)

Modified:
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
    
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java?rev=1646836&r1=1646835&r2=1646836&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
 (original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
 Fri Dec 19 19:22:20 2014
@@ -22,16 +22,6 @@ import com.esotericsoftware.kryo.Kryo;
 import com.esotericsoftware.kryo.io.Input;
 import com.esotericsoftware.kryo.io.Output;
 
-import java.math.BigDecimal;
-import java.sql.Timestamp;
-import java.util.ArrayDeque;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Deque;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
@@ -64,6 +54,16 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
+import java.math.BigDecimal;
+import java.sql.Timestamp;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
 import parquet.filter2.predicate.FilterApi;
 import parquet.filter2.predicate.FilterPredicate;
 
@@ -415,6 +415,8 @@ final class SearchArgumentImpl implement
   }
 
   static class ExpressionBuilder {
+    // max threshold for CNF conversion. having >8 elements in andList will be 
converted to maybe
+    private static final int CNF_COMBINATIONS_THRESHOLD = 256;
     private final List<PredicateLeaf> leaves = new ArrayList<PredicateLeaf>();
 
     /**
@@ -844,14 +846,29 @@ final class SearchArgumentImpl implement
             }
           }
           if (!andList.isEmpty()) {
-            root = new ExpressionTree(ExpressionTree.Operator.AND);
-            generateAllCombinations(root.children, andList, nonAndList);
+            if (checkCombinationsThreshold(andList)) {
+              root = new ExpressionTree(ExpressionTree.Operator.AND);
+              generateAllCombinations(root.children, andList, nonAndList);
+            } else {
+              root = new ExpressionTree(TruthValue.YES_NO_NULL);
+            }
           }
         }
       }
       return root;
     }
 
+    private static boolean checkCombinationsThreshold(List<ExpressionTree> 
andList) {
+      int numComb = 1;
+      for (ExpressionTree tree : andList) {
+        numComb *= tree.children.size();
+        if (numComb > CNF_COMBINATIONS_THRESHOLD) {
+          return false;
+        }
+      }
+      return true;
+    }
+
     /**
      * Converts multi-level ands and ors into single level ones.
      * @param root the expression to flatten

Modified: 
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java?rev=1646836&r1=1646835&r2=1646836&view=diff
==============================================================================
--- 
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
 (original)
+++ 
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
 Fri Dec 19 19:22:20 2014
@@ -18,7 +18,12 @@
 
 package org.apache.hadoop.hive.ql.io.sarg;
 
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertNull;
+import static junit.framework.Assert.assertTrue;
+
 import com.google.common.collect.Sets;
+
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
@@ -27,8 +32,7 @@ import org.apache.hadoop.hive.ql.io.sarg
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionTree;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.junit.*;
-import parquet.filter2.predicate.FilterPredicate;
+import org.junit.Test;
 
 import java.beans.XMLDecoder;
 import java.io.ByteArrayInputStream;
@@ -37,9 +41,7 @@ import java.math.BigDecimal;
 import java.util.List;
 import java.util.Set;
 
-import static junit.framework.Assert.assertEquals;
-import static junit.framework.Assert.assertNull;
-import static junit.framework.Assert.assertTrue;
+import parquet.filter2.predicate.FilterPredicate;
 
 /**
  * These test the SARG implementation.
@@ -175,6 +177,17 @@ public class TestSearchArgumentImpl {
     assertEquals("(and leaf-1)",
         ExpressionBuilder.foldMaybe(and(or(leaf(2),
             constant(TruthValue.YES_NO_NULL)), leaf(1))).toString());
+    assertEquals("(and leaf-100)", ExpressionBuilder.foldMaybe(
+        ExpressionBuilder.convertToCNF(and(leaf(100),
+            or(and(leaf(0), leaf(1)),
+                and(leaf(2), leaf(3)),
+                and(leaf(4), leaf(5)),
+                and(leaf(6), leaf(7)),
+                and(leaf(8), leaf(9)),
+                and(leaf(10), leaf(11)),
+                and(leaf(12), leaf(13)),
+                and(leaf(14), leaf(15)),
+                and(leaf(16), leaf(17)))))).toString());
   }
 
   @Test
@@ -236,6 +249,25 @@ public class TestSearchArgumentImpl {
             and(leaf(3), leaf(4), leaf(5)),
             and(leaf(6), leaf(7)),
             leaf(8))).toString());
+    assertEquals("YES_NO_NULL", ExpressionBuilder.convertToCNF(or(and(leaf(0), 
leaf(1)),
+        and(leaf(2), leaf(3)),
+        and(leaf(4), leaf(5)),
+        and(leaf(6), leaf(7)),
+        and(leaf(8), leaf(9)),
+        and(leaf(10), leaf(11)),
+        and(leaf(12), leaf(13)),
+        and(leaf(14), leaf(15)),
+        and(leaf(16), leaf(17)))).toString());
+    assertEquals("(and leaf-100 YES_NO_NULL)", 
ExpressionBuilder.convertToCNF(and(leaf(100),
+        or(and(leaf(0), leaf(1)),
+        and(leaf(2), leaf(3)),
+        and(leaf(4), leaf(5)),
+        and(leaf(6), leaf(7)),
+        and(leaf(8), leaf(9)),
+        and(leaf(10), leaf(11)),
+        and(leaf(12), leaf(13)),
+        and(leaf(14), leaf(15)),
+        and(leaf(16), leaf(17))))).toString());
     assertNoSharedNodes(ExpressionBuilder.convertToCNF(or(and(leaf(0), 
leaf(1), leaf(2)),
         and(leaf(3), leaf(4), leaf(5)),
         and(leaf(6), leaf(7)),


Reply via email to