Author: nzhang
Date: Mon Nov 14 04:37:20 2011
New Revision: 1201600

URL: http://svn.apache.org/viewvc?rev=1201600&view=rev
Log:
HIVE-2553. Introduction of Hashing for IN operator for constant values (Robert 
Surówka via Ning Zhang)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/input49.q
    hive/trunk/ql/src/test/results/clientpositive/input49.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/StructTypeInfo.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java?rev=1201600&r1=1201599&r2=1201600&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java Mon Nov 14 04:37:20 2011
@@ -36,9 +36,9 @@ import org.apache.hadoop.hive.ql.stats.S
 import org.apache.hadoop.hive.ql.stats.StatsSetupConst;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.mapred.JobConf;
 
@@ -71,8 +71,8 @@ public class TableScanOperator extends O
   /**
    * Other than gathering statistics for the ANALYZE command, the table scan operator
    * does not do anything special other than just forwarding the row. Since the table
-   * data is always read as part of the map-reduce framework by the mapper. But, this
-   * assumption is not true, i.e table data is not only read by the mapper, this
+   * data is always read as part of the map-reduce framework by the mapper. But, when this
+   * assumption stops to be true, i.e table data won't be only read by the mapper, this
    * operator will be enhanced to read the table.
    **/
   @Override

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java?rev=1201600&r1=1201599&r2=1201600&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java Mon Nov 14 04:37:20 2011
@@ -18,13 +18,20 @@
 
 package org.apache.hadoop.hive.ql.udf.generic;
 
+import java.util.HashSet;
+import java.util.Set;
+
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.io.BooleanWritable;
 
@@ -50,9 +57,12 @@ import org.apache.hadoop.io.BooleanWrita
 public class GenericUDFIn extends GenericUDF {
 
   private ObjectInspector[] argumentOIs;
+  private Set<Object> constantInSet;
+  private boolean isInSetConstant = true; //are variables from IN(...) constant
+
   BooleanWritable bw = new BooleanWritable();
 
-  ReturnObjectInspectorResolver conversionHelper = null;
+  ReturnObjectInspectorResolver conversionHelper;
   ObjectInspector compareOI;
 
   @Override
@@ -89,9 +99,37 @@ public class GenericUDFIn extends Generi
     }
     compareOI = conversionHelper.get();
 
+    checkIfInSetConstant();
+
     return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
   }
 
+  private void checkIfInSetConstant(){
+    for (int i = 1; i < argumentOIs.length; ++i){
+      if (!(argumentOIs[i] instanceof ConstantObjectInspector)){
+        isInSetConstant = false;
+        return;
+      }
+    }
+  }
+
+  // we start at index 1, since at 0 is the variable from table column
+  // (and those from IN(...) follow it)
+  private void prepareInSet(DeferredObject[] arguments) throws HiveException {
+    constantInSet = new HashSet<Object>();
+    if (compareOI.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) {
+      for (int i = 1; i < arguments.length; ++i) {
+        constantInSet.add(((PrimitiveObjectInspector) compareOI)
+            .getPrimitiveJavaObject(conversionHelper
+                .convertIfNecessary(arguments[i].get(), argumentOIs[i])));
+      }
+    } else {
+      for (int i = 1; i < arguments.length; ++i) {
+        constantInSet.add(((ConstantObjectInspector) argumentOIs[i]).getWritableConstantValue());
+      }
+    }
+  }
+
   @Override
   public Object evaluate(DeferredObject[] arguments) throws HiveException {
     bw.set(false);
@@ -100,21 +138,60 @@ public class GenericUDFIn extends Generi
       return null;
     }
 
-    for (int i=1; i<arguments.length; i++) {
-      if(ObjectInspectorUtils.compare(
-          conversionHelper.convertIfNecessary(
-              arguments[0].get(), argumentOIs[0]), compareOI,
-          conversionHelper.convertIfNecessary(
-              arguments[i].get(), argumentOIs[i]), compareOI) == 0) {
-        bw.set(true);
-        return bw;
+    if (isInSetConstant) {
+      if (constantInSet == null) {
+        prepareInSet(arguments);
       }
-    }
-    // Nothing matched. See comment at top.
-    for (int i=1; i<arguments.length; i++) {
-      if(arguments[i].get() == null) {
+      switch (compareOI.getCategory()) {
+      case PRIMITIVE: {
+        if (constantInSet.contains(((PrimitiveObjectInspector) compareOI)
+            .getPrimitiveJavaObject(conversionHelper.convertIfNecessary(arguments[0].get(),
+                argumentOIs[0])))) {
+          bw.set(true);
+          return bw;
+        }
+        break;
+      }
+      case LIST: {
+        if (constantInSet.contains(((ListObjectInspector) compareOI).getList(conversionHelper
+            .convertIfNecessary(arguments[0].get(), argumentOIs[0])))) {
+          bw.set(true);
+          return bw;
+        }
+        break;
+      }
+      case MAP: {
+        if (constantInSet.contains(((MapObjectInspector) compareOI).getMap(conversionHelper
+            .convertIfNecessary(arguments[0].get(), argumentOIs[0])))) {
+          bw.set(true);
+          return bw;
+        }
+        break;
+      }
+      default:
+        throw new RuntimeException("Compare of unsupported constant type: "
+            + compareOI.getCategory());
+      }
+      if (constantInSet.contains(null)) {
         return null;
       }
+    } else {
+      for (int i = 1; i < arguments.length; i++) {
+        if (ObjectInspectorUtils.compare(
+            conversionHelper.convertIfNecessary(
+                arguments[0].get(), argumentOIs[0]), compareOI,
+            conversionHelper.convertIfNecessary(
+                arguments[i].get(), argumentOIs[i]), compareOI) == 0) {
+          bw.set(true);
+          return bw;
+        }
+      }
+      // Nothing matched. See comment at top.
+      for (int i = 1; i < arguments.length; i++) {
+        if (arguments[i].get() == null) {
+          return null;
+        }
+      }
     }
     return bw;
   }

Added: hive/trunk/ql/src/test/queries/clientpositive/input49.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/input49.q?rev=1201600&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/input49.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/input49.q Mon Nov 14 04:37:20 2011
@@ -0,0 +1,4 @@
+create table intable (b boolean, d double, f float, i int, l bigint, s string, t tinyint);
+insert overwrite table intable select 0, 29098519.0, 1410.0, 996, 40408519555, "test_string", 12 from src limit 1;
+select * from intable where d in (29098519.0) and f in (1410.0) and i in (996) and l in (40408519555) and s in ('test_string') and t in (12);
+drop table intable;
\ No newline at end of file

Added: hive/trunk/ql/src/test/results/clientpositive/input49.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/input49.q.out?rev=1201600&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/input49.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/input49.q.out Mon Nov 14 04:37:20 2011
@@ -0,0 +1,51 @@
+PREHOOK: query: create table intable (b boolean, d double, f float, i int, l bigint, s string, t tinyint)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table intable (b boolean, d double, f float, i int, l bigint, s string, t tinyint)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@intable
+PREHOOK: query: insert overwrite table intable select 0, 29098519.0, 1410.0, 996, 40408519555, "test_string", 12 from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@intable
+POSTHOOK: query: insert overwrite table intable select 0, 29098519.0, 1410.0, 996, 40408519555, "test_string", 12 from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@intable
+POSTHOOK: Lineage: intable.b EXPRESSION []
+POSTHOOK: Lineage: intable.d SIMPLE []
+POSTHOOK: Lineage: intable.f EXPRESSION []
+POSTHOOK: Lineage: intable.i SIMPLE []
+POSTHOOK: Lineage: intable.l SIMPLE []
+POSTHOOK: Lineage: intable.s SIMPLE []
+POSTHOOK: Lineage: intable.t EXPRESSION []
+PREHOOK: query: select * from intable where d in (29098519.0) and f in (1410.0) and i in (996) and l in (40408519555) and s in ('test_string') and t in (12)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intable
+PREHOOK: Output: file:/tmp/rsurowka/hive_2011-11-09_18-54-33_568_1893553420015585903/-mr-10000
+POSTHOOK: query: select * from intable where d in (29098519.0) and f in (1410.0) and i in (996) and l in (40408519555) and s in ('test_string') and t in (12)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intable
+POSTHOOK: Output: file:/tmp/rsurowka/hive_2011-11-09_18-54-33_568_1893553420015585903/-mr-10000
+POSTHOOK: Lineage: intable.b EXPRESSION []
+POSTHOOK: Lineage: intable.d SIMPLE []
+POSTHOOK: Lineage: intable.f EXPRESSION []
+POSTHOOK: Lineage: intable.i SIMPLE []
+POSTHOOK: Lineage: intable.l SIMPLE []
+POSTHOOK: Lineage: intable.s SIMPLE []
+POSTHOOK: Lineage: intable.t EXPRESSION []
+false  2.9098519E7     1410.0  996     40408519555     test_string     12
+PREHOOK: query: drop table intable
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@intable
+PREHOOK: Output: default@intable
+POSTHOOK: query: drop table intable
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@intable
+POSTHOOK: Output: default@intable
+POSTHOOK: Lineage: intable.b EXPRESSION []
+POSTHOOK: Lineage: intable.d SIMPLE []
+POSTHOOK: Lineage: intable.f EXPRESSION []
+POSTHOOK: Lineage: intable.i SIMPLE []
+POSTHOOK: Lineage: intable.l SIMPLE []
+POSTHOOK: Lineage: intable.s SIMPLE []
+POSTHOOK: Lineage: intable.t EXPRESSION []

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/StructTypeInfo.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/StructTypeInfo.java?rev=1201600&r1=1201599&r2=1201600&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/StructTypeInfo.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/StructTypeInfo.java Mon Nov 14 04:37:20 2011
@@ -30,7 +30,7 @@ import org.apache.hadoop.hive.serde2.obj
  * StructTypeInfo represents the TypeInfo of a struct. A struct contains one or
  * more fields each of which has a unique name and its own TypeInfo. Different
  * fields can have the same or different TypeInfo.
- * 
+ *
  * Always use the TypeInfoFactory to create new TypeInfo objects, instead of
  * directly creating an instance of this class.
  */
@@ -82,10 +82,8 @@ public final class StructTypeInfo extend
    * For TypeInfoFactory use only.
    */
   StructTypeInfo(List<String> names, List<TypeInfo> typeInfos) {
-    allStructFieldNames = new ArrayList<String>();
-    allStructFieldNames.addAll(names);
-    allStructFieldTypeInfos = new ArrayList<TypeInfo>();
-    allStructFieldTypeInfos.addAll(typeInfos);
+    allStructFieldNames = new ArrayList<String>(names);
+    allStructFieldTypeInfos = new ArrayList<TypeInfo>(typeInfos);
   }
 
   @Override


Reply via email to