Author: nzhang
Date: Mon Nov 14 04:37:20 2011
New Revision: 1201600
URL: http://svn.apache.org/viewvc?rev=1201600&view=rev
Log:
HIVE-2553. Introduction of Hashing for IN operator for constant values (Robert
Surówka via Ning Zhang)
Added:
hive/trunk/ql/src/test/queries/clientpositive/input49.q
hive/trunk/ql/src/test/results/clientpositive/input49.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/StructTypeInfo.java
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java?rev=1201600&r1=1201599&r2=1201600&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
Mon Nov 14 04:37:20 2011
@@ -36,9 +36,9 @@ import org.apache.hadoop.hive.ql.stats.S
import org.apache.hadoop.hive.ql.stats.StatsSetupConst;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapred.JobConf;
@@ -71,8 +71,8 @@ public class TableScanOperator extends O
/**
* Other than gathering statistics for the ANALYZE command, the table scan
operator
* does not do anything special other than just forwarding the row. Since
the table
- * data is always read as part of the map-reduce framework by the mapper.
But, this
- * assumption is not true, i.e table data is not only read by the mapper,
this
+ * data is always read as part of the map-reduce framework by the mapper.
But, when this
+ * assumption stops being true, i.e. table data is no longer read only by the
mapper, this
* operator will be enhanced to read the table.
**/
@Override
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java?rev=1201600&r1=1201599&r2=1201600&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
Mon Nov 14 04:37:20 2011
@@ -18,13 +18,20 @@
package org.apache.hadoop.hive.ql.udf.generic;
+import java.util.HashSet;
+import java.util.Set;
+
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import
org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.BooleanWritable;
@@ -50,9 +57,12 @@ import org.apache.hadoop.io.BooleanWrita
public class GenericUDFIn extends GenericUDF {
private ObjectInspector[] argumentOIs;
+ private Set<Object> constantInSet;
+ private boolean isInSetConstant = true; //are variables from IN(...) constant
+
BooleanWritable bw = new BooleanWritable();
- ReturnObjectInspectorResolver conversionHelper = null;
+ ReturnObjectInspectorResolver conversionHelper;
ObjectInspector compareOI;
@Override
@@ -89,9 +99,37 @@ public class GenericUDFIn extends Generi
}
compareOI = conversionHelper.get();
+ checkIfInSetConstant();
+
return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
}
+ private void checkIfInSetConstant(){
+ for (int i = 1; i < argumentOIs.length; ++i){
+ if (!(argumentOIs[i] instanceof ConstantObjectInspector)){
+ isInSetConstant = false;
+ return;
+ }
+ }
+ }
+
+ // Start at index 1: argument 0 is the value being tested (e.g. a table column),
+ // and the values listed in IN(...) follow it.
+ private void prepareInSet(DeferredObject[] arguments) throws HiveException {
+ constantInSet = new HashSet<Object>();
+ if (compareOI.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) {
+ for (int i = 1; i < arguments.length; ++i) {
+ constantInSet.add(((PrimitiveObjectInspector) compareOI)
+ .getPrimitiveJavaObject(conversionHelper
+ .convertIfNecessary(arguments[i].get(), argumentOIs[i])));
+ }
+ } else {
+ for (int i = 1; i < arguments.length; ++i) {
+ constantInSet.add(((ConstantObjectInspector)
argumentOIs[i]).getWritableConstantValue());
+ }
+ }
+ }
+
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
bw.set(false);
@@ -100,21 +138,60 @@ public class GenericUDFIn extends Generi
return null;
}
- for (int i=1; i<arguments.length; i++) {
- if(ObjectInspectorUtils.compare(
- conversionHelper.convertIfNecessary(
- arguments[0].get(), argumentOIs[0]), compareOI,
- conversionHelper.convertIfNecessary(
- arguments[i].get(), argumentOIs[i]), compareOI) == 0) {
- bw.set(true);
- return bw;
+ if (isInSetConstant) {
+ if (constantInSet == null) {
+ prepareInSet(arguments);
}
- }
- // Nothing matched. See comment at top.
- for (int i=1; i<arguments.length; i++) {
- if(arguments[i].get() == null) {
+ switch (compareOI.getCategory()) {
+ case PRIMITIVE: {
+ if (constantInSet.contains(((PrimitiveObjectInspector) compareOI)
+
.getPrimitiveJavaObject(conversionHelper.convertIfNecessary(arguments[0].get(),
+ argumentOIs[0])))) {
+ bw.set(true);
+ return bw;
+ }
+ break;
+ }
+ case LIST: {
+ if (constantInSet.contains(((ListObjectInspector)
compareOI).getList(conversionHelper
+ .convertIfNecessary(arguments[0].get(), argumentOIs[0])))) {
+ bw.set(true);
+ return bw;
+ }
+ break;
+ }
+ case MAP: {
+ if (constantInSet.contains(((MapObjectInspector)
compareOI).getMap(conversionHelper
+ .convertIfNecessary(arguments[0].get(), argumentOIs[0])))) {
+ bw.set(true);
+ return bw;
+ }
+ break;
+ }
+ default:
+ throw new RuntimeException("Compare of unsupported constant type: "
+ + compareOI.getCategory());
+ }
+ if (constantInSet.contains(null)) {
return null;
}
+ } else {
+ for (int i = 1; i < arguments.length; i++) {
+ if (ObjectInspectorUtils.compare(
+ conversionHelper.convertIfNecessary(
+ arguments[0].get(), argumentOIs[0]), compareOI,
+ conversionHelper.convertIfNecessary(
+ arguments[i].get(), argumentOIs[i]), compareOI) == 0) {
+ bw.set(true);
+ return bw;
+ }
+ }
+ // Nothing matched. See comment at top.
+ for (int i = 1; i < arguments.length; i++) {
+ if (arguments[i].get() == null) {
+ return null;
+ }
+ }
}
return bw;
}
Added: hive/trunk/ql/src/test/queries/clientpositive/input49.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/input49.q?rev=1201600&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/input49.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/input49.q Mon Nov 14 04:37:20
2011
@@ -0,0 +1,4 @@
+create table intable (b boolean, d double, f float, i int, l bigint, s string,
t tinyint);
+insert overwrite table intable select 0, 29098519.0, 1410.0, 996, 40408519555,
"test_string", 12 from src limit 1;
+select * from intable where d in (29098519.0) and f in (1410.0) and i in (996)
and l in (40408519555) and s in ('test_string') and t in (12);
+drop table intable;
\ No newline at end of file
Added: hive/trunk/ql/src/test/results/clientpositive/input49.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/input49.q.out?rev=1201600&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/input49.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/input49.q.out Mon Nov 14
04:37:20 2011
@@ -0,0 +1,51 @@
+PREHOOK: query: create table intable (b boolean, d double, f float, i int, l
bigint, s string, t tinyint)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table intable (b boolean, d double, f float, i int, l
bigint, s string, t tinyint)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@intable
+PREHOOK: query: insert overwrite table intable select 0, 29098519.0, 1410.0,
996, 40408519555, "test_string", 12 from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@intable
+POSTHOOK: query: insert overwrite table intable select 0, 29098519.0, 1410.0,
996, 40408519555, "test_string", 12 from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@intable
+POSTHOOK: Lineage: intable.b EXPRESSION []
+POSTHOOK: Lineage: intable.d SIMPLE []
+POSTHOOK: Lineage: intable.f EXPRESSION []
+POSTHOOK: Lineage: intable.i SIMPLE []
+POSTHOOK: Lineage: intable.l SIMPLE []
+POSTHOOK: Lineage: intable.s SIMPLE []
+POSTHOOK: Lineage: intable.t EXPRESSION []
+PREHOOK: query: select * from intable where d in (29098519.0) and f in
(1410.0) and i in (996) and l in (40408519555) and s in ('test_string') and t
in (12)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intable
+PREHOOK: Output:
file:/tmp/rsurowka/hive_2011-11-09_18-54-33_568_1893553420015585903/-mr-10000
+POSTHOOK: query: select * from intable where d in (29098519.0) and f in
(1410.0) and i in (996) and l in (40408519555) and s in ('test_string') and t
in (12)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intable
+POSTHOOK: Output:
file:/tmp/rsurowka/hive_2011-11-09_18-54-33_568_1893553420015585903/-mr-10000
+POSTHOOK: Lineage: intable.b EXPRESSION []
+POSTHOOK: Lineage: intable.d SIMPLE []
+POSTHOOK: Lineage: intable.f EXPRESSION []
+POSTHOOK: Lineage: intable.i SIMPLE []
+POSTHOOK: Lineage: intable.l SIMPLE []
+POSTHOOK: Lineage: intable.s SIMPLE []
+POSTHOOK: Lineage: intable.t EXPRESSION []
+false 2.9098519E7 1410.0 996 40408519555 test_string 12
+PREHOOK: query: drop table intable
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@intable
+PREHOOK: Output: default@intable
+POSTHOOK: query: drop table intable
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@intable
+POSTHOOK: Output: default@intable
+POSTHOOK: Lineage: intable.b EXPRESSION []
+POSTHOOK: Lineage: intable.d SIMPLE []
+POSTHOOK: Lineage: intable.f EXPRESSION []
+POSTHOOK: Lineage: intable.i SIMPLE []
+POSTHOOK: Lineage: intable.l SIMPLE []
+POSTHOOK: Lineage: intable.s SIMPLE []
+POSTHOOK: Lineage: intable.t EXPRESSION []
Modified:
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/StructTypeInfo.java
URL:
http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/StructTypeInfo.java?rev=1201600&r1=1201599&r2=1201600&view=diff
==============================================================================
---
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/StructTypeInfo.java
(original)
+++
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/StructTypeInfo.java
Mon Nov 14 04:37:20 2011
@@ -30,7 +30,7 @@ import org.apache.hadoop.hive.serde2.obj
* StructTypeInfo represents the TypeInfo of a struct. A struct contains one or
* more fields each of which has a unique name and its own TypeInfo. Different
* fields can have the same or different TypeInfo.
- *
+ *
* Always use the TypeInfoFactory to create new TypeInfo objects, instead of
* directly creating an instance of this class.
*/
@@ -82,10 +82,8 @@ public final class StructTypeInfo extend
* For TypeInfoFactory use only.
*/
StructTypeInfo(List<String> names, List<TypeInfo> typeInfos) {
- allStructFieldNames = new ArrayList<String>();
- allStructFieldNames.addAll(names);
- allStructFieldTypeInfos = new ArrayList<TypeInfo>();
- allStructFieldTypeInfos.addAll(typeInfos);
+ allStructFieldNames = new ArrayList<String>(names);
+ allStructFieldTypeInfos = new ArrayList<TypeInfo>(typeInfos);
}
@Override