Repository: hive Updated Branches: refs/heads/master 69c231786 -> af8dbecb6
HIVE-11428: Performance: Struct IN() clauses are extremely slow (Hari Sankar Sivarama Subramaniyan, reviewed by Gopal V) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/af8dbecb Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/af8dbecb Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/af8dbecb Branch: refs/heads/master Commit: af8dbecb6fa95ecd372f7be00f42711ed3d993b2 Parents: 69c2317 Author: Hari Subramaniyan <[email protected]> Authored: Fri Aug 7 14:16:43 2015 -0700 Committer: Hari Subramaniyan <[email protected]> Committed: Fri Aug 7 14:16:43 2015 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/stats/StatsUtils.java | 44 ++++++++++++++++---- .../hive/ql/udf/generic/GenericUDFIn.java | 11 +++++ .../test/results/clientpositive/null_cast.q.out | 4 +- .../results/clientpositive/udf_inline.q.out | 6 +-- .../objectinspector/ObjectInspectorFactory.java | 7 +++- .../objectinspector/ObjectInspectorUtils.java | 16 +++++++ 6 files changed, 73 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/af8dbecb/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index a069394..55aea0e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -60,11 +60,13 @@ import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; @@ -940,15 +942,21 @@ public class StatsUtils { } break; case STRUCT: - StructObjectInspector soi = (StructObjectInspector) oi; - - // add constant object overhead for struct - result += JavaDataModel.get().object(); - - // add constant struct field names references overhead - result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref(); - for (StructField field : soi.getAllStructFieldRefs()) { - result += getSizeOfComplexTypes(conf, field.getFieldObjectInspector()); + if (oi instanceof StandardConstantStructObjectInspector) { + // constant map projection of known length + StandardConstantStructObjectInspector scsoi = (StandardConstantStructObjectInspector) oi; + result += getSizeOfStruct(scsoi); + } else { + StructObjectInspector soi = (StructObjectInspector) oi; + + // add constant object overhead for struct + result += JavaDataModel.get().object(); + + // add constant struct field names references overhead + result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref(); + for (StructField field : soi.getAllStructFieldRefs()) { + result += getSizeOfComplexTypes(conf, field.getFieldObjectInspector()); + } } break; case UNION: @@ -1053,6 +1061,24 @@ public class StatsUtils { return result; } + public static long getSizeOfStruct(StandardConstantStructObjectInspector soi) { + long result = 0; + // add constant object overhead for struct + result += JavaDataModel.get().object(); + + // add constant struct field names references overhead + result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref(); + List<?> value = soi.getWritableConstantValue(); + List<? extends StructField> fields = soi.getAllStructFieldRefs(); + if (value == null || value.size() != fields.size()) { + return result; + } + for (int i = 0; i < fields.size(); i++) { + result += getWritableSize(fields.get(i).getFieldObjectInspector(), value.get(i)); + } + return result; + } + /** * Get size of primitive data types based on their respective writable object inspector * @param oi http://git-wip-us.apache.org/repos/asf/hive/blob/af8dbecb/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java index 38b1dc4..56ac3e1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java @@ -32,9 +32,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.BooleanWritable; +import com.esotericsoftware.minlog.Log; + /** * GenericUDFIn * @@ -168,6 +171,14 @@ public class GenericUDFIn extends GenericUDF { } break; } + case STRUCT: { + if (constantInSet.contains(((StructObjectInspector) compareOI).getStructFieldsDataAsList(conversionHelper + .convertIfNecessary(arguments[0].get(), argumentOIs[0])))) { + bw.set(true); + return bw; + } + break; + } default: throw new RuntimeException("Compare of unsupported constant type: " + compareOI.getCategory()); http://git-wip-us.apache.org/repos/asf/hive/blob/af8dbecb/ql/src/test/results/clientpositive/null_cast.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/null_cast.q.out b/ql/src/test/results/clientpositive/null_cast.q.out index 810eacd..b5af69b 100644 --- a/ql/src/test/results/clientpositive/null_cast.q.out +++ b/ql/src/test/results/clientpositive/null_cast.q.out @@ -25,10 +25,10 @@ STAGE PLANS: Select Operator expressions: array(null,0) (type: array<int>), array(null,array()) (type: array<array<string>>), array(null,map()) (type: array<map<string,string>>), array(null,struct(0)) (type: array<struct<col1:int>>) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 340000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 108000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 340000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 108000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/af8dbecb/ql/src/test/results/clientpositive/udf_inline.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udf_inline.q.out b/ql/src/test/results/clientpositive/udf_inline.q.out index 45bd463..7d372f3 100644 --- a/ql/src/test/results/clientpositive/udf_inline.q.out +++ b/ql/src/test/results/clientpositive/udf_inline.q.out @@ -33,13 +33,13 @@ STAGE PLANS: Select Operator expressions: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) (type: array<struct<col1:int,col2:string>>) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 32000 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 32000 Basic stats: COMPLETE Column stats: COMPLETE function name: inline Limit Number of rows: 2 - Statistics: Num rows: 2 Data size: 4880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: SELECT inline( http://git-wip-us.apache.org/repos/asf/hive/blob/af8dbecb/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java index c35f4e9..97bb715 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java @@ -215,7 +215,6 @@ public final class ObjectInspectorFactory { return new StandardConstantListObjectInspector(listElementObjectInspector, constantValue); } - static ConcurrentHashMap<List<ObjectInspector>, StandardMapObjectInspector> cachedStandardMapObjectInspector = new ConcurrentHashMap<List<ObjectInspector>, StandardMapObjectInspector>(); @@ -297,6 +296,12 @@ public final class ObjectInspectorFactory { return result; } + public static StandardConstantStructObjectInspector getStandardConstantStructObjectInspector( + List<String> structFieldNames, + List<ObjectInspector> structFieldObjectInspectors, List<?> value) { + return new StandardConstantStructObjectInspector(structFieldNames, structFieldObjectInspectors, value); + } + static ConcurrentHashMap<List<StructObjectInspector>, UnionStructObjectInspector> cachedUnionStructObjectInspector = new ConcurrentHashMap<List<StructObjectInspector>, UnionStructObjectInspector>(); http://git-wip-us.apache.org/repos/asf/hive/blob/af8dbecb/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java index 6ef9f5d..64dd512 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -1073,6 +1073,21 @@ public final class ObjectInspectorUtils { ObjectInspectorCopyOption.WRITABLE ), (Map<?, ?>)writableValue); + case STRUCT: + StructObjectInspector soi = (StructObjectInspector) oi; + List<? extends StructField> fields = soi.getAllStructFieldRefs(); + List<String> fieldNames = new ArrayList<String>(fields.size()); + List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>( + fields.size()); + for (StructField f : fields) { + fieldNames.add(f.getFieldName()); + fieldObjectInspectors.add(getStandardObjectInspector(f + .getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE)); + } + return ObjectInspectorFactory.getStandardConstantStructObjectInspector( + fieldNames, + fieldObjectInspectors, + (List<?>)writableValue); default: throw new IllegalArgumentException( writableOI.getCategory() + " not yet supported for constant OI"); @@ -1088,6 +1103,7 @@ public final class ObjectInspectorUtils { case PRIMITIVE: case LIST: case MAP: + case STRUCT: return true; default: return false;
