Repository: hive Updated Branches: refs/heads/master abede8ee2 -> cd8eda856
HIVE-18610: Performance: ListKeyWrapper does not check for hashcode equals, before comparing members (Gopal V, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cd8eda85 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cd8eda85 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cd8eda85 Branch: refs/heads/master Commit: cd8eda85676341e697720c90550b61360266159d Parents: abede8e Author: Gopal V <gop...@apache.org> Authored: Tue Mar 6 22:08:23 2018 -0800 Committer: Gopal V <gop...@apache.org> Committed: Tue Mar 6 22:08:39 2018 -0800 ---------------------------------------------------------------------- .../hadoop/hive/ql/exec/KeyWrapperFactory.java | 17 ++++++++++------- .../hadoop/hive/ql/exec/TestKeyWrapperFactory.java | 8 ++++++++ .../objectinspector/ObjectInspectorUtils.java | 15 ++++++++++----- 3 files changed, 28 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/cd8eda85/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java index 73683ff..3c7f0b7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java @@ -18,10 +18,7 @@ package org.apache.hadoop.hive.ql.exec; -import java.util.Arrays; - import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.lazy.LazyDouble; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectsEqualComparer; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -29,10 +26,10 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.Object import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.Text; public class KeyWrapperFactory { + public KeyWrapperFactory(ExprNodeEvaluator[] keyFields, ObjectInspector[] keyObjectInspectors, ObjectInspector[] currentKeyObjectInspectors) { this.keyFields = keyFields; @@ -66,7 +63,7 @@ public class KeyWrapperFactory { transient ListObjectsEqualComparer newKeyStructEqualComparer; class ListKeyWrapper extends KeyWrapper { - int hashcode; + int hashcode = -1; Object[] keys; // decide whether this is already in hashmap (keys in hashmap are deepcopied // version, and we need to use 'currentKeyObjectInspector'). @@ -102,8 +99,13 @@ public class KeyWrapperFactory { if (!(obj instanceof ListKeyWrapper)) { return false; } - Object[] copied_in_hashmap = ((ListKeyWrapper) obj).keys; - return equalComparer.areEqual(copied_in_hashmap, keys); + ListKeyWrapper other = ((ListKeyWrapper) obj); + if (other.hashcode != this.hashcode && this.hashcode != -1 && other.hashcode != -1) { + return false; + } + Object[] copied_in_hashmap = other.keys; + boolean result = equalComparer.areEqual(copied_in_hashmap, keys); + return result; } @Override @@ -117,6 +119,7 @@ public class KeyWrapperFactory { for (int i = 0; i < keyFields.length; i++) { keys[i] = keyFields[i].evaluate(row); } + hashcode = -1; } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/cd8eda85/ql/src/test/org/apache/hadoop/hive/ql/exec/TestKeyWrapperFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestKeyWrapperFactory.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestKeyWrapperFactory.java index 4de6dd0..03c4ed6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestKeyWrapperFactory.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestKeyWrapperFactory.java @@ -110,4 +110,12 @@ public class TestKeyWrapperFactory { assertFalse(w3.equals(w4)); assertFalse(w4.equals(w3)); } + + @Test + public void testUnsetHashCode() { + KeyWrapper w1 = factory.getKeyWrapper(); + KeyWrapper w2 = w1.copyKey(); + w1.setHashKey(); + assertTrue(w1.equals(w2)); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/cd8eda85/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java index efb1df6..8823d41 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -22,6 +22,7 @@ import java.lang.reflect.Field; import java.lang.reflect.Modifier; import java.lang.reflect.Type; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -167,13 +168,17 @@ public final class ObjectInspectorUtils { int hashcode = 1; for (Object element : keys) { hashcode = 31 * hashcode; - if (element == null) continue; - if (element instanceof LazyDouble) { - long v = Double.doubleToLongBits(((LazyDouble)element).getWritableObject().get()); + if (element == null) { + // nothing + } else if (element instanceof LazyDouble) { + long v = Double.doubleToLongBits(((LazyDouble) element).getWritableObject().get()); hashcode = hashcode + (int) (v ^ (v >>> 32)); - } else if (element instanceof DoubleWritable){ - long v = Double.doubleToLongBits(((DoubleWritable)element).get()); + } else if (element instanceof DoubleWritable) { + long v = Double.doubleToLongBits(((DoubleWritable) element).get()); hashcode = hashcode + (int) (v ^ (v >>> 32)); + } else if (element instanceof Object[]) { + // use deep hashcode for arrays + hashcode = hashcode + Arrays.deepHashCode((Object[]) element); } else { hashcode = hashcode + element.hashCode(); }