Repository: hive
Updated Branches:
  refs/heads/master df12aec50 -> ffb7e043e


HIVE-18866 : Semijoin and analyze: Implement a Long -> Hash64 vector fast-path 
(Gopal Vijayaraghavan, Sergey Shelukhin, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5df1eb3f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5df1eb3f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5df1eb3f

Branch: refs/heads/master
Commit: 5df1eb3f16111bbf70ae8d17b915a4bffb67594b
Parents: 43e2f96
Author: sergey <ser...@apache.org>
Authored: Tue May 22 12:06:12 2018 -0700
Committer: sergey <ser...@apache.org>
Committed: Tue May 22 12:14:11 2018 -0700

----------------------------------------------------------------------
 itests/hive-jmh/pom.xml                         |   5 +
 .../hive/benchmark/hash/Murmur3Bench.java       | 107 +++++++++++++++++++
 .../hadoop/hive/common/ndv/hll/HyperLogLog.java |  21 ++--
 .../apache/hive/common/util/BloomKFilter.java   |   4 +-
 .../org/apache/hive/common/util/Murmur3.java    |  46 ++++++++
 .../apache/hive/common/util/TestMurmur3.java    |  29 ++++-
 6 files changed, 193 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/5df1eb3f/itests/hive-jmh/pom.xml
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/pom.xml b/itests/hive-jmh/pom.xml
index c0a6564..5eb3026 100644
--- a/itests/hive-jmh/pom.xml
+++ b/itests/hive-jmh/pom.xml
@@ -65,6 +65,11 @@
     </dependency>
     <dependency>
       <groupId>org.apache.hive</groupId>
+      <artifactId>hive-storage-api</artifactId>
+       <version>2.7.0-SNAPSHOT</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
       <artifactId>hive-exec</artifactId>
        <version>${project.version}</version>
     </dependency>

http://git-wip-us.apache.org/repos/asf/hive/blob/5df1eb3f/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java
----------------------------------------------------------------------
diff --git 
a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java
 
b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java
new file mode 100644
index 0000000..cd85148
--- /dev/null
+++ 
b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.benchmark.hash;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import 
org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar;
+import org.apache.hive.common.util.Murmur3;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * This test measures the performance for vectorization.
+ * <p/>
+ * This test uses JMH framework for benchmarking.
+ * You may execute this benchmark tool using JMH command line in different 
ways:
+ * <p/>
+ * To use the settings shown in the main() function, use:
+ * $ java -cp target/benchmarks.jar org.apache.hive.benchmark.hash.Murmur3Bench
+ * <p/>
+ * To use the default settings used by JMH, use:
+ * $ java -jar target/benchmarks.jar 
org.apache.hive.benchmark.hash.Murmur3Bench
+ * <p/>
+ * To specify different parameters, use:
+ * - This command will use 10 warm-up iterations, 5 test iterations, and 2 
forks. And it will
+ * display the Average Time (avgt) in Microseconds (us)
+ * - Benchmark mode. Available modes are:
+ * [Throughput/thrpt, AverageTime/avgt, SampleTime/sample, SingleShotTime/ss, 
All/all]
+ * - Output time unit. Available time units are: [m, s, ms, us, ns].
+ * <p/>
+ * $ java -jar target/benchmarks.jar 
org.apache.hive.benchmark.hash.Murmur3Bench
+ * -wi 10 -i 5 -f 2 -bm avgt -tu us
+ */
+@State(Scope.Benchmark)
+public class Murmur3Bench {
+  @BenchmarkMode(Mode.AverageTime)
+  @Fork(1)
+  @State(Scope.Thread)
+  @OutputTimeUnit(TimeUnit.MILLISECONDS)
+  public static class Hash64Bench {
+    
+    @Param({ "-1"}) //"123456789", "987654321", "1234", "4321",
+    long v;
+    
+    
+    
+    @Benchmark
+    @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
+    @Measurement(iterations = 20, time = 2, timeUnit = TimeUnit.SECONDS)
+    public long longHash() {
+      long k = 0;
+      for (int i = 0; i < 4096; i++) {
+        k += Murmur3.hash64(v);
+      }
+      return k;
+    }
+    
+    @Benchmark
+    @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
+    @Measurement(iterations = 20, time = 2, timeUnit = TimeUnit.SECONDS)
+    public long longBytesHash() {
+      ByteBuffer LONG_BUFFER = ByteBuffer.allocate(Long.BYTES);
+      long k = 0;
+      for (int i = 0; i < 4096; i++) {
+        LONG_BUFFER.putLong(0, v+i);
+        k += Murmur3.hash64(LONG_BUFFER.array());
+      }
+      return k;
+    }
+  }
+
+  public static void main(String[] args) throws RunnerException {
+    Options opt = new OptionsBuilder().include(".*" + 
Murmur3Bench.class.getSimpleName() +
+        ".*").build();
+    new Runner(opt).run();
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/5df1eb3f/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
----------------------------------------------------------------------
diff --git 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
index 8bdb47b..07a93c6 100644
--- 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
+++ 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
@@ -62,9 +62,6 @@ public class HyperLogLog implements NumDistinctValueEstimator 
{
   private final static int DEFAULT_HASH_BITS = 64;
   private final static long HASH64_ZERO = Murmur3.hash64(new byte[] {0});
   private final static long HASH64_ONE = Murmur3.hash64(new byte[] {1});
-  private final static ByteBuffer SHORT_BUFFER = 
ByteBuffer.allocate(Short.BYTES);
-  private final static ByteBuffer INT_BUFFER = 
ByteBuffer.allocate(Integer.BYTES);
-  private final static ByteBuffer LONG_BUFFER = 
ByteBuffer.allocate(Long.BYTES);
 
   public enum EncodingType {
     SPARSE, DENSE
@@ -212,33 +209,27 @@ public class HyperLogLog implements 
NumDistinctValueEstimator {
   }
 
   public void addShort(short val) {
-    SHORT_BUFFER.putShort(0, val);
-    add(Murmur3.hash64(SHORT_BUFFER.array()));
+    add(Murmur3.hash64(val));
   }
 
   public void addInt(int val) {
-    INT_BUFFER.putInt(0, val);
-    add(Murmur3.hash64(INT_BUFFER.array()));
+    add(Murmur3.hash64(val));
   }
 
   public void addLong(long val) {
-    LONG_BUFFER.putLong(0, val);
-    add(Murmur3.hash64(LONG_BUFFER.array()));
+    add(Murmur3.hash64(val));
   }
 
   public void addFloat(float val) {
-    INT_BUFFER.putFloat(0, val);
-    add(Murmur3.hash64(INT_BUFFER.array()));
+    add(Murmur3.hash64(Float.floatToIntBits(val)));
   }
 
   public void addDouble(double val) {
-    LONG_BUFFER.putDouble(0, val);
-    add(Murmur3.hash64(LONG_BUFFER.array()));
+    add(Murmur3.hash64(Double.doubleToLongBits(val)));
   }
 
   public void addChar(char val) {
-    SHORT_BUFFER.putChar(0, val);
-    add(Murmur3.hash64(SHORT_BUFFER.array()));
+    add(Murmur3.hash64((short)val));
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hive/blob/5df1eb3f/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java 
b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
index 6ccf5ab..5b1914d 100644
--- a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
+++ b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
@@ -156,7 +156,7 @@ public class BloomKFilter {
 
   public void addLong(long val) {
     // puts long in little endian order
-    addBytes(longToByteArrayLE(val));
+    addHash(Murmur3.hash64(val));
   }
 
   public void addFloat(float val) {
@@ -239,7 +239,7 @@ public class BloomKFilter {
   }
 
   public boolean testLong(long val) {
-    return testBytes(longToByteArrayLE(val));
+    return testHash(Murmur3.hash64(val));
   }
 
   public boolean testFloat(float val) {

http://git-wip-us.apache.org/repos/asf/hive/blob/5df1eb3f/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hive/common/util/Murmur3.java 
b/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
index c896fa7..8aae28b 100644
--- a/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
+++ b/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
@@ -155,6 +155,52 @@ public class Murmur3 {
     return hash64(data, 0, data.length, DEFAULT_SEED);
   }
 
+  public static long hash64(long data) {
+    long hash = DEFAULT_SEED;
+    long k = Long.reverseBytes(data);
+    int length = Long.BYTES;
+    // mix functions
+    k *= C1;
+    k = Long.rotateLeft(k, R1);
+    k *= C2;
+    hash ^= k;
+    hash = Long.rotateLeft(hash, R2) * M + N1;
+    // finalization
+    hash ^= length;
+    hash = fmix64(hash);
+    return hash;
+  }
+
+  public static long hash64(int data) {
+    long k1 = Integer.reverseBytes(data) & (-1L >>> 32);
+    int length = Integer.BYTES;
+    long hash = DEFAULT_SEED;
+    k1 *= C1;
+    k1 = Long.rotateLeft(k1, R1);
+    k1 *= C2;
+    hash ^= k1;
+    // finalization
+    hash ^= length;
+    hash = fmix64(hash);
+    return hash;
+  }
+
+  public static long hash64(short data) {
+    long hash = DEFAULT_SEED;
+    long k1 = 0;
+    k1 ^= ((long) data & 0xff) << 8;
+    k1 ^= ((long)((data & 0xFF00) >> 8) & 0xff);
+    k1 *= C1;
+    k1 = Long.rotateLeft(k1, R1);
+    k1 *= C2;
+    hash ^= k1;
+
+    // finalization
+    hash ^= Short.BYTES;
+    hash = fmix64(hash);
+    return hash;
+  }
+
   public static long hash64(byte[] data, int offset, int length) {
     return hash64(data, offset, length, DEFAULT_SEED);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/5df1eb3f/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java
----------------------------------------------------------------------
diff --git a/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java 
b/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java
index f20366b..16955c1 100644
--- a/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java
+++ b/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java
@@ -18,7 +18,7 @@
 
 package org.apache.hive.common.util;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.*;
 import org.apache.hive.common.util.Murmur3.IncrementalHash32;
 
 import com.google.common.hash.HashFunction;
@@ -222,7 +222,32 @@ public class TestMurmur3 {
       assertEquals(gl2, m2);
     }
   }
-  
+
+  @Test
+  public void test64() {
+    final int seed = 123, iters = 1000000;
+    ByteBuffer SHORT_BUFFER = ByteBuffer.allocate(Short.BYTES);
+    ByteBuffer INT_BUFFER = ByteBuffer.allocate(Integer.BYTES);
+    ByteBuffer LONG_BUFFER = ByteBuffer.allocate(Long.BYTES);
+    Random rdm = new Random(seed);
+    for (int i = 0; i < iters; ++i) {
+      long ln = rdm.nextLong();
+      int in = rdm.nextInt();
+      short sn = (short) (rdm.nextInt(2* Short.MAX_VALUE - 1) - 
Short.MAX_VALUE);
+      float fn = rdm.nextFloat();
+      double dn = rdm.nextDouble();
+      SHORT_BUFFER.putShort(0, sn);
+      assertEquals(Murmur3.hash64(SHORT_BUFFER.array()), Murmur3.hash64(sn));
+      INT_BUFFER.putInt(0, in);
+      assertEquals(Murmur3.hash64(INT_BUFFER.array()), Murmur3.hash64(in));
+      LONG_BUFFER.putLong(0, ln);
+      assertEquals(Murmur3.hash64(LONG_BUFFER.array()), Murmur3.hash64(ln));
+      INT_BUFFER.putFloat(0, fn);
+      assertEquals(Murmur3.hash64(INT_BUFFER.array()), 
Murmur3.hash64(Float.floatToIntBits(fn)));
+      LONG_BUFFER.putDouble(0, dn);
+      assertEquals(Murmur3.hash64(LONG_BUFFER.array()), 
Murmur3.hash64(Double.doubleToLongBits(dn)));
+    }
+  }
 
   @Test
   public void testIncremental() {

Reply via email to