This is an automated email from the ASF dual-hosted git repository.

jonwei pushed a commit to branch 0.14.0-incubating
in repository https://gitbox.apache.org/repos/asf/incubator-druid.git


The following commit(s) were added to refs/heads/0.14.0-incubating by this push:
     new 3d8b1bc  Speed up StringDimensionIndexer.estimateEncodedKeyComponentSize (#8466)
3d8b1bc is described below

commit 3d8b1bca033bedc3effa93b7579f96121346a428
Author: Jonathan Wei <jon-...@users.noreply.github.com>
AuthorDate: Wed Sep 4 20:26:04 2019 -0700

    Speed up StringDimensionIndexer.estimateEncodedKeyComponentSize (#8466)
    
    * Speed up StringDimensionIndexer.estimateEncodedKeyComponentSize
    
    * Remove print
    
    * Move benchmark, add header
---
 .../indexing/StringDimensionIndexerBenchmark.java  | 78 ++++++++++++++++++++++
 .../druid/segment/StringDimensionIndexer.java      | 12 ++--
 2 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java
new file mode 100644
index 0000000..eacfc65
--- /dev/null
+++ b/benchmarks/src/main/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.benchmark.indexing;
+
+import org.apache.druid.data.input.impl.DimensionSchema;
+import org.apache.druid.segment.StringDimensionIndexer;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+import java.util.concurrent.TimeUnit;
+
+@State(Scope.Benchmark)
+@Fork(value = 1)
+@Warmup(iterations = 10)
+@Measurement(iterations = 10)
+public class StringDimensionIndexerBenchmark
+{
+  StringDimensionIndexer indexer;
+  int[] exampleArray;
+
+  @Param({"10000"})
+  public int cardinality;
+
+  @Param({"8"})
+  public int rowSize;
+
+  @Setup
+  public void setup()
+  {
+    indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true);
+
+    for (int i = 0; i < cardinality; i++) {
+      indexer.processRowValsToUnsortedEncodedKeyComponent("abcd-" + i, true);
+    }
+
+    exampleArray = new int[rowSize];
+    int stride = cardinality / rowSize;
+    for (int i = 0; i < rowSize; i++) {
+      exampleArray[i] = i * stride;
+    }
+  }
+
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public void estimateEncodedKeyComponentSize(Blackhole blackhole)
+  {
+    long sz = indexer.estimateEncodedKeyComponentSize(exampleArray);
+    blackhole.consume(sz);
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
index cf201e4..3bedd0d 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
@@ -305,10 +305,14 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
     // even though they are stored just once. It may overestimate the size by a bit, but we wanted to leave
     // more buffer to be safe
     long estimatedSize = key.length * Integer.BYTES;
-    estimatedSize += Arrays.stream(key)
-                           .filter(element -> dimLookup.getValue(element) != null)
-                           .mapToLong(element -> dimLookup.getValue(element).length() * Character.BYTES)
-                           .sum();
+    long totalChars = 0;
+    for (int element : key) {
+      String val = dimLookup.getValue(element);
+      if (val != null) {
+        totalChars += val.length();
+      }
+    }
+    estimatedSize += totalChars * Character.BYTES;
     return estimatedSize;
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@druid.apache.org
For additional commands, e-mail: commits-h...@druid.apache.org

Reply via email to