gf2121 commented on code in PR #14935:
URL: https://github.com/apache/lucene/pull/14935#discussion_r2200926950


##########
lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/BitsetToArrayBenchmark.java:
##########
@@ -0,0 +1,379 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.jmh;
+
+import java.util.SplittableRandom;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.IntStream;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@State(Scope.Benchmark)
+@Warmup(iterations = 3, time = 1)
+@Measurement(iterations = 5, time = 1)
+@Fork(
+    value = 1,
+    jvmArgsAppend = {"-Xmx1g", "-Xms1g", "-XX:+AlwaysPreTouch"})
+public class BitsetToArrayBenchmark {
+
+  private final SplittableRandom R = new SplittableRandom(4314123142L);
+
+  @Param({"5", "10", "20", "30", "40", "50", "60"})
+  int bitCount;
+
+  private final int[] scratch = new int[64];
+  private long word;
+  private int[] resultArray;
+  private int base;
+  private int offset;
+
+  /**
+   * Trial-level initialization: draws {@code base} from {@code [0, 1000)} and allocates {@code
+   * resultArray} with {@code bitCount + Long.SIZE} slots, i.e. room for {@code bitCount} values
+   * written from any start offset below 64 (the range used by {@code setupInvocation}).
+   */
+  @Setup(Level.Trial)
+  public void setup() {
+    base = R.nextInt(1000);
+    resultArray = new int[bitCount + Long.SIZE];
+  }
+
+  /**
+   * Invocation-level initialization: builds a random {@code word} with exactly {@code bitCount}
+   * set bits and picks a random start {@code offset} in {@code [0, 64)}.
+   */
+  @Setup(Level.Invocation)
+  public void setupInvocation() {
+    word = 0L;
+    // OR in one random bit at a time until the population count is reached; repeats are no-ops.
+    while (Long.bitCount(word) < bitCount) {
+      word |= 1L << R.nextInt(64);
+    }
+    offset = R.nextInt(64);
+  }
+
+  /** Benchmark entry point for {@code _whileLoop} on the current state; returns its end offset. */
+  @Benchmark
+  public int whileLoop() {
+    return _whileLoop(word, resultArray, offset, base);
+  }
+
+  /** Benchmark entry point for {@code _forLoop} on the current state; returns its end offset. */
+  @Benchmark
+  public int forLoop() {
+    return _forLoop(word, resultArray, offset, base);
+  }
+
+  /**
+   * Benchmark entry point for {@code _forLoopManualUnrolling} on the current state; returns its
+   * end offset.
+   */
+  @Benchmark
+  public int forLoopManualUnrolling() {
+    return _forLoopManualUnrolling(word, resultArray, offset, base);
+  }
+
+  /** Benchmark entry point for {@code _dense} on the current state; returns its end offset. */
+  @Benchmark
+  public int dense() {
+    return _dense(word, resultArray, offset, base);
+  }
+
+  /** Benchmark entry point for {@code _denseBranchLess} on the current state; returns its result. */
+  @Benchmark
+  public int denseBranchLess() {
+    return _denseBranchLess(word, resultArray, offset, base);
+  }
+
+  /**
+   * Benchmark entry point for {@code _denseBranchLessUnrolling}: passes the current {@code word},
+   * {@code resultArray}, {@code offset} and {@code base}, and returns the helper's result.
+   */
+  @Benchmark
+  public int denseBranchLessUnrolling() {
+    return _denseBranchLessUnrolling(word, resultArray, offset, base);
+  }
+
+  /**
+   * Benchmark entry point for {@code _denseBranchLessVectorized}: passes the current {@code word},
+   * {@code resultArray}, {@code offset} and {@code base} plus the reusable {@code scratch} array,
+   * and returns the helper's result.
+   */
+  @Benchmark
+  public int denseBranchLessVectorized() {
+    return _denseBranchLessVectorized(word, resultArray, offset, base, 
scratch);
+  }
+
+  /**
+   * Benchmark entry point for {@code _denseInvert}: passes the current {@code word}, {@code
+   * resultArray}, {@code offset} and {@code base}, and returns the helper's result.
+   */
+  @Benchmark
+  public int denseInvert() {
+    return _denseInvert(word, resultArray, offset, base);
+  }
+
+  /**
+   * Benchmark entry point for {@code _hybrid}: passes the current {@code word}, {@code
+   * resultArray}, {@code offset} and {@code base} plus the reusable {@code scratch} array, and
+   * returns the helper's result.
+   */
+  @Benchmark
+  public int hybrid() {
+    return _hybrid(word, resultArray, offset, base, scratch);
+  }
+
+  /**
+   * Decodes {@code word} by repeatedly taking its lowest set bit until the word is zero, storing
+   * {@code base + bit} in consecutive slots of {@code resultArray} starting at {@code offset}.
+   *
+   * @param word bitset word to decode
+   * @param resultArray destination array
+   * @param offset index of the first slot to write
+   * @param base value added to every bit index
+   * @return the index one past the last written slot
+   */
+  private static int _whileLoop(long word, int[] resultArray, int offset, int 
base) {
+    while (word != 0) {
+      int bit = Long.numberOfTrailingZeros(word);
+      resultArray[offset++] = base + bit;
+      // Clear the bit that was just emitted so the next iteration sees the next-lowest one.
+      word ^= 1L << bit;
+    }
+    return offset;
+  }
+
+  /**
+   * Decodes {@code word} lowest set bit first, like a {@code word != 0} loop would, but with the
+   * trip count fixed up front from {@code Long.bitCount(word)} so the loop is a counted one.
+   *
+   * @param word bitset word to decode
+   * @param resultArray destination array
+   * @param offset index of the first slot to write
+   * @param base value added to every bit index
+   * @return the index one past the last written slot ({@code offset + Long.bitCount(word)})
+   */
+  private static int _forLoop(long word, int[] resultArray, int offset, int 
base) {
+    int to = offset + Long.bitCount(word);
+    for (int i = offset; i < to; i++) {
+      int bit = Long.numberOfTrailingZeros(word);
+      resultArray[i] = base + bit;
+      // Clear the bit that was just emitted.
+      word ^= 1L << bit;
+    }
+    return to;
+  }
+
+  /**
+   * Decodes {@code word} lowest set bit first, storing {@code base + bit} in consecutive slots of
+   * {@code resultArray} starting at {@code offset}. The main loop is manually unrolled to extract
+   * four bits per iteration; a scalar tail loop handles the remaining zero to three bits.
+   *
+   * @param word bitset word to decode
+   * @param resultArray destination array; must have room for {@code Long.bitCount(word)} values
+   *     starting at {@code offset}
+   * @param offset index of the first slot to write
+   * @param base value added to every bit index
+   * @return the index one past the last written slot ({@code offset + Long.bitCount(word)})
+   */
+  private static int _forLoopManualUnrolling(long word, int[] resultArray, int offset, int base) {
+    int to = offset + Long.bitCount(word);
+    int i = offset;
+
+    // Runs while at least four bits remain. Each extracted bit goes to its own slot
+    // (i, i + 1, i + 2, i + 3); writing all four to resultArray[i] would keep only the last one.
+    for (; i < to - 3; i += 4) {
+      int ntz = Long.numberOfTrailingZeros(word);
+      resultArray[i] = base + ntz;
+      word ^= 1L << ntz;
+      ntz = Long.numberOfTrailingZeros(word);
+      resultArray[i + 1] = base + ntz;
+      word ^= 1L << ntz;
+      ntz = Long.numberOfTrailingZeros(word);
+      resultArray[i + 2] = base + ntz;
+      word ^= 1L << ntz;
+      ntz = Long.numberOfTrailingZeros(word);
+      resultArray[i + 3] = base + ntz;
+      word ^= 1L << ntz;
+    }
+
+    // Tail: fewer than four bits left.
+    for (; i < to; i++) {
+      int ntz = Long.numberOfTrailingZeros(word);
+      resultArray[i] = base + ntz;
+      word ^= 1L << ntz;
+    }
+
+    return to;
+  }
+
+  /**
+   * Tests each of the {@code Long.SIZE} bit positions of {@code word} in ascending order and, for
+   * every set position {@code i}, stores {@code base + i} in the next slot of {@code resultArray}
+   * starting at {@code offset}.
+   *
+   * @param word bitset word to decode
+   * @param resultArray destination array
+   * @param offset index of the first slot to write
+   * @param base value added to every bit index
+   * @return the index one past the last written slot
+   */
+  private static int _dense(long word, int[] resultArray, int offset, int 
base) {
+    for (int i = 0; i < Long.SIZE; i++) {
+      if ((word & (1L << i)) != 0) {
+        resultArray[offset++] = base + i;
+      }
+    }
+    return offset;
+  }
+
+  private static int _denseBranchLess(long word, int[] resultArray, int 
offset, int base) {
+    int lWord = (int) word;
+    int hWord = (int) (word >>> 32);
+    for (int i = 0; i < Integer.SIZE; i++) {
+      resultArray[offset] = base + i;
+      offset += lWord & 1;
+      lWord >>>= 1;
+    }
+    for (int i = Integer.SIZE; i < Long.SIZE; i++) {
+      resultArray[offset] = base + i;
+      offset += hWord & 1;
+      hWord >>>= 1;
+    }
+    return offset;

Review Comment:
   Here is the result (I'll try to check whether it is really compiled to CMOV 
after setting up the env on this new machine):
   
   ```
   Benchmark                                              (bitCount)   Mode  
Cnt   Score   Error   Units
   BitsetToArrayBenchmark.denseBranchLess                          5  thrpt    
5  13.582 ± 0.183  ops/us
   BitsetToArrayBenchmark.denseBranchLess                         10  thrpt    
5  13.485 ± 0.290  ops/us
   BitsetToArrayBenchmark.denseBranchLess                         20  thrpt    
5  13.575 ± 0.094  ops/us
   BitsetToArrayBenchmark.denseBranchLess                         30  thrpt    
5  13.570 ± 0.233  ops/us
   BitsetToArrayBenchmark.denseBranchLess                         40  thrpt    
5  13.478 ± 0.110  ops/us
   BitsetToArrayBenchmark.denseBranchLess                         50  thrpt    
5  13.577 ± 0.075  ops/us
   BitsetToArrayBenchmark.denseBranchLess                         60  thrpt    
5  13.558 ± 0.090  ops/us
   BitsetToArrayBenchmark.denseBranchLessCmov                      5  thrpt    
5   8.501 ± 0.154  ops/us
   BitsetToArrayBenchmark.denseBranchLessCmov                     10  thrpt    
5   6.373 ± 0.065  ops/us
   BitsetToArrayBenchmark.denseBranchLessCmov                     20  thrpt    
5  11.544 ± 0.057  ops/us
   BitsetToArrayBenchmark.denseBranchLessCmov                     30  thrpt    
5  11.538 ± 0.014  ops/us
   BitsetToArrayBenchmark.denseBranchLessCmov                     40  thrpt    
5  11.475 ± 0.284  ops/us
   BitsetToArrayBenchmark.denseBranchLessCmov                     50  thrpt    
5  11.546 ± 0.081  ops/us
   BitsetToArrayBenchmark.denseBranchLessCmov                     60  thrpt    
5   9.061 ± 0.213  ops/us
   BitsetToArrayBenchmark.denseBranchLessNoIdentityArray           5  thrpt    
5  15.190 ± 0.505  ops/us
   BitsetToArrayBenchmark.denseBranchLessNoIdentityArray          10  thrpt    
5  15.236 ± 0.358  ops/us
   BitsetToArrayBenchmark.denseBranchLessNoIdentityArray          20  thrpt    
5  15.217 ± 0.324  ops/us
   BitsetToArrayBenchmark.denseBranchLessNoIdentityArray          30  thrpt    
5  15.271 ± 0.217  ops/us
   BitsetToArrayBenchmark.denseBranchLessNoIdentityArray          40  thrpt    
5  15.267 ± 0.406  ops/us
   BitsetToArrayBenchmark.denseBranchLessNoIdentityArray          50  thrpt    
5  15.212 ± 0.345  ops/us
   BitsetToArrayBenchmark.denseBranchLessNoIdentityArray          60  thrpt    
5  15.242 ± 0.308  ops/us
   BitsetToArrayBenchmark.denseBranchLessUnrolling                 5  thrpt    
5  15.702 ± 0.483  ops/us
   BitsetToArrayBenchmark.denseBranchLessUnrolling                10  thrpt    
5  15.778 ± 0.275  ops/us
   BitsetToArrayBenchmark.denseBranchLessUnrolling                20  thrpt    
5  15.730 ± 0.468  ops/us
   BitsetToArrayBenchmark.denseBranchLessUnrolling                30  thrpt    
5  15.730 ± 1.251  ops/us
   BitsetToArrayBenchmark.denseBranchLessUnrolling                40  thrpt    
5  15.748 ± 0.429  ops/us
   BitsetToArrayBenchmark.denseBranchLessUnrolling                50  thrpt    
5  15.842 ± 0.213  ops/us
   BitsetToArrayBenchmark.denseBranchLessUnrolling                60  thrpt    
5  15.817 ± 0.396  ops/us
   BitsetToArrayBenchmark.denseBranchLessVectorized                5  thrpt    
5  14.905 ± 0.129  ops/us
   BitsetToArrayBenchmark.denseBranchLessVectorized               10  thrpt    
5  14.922 ± 0.322  ops/us
   BitsetToArrayBenchmark.denseBranchLessVectorized               20  thrpt    
5  14.828 ± 0.516  ops/us
   BitsetToArrayBenchmark.denseBranchLessVectorized               30  thrpt    
5  14.686 ± 0.133  ops/us
   BitsetToArrayBenchmark.denseBranchLessVectorized               40  thrpt    
5  14.881 ± 0.239  ops/us
   BitsetToArrayBenchmark.denseBranchLessVectorized               50  thrpt    
5  14.978 ± 0.233  ops/us
   BitsetToArrayBenchmark.denseBranchLessVectorized               60  thrpt    
5  14.991 ± 0.338  ops/us
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to