Re: [PR] Optimize encoded numeric range bitsets [lucene]

via GitHub Mon, 15 Jun 2026 15:00:24 -0700


costin commented on code in PR #16160:
URL: https://github.com/apache/lucene/pull/16160#discussion_r3416818140



##########
lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/GcdDeltaRangeIntoBitSetBenchmark.java:
##########
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.jmh;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Comparator;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.MMapDirectory;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+
+/** Benchmarks range queries over dense numeric doc values encoded as raw, delta, GCD, or both. */
+@State(Scope.Thread)
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.SECONDS)
+@Warmup(iterations = 3, time = 3)
+@Measurement(iterations = 5, time = 5)
+public class GcdDeltaRangeIntoBitSetBenchmark {
+
+  private static final String FIELD = "val";
+  private static final String NONE = "none";
+  private static final String DELTA_ONLY = "delta_only";
+  private static final String GCD_1000 = "gcd_1000";
+  private static final String GCD_100_DELTA = "gcd_100_delta";
+  private static final long DOMAIN = 10_000_000L;
+  private static final long DELTA = 1_700_000_000_000L;
+
+  private Directory dir;
+  private DirectoryReader reader;
+  private IndexSearcher searcher;
+  private Path path;
+  private Query query;
+
+  @Param({"1000000"})
+  public int numDocs;
+
+  @Param({NONE, DELTA_ONLY, GCD_1000, GCD_100_DELTA})
+  public String encoding;
+
+  @Param({"0.01", "0.1", "0.5"})
+  public double selectivity;
+
+  @Setup(Level.Trial)
+  public void setup() throws Exception {
+    path = Files.createTempDirectory("gcdDeltaRangeIntoBitSet");
+    dir = MMapDirectory.open(path);
+
+    Random random = new Random(0);
+    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
+      for (int i = 0; i < numDocs; i++) {
+        Document doc = new Document();
+        doc.add(NumericDocValuesField.indexedField(FIELD, valueForDoc(encoding, i, random)));
+        writer.addDocument(doc);
+      }
+      writer.forceMerge(1);
+    }
+
+    reader = DirectoryReader.open(dir);
+    searcher = new IndexSearcher(reader);
+    query = rangeQuery(encoding, selectivity);
+  }
+
+  private static long valueForDoc(String encoding, int doc, Random random) {
+    if (doc == 0) {
+      return minimumValue(encoding);
+    } else if (doc == 1 && encoding.equals(GCD_100_DELTA)) {

Review Comment:
   This `doc == 1` branch anchors entry.gcd to exactly 100 for the GCD_100_DELTA
   encoding. Random multiples of 100 can share a larger common factor under some
   seeds, which would shift the encoded ticket range and quietly change what the
   benchmark measures.
   I added an inline comment explaining this so the next reader doesn't have to
   re-derive it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Optimize encoded numeric range bitsets [lucene]

Reply via email to