This is an automated email from the ASF dual-hosted git repository.
JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 8bb3816ed5 [tantivy] Reuse TantivySearcher across queries via searcher
pool (#7671)
8bb3816ed5 is described below
commit 8bb3816ed557ab4604ae4954405da34e52faaf93
Author: ChengHui Chen <[email protected]>
AuthorDate: Mon May 18 16:29:34 2026 +0800
[tantivy] Reuse TantivySearcher across queries via searcher pool (#7671)
Each full-text search query currently opens a fresh `TantivySearcher`,
which rebuilds the Rust-side index structures (including loading the
`.term` FST dictionary) from scratch. On object storage (S3/OSS), this
means a full GET of the index file on every query. In any engine that
repeatedly scans the same table, the same index shard is queried across
consecutive `plan()` calls, making repeated loading pure waste.
This PR introduces a `TantivySearcherPool` that keeps `TantivySearcher`
instances alive across queries, borrowing on query start and returning
on close rather than destroying and rebuilding.
### Benefit Assessment
Benchmark on local disk (500k docs, 17MB index, 500 queries,
JIT-warmed):
```
No-pool: avg=2.86 ms (open=1.40 ms / 49%, search=1.27 ms)
With pool: avg=0.79 ms (search only)
Speedup: 3.62x
```
On object storage the gap widens further: the open phase includes a full
GET of the `.term` file (FST dictionary, typically several MB per
shard). With the pool, `.term` stays resident in Rust memory across
queries, eliminating both the latency and the object storage data
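transfer cost of repeated loading. For tables under heavy compaction,
index files are replaced by new paths, so their pool keys change; stale
pool entries are never borrowed again and are closed once the size bound
or the 30-minute idle expiry evicts them, so correctness is unaffected.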
---
...lobalIndexerFactory.java => ArchiveLayout.java} | 25 ++---
.../index/TantivyFullTextGlobalIndexReader.java | 106 +++++++++-----------
.../index/TantivyFullTextGlobalIndexer.java | 11 ++-
.../index/TantivyFullTextGlobalIndexerFactory.java | 12 ++-
.../tantivy/index/TantivyFullTextIndexOptions.java | 37 +++++++
.../paimon/tantivy/index/TantivySearcherPool.java | 109 +++++++++++++++++++++
.../index/TantivyFullTextGlobalIndexTest.java | 57 ++++++++---
7 files changed, 264 insertions(+), 93 deletions(-)
diff --git
a/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexerFactory.java
b/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/ArchiveLayout.java
similarity index 58%
copy from
paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexerFactory.java
copy to
paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/ArchiveLayout.java
index d4e6febc15..41fd22d1cd 100644
---
a/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexerFactory.java
+++
b/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/ArchiveLayout.java
@@ -18,23 +18,14 @@
package org.apache.paimon.tantivy.index;
-import org.apache.paimon.globalindex.GlobalIndexer;
-import org.apache.paimon.globalindex.GlobalIndexerFactory;
-import org.apache.paimon.options.Options;
-import org.apache.paimon.types.DataField;
+class ArchiveLayout {
+ final String[] fileNames;
+ final long[] fileOffsets;
+ final long[] fileLengths;
-/** Factory for creating Tantivy full-text index. */
-public class TantivyFullTextGlobalIndexerFactory implements
GlobalIndexerFactory {
-
- public static final String IDENTIFIER = "tantivy-fulltext";
-
- @Override
- public String identifier() {
- return IDENTIFIER;
- }
-
- @Override
- public GlobalIndexer create(DataField field, Options options) {
- return new TantivyFullTextGlobalIndexer();
+ ArchiveLayout(String[] fileNames, long[] fileOffsets, long[] fileLengths) {
+ this.fileNames = fileNames;
+ this.fileOffsets = fileOffsets;
+ this.fileLengths = fileLengths;
}
}
diff --git
a/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexReader.java
b/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexReader.java
index d45c18bd6e..15bf8f19bf 100644
---
a/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexReader.java
+++
b/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexReader.java
@@ -36,6 +36,7 @@ import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
import static org.apache.paimon.utils.Preconditions.checkArgument;
@@ -45,20 +46,31 @@ import static
org.apache.paimon.utils.Preconditions.checkArgument;
*
* <p>Reads the archive header to get file layout, then opens a Tantivy
searcher backed by JNI
* callbacks to the {@link SeekableInputStream}. No temp files are created.
+ *
+ * <p>On {@link #close()}, the searcher is returned to the {@link
TantivySearcherPool} rather than
+ * destroyed, so the Rust-side index (including the FST term dictionary) stays
warm across queries.
*/
public class TantivyFullTextGlobalIndexReader implements GlobalIndexReader {
private final GlobalIndexIOMeta ioMeta;
private final GlobalIndexFileReader fileReader;
+ private final Map<String, ArchiveLayout> layoutCache;
+ private final TantivySearcherPool searcherPool;
+ private final String poolKey;
- private volatile TantivySearcher searcher;
- private volatile SeekableInputStream openStream;
+ private volatile TantivySearcherPool.PooledEntry borrowed;
public TantivyFullTextGlobalIndexReader(
- GlobalIndexFileReader fileReader, List<GlobalIndexIOMeta> ioMetas)
{
+ GlobalIndexFileReader fileReader,
+ List<GlobalIndexIOMeta> ioMetas,
+ Map<String, ArchiveLayout> layoutCache,
+ TantivySearcherPool searcherPool) {
checkArgument(ioMetas.size() == 1, "Expected exactly one index file
per shard");
this.fileReader = fileReader;
this.ioMeta = ioMetas.get(0);
+ this.layoutCache = layoutCache;
+ this.searcherPool = searcherPool;
+ this.poolKey = this.ioMeta.filePath().toString() + "@" +
this.ioMeta.fileSize();
}
@Override
@@ -66,7 +78,7 @@ public class TantivyFullTextGlobalIndexReader implements
GlobalIndexReader {
try {
ensureLoaded();
SearchResult result =
- searcher.search(fullTextSearch.queryText(),
fullTextSearch.limit());
+ borrowed.searcher.search(fullTextSearch.queryText(),
fullTextSearch.limit());
return Optional.of(toScoredResult(result));
} catch (IOException e) {
throw new RuntimeException("Failed to search Tantivy full-text
index", e);
@@ -85,29 +97,38 @@ public class TantivyFullTextGlobalIndexReader implements
GlobalIndexReader {
}
private void ensureLoaded() throws IOException {
- if (searcher == null) {
+ if (borrowed == null) {
synchronized (this) {
- if (searcher == null) {
- SeekableInputStream in = fileReader.getInputStream(ioMeta);
- try {
- ArchiveLayout layout = parseArchiveHeader(in);
- StreamFileInput streamInput = new
SynchronizedStreamFileInput(in);
- searcher =
- new TantivySearcher(
- layout.fileNames,
- layout.fileOffsets,
- layout.fileLengths,
- streamInput);
- openStream = in;
- } catch (Exception e) {
- in.close();
- throw e;
+ if (borrowed == null) {
+ TantivySearcherPool.PooledEntry entry =
searcherPool.borrow(poolKey);
+ if (entry == null) {
+ entry = createEntry();
}
+ borrowed = entry;
}
}
}
}
+    /**
+     * Opens the index file and builds a fresh pool entry (searcher plus its backing stream).
+     *
+     * <p>The archive layout is taken from {@code layoutCache} when one is already stored under
+     * {@code poolKey}; otherwise the header is parsed from the stream and the result is cached.
+     *
+     * @return a new entry that owns both the searcher and the opened stream
+     * @throws IOException if opening the stream, parsing the header, or creating the searcher fails
+     */
+    private TantivySearcherPool.PooledEntry createEntry() throws IOException {
+        SeekableInputStream in = fileReader.getInputStream(ioMeta);
+        try {
+            ArchiveLayout layout = layoutCache.get(poolKey);
+            if (layout == null) {
+                layout = parseArchiveHeader(in);
+                layoutCache.put(poolKey, layout);
+            }
+            StreamFileInput streamInput = new SynchronizedStreamFileInput(in);
+            TantivySearcher searcher =
+                    new TantivySearcher(
+                            layout.fileNames, layout.fileOffsets, layout.fileLengths, streamInput);
+            return new TantivySearcherPool.PooledEntry(searcher, in);
+        } catch (Throwable t) {
+            // Catch Throwable so the stream is also released when an Error (for example a JNI
+            // linkage failure) escapes, and never let a failing close() hide the original cause.
+            try {
+                in.close();
+            } catch (Throwable closeFailure) {
+                t.addSuppressed(closeFailure);
+            }
+            throw t;
+        }
+    }
+
/**
* Parse the archive header to extract file names, offsets, and lengths.
The archive format is:
* [fileCount(4)] then for each file: [nameLen(4)] [name(utf8)]
[dataLen(8)] [data].
@@ -170,35 +191,9 @@ public class TantivyFullTextGlobalIndexReader implements
GlobalIndexReader {
@Override
public void close() throws IOException {
- Throwable firstException = null;
-
- if (searcher != null) {
- try {
- searcher.close();
- } catch (Throwable t) {
- firstException = t;
- }
- searcher = null;
- }
-
- if (openStream != null) {
- try {
- openStream.close();
- } catch (Throwable t) {
- if (firstException == null) {
- firstException = t;
- } else {
- firstException.addSuppressed(t);
- }
- }
- openStream = null;
- }
-
- if (firstException != null) {
- if (firstException instanceof IOException) {
- throw (IOException) firstException;
- }
- throw new RuntimeException("Failed to close Tantivy reader",
firstException);
+ if (borrowed != null) {
+ searcherPool.returnEntry(poolKey, borrowed);
+ borrowed = null;
}
}
@@ -274,19 +269,6 @@ public class TantivyFullTextGlobalIndexReader implements
GlobalIndexReader {
return Optional.empty();
}
- /** Parsed archive layout: file names with their offsets and lengths in
the stream. */
- private static class ArchiveLayout {
- final String[] fileNames;
- final long[] fileOffsets;
- final long[] fileLengths;
-
- ArchiveLayout(String[] fileNames, long[] fileOffsets, long[]
fileLengths) {
- this.fileNames = fileNames;
- this.fileOffsets = fileOffsets;
- this.fileLengths = fileLengths;
- }
- }
-
/**
* Thread-safe wrapper around {@link SeekableInputStream} implementing
{@link StreamFileInput}.
* Rust JNI holds a Mutex across seek+read to prevent interleaving from
concurrent threads.
diff --git
a/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexer.java
b/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexer.java
index e8d882acae..add95cd8c5 100644
---
a/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexer.java
+++
b/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexer.java
@@ -26,11 +26,18 @@ import
org.apache.paimon.globalindex.io.GlobalIndexFileReader;
import org.apache.paimon.globalindex.io.GlobalIndexFileWriter;
import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
/** Tantivy full-text global indexer. */
public class TantivyFullTextGlobalIndexer implements GlobalIndexer {
- public TantivyFullTextGlobalIndexer() {}
+ private final Map<String, ArchiveLayout> layoutCache = new
ConcurrentHashMap<>();
+ private final TantivySearcherPool searcherPool;
+
+ public TantivyFullTextGlobalIndexer(TantivySearcherPool searcherPool) {
+ this.searcherPool = searcherPool;
+ }
@Override
public GlobalIndexWriter createWriter(GlobalIndexFileWriter fileWriter) {
@@ -40,6 +47,6 @@ public class TantivyFullTextGlobalIndexer implements
GlobalIndexer {
@Override
public GlobalIndexReader createReader(
GlobalIndexFileReader fileReader, List<GlobalIndexIOMeta> files) {
- return new TantivyFullTextGlobalIndexReader(fileReader, files);
+ return new TantivyFullTextGlobalIndexReader(fileReader, files,
layoutCache, searcherPool);
}
}
diff --git
a/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexerFactory.java
b/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexerFactory.java
index d4e6febc15..e54294340c 100644
---
a/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexerFactory.java
+++
b/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexerFactory.java
@@ -28,6 +28,8 @@ public class TantivyFullTextGlobalIndexerFactory implements
GlobalIndexerFactory
public static final String IDENTIFIER = "tantivy-fulltext";
+ private volatile TantivySearcherPool searcherPool;
+
@Override
public String identifier() {
return IDENTIFIER;
@@ -35,6 +37,14 @@ public class TantivyFullTextGlobalIndexerFactory implements
GlobalIndexerFactory
@Override
public GlobalIndexer create(DataField field, Options options) {
- return new TantivyFullTextGlobalIndexer();
+ if (searcherPool == null) {
+ synchronized (this) {
+ if (searcherPool == null) {
+ int maxSize =
options.get(TantivyFullTextIndexOptions.SEARCHER_POOL_MAX_SIZE);
+ searcherPool = new TantivySearcherPool(maxSize);
+ }
+ }
+ }
+ return new TantivyFullTextGlobalIndexer(searcherPool);
}
}
diff --git
a/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextIndexOptions.java
b/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextIndexOptions.java
new file mode 100644
index 0000000000..dbc4cd3960
--- /dev/null
+++
b/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivyFullTextIndexOptions.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.tantivy.index;
+
+import org.apache.paimon.options.ConfigOption;
+import org.apache.paimon.options.ConfigOptions;
+
+/** Configuration keys understood by the Tantivy full-text index. */
+public class TantivyFullTextIndexOptions {
+
+    /** Help text attached to {@link #SEARCHER_POOL_MAX_SIZE}. */
+    private static final String SEARCHER_POOL_MAX_SIZE_DESCRIPTION =
+            "Maximum number of idle TantivySearcher instances kept in the pool "
+                    + "across all index shards. Each entry holds the index open in "
+                    + "Rust memory (including the FST term dictionary), so memory "
+                    + "usage scales with this value times the index size per shard. "
+                    + "Set to 0 to disable pooling.";
+
+    /** Upper bound on idle pooled searchers; defaults to 32. */
+    public static final ConfigOption<Integer> SEARCHER_POOL_MAX_SIZE =
+            ConfigOptions.key("tantivy.searcher-pool.max-size")
+                    .intType()
+                    .defaultValue(32)
+                    .withDescription(SEARCHER_POOL_MAX_SIZE_DESCRIPTION);
+}
diff --git
a/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivySearcherPool.java
b/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivySearcherPool.java
new file mode 100644
index 0000000000..d83f441e52
--- /dev/null
+++
b/paimon-tantivy/paimon-tantivy-index/src/main/java/org/apache/paimon/tantivy/index/TantivySearcherPool.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.tantivy.index;
+
+import org.apache.paimon.fs.SeekableInputStream;
+import org.apache.paimon.tantivy.TantivySearcher;
+import org.apache.paimon.utils.IOUtils;
+
+import
org.apache.paimon.shade.caffeine2.com.github.benmanes.caffeine.cache.Cache;
+import
org.apache.paimon.shade.caffeine2.com.github.benmanes.caffeine.cache.Caffeine;
+
+import javax.annotation.Nullable;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Pool of {@link TantivySearcher} instances keyed by index file identity ({@code filePath@size}).
+ *
+ * <p>Each searcher holds the Tantivy index open in Rust memory (including the FST term dictionary).
+ *
+ * <p>At most one idle searcher is kept per key. Under concurrent queries on the same shard, the
+ * last entry to be returned wins; the others are closed immediately. The total number of idle
+ * entries across all keys is bounded by {@code maxSize}.
+ *
+ * <p>Thread-safe. Borrow/return semantics guarantee at most one thread uses a given entry at a
+ * time.
+ */
+public class TantivySearcherPool {
+
+    static final long EXPIRE_AFTER_ACCESS_MINUTES = 30;
+
+    /** A borrowed searcher + its backing stream. Both are returned together. */
+    static final class PooledEntry implements Closeable {
+        final TantivySearcher searcher;
+        final SeekableInputStream stream;
+
+        PooledEntry(TantivySearcher searcher, SeekableInputStream stream) {
+            this.searcher = searcher;
+            this.stream = stream;
+        }
+
+        /** Closes the searcher, then the backing stream, each via {@code IOUtils.closeQuietly}. */
+        @Override
+        public void close() throws IOException {
+            IOUtils.closeQuietly(searcher);
+            IOUtils.closeQuietly(stream);
+        }
+    }
+
+    /** Idle entries by key; {@code null} when pooling is disabled. */
+    @Nullable private final Cache<String, PooledEntry> idleCache;
+
+    /**
+     * Creates a pool holding at most {@code maxSize} idle entries.
+     *
+     * @param maxSize upper bound on idle entries across all keys; a value {@code <= 0} disables
+     *     pooling, in which case {@link #borrow} always misses and {@link #returnEntry} closes
+     */
+    public TantivySearcherPool(int maxSize) {
+        if (maxSize <= 0) {
+            this.idleCache = null;
+        } else {
+            Cache<String, PooledEntry> cache =
+                    Caffeine.newBuilder()
+                            .maximumSize(maxSize)
+                            .expireAfterAccess(EXPIRE_AFTER_ACCESS_MINUTES, TimeUnit.MINUTES)
+                            .executor(Runnable::run)
+                            .removalListener(
+                                    (k, v, c) -> {
+                                        switch (c) {
+                                            case EXPLICIT:
+                                                // Removed by borrow(): ownership has moved to the
+                                                // borrower, so the entry must stay open.
+                                                break;
+                                            default:
+                                                // Size eviction, expiry or replacement: nobody
+                                                // holds the entry any more, release it.
+                                                IOUtils.closeQuietly((PooledEntry) v);
+                                        }
+                                    })
+                            .build();
+            this.idleCache = cache;
+        }
+    }
+
+    /**
+     * Borrow an idle entry for the given key, or {@code null} if the pool has none.
+     *
+     * <p>The entry is taken out of the cache through its map view. Caffeine reports that to the
+     * removal listener as an explicit removal, which the listener deliberately ignores so that
+     * the returned entry is still open.
+     *
+     * <p>The caller must either {@link #returnEntry} or {@link PooledEntry#close} the entry when
+     * done.
+     */
+    @Nullable
+    public PooledEntry borrow(String key) {
+        if (idleCache == null) { // pool disabled (maxSize <= 0)
+            return null;
+        }
+        return idleCache.asMap().remove(key);
+    }
+
+    /**
+     * Return a previously borrowed entry to the pool. Any entry displaced by size eviction, TTL
+     * expiry, or key replacement is closed automatically via the removal listener.
+     */
+    public void returnEntry(String key, PooledEntry entry) {
+        if (idleCache == null) { // pool disabled (maxSize <= 0)
+            IOUtils.closeQuietly(entry);
+            return;
+        }
+        idleCache.put(key, entry);
+    }
+}
diff --git
a/paimon-tantivy/paimon-tantivy-index/src/test/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexTest.java
b/paimon-tantivy/paimon-tantivy-index/src/test/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexTest.java
index beb92ee0a2..8f3f13fade 100644
---
a/paimon-tantivy/paimon-tantivy-index/src/test/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexTest.java
+++
b/paimon-tantivy/paimon-tantivy-index/src/test/java/org/apache/paimon/tantivy/index/TantivyFullTextGlobalIndexTest.java
@@ -41,8 +41,10 @@ import org.junit.jupiter.api.io.TempDir;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
import java.util.UUID;
+import java.util.concurrent.ConcurrentHashMap;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assumptions.assumeTrue;
@@ -70,11 +72,15 @@ public class TantivyFullTextGlobalIndexTest {
private FileIO fileIO;
private Path indexPath;
+ private Map<String, ArchiveLayout> layoutCache;
+ private TantivySearcherPool pool;
@BeforeEach
public void setup() {
fileIO = new LocalFileIO();
indexPath = new Path(tempDir.toString());
+ layoutCache = new ConcurrentHashMap<>();
+ pool = new TantivySearcherPool(4);
}
@AfterEach
@@ -111,6 +117,11 @@ public class TantivyFullTextGlobalIndexTest {
new GlobalIndexIOMeta(filePath, fileIO.getFileSize(filePath),
result.meta()));
}
+    /** Builds a reader wired to this test's shared layout cache and searcher pool. */
+    private TantivyFullTextGlobalIndexReader createReader(
+            GlobalIndexFileReader fileReader, List<GlobalIndexIOMeta> metas) {
+        TantivyFullTextGlobalIndexReader reader =
+                new TantivyFullTextGlobalIndexReader(fileReader, metas, layoutCache, pool);
+        return reader;
+    }
+
@Test
public void testEndToEnd() throws IOException {
GlobalIndexFileWriter fileWriter = createFileWriter(indexPath);
@@ -127,8 +138,7 @@ public class TantivyFullTextGlobalIndexTest {
List<GlobalIndexIOMeta> metas = toIOMetas(results, indexPath);
GlobalIndexFileReader fileReader = createFileReader();
- try (TantivyFullTextGlobalIndexReader reader =
- new TantivyFullTextGlobalIndexReader(fileReader, metas)) {
+ try (TantivyFullTextGlobalIndexReader reader =
createReader(fileReader, metas)) {
FullTextSearch search = new FullTextSearch("paimon", 10, "text");
Optional<ScoredGlobalIndexResult> searchResult =
reader.visitFullTextSearch(search);
assertThat(searchResult).isPresent();
@@ -160,8 +170,7 @@ public class TantivyFullTextGlobalIndexTest {
List<GlobalIndexIOMeta> metas = toIOMetas(results, indexPath);
GlobalIndexFileReader fileReader = createFileReader();
- try (TantivyFullTextGlobalIndexReader reader =
- new TantivyFullTextGlobalIndexReader(fileReader, metas)) {
+ try (TantivyFullTextGlobalIndexReader reader =
createReader(fileReader, metas)) {
FullTextSearch search = new FullTextSearch("nonexistent", 10,
"text");
Optional<ScoredGlobalIndexResult> searchResult =
reader.visitFullTextSearch(search);
assertThat(searchResult).isPresent();
@@ -186,8 +195,7 @@ public class TantivyFullTextGlobalIndexTest {
List<GlobalIndexIOMeta> metas = toIOMetas(results, indexPath);
GlobalIndexFileReader fileReader = createFileReader();
- try (TantivyFullTextGlobalIndexReader reader =
- new TantivyFullTextGlobalIndexReader(fileReader, metas)) {
+ try (TantivyFullTextGlobalIndexReader reader =
createReader(fileReader, metas)) {
FullTextSearch search = new FullTextSearch("paimon", 10, "text");
Optional<ScoredGlobalIndexResult> searchResult =
reader.visitFullTextSearch(search);
assertThat(searchResult).isPresent();
@@ -231,8 +239,7 @@ public class TantivyFullTextGlobalIndexTest {
List<GlobalIndexIOMeta> metas = toIOMetas(results, indexPath);
GlobalIndexFileReader fileReader = createFileReader();
- try (TantivyFullTextGlobalIndexReader reader =
- new TantivyFullTextGlobalIndexReader(fileReader, metas)) {
+ try (TantivyFullTextGlobalIndexReader reader =
createReader(fileReader, metas)) {
// Search for the special keyword — should match every 10th doc
FullTextSearch search = new FullTextSearch("special_keyword",
1000, "text");
Optional<ScoredGlobalIndexResult> searchResult =
reader.visitFullTextSearch(search);
@@ -261,8 +268,7 @@ public class TantivyFullTextGlobalIndexTest {
List<GlobalIndexIOMeta> metas = toIOMetas(results, indexPath);
GlobalIndexFileReader fileReader = createFileReader();
- try (TantivyFullTextGlobalIndexReader reader =
- new TantivyFullTextGlobalIndexReader(fileReader, metas)) {
+ try (TantivyFullTextGlobalIndexReader reader =
createReader(fileReader, metas)) {
// Limit to 5 results
FullTextSearch search = new FullTextSearch("paimon", 5, "text");
Optional<ScoredGlobalIndexResult> searchResult =
reader.visitFullTextSearch(search);
@@ -273,9 +279,38 @@ public class TantivyFullTextGlobalIndexTest {
}
}
+    /** Runs the same query through two consecutive readers that share one searcher pool. */
+    @Test
+    public void testPoolReuse() throws IOException {
+        TantivyFullTextGlobalIndexWriter indexWriter =
+                new TantivyFullTextGlobalIndexWriter(createFileWriter(indexPath));
+        indexWriter.write(BinaryString.fromString("Apache Paimon streaming lake"));
+        indexWriter.write(BinaryString.fromString("Tantivy full-text search"));
+
+        List<GlobalIndexIOMeta> metas = toIOMetas(indexWriter.finish(), indexPath);
+        GlobalIndexFileReader fileReader = createFileReader();
+        FullTextSearch query = new FullTextSearch("paimon", 10, "text");
+
+        // Round one: nothing is pooled yet, so the reader builds a searcher and hands it to the
+        // pool when the try block closes it.
+        try (TantivyFullTextGlobalIndexReader first = createReader(fileReader, metas)) {
+            Optional<ScoredGlobalIndexResult> hit = first.visitFullTextSearch(query);
+            assertThat(hit).isPresent();
+            assertThat(hit.get().results().contains(0L)).isTrue();
+        }
+
+        // Round two: the reader is expected to pick the pooled searcher up again; the answer
+        // must match round one.
+        try (TantivyFullTextGlobalIndexReader second = createReader(fileReader, metas)) {
+            Optional<ScoredGlobalIndexResult> hit = second.visitFullTextSearch(query);
+            assertThat(hit).isPresent();
+            assertThat(hit.get().results().getLongCardinality()).isEqualTo(1);
+            assertThat(hit.get().results().contains(0L)).isTrue();
+        }
+    }
+
@Test
public void testViaIndexer() throws IOException {
- TantivyFullTextGlobalIndexer indexer = new
TantivyFullTextGlobalIndexer();
+ TantivyFullTextGlobalIndexer indexer =
+ new TantivyFullTextGlobalIndexer(new TantivySearcherPool(0));
GlobalIndexFileWriter fileWriter = createFileWriter(indexPath);
TantivyFullTextGlobalIndexWriter writer =