This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new fe32969082 [core] Do not create GlobalIndexScanner when no index files
(#7521)
fe32969082 is described below
commit fe32969082855d86589f309c8da4656c4174faa6
Author: Jingsong Lee <[email protected]>
AuthorDate: Wed Mar 25 16:25:02 2026 +0800
[core] Do not create GlobalIndexScanner when no index files (#7521)
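Bypass `GlobalIndexScanner` when there are no index files to scan: `create` now returns an empty `Optional` (`None` in Python) in that case and callers return early, for better performance.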
---
.../paimon/globalindex/DataEvolutionBatchScan.java | 9 +++++++--
.../paimon/globalindex/GlobalIndexScanner.java | 21 +++++++++++---------
.../apache/paimon/table/source/VectorReadImpl.java | 8 +++++---
.../paimon/table/BitmapGlobalIndexTableTest.java | 2 +-
.../paimon/table/BtreeGlobalIndexTableTest.java | 23 +---------------------
.../pypaimon/globalindex/global_index_scanner.py | 6 +++++-
.../pypaimon/read/scanner/file_scanner.py | 2 ++
7 files changed, 33 insertions(+), 38 deletions(-)
diff --git
a/paimon-core/src/main/java/org/apache/paimon/globalindex/DataEvolutionBatchScan.java
b/paimon-core/src/main/java/org/apache/paimon/globalindex/DataEvolutionBatchScan.java
index af169f4c6d..0542b7d95f 100644
---
a/paimon-core/src/main/java/org/apache/paimon/globalindex/DataEvolutionBatchScan.java
+++
b/paimon-core/src/main/java/org/apache/paimon/globalindex/DataEvolutionBatchScan.java
@@ -267,8 +267,13 @@ public class DataEvolutionBatchScan implements
DataTableScan {
}
PartitionPredicate partitionFilter =
batchScan.snapshotReader().manifestsReader().partitionFilter();
- try (GlobalIndexScanner scanner =
- GlobalIndexScanner.create(table, partitionFilter, filter)) {
+ Optional<GlobalIndexScanner> optionalScanner =
+ GlobalIndexScanner.create(table, partitionFilter, filter);
+ if (!optionalScanner.isPresent()) {
+ return Optional.empty();
+ }
+
+ try (GlobalIndexScanner scanner = optionalScanner.get()) {
return scanner.scan(filter);
} catch (IOException e) {
throw new RuntimeException(e);
diff --git
a/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexScanner.java
b/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexScanner.java
index 1312f17b43..3092f1b3da 100644
---
a/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexScanner.java
+++
b/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexScanner.java
@@ -97,17 +97,21 @@ public class GlobalIndexScanner implements Closeable {
this.globalIndexEvaluator = new GlobalIndexEvaluator(rowType,
readersFunction);
}
- public static GlobalIndexScanner create(
+ public static Optional<GlobalIndexScanner> create(
FileStoreTable table, Collection<IndexFileMeta> indexFiles) {
- return new GlobalIndexScanner(
- table.coreOptions().toConfiguration(),
- table.rowType(),
- table.fileIO(),
- table.store().pathFactory().globalIndexFileFactory(),
- indexFiles);
+ if (indexFiles.isEmpty()) {
+ return Optional.empty();
+ }
+ return Optional.of(
+ new GlobalIndexScanner(
+ table.coreOptions().toConfiguration(),
+ table.rowType(),
+ table.fileIO(),
+ table.store().pathFactory().globalIndexFileFactory(),
+ indexFiles));
}
- public static GlobalIndexScanner create(
+ public static Optional<GlobalIndexScanner> create(
FileStoreTable table, PartitionPredicate partitionFilter,
Predicate filter) {
Set<Integer> filterFieldIds =
collectFieldNames(filter).stream()
@@ -131,7 +135,6 @@ public class GlobalIndexScanner implements Closeable {
.stream()
.map(IndexManifestEntry::indexFile)
.collect(Collectors.toList());
-
return create(table, indexFiles);
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/table/source/VectorReadImpl.java
b/paimon-core/src/main/java/org/apache/paimon/table/source/VectorReadImpl.java
index 49708a0a2a..6971bb9084 100644
---
a/paimon-core/src/main/java/org/apache/paimon/table/source/VectorReadImpl.java
+++
b/paimon-core/src/main/java/org/apache/paimon/table/source/VectorReadImpl.java
@@ -119,11 +119,13 @@ public class VectorReadImpl implements VectorRead {
for (VectorSearchSplit split : splits) {
scalarIndexFiles.addAll(split.scalarIndexFiles());
}
- if (scalarIndexFiles.isEmpty()) {
+
+ Optional<GlobalIndexScanner> optionalScanner =
+ GlobalIndexScanner.create(table, scalarIndexFiles);
+ if (!optionalScanner.isPresent()) {
return Optional.empty();
}
-
- try (GlobalIndexScanner scanner = GlobalIndexScanner.create(table,
scalarIndexFiles)) {
+ try (GlobalIndexScanner scanner = optionalScanner.get()) {
return scanner.scan(filter).map(GlobalIndexResult::results);
} catch (IOException e) {
throw new RuntimeException(e);
diff --git
a/paimon-core/src/test/java/org/apache/paimon/table/BitmapGlobalIndexTableTest.java
b/paimon-core/src/test/java/org/apache/paimon/table/BitmapGlobalIndexTableTest.java
index cd888e6982..6443c6a5ec 100644
---
a/paimon-core/src/test/java/org/apache/paimon/table/BitmapGlobalIndexTableTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/table/BitmapGlobalIndexTableTest.java
@@ -248,7 +248,7 @@ public class BitmapGlobalIndexTableTest extends
DataEvolutionTestBase {
private RoaringNavigableMap64 globalIndexScan(FileStoreTable table,
Predicate predicate)
throws Exception {
try (GlobalIndexScanner scanner =
- GlobalIndexScanner.create(table,
PartitionPredicate.ALWAYS_TRUE, predicate)) {
+ GlobalIndexScanner.create(table,
PartitionPredicate.ALWAYS_TRUE, predicate).get()) {
return scanner.scan(predicate).get().results();
}
}
diff --git
a/paimon-core/src/test/java/org/apache/paimon/table/BtreeGlobalIndexTableTest.java
b/paimon-core/src/test/java/org/apache/paimon/table/BtreeGlobalIndexTableTest.java
index 98f730359d..4be621942b 100644
---
a/paimon-core/src/test/java/org/apache/paimon/table/BtreeGlobalIndexTableTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/table/BtreeGlobalIndexTableTest.java
@@ -19,7 +19,6 @@
package org.apache.paimon.table;
import org.apache.paimon.data.BinaryString;
-import org.apache.paimon.data.GenericRow;
import org.apache.paimon.globalindex.DataEvolutionBatchScan;
import org.apache.paimon.globalindex.GlobalIndexResult;
import org.apache.paimon.globalindex.GlobalIndexScanner;
@@ -29,13 +28,10 @@ import org.apache.paimon.partition.PartitionPredicate;
import org.apache.paimon.predicate.Predicate;
import org.apache.paimon.predicate.PredicateBuilder;
import org.apache.paimon.table.sink.BatchTableCommit;
-import org.apache.paimon.table.sink.BatchTableWrite;
-import org.apache.paimon.table.sink.BatchWriteBuilder;
import org.apache.paimon.table.sink.CommitMessage;
import org.apache.paimon.table.source.DataSplit;
import org.apache.paimon.table.source.ReadBuilder;
import org.apache.paimon.table.source.Split;
-import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.Range;
import org.apache.paimon.utils.RoaringNavigableMap64;
@@ -206,27 +202,10 @@ public class BtreeGlobalIndexTableTest extends
DataEvolutionTestBase {
.collect(Collectors.toList());
}
- private void append(int startInclusive, int endExclusive) throws Exception
{
- BatchWriteBuilder builder = getTableDefault().newBatchWriteBuilder();
- RowType writeType = schemaDefault().rowType();
- try (BatchTableWrite write0 =
builder.newWrite().withWriteType(writeType)) {
- for (int i = startInclusive; i < endExclusive; i++) {
- write0.write(
- GenericRow.of(
- i,
- BinaryString.fromString("a" + i),
- BinaryString.fromString("b" + i)));
- }
- try (BatchTableCommit commit = builder.newCommit()) {
- commit.commit(write0.prepareCommit());
- }
- }
- }
-
private RoaringNavigableMap64 globalIndexScan(FileStoreTable table,
Predicate predicate)
throws Exception {
try (GlobalIndexScanner scanner =
- GlobalIndexScanner.create(table,
PartitionPredicate.ALWAYS_TRUE, predicate)) {
+ GlobalIndexScanner.create(table,
PartitionPredicate.ALWAYS_TRUE, predicate).get()) {
return scanner.scan(predicate).get().results();
}
}
diff --git a/paimon-python/pypaimon/globalindex/global_index_scanner.py
b/paimon-python/pypaimon/globalindex/global_index_scanner.py
index c3d144811c..515600d77f 100644
--- a/paimon-python/pypaimon/globalindex/global_index_scanner.py
+++ b/paimon-python/pypaimon/globalindex/global_index_scanner.py
@@ -76,7 +76,7 @@ class GlobalIndexScanner:
return GlobalIndexEvaluator(fields, readers_function)
@staticmethod
- def create(table, index_files=None, partition_filter=None, predicate=None):
+ def create(table, index_files=None, partition_filter=None, predicate=None)
-> Optional['GlobalIndexScanner']:
"""Create a GlobalIndexScanner.
Can be called in two ways:
@@ -86,6 +86,8 @@ class GlobalIndexScanner:
from pypaimon.index.index_file_handler import IndexFileHandler
if index_files is not None:
+ if len(index_files) == 0:
+ return None
return GlobalIndexScanner(
options=table.table_schema.options,
fields=table.fields,
@@ -117,6 +119,8 @@ class GlobalIndexScanner:
entries = index_file_handler.scan(snapshot, index_file_filter)
scanned_index_files = [entry.index_file for entry in entries]
+ if len(scanned_index_files) == 0:
+ return None
return GlobalIndexScanner(
options=table.table_schema.options,
fields=table.fields,
diff --git a/paimon-python/pypaimon/read/scanner/file_scanner.py
b/paimon-python/pypaimon/read/scanner/file_scanner.py
index b770b15916..a5e8576802 100755
--- a/paimon-python/pypaimon/read/scanner/file_scanner.py
+++ b/paimon-python/pypaimon/read/scanner/file_scanner.py
@@ -313,6 +313,8 @@ class FileScanner:
partition_filter=self.partition_key_predicate,
predicate=self.predicate
)
+ if scanner is None:
+ return None
with scanner:
return scanner.scan(self.predicate)
except Exception: