This is an automated email from the ASF dual-hosted git repository.
JingsongLi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-rust.git
The following commit(s) were added to refs/heads/main by this push:
new bb547dd Support canonical Lumina index type (#344)
bb547dd is described below
commit bb547dd6f7629e03d5869af8290ec7392d492027
Author: QuakeWang <[email protected]>
AuthorDate: Thu May 28 22:07:51 2026 +0800
Support canonical Lumina index type (#344)
---
crates/paimon/src/lumina/mod.rs | 22 +++-
crates/paimon/src/table/vector_search_builder.rs | 157 +++++++++++++++++------
2 files changed, 138 insertions(+), 41 deletions(-)
diff --git a/crates/paimon/src/lumina/mod.rs b/crates/paimon/src/lumina/mod.rs
index 04b4f87..7986a53 100644
--- a/crates/paimon/src/lumina/mod.rs
+++ b/crates/paimon/src/lumina/mod.rs
@@ -20,7 +20,16 @@ pub mod reader;
use std::collections::HashMap;
-pub const LUMINA_VECTOR_ANN_IDENTIFIER: &str = "lumina-vector-ann";
+pub const LUMINA_IDENTIFIER: &str = "lumina";
+pub const LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER: &str = "lumina-vector-ann";
+pub const LUMINA_VECTOR_ANN_IDENTIFIER: &str = LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER;
+
+pub fn is_lumina_index_type(index_type: &str) -> bool {
+ matches!(
+ index_type,
+ LUMINA_IDENTIFIER | LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER
+ )
+}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LuminaVectorMetric {
@@ -507,6 +516,17 @@ mod tests {
assert!(LuminaVectorMetric::from_string("hamming").is_err());
}
+ #[test]
+ fn test_lumina_index_type_identifier_helper() {
+ assert!(is_lumina_index_type(LUMINA_IDENTIFIER));
+ assert!(is_lumina_index_type(LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER));
+ assert!(is_lumina_index_type(LUMINA_VECTOR_ANN_IDENTIFIER));
+ assert!(!is_lumina_index_type(""));
+ assert!(!is_lumina_index_type("btree"));
+ assert!(!is_lumina_index_type("lumina-vector"));
+ assert!(!is_lumina_index_type("LUMINA"));
+ }
+
#[test]
fn test_index_meta_serialize_deserialize() {
let mut options = HashMap::new();
diff --git a/crates/paimon/src/table/vector_search_builder.rs b/crates/paimon/src/table/vector_search_builder.rs
index 1f79e54..887b230 100644
--- a/crates/paimon/src/table/vector_search_builder.rs
+++ b/crates/paimon/src/table/vector_search_builder.rs
@@ -16,7 +16,7 @@
// under the License.
use crate::lumina::reader::LuminaVectorGlobalIndexReader;
-use crate::lumina::{GlobalIndexIOMeta, SearchResult, VectorSearch, LUMINA_VECTOR_ANN_IDENTIFIER};
+use crate::lumina::{is_lumina_index_type, GlobalIndexIOMeta, SearchResult, VectorSearch};
use crate::spec::{DataField, FileKind, IndexManifest};
use crate::table::snapshot_manager::SnapshotManager;
use crate::table::{find_field_id_by_name, RowRange, Table};
@@ -130,7 +130,7 @@ async fn evaluate_vector_search(
.iter()
.filter(|e| {
e.kind == FileKind::Add
- && e.index_file.index_type == LUMINA_VECTOR_ANN_IDENTIFIER
+ && is_lumina_index_type(&e.index_file.index_type)
&& e.index_file
.global_index_meta
.as_ref()
@@ -190,6 +190,7 @@ async fn evaluate_vector_search(
#[cfg(test)]
mod tests {
use super::*;
+ use crate::lumina::{LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER, LUMINA_IDENTIFIER};
use crate::spec::{DataType, GlobalIndexMeta, IndexFileMeta, IndexManifestEntry, IntType};
fn make_field(id: i32, name: &str) -> DataField {
@@ -237,32 +238,39 @@ mod tests {
assert!(result.is_empty());
}
+ #[tokio::test]
+ async fn test_evaluate_ignores_non_lumina_index_type() {
+ let file_io = crate::io::FileIOBuilder::new("memory").build().unwrap();
+ let fields = vec![make_field(2, "embedding")];
+ let vs = VectorSearch::new(vec![1.0], 10, "embedding".to_string()).unwrap();
+
+ let entry = make_lumina_entry("test.idx", "btree", FileKind::Add, 2);
+
+ let result = evaluate_vector_search(
+ &file_io,
+ "memory:///test_table",
+ &HashMap::new(),
+ &[entry],
+ &vs,
+ &fields,
+ )
+ .await
+ .unwrap();
+ assert!(result.is_empty());
+ }
+
#[tokio::test]
async fn test_evaluate_no_matching_field() {
let file_io = crate::io::FileIOBuilder::new("memory").build().unwrap();
let fields = vec![make_field(1, "id")];
let vs = VectorSearch::new(vec![1.0], 10, "embedding".to_string()).unwrap();
- let entry = IndexManifestEntry {
- kind: FileKind::Add,
- partition: vec![],
- bucket: 0,
- index_file: IndexFileMeta {
- index_type: LUMINA_VECTOR_ANN_IDENTIFIER.to_string(),
- file_name: "test.idx".to_string(),
- file_size: 100,
- row_count: 10,
- deletion_vectors_ranges: None,
- global_index_meta: Some(GlobalIndexMeta {
- row_range_start: 0,
- row_range_end: 9,
- index_field_id: 99,
- extra_field_ids: None,
- index_meta: None,
- }),
- },
- version: 1,
- };
+ let entry = make_lumina_entry(
+ "test.idx",
+ LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER,
+ FileKind::Add,
+ 99,
+ );
let result = evaluate_vector_search(
&file_io,
@@ -283,37 +291,106 @@ mod tests {
let fields = vec![make_field(2, "embedding")];
let vs = VectorSearch::new(vec![1.0], 10, "embedding".to_string()).unwrap();
- let entry = IndexManifestEntry {
- kind: FileKind::Delete,
+ let entry = make_lumina_entry(
+ "test.idx",
+ LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER,
+ FileKind::Delete,
+ 2,
+ );
+
+ let result = evaluate_vector_search(
+ &file_io,
+ "memory:///test_table",
+ &HashMap::new(),
+ &[entry],
+ &vs,
+ &fields,
+ )
+ .await
+ .unwrap();
+ assert!(result.is_empty());
+ }
+
+ #[tokio::test]
+ async fn test_evaluate_accepts_canonical_lumina_index_type() {
+ let file_io = crate::io::FileIOBuilder::new("memory").build().unwrap();
+ let fields = vec![make_field(2, "embedding")];
+ let vs = VectorSearch::new(vec![1.0], 10, "embedding".to_string()).unwrap();
+
+ let entry = make_lumina_entry("missing.idx", LUMINA_IDENTIFIER, FileKind::Add, 2);
+
+ let err = evaluate_vector_search(
+ &file_io,
+ "memory:///test_table",
+ &HashMap::new(),
+ &[entry],
+ &vs,
+ &fields,
+ )
+ .await
+ .unwrap_err();
+ assert!(
+ err.to_string()
+ .contains("Failed to read Lumina index file 'missing.idx'"),
+ "unexpected error: {err}"
+ );
+ }
+
+ #[tokio::test]
+ async fn test_evaluate_accepts_legacy_lumina_index_type() {
+ let file_io = crate::io::FileIOBuilder::new("memory").build().unwrap();
+ let fields = vec![make_field(2, "embedding")];
+ let vs = VectorSearch::new(vec![1.0], 10, "embedding".to_string()).unwrap();
+
+ let entry = make_lumina_entry(
+ "missing.idx",
+ LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER,
+ FileKind::Add,
+ 2,
+ );
+
+ let err = evaluate_vector_search(
+ &file_io,
+ "memory:///test_table",
+ &HashMap::new(),
+ &[entry],
+ &vs,
+ &fields,
+ )
+ .await
+ .unwrap_err();
+ assert!(
+ err.to_string()
+ .contains("Failed to read Lumina index file 'missing.idx'"),
+ "unexpected error: {err}"
+ );
+ }
+
+ fn make_lumina_entry(
+ file_name: &str,
+ index_type: &str,
+ kind: FileKind,
+ index_field_id: i32,
+ ) -> IndexManifestEntry {
+ IndexManifestEntry {
+ kind,
partition: vec![],
bucket: 0,
index_file: IndexFileMeta {
- index_type: LUMINA_VECTOR_ANN_IDENTIFIER.to_string(),
- file_name: "test.idx".to_string(),
+ index_type: index_type.to_string(),
+ file_name: file_name.to_string(),
file_size: 100,
row_count: 10,
deletion_vectors_ranges: None,
global_index_meta: Some(GlobalIndexMeta {
row_range_start: 0,
row_range_end: 9,
- index_field_id: 2,
+ index_field_id,
extra_field_ids: None,
index_meta: None,
}),
},
version: 1,
- };
-
- let result = evaluate_vector_search(
- &file_io,
- "memory:///test_table",
- &HashMap::new(),
- &[entry],
- &vs,
- &fields,
- )
- .await
- .unwrap();
- assert!(result.is_empty());
+ }
}
}