This is an automated email from the ASF dual-hosted git repository.

JingsongLi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new bb547dd  Support canonical Lumina index type (#344)
bb547dd is described below

commit bb547dd6f7629e03d5869af8290ec7392d492027
Author: QuakeWang <[email protected]>
AuthorDate: Thu May 28 22:07:51 2026 +0800

    Support canonical Lumina index type (#344)
---
 crates/paimon/src/lumina/mod.rs                  |  22 +++-
 crates/paimon/src/table/vector_search_builder.rs | 157 +++++++++++++++++------
 2 files changed, 138 insertions(+), 41 deletions(-)

diff --git a/crates/paimon/src/lumina/mod.rs b/crates/paimon/src/lumina/mod.rs
index 04b4f87..7986a53 100644
--- a/crates/paimon/src/lumina/mod.rs
+++ b/crates/paimon/src/lumina/mod.rs
@@ -20,7 +20,16 @@ pub mod reader;
 
 use std::collections::HashMap;
 
-pub const LUMINA_VECTOR_ANN_IDENTIFIER: &str = "lumina-vector-ann";
+pub const LUMINA_IDENTIFIER: &str = "lumina";
+pub const LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER: &str = "lumina-vector-ann";
+pub const LUMINA_VECTOR_ANN_IDENTIFIER: &str = LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER;
+
+pub fn is_lumina_index_type(index_type: &str) -> bool {
+    matches!(
+        index_type,
+        LUMINA_IDENTIFIER | LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER
+    )
+}
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum LuminaVectorMetric {
@@ -507,6 +516,17 @@ mod tests {
         assert!(LuminaVectorMetric::from_string("hamming").is_err());
     }
 
+    #[test]
+    fn test_lumina_index_type_identifier_helper() {
+        assert!(is_lumina_index_type(LUMINA_IDENTIFIER));
+        assert!(is_lumina_index_type(LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER));
+        assert!(is_lumina_index_type(LUMINA_VECTOR_ANN_IDENTIFIER));
+        assert!(!is_lumina_index_type(""));
+        assert!(!is_lumina_index_type("btree"));
+        assert!(!is_lumina_index_type("lumina-vector"));
+        assert!(!is_lumina_index_type("LUMINA"));
+    }
+
     #[test]
     fn test_index_meta_serialize_deserialize() {
         let mut options = HashMap::new();
diff --git a/crates/paimon/src/table/vector_search_builder.rs b/crates/paimon/src/table/vector_search_builder.rs
index 1f79e54..887b230 100644
--- a/crates/paimon/src/table/vector_search_builder.rs
+++ b/crates/paimon/src/table/vector_search_builder.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use crate::lumina::reader::LuminaVectorGlobalIndexReader;
-use crate::lumina::{GlobalIndexIOMeta, SearchResult, VectorSearch, LUMINA_VECTOR_ANN_IDENTIFIER};
+use crate::lumina::{is_lumina_index_type, GlobalIndexIOMeta, SearchResult, VectorSearch};
 use crate::spec::{DataField, FileKind, IndexManifest};
 use crate::table::snapshot_manager::SnapshotManager;
 use crate::table::{find_field_id_by_name, RowRange, Table};
@@ -130,7 +130,7 @@ async fn evaluate_vector_search(
         .iter()
         .filter(|e| {
             e.kind == FileKind::Add
-                && e.index_file.index_type == LUMINA_VECTOR_ANN_IDENTIFIER
+                && is_lumina_index_type(&e.index_file.index_type)
                 && e.index_file
                     .global_index_meta
                     .as_ref()
@@ -190,6 +190,7 @@ async fn evaluate_vector_search(
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::lumina::{LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER, LUMINA_IDENTIFIER};
     use crate::spec::{DataType, GlobalIndexMeta, IndexFileMeta, IndexManifestEntry, IntType};
 
     fn make_field(id: i32, name: &str) -> DataField {
@@ -237,32 +238,39 @@ mod tests {
         assert!(result.is_empty());
     }
 
+    #[tokio::test]
+    async fn test_evaluate_ignores_non_lumina_index_type() {
+        let file_io = crate::io::FileIOBuilder::new("memory").build().unwrap();
+        let fields = vec![make_field(2, "embedding")];
+        let vs = VectorSearch::new(vec![1.0], 10, "embedding".to_string()).unwrap();
+
+        let entry = make_lumina_entry("test.idx", "btree", FileKind::Add, 2);
+
+        let result = evaluate_vector_search(
+            &file_io,
+            "memory:///test_table",
+            &HashMap::new(),
+            &[entry],
+            &vs,
+            &fields,
+        )
+        .await
+        .unwrap();
+        assert!(result.is_empty());
+    }
+
     #[tokio::test]
     async fn test_evaluate_no_matching_field() {
         let file_io = crate::io::FileIOBuilder::new("memory").build().unwrap();
         let fields = vec![make_field(1, "id")];
         let vs = VectorSearch::new(vec![1.0], 10, "embedding".to_string()).unwrap();
 
-        let entry = IndexManifestEntry {
-            kind: FileKind::Add,
-            partition: vec![],
-            bucket: 0,
-            index_file: IndexFileMeta {
-                index_type: LUMINA_VECTOR_ANN_IDENTIFIER.to_string(),
-                file_name: "test.idx".to_string(),
-                file_size: 100,
-                row_count: 10,
-                deletion_vectors_ranges: None,
-                global_index_meta: Some(GlobalIndexMeta {
-                    row_range_start: 0,
-                    row_range_end: 9,
-                    index_field_id: 99,
-                    extra_field_ids: None,
-                    index_meta: None,
-                }),
-            },
-            version: 1,
-        };
+        let entry = make_lumina_entry(
+            "test.idx",
+            LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER,
+            FileKind::Add,
+            99,
+        );
 
         let result = evaluate_vector_search(
             &file_io,
@@ -283,37 +291,106 @@ mod tests {
         let fields = vec![make_field(2, "embedding")];
         let vs = VectorSearch::new(vec![1.0], 10, "embedding".to_string()).unwrap();
 
-        let entry = IndexManifestEntry {
-            kind: FileKind::Delete,
+        let entry = make_lumina_entry(
+            "test.idx",
+            LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER,
+            FileKind::Delete,
+            2,
+        );
+
+        let result = evaluate_vector_search(
+            &file_io,
+            "memory:///test_table",
+            &HashMap::new(),
+            &[entry],
+            &vs,
+            &fields,
+        )
+        .await
+        .unwrap();
+        assert!(result.is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_evaluate_accepts_canonical_lumina_index_type() {
+        let file_io = crate::io::FileIOBuilder::new("memory").build().unwrap();
+        let fields = vec![make_field(2, "embedding")];
+        let vs = VectorSearch::new(vec![1.0], 10, "embedding".to_string()).unwrap();
+
+        let entry = make_lumina_entry("missing.idx", LUMINA_IDENTIFIER, FileKind::Add, 2);
+
+        let err = evaluate_vector_search(
+            &file_io,
+            "memory:///test_table",
+            &HashMap::new(),
+            &[entry],
+            &vs,
+            &fields,
+        )
+        .await
+        .unwrap_err();
+        assert!(
+            err.to_string()
+                .contains("Failed to read Lumina index file 'missing.idx'"),
+            "unexpected error: {err}"
+        );
+    }
+
+    #[tokio::test]
+    async fn test_evaluate_accepts_legacy_lumina_index_type() {
+        let file_io = crate::io::FileIOBuilder::new("memory").build().unwrap();
+        let fields = vec![make_field(2, "embedding")];
+        let vs = VectorSearch::new(vec![1.0], 10, "embedding".to_string()).unwrap();
+
+        let entry = make_lumina_entry(
+            "missing.idx",
+            LEGACY_LUMINA_VECTOR_ANN_IDENTIFIER,
+            FileKind::Add,
+            2,
+        );
+
+        let err = evaluate_vector_search(
+            &file_io,
+            "memory:///test_table",
+            &HashMap::new(),
+            &[entry],
+            &vs,
+            &fields,
+        )
+        .await
+        .unwrap_err();
+        assert!(
+            err.to_string()
+                .contains("Failed to read Lumina index file 'missing.idx'"),
+            "unexpected error: {err}"
+        );
+    }
+
+    fn make_lumina_entry(
+        file_name: &str,
+        index_type: &str,
+        kind: FileKind,
+        index_field_id: i32,
+    ) -> IndexManifestEntry {
+        IndexManifestEntry {
+            kind,
             partition: vec![],
             bucket: 0,
             index_file: IndexFileMeta {
-                index_type: LUMINA_VECTOR_ANN_IDENTIFIER.to_string(),
-                file_name: "test.idx".to_string(),
+                index_type: index_type.to_string(),
+                file_name: file_name.to_string(),
                 file_size: 100,
                 row_count: 10,
                 deletion_vectors_ranges: None,
                 global_index_meta: Some(GlobalIndexMeta {
                     row_range_start: 0,
                     row_range_end: 9,
-                    index_field_id: 2,
+                    index_field_id,
                     extra_field_ids: None,
                     index_meta: None,
                 }),
             },
             version: 1,
-        };
-
-        let result = evaluate_vector_search(
-            &file_io,
-            "memory:///test_table",
-            &HashMap::new(),
-            &[entry],
-            &vs,
-            &fields,
-        )
-        .await
-        .unwrap();
-        assert!(result.is_empty());
+        }
     }
 }

Reply via email to