(paimon) branch master updated: [spark] fix build paimon scan of process vector search for spark3.2 (#8089)

lzljs3620320 Wed, 03 Jun 2026 01:24:46 -0700

This is an automated email from the ASF dual-hosted git repository.

JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git



The following commit(s) were added to refs/heads/master by this push:
     new dd3e67e85a [spark] fix build paimon scan of process vector search for spark3.2 (#8089)
dd3e67e85a is described below

commit dd3e67e85a6c80a65a4143f1ca2eaaa1123058b3
Author: Stefanietry <[email protected]>
AuthorDate: Wed Jun 3 16:24:29 2026 +0800

    [spark] fix build paimon scan of process vector search for spark3.2 (#8089)
---
 .../apache/paimon/spark/PaimonScanBuilder.scala    | 18 ++++++++-
 .../paimon/spark/PaimonScanBuilderTest.scala       | 44 ++++++++++++++++++++++
 2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/paimon-spark/paimon-spark-3.2/src/main/scala/org/apache/paimon/spark/PaimonScanBuilder.scala b/paimon-spark/paimon-spark-3.2/src/main/scala/org/apache/paimon/spark/PaimonScanBuilder.scala
index 770bd8f802..ef1f68c09f 100644
--- a/paimon-spark/paimon-spark-3.2/src/main/scala/org/apache/paimon/spark/PaimonScanBuilder.scala
+++ b/paimon-spark/paimon-spark-3.2/src/main/scala/org/apache/paimon/spark/PaimonScanBuilder.scala
@@ -25,13 +25,27 @@ import org.apache.spark.sql.connector.read.Scan
 class PaimonScanBuilder(val table: InnerTable) extends PaimonBaseScanBuilder {
 
   override def build(): Scan = {
+    val (actualTable, vectorSearch, fullTextSearch) = table match {
+      case vst: org.apache.paimon.table.VectorSearchTable =>
+        val tableVectorSearch = Option(vst.vectorSearch())
+        val vs = (tableVectorSearch, pushedVectorSearch) match {
+          case (Some(_), _) => tableVectorSearch
+          case (None, Some(_)) => pushedVectorSearch
+          case (None, None) => None
+        }
+        (vst.origin(), vs, None)
+      case ftst: org.apache.paimon.table.FullTextSearchTable =>
+        (ftst.origin(), None, Option(ftst.fullTextSearch()))
+      case _ => (table, pushedVectorSearch, pushedFullTextSearch)
+    }
     PaimonScan(
-      table,
+      actualTable,
       requiredSchema,
       pushedPartitionFilters,
       pushedDataFilters,
       pushedLimit,
       pushedTopN,
-      pushedVectorSearch)
+      vectorSearch,
+      fullTextSearch)
   }
 }
diff --git a/paimon-spark/paimon-spark-3.2/src/test/scala/org/apache/paimon/spark/PaimonScanBuilderTest.scala b/paimon-spark/paimon-spark-3.2/src/test/scala/org/apache/paimon/spark/PaimonScanBuilderTest.scala
new file mode 100755
index 0000000000..bbd79c8d78
--- /dev/null
+++ b/paimon-spark/paimon-spark-3.2/src/test/scala/org/apache/paimon/spark/PaimonScanBuilderTest.scala
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.spark
+
+/** Tests for [[PaimonScanBuilder]] in spark-3.2 module. */
+class PaimonScanBuilderTest extends PaimonSparkTestBase {
+
+  test("PaimonScanBuilder: read vector table normally on spark-3.2") {
+    withTable("T") {
+      spark.sql("""
+                  |CREATE TABLE T (id BIGINT, embs ARRAY<FLOAT>)
+                  |TBLPROPERTIES (
+                  |  'vector.file.format' = 'lance',
+                  |  'vector-field' = 'embs',
+                  |  'field.embs.vector-dim' = '3',
+                  |  'row-tracking.enabled' = 'true',
+                  |  'data-evolution.enabled' = 'true'
+                  |)
+                  |""".stripMargin)
+
+      var rows = spark.sql("SELECT id, embs FROM T ORDER BY id")
+      assert(rows.isEmpty)
+      rows =
+        spark.sql("select id, embs from vector_search('T', 'embs', array(1.0f, 2.0f, 3.0f), 5)")
+      assert(rows.isEmpty)
+    }
+  }
+}

(paimon) branch master updated: [spark] fix build paimon scan of process vector search for spark3.2 (#8089)

Reply via email to