This is an automated email from the ASF dual-hosted git repository.

JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 5c54f301f7 [spark] validate vector search limit range (#8200)
5c54f301f7 is described below

commit 5c54f301f7423699b70529865bf8e85a7c020e02
Author: Stefanietry <[email protected]>
AuthorDate: Thu Jun 11 13:13:28 2026 +0800

    [spark] validate vector search limit range (#8200)
---
 .../plans/logical/PaimonTableValuedFunctions.scala | 39 +++++++++++-----------
 .../spark/sql/TableValuedFunctionsTest.scala       | 11 ++++++
 2 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/plans/logical/PaimonTableValuedFunctions.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/plans/logical/PaimonTableValuedFunctions.scala
index d570f60303..fcc6738733 100644
--- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/plans/logical/PaimonTableValuedFunctions.scala
+++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/plans/logical/PaimonTableValuedFunctions.scala
@@ -54,6 +54,23 @@ object PaimonTableValuedFunctions {
       VECTOR_SEARCH,
       FULL_TEXT_SEARCH)
 
+  def parsePositiveLimit(value: Any): Int = {
+    val limit = value match {
+      case i: Int => i
+      case l: Long if l <= Int.MaxValue => l.toInt
+      case l: Long =>
+        throw new IllegalArgumentException(
+          s"Limit must be no greater than ${Int.MaxValue}, but got: $l")
+      case other => throw new RuntimeException(s"Invalid limit type: ${other.getClass.getName}")
+    }
+    if (limit <= 0) {
+      throw new IllegalArgumentException(
+        s"Limit must be a positive integer, but got: $limit"
+      )
+    }
+    limit
+  }
+
   private type TableFunctionDescription = (FunctionIdentifier, ExpressionInfo, TableFunctionBuilder)
 
   def getTableValueFunctionInjection(fnName: String): TableFunctionDescription = {
@@ -307,16 +324,7 @@ case class VectorSearchQuery(override val args: Seq[Expression])
       )
     }
     val queryVector = extractQueryVector(argsWithoutTable(1))
-    val limit = argsWithoutTable(2).eval() match {
-      case i: Int => i
-      case l: Long => l.toInt
-      case other => throw new RuntimeException(s"Invalid limit type: ${other.getClass.getName}")
-    }
-    if (limit <= 0) {
-      throw new IllegalArgumentException(
-        s"Limit must be a positive integer, but got: $limit"
-      )
-    }
+    val limit = parsePositiveLimit(argsWithoutTable(2).eval())
     new VectorSearch(queryVector, limit, columnName)
   }
 
@@ -374,16 +382,7 @@ case class FullTextSearchQuery(override val args: Seq[Expression])
       )
     }
     val queryText = argsWithoutTable(1).eval().toString
-    val limit = argsWithoutTable(2).eval() match {
-      case i: Int => i
-      case l: Long => l.toInt
-      case other => throw new RuntimeException(s"Invalid limit type: ${other.getClass.getName}")
-    }
-    if (limit <= 0) {
-      throw new IllegalArgumentException(
-        s"Limit must be a positive integer, but got: $limit"
-      )
-    }
+    val limit = parsePositiveLimit(argsWithoutTable(2).eval())
     new FullTextSearch(queryText, limit, columnName)
   }
 }
diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/TableValuedFunctionsTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/TableValuedFunctionsTest.scala
index 68f97743d3..8114766a99 100644
--- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/TableValuedFunctionsTest.scala
+++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/TableValuedFunctionsTest.scala
@@ -21,6 +21,7 @@ package org.apache.paimon.spark.sql
 import org.apache.paimon.data.{BinaryString, GenericRow, Timestamp}
 import org.apache.paimon.manifest.ManifestCommittable
 import org.apache.paimon.spark.PaimonHiveTestBase
+import org.apache.paimon.spark.catalyst.plans.logical.PaimonTableValuedFunctions
 import org.apache.paimon.utils.DateTimeUtils
 
 import org.apache.spark.sql.{DataFrame, Row}
@@ -30,6 +31,16 @@ import java.util.Collections
 
 class TableValuedFunctionsTest extends PaimonHiveTestBase {
 
+  test("parse positive limit rejects overflowing long") {
+    val longValue: Long = 4294967297L
+    assert(longValue.toInt > 0)
+
+    val error = intercept[IllegalArgumentException] {
+      PaimonTableValuedFunctions.parsePositiveLimit(longValue)
+    }
+    assert(error.getMessage.contains("Limit must be no greater than"))
+  }
+
   withPk.foreach {
     hasPk =>
       bucketModes.foreach {

Reply via email to