This is an automated email from the ASF dual-hosted git repository.
JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 5c54f301f7 [spark] validate vector search limit range (#8200)
5c54f301f7 is described below
commit 5c54f301f7423699b70529865bf8e85a7c020e02
Author: Stefanietry <[email protected]>
AuthorDate: Thu Jun 11 13:13:28 2026 +0800
[spark] validate vector search limit range (#8200)
---
.../plans/logical/PaimonTableValuedFunctions.scala | 39 +++++++++++-----------
.../spark/sql/TableValuedFunctionsTest.scala | 11 ++++++
2 files changed, 30 insertions(+), 20 deletions(-)
diff --git
a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/plans/logical/PaimonTableValuedFunctions.scala
b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/plans/logical/PaimonTableValuedFunctions.scala
index d570f60303..fcc6738733 100644
---
a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/plans/logical/PaimonTableValuedFunctions.scala
+++
b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/plans/logical/PaimonTableValuedFunctions.scala
@@ -54,6 +54,23 @@ object PaimonTableValuedFunctions {
VECTOR_SEARCH,
FULL_TEXT_SEARCH)
+  /**
+   * Converts an evaluated `limit` argument into a strictly positive `Int`.
+   *
+   * The value is widened to `Long` before any range check, so a `Long` outside the `Int` range is
+   * never narrowed first. Narrowing keeps only the low 32 bits, which would turn e.g.
+   * `-4294967295L` into `1` and let a negative limit slip past the positivity check.
+   *
+   * @param value
+   *   the evaluated limit; only `Int` and `Long` values are accepted
+   * @return
+   *   the limit as an `Int` in the range `[1, Int.MaxValue]`
+   * @throws IllegalArgumentException
+   *   if the limit is not positive or is greater than `Int.MaxValue`
+   * @throws RuntimeException
+   *   if the value is neither an `Int` nor a `Long`
+   */
+  def parsePositiveLimit(value: Any): Int = {
+    val limit: Long = value match {
+      case i: Int => i.toLong
+      case l: Long => l
+      case other => throw new RuntimeException(s"Invalid limit type: ${other.getClass.getName}")
+    }
+    if (limit <= 0) {
+      throw new IllegalArgumentException(
+        s"Limit must be a positive integer, but got: $limit"
+      )
+    }
+    if (limit > Int.MaxValue) {
+      throw new IllegalArgumentException(
+        s"Limit must be no greater than ${Int.MaxValue}, but got: $limit")
+    }
+    // Safe: 0 < limit <= Int.MaxValue at this point, so narrowing cannot wrap.
+    limit.toInt
+  }
+
private type TableFunctionDescription = (FunctionIdentifier, ExpressionInfo,
TableFunctionBuilder)
def getTableValueFunctionInjection(fnName: String): TableFunctionDescription
= {
@@ -307,16 +324,7 @@ case class VectorSearchQuery(override val args:
Seq[Expression])
)
}
val queryVector = extractQueryVector(argsWithoutTable(1))
- val limit = argsWithoutTable(2).eval() match {
- case i: Int => i
- case l: Long => l.toInt
- case other => throw new RuntimeException(s"Invalid limit type:
${other.getClass.getName}")
- }
- if (limit <= 0) {
- throw new IllegalArgumentException(
- s"Limit must be a positive integer, but got: $limit"
- )
- }
+ val limit = parsePositiveLimit(argsWithoutTable(2).eval())
new VectorSearch(queryVector, limit, columnName)
}
@@ -374,16 +382,7 @@ case class FullTextSearchQuery(override val args:
Seq[Expression])
)
}
val queryText = argsWithoutTable(1).eval().toString
- val limit = argsWithoutTable(2).eval() match {
- case i: Int => i
- case l: Long => l.toInt
- case other => throw new RuntimeException(s"Invalid limit type:
${other.getClass.getName}")
- }
- if (limit <= 0) {
- throw new IllegalArgumentException(
- s"Limit must be a positive integer, but got: $limit"
- )
- }
+ val limit = parsePositiveLimit(argsWithoutTable(2).eval())
new FullTextSearch(queryText, limit, columnName)
}
}
diff --git
a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/TableValuedFunctionsTest.scala
b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/TableValuedFunctionsTest.scala
index 68f97743d3..8114766a99 100644
---
a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/TableValuedFunctionsTest.scala
+++
b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/TableValuedFunctionsTest.scala
@@ -21,6 +21,7 @@ package org.apache.paimon.spark.sql
import org.apache.paimon.data.{BinaryString, GenericRow, Timestamp}
import org.apache.paimon.manifest.ManifestCommittable
import org.apache.paimon.spark.PaimonHiveTestBase
+import
org.apache.paimon.spark.catalyst.plans.logical.PaimonTableValuedFunctions
import org.apache.paimon.utils.DateTimeUtils
import org.apache.spark.sql.{DataFrame, Row}
@@ -30,6 +31,16 @@ import java.util.Collections
class TableValuedFunctionsTest extends PaimonHiveTestBase {
+ test("parse positive limit rejects overflowing long") {
+ val longValue: Long = 4294967297L // 2^32 + 1: narrowing with toInt keeps only the low 32 bits, i.e. 1
+ assert(longValue.toInt > 0) // precondition: a naive toInt would yield a positive value and hide the overflow
+
+ val error = intercept[IllegalArgumentException] {
+ PaimonTableValuedFunctions.parsePositiveLimit(longValue)
+ }
+ assert(error.getMessage.contains("Limit must be no greater than"))
+ }
+
withPk.foreach {
hasPk =>
bucketModes.foreach {