This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 868549e7ac63 [SPARK-50560][SQL] Improve type coercion and boundary
checking for RANDSTR SQL function
868549e7ac63 is described below
commit 868549e7ac632e3d0fa26f26b22c2738b06caadc
Author: Daniel Tenedorio <[email protected]>
AuthorDate: Fri Jan 10 16:25:31 2025 +0800
[SPARK-50560][SQL] Improve type coercion and boundary checking for RANDSTR
SQL function
### What changes were proposed in this pull request?
This PR improves type coercion and boundary checking for the RANDSTR SQL
function.
srielau found the following issues and wrote them down in SPARK-50560:
* TINYINT and BIGINT were not supported.
* No type coercion from floating-point numbers was implemented.
* No explicit error checking for negative numbers was implemented,
resulting in weird stacktraces instead.
### Why are the changes needed?
This PR fixes the above problems to make the function work in more cases
and produce better error messages when it fails.
For example:
```
SELECT randstr(CAST(10 AS TINYINT), 0) AS result
> ceV0PXaR2I
SELECT randstr(CAST(10 AS BIGINT), 0) AS result
> ceV0PXaR2I
SELECT randstr(1.0F, 0) AS result
> c
SELECT randstr(1.0D, 0) AS result
> c
SELECT randstr(-1, 0) AS result
> Error: The RANDSTR function desired string length must be positive or
zero. Please provide an updated string length and then retry the query or
command again.
```
### Does this PR introduce _any_ user-facing change?
Yes, see above.
### How was this patch tested?
This PR adds golden file based test coverage, and updates existing coverage.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #49210 from dtenedor/fix-randstr-types.
Lead-authored-by: Daniel Tenedorio <[email protected]>
Co-authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../sql/catalyst/analysis/TypeCoercionHelper.scala | 6 +
.../catalyst/expressions/randomExpressions.scala | 65 +++---
.../sql/catalyst/expressions/RandomSuite.scala | 1 -
.../sql-tests/analyzer-results/random.sql.out | 194 +++++++++---------
.../src/test/resources/sql-tests/inputs/random.sql | 11 +-
.../resources/sql-tests/results/random.sql.out | 227 ++++++++++++---------
.../spark/sql/CollationExpressionWalkerSuite.scala | 3 +
.../apache/spark/sql/DataFrameFunctionsSuite.scala | 8 +-
8 files changed, 280 insertions(+), 235 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala
index ab2ab50cb33e..3b3cf748014b 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala
@@ -44,6 +44,7 @@ import org.apache.spark.sql.catalyst.expressions.{
MapConcat,
MapZipWith,
NaNvl,
+ RandStr,
RangeFrame,
ScalaUDF,
Sequence,
@@ -400,6 +401,11 @@ abstract class TypeCoercionHelper {
NaNvl(Cast(l, DoubleType), r)
case NaNvl(l, r) if r.dataType == NullType => NaNvl(l, Cast(r,
l.dataType))
+ case r: RandStr if r.length.dataType != IntegerType =>
+ implicitCast(r.length, IntegerType).map { casted =>
+ r.copy(length = casted)
+ }.getOrElse(r)
+
case other => other
}
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala
index 687dd83291be..50c699ef69bd 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala
@@ -21,12 +21,12 @@ import org.apache.spark.SparkException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, UnresolvedSeed}
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
-import
org.apache.spark.sql.catalyst.expressions.ExpectsInputTypes.{ordinalNumber,
toSQLExpr, toSQLId, toSQLType}
+import org.apache.spark.sql.catalyst.expressions.ExpectsInputTypes.{toSQLExpr,
toSQLId}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext,
CodeGenerator, ExprCode, FalseLiteral}
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.trees.{BinaryLike, TernaryLike, UnaryLike}
import
org.apache.spark.sql.catalyst.trees.TreePattern.{EXPRESSION_WITH_RANDOM_SEED,
RUNTIME_REPLACEABLE, TreePattern}
-import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.errors.{QueryCompilationErrors,
QueryExecutionErrors}
import org.apache.spark.sql.types._
import org.apache.spark.util.random.XORShiftRandom
@@ -313,7 +313,8 @@ object Uniform {
group = "string_funcs")
case class RandStr(
length: Expression, override val seedExpression: Expression, hideSeed:
Boolean)
- extends ExpressionWithRandomSeed with BinaryLike[Expression] with
Nondeterministic {
+ extends ExpressionWithRandomSeed with BinaryLike[Expression] with
Nondeterministic
+ with ExpectsInputTypes {
def this(length: Expression) =
this(length, UnresolvedSeed, hideSeed = true)
def this(length: Expression, seedExpression: Expression) =
@@ -325,6 +326,10 @@ case class RandStr(
override def left: Expression = length
override def right: Expression = seedExpression
+ override def inputTypes: Seq[AbstractDataType] = Seq(
+ IntegerType,
+ TypeCollection(IntegerType, LongType))
+
/**
* Record ID within each partition. By being transient, the Random Number
Generator is
* reset every time we serialize and deserialize and initialize it.
@@ -349,52 +354,48 @@ case class RandStr(
}
override def checkInputDataTypes(): TypeCheckResult = {
- var result: TypeCheckResult = TypeCheckResult.TypeCheckSuccess
- def requiredType = "INT or SMALLINT"
- Seq((length, "length", 0),
- (seedExpression, "seed", 1)).foreach {
- case (expr: Expression, name: String, index: Int) =>
- if (result == TypeCheckResult.TypeCheckSuccess) {
- if (!expr.foldable) {
- result = DataTypeMismatch(
- errorSubClass = "NON_FOLDABLE_INPUT",
- messageParameters = Map(
- "inputName" -> toSQLId(name),
- "inputType" -> requiredType,
- "inputExpr" -> toSQLExpr(expr)))
- } else expr.dataType match {
- case _: ShortType | _: IntegerType =>
- case _: LongType if index == 1 =>
- case _ =>
- result = DataTypeMismatch(
- errorSubClass = "UNEXPECTED_INPUT_TYPE",
- messageParameters = Map(
- "paramIndex" -> ordinalNumber(index),
- "requiredType" -> requiredType,
- "inputSql" -> toSQLExpr(expr),
- "inputType" -> toSQLType(expr.dataType)))
- }
+ var result: TypeCheckResult = super.checkInputDataTypes()
+ Seq((length, "length"),
+ (seedExpression, "seed")).foreach {
+ case (expr: Expression, name: String) =>
+ if (result == TypeCheckResult.TypeCheckSuccess && !expr.foldable) {
+ result = DataTypeMismatch(
+ errorSubClass = "NON_FOLDABLE_INPUT",
+ messageParameters = Map(
+ "inputName" -> toSQLId(name),
+ "inputType" -> "integer",
+ "inputExpr" -> toSQLExpr(expr)))
}
}
result
}
override def evalInternal(input: InternalRow): Any = {
- val numChars = length.eval(input).asInstanceOf[Number].intValue()
+ val numChars = lengthInteger()
ExpressionImplUtils.randStr(rng, numChars)
}
+ private def lengthInteger(): Int = {
+ // We should have already added a cast to IntegerType (if necessary) in
+ // FunctionArgumentTypeCoercion.
+ assert(length.dataType == IntegerType, s"Expected IntegerType, got
${length.dataType}")
+ val result = length.eval().asInstanceOf[Int]
+ if (result < 0) {
+ throw
QueryExecutionErrors.unexpectedValueForLengthInFunctionError(prettyName, result)
+ }
+ result
+ }
+
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val className = classOf[XORShiftRandom].getName
val rngTerm = ctx.addMutableState(className, "rng")
ctx.addPartitionInitializationStatement(
s"$rngTerm = new $className(${seed}L + partitionIndex);")
- val eval = length.genCode(ctx)
+ val numChars = lengthInteger()
ev.copy(code =
code"""
- |${eval.code}
|UTF8String ${ev.value} =
- | ${classOf[ExpressionImplUtils].getName}.randStr($rngTerm,
(int)(${eval.value}));
+ | ${classOf[ExpressionImplUtils].getName}.randStr($rngTerm,
$numChars);
|boolean ${ev.isNull} = false;
|""".stripMargin,
isNull = FalseLiteral)
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala
index 2d58d9d3136a..9e6b59b51138 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala
@@ -52,7 +52,6 @@ class RandomSuite extends SparkFunSuite with
ExpressionEvalHelper {
testRandStr(1, "c")
testRandStr(5, "ceV0P")
testRandStr(10, "ceV0PXaR2I")
- testRandStr(10L, "ceV0PXaR2I")
def testUniform(first: Any, second: Any, result: Any): Unit = {
checkEvaluationWithoutCodegen(
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/random.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/random.sql.out
index 59e903b8682f..96a4b2ec91c7 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/random.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/random.sql.out
@@ -559,6 +559,36 @@ SELECT randstr(10S, 0) AS result
[Analyzer test output redacted due to nondeterminism]
+-- !query
+SELECT randstr(CAST(10 AS TINYINT), 0) AS result
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT randstr(CAST(10 AS BIGINT), 0) AS result
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT randstr(1.0F, 0) AS result
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT randstr(1.0D, 0) AS result
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT randstr(cast(1 AS DECIMAL(10, 2)), 0) AS result
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
-- !query
SELECT randstr(10, 0) AS result FROM VALUES (0), (1), (2) tab(col)
-- !query analysis
@@ -571,124 +601,40 @@ SELECT randstr(10) IS NOT NULL AS result
[Analyzer test output redacted due to nondeterminism]
+-- !query
+SELECT randstr(1, -1) AS result
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
-- !query
SELECT randstr(10L, 0) AS result
-- !query analysis
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
- "sqlState" : "42K09",
- "messageParameters" : {
- "inputSql" : "\"10\"",
- "inputType" : "\"BIGINT\"",
- "paramIndex" : "first",
- "requiredType" : "INT or SMALLINT",
- "sqlExpr" : "\"randstr(10, 0)\""
- },
- "queryContext" : [ {
- "objectType" : "",
- "objectName" : "",
- "startIndex" : 8,
- "stopIndex" : 22,
- "fragment" : "randstr(10L, 0)"
- } ]
-}
+[Analyzer test output redacted due to nondeterminism]
-- !query
SELECT randstr(10.0F, 0) AS result
-- !query analysis
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
- "sqlState" : "42K09",
- "messageParameters" : {
- "inputSql" : "\"10.0\"",
- "inputType" : "\"FLOAT\"",
- "paramIndex" : "first",
- "requiredType" : "INT or SMALLINT",
- "sqlExpr" : "\"randstr(10.0, 0)\""
- },
- "queryContext" : [ {
- "objectType" : "",
- "objectName" : "",
- "startIndex" : 8,
- "stopIndex" : 24,
- "fragment" : "randstr(10.0F, 0)"
- } ]
-}
+[Analyzer test output redacted due to nondeterminism]
-- !query
SELECT randstr(10.0D, 0) AS result
-- !query analysis
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
- "sqlState" : "42K09",
- "messageParameters" : {
- "inputSql" : "\"10.0\"",
- "inputType" : "\"DOUBLE\"",
- "paramIndex" : "first",
- "requiredType" : "INT or SMALLINT",
- "sqlExpr" : "\"randstr(10.0, 0)\""
- },
- "queryContext" : [ {
- "objectType" : "",
- "objectName" : "",
- "startIndex" : 8,
- "stopIndex" : 24,
- "fragment" : "randstr(10.0D, 0)"
- } ]
-}
+[Analyzer test output redacted due to nondeterminism]
-- !query
SELECT randstr(NULL, 0) AS result
-- !query analysis
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
- "sqlState" : "42K09",
- "messageParameters" : {
- "inputSql" : "\"NULL\"",
- "inputType" : "\"VOID\"",
- "paramIndex" : "first",
- "requiredType" : "INT or SMALLINT",
- "sqlExpr" : "\"randstr(NULL, 0)\""
- },
- "queryContext" : [ {
- "objectType" : "",
- "objectName" : "",
- "startIndex" : 8,
- "stopIndex" : 23,
- "fragment" : "randstr(NULL, 0)"
- } ]
-}
+[Analyzer test output redacted due to nondeterminism]
-- !query
SELECT randstr(0, NULL) AS result
-- !query analysis
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
- "sqlState" : "42K09",
- "messageParameters" : {
- "inputSql" : "\"NULL\"",
- "inputType" : "\"VOID\"",
- "paramIndex" : "second",
- "requiredType" : "INT or SMALLINT",
- "sqlExpr" : "\"randstr(0, NULL)\""
- },
- "queryContext" : [ {
- "objectType" : "",
- "objectName" : "",
- "startIndex" : 8,
- "stopIndex" : 23,
- "fragment" : "randstr(0, NULL)"
- } ]
-}
+[Analyzer test output redacted due to nondeterminism]
-- !query
@@ -701,7 +647,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"messageParameters" : {
"inputExpr" : "\"col\"",
"inputName" : "`length`",
- "inputType" : "INT or SMALLINT",
+ "inputType" : "integer",
"sqlExpr" : "\"randstr(col, 0)\""
},
"queryContext" : [ {
@@ -724,7 +670,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"messageParameters" : {
"inputExpr" : "\"col\"",
"inputName" : "`seed`",
- "inputType" : "INT or SMALLINT",
+ "inputType" : "integer",
"sqlExpr" : "\"randstr(10, col)\""
},
"queryContext" : [ {
@@ -758,3 +704,57 @@ org.apache.spark.sql.AnalysisException
"fragment" : "randstr(10, 0, 1)"
} ]
}
+
+
+-- !query
+SELECT randstr(-1, 0) AS result
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+SELECT randstr(10, "a") AS result FROM VALUES (0) tab(a)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"a\"",
+ "inputType" : "\"STRING\"",
+ "paramIndex" : "second",
+ "requiredType" : "(\"INT\" or \"BIGINT\")",
+ "sqlExpr" : "\"randstr(10, a)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 23,
+ "fragment" : "randstr(10, \"a\")"
+ } ]
+}
+
+
+-- !query
+SELECT randstr(10, 1.5) AS result FROM VALUES (0) tab(a)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "inputSql" : "\"1.5\"",
+ "inputType" : "\"DECIMAL(2,1)\"",
+ "paramIndex" : "second",
+ "requiredType" : "(\"INT\" or \"BIGINT\")",
+ "sqlExpr" : "\"randstr(10, 1.5)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 23,
+ "fragment" : "randstr(10, 1.5)"
+ } ]
+}
diff --git a/sql/core/src/test/resources/sql-tests/inputs/random.sql
b/sql/core/src/test/resources/sql-tests/inputs/random.sql
index a17571aa7286..95be99595cc8 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/random.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/random.sql
@@ -62,13 +62,19 @@ SELECT uniform(0, 'def', 0) AS result;
SELECT uniform(0, 10, 'ghi') AS result;
-- The randstr random string generation function supports generating random
strings within a
--- specified length. We use a seed of zero for these queries to keep tests
deterministic.
+-- specified length. We use a seed of zero for most queries to keep tests
deterministic.
SELECT randstr(1, 0) AS result;
SELECT randstr(5, 0) AS result;
SELECT randstr(10, 0) AS result;
SELECT randstr(10S, 0) AS result;
+SELECT randstr(CAST(10 AS TINYINT), 0) AS result;
+SELECT randstr(CAST(10 AS BIGINT), 0) AS result;
+SELECT randstr(1.0F, 0) AS result;
+SELECT randstr(1.0D, 0) AS result;
+SELECT randstr(cast(1 AS DECIMAL(10, 2)), 0) AS result;
SELECT randstr(10, 0) AS result FROM VALUES (0), (1), (2) tab(col);
SELECT randstr(10) IS NOT NULL AS result;
+SELECT randstr(1, -1) AS result;
-- Negative test cases for the randstr random number generator.
SELECT randstr(10L, 0) AS result;
SELECT randstr(10.0F, 0) AS result;
@@ -78,3 +84,6 @@ SELECT randstr(0, NULL) AS result;
SELECT randstr(col, 0) AS result FROM VALUES (0), (1), (2) tab(col);
SELECT randstr(10, col) AS result FROM VALUES (0), (1), (2) tab(col);
SELECT randstr(10, 0, 1) AS result;
+SELECT randstr(-1, 0) AS result;
+SELECT randstr(10, "a") AS result FROM VALUES (0) tab(a);
+SELECT randstr(10, 1.5) AS result FROM VALUES (0) tab(a);
diff --git a/sql/core/src/test/resources/sql-tests/results/random.sql.out
b/sql/core/src/test/resources/sql-tests/results/random.sql.out
index eebfac5fc2b1..049d134003cd 100644
--- a/sql/core/src/test/resources/sql-tests/results/random.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/random.sql.out
@@ -671,6 +671,46 @@ struct<result:string>
ceV0PXaR2I
+-- !query
+SELECT randstr(CAST(10 AS TINYINT), 0) AS result
+-- !query schema
+struct<result:string>
+-- !query output
+ceV0PXaR2I
+
+
+-- !query
+SELECT randstr(CAST(10 AS BIGINT), 0) AS result
+-- !query schema
+struct<result:string>
+-- !query output
+ceV0PXaR2I
+
+
+-- !query
+SELECT randstr(1.0F, 0) AS result
+-- !query schema
+struct<result:string>
+-- !query output
+c
+
+
+-- !query
+SELECT randstr(1.0D, 0) AS result
+-- !query schema
+struct<result:string>
+-- !query output
+c
+
+
+-- !query
+SELECT randstr(cast(1 AS DECIMAL(10, 2)), 0) AS result
+-- !query schema
+struct<result:string>
+-- !query output
+c
+
+
-- !query
SELECT randstr(10, 0) AS result FROM VALUES (0), (1), (2) tab(col)
-- !query schema
@@ -689,206 +729,193 @@ struct<result:boolean>
true
+-- !query
+SELECT randstr(1, -1) AS result
+-- !query schema
+struct<result:string>
+-- !query output
+S
+
+
-- !query
SELECT randstr(10L, 0) AS result
-- !query schema
-struct<>
+struct<result:string>
-- !query output
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
- "sqlState" : "42K09",
- "messageParameters" : {
- "inputSql" : "\"10\"",
- "inputType" : "\"BIGINT\"",
- "paramIndex" : "first",
- "requiredType" : "INT or SMALLINT",
- "sqlExpr" : "\"randstr(10, 0)\""
- },
- "queryContext" : [ {
- "objectType" : "",
- "objectName" : "",
- "startIndex" : 8,
- "stopIndex" : 22,
- "fragment" : "randstr(10L, 0)"
- } ]
-}
+ceV0PXaR2I
-- !query
SELECT randstr(10.0F, 0) AS result
-- !query schema
-struct<>
+struct<result:string>
-- !query output
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
- "sqlState" : "42K09",
- "messageParameters" : {
- "inputSql" : "\"10.0\"",
- "inputType" : "\"FLOAT\"",
- "paramIndex" : "first",
- "requiredType" : "INT or SMALLINT",
- "sqlExpr" : "\"randstr(10.0, 0)\""
- },
- "queryContext" : [ {
- "objectType" : "",
- "objectName" : "",
- "startIndex" : 8,
- "stopIndex" : 24,
- "fragment" : "randstr(10.0F, 0)"
- } ]
-}
+ceV0PXaR2I
-- !query
SELECT randstr(10.0D, 0) AS result
-- !query schema
+struct<result:string>
+-- !query output
+ceV0PXaR2I
+
+
+-- !query
+SELECT randstr(NULL, 0) AS result
+-- !query schema
+struct<result:string>
+-- !query output
+
+
+
+-- !query
+SELECT randstr(0, NULL) AS result
+-- !query schema
+struct<result:string>
+-- !query output
+
+
+
+-- !query
+SELECT randstr(col, 0) AS result FROM VALUES (0), (1), (2) tab(col)
+-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
- "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
"sqlState" : "42K09",
"messageParameters" : {
- "inputSql" : "\"10.0\"",
- "inputType" : "\"DOUBLE\"",
- "paramIndex" : "first",
- "requiredType" : "INT or SMALLINT",
- "sqlExpr" : "\"randstr(10.0, 0)\""
+ "inputExpr" : "\"col\"",
+ "inputName" : "`length`",
+ "inputType" : "integer",
+ "sqlExpr" : "\"randstr(col, 0)\""
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 8,
- "stopIndex" : 24,
- "fragment" : "randstr(10.0D, 0)"
+ "stopIndex" : 22,
+ "fragment" : "randstr(col, 0)"
} ]
}
-- !query
-SELECT randstr(NULL, 0) AS result
+SELECT randstr(10, col) AS result FROM VALUES (0), (1), (2) tab(col)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
- "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
"sqlState" : "42K09",
"messageParameters" : {
- "inputSql" : "\"NULL\"",
- "inputType" : "\"VOID\"",
- "paramIndex" : "first",
- "requiredType" : "INT or SMALLINT",
- "sqlExpr" : "\"randstr(NULL, 0)\""
+ "inputExpr" : "\"col\"",
+ "inputName" : "`seed`",
+ "inputType" : "integer",
+ "sqlExpr" : "\"randstr(10, col)\""
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 8,
"stopIndex" : 23,
- "fragment" : "randstr(NULL, 0)"
+ "fragment" : "randstr(10, col)"
} ]
}
-- !query
-SELECT randstr(0, NULL) AS result
+SELECT randstr(10, 0, 1) AS result
-- !query schema
struct<>
-- !query output
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
+org.apache.spark.sql.AnalysisException
{
- "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
- "sqlState" : "42K09",
+ "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ "sqlState" : "42605",
"messageParameters" : {
- "inputSql" : "\"NULL\"",
- "inputType" : "\"VOID\"",
- "paramIndex" : "second",
- "requiredType" : "INT or SMALLINT",
- "sqlExpr" : "\"randstr(0, NULL)\""
+ "actualNum" : "3",
+ "docroot" : "https://spark.apache.org/docs/latest",
+ "expectedNum" : "[1, 2]",
+ "functionName" : "`randstr`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 8,
- "stopIndex" : 23,
- "fragment" : "randstr(0, NULL)"
+ "stopIndex" : 24,
+ "fragment" : "randstr(10, 0, 1)"
} ]
}
-- !query
-SELECT randstr(col, 0) AS result FROM VALUES (0), (1), (2) tab(col)
+SELECT randstr(-1, 0) AS result
-- !query schema
struct<>
-- !query output
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
+org.apache.spark.SparkRuntimeException
{
- "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
- "sqlState" : "42K09",
+ "errorClass" : "INVALID_PARAMETER_VALUE.LENGTH",
+ "sqlState" : "22023",
"messageParameters" : {
- "inputExpr" : "\"col\"",
- "inputName" : "`length`",
- "inputType" : "INT or SMALLINT",
- "sqlExpr" : "\"randstr(col, 0)\""
- },
- "queryContext" : [ {
- "objectType" : "",
- "objectName" : "",
- "startIndex" : 8,
- "stopIndex" : 22,
- "fragment" : "randstr(col, 0)"
- } ]
+ "functionName" : "`randstr`",
+ "length" : "-1",
+ "parameter" : "`length`"
+ }
}
-- !query
-SELECT randstr(10, col) AS result FROM VALUES (0), (1), (2) tab(col)
+SELECT randstr(10, "a") AS result FROM VALUES (0) tab(a)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
- "errorClass" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
"sqlState" : "42K09",
"messageParameters" : {
- "inputExpr" : "\"col\"",
- "inputName" : "`seed`",
- "inputType" : "INT or SMALLINT",
- "sqlExpr" : "\"randstr(10, col)\""
+ "inputSql" : "\"a\"",
+ "inputType" : "\"STRING\"",
+ "paramIndex" : "second",
+ "requiredType" : "(\"INT\" or \"BIGINT\")",
+ "sqlExpr" : "\"randstr(10, a)\""
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 8,
"stopIndex" : 23,
- "fragment" : "randstr(10, col)"
+ "fragment" : "randstr(10, \"a\")"
} ]
}
-- !query
-SELECT randstr(10, 0, 1) AS result
+SELECT randstr(10, 1.5) AS result FROM VALUES (0) tab(a)
-- !query schema
struct<>
-- !query output
-org.apache.spark.sql.AnalysisException
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
- "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
- "sqlState" : "42605",
+ "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
+ "sqlState" : "42K09",
"messageParameters" : {
- "actualNum" : "3",
- "docroot" : "https://spark.apache.org/docs/latest",
- "expectedNum" : "[1, 2]",
- "functionName" : "`randstr`"
+ "inputSql" : "\"1.5\"",
+ "inputType" : "\"DECIMAL(2,1)\"",
+ "paramIndex" : "second",
+ "requiredType" : "(\"INT\" or \"BIGINT\")",
+ "sqlExpr" : "\"randstr(10, 1.5)\""
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 8,
- "stopIndex" : 24,
- "fragment" : "randstr(10, 0, 1)"
+ "stopIndex" : 23,
+ "fragment" : "randstr(10, 1.5)"
} ]
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/CollationExpressionWalkerSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/CollationExpressionWalkerSuite.scala
index d0581621148a..1f9589c1c9ce 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/CollationExpressionWalkerSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/CollationExpressionWalkerSuite.scala
@@ -104,6 +104,7 @@ class CollationExpressionWalkerSuite extends SparkFunSuite
with SharedSparkSessi
Literal.create("DuMmY sTrInG".getBytes)
}
case BooleanType => Literal(true)
+ case ByteType => Literal(5.toByte)
case _: DatetimeType => Literal(Timestamp.valueOf("2009-07-30 12:58:59"))
case DecimalType => Literal((new Decimal).set(5))
case _: DecimalType => Literal((new Decimal).set(5))
@@ -183,6 +184,7 @@ class CollationExpressionWalkerSuite extends SparkFunSuite
with SharedSparkSessi
case Utf8Lcase => "Cast('DuMmY sTrInG' collate utf8_lcase as BINARY)"
}
case BooleanType => "True"
+ case ByteType => "cast(5 as tinyint)"
case _: DatetimeType => "date'2016-04-08'"
case DecimalType => "5.0"
case _: DecimalType => "5.0"
@@ -243,6 +245,7 @@ class CollationExpressionWalkerSuite extends SparkFunSuite
with SharedSparkSessi
case AnyTimestampType => "TIMESTAMP"
case BinaryType => "BINARY"
case BooleanType => "BOOLEAN"
+ case ByteType => "TINYINT"
case _: DatetimeType => "DATE"
case DecimalType => "DECIMAL(2, 1)"
case _: DecimalType => "DECIMAL(2, 1)"
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 018aa2159ba1..fc6d3023ed07 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -24,7 +24,7 @@ import scala.reflect.runtime.universe.runtimeMirror
import scala.util.Random
import org.apache.spark.{QueryContextType, SPARK_DOC_ROOT, SparkException,
SparkRuntimeException}
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.{ExtendedAnalysisException, InternalRow}
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.{Expression, Literal,
UnaryExpression}
import org.apache.spark.sql.catalyst.expressions.Cast._
@@ -458,14 +458,14 @@ class DataFrameFunctionsSuite extends QueryTest with
SharedSparkSession {
val df = Seq((0)).toDF("a")
var expr = randstr(lit(10), lit("a"))
checkError(
- intercept[AnalysisException](df.select(expr)),
+ intercept[ExtendedAnalysisException](df.select(expr).collect()),
condition = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
parameters = Map(
"sqlExpr" -> "\"randstr(10, a)\"",
"paramIndex" -> "second",
"inputSql" -> "\"a\"",
"inputType" -> "\"STRING\"",
- "requiredType" -> "INT or SMALLINT"),
+ "requiredType" -> "(\"INT\" or \"BIGINT\")"),
context = ExpectedContext(
contextType = QueryContextType.DataFrame,
fragment = "randstr",
@@ -480,7 +480,7 @@ class DataFrameFunctionsSuite extends QueryTest with
SharedSparkSession {
condition = "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT",
parameters = Map(
"inputName" -> "`length`",
- "inputType" -> "INT or SMALLINT",
+ "inputType" -> "integer",
"inputExpr" -> "\"a\"",
"sqlExpr" -> "\"randstr(a, 10)\""),
context = ExpectedContext(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]