[
https://issues.apache.org/jira/browse/SPARK-41903?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Sandeep Singh updated SPARK-41903:
----------------------------------
Description:
{code:java}
import numpy as np
arr_dtype_to_spark_dtypes = [
("int8", [("b", "array<smallint>")]),
("int16", [("b", "array<smallint>")]),
("int32", [("b", "array<int>")]),
("int64", [("b", "array<bigint>")]),
("float32", [("b", "array<float>")]),
("float64", [("b", "array<double>")]),
]
for t, expected_spark_dtypes in arr_dtype_to_spark_dtypes:
    arr = np.array([1, 2]).astype(t)
    self.assertEqual(
        expected_spark_dtypes,
        self.spark.range(1).select(lit(arr).alias("b")).dtypes,
    )

arr = np.array([1, 2]).astype(np.uint)
with self.assertRaisesRegex(
    TypeError, "The type of array scalar '%s' is not supported" % arr.dtype
):
    self.spark.range(1).select(lit(arr).alias("b")){code}
{code:java}
Traceback (most recent call last):
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py",
line 1100, in test_ndarray_input
expected_spark_dtypes,
self.spark.range(1).select(lit(arr).alias("b")).dtypes
File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/utils.py", line
332, in wrapped
return getattr(functions, f.__name__)(*args, **kwargs)
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/functions.py",
line 198, in lit
return Column(LiteralExpression._from_value(col))
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/expressions.py",
line 266, in _from_value
return LiteralExpression(value=value,
dataType=LiteralExpression._infer_type(value))
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/expressions.py",
line 262, in _infer_type
raise ValueError(f"Unsupported Data Type {type(value).__name__}")
ValueError: Unsupported Data Type ndarray {code}
was:
{code:java}
import numpy as np
from pyspark.sql.functions import lit
dtype_to_spark_dtypes = [
(np.int8, [("CAST(1 AS TINYINT)", "tinyint")]),
(np.int16, [("CAST(1 AS SMALLINT)", "smallint")]),
(np.int32, [("CAST(1 AS INT)", "int")]),
(np.int64, [("CAST(1 AS BIGINT)", "bigint")]),
(np.float32, [("CAST(1.0 AS FLOAT)", "float")]),
(np.float64, [("CAST(1.0 AS DOUBLE)", "double")]),
(np.bool_, [("true", "boolean")]),
]
for dtype, spark_dtypes in dtype_to_spark_dtypes:
    self.assertEqual(self.spark.range(1).select(lit(dtype(1))).dtypes,
                     spark_dtypes){code}
{code:java}
Traceback (most recent call last):
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py",
line 1064, in test_lit_np_scalar
self.assertEqual(self.spark.range(1).select(lit(dtype(1))).dtypes,
spark_dtypes)
File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/utils.py", line
332, in wrapped
return getattr(functions, f.__name__)(*args, **kwargs)
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/functions.py",
line 198, in lit
return Column(LiteralExpression._from_value(col))
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/expressions.py",
line 266, in _from_value
return LiteralExpression(value=value,
dataType=LiteralExpression._infer_type(value))
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/expressions.py",
line 262, in _infer_type
raise ValueError(f"Unsupported Data Type {type(value).__name__}")
ValueError: Unsupported Data Type int8
{code}
> Support data type ndarray
> -------------------------
>
> Key: SPARK-41903
> URL: https://issues.apache.org/jira/browse/SPARK-41903
> Project: Spark
> Issue Type: Sub-task
> Components: Connect
> Affects Versions: 3.4.0
> Reporter: Sandeep Singh
> Priority: Major
>
> {code:java}
> import numpy as np
> arr_dtype_to_spark_dtypes = [
> ("int8", [("b", "array<smallint>")]),
> ("int16", [("b", "array<smallint>")]),
> ("int32", [("b", "array<int>")]),
> ("int64", [("b", "array<bigint>")]),
> ("float32", [("b", "array<float>")]),
> ("float64", [("b", "array<double>")]),
> ]
> for t, expected_spark_dtypes in arr_dtype_to_spark_dtypes:
>     arr = np.array([1, 2]).astype(t)
>     self.assertEqual(
>         expected_spark_dtypes,
>         self.spark.range(1).select(lit(arr).alias("b")).dtypes,
>     )
>
> arr = np.array([1, 2]).astype(np.uint)
> with self.assertRaisesRegex(
>     TypeError, "The type of array scalar '%s' is not supported" % arr.dtype
> ):
>     self.spark.range(1).select(lit(arr).alias("b")){code}
> {code:java}
> Traceback (most recent call last):
> File
> "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py",
> line 1100, in test_ndarray_input
> expected_spark_dtypes,
> self.spark.range(1).select(lit(arr).alias("b")).dtypes
> File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/utils.py", line
> 332, in wrapped
> return getattr(functions, f.__name__)(*args, **kwargs)
> File
> "/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/functions.py",
> line 198, in lit
> return Column(LiteralExpression._from_value(col))
> File
> "/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/expressions.py",
> line 266, in _from_value
> return LiteralExpression(value=value,
> dataType=LiteralExpression._infer_type(value))
> File
> "/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/expressions.py",
> line 262, in _infer_type
> raise ValueError(f"Unsupported Data Type {type(value).__name__}")
> ValueError: Unsupported Data Type ndarray {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]