[
https://issues.apache.org/jira/browse/SPARK-41901?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Sandeep Singh updated SPARK-41901:
----------------------------------
Description:
{code:java}
from pyspark.sql import functions
funs = [
(functions.acosh, "ACOSH"),
(functions.asinh, "ASINH"),
(functions.atanh, "ATANH"),
]
cols = ["a", functions.col("a")]
for f, alias in funs:
for c in cols:
self.assertIn(f"{alias}(a)", repr(f(c))){code}
{code:java}
Traceback (most recent call last):
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py",
line 271, in test_inverse_trig_functions
self.assertIn(f"{alias}(a)", repr(f(c)))
AssertionError: 'ACOSH(a)' not found in
"Column<'acosh(ColumnReference(a))'>"{code}
{code:java}
from pyspark.sql.functions import col, lit, overlay
from itertools import chain
import re
actual = list(
chain.from_iterable(
[
re.findall("(overlay\\(.*\\))", str(x))
for x in [
overlay(col("foo"), col("bar"), 1),
overlay("x", "y", 3),
overlay(col("x"), col("y"), 1, 3),
overlay("x", "y", 2, 5),
overlay("x", "y", lit(11)),
overlay("x", "y", lit(2), lit(5)),
]
]
)
)
expected = [
"overlay(foo, bar, 1, -1)",
"overlay(x, y, 3, -1)",
"overlay(x, y, 1, 3)",
"overlay(x, y, 2, 5)",
"overlay(x, y, 11, -1)",
"overlay(x, y, 2, 5)",
]
self.assertListEqual(actual, expected)
df = self.spark.createDataFrame([("SPARK_SQL", "CORE", 7, 0)], ("x", "y",
"pos", "len"))
exp = [Row(ol="SPARK_CORESQL")]
self.assertTrue(
all(
[
df.select(overlay(df.x, df.y, 7, 0).alias("ol")).collect() == exp,
df.select(overlay(df.x, df.y, lit(7),
lit(0)).alias("ol")).collect() == exp,
df.select(overlay("x", "y", "pos", "len").alias("ol")).collect() ==
exp,
]
)
) {code}
{code:java}
Traceback (most recent call last):
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py",
line 675, in test_overlay
self.assertListEqual(actual, expected)
AssertionError: Lists differ: ['overlay(ColumnReference(foo), ColumnReference(bar[402 chars]5))'] != ['overlay(foo, bar, 1, -1)', 'overlay(x, y, 3, -1)'[90 chars] 5)']

First differing element 0:
'overlay(ColumnReference(foo), ColumnReference(bar), Literal(1), Literal(-1))'
'overlay(foo, bar, 1, -1)'

- ['overlay(ColumnReference(foo), ColumnReference(bar), Literal(1), Literal(-1))',
- 'overlay(ColumnReference(x), ColumnReference(y), Literal(3), Literal(-1))',
- 'overlay(ColumnReference(x), ColumnReference(y), Literal(1), Literal(3))',
- 'overlay(ColumnReference(x), ColumnReference(y), Literal(2), Literal(5))',
- 'overlay(ColumnReference(x), ColumnReference(y), Literal(11), Literal(-1))',
- 'overlay(ColumnReference(x), ColumnReference(y), Literal(2), Literal(5))']
+ ['overlay(foo, bar, 1, -1)',
+ 'overlay(x, y, 3, -1)',
+ 'overlay(x, y, 1, 3)',
+ 'overlay(x, y, 2, 5)',
+ 'overlay(x, y, 11, -1)',
+ 'overlay(x, y, 2, 5)']
{code}
was:
{code:java}
dt = datetime.date(2021, 12, 27)
# Note: a number variable in Python gets converted to a LongType column;
# this is not supported by the function, so cast to Integer explicitly
df = self.spark.createDataFrame([Row(date=dt, add=2)], "date date, add integer")
self.assertTrue(
all(
df.select(
date_add(df.date, df.add) == datetime.date(2021, 12, 29),
date_add(df.date, "add") == datetime.date(2021, 12, 29),
date_add(df.date, 3) == datetime.date(2021, 12, 30),
).first()
)
){code}
{code:java}
Traceback (most recent call last):
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py",
line 391, in test_date_add_function
).first()
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/dataframe.py",
line 246, in first
return self.head()
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/dataframe.py",
line 310, in head
rs = self.head(1)
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/dataframe.py",
line 312, in head
return self.take(n)
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/dataframe.py",
line 317, in take
return self.limit(num).collect()
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/dataframe.py",
line 1076, in collect
table = self._session.client.to_table(query)
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/client.py", line
414, in to_table
table, _ = self._execute_and_fetch(req)
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/client.py", line
586, in _execute_and_fetch
self._handle_error(rpc_error)
File
"/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/client.py", line
625, in _handle_error
raise SparkConnectAnalysisException(
pyspark.sql.connect.client.SparkConnectAnalysisException:
[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "date_add(date, add)"
due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or
"TINYINT") type, however "add" has the type "BIGINT".
Plan: 'GlobalLimit 1
+- 'LocalLimit 1
+- 'Project [unresolvedalias('`==`(date_add(date#753, add#754L),
2021-12-29), None), unresolvedalias('`==`(date_add(date#753, add#754L),
2021-12-29), None), (date_add(date#753, 3) = 2021-12-30) AS (date_add(date, 3)
= DATE '2021-12-30')#759]
+- Project [date#753, add#754L]
+- Project [date#749 AS date#753, add#750L AS add#754L]
+- LocalRelation [date#749, add#750L]{code}
> Parity in String representation of Column
> -----------------------------------------
>
> Key: SPARK-41901
> URL: https://issues.apache.org/jira/browse/SPARK-41901
> Project: Spark
> Issue Type: Sub-task
> Components: Connect
> Affects Versions: 3.4.0
> Reporter: Sandeep Singh
> Priority: Major
>
> {code:java}
> from pyspark.sql import functions
> funs = [
> (functions.acosh, "ACOSH"),
> (functions.asinh, "ASINH"),
> (functions.atanh, "ATANH"),
> ]
> cols = ["a", functions.col("a")]
> for f, alias in funs:
> for c in cols:
> self.assertIn(f"{alias}(a)", repr(f(c))){code}
> {code:java}
> Traceback (most recent call last):
> File
> "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py",
> line 271, in test_inverse_trig_functions
> self.assertIn(f"{alias}(a)", repr(f(c)))
> AssertionError: 'ACOSH(a)' not found in
> "Column<'acosh(ColumnReference(a))'>"{code}
>
>
> {code:java}
> from pyspark.sql.functions import col, lit, overlay
> from itertools import chain
> import re
> actual = list(
> chain.from_iterable(
> [
> re.findall("(overlay\\(.*\\))", str(x))
> for x in [
> overlay(col("foo"), col("bar"), 1),
> overlay("x", "y", 3),
> overlay(col("x"), col("y"), 1, 3),
> overlay("x", "y", 2, 5),
> overlay("x", "y", lit(11)),
> overlay("x", "y", lit(2), lit(5)),
> ]
> ]
> )
> )
> expected = [
> "overlay(foo, bar, 1, -1)",
> "overlay(x, y, 3, -1)",
> "overlay(x, y, 1, 3)",
> "overlay(x, y, 2, 5)",
> "overlay(x, y, 11, -1)",
> "overlay(x, y, 2, 5)",
> ]
> self.assertListEqual(actual, expected)
> df = self.spark.createDataFrame([("SPARK_SQL", "CORE", 7, 0)], ("x", "y",
> "pos", "len"))
> exp = [Row(ol="SPARK_CORESQL")]
> self.assertTrue(
> all(
> [
> df.select(overlay(df.x, df.y, 7, 0).alias("ol")).collect() == exp,
> df.select(overlay(df.x, df.y, lit(7),
> lit(0)).alias("ol")).collect() == exp,
> df.select(overlay("x", "y", "pos", "len").alias("ol")).collect()
> == exp,
> ]
> )
> ) {code}
> {code:java}
> Traceback (most recent call last):
> File
> "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py",
> line 675, in test_overlay
> self.assertListEqual(actual, expected)
> AssertionError: Lists differ: ['overlay(ColumnReference(foo), ColumnReference(bar[402 chars]5))'] != ['overlay(foo, bar, 1, -1)', 'overlay(x, y, 3, -1)'[90 chars] 5)']
>
> First differing element 0:
> 'overlay(ColumnReference(foo), ColumnReference(bar), Literal(1), Literal(-1))'
> 'overlay(foo, bar, 1, -1)'
>
> - ['overlay(ColumnReference(foo), ColumnReference(bar), Literal(1), Literal(-1))',
> - 'overlay(ColumnReference(x), ColumnReference(y), Literal(3), Literal(-1))',
> - 'overlay(ColumnReference(x), ColumnReference(y), Literal(1), Literal(3))',
> - 'overlay(ColumnReference(x), ColumnReference(y), Literal(2), Literal(5))',
> - 'overlay(ColumnReference(x), ColumnReference(y), Literal(11), Literal(-1))',
> - 'overlay(ColumnReference(x), ColumnReference(y), Literal(2), Literal(5))']
> + ['overlay(foo, bar, 1, -1)',
> + 'overlay(x, y, 3, -1)',
> + 'overlay(x, y, 1, 3)',
> + 'overlay(x, y, 2, 5)',
> + 'overlay(x, y, 11, -1)',
> + 'overlay(x, y, 2, 5)']
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]