[ https://issues.apache.org/jira/browse/SPARK-41902?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sandeep Singh updated SPARK-41902: ---------------------------------- Description: {code:java} from pyspark.sql.functions import flatten, struct, transform df = self.spark.sql("SELECT array(1, 2, 3) as numbers, array('a', 'b', 'c') as letters") actual = df.select( flatten( transform( "numbers", lambda number: transform( "letters", lambda letter: struct(number.alias("n"), letter.alias("l")) ), ) ) ).first()[0] expected = [ (1, "a"), (1, "b"), (1, "c"), (2, "a"), (2, "b"), (2, "c"), (3, "a"), (3, "b"), (3, "c"), ] self.assertEquals(actual, expected){code} {code:java} Traceback (most recent call last): File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", line 809, in test_nested_higher_order_function self.assertEquals(actual, expected) AssertionError: Lists differ: [{'n': 'a', 'l': 'a'}, {'n': 'b', 'l': 'b'[151 chars]'c'}] != [(1, 'a'), (1, 'b'), (1, 'c'), (2, 'a'), ([43 chars]'c')] First differing element 0: {'n': 'a', 'l': 'a'} (1, 'a') - [{'l': 'a', 'n': 'a'}, - {'l': 'b', 'n': 'b'}, - {'l': 'c', 'n': 'c'}, - {'l': 'a', 'n': 'a'}, - {'l': 'b', 'n': 'b'}, - {'l': 'c', 'n': 'c'}, - {'l': 'a', 'n': 'a'}, - {'l': 'b', 'n': 'b'}, - {'l': 'c', 'n': 'c'}] + [(1, 'a'), + (1, 'b'), + (1, 'c'), + (2, 'a'), + (2, 'b'), + (2, 'c'), + (3, 'a'), + (3, 'b'), + (3, 'c')] {code} was: {code:java} expected = {"a": 1, "b": 2} expected2 = {"c": 3, "d": 4} df = self.spark.createDataFrame( [(list(expected.keys()), list(expected.values()))], ["k", "v"] ) actual = ( df.select( expr("map('c', 3, 'd', 4) as dict2"), map_from_arrays(df.k, df.v).alias("dict"), "*", ) .select( map_contains_key("dict", "a").alias("one"), map_contains_key("dict", "d").alias("not_exists"), map_keys("dict").alias("keys"), map_values("dict").alias("values"), map_entries("dict").alias("items"), "*", ) .select( map_concat("dict", "dict2").alias("merged"), map_from_entries(arrays_zip("keys", "values")).alias("from_items"), "*", ) .first() ) self.assertEqual(expected, 
actual["dict"]){code} {code:java} Traceback (most recent call last): File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", line 1142, in test_map_functions self.assertEqual(expected, actual["dict"]) AssertionError: {'a': 1, 'b': 2} != [('a', 1), ('b', 2)]{code} Summary: Parity in String representation of higher_order_function (was: Fix String representation of maps created by `map_from_arrays`) > Parity in String representation of higher_order_function > -------------------------------------------------------- > > Key: SPARK-41902 > URL: https://issues.apache.org/jira/browse/SPARK-41902 > Project: Spark > Issue Type: Sub-task > Components: Connect > Affects Versions: 3.4.0 > Reporter: Sandeep Singh > Priority: Major > > {code:java} > from pyspark.sql.functions import flatten, struct, transform > df = self.spark.sql("SELECT array(1, 2, 3) as numbers, array('a', 'b', 'c') > as letters") > actual = df.select( > flatten( > transform( > "numbers", > lambda number: transform( > "letters", lambda letter: struct(number.alias("n"), > letter.alias("l")) > ), > ) > ) > ).first()[0] > expected = [ > (1, "a"), > (1, "b"), > (1, "c"), > (2, "a"), > (2, "b"), > (2, "c"), > (3, "a"), > (3, "b"), > (3, "c"), > ] > self.assertEquals(actual, expected){code} > {code:java} > Traceback (most recent call last): > File > "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", > line 809, in test_nested_higher_order_function > self.assertEquals(actual, expected) > AssertionError: Lists differ: [{'n': 'a', 'l': 'a'}, {'n': 'b', 'l': 'b'[151 > chars]'c'}] != [(1, 'a'), (1, 'b'), (1, 'c'), (2, 'a'), ([43 chars]'c')] > First differing element 0: > {'n': 'a', 'l': 'a'} > (1, 'a') > - [{'l': 'a', 'n': 'a'}, > - {'l': 'b', 'n': 'b'}, > - {'l': 'c', 'n': 'c'}, > - {'l': 'a', 'n': 'a'}, > - {'l': 'b', 'n': 'b'}, > - {'l': 'c', 'n': 'c'}, > - {'l': 'a', 'n': 'a'}, > - {'l': 'b', 'n': 'b'}, > - {'l': 'c', 'n': 'c'}] > + [(1, 'a'), > + (1, 'b'), > 
+ (1, 'c'), > + (2, 'a'), > + (2, 'b'), > + (2, 'c'), > + (3, 'a'), > + (3, 'b'), > + (3, 'c')] > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org For additional commands, e-mail: issues-help@spark.apache.org