This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new e63b5601c1bd [SPARK-49595][CONNECT][SQL] Fix `DataFrame.unpivot/melt` 
in Spark Connect Scala Client
e63b5601c1bd is described below

commit e63b5601c1bd74b2b0054d48f944424d12b79835
Author: Xinrong Meng <[email protected]>
AuthorDate: Wed Sep 11 08:52:33 2024 -0700

    [SPARK-49595][CONNECT][SQL] Fix `DataFrame.unpivot/melt` in Spark Connect 
Scala Client
    
    ### What changes were proposed in this pull request?
    Fix DataFrame.unpivot/melt in Spark Connect Scala Client by correctly 
assigning the name for the variable column.
    
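    The original code called `setValueColumnName` for both the variable and 
value columns, so the variable column name was overwritten by the value column 
name and `setVariableColumnName` was never called.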
    
    ### Why are the changes needed?
    Without this fix, the variable column name is never set on the unpivot 
relation, so the variable column in the unpivot/melt result has an empty name.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. The names of the variable and value columns are now set correctly, as 
shown below.
    
    ```scala
    scala> val df = Seq((1, 11, 12L), (2, 21, 22L)).toDF("id", "int", "long")
    df: org.apache.spark.sql.package.DataFrame = [id: int, int: int ... 1 more 
field]
    
    scala> df.show()
    +---+---+----+
    | id|int|long|
    +---+---+----+
    |  1| 11|  12|
    |  2| 21|  22|
    +---+---+----+
    ```
    FROM (current master)
    ```scala
    scala> df.unpivot(Array($"id"), Array($"int", $"long"), "variable", 
"value").show()
    +---+----+-----+
    | id|    |value|
    +---+----+-----+
    |  1| int|   11|
    |  1|long|   12|
    |  2| int|   21|
    |  2|long|   22|
    +---+----+-----+
    
    ```
    
    TO
    ```scala
    scala> df.unpivot(Array($"id"), Array($"int", $"long"), "variable", 
"value").show()
    +---+--------+-----+
    | id|variable|value|
    +---+--------+-----+
    |  1|     int|   11|
    |  1|    long|   12|
    |  2|     int|   21|
    |  2|    long|   22|
    +---+--------+-----+
    ```
    
    ### How was this patch tested?
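    Existing tests; the query-test golden files (`.json`, `.proto.bin`, 
`.explain`) were updated to include the variable column name.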
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #48069 from xinrong-meng/fix_unpivot.
    
    Authored-by: Xinrong Meng <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../jvm/src/main/scala/org/apache/spark/sql/Dataset.scala |   2 +-
 .../query-tests/explain-results/melt_no_values.explain    |   2 +-
 .../query-tests/explain-results/melt_values.explain       |   2 +-
 .../query-tests/explain-results/unpivot_no_values.explain |   2 +-
 .../query-tests/explain-results/unpivot_values.explain    |   2 +-
 .../resources/query-tests/queries/melt_no_values.json     |   1 +
 .../query-tests/queries/melt_no_values.proto.bin          | Bin 71 -> 77 bytes
 .../test/resources/query-tests/queries/melt_values.json   |   1 +
 .../resources/query-tests/queries/melt_values.proto.bin   | Bin 73 -> 79 bytes
 .../resources/query-tests/queries/unpivot_no_values.json  |   1 +
 .../query-tests/queries/unpivot_no_values.proto.bin       | Bin 64 -> 70 bytes
 .../resources/query-tests/queries/unpivot_values.json     |   1 +
 .../query-tests/queries/unpivot_values.proto.bin          | Bin 80 -> 86 bytes
 13 files changed, 9 insertions(+), 5 deletions(-)

diff --git 
a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
 
b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
index f5606215be89..519193ebd9c7 100644
--- 
a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ 
b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -481,7 +481,7 @@ class Dataset[T] private[sql] (
     val unpivot = builder.getUnpivotBuilder
       .setInput(plan.getRoot)
       .addAllIds(ids.toImmutableArraySeq.map(_.expr).asJava)
-      .setValueColumnName(variableColumnName)
+      .setVariableColumnName(variableColumnName)
       .setValueColumnName(valueColumnName)
     valuesOption.foreach { values =>
       unpivot.getValuesBuilder
diff --git 
a/sql/connect/common/src/test/resources/query-tests/explain-results/melt_no_values.explain
 
b/sql/connect/common/src/test/resources/query-tests/explain-results/melt_no_values.explain
index f61fc30a3a52..053937d84ec8 100644
--- 
a/sql/connect/common/src/test/resources/query-tests/explain-results/melt_no_values.explain
+++ 
b/sql/connect/common/src/test/resources/query-tests/explain-results/melt_no_values.explain
@@ -1,2 +1,2 @@
-Expand [[id#0L, a#0, b, b#0]], [id#0L, a#0, #0, value#0]
+Expand [[id#0L, a#0, b, b#0]], [id#0L, a#0, name#0, value#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0]
diff --git 
a/sql/connect/common/src/test/resources/query-tests/explain-results/melt_values.explain
 
b/sql/connect/common/src/test/resources/query-tests/explain-results/melt_values.explain
index b5742d976dee..5a953f792cd3 100644
--- 
a/sql/connect/common/src/test/resources/query-tests/explain-results/melt_values.explain
+++ 
b/sql/connect/common/src/test/resources/query-tests/explain-results/melt_values.explain
@@ -1,2 +1,2 @@
-Expand [[a#0, id, id#0L]], [a#0, #0, value#0L]
+Expand [[a#0, id, id#0L]], [a#0, name#0, value#0L]
 +- LocalRelation <empty>, [id#0L, a#0, b#0]
diff --git 
a/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_no_values.explain
 
b/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_no_values.explain
index 8d1749ee74c5..2b2ba19d0c3d 100644
--- 
a/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_no_values.explain
+++ 
b/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_no_values.explain
@@ -1,2 +1,2 @@
-Expand [[id#0L, a, cast(a#0 as double)], [id#0L, b, b#0]], [id#0L, #0, value#0]
+Expand [[id#0L, a, cast(a#0 as double)], [id#0L, b, b#0]], [id#0L, name#0, 
value#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0]
diff --git 
a/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_values.explain
 
b/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_values.explain
index f61fc30a3a52..053937d84ec8 100644
--- 
a/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_values.explain
+++ 
b/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_values.explain
@@ -1,2 +1,2 @@
-Expand [[id#0L, a#0, b, b#0]], [id#0L, a#0, #0, value#0]
+Expand [[id#0L, a#0, b, b#0]], [id#0L, a#0, name#0, value#0]
 +- LocalRelation <empty>, [id#0L, a#0, b#0]
diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.json 
b/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.json
index 12db0a5abe36..a17da06b925b 100644
--- 
a/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.json
+++ 
b/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.json
@@ -20,6 +20,7 @@
         "unparsedIdentifier": "a"
       }
     }],
+    "variableColumnName": "name",
     "valueColumnName": "value"
   }
 }
\ No newline at end of file
diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.proto.bin
 
b/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.proto.bin
index 23a6aa1289a9..eebb7ad6df8e 100644
Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.proto.bin
 and 
b/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.proto.bin
 differ
diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/melt_values.json 
b/sql/connect/common/src/test/resources/query-tests/queries/melt_values.json
index e2a004f46e78..a8142ee3a846 100644
--- a/sql/connect/common/src/test/resources/query-tests/queries/melt_values.json
+++ b/sql/connect/common/src/test/resources/query-tests/queries/melt_values.json
@@ -23,6 +23,7 @@
         }
       }]
     },
+    "variableColumnName": "name",
     "valueColumnName": "value"
   }
 }
\ No newline at end of file
diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/melt_values.proto.bin
 
b/sql/connect/common/src/test/resources/query-tests/queries/melt_values.proto.bin
index e021e1110def..35829fc62dae 100644
Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/melt_values.proto.bin
 and 
b/sql/connect/common/src/test/resources/query-tests/queries/melt_values.proto.bin
 differ
diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.json
 
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.json
index 9f550c031914..96b76443b679 100644
--- 
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.json
+++ 
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.json
@@ -16,6 +16,7 @@
         "unparsedIdentifier": "id"
       }
     }],
+    "variableColumnName": "name",
     "valueColumnName": "value"
   }
 }
\ No newline at end of file
diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.proto.bin
 
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.proto.bin
index ac3bad8bd04e..b700190a9f66 100644
Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.proto.bin
 and 
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.proto.bin
 differ
diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.json 
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.json
index 92bc19d195c6..6c31afb04e74 100644
--- 
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.json
+++ 
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.json
@@ -27,6 +27,7 @@
         }
       }]
     },
+    "variableColumnName": "name",
     "valueColumnName": "value"
   }
 }
\ No newline at end of file
diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.proto.bin
 
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.proto.bin
index 7f717cb23517..a1cd388fd8a4 100644
Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.proto.bin
 and 
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.proto.bin
 differ


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to