This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new e63b5601c1bd [SPARK-49595][CONNECT][SQL] Fix `DataFrame.unpivot/melt`
in Spark Connect Scala Client
e63b5601c1bd is described below
commit e63b5601c1bd74b2b0054d48f944424d12b79835
Author: Xinrong Meng <[email protected]>
AuthorDate: Wed Sep 11 08:52:33 2024 -0700
[SPARK-49595][CONNECT][SQL] Fix `DataFrame.unpivot/melt` in Spark Connect
Scala Client
### What changes were proposed in this pull request?
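Fix `DataFrame.unpivot/melt` in the Spark Connect Scala Client by correctly
assigning the name of the variable column.
The original code called `setValueColumnName` for both the variable and value
columns, so the variable column name was never set (the second call simply
overwrote the first) and the variable column came back with an empty name.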
### Why are the changes needed?
Without this fix, the variable column produced by unpivot/melt in the Spark
Connect Scala Client has an empty name instead of the name requested by the
caller.
### Does this PR introduce _any_ user-facing change?
Yes. Variable and value columns can be set correctly as shown below.
```scala
scala> val df = Seq((1, 11, 12L), (2, 21, 22L)).toDF("id", "int", "long")
df: org.apache.spark.sql.package.DataFrame = [id: int, int: int ... 1 more
field]
scala> df.show()
+---+---+----+
| id|int|long|
+---+---+----+
|  1| 11|  12|
|  2| 21|  22|
+---+---+----+
```
FROM (current master)
```scala
scala> df.unpivot(Array($"id"), Array($"int", $"long"), "variable",
"value").show()
+---+----+-----+
| id|    |value|
+---+----+-----+
|  1| int|   11|
|  1|long|   12|
|  2| int|   21|
|  2|long|   22|
+---+----+-----+
```
TO
```scala
scala> df.unpivot(Array($"id"), Array($"int", $"long"), "variable",
"value").show()
+---+--------+-----+
| id|variable|value|
+---+--------+-----+
|  1|     int|   11|
|  1|    long|   12|
|  2|     int|   21|
|  2|    long|   22|
+---+--------+-----+
```
### How was this patch tested?
Existing tests.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #48069 from xinrong-meng/fix_unpivot.
Authored-by: Xinrong Meng <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../jvm/src/main/scala/org/apache/spark/sql/Dataset.scala | 2 +-
.../query-tests/explain-results/melt_no_values.explain | 2 +-
.../query-tests/explain-results/melt_values.explain | 2 +-
.../query-tests/explain-results/unpivot_no_values.explain | 2 +-
.../query-tests/explain-results/unpivot_values.explain | 2 +-
.../resources/query-tests/queries/melt_no_values.json | 1 +
.../query-tests/queries/melt_no_values.proto.bin | Bin 71 -> 77 bytes
.../test/resources/query-tests/queries/melt_values.json | 1 +
.../resources/query-tests/queries/melt_values.proto.bin | Bin 73 -> 79 bytes
.../resources/query-tests/queries/unpivot_no_values.json | 1 +
.../query-tests/queries/unpivot_no_values.proto.bin | Bin 64 -> 70 bytes
.../resources/query-tests/queries/unpivot_values.json | 1 +
.../query-tests/queries/unpivot_values.proto.bin | Bin 80 -> 86 bytes
13 files changed, 9 insertions(+), 5 deletions(-)
diff --git
a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
index f5606215be89..519193ebd9c7 100644
---
a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
+++
b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -481,7 +481,7 @@ class Dataset[T] private[sql] (
val unpivot = builder.getUnpivotBuilder
.setInput(plan.getRoot)
.addAllIds(ids.toImmutableArraySeq.map(_.expr).asJava)
- .setValueColumnName(variableColumnName)
+ .setVariableColumnName(variableColumnName)
.setValueColumnName(valueColumnName)
valuesOption.foreach { values =>
unpivot.getValuesBuilder
diff --git
a/sql/connect/common/src/test/resources/query-tests/explain-results/melt_no_values.explain
b/sql/connect/common/src/test/resources/query-tests/explain-results/melt_no_values.explain
index f61fc30a3a52..053937d84ec8 100644
---
a/sql/connect/common/src/test/resources/query-tests/explain-results/melt_no_values.explain
+++
b/sql/connect/common/src/test/resources/query-tests/explain-results/melt_no_values.explain
@@ -1,2 +1,2 @@
-Expand [[id#0L, a#0, b, b#0]], [id#0L, a#0, #0, value#0]
+Expand [[id#0L, a#0, b, b#0]], [id#0L, a#0, name#0, value#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0]
diff --git
a/sql/connect/common/src/test/resources/query-tests/explain-results/melt_values.explain
b/sql/connect/common/src/test/resources/query-tests/explain-results/melt_values.explain
index b5742d976dee..5a953f792cd3 100644
---
a/sql/connect/common/src/test/resources/query-tests/explain-results/melt_values.explain
+++
b/sql/connect/common/src/test/resources/query-tests/explain-results/melt_values.explain
@@ -1,2 +1,2 @@
-Expand [[a#0, id, id#0L]], [a#0, #0, value#0L]
+Expand [[a#0, id, id#0L]], [a#0, name#0, value#0L]
+- LocalRelation <empty>, [id#0L, a#0, b#0]
diff --git
a/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_no_values.explain
b/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_no_values.explain
index 8d1749ee74c5..2b2ba19d0c3d 100644
---
a/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_no_values.explain
+++
b/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_no_values.explain
@@ -1,2 +1,2 @@
-Expand [[id#0L, a, cast(a#0 as double)], [id#0L, b, b#0]], [id#0L, #0, value#0]
+Expand [[id#0L, a, cast(a#0 as double)], [id#0L, b, b#0]], [id#0L, name#0, value#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0]
diff --git
a/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_values.explain
b/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_values.explain
index f61fc30a3a52..053937d84ec8 100644
---
a/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_values.explain
+++
b/sql/connect/common/src/test/resources/query-tests/explain-results/unpivot_values.explain
@@ -1,2 +1,2 @@
-Expand [[id#0L, a#0, b, b#0]], [id#0L, a#0, #0, value#0]
+Expand [[id#0L, a#0, b, b#0]], [id#0L, a#0, name#0, value#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0]
diff --git
a/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.json
b/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.json
index 12db0a5abe36..a17da06b925b 100644
---
a/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.json
+++
b/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.json
@@ -20,6 +20,7 @@
"unparsedIdentifier": "a"
}
}],
+ "variableColumnName": "name",
"valueColumnName": "value"
}
}
\ No newline at end of file
diff --git
a/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.proto.bin
b/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.proto.bin
index 23a6aa1289a9..eebb7ad6df8e 100644
Binary files
a/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.proto.bin
and
b/sql/connect/common/src/test/resources/query-tests/queries/melt_no_values.proto.bin
differ
diff --git
a/sql/connect/common/src/test/resources/query-tests/queries/melt_values.json
b/sql/connect/common/src/test/resources/query-tests/queries/melt_values.json
index e2a004f46e78..a8142ee3a846 100644
--- a/sql/connect/common/src/test/resources/query-tests/queries/melt_values.json
+++ b/sql/connect/common/src/test/resources/query-tests/queries/melt_values.json
@@ -23,6 +23,7 @@
}
}]
},
+ "variableColumnName": "name",
"valueColumnName": "value"
}
}
\ No newline at end of file
diff --git
a/sql/connect/common/src/test/resources/query-tests/queries/melt_values.proto.bin
b/sql/connect/common/src/test/resources/query-tests/queries/melt_values.proto.bin
index e021e1110def..35829fc62dae 100644
Binary files
a/sql/connect/common/src/test/resources/query-tests/queries/melt_values.proto.bin
and
b/sql/connect/common/src/test/resources/query-tests/queries/melt_values.proto.bin
differ
diff --git
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.json
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.json
index 9f550c031914..96b76443b679 100644
---
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.json
+++
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.json
@@ -16,6 +16,7 @@
"unparsedIdentifier": "id"
}
}],
+ "variableColumnName": "name",
"valueColumnName": "value"
}
}
\ No newline at end of file
diff --git
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.proto.bin
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.proto.bin
index ac3bad8bd04e..b700190a9f66 100644
Binary files
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.proto.bin
and
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_no_values.proto.bin
differ
diff --git
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.json
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.json
index 92bc19d195c6..6c31afb04e74 100644
---
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.json
+++
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.json
@@ -27,6 +27,7 @@
}
}]
},
+ "variableColumnName": "name",
"valueColumnName": "value"
}
}
\ No newline at end of file
diff --git
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.proto.bin
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.proto.bin
index 7f717cb23517..a1cd388fd8a4 100644
Binary files
a/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.proto.bin
and
b/sql/connect/common/src/test/resources/query-tests/queries/unpivot_values.proto.bin
differ
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]