Github user viirya commented on a diff in the pull request:
https://github.com/apache/spark/pull/19480#discussion_r144581126
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
---
@@ -2103,4 +2103,35 @@ class DataFrameSuite extends QueryTest with
SharedSQLContext {
testData2.select(lit(7), 'a, 'b).orderBy(lit(1), lit(2), lit(3)),
Seq(Row(7, 1, 1), Row(7, 1, 2), Row(7, 2, 1), Row(7, 2, 2), Row(7,
3, 1), Row(7, 3, 2)))
}
+
+ test("SPARK-22226: splitExpressions should not generate codes beyond
64KB") {
+ val colNumber = 10000
+ val input = spark.range(2).rdd.map(_ => Row(1 to colNumber: _*))
+ val df = sqlContext.createDataFrame(input, StructType(
+ (1 to colNumber).map(colIndex => StructField(s"_$colIndex",
IntegerType, false))))
+ val newCols = (1 to colNumber).flatMap { colIndex =>
+ Seq(expr(s"if(1000 < _$colIndex, 1000, _$colIndex)"),
+ expr(s"sqrt(_$colIndex)"))
+ }
+ df.select(newCols: _*).collect()
+ }
+
+ test("SPARK-22226: too many splitted expressions should not exceed
constant pool limit") {
--- End diff --
I've just tried this. It seems we can simplify it to:
```scala
test("SPARK-22226: too many splitted expressions should not exceed
constant pool limit") {
val colNumber = 6000
val input = spark.range(2).rdd.map(_ => Row(1 to colNumber: _*))
val df = sqlContext.createDataFrame(input, StructType(
(1 to colNumber).map(colIndex => StructField(s"_$colIndex",
IntegerType, false))))
df.dropDuplicates((1 to 5).map(colIndex => s"_$colIndex")).collect()
}
```
@mgaido91 Can you verify it? Thanks.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]