HyukjinKwon commented on code in PR #47688:
URL: https://github.com/apache/spark/pull/47688#discussion_r1719271705
##########
sql/core/src/main/scala/org/apache/spark/sql/Column.scala:
##########
@@ -27,22 +27,21 @@ import org.apache.spark.sql.catalyst.encoders.{encoderFor,
ExpressionEncoder}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
-import org.apache.spark.sql.catalyst.util.{toPrettySQL, CharVarcharUtils}
+import org.apache.spark.sql.catalyst.util.toPrettySQL
import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression
-import org.apache.spark.sql.execution.analysis.DetectAmbiguousSelfJoin
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.lit
-import org.apache.spark.sql.internal.TypedAggUtils
+import org.apache.spark.sql.internal.{ColumnNode, TypedAggUtils, Wrapper}
import org.apache.spark.sql.types._
import org.apache.spark.util.ArrayImplicits._
-private[sql] object Column {
+private[spark] object Column {
def apply(colName: String): Column = new Column(colName)
- def apply(expr: Expression): Column = new Column(expr)
+ def apply(expr: Expression): Column = Column(Wrapper(expr))
- def unapply(col: Column): Option[Expression] = Some(col.expr)
+ def apply(node: => ColumnNode): Column = withOrigin(new Column(node))
Review Comment:
I think the error report here would be a bit weird via `Origin` because the
top level of the function call changed. We get
`Thread.currentThread().getStackTrace` and set it to `Origin`, so whichever
function fails, the error will always point here.
It would be great if we could double-check that code like the example below
still works fine:
```scala
val df = spark.range(10)
val df1 = df.withColumn("div_ten", df.col("id") / 10)
val df2 = df1.withColumn("plus_four", df.col("id") + 4)
// This is a problematic divide operation that causes DIVIDE_BY_ZERO.
val df3 = df2.withColumn("div_zero", df.col("id") / 0) // Error here
val df4 = df3.withColumn("minus_five", df.col("id") / 5)
df4.collect()
```
should report sth like:
```
org.apache.spark.SparkArithmeticException: [DIVIDE_BY_ZERO] Division by
zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If
necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
SQLSTATE: 22012
== DataFrame ==
"div" was called from
<init>(<console>:7)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]