cloud-fan commented on code in PR #39248:
URL: https://github.com/apache/spark/pull/39248#discussion_r1058767972
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala:
##########
@@ -127,6 +125,54 @@ abstract class Expression extends TreeNode[Expression] {
def references: AttributeSet = _references
+ /**
+ * Returns true if the expression contains mutable state.
+ *
+ * A stateful expression should never be evaluated multiple times for a single row. This should
+ * only be a problem for interpreted execution. This can be prevented by creating fresh copies
+ * of the stateful expression before execution. A common example to trigger this issue:
+ * {{{
+ * val rand = functions.rand()
+ * df.select(rand, rand) // These 2 rand should not share a state.
+ * }}}
+ */
+ def stateful: Boolean = false
+
+ /**
+ * Returns a copy of this expression where all stateful expressions are replaced with fresh
+ * uninitialized copies. If the expression contains no stateful expressions then the original
+ * expression is returned.
+ */
+ def freshCopyIfContainsStatefulExpression(): Expression = {
+ val childrenIndexedSeq: IndexedSeq[Expression] = children match {
+ case types: IndexedSeq[Expression] => types
+ case other => other.toIndexedSeq
+ }
+ val newChildren =
childrenIndexedSeq.map(_.freshCopyIfContainsStatefulExpression())
+ // A more efficient version of `children.zip(newChildren).exists(_ ne _)`
+ val anyChildChanged = {
+ val size = newChildren.length
+ var i = 0
+ var res: Boolean = false
+ while (!res && i < size) {
+ res |= (childrenIndexedSeq(i) ne newChildren(i))
+ i += 1
+ }
+ res
+ }
+ // If the children contain stateful expressions and get copied, or this expression is stateful,
+ // copy this expression with the new children.
+ if (anyChildChanged || stateful) {
+ CurrentOrigin.withOrigin(origin) {
+ val res = withNewChildrenInternal(newChildren)
+ res.copyTagsFrom(this)
Review Comment:
How is this possible? keys are basically strings, right?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]