dongjoon-hyun commented on code in PR #54034:
URL: https://github.com/apache/spark/pull/54034#discussion_r2744728995
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala:
##########
@@ -107,15 +115,39 @@ abstract class Collect[T <: Growable[Any] with Iterable[Any]] extends TypedImper
case class CollectList(
child: Expression,
mutableAggBufferOffset: Int = 0,
- inputAggBufferOffset: Int = 0) extends Collect[mutable.ArrayBuffer[Any]]
+ inputAggBufferOffset: Int = 0,
+ ignoreNulls: Boolean = true) extends Collect[mutable.ArrayBuffer[Any]]
with UnaryLike[Expression] {
- def this(child: Expression) = this(child, 0, 0)
+ def this(child: Expression) = this(child, 0, 0, true)
+
+ // Buffer can contain nulls when ignoreNulls is false (RESPECT NULLS)
+ override protected def bufferContainsNull: Boolean = !ignoreNulls
override lazy val bufferElementType = child.dataType
override def convertToBufferElement(value: Any): Any =
InternalRow.copyValue(value)
+ override def update(
+ buffer: mutable.ArrayBuffer[Any],
+ input: InternalRow): mutable.ArrayBuffer[Any] = {
+ val value = child.eval(input)
+ if (ignoreNulls) {
+ // Hive behavior: skip null values
+ if (value != null) {
+ buffer += convertToBufferElement(value)
+ }
+ } else {
+ // ANSI behavior: preserve null values
+ if (value != null) {
+ buffer += convertToBufferElement(value)
+ } else {
+ buffer += null
+ }
+ }
Review Comment:
This looks a little overcomplicated. The non-null case is handled identically
in both branches, so only the null case depends on `ignoreNulls`. Shall we
simplify this logic as follows, @yaooqinn?
**BEFORE**
```scala
if (ignoreNulls) {
// Hive behavior: skip null values
if (value != null) {
buffer += convertToBufferElement(value)
}
} else {
// ANSI behavior: preserve null values
if (value != null) {
buffer += convertToBufferElement(value)
} else {
buffer += null
}
}
```
**AFTER**
```scala
if (value != null) {
buffer += convertToBufferElement(value);
} else if (!ignoreNulls) { // ANSI behavior: preserve null values
buffer += null;
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]