Xiao-zhen-Liu commented on code in PR #4024:
URL: https://github.com/apache/texera/pull/4024#discussion_r2524561115
##########
common/workflow-core/src/main/scala/org/apache/amber/core/tuple/AttributeTypeUtils.scala:
##########
@@ -387,6 +387,136 @@ object AttributeTypeUtils extends Serializable {
}
}
+ /** Three-way compare for the given attribute type.
+ * Returns < 0 if left < right, > 0 if left > right, 0 if equal.
+ * Null semantics: null < non-null (both null => 0).
+ */
+ @throws[UnsupportedOperationException]
+ def compare(left: Any, right: Any, attrType: AttributeType): Int =
+ (left, right) match {
+ case (null, null) => 0
+ case (null, _) => -1
+ case (_, null) => 1
+ case _ =>
+ attrType match {
+ case AttributeType.INTEGER =>
+ java.lang.Integer.compare(
+ left.asInstanceOf[Number].intValue(),
+ right.asInstanceOf[Number].intValue()
+ )
+ case AttributeType.LONG =>
+ java.lang.Long.compare(
+ left.asInstanceOf[Number].longValue(),
+ right.asInstanceOf[Number].longValue()
+ )
+ case AttributeType.DOUBLE =>
+ java.lang.Double.compare(
+ left.asInstanceOf[Number].doubleValue(),
+ right.asInstanceOf[Number].doubleValue()
+ ) // handles ±Inf/NaN per JDK
Review Comment:
What does this mean?
##########
common/workflow-core/src/main/scala/org/apache/amber/core/tuple/Tuple.scala:
##########
@@ -112,6 +112,29 @@ case class Tuple @JsonCreator() (
object Tuple {
+ /** Build a Tuple from (name -> value) pairs, coercing values to the schema
types. */
+ def of(schema: Schema, values: (String, Any)*): Tuple = {
+ val nameToValue: Map[String, Any] = values.toMap
+ val coercedFields: Array[Any] =
+ schema.getAttributes.map { attribute =>
+ val rawValue: Any = nameToValue.getOrElse(attribute.getName, null)
+ AttributeTypeUtils.parseField(rawValue, attribute.getType, force =
true)
+ }.toArray
+ Tuple(schema, coercedFields)
+ }
+
+ /** Build a Tuple without coercion.
+ * Uses the builder’s runtime type checks; values must already match the
schema’s field classes.
+ * Missing attributes (or unknown attribute names) will cause an error.
+ */
+ def ofStrict(schema: Schema, values: (String, Any)*): Tuple =
Review Comment:
If it is not used anywhere there is no need to introduce this method in the
current PR.
##########
common/workflow-core/src/main/scala/org/apache/amber/core/tuple/AttributeTypeUtils.scala:
##########
@@ -387,6 +387,136 @@ object AttributeTypeUtils extends Serializable {
}
}
+ /** Three-way compare for the given attribute type.
Review Comment:
Why did you change the coercion to `Number` for some types? Can you explain
the implications of this change, possibly in the code comments?
##########
common/workflow-core/src/main/scala/org/apache/amber/core/tuple/AttributeTypeUtils.scala:
##########
@@ -387,6 +387,136 @@ object AttributeTypeUtils extends Serializable {
}
}
+ /** Three-way compare for the given attribute type.
+ * Returns < 0 if left < right, > 0 if left > right, 0 if equal.
+ * Null semantics: null < non-null (both null => 0).
+ */
+ @throws[UnsupportedOperationException]
+ def compare(left: Any, right: Any, attrType: AttributeType): Int =
+ (left, right) match {
+ case (null, null) => 0
+ case (null, _) => -1
+ case (_, null) => 1
+ case _ =>
+ attrType match {
+ case AttributeType.INTEGER =>
+ java.lang.Integer.compare(
+ left.asInstanceOf[Number].intValue(),
+ right.asInstanceOf[Number].intValue()
+ )
+ case AttributeType.LONG =>
+ java.lang.Long.compare(
+ left.asInstanceOf[Number].longValue(),
+ right.asInstanceOf[Number].longValue()
+ )
+ case AttributeType.DOUBLE =>
+ java.lang.Double.compare(
+ left.asInstanceOf[Number].doubleValue(),
+ right.asInstanceOf[Number].doubleValue()
+ ) // handles ±Inf/NaN per JDK
+ case AttributeType.BOOLEAN =>
+ java.lang.Boolean.compare(
+ left.asInstanceOf[Boolean],
+ right.asInstanceOf[Boolean]
+ )
+ case AttributeType.TIMESTAMP =>
+ java.lang.Long.compare(
+ left.asInstanceOf[Timestamp].getTime,
+ right.asInstanceOf[Timestamp].getTime
+ )
+ case AttributeType.STRING =>
+ left.toString.compareTo(right.toString)
+ case AttributeType.BINARY =>
+ java.util.Arrays.compareUnsigned(
+ left.asInstanceOf[Array[Byte]],
+ right.asInstanceOf[Array[Byte]]
+ )
+ case _ =>
+ throw new UnsupportedOperationException(
+ s"Unsupported attribute type for compare: $attrType"
+ )
+ }
+ }
+
+ /** Type-aware addition (null is identity). */
+ @throws[UnsupportedOperationException]
+ def add(left: Object, right: Object, attrType: AttributeType): Object = {
+ if (left == null && right == null) return zeroValue(attrType)
+ if (left == null) return right
+ if (right == null) return left
+
+ attrType match {
+ case AttributeType.INTEGER =>
+ java.lang.Integer.valueOf(
+ left.asInstanceOf[Number].intValue() +
right.asInstanceOf[Number].intValue()
+ )
+ case AttributeType.LONG =>
+ java.lang.Long.valueOf(
+ left.asInstanceOf[Number].longValue() +
right.asInstanceOf[Number].longValue()
+ )
+ case AttributeType.DOUBLE =>
+ java.lang.Double.valueOf(
+ left.asInstanceOf[Number].doubleValue() +
right.asInstanceOf[Number].doubleValue()
+ )
+ case AttributeType.TIMESTAMP =>
+ new Timestamp(
+ left.asInstanceOf[Timestamp].getTime +
right.asInstanceOf[Timestamp].getTime
+ )
+ case _ =>
+ throw new UnsupportedOperationException(
+ s"Unsupported attribute type for addition: $attrType"
+ )
+ }
+ }
+
+ /** Additive identity for supported numeric/timestamp types.
+ * For BINARY an empty array is returned as a benign identity value.
+ */
+ @throws[UnsupportedOperationException]
+ def zeroValue(attrType: AttributeType): Object =
+ attrType match {
+ case AttributeType.INTEGER => java.lang.Integer.valueOf(0)
+ case AttributeType.LONG => java.lang.Long.valueOf(0L)
+ case AttributeType.DOUBLE => java.lang.Double.valueOf(0.0d)
+ case AttributeType.TIMESTAMP => new Timestamp(0L)
+ case AttributeType.BINARY => Array.emptyByteArray
+ case _ =>
+ throw new UnsupportedOperationException(
+ s"Unsupported attribute type for zero value: $attrType"
+ )
+ }
+
+ /** Maximum sentinel. */
Review Comment:
What is "maximum sentinel / minimum sentinel"?
##########
common/workflow-core/src/main/scala/org/apache/amber/core/tuple/AttributeTypeUtils.scala:
##########
@@ -387,6 +387,136 @@ object AttributeTypeUtils extends Serializable {
}
}
+ /** Three-way compare for the given attribute type.
+ * Returns < 0 if left < right, > 0 if left > right, 0 if equal.
+ * Null semantics: null < non-null (both null => 0).
+ */
+ @throws[UnsupportedOperationException]
+ def compare(left: Any, right: Any, attrType: AttributeType): Int =
+ (left, right) match {
+ case (null, null) => 0
+ case (null, _) => -1
+ case (_, null) => 1
+ case _ =>
+ attrType match {
+ case AttributeType.INTEGER =>
+ java.lang.Integer.compare(
+ left.asInstanceOf[Number].intValue(),
+ right.asInstanceOf[Number].intValue()
+ )
+ case AttributeType.LONG =>
+ java.lang.Long.compare(
+ left.asInstanceOf[Number].longValue(),
+ right.asInstanceOf[Number].longValue()
+ )
+ case AttributeType.DOUBLE =>
+ java.lang.Double.compare(
+ left.asInstanceOf[Number].doubleValue(),
+ right.asInstanceOf[Number].doubleValue()
+ ) // handles ±Inf/NaN per JDK
+ case AttributeType.BOOLEAN =>
+ java.lang.Boolean.compare(
+ left.asInstanceOf[Boolean],
+ right.asInstanceOf[Boolean]
+ )
+ case AttributeType.TIMESTAMP =>
+ java.lang.Long.compare(
+ left.asInstanceOf[Timestamp].getTime,
+ right.asInstanceOf[Timestamp].getTime
+ )
+ case AttributeType.STRING =>
+ left.toString.compareTo(right.toString)
+ case AttributeType.BINARY =>
+ java.util.Arrays.compareUnsigned(
+ left.asInstanceOf[Array[Byte]],
+ right.asInstanceOf[Array[Byte]]
+ )
+ case _ =>
+ throw new UnsupportedOperationException(
+ s"Unsupported attribute type for compare: $attrType"
+ )
+ }
+ }
+
+ /** Type-aware addition (null is identity). */
+ @throws[UnsupportedOperationException]
+ def add(left: Object, right: Object, attrType: AttributeType): Object = {
+ if (left == null && right == null) return zeroValue(attrType)
Review Comment:
nit: why are the pattern matching logic different in `add` and `compare`? In
`compare` you are putting the null checking logic in `match`, while here you
are keeping them in the `if` statements. Can you unify them?
##########
common/workflow-core/src/main/scala/org/apache/amber/core/tuple/AttributeTypeUtils.scala:
##########
@@ -387,6 +387,136 @@ object AttributeTypeUtils extends Serializable {
}
}
+ /** Three-way compare for the given attribute type.
+ * Returns < 0 if left < right, > 0 if left > right, 0 if equal.
+ * Null semantics: null < non-null (both null => 0).
+ */
+ @throws[UnsupportedOperationException]
+ def compare(left: Any, right: Any, attrType: AttributeType): Int =
+ (left, right) match {
+ case (null, null) => 0
+ case (null, _) => -1
+ case (_, null) => 1
+ case _ =>
+ attrType match {
+ case AttributeType.INTEGER =>
+ java.lang.Integer.compare(
+ left.asInstanceOf[Number].intValue(),
+ right.asInstanceOf[Number].intValue()
+ )
+ case AttributeType.LONG =>
+ java.lang.Long.compare(
+ left.asInstanceOf[Number].longValue(),
+ right.asInstanceOf[Number].longValue()
+ )
+ case AttributeType.DOUBLE =>
+ java.lang.Double.compare(
+ left.asInstanceOf[Number].doubleValue(),
+ right.asInstanceOf[Number].doubleValue()
+ ) // handles ±Inf/NaN per JDK
+ case AttributeType.BOOLEAN =>
+ java.lang.Boolean.compare(
+ left.asInstanceOf[Boolean],
+ right.asInstanceOf[Boolean]
+ )
+ case AttributeType.TIMESTAMP =>
+ java.lang.Long.compare(
+ left.asInstanceOf[Timestamp].getTime,
+ right.asInstanceOf[Timestamp].getTime
+ )
+ case AttributeType.STRING =>
+ left.toString.compareTo(right.toString)
+ case AttributeType.BINARY =>
+ java.util.Arrays.compareUnsigned(
+ left.asInstanceOf[Array[Byte]],
+ right.asInstanceOf[Array[Byte]]
+ )
+ case _ =>
+ throw new UnsupportedOperationException(
+ s"Unsupported attribute type for compare: $attrType"
+ )
+ }
+ }
+
+ /** Type-aware addition (null is identity). */
+ @throws[UnsupportedOperationException]
+ def add(left: Object, right: Object, attrType: AttributeType): Object = {
+ if (left == null && right == null) return zeroValue(attrType)
+ if (left == null) return right
+ if (right == null) return left
+
+ attrType match {
+ case AttributeType.INTEGER =>
+ java.lang.Integer.valueOf(
+ left.asInstanceOf[Number].intValue() +
right.asInstanceOf[Number].intValue()
+ )
+ case AttributeType.LONG =>
+ java.lang.Long.valueOf(
+ left.asInstanceOf[Number].longValue() +
right.asInstanceOf[Number].longValue()
+ )
+ case AttributeType.DOUBLE =>
+ java.lang.Double.valueOf(
+ left.asInstanceOf[Number].doubleValue() +
right.asInstanceOf[Number].doubleValue()
+ )
+ case AttributeType.TIMESTAMP =>
+ new Timestamp(
+ left.asInstanceOf[Timestamp].getTime +
right.asInstanceOf[Timestamp].getTime
+ )
+ case _ =>
+ throw new UnsupportedOperationException(
+ s"Unsupported attribute type for addition: $attrType"
+ )
+ }
+ }
+
+ /** Additive identity for supported numeric/timestamp types.
+ * For BINARY an empty array is returned as a benign identity value.
Review Comment:
What is "benign identity value"?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]