Yicong-Huang commented on code in PR #5896:
URL: https://github.com/apache/texera/pull/5896#discussion_r3463951067
##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/aggregate/AggregationOperation.scala:
##########
@@ -55,17 +55,33 @@ case class AveragePartialObj(sum: Double, count: Double)
extends Serializable {}
}
]
}
- }
+ },
+ "allOf": [
+ {
+ "if": {
+ "properties": {
+ "aggFunction": { "const": "count(*)" }
+ }
+ },
+ "then": {},
+ "else": {
+ "required": ["attribute"],
+ "properties": {
+ "attribute": { "minLength": 1 }
+ }
+ }
+ }
+ ]
}
""")
class AggregationOperation {
@JsonProperty(required = true)
@JsonSchemaTitle("Aggregate Func")
- @JsonPropertyDescription("sum, count, average, min, max, or concat")
+ @JsonPropertyDescription("sum, count, count(*), average, min, max, or
concat")
Review Comment:
let's remove it. I think `count` already include `count(*)` on the semantic.
##########
frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.ts:
##########
@@ -545,6 +545,20 @@ export class OperatorPropertyEditFrameComponent implements
OnInit, OnChanges, On
mappedField.type = "datasetversionselector";
}
+ // Aggregate: the attribute is required for every function except
count(*), which counts all rows.
+ // For count(*) the attribute is irrelevant, so disable the field
(keeping the row layout consistent),
+ // drop the required marker, and clear any previously-selected column so
it isn't shown greyed-out as if used.
+ // All react to the sibling aggFunction within the same row.
+ if (this.currentOperatorSchema?.operatorType === "Aggregate" &&
mappedField.key === "attribute") {
+ mappedField.expressions = {
+ ...mappedField.expressions,
+ "props.required": (field: FormlyFieldConfig) =>
field.parent?.model?.aggFunction !== "count(*)",
+ "props.disabled": (field: FormlyFieldConfig) =>
field.parent?.model?.aggFunction === "count(*)",
+ "model.attribute": (field: FormlyFieldConfig) =>
+ field.parent?.model?.aggFunction === "count(*)" ? "" :
field.formControl?.value,
Review Comment:
better to make `"count(*)"` a constant like `export const COUNT_STAR =
"count(*)"` and reuse it, also in test.
##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/aggregate/AggregationOperation.scala:
##########
@@ -77,26 +93,28 @@ class AggregationOperation {
@JsonIgnore
def getAggregationAttribute(attrType: AttributeType): Attribute = {
val resultAttrType = this.aggFunction match {
- case AggregationFunction.SUM => attrType
- case AggregationFunction.COUNT => AttributeType.INTEGER
- case AggregationFunction.AVERAGE => AttributeType.DOUBLE
- case AggregationFunction.MIN => attrType
- case AggregationFunction.MAX => attrType
- case AggregationFunction.CONCAT => AttributeType.STRING
- case _ => throw new RuntimeException("Unknown
aggregation function: " + this.aggFunction)
+ case AggregationFunction.SUM => attrType
+ case AggregationFunction.COUNT => AttributeType.INTEGER
+ case AggregationFunction.COUNT_STAR => AttributeType.INTEGER
+ case AggregationFunction.AVERAGE => AttributeType.DOUBLE
+ case AggregationFunction.MIN => attrType
+ case AggregationFunction.MAX => attrType
+ case AggregationFunction.CONCAT => AttributeType.STRING
+ case _ => throw new
RuntimeException("Unknown aggregation function: " + this.aggFunction)
}
new Attribute(resultAttribute, resultAttrType)
}
@JsonIgnore
def getAggFunc(attrType: AttributeType): DistributedAggregation[Object] = {
val aggFunc = aggFunction match {
- case AggregationFunction.AVERAGE => averageAgg()
- case AggregationFunction.COUNT => countAgg()
- case AggregationFunction.MAX => maxAgg(attrType)
- case AggregationFunction.MIN => minAgg(attrType)
- case AggregationFunction.SUM => sumAgg(attrType)
- case AggregationFunction.CONCAT => concatAgg()
+ case AggregationFunction.AVERAGE => averageAgg()
+ case AggregationFunction.COUNT => countAgg()
+ case AggregationFunction.COUNT_STAR => countStarAgg()
Review Comment:
on the frontend we made it special `count(*)`, but for backend, we could
make it reuse the `COUNT` function? can we avoid intorducing a new `COUNT_STAR`
and `countStarAGG()`, which could just be `countAgg(’*‘)?
##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/aggregate/AggregationOperation.scala:
##########
@@ -55,17 +55,33 @@ case class AveragePartialObj(sum: Double, count: Double)
extends Serializable {}
}
]
}
- }
+ },
+ "allOf": [
+ {
+ "if": {
+ "properties": {
+ "aggFunction": { "const": "count(*)" }
+ }
+ },
+ "then": {},
+ "else": {
+ "required": ["attribute"],
+ "properties": {
+ "attribute": { "minLength": 1 }
+ }
+ }
+ }
+ ]
}
""")
class AggregationOperation {
@JsonProperty(required = true)
@JsonSchemaTitle("Aggregate Func")
- @JsonPropertyDescription("sum, count, average, min, max, or concat")
+ @JsonPropertyDescription("sum, count, count(*), average, min, max, or
concat")
var aggFunction: AggregationFunction = _
- @JsonProperty(value = "attribute", required = true)
- @JsonPropertyDescription("column to calculate average value")
+ @JsonProperty(value = "attribute")
+ @JsonPropertyDescription("column to aggregate on")
Review Comment:
good change
##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/aggregate/AggregationFunction.java:
##########
@@ -27,6 +27,8 @@ public enum AggregationFunction {
COUNT("count"),
+ COUNT_STAR("count(*)"),
Review Comment:
having two names could be confusing. no need to change it if separate `*` to
value input box is too hard.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]