[
https://issues.apache.org/jira/browse/FLINK-5956?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15895916#comment-15895916
]
ASF GitHub Bot commented on FLINK-5956:
---------------------------------------
Github user fhueske commented on a diff in the pull request:
https://github.com/apache/flink/pull/3470#discussion_r104297801
--- Diff:
flink-libraries/flink-table/src/main/scala/org/apache/flink/table/functions/aggfunctions/MaxAggFunctionWithRetract.scala
---
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flink.table.functions.aggfunctions
+
+import java.math.BigDecimal
+import java.util.{HashMap => JHashMap, List => JList}
+
+import org.apache.flink.api.common.typeinfo.{BasicTypeInfo,
TypeInformation}
+import org.apache.flink.api.java.tuple.{Tuple3 => JTuple3}
+import org.apache.flink.api.java.typeutils.{MapTypeInfo, TupleTypeInfo}
+import org.apache.flink.table.api.TableException
+import org.apache.flink.table.functions.{Accumulator, AggregateFunction}
+
+/**
+ * The accumulator for the Max with retraction aggregate function.
+ *
+ * As initialized by `createAccumulator`: f0 holds the current max, f1 the total count and
+ * f2 the count for each value.
+ *
+ * @tparam T the type of the aggregated values
+ */
+class MaxWithRetractAccumulator[T] extends JTuple3[T, Long, JHashMap[T,
Long]] with Accumulator
+
+/**
+ * Base class for built-in Max with retraction aggregate function
+ *
+ * @tparam T the type for the aggregation result
+ */
+abstract class MaxWithRetractAggFunction[T](implicit ord: Ordering[T])
extends AggregateFunction[T] {
+
+ /**
+  * Builds an empty accumulator: the max starts at `getInitValue`, the total count at zero
+  * and the per-value count map is empty.
+  *
+  * @return a fresh [[MaxWithRetractAccumulator]]
+  */
+ override def createAccumulator(): Accumulator = {
+   val fresh = new MaxWithRetractAccumulator[T]
+   // f0: the current max
+   fresh.f0 = getInitValue
+   // f1: total number of accumulated values
+   fresh.f1 = 0L
+   // f2: number of occurrences of each distinct value
+   fresh.f2 = new JHashMap[T, Long]()
+   fresh
+ }
+
+ /**
+  * Adds a value to the accumulator. Null values are ignored.
+  *
+  * @param accumulator the accumulator to update; cast to [[MaxWithRetractAccumulator]]
+  * @param value the value to add; cast to `T`
+  */
+ override def accumulate(accumulator: Accumulator, value: Any): Unit = {
+   if (value != null) {
+     val incoming = value.asInstanceOf[T]
+     val acc = accumulator.asInstanceOf[MaxWithRetractAccumulator[T]]
+
+     // the first value always becomes the max; later ones only if strictly greater
+     val replacesMax = acc.f1 == 0 || ord.compare(acc.f0, incoming) < 0
+     if (replacesMax) {
+       acc.f0 = incoming
+     }
+
+     acc.f1 += 1L
+
+     // bump the per-value occurrence count, starting from zero for unseen values
+     val seenSoFar = if (acc.f2.containsKey(incoming)) acc.f2.get(incoming) else 0L
+     acc.f2.put(incoming, seenSoFar + 1L)
+   }
+ }
+
+ /**
+  * Retracts one occurrence of a previously accumulated value. Null values are ignored.
+  *
+  * The per-value counts are used to find the new max when the last occurrence of the
+  * current max is retracted.
+  *
+  * @param accumulator the accumulator to update; cast to [[MaxWithRetractAccumulator]]
+  * @param value the value to retract; cast to `T`
+  * @throws TableException if the value has no entry in the per-value count map
+  */
+ override def retract(accumulator: Accumulator, value: Any): Unit = {
+   if (value != null) {
+     val v = value.asInstanceOf[T]
+     val a = accumulator.asInstanceOf[MaxWithRetractAccumulator[T]]
+
+     // validate before mutating anything, so that an unexpected retraction leaves the
+     // accumulator (in particular the total count) untouched
+     if (!a.f2.containsKey(v)) {
+       throw TableException("unexpected retract message")
+     }
+
+     a.f1 -= 1L
+     val remaining = a.f2.get(v) - 1L
+
+     if (remaining != 0) {
+       a.f2.put(v, remaining)
+     } else {
+       // remove the key v from the map once the value v no longer appears
+       a.f2.remove(v)
+       if (a.f1 == 0) {
+         // nothing is left at all, so simply reset the max to the initial value
+         a.f0 = getInitValue
+       } else if (v == a.f0) {
+         // the last occurrence of the current max is gone: the biggest remaining key
+         // becomes the new max
+         a.f0 = java.util.Collections.max(a.f2.keySet(), ord)
+       }
+     }
+   }
+ }
+
+ /**
+  * Reads the aggregation result from the accumulator.
+  *
+  * @param accumulator the accumulator to read; cast to [[MaxWithRetractAccumulator]]
+  * @return the current max, or null if the total count is zero
+  */
+ override def getValue(accumulator: Accumulator): T = {
+   val acc = accumulator.asInstanceOf[MaxWithRetractAccumulator[T]]
+   val nothingAccumulated = acc.f1 == 0
+   if (nothingAccumulated) null.asInstanceOf[T] else acc.f0
+ }
+
+ override def merge(accumulators: JList[Accumulator]): Accumulator = {
+ val ret = accumulators.get(0)
+ var i: Int = 1
+ while (i < accumulators.size()) {
+ val a =
accumulators.get(i).asInstanceOf[MaxWithRetractAccumulator[T]]
+ if (a.f1 != 0) {
+ accumulate(ret.asInstanceOf[MaxWithRetractAccumulator[T]], a.f0)
--- End diff --
This assumes that `accumulate` and `retract` are never called on `merged`
accumulators (otherwise we would need to merge the hash maps too). If this is
the case, we should add this to the documentation of the `merge` function in
`AggregateFunction` and clear the hash map.
> Add retract method into the aggregateFunction
> ---------------------------------------------
>
> Key: FLINK-5956
> URL: https://issues.apache.org/jira/browse/FLINK-5956
> Project: Flink
> Issue Type: Sub-task
> Components: Table API & SQL
> Reporter: Shaoxuan Wang
> Assignee: Shaoxuan Wang
>
> A retraction method is helpful for processing updated messages. It will also be very
> helpful for window aggregation. This PR will first add retraction methods
> to the aggregate functions, so that the ongoing OVER window aggregation work can
> benefit from them.
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)