tsreaper commented on code in PR #121:
URL: https://github.com/apache/flink-table-store/pull/121#discussion_r873280827


##########
flink-table-store-core/src/main/java/org/apache/flink/table/store/file/mergetree/compact/AggregationMergeFunction.java:
##########
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.file.mergetree.compact;
+
+import org.apache.flink.api.java.aggregation.AggregationFunction;
+import org.apache.flink.api.java.aggregation.AggregationFunctionFactory;
+import org.apache.flink.api.java.aggregation.Aggregations;
+import 
org.apache.flink.api.java.aggregation.UnsupportedAggregationTypeException;
+import org.apache.flink.table.data.GenericRowData;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.RowType;
+
+import javax.annotation.Nullable;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A {@link MergeFunction} where key is primary key (unique) and value is the 
partial record, update
+ * non-null fields on merge.
+ */
+@SuppressWarnings("checkstyle:RegexpSingleline")
+public class AggregationMergeFunction implements MergeFunction {
+
+    private static final long serialVersionUID = 1L;
+
+    private final RowData.FieldGetter[] getters;
+    private final RowType primaryKeyType;
+    private final RowType rowType;
+    private final Set<String> primaryKeyNames;
+    private final ArrayList<String> rowNames;
+
+    private final ArrayList<AggregationFunction<Object>> types;

Review Comment:
   `aggregateFunctions` may be a better name for this field than `types`.



##########
flink-table-store-core/src/main/java/org/apache/flink/table/store/file/mergetree/compact/AggregationMergeFunction.java:
##########
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.file.mergetree.compact;
+
+import org.apache.flink.api.java.aggregation.AggregationFunction;
+import org.apache.flink.api.java.aggregation.AggregationFunctionFactory;
+import org.apache.flink.api.java.aggregation.Aggregations;
+import 
org.apache.flink.api.java.aggregation.UnsupportedAggregationTypeException;
+import org.apache.flink.table.data.GenericRowData;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.RowType;
+
+import javax.annotation.Nullable;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A {@link MergeFunction} where key is primary key (unique) and value is the 
partial record, update
+ * non-null fields on merge.
+ */
+@SuppressWarnings("checkstyle:RegexpSingleline")
+public class AggregationMergeFunction implements MergeFunction {
+
+    private static final long serialVersionUID = 1L;
+
+    private final RowData.FieldGetter[] getters;
+    private final RowType primaryKeyType;
+    private final RowType rowType;
+    private final Set<String> primaryKeyNames;
+    private final ArrayList<String> rowNames;
+
+    private final ArrayList<AggregationFunction<Object>> types;
+    private transient GenericRowData row;
+
+    public AggregationMergeFunction(
+            RowData.FieldGetter[] fieldGetters, RowType primaryKeyType, 
RowType rowType) {
+        this.getters = fieldGetters;
+        this.primaryKeyType = primaryKeyType;
+        this.rowType = rowType;
+        this.primaryKeyNames = new HashSet<>(primaryKeyType.getFieldNames());
+        this.rowNames = new ArrayList<>(rowType.getFieldNames());
+        this.types = new ArrayList<>(rowType.getFieldCount());
+        AggregationFunctionFactory factory = Aggregations.SUM.getFactory();
+        for (LogicalType type : rowType.getChildren()) {
+            try {
+                AggregationFunction<Object> f =
+                        factory.createAggregationFunction(
+                                (Class<Object>) type.getDefaultConversion());
+                types.add(f);
+            } catch (UnsupportedAggregationTypeException e) {
+                types.add(null);
+            }
+        }
+    }
+
+    @Override
+    public void reset() {
+        this.row = new GenericRowData(getters.length);
+    }
+
+    @Override
+    public void add(RowData value) {
+        for (int i = 0; i < getters.length; i++) {
+            Object currentField = getters[i].getFieldOrNull(value);
+            AggregationFunction<Object> f = types.get(i);
+            if (primaryKeyNames.contains(rowNames.get(i))) {

Review Comment:
   Calculate the indices of the primary key columns and of the other columns in the 
constructor so we don't need to look them up in the set here.



##########
flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/AggregationITCase.java:
##########
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.connector;
+
+import org.apache.flink.types.Row;
+
+import org.junit.Test;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.ExecutionException;
+
+import static org.apache.flink.util.CollectionUtil.iteratorToList;
+import static org.assertj.core.api.Assertions.assertThat;
+
+/** ITCase for partial update. */
+public class AggregationITCase extends FileStoreTableITCase {
+
+    @Override
+    protected List<String> ddl() {
+        return Collections.singletonList(
+                "CREATE TABLE IF NOT EXISTS T3 ( "
+                        + " a STRING, "
+                        + " b INT, "
+                        + " c INT, "
+                        + " PRIMARY KEY (a) NOT ENFORCED )"
+                        + " WITH ("
+                        + " 'merge-engine'='aggregation' ,"
+                        + " 'b.aggregate-function'='sum' ,"
+                        + " 'c.aggregate-function'='sum' "

Review Comment:
   What happens if I only set `'b.aggregate-function'='sum'`? Maybe throw an 
exception telling the user that they should set an aggregate function for every 
column that is not part of the primary key? Please implement this and add a test case for it.



##########
flink-table-store-core/src/main/java/org/apache/flink/table/store/file/mergetree/compact/AggregationMergeFunction.java:
##########
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.file.mergetree.compact;
+
+import org.apache.flink.api.java.aggregation.AggregationFunction;
+import org.apache.flink.api.java.aggregation.AggregationFunctionFactory;
+import org.apache.flink.api.java.aggregation.Aggregations;
+import 
org.apache.flink.api.java.aggregation.UnsupportedAggregationTypeException;
+import org.apache.flink.table.data.GenericRowData;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.RowType;
+
+import javax.annotation.Nullable;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A {@link MergeFunction} where key is primary key (unique) and value is the 
partial record, update
+ * non-null fields on merge.
+ */
+@SuppressWarnings("checkstyle:RegexpSingleline")
+public class AggregationMergeFunction implements MergeFunction {
+
+    private static final long serialVersionUID = 1L;
+
+    private final RowData.FieldGetter[] getters;
+    private final RowType primaryKeyType;
+    private final RowType rowType;
+    private final Set<String> primaryKeyNames;
+    private final ArrayList<String> rowNames;
+
+    private final ArrayList<AggregationFunction<Object>> types;
+    private transient GenericRowData row;
+
+    public AggregationMergeFunction(
+            RowData.FieldGetter[] fieldGetters, RowType primaryKeyType, 
RowType rowType) {

Review Comment:
   Could we also create the `FieldGetter`s in the constructor to reduce the number of 
arguments?



##########
flink-table-store-core/src/main/java/org/apache/flink/table/store/file/mergetree/compact/AggregationMergeFunction.java:
##########
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.file.mergetree.compact;
+
+import org.apache.flink.api.java.aggregation.AggregationFunction;
+import org.apache.flink.api.java.aggregation.AggregationFunctionFactory;
+import org.apache.flink.api.java.aggregation.Aggregations;
+import 
org.apache.flink.api.java.aggregation.UnsupportedAggregationTypeException;
+import org.apache.flink.table.data.GenericRowData;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.RowType;
+
+import javax.annotation.Nullable;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A {@link MergeFunction} where key is primary key (unique) and value is the 
partial record, update
+ * non-null fields on merge.
+ */
+@SuppressWarnings("checkstyle:RegexpSingleline")
+public class AggregationMergeFunction implements MergeFunction {
+
+    private static final long serialVersionUID = 1L;
+
+    private final RowData.FieldGetter[] getters;
+    private final RowType primaryKeyType;
+    private final RowType rowType;
+    private final Set<String> primaryKeyNames;
+    private final ArrayList<String> rowNames;
+
+    private final ArrayList<AggregationFunction<Object>> types;
+    private transient GenericRowData row;
+
+    public AggregationMergeFunction(
+            RowData.FieldGetter[] fieldGetters, RowType primaryKeyType, 
RowType rowType) {
+        this.getters = fieldGetters;
+        this.primaryKeyType = primaryKeyType;
+        this.rowType = rowType;
+        this.primaryKeyNames = new HashSet<>(primaryKeyType.getFieldNames());
+        this.rowNames = new ArrayList<>(rowType.getFieldNames());
+        this.types = new ArrayList<>(rowType.getFieldCount());
+        AggregationFunctionFactory factory = Aggregations.SUM.getFactory();

Review Comment:
   Nice find. I didn't even know we had such a class in Flink.
   
   However, this class is marked as `@Internal` in Flink, which means it is not a 
public API. Also, this class does not support retraction or other aggregate 
functions such as `avg`. I'm not sure whether there are better ways to do this or whether 
it would be better to create our own aggregate function classes. I'll leave this 
to the other reviewers.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to