polyzos commented on code in PR #3319:
URL: https://github.com/apache/fluss/pull/3319#discussion_r3256913771


##########
fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/functions/bitmap/AbstractRbAggFunction.java:
##########
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.functions.bitmap;
+
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.table.annotation.DataTypeHint;
+import org.apache.flink.table.annotation.FunctionHint;
+import org.apache.flink.table.functions.AggregateFunction;
+import org.roaringbitmap.RoaringBitmap;
+
+import javax.annotation.Nullable;
+
+import java.io.IOException;
+
+/**
+ * Shared base for bitmap aggregate UDFs that use {@link RoaringBitmap} as the 
accumulator.
+ *
+ * <p>The {@code @FunctionHint} annotation with {@code accumulator = 
@DataTypeHint("RAW")} tells
+ * Flink's Table planner to skip reflection-based POJO extraction and instead 
use the {@link
+ * TypeInformation} returned by {@link #getAccumulatorType()}, which provides 
the custom {@link
+ * RoaringBitmapSerializer}. Without this annotation, Flink attempts POJO 
field extraction on
+ * RoaringBitmap and fails.
+ */
+@FunctionHint(accumulator = @DataTypeHint(value = "RAW", bridgedTo = 
RoaringBitmap.class))
+abstract class AbstractRbAggFunction extends AggregateFunction<byte[], 
RoaringBitmap> {
+
+    @Override
+    public RoaringBitmap createAccumulator() {
+        return new RoaringBitmap();
+    }
+
+    /** Merges multiple accumulators — required for session window 
aggregation. */

Review Comment:
   "required for session window aggregation" is misleading.
   The merge operation is required for any two-phase / batch / merge-capable 
aggregation in the Flink Table API, not specifically for session windows.



##########
fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/functions/bitmap/AbstractRbAggFunction.java:
##########
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.functions.bitmap;
+
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.table.annotation.DataTypeHint;
+import org.apache.flink.table.annotation.FunctionHint;
+import org.apache.flink.table.functions.AggregateFunction;
+import org.roaringbitmap.RoaringBitmap;
+
+import javax.annotation.Nullable;
+
+import java.io.IOException;
+
+/**
+ * Shared base for bitmap aggregate UDFs that use {@link RoaringBitmap} as the 
accumulator.
+ *
+ * <p>The {@code @FunctionHint} annotation with {@code accumulator = 
@DataTypeHint("RAW")} tells
+ * Flink's Table planner to skip reflection-based POJO extraction and instead 
use the {@link
+ * TypeInformation} returned by {@link #getAccumulatorType()}, which provides 
the custom {@link
+ * RoaringBitmapSerializer}. Without this annotation, Flink attempts POJO 
field extraction on
+ * RoaringBitmap and fails.
+ */
+@FunctionHint(accumulator = @DataTypeHint(value = "RAW", bridgedTo = 
RoaringBitmap.class))
+abstract class AbstractRbAggFunction extends AggregateFunction<byte[], 
RoaringBitmap> {
+
+    @Override
+    public RoaringBitmap createAccumulator() {
+        return new RoaringBitmap();
+    }
+
+    /** Merges multiple accumulators — required for session window 
aggregation. */
+    public void merge(RoaringBitmap acc, Iterable<RoaringBitmap> it) {
+        for (RoaringBitmap other : it) {
+            if (other != null) {
+                acc.or(other);
+            }
+        }
+    }
+
+    public void resetAccumulator(RoaringBitmap acc) {
+        acc.clear();
+    }
+
+    @Override
+    @Nullable
+    public byte[] getValue(RoaringBitmap acc) {
+        if (acc == null || acc.isEmpty()) {
+            return null;
+        }
+        try {
+            return BitmapUtils.toBytes(acc);
+        } catch (IOException e) {
+            throw new RuntimeException("Failed to serialize bitmap 
accumulator.", e);

Review Comment:
   Use `FlussRuntimeException` here instead of the generic `RuntimeException`.



##########
fluss-test-coverage/pom.xml:
##########
@@ -484,6 +484,9 @@
                                         
<exclude>org.apache.fluss.flink.tiering.FlussLakeTieringEntrypoint</exclude>
                                         
<exclude>org.apache.fluss.flink.tiering.FlussLakeTiering</exclude>
                                         <!-- end exclude for flink tiering 
service -->
+                                        <exclude>

Review Comment:
   Instead of excluding, can we add some tests, for example for 
    merge / getValue / resetAccumulator? Maybe ship with just one concrete 
implementation? 
   
   Without a consumer, there's no way to validate that the planner actually 
accepts the `RAW(... bridgedTo = RoaringBitmap.class)` hint together with the 
custom TypeInformation.
   WDYT?



##########
fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/functions/bitmap/RoaringBitmapTypeInfo.java:
##########
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.functions.bitmap;
+
+import org.apache.flink.api.common.ExecutionConfig;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.common.typeutils.TypeSerializer;
+import org.roaringbitmap.RoaringBitmap;
+
+import java.util.Objects;
+
+/**
+ * {@link TypeInformation} for {@link RoaringBitmap}.
+ *
+ * <p>Provides the custom {@link RoaringBitmapSerializer} to Flink's type 
system, ensuring correct
+ * checkpoint and savepoint behavior for bitmap aggregate function 
accumulators.
+ */
+public final class RoaringBitmapTypeInfo extends 
TypeInformation<RoaringBitmap> {
+
+    public static final RoaringBitmapTypeInfo INSTANCE = new 
RoaringBitmapTypeInfo();
+
+    private static final long serialVersionUID = 1L;
+
+    private RoaringBitmapTypeInfo() {}
+
+    @Override
+    public boolean isBasicType() {
+        return false;
+    }
+
+    @Override
+    public boolean isTupleType() {
+        return false;
+    }
+
+    @Override
+    public int getArity() {
+        return 1;
+    }
+
+    @Override
+    public int getTotalFields() {
+        return 1;
+    }
+
+    @Override
+    public Class<RoaringBitmap> getTypeClass() {
+        return RoaringBitmap.class;
+    }
+
+    @Override
+    public boolean isKeyType() {
+        return false;
+    }
+
+    @Override
+    public TypeSerializer<RoaringBitmap> createSerializer(ExecutionConfig 
config) {
+        return RoaringBitmapSerializer.INSTANCE;
+    }
+
+    @Override
+    public String toString() {
+        return "RoaringBitmapTypeInfo";
+    }
+
+    @Override
+    public boolean equals(Object obj) {

Review Comment:
   `equals` doesn't delegate through `canEqual`, which breaks Flink's documented 
symmetry contract for `TypeInformation` subclasses. 
   
   Either use `return obj instanceof RoaringBitmapTypeInfo && 
((RoaringBitmapTypeInfo) obj).canEqual(this);`, or simplify the whole class to 
reference equality, since the constructor is private and `INSTANCE` is the only 
instance. It is also worth adding `@ThreadSafe` here and on `RoaringBitmapSerializer`.



##########
fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/functions/bitmap/RoaringBitmapSerializer.java:
##########
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.functions.bitmap;
+
+import org.apache.flink.api.common.typeutils.SimpleTypeSerializerSnapshot;
+import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot;
+import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton;
+import org.apache.flink.core.memory.DataInputView;
+import org.apache.flink.core.memory.DataOutputView;
+import org.roaringbitmap.RoaringBitmap;
+
+import java.io.IOException;
+
+/**
+ * Flink {@link org.apache.flink.api.common.typeutils.TypeSerializer} for 
{@link RoaringBitmap}.
+ *
+ * <p>Used as the accumulator serializer for bitmap aggregate functions to 
ensure correct
+ * checkpoint/savepoint behavior. Without a custom serializer, Flink falls 
back to Kryo which is
+ * sensitive to internal class layout changes across RoaringBitmap library 
versions.
+ */
+public final class RoaringBitmapSerializer extends 
TypeSerializerSingleton<RoaringBitmap> {
+
+    public static final RoaringBitmapSerializer INSTANCE = new 
RoaringBitmapSerializer();
+
+    private static final long serialVersionUID = 1L;
+
+    private RoaringBitmapSerializer() {}
+
+    @Override
+    public boolean isImmutableType() {
+        return false;
+    }
+
+    @Override
+    public RoaringBitmap createInstance() {
+        return new RoaringBitmap();
+    }
+
+    @Override
+    public RoaringBitmap copy(RoaringBitmap from) {
+        return from.clone();
+    }
+
+    @Override
+    public RoaringBitmap copy(RoaringBitmap from, RoaringBitmap reuse) {
+        return from.clone();
+    }
+
+    @Override
+    public int getLength() {
+        return -1;
+    }
+
+    @Override
+    public void serialize(RoaringBitmap record, DataOutputView target) throws 
IOException {

Review Comment:
   `record.runOptimize()` is called here, and then `BitmapUtils.toBytes(record)` 
also calls `runOptimize()`, so the optimization runs twice; one of the two calls 
is redundant.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to