This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new a279d77ea [core] Fix bugs in TruncateFieldStatsCollector
a279d77ea is described below

commit a279d77ea3c3e0ef1162a9c5eef7fbac86e1f7d6
Author: JingsongLi <[email protected]>
AuthorDate: Wed Jul 5 00:04:27 2023 +0800

    [core] Fix bugs in TruncateFieldStatsCollector
---
 .../statistics/TruncateFieldStatsCollector.java    | 39 ++++++++++++++------
 .../paimon/statistics/FieldStatsCollectorTest.java | 43 ++++++++++++++++++++++
 2 files changed, 70 insertions(+), 12 deletions(-)

diff --git 
a/paimon-common/src/main/java/org/apache/paimon/statistics/TruncateFieldStatsCollector.java
 
b/paimon-common/src/main/java/org/apache/paimon/statistics/TruncateFieldStatsCollector.java
index bae77bed6..a8ff78133 100644
--- 
a/paimon-common/src/main/java/org/apache/paimon/statistics/TruncateFieldStatsCollector.java
+++ 
b/paimon-common/src/main/java/org/apache/paimon/statistics/TruncateFieldStatsCollector.java
@@ -25,8 +25,6 @@ import org.apache.paimon.data.serializer.Serializer;
 import org.apache.paimon.format.FieldStats;
 import org.apache.paimon.utils.Preconditions;
 
-import javax.annotation.Nullable;
-
 import java.util.regex.Pattern;
 
 /**
@@ -39,6 +37,8 @@ public class TruncateFieldStatsCollector extends 
AbstractFieldStatsCollector {
 
     private final int length;
 
+    private boolean failed = false;
+
     public TruncateFieldStatsCollector(int length) {
         Preconditions.checkArgument(length > 0, "Truncate length should larger 
than zero.");
         this.length = length;
@@ -62,35 +62,49 @@ public class TruncateFieldStatsCollector extends 
AbstractFieldStatsCollector {
 
         Comparable<Object> c = (Comparable<Object>) field;
         if (minValue == null || c.compareTo(minValue) < 0) {
-            minValue = truncateMin(field, fieldSerializer);
+            minValue = fieldSerializer.copy(truncateMin(field));
         }
         if (maxValue == null || c.compareTo(maxValue) > 0) {
-            maxValue = truncateMax(field, fieldSerializer);
+            Object max = truncateMax(field);
+            // may fail
+            if (max != null) {
+                maxValue = fieldSerializer.copy(truncateMax(field));
+            }
         }
     }
 
     @Override
     public FieldStats convert(FieldStats source) {
-        return new FieldStats(
-                truncateMin(source.minValue(), null),
-                truncateMax(source.maxValue(), null),
-                source.nullCount());
+        Object min = truncateMin(source.minValue());
+        Object max = truncateMax(source.maxValue());
+        if (max == null) {
+            return new FieldStats(null, null, source.nullCount());
+        }
+        return new FieldStats(min, max, source.nullCount());
+    }
+
+    @Override
+    public FieldStats result() {
+        if (failed) {
+            return new FieldStats(null, null, nullCount);
+        }
+        return new FieldStats(minValue, maxValue, nullCount);
     }
 
     /** @return a truncated value less or equal than the old value. */
-    private Object truncateMin(Object field, @Nullable Serializer<Object> 
fieldSerializer) {
+    private Object truncateMin(Object field) {
         if (field == null) {
             return null;
         }
         if (field instanceof BinaryString) {
             return ((BinaryString) field).substring(0, length);
         } else {
-            return fieldSerializer == null ? field : 
fieldSerializer.copy(field);
+            return field;
         }
     }
 
     /** @return a value greater or equal than the old value. */
-    private Object truncateMax(Object field, @Nullable Serializer<Object> 
fieldSerializer) {
+    private Object truncateMax(Object field) {
         if (field == null) {
             return null;
         }
@@ -119,9 +133,10 @@ public class TruncateFieldStatsCollector extends 
AbstractFieldStatsCollector {
                     return 
BinaryString.fromString(truncatedStringBuilder.toString());
                 }
             }
+            failed = true;
             return null; // Cannot find a valid upper bound
         } else {
-            return fieldSerializer == null ? field : 
fieldSerializer.copy(field);
+            return field;
         }
     }
 }
diff --git 
a/paimon-common/src/test/java/org/apache/paimon/statistics/FieldStatsCollectorTest.java
 
b/paimon-common/src/test/java/org/apache/paimon/statistics/FieldStatsCollectorTest.java
index 80121d805..5285f0691 100644
--- 
a/paimon-common/src/test/java/org/apache/paimon/statistics/FieldStatsCollectorTest.java
+++ 
b/paimon-common/src/test/java/org/apache/paimon/statistics/FieldStatsCollectorTest.java
@@ -22,6 +22,7 @@ package org.apache.paimon.statistics;
 
 import org.apache.paimon.data.BinaryString;
 import org.apache.paimon.data.GenericRow;
+import org.apache.paimon.data.serializer.BinaryStringSerializer;
 import org.apache.paimon.data.serializer.InternalSerializers;
 import org.apache.paimon.data.serializer.Serializer;
 import org.apache.paimon.format.FieldStats;
@@ -39,6 +40,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
+import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy;
 
 /** Test for {@link FieldStatsCollector}. */
@@ -166,6 +168,47 @@ public class FieldStatsCollectorTest {
         Assertions.assertEquals(BinaryString.fromString("\uD83E\uDD19"), 
fieldStats.maxValue());
     }
 
+    @Test
+    public void testTruncateCopied() {
+        TruncateFieldStatsCollector collector = new 
TruncateFieldStatsCollector(16);
+        BinaryString str = BinaryString.fromString("str");
+        collector.collect(str, (Serializer) BinaryStringSerializer.INSTANCE);
+        FieldStats stats = collector.result();
+        assertThat(stats.minValue()).isNotSameAs(str);
+        assertThat(stats.maxValue()).isNotSameAs(str);
+    }
+
+    @Test
+    public void testFullCopied() {
+        FullFieldStatsCollector collector = new FullFieldStatsCollector();
+        BinaryString str = BinaryString.fromString("str");
+        collector.collect(str, (Serializer) BinaryStringSerializer.INSTANCE);
+        FieldStats stats = collector.result();
+        assertThat(stats.minValue()).isNotSameAs(str);
+        assertThat(stats.maxValue()).isNotSameAs(str);
+    }
+
+    @Test
+    public void testTruncateFail() {
+        TruncateFieldStatsCollector collector = new 
TruncateFieldStatsCollector(3);
+
+        StringBuilder builder = new StringBuilder();
+        builder.appendCodePoint(Character.MAX_CODE_POINT);
+        builder.appendCodePoint(Character.MAX_CODE_POINT);
+        builder.appendCodePoint(Character.MAX_CODE_POINT);
+        builder.appendCodePoint(Character.MAX_CODE_POINT);
+        BinaryString str = BinaryString.fromString(builder.toString());
+
+        collector.collect(str, (Serializer) BinaryStringSerializer.INSTANCE);
+        FieldStats stats = collector.result();
+        assertThat(stats.minValue()).isNull();
+        assertThat(stats.maxValue()).isNull();
+
+        stats = collector.convert(new FieldStats(str, str, 0L));
+        assertThat(stats.minValue()).isNull();
+        assertThat(stats.maxValue()).isNull();
+    }
+
     private void check(
             List<GenericRow> rows,
             int column,

Reply via email to