This is an automated email from the ASF dual-hosted git repository.

Gabriel39 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new e0c956b3d94 [fix](iceberg) Fix wrong processing for `NaN` (#64315)
e0c956b3d94 is described below

commit e0c956b3d9498c0d23f8b7dc0585e1575f131709
Author: Gabriel <[email protected]>
AuthorDate: Wed Jun 10 09:54:05 2026 +0800

    [fix](iceberg) Fix wrong processing for `NaN` (#64315)
    
    This change fixes Iceberg writes when FLOAT or DOUBLE partition values
    contain NaN or infinity.
    
    Previously, the BE Iceberg writer serialized floating-point partition
    values with std::to_string(), which produced lowercase special values
    such as "nan", "inf", and "-inf". These strings are not accepted by
    Java's Float.parseFloat() / Double.parseDouble() in the FE partition
    commit path, causing writes with NaN partition values to fail.
    
    The fix canonicalizes BE floating-point partition serialization to
    Java/Iceberg-compatible values: "NaN", "Infinity", and "-Infinity". It
    also makes FE partition value parsing tolerate legacy lowercase forms
    such as "nan" and "-inf".
    
    Unit tests were added for BE partition value serialization and FE
    parsing of special floating-point partition values.
---
 .../sink/writer/iceberg/partition_transformers.cpp | 20 ++++++++++++++++--
 .../writer/iceberg/partition_transformers_test.cpp | 24 ++++++++++++++++++++++
 .../doris/datasource/iceberg/IcebergUtils.java     | 18 ++++++++++++++--
 .../doris/datasource/iceberg/IcebergUtilsTest.java | 20 ++++++++++++++++++
 4 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/be/src/exec/sink/writer/iceberg/partition_transformers.cpp 
b/be/src/exec/sink/writer/iceberg/partition_transformers.cpp
index 8b49d68573c..e38e0e4c5eb 100644
--- a/be/src/exec/sink/writer/iceberg/partition_transformers.cpp
+++ b/be/src/exec/sink/writer/iceberg/partition_transformers.cpp
@@ -18,12 +18,28 @@
 #include "exec/sink/writer/iceberg/partition_transformers.h"
 
 #include <any>
+#include <cmath>
 
 #include "core/types.h"
 #include "format/table/iceberg/partition_spec.h"
 
 namespace doris {
 
+namespace {
+
+template <typename T>
+std::string floating_point_partition_value_to_string(T value) {
+    if (std::isnan(value)) {
+        return "NaN";
+    }
+    if (std::isinf(value)) {
+        return value > 0 ? "Infinity" : "-Infinity";
+    }
+    return std::to_string(value);
+}
+
+} // namespace
+
 const std::chrono::sys_days PartitionColumnTransformUtils::EPOCH = 
std::chrono::sys_days(
         std::chrono::year {1970} / std::chrono::January / std::chrono::day 
{1});
 
@@ -225,10 +241,10 @@ std::string 
PartitionColumnTransform::get_partition_value(const DataTypePtr type
             return std::to_string(std::any_cast<Int64>(value));
         }
         case TYPE_FLOAT: {
-            return std::to_string(std::any_cast<Float32>(value));
+            return 
floating_point_partition_value_to_string(std::any_cast<Float32>(value));
         }
         case TYPE_DOUBLE: {
-            return std::to_string(std::any_cast<Float64>(value));
+            return 
floating_point_partition_value_to_string(std::any_cast<Float64>(value));
         }
         case TYPE_VARCHAR:
         case TYPE_CHAR:
diff --git a/be/test/exec/sink/writer/iceberg/partition_transformers_test.cpp 
b/be/test/exec/sink/writer/iceberg/partition_transformers_test.cpp
index 69758b794db..94b8aec8c77 100644
--- a/be/test/exec/sink/writer/iceberg/partition_transformers_test.cpp
+++ b/be/test/exec/sink/writer/iceberg/partition_transformers_test.cpp
@@ -19,6 +19,8 @@
 
 #include <gtest/gtest.h>
 
+#include <limits>
+
 #include "core/data_type/data_type_date_or_datetime_v2.h"
 
 namespace doris {
@@ -115,6 +117,28 @@ TEST_F(PartitionTransformersTest, 
test_string_truncate_transform) {
     }
 }
 
+TEST_F(PartitionTransformersTest, test_floating_point_special_partition_value) 
{
+    auto float_type =
+            
DataTypeFactory::instance().create_data_type(PrimitiveType::TYPE_FLOAT, false);
+    auto double_type =
+            
DataTypeFactory::instance().create_data_type(PrimitiveType::TYPE_DOUBLE, false);
+    IdentityPartitionColumnTransform float_transform(float_type);
+    IdentityPartitionColumnTransform double_transform(double_type);
+
+    EXPECT_EQ("NaN", float_transform.get_partition_value(
+                             float_type, 
std::numeric_limits<Float32>::quiet_NaN()));
+    EXPECT_EQ("Infinity", float_transform.get_partition_value(
+                                  float_type, 
std::numeric_limits<Float32>::infinity()));
+    EXPECT_EQ("-Infinity", float_transform.get_partition_value(
+                                   float_type, 
-std::numeric_limits<Float32>::infinity()));
+    EXPECT_EQ("NaN", double_transform.get_partition_value(
+                             double_type, 
std::numeric_limits<Float64>::quiet_NaN()));
+    EXPECT_EQ("Infinity", double_transform.get_partition_value(
+                                  double_type, 
std::numeric_limits<Float64>::infinity()));
+    EXPECT_EQ("-Infinity", double_transform.get_partition_value(
+                                   double_type, 
-std::numeric_limits<Float64>::infinity()));
+}
+
 TEST_F(PartitionTransformersTest, test_integer_bucket_transform) {
     const std::vector<int32_t> values({34, -123}); // 2017239379, -471378254
     auto column = ColumnInt32::create();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index bf1bee501bc..ea36d755cfd 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -967,9 +967,9 @@ public class IcebergUtils {
                 case LONG:
                     return Long.parseLong(valueStr);
                 case FLOAT:
-                    return Float.parseFloat(valueStr);
+                    return 
Float.parseFloat(normalizeFloatingPointPartitionValue(valueStr));
                 case DOUBLE:
-                    return Double.parseDouble(valueStr);
+                    return 
Double.parseDouble(normalizeFloatingPointPartitionValue(valueStr));
                 case BOOLEAN:
                     return Boolean.parseBoolean(valueStr);
                 case DATE:
@@ -988,6 +988,20 @@ public class IcebergUtils {
         }
     }
 
+    private static String normalizeFloatingPointPartitionValue(String 
valueStr) {
+        if ("nan".equalsIgnoreCase(valueStr)) {
+            return "NaN";
+        }
+        if ("inf".equalsIgnoreCase(valueStr) || 
"+inf".equalsIgnoreCase(valueStr)
+                || "infinity".equalsIgnoreCase(valueStr) || 
"+infinity".equalsIgnoreCase(valueStr)) {
+            return "Infinity";
+        }
+        if ("-inf".equalsIgnoreCase(valueStr) || 
"-infinity".equalsIgnoreCase(valueStr)) {
+            return "-Infinity";
+        }
+        return valueStr;
+    }
+
     /**
      * Parse timestamp string to microseconds using Doris's built-in datetime
      * parser.
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java
index 4f76e82a3a6..5b25da419cc 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java
@@ -296,6 +296,26 @@ public class IcebergUtilsTest {
                 Double.doubleToLongBits(Double.parseDouble(serializedDouble)));
     }
 
+    @Test
+    public void testParseFloatingPointPartitionValueSupportsSpecialValues() {
+        Assert.assertTrue(Float.isNaN(
+                (Float) IcebergUtils.parsePartitionValueFromString("NaN", 
Types.FloatType.get())));
+        Assert.assertTrue(Float.isNaN(
+                (Float) IcebergUtils.parsePartitionValueFromString("nan", 
Types.FloatType.get())));
+        Assert.assertEquals(Float.POSITIVE_INFINITY,
+                (Float) IcebergUtils.parsePartitionValueFromString("Infinity", 
Types.FloatType.get()), 0.0F);
+        Assert.assertEquals(Float.NEGATIVE_INFINITY,
+                (Float) IcebergUtils.parsePartitionValueFromString("-inf", 
Types.FloatType.get()), 0.0F);
+        Assert.assertTrue(Double.isNaN(
+                (Double) IcebergUtils.parsePartitionValueFromString("NaN", 
Types.DoubleType.get())));
+        Assert.assertTrue(Double.isNaN(
+                (Double) IcebergUtils.parsePartitionValueFromString("nan", 
Types.DoubleType.get())));
+        Assert.assertEquals(Double.POSITIVE_INFINITY,
+                (Double) 
IcebergUtils.parsePartitionValueFromString("Infinity", Types.DoubleType.get()), 
0.0D);
+        Assert.assertEquals(Double.NEGATIVE_INFINITY,
+                (Double) IcebergUtils.parsePartitionValueFromString("-inf", 
Types.DoubleType.get()), 0.0D);
+    }
+
     @Test
     public void testGetMatchingManifest() {
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to