This is an automated email from the ASF dual-hosted git repository.
Gabriel39 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new e0c956b3d94 [fix](iceberg) Fix wrong processing for `NaN` (#64315)
e0c956b3d94 is described below
commit e0c956b3d9498c0d23f8b7dc0585e1575f131709
Author: Gabriel <[email protected]>
AuthorDate: Wed Jun 10 09:54:05 2026 +0800
[fix](iceberg) Fix wrong processing for `NaN` (#64315)
This change fixes Iceberg writes when FLOAT or DOUBLE partition values
contain NaN or infinity.
Previously, the BE Iceberg writer serialized floating-point partition
values with std::to_string(), which produced lowercase special values
such as "nan", "inf", and "-inf". These strings are not accepted by
Java's Float.parseFloat() / Double.parseDouble() in the FE partition
commit path, causing writes with NaN or infinity partition values to fail.
The fix canonicalizes BE floating-point partition serialization to
Java/Iceberg-compatible values: "NaN", "Infinity", and "-Infinity". It
also makes FE partition value parsing tolerate legacy lowercase forms
such as "nan" and "-inf".
Unit tests were added for BE partition value serialization and FE
parsing of special floating-point partition values.
---
.../sink/writer/iceberg/partition_transformers.cpp | 20 ++++++++++++++++--
.../writer/iceberg/partition_transformers_test.cpp | 24 ++++++++++++++++++++++
.../doris/datasource/iceberg/IcebergUtils.java | 18 ++++++++++++++--
.../doris/datasource/iceberg/IcebergUtilsTest.java | 20 ++++++++++++++++++
4 files changed, 78 insertions(+), 4 deletions(-)
diff --git a/be/src/exec/sink/writer/iceberg/partition_transformers.cpp
b/be/src/exec/sink/writer/iceberg/partition_transformers.cpp
index 8b49d68573c..e38e0e4c5eb 100644
--- a/be/src/exec/sink/writer/iceberg/partition_transformers.cpp
+++ b/be/src/exec/sink/writer/iceberg/partition_transformers.cpp
@@ -18,12 +18,28 @@
#include "exec/sink/writer/iceberg/partition_transformers.h"
#include <any>
+#include <cmath>
#include "core/types.h"
#include "format/table/iceberg/partition_spec.h"
namespace doris {
+namespace {
+
+template <typename T>
+std::string floating_point_partition_value_to_string(T value) {
+ if (std::isnan(value)) {
+ return "NaN";
+ }
+ if (std::isinf(value)) {
+ return value > 0 ? "Infinity" : "-Infinity";
+ }
+ return std::to_string(value);
+}
+
+} // namespace
+
const std::chrono::sys_days PartitionColumnTransformUtils::EPOCH =
std::chrono::sys_days(
std::chrono::year {1970} / std::chrono::January / std::chrono::day
{1});
@@ -225,10 +241,10 @@ std::string
PartitionColumnTransform::get_partition_value(const DataTypePtr type
return std::to_string(std::any_cast<Int64>(value));
}
case TYPE_FLOAT: {
- return std::to_string(std::any_cast<Float32>(value));
+ return
floating_point_partition_value_to_string(std::any_cast<Float32>(value));
}
case TYPE_DOUBLE: {
- return std::to_string(std::any_cast<Float64>(value));
+ return
floating_point_partition_value_to_string(std::any_cast<Float64>(value));
}
case TYPE_VARCHAR:
case TYPE_CHAR:
diff --git a/be/test/exec/sink/writer/iceberg/partition_transformers_test.cpp
b/be/test/exec/sink/writer/iceberg/partition_transformers_test.cpp
index 69758b794db..94b8aec8c77 100644
--- a/be/test/exec/sink/writer/iceberg/partition_transformers_test.cpp
+++ b/be/test/exec/sink/writer/iceberg/partition_transformers_test.cpp
@@ -19,6 +19,8 @@
#include <gtest/gtest.h>
+#include <limits>
+
#include "core/data_type/data_type_date_or_datetime_v2.h"
namespace doris {
@@ -115,6 +117,28 @@ TEST_F(PartitionTransformersTest,
test_string_truncate_transform) {
}
}
+TEST_F(PartitionTransformersTest, test_floating_point_special_partition_value)
{
+ auto float_type =
+
DataTypeFactory::instance().create_data_type(PrimitiveType::TYPE_FLOAT, false);
+ auto double_type =
+
DataTypeFactory::instance().create_data_type(PrimitiveType::TYPE_DOUBLE, false);
+ IdentityPartitionColumnTransform float_transform(float_type);
+ IdentityPartitionColumnTransform double_transform(double_type);
+
+ EXPECT_EQ("NaN", float_transform.get_partition_value(
+ float_type,
std::numeric_limits<Float32>::quiet_NaN()));
+ EXPECT_EQ("Infinity", float_transform.get_partition_value(
+ float_type,
std::numeric_limits<Float32>::infinity()));
+ EXPECT_EQ("-Infinity", float_transform.get_partition_value(
+ float_type,
-std::numeric_limits<Float32>::infinity()));
+ EXPECT_EQ("NaN", double_transform.get_partition_value(
+ double_type,
std::numeric_limits<Float64>::quiet_NaN()));
+ EXPECT_EQ("Infinity", double_transform.get_partition_value(
+ double_type,
std::numeric_limits<Float64>::infinity()));
+ EXPECT_EQ("-Infinity", double_transform.get_partition_value(
+ double_type,
-std::numeric_limits<Float64>::infinity()));
+}
+
TEST_F(PartitionTransformersTest, test_integer_bucket_transform) {
const std::vector<int32_t> values({34, -123}); // 2017239379, -471378254
auto column = ColumnInt32::create();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index bf1bee501bc..ea36d755cfd 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -967,9 +967,9 @@ public class IcebergUtils {
case LONG:
return Long.parseLong(valueStr);
case FLOAT:
- return Float.parseFloat(valueStr);
+ return
Float.parseFloat(normalizeFloatingPointPartitionValue(valueStr));
case DOUBLE:
- return Double.parseDouble(valueStr);
+ return
Double.parseDouble(normalizeFloatingPointPartitionValue(valueStr));
case BOOLEAN:
return Boolean.parseBoolean(valueStr);
case DATE:
@@ -988,6 +988,20 @@ public class IcebergUtils {
}
}
+ private static String normalizeFloatingPointPartitionValue(String
valueStr) {
+ if ("nan".equalsIgnoreCase(valueStr)) {
+ return "NaN";
+ }
+ if ("inf".equalsIgnoreCase(valueStr) ||
"+inf".equalsIgnoreCase(valueStr)
+ || "infinity".equalsIgnoreCase(valueStr) ||
"+infinity".equalsIgnoreCase(valueStr)) {
+ return "Infinity";
+ }
+ if ("-inf".equalsIgnoreCase(valueStr) ||
"-infinity".equalsIgnoreCase(valueStr)) {
+ return "-Infinity";
+ }
+ return valueStr;
+ }
+
/**
* Parse timestamp string to microseconds using Doris's built-in datetime
* parser.
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java
index 4f76e82a3a6..5b25da419cc 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java
@@ -296,6 +296,26 @@ public class IcebergUtilsTest {
Double.doubleToLongBits(Double.parseDouble(serializedDouble)));
}
+ @Test
+ public void testParseFloatingPointPartitionValueSupportsSpecialValues() {
+ Assert.assertTrue(Float.isNaN(
+ (Float) IcebergUtils.parsePartitionValueFromString("NaN",
Types.FloatType.get())));
+ Assert.assertTrue(Float.isNaN(
+ (Float) IcebergUtils.parsePartitionValueFromString("nan",
Types.FloatType.get())));
+ Assert.assertEquals(Float.POSITIVE_INFINITY,
+ (Float) IcebergUtils.parsePartitionValueFromString("Infinity",
Types.FloatType.get()), 0.0F);
+ Assert.assertEquals(Float.NEGATIVE_INFINITY,
+ (Float) IcebergUtils.parsePartitionValueFromString("-inf",
Types.FloatType.get()), 0.0F);
+ Assert.assertTrue(Double.isNaN(
+ (Double) IcebergUtils.parsePartitionValueFromString("NaN",
Types.DoubleType.get())));
+ Assert.assertTrue(Double.isNaN(
+ (Double) IcebergUtils.parsePartitionValueFromString("nan",
Types.DoubleType.get())));
+ Assert.assertEquals(Double.POSITIVE_INFINITY,
+ (Double)
IcebergUtils.parsePartitionValueFromString("Infinity", Types.DoubleType.get()),
0.0D);
+ Assert.assertEquals(Double.NEGATIVE_INFINITY,
+ (Double) IcebergUtils.parsePartitionValueFromString("-inf",
Types.DoubleType.get()), 0.0D);
+ }
+
@Test
public void testGetMatchingManifest() {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]