This is an automated email from the ASF dual-hosted git repository.
huaxingao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new bd1b8900ad Spark: Fix Z-order UDF to correctly handle DateType (#14108)
bd1b8900ad is described below
commit bd1b8900ad17a0625349aec3f73c87e79ad2af66
Author: Ron Kapoor <[email protected]>
AuthorDate: Thu Oct 30 17:00:44 2025 -0400
Spark: Fix Z-order UDF to correctly handle DateType (#14108)
* Fix Z-order UDF to correctly handle DateType using unix_date
* Update spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java
---------
Co-authored-by: Eduard Tudenhoefner <[email protected]>
---
.../apache/iceberg/spark/actions/SparkZOrderUDF.java | 2 +-
.../spark/actions/TestRewriteDataFilesAction.java | 19 +++++++++++++++++++
2 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java
index db359fdd62..d142e3fd1a 100644
--- a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java
+++ b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java
@@ -310,7 +310,7 @@ class SparkZOrderUDF implements Serializable {
} else if (type instanceof TimestampType) {
return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType));
} else if (type instanceof DateType) {
- return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType));
+ return longToOrderedBytesUDF().apply(functions.unix_date(column).cast(DataTypes.LongType));
} else {
throw new IllegalArgumentException(
String.format(
diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java
index 09f5b109fb..6d965f3dcc 100644
--- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java
+++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java
@@ -22,6 +22,7 @@ import static org.apache.iceberg.TableProperties.COMMIT_NUM_RETRIES;
import static org.apache.iceberg.data.FileHelpers.encrypt;
import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;
+import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.current_date;
import static org.apache.spark.sql.functions.date_add;
import static org.apache.spark.sql.functions.expr;
@@ -127,6 +128,7 @@ import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
import org.apache.spark.sql.internal.SQLConf;
+import org.apache.spark.sql.types.DataTypes;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.TestTemplate;
@@ -2109,6 +2111,23 @@ public class TestRewriteDataFilesAction extends TestBase {
.isFalse();
}
+ @TestTemplate
+ public void testZOrderUDFWithDateType() {
+ SparkZOrderUDF zorderUDF = new SparkZOrderUDF(1, 16, 1024);
+ Dataset<Row> result =
+ spark
+ .sql("SELECT DATE '2025-01-01' as test_col")
+ .withColumn(
+ "zorder_result",
+ zorderUDF.sortedLexicographically(col("test_col"), DataTypes.DateType));
+
+ assertThat(result.schema().apply("zorder_result").dataType()).isEqualTo(DataTypes.BinaryType);
+ List<Row> rows = result.collectAsList();
+ Row row = rows.get(0);
+ byte[] zorderBytes = row.getAs("zorder_result");
+ assertThat(zorderBytes).isNotNull().isNotEmpty();
+ }
+
protected void shouldRewriteDataFilesWithPartitionSpec(Table table, int outputSpecId) {
List<DataFile> rewrittenFiles = currentDataFiles(table);
assertThat(rewrittenFiles).allMatch(file -> file.specId() == outputSpecId);