This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8a4f118979f [improvement](iceberg) Reconstruct partition spec in SHOW
CREATE TABLE for Iceberg tables (#63240)
8a4f118979f is described below
commit 8a4f118979f7014f091de57fdf6b21465a267f19
Author: nsivarajan <[email protected]>
AuthorDate: Tue May 19 03:04:43 2026 +0530
[improvement](iceberg) Reconstruct partition spec in SHOW CREATE TABLE for
Iceberg tables (#63240)
### What problem does this PR solve?
Problem Summary:
`SHOW CREATE TABLE` for Iceberg external tables omitted the `PARTITION
BY LIST (...) ()` clause entirely. The generated DDL was not
reproducible — running it as a new `CREATE TABLE` would create an
unpartitioned table, silently losing the partition strategy.
`Env.getDdlStmt()` for `ICEBERG_EXTERNAL_TABLE` reconstructed `ORDER BY`
and `LOCATION` from Iceberg metadata but had no code path for the
partition spec.
Added `getPartitionSpecSql()` to `IcebergExternalTable`, which reads the
live `PartitionSpec` from the Iceberg catalog and converts each
`PartitionField` back to Doris DDL syntax. Wired into `Env.getDdlStmt()`
between `ORDER BY` and `LOCATION`.
*Transform mapping* (all Iceberg standard transforms covered):
- isIdentity() → col
- toString() = "bucket[n]" → BUCKET(n, col)
- toString() = "truncate[n]" → TRUNCATE(n, col)
- toString() = "year" / "month" / "day" / "hour" → YEAR(col) / MONTH(col) / DAY(col) / HOUR(col)
- isVoid() → skipped (partition field dropped through partition evolution)
Uses `isVoid()` and `isIdentity()` from the public `Transform` interface
where available, and falls back to the spec-defined canonical `toString()`
names for the remaining transforms (stable across all Iceberg versions as
they are used in metadata serialisation). Any transform that matches none
of the above is logged with a warning and left out of the generated clause.
Co-authored-by: Sivarajan Narayanan <[email protected]>
---
.../main/java/org/apache/doris/catalog/Env.java | 12 ++
.../datasource/iceberg/IcebergExternalTable.java | 50 +++++++
.../iceberg/IcebergExternalTableTest.java | 160 ++++++++++++++++++++-
3 files changed, 216 insertions(+), 6 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
index 44115f9b2fa..5705d409c2d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
@@ -4469,6 +4469,12 @@ public class Env {
if (icebergExternalTable.hasSortOrder()) {
sb.append("\n").append(icebergExternalTable.getSortOrderSql());
}
+ if (table instanceof IcebergExternalTable) {
+ String partitionSpecSql =
icebergExternalTable.getPartitionSpecSql();
+ if (!partitionSpecSql.isEmpty()) {
+ sb.append("\n").append(partitionSpecSql);
+ }
+ }
sb.append("\nLOCATION
'").append(icebergExternalTable.location()).append("'");
sb.append("\nPROPERTIES (");
Iterator<Entry<String, String>> iterator =
icebergExternalTable.properties().entrySet().iterator();
@@ -4861,6 +4867,12 @@ public class Env {
if (icebergExternalTable.hasSortOrder()) {
sb.append("\n").append(icebergExternalTable.getSortOrderSql());
}
+ if (table instanceof IcebergExternalTable) {
+ String partitionSpecSql =
icebergExternalTable.getPartitionSpecSql();
+ if (!partitionSpecSql.isEmpty()) {
+ sb.append("\n").append(partitionSpecSql);
+ }
+ }
sb.append("\nLOCATION
'").append(icebergExternalTable.location()).append("'");
sb.append("\nPROPERTIES (");
Iterator<Entry<String, String>> iterator =
icebergExternalTable.properties().entrySet().iterator();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalTable.java
index 6a3faf49949..64d64a3102e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalTable.java
@@ -459,4 +459,54 @@ public class IcebergExternalTable extends ExternalTable
implements MTMVRelatedTa
org.apache.iceberg.SortOrder sortOrder = table.sortOrder();
return sortOrder != null && !sortOrder.isUnsorted();
}
+
+ /** Reconstructs PARTITION BY LIST (...) () from the Iceberg PartitionSpec
for SHOW CREATE TABLE. */
+ public String getPartitionSpecSql() {
+ makeSureInitialized();
+ Table table = getIcebergTable();
+ PartitionSpec spec = table.spec();
+ if (spec == null || spec.isUnpartitioned()) {
+ return "";
+ }
+ List<String> fields = new ArrayList<>();
+ for (PartitionField field : spec.fields()) {
+ String colName = table.schema().findColumnName(field.sourceId());
+ if (colName == null) {
+ continue;
+ }
+ org.apache.iceberg.transforms.Transform<?, ?> t =
field.transform();
+ // isVoid/isIdentity: public interface methods; toString():
canonical spec-defined names.
+ if (t.isVoid()) {
+ continue;
+ }
+ String quotedCol = "`" + colName + "`";
+ if (t.isIdentity()) {
+ fields.add(quotedCol);
+ } else {
+ String transformStr = t.toString();
+ if (transformStr.startsWith("bucket[")) {
+ int n = Integer.parseInt(transformStr.substring(7,
transformStr.length() - 1));
+ fields.add("BUCKET(" + n + ", " + quotedCol + ")");
+ } else if (transformStr.startsWith("truncate[")) {
+ int w = Integer.parseInt(transformStr.substring(9,
transformStr.length() - 1));
+ fields.add("TRUNCATE(" + w + ", " + quotedCol + ")");
+ } else if ("year".equals(transformStr)) {
+ fields.add("YEAR(" + quotedCol + ")");
+ } else if ("month".equals(transformStr)) {
+ fields.add("MONTH(" + quotedCol + ")");
+ } else if ("day".equals(transformStr)) {
+ fields.add("DAY(" + quotedCol + ")");
+ } else if ("hour".equals(transformStr)) {
+ fields.add("HOUR(" + quotedCol + ")");
+ } else {
+ LOG.warn("Unsupported Iceberg partition transform '{}' on
column '{}', "
+ + "skipped in SHOW CREATE TABLE.", transformStr,
colName);
+ }
+ }
+ }
+ if (fields.isEmpty()) {
+ return "";
+ }
+ return "PARTITION BY LIST (" + String.join(", ", fields) + ") ()";
+ }
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergExternalTableTest.java
b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergExternalTableTest.java
index 7408898ab74..2d1e7a390ae 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergExternalTableTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergExternalTableTest.java
@@ -30,9 +30,7 @@ import com.google.common.collect.Range;
import org.apache.iceberg.PartitionField;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
-import org.apache.iceberg.transforms.Days;
-import org.apache.iceberg.transforms.Hours;
-import org.apache.iceberg.transforms.Months;
+import org.apache.iceberg.transforms.Transform;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -124,7 +122,7 @@ public class IcebergExternalTableTest {
Mockito.when(icebergTable.schema()).thenReturn(schema);
Mockito.when(schema.findColumnName(ArgumentMatchers.anyInt())).thenReturn("col1");
- Mockito.when(field.transform()).thenReturn(new Hours());
+ Mockito.doReturn(mockTransform("hour")).when(field).transform();
Mockito.when(field.sourceId()).thenReturn(1);
Assertions.assertTrue(spyTable.isValidRelatedTable());
@@ -132,13 +130,13 @@ public class IcebergExternalTableTest {
Assertions.assertTrue(spyTable.validRelatedTableCache());
Mockito.verify(schema,
Mockito.times(1)).findColumnName(ArgumentMatchers.anyInt());
- Mockito.when(field.transform()).thenReturn(new Days());
+ Mockito.doReturn(mockTransform("day")).when(field).transform();
Mockito.when(field.sourceId()).thenReturn(1);
spyTable.setIsValidRelatedTableCached(false);
Assertions.assertFalse(spyTable.isValidRelatedTableCached());
Assertions.assertTrue(spyTable.isValidRelatedTable());
- Mockito.when(field.transform()).thenReturn(new Months());
+ Mockito.doReturn(mockTransform("month")).when(field).transform();
Mockito.when(field.sourceId()).thenReturn(1);
spyTable.setIsValidRelatedTableCached(false);
Assertions.assertFalse(spyTable.isValidRelatedTableCached());
@@ -249,5 +247,155 @@ public class IcebergExternalTableTest {
Assertions.assertTrue(map.containsKey("month197204"));
Assertions.assertTrue(map.containsKey("day19730101"));
}
+
+ // ── helpers ────────────────────────────────────────────────────────────
+
+ private IcebergExternalTable createSpyTable() {
+ IcebergExternalDatabase db = new IcebergExternalDatabase(mockCatalog,
1L, "db", "db");
+ IcebergExternalTable t = new IcebergExternalTable(1, "tbl", "tbl",
mockCatalog, db);
+ IcebergExternalTable spy = Mockito.spy(t);
+ Mockito.doReturn(icebergTable).when(spy).getIcebergTable();
+ Mockito.doNothing().when(spy).makeSureInitialized();
+ return spy;
+ }
+
+ /** Creates a mock Transform with the given canonical toString() value.
+ * Also stubs isIdentity() and isVoid() based on the value. */
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ private static Transform mockTransform(String toStringValue) {
+ Transform t = Mockito.mock(Transform.class);
+ Mockito.when(t.toString()).thenReturn(toStringValue);
+
Mockito.when(t.isIdentity()).thenReturn("identity".equals(toStringValue));
+ Mockito.when(t.isVoid()).thenReturn("void".equals(toStringValue));
+ return t;
+ }
+
+ @SuppressWarnings("rawtypes")
+ private void setupSingleField(Transform transform, String colName) {
+ Mockito.when(icebergTable.spec()).thenReturn(spec);
+ Mockito.when(icebergTable.schema()).thenReturn(schema);
+ Mockito.when(spec.isUnpartitioned()).thenReturn(false);
+ Mockito.when(spec.fields()).thenReturn(Lists.newArrayList(field));
+ Mockito.when(field.sourceId()).thenReturn(1);
+ Mockito.when(schema.findColumnName(1)).thenReturn(colName);
+ Mockito.doReturn(transform).when(field).transform();
+ }
+
+ // ── getPartitionSpecSql tests ───────────────────────────────────────────
+
+ @Test
+ public void testGetPartitionSpecSqlNullSpec() {
+ IcebergExternalTable spy = createSpyTable();
+ Mockito.when(icebergTable.spec()).thenReturn(null);
+ Assertions.assertEquals("", spy.getPartitionSpecSql());
+ }
+
+ @Test
+ public void testGetPartitionSpecSqlUnpartitioned() {
+ IcebergExternalTable spy = createSpyTable();
+ Mockito.when(icebergTable.spec()).thenReturn(spec);
+ Mockito.when(spec.isUnpartitioned()).thenReturn(true);
+ Assertions.assertEquals("", spy.getPartitionSpecSql());
+ }
+
+ @Test
+ public void testGetPartitionSpecSqlIdentity() {
+ IcebergExternalTable spy = createSpyTable();
+ setupSingleField(mockTransform("identity"), "d_year");
+ Assertions.assertEquals("PARTITION BY LIST (`d_year`) ()",
spy.getPartitionSpecSql());
+ }
+
+ @Test
+ public void testGetPartitionSpecSqlBucket() {
+ IcebergExternalTable spy = createSpyTable();
+ setupSingleField(mockTransform("bucket[2048]"), "ss_item_sk");
+ Assertions.assertEquals("PARTITION BY LIST (BUCKET(2048,
`ss_item_sk`)) ()",
+ spy.getPartitionSpecSql());
+ }
+
+ @Test
+ public void testGetPartitionSpecSqlTruncate() {
+ IcebergExternalTable spy = createSpyTable();
+ setupSingleField(mockTransform("truncate[10]"), "category");
+ Assertions.assertEquals("PARTITION BY LIST (TRUNCATE(10, `category`))
()",
+ spy.getPartitionSpecSql());
+ }
+
+ @Test
+ public void testGetPartitionSpecSqlTimeTransforms() {
+ IcebergExternalTable spy = createSpyTable();
+ Mockito.when(icebergTable.spec()).thenReturn(spec);
+ Mockito.when(icebergTable.schema()).thenReturn(schema);
+ Mockito.when(spec.isUnpartitioned()).thenReturn(false);
+ Mockito.when(spec.fields()).thenReturn(Lists.newArrayList(field));
+ Mockito.when(field.sourceId()).thenReturn(1);
+
Mockito.when(schema.findColumnName(ArgumentMatchers.anyInt())).thenReturn("ts");
+
+ Mockito.doReturn(mockTransform("year")).when(field).transform();
+ Assertions.assertEquals("PARTITION BY LIST (YEAR(`ts`)) ()",
spy.getPartitionSpecSql());
+
+ Mockito.doReturn(mockTransform("month")).when(field).transform();
+ Assertions.assertEquals("PARTITION BY LIST (MONTH(`ts`)) ()",
spy.getPartitionSpecSql());
+
+ Mockito.doReturn(mockTransform("day")).when(field).transform();
+ Assertions.assertEquals("PARTITION BY LIST (DAY(`ts`)) ()",
spy.getPartitionSpecSql());
+
+ Mockito.doReturn(mockTransform("hour")).when(field).transform();
+ Assertions.assertEquals("PARTITION BY LIST (HOUR(`ts`)) ()",
spy.getPartitionSpecSql());
+ }
+
+ @Test
+ public void testGetPartitionSpecSqlVoidSkipped() {
+ IcebergExternalTable spy = createSpyTable();
+ Mockito.when(icebergTable.spec()).thenReturn(spec);
+ Mockito.when(icebergTable.schema()).thenReturn(schema);
+ Mockito.when(spec.isUnpartitioned()).thenReturn(false);
+ Mockito.when(spec.fields()).thenReturn(Lists.newArrayList(field));
+ Mockito.when(field.sourceId()).thenReturn(1);
+ Mockito.when(schema.findColumnName(1)).thenReturn("ts");
+ Mockito.doReturn(mockTransform("void")).when(field).transform();
+ Assertions.assertEquals("", spy.getPartitionSpecSql());
+ }
+
+ @Test
+ public void testGetPartitionSpecSqlMultipleFields() {
+ IcebergExternalTable spy = createSpyTable();
+ PartitionField field2 = Mockito.mock(PartitionField.class);
+
+ Mockito.when(icebergTable.spec()).thenReturn(spec);
+ Mockito.when(icebergTable.schema()).thenReturn(schema);
+ Mockito.when(spec.isUnpartitioned()).thenReturn(false);
+ Mockito.when(spec.fields()).thenReturn(Lists.newArrayList(field,
field2));
+ Mockito.when(field.sourceId()).thenReturn(1);
+ Mockito.when(schema.findColumnName(1)).thenReturn("sold_date_sk");
+ Mockito.doReturn(mockTransform("identity")).when(field).transform();
+ Mockito.when(field2.sourceId()).thenReturn(2);
+ Mockito.when(schema.findColumnName(2)).thenReturn("item_sk");
+
Mockito.doReturn(mockTransform("bucket[128]")).when(field2).transform();
+
+ Assertions.assertEquals("PARTITION BY LIST (`sold_date_sk`,
BUCKET(128, `item_sk`)) ()",
+ spy.getPartitionSpecSql());
+ }
+
+ @Test
+ public void testGetPartitionSpecSqlReservedWordColumnQuoted() {
+ // Reserved SQL keyword as column name must be backtick-quoted for
replayable DDL.
+ IcebergExternalTable spy = createSpyTable();
+ setupSingleField(mockTransform("identity"), "select");
+ Assertions.assertEquals("PARTITION BY LIST (`select`) ()",
spy.getPartitionSpecSql());
+ }
+
+ @Test
+ public void testGetPartitionSpecSqlUnresolvableColumnSkipped() {
+ IcebergExternalTable spy = createSpyTable();
+ int unknownSourceId = 999;
+ Mockito.when(icebergTable.spec()).thenReturn(spec);
+ Mockito.when(icebergTable.schema()).thenReturn(schema);
+ Mockito.when(spec.isUnpartitioned()).thenReturn(false);
+ Mockito.when(spec.fields()).thenReturn(Lists.newArrayList(field));
+ Mockito.when(field.sourceId()).thenReturn(unknownSourceId);
+ Mockito.when(schema.findColumnName(unknownSourceId)).thenReturn(null);
+ Assertions.assertEquals("", spy.getPartitionSpecSql());
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]