This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c334a01575a [fix](external) check duplicate column names for external
table schema (#52315)
c334a01575a is described below
commit c334a01575afb307e0cb119982aedd2ff21d27a3
Author: Socrates <[email protected]>
AuthorDate: Mon Jul 7 09:09:46 2025 +0800
[fix](external) check duplicate column names for external table schema
(#52315)
### What problem does this PR solve?
Problem Summary:
Flink treats column names as case-sensitive, so it can create a Paimon
table whose column names differ only by case, for example:
```sql
create table dup_column_table(id int, ID int);
```
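However, Doris treats column names case-insensitively, so `id` and `ID`
collide, which causes errors when looking up the table. This PR checks
for such duplicates when an external table's schema is loaded and fails
with an explicit "Duplicate column name found: <name>" error.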
---
...ta-72d4d52a-eca9-4542-a2af-cc17499731e6-0.parquet | Bin 0 -> 504 bytes
.../manifest-5cae1365-d123-4172-9a89-4fbc02bee658-0 | Bin 0 -> 1942 bytes
...ifest-list-391058a9-952c-4aa9-892f-df3334e4109b-0 | Bin 0 -> 884 bytes
...ifest-list-391058a9-952c-4aa9-892f-df3334e4109b-1 | Bin 0 -> 989 bytes
.../paimon1/db1.db/dup_columns_table/schema/schema-0 | 19 +++++++++++++++++++
.../db1.db/dup_columns_table/snapshot/EARLIEST | 1 +
.../paimon1/db1.db/dup_columns_table/snapshot/LATEST | 1 +
.../db1.db/dup_columns_table/snapshot/snapshot-1 | 19 +++++++++++++++++++
.../apache/doris/datasource/ExternalSchemaCache.java | 3 +++
.../apache/doris/datasource/SchemaCacheValue.java | 12 ++++++++++++
.../paimon/test_paimon_catalog.groovy | 6 ++++++
11 files changed, 61 insertions(+)
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/bucket-0/data-72d4d52a-eca9-4542-a2af-cc17499731e6-0.parquet
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/bucket-0/data-72d4d52a-eca9-4542-a2af-cc17499731e6-0.parquet
new file mode 100644
index 00000000000..4ced9cc1688
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/bucket-0/data-72d4d52a-eca9-4542-a2af-cc17499731e6-0.parquet
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-5cae1365-d123-4172-9a89-4fbc02bee658-0
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-5cae1365-d123-4172-9a89-4fbc02bee658-0
new file mode 100644
index 00000000000..0aade3101c0
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-5cae1365-d123-4172-9a89-4fbc02bee658-0
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-0
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-0
new file mode 100644
index 00000000000..34bd4182196
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-0
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-1
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-1
new file mode 100644
index 00000000000..b4d11e9dba4
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/manifest/manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-1
differ
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/schema/schema-0
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/schema/schema-0
new file mode 100644
index 00000000000..4578d38066e
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/schema/schema-0
@@ -0,0 +1,19 @@
+{
+ "version" : 3,
+ "id" : 0,
+ "fields" : [ {
+ "id" : 0,
+ "name" : "id",
+ "type" : "INT"
+ }, {
+ "id" : 1,
+ "name" : "ID",
+ "type" : "INT"
+ } ],
+ "highestFieldId" : 1,
+ "partitionKeys" : [ ],
+ "primaryKeys" : [ ],
+ "options" : { },
+ "comment" : "",
+ "timeMillis" : 1750851313662
+}
\ No newline at end of file
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/EARLIEST
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/EARLIEST
new file mode 100644
index 00000000000..56a6051ca2b
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/EARLIEST
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/LATEST
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/LATEST
new file mode 100644
index 00000000000..56a6051ca2b
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/LATEST
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/snapshot-1
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/snapshot-1
new file mode 100644
index 00000000000..14de268c592
--- /dev/null
+++
b/docker/thirdparties/docker-compose/hive/scripts/paimon1/db1.db/dup_columns_table/snapshot/snapshot-1
@@ -0,0 +1,19 @@
+{
+ "version" : 3,
+ "id" : 1,
+ "schemaId" : 0,
+ "baseManifestList" : "manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-0",
+ "baseManifestListSize" : 884,
+ "deltaManifestList" : "manifest-list-391058a9-952c-4aa9-892f-df3334e4109b-1",
+ "deltaManifestListSize" : 989,
+ "changelogManifestList" : null,
+ "commitUser" : "67873442-3b91-4ce9-983c-ae4df219a769",
+ "commitIdentifier" : 9223372036854775807,
+ "commitKind" : "APPEND",
+ "timeMillis" : 1750851330423,
+ "logOffsets" : { },
+ "totalRecordCount" : 2,
+ "deltaRecordCount" : 2,
+ "changelogRecordCount" : 0,
+ "watermark" : -9223372036854775808
+}
\ No newline at end of file
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java
index 73b96198ccf..85e6fe4e41f 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalSchemaCache.java
@@ -76,6 +76,9 @@ public class ExternalSchemaCache {
private Optional<SchemaCacheValue> loadSchema(SchemaCacheKey key) {
Optional<SchemaCacheValue> schema = catalog.getSchema(key);
+ if (schema.isPresent()) {
+ schema.get().validateSchema();
+ }
if (LOG.isDebugEnabled()) {
LOG.debug("load schema for {} in catalog {}", key,
catalog.getName());
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/SchemaCacheValue.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/SchemaCacheValue.java
index b02b8bda840..4611a0d9816 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/SchemaCacheValue.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/SchemaCacheValue.java
@@ -19,7 +19,9 @@ package org.apache.doris.datasource;
import org.apache.doris.catalog.Column;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
/**
* The cache value of ExternalSchemaCache.
@@ -37,4 +39,14 @@ public class SchemaCacheValue {
public List<Column> getSchema() {
return schema;
}
+
+ public void validateSchema() throws IllegalArgumentException {
+ Set<String> columnNames = new HashSet<>();
+ for (Column column : schema) {
+ if (!columnNames.add(column.getName().toLowerCase())) {
+ throw new IllegalArgumentException("Duplicate column name found: " + column.getName());
+ }
+ // Add more validation logic if needed
+ }
+ }
}
diff --git
a/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy
b/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy
index 41afb02e0f9..35d36c9a5c3 100644
--- a/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy
+++ b/regression-test/suites/external_table_p0/paimon/test_paimon_catalog.groovy
@@ -295,6 +295,12 @@ suite("test_paimon_catalog",
"p0,external,doris,external_docker,external_docker_
test_cases("false", "true")
test_cases("true", "false")
test_cases("true", "true")
+
+ test {
+ sql """select * from dup_columns_table;"""
+ exception "Duplicate column name found: id"
+ }
+
sql """ set force_jni_scanner=false; """
// test view from jion paimon
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]