This is an automated email from the ASF dual-hosted git repository.
yuqi4733 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 38c2115f6a [#9757][followup] fix(clickhouse-catalog): fix creating
distribute table error. (#10088)
38c2115f6a is described below
commit 38c2115f6a82a6142da99e37016b2c2e49d2dcbd
Author: Qi Yu <[email protected]>
AuthorDate: Thu Mar 5 13:48:56 2026 +0800
[#9757][followup] fix(clickhouse-catalog): fix creating distribute table
error. (#10088)
### What changes were proposed in this pull request?
This pull request introduces improvements to the backend integration
test workflow and enhances ClickHouse cluster schema support in the
codebase. The main changes include more efficient detection and testing
of modified `catalogs-contrib` modules, and improved handling of
ClickHouse cluster schemas, including the ability to infer cluster
information for databases created via SQL. These updates streamline CI
runs and strengthen cluster-related features and tests.
### Why are the changes needed?
To improve CI efficiency and fix ClickHouse bugs.
Fix: #9757
### Does this PR introduce _any_ user-facing change?
N/A
### How was this patch tested?
new ITs and CI
---------
Co-authored-by: Yuhui <[email protected]>
---
build.gradle.kts | 24 +++++++++++
.../operations/ClickHouseTableOperations.java | 49 ++++++++++++++--------
.../test/CatalogClickHouseClusterIT.java | 2 +-
.../TestClickHouseTableOperationsCluster.java | 29 ++++++++++++-
.../gravitino/catalog/OperationDispatcher.java | 6 ++-
docs/jdbc-clickhouse-catalog.md | 1 +
6 files changed, 90 insertions(+), 21 deletions(-)
diff --git a/build.gradle.kts b/build.gradle.kts
index e1cbefaa82..4bd2e1e8e0 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -598,6 +598,30 @@ subprojects {
initTest(this)
}
+ val testTaskStartTimeMsKey = "testTaskStartTimeMs"
+ doFirst {
+ extensions.extraProperties[testTaskStartTimeMsKey] =
System.currentTimeMillis()
+ logger.lifecycle(
+ "[TEST-TIMING] START module={} task={} at={}",
+ project.path,
+ path,
+ extensions.extraProperties[testTaskStartTimeMsKey]
+ )
+ }
+
+ doLast {
+ val endTimeMs = System.currentTimeMillis()
+ val startTimeMs = extensions.extraProperties[testTaskStartTimeMsKey] as?
Long ?: endTimeMs
+ logger.lifecycle(
+ "[TEST-TIMING] END module={} task={} startMs={} endMs={}
durationMs={}",
+ project.path,
+ path,
+ startTimeMs,
+ endTimeMs,
+ endTimeMs - startTimeMs
+ )
+ }
+
testLogging {
exceptionFormat = TestExceptionFormat.FULL
showExceptions = true
diff --git
a/catalogs-contrib/catalog-jdbc-clickhouse/src/main/java/org/apache/gravitino/catalog/clickhouse/operations/ClickHouseTableOperations.java
b/catalogs-contrib/catalog-jdbc-clickhouse/src/main/java/org/apache/gravitino/catalog/clickhouse/operations/ClickHouseTableOperations.java
index 9858f59f65..b0b566b9b8 100644
---
a/catalogs-contrib/catalog-jdbc-clickhouse/src/main/java/org/apache/gravitino/catalog/clickhouse/operations/ClickHouseTableOperations.java
+++
b/catalogs-contrib/catalog-jdbc-clickhouse/src/main/java/org/apache/gravitino/catalog/clickhouse/operations/ClickHouseTableOperations.java
@@ -168,13 +168,15 @@ public class ClickHouseTableOperations extends
JdbcTableOperations {
// Add Create table clause
appendCreateTableClause(notNullProperties, sqlBuilder, tableName);
- // Add columns
- buildColumnsDefinition(columns, sqlBuilder);
+ // We still allow empty columns when the engine is distributed.
+ if (columns.length > 0) {
+ buildColumnsDefinition(columns, sqlBuilder);
- // Index definition
- appendIndexesSql(indexes, sqlBuilder);
+ // Index definition
+ appendIndexesSql(indexes, sqlBuilder);
- sqlBuilder.append("\n)");
+ sqlBuilder.append("\n)");
+ }
// Extract engine from properties
ClickHouseTablePropertiesMetadata.ENGINE engine =
@@ -220,10 +222,10 @@ public class ClickHouseTableOperations extends
JdbcTableOperations {
if (onCluster) {
sqlBuilder.append(
- "CREATE TABLE %s ON CLUSTER %s (\n"
+ "CREATE TABLE %s ON CLUSTER %s \n"
.formatted(quoteIdentifier(tableName),
quoteIdentifier(clusterName)));
} else {
- sqlBuilder.append("CREATE TABLE %s
(\n".formatted(quoteIdentifier(tableName)));
+ sqlBuilder.append("CREATE TABLE %s
\n".formatted(quoteIdentifier(tableName)));
}
return onCluster;
@@ -343,21 +345,29 @@ public class ClickHouseTableOperations extends
JdbcTableOperations {
Preconditions.checkArgument(
StringUtils.isNotBlank(shardingKey), "Sharding key must be specified
for Distributed");
- List<String> shardingColumns =
ClickHouseTableSqlUtils.extractShardingKeyColumns(shardingKey);
- if (CollectionUtils.isNotEmpty(shardingColumns)) {
- for (String columnName : shardingColumns) {
- JdbcColumn shardingColumn = findColumn(columns, columnName);
- Preconditions.checkArgument(
- shardingColumn != null,
- "Sharding key column %s must be defined in the table",
- columnName);
+ // Users have defined the columns explicitly for the distributed table, we
will check the
+ // columns should contain the sharding key, as clickhouse requires the
sharding key must be
+ // defined in the columns of the distributed table.
+ if (ArrayUtils.isNotEmpty(columns)) {
+ List<String> shardingColumns =
ClickHouseTableSqlUtils.extractShardingKeyColumns(shardingKey);
+ if (CollectionUtils.isNotEmpty(shardingColumns)) {
+ for (String columnName : shardingColumns) {
+ JdbcColumn shardingColumn = findColumn(columns, columnName);
+ Preconditions.checkArgument(
+ shardingColumn != null,
+ "Sharding key column %s must be defined in the table",
+ columnName);
+ }
}
}
- String sanitizedShardingKey =
ClickHouseTableSqlUtils.formatShardingKey(shardingKey);
+ if (ArrayUtils.isEmpty(columns)) {
+ sqlBuilder.append(" AS `%s`.`%s` ".formatted(remoteDatabase,
remoteTable));
+ }
+ String sanitizedShardingKey =
ClickHouseTableSqlUtils.formatShardingKey(shardingKey);
sqlBuilder.append(
- "\n ENGINE = %s(`%s`,`%s`,`%s`,%s)"
+ " ENGINE = %s(`%s`,`%s`,`%s`,%s)"
.formatted(
ENGINE.DISTRIBUTED.getValue(),
clusterName,
@@ -418,6 +428,11 @@ public class ClickHouseTableOperations extends
JdbcTableOperations {
}
private void buildColumnsDefinition(JdbcColumn[] columns, StringBuilder
sqlBuilder) {
+ if (ArrayUtils.isEmpty(columns)) {
+ return;
+ }
+
+ sqlBuilder.append(" (");
for (int i = 0; i < columns.length; i++) {
JdbcColumn column = columns[i];
sqlBuilder.append(" %s".formatted(quoteIdentifier(column.name())));
diff --git
a/catalogs-contrib/catalog-jdbc-clickhouse/src/test/java/org/apache/gravitino/catalog/clickhouse/integration/test/CatalogClickHouseClusterIT.java
b/catalogs-contrib/catalog-jdbc-clickhouse/src/test/java/org/apache/gravitino/catalog/clickhouse/integration/test/CatalogClickHouseClusterIT.java
index 43a6146106..e80f8d4ea5 100644
---
a/catalogs-contrib/catalog-jdbc-clickhouse/src/test/java/org/apache/gravitino/catalog/clickhouse/integration/test/CatalogClickHouseClusterIT.java
+++
b/catalogs-contrib/catalog-jdbc-clickhouse/src/test/java/org/apache/gravitino/catalog/clickhouse/integration/test/CatalogClickHouseClusterIT.java
@@ -227,7 +227,7 @@ public class CatalogClickHouseClusterIT extends BaseIT {
Table distributedTable =
tableCatalog.createTable(
distributedTableIdent,
- columns,
+ new Column[] {},
tableComment,
distributedProperties(localTableName),
partitioning,
diff --git
a/catalogs-contrib/catalog-jdbc-clickhouse/src/test/java/org/apache/gravitino/catalog/clickhouse/operations/TestClickHouseTableOperationsCluster.java
b/catalogs-contrib/catalog-jdbc-clickhouse/src/test/java/org/apache/gravitino/catalog/clickhouse/operations/TestClickHouseTableOperationsCluster.java
index 7117d379cd..0f521bcbbb 100644
---
a/catalogs-contrib/catalog-jdbc-clickhouse/src/test/java/org/apache/gravitino/catalog/clickhouse/operations/TestClickHouseTableOperationsCluster.java
+++
b/catalogs-contrib/catalog-jdbc-clickhouse/src/test/java/org/apache/gravitino/catalog/clickhouse/operations/TestClickHouseTableOperationsCluster.java
@@ -175,7 +175,11 @@ class TestClickHouseTableOperationsCluster {
void testGenerateCreateTableSqlWithDistributedEngineWithoutOnCluster() {
JdbcColumn[] columns =
new JdbcColumn[] {
-
JdbcColumn.builder().withName("user_id").withType(Types.IntegerType.get()).build()
+ JdbcColumn.builder()
+ .withName("user_id")
+ .withType(Types.IntegerType.get())
+ .withNullable(false)
+ .build()
};
Map<String, String> props = new HashMap<>();
@@ -189,7 +193,28 @@ class TestClickHouseTableOperationsCluster {
ops.buildCreateSql(
"tbl", columns, "comment", props, null, Distributions.NONE, new
Index[0], null);
- Assertions.assertTrue(sql.startsWith("CREATE TABLE `tbl` ("));
+ Assertions.assertTrue(sql.startsWith("CREATE TABLE `tbl`"));
+ Assertions.assertTrue(
+ sql.contains("ENGINE =
Distributed(`ck_cluster`,`remote_db`,`remote_table`,`user_id`)"));
+ }
+
+ @Test
+ void testDistributedTableWithEmptyColumns() {
+ JdbcColumn[] columns = new JdbcColumn[] {};
+
+ Map<String, String> props = new HashMap<>();
+ props.put(ClusterConstants.CLUSTER_NAME, "ck_cluster");
+ props.put(GRAVITINO_ENGINE_KEY, "Distributed");
+ props.put(DistributedTableConstants.REMOTE_DATABASE, "remote_db");
+ props.put(DistributedTableConstants.REMOTE_TABLE, "remote_table");
+ props.put(DistributedTableConstants.SHARDING_KEY, "user_id");
+
+ String sql =
+ ops.buildCreateSql(
+ "tbl", columns, "comment", props, null, Distributions.NONE, new
Index[0], null);
+
+ Assertions.assertTrue(sql.startsWith("CREATE TABLE `tbl`"));
+ Assertions.assertTrue(sql.contains("AS `remote_db`.`remote_table`"));
Assertions.assertTrue(
sql.contains("ENGINE =
Distributed(`ck_cluster`,`remote_db`,`remote_table`,`user_id`)"));
}
diff --git
a/core/src/main/java/org/apache/gravitino/catalog/OperationDispatcher.java
b/core/src/main/java/org/apache/gravitino/catalog/OperationDispatcher.java
index 809efa926f..006e05c64b 100644
--- a/core/src/main/java/org/apache/gravitino/catalog/OperationDispatcher.java
+++ b/core/src/main/java/org/apache/gravitino/catalog/OperationDispatcher.java
@@ -233,7 +233,11 @@ public abstract class OperationDispatcher {
try {
return store.get(ident, type, entityClass);
} catch (Exception e) {
- LOG.warn(FormattedErrorMessages.STORE_OP_FAILURE, "get", ident,
e.getMessage(), e);
+ // Fix unexpected error messages like "2026-02-26T14:33:11.810125Z
Gravitino-webserver-91 WARN
+ // found 2 argument placeholders, but provided 4 for pattern `Failed to
{} entity for {} in
+ // Gravitino, with this situation the returned object will not contain
the metadata from
+ // Gravitino.`" in ${GRAVITINO_HOME}/logs/gravitino-server.out.
+ LOG.warn(FormattedErrorMessages.STORE_OP_FAILURE, "get", ident, e);
return null;
}
}
diff --git a/docs/jdbc-clickhouse-catalog.md b/docs/jdbc-clickhouse-catalog.md
index 394c10b989..91506f7cbb 100644
--- a/docs/jdbc-clickhouse-catalog.md
+++ b/docs/jdbc-clickhouse-catalog.md
@@ -191,6 +191,7 @@ Other ClickHouse types are exposed as [External
Type](./manage-relational-metada
:::note
- `settings.*` keys are passed to the ClickHouse `SETTINGS` clause verbatim.
- The `engine` value is immutable after creation.
+- When loading table metadata, Gravitino cannot determine whether it is a
cluster table or a local table, because properties such as `cluster-name` and
`on-cluster` are not available from the JDBC metadata.
:::
| Property Name | Description
| Default Value |
Required | Reserved | Immutable | Since version |