vinishjail97 commented on code in PR #802: URL: https://github.com/apache/incubator-xtable/pull/802#discussion_r2841723788
########## xtable-databricks/src/main/java/org/apache/xtable/databricks/DatabricksUnityCatalogSyncClient.java: ########## @@ -0,0 +1,506 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.xtable.databricks; + +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; + +import lombok.extern.log4j.Log4j2; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; + +import com.databricks.sdk.WorkspaceClient; +import com.databricks.sdk.core.DatabricksConfig; +import com.databricks.sdk.core.error.platform.NotFound; +import com.databricks.sdk.service.catalog.ColumnInfo; +import com.databricks.sdk.service.catalog.CreateSchema; +import com.databricks.sdk.service.catalog.SchemaInfo; +import com.databricks.sdk.service.catalog.SchemasAPI; +import com.databricks.sdk.service.catalog.TableInfo; +import com.databricks.sdk.service.catalog.TablesAPI; +import com.databricks.sdk.service.sql.Disposition; +import com.databricks.sdk.service.sql.ExecuteStatementRequest; +import com.databricks.sdk.service.sql.ExecuteStatementRequestOnWaitTimeout; +import com.databricks.sdk.service.sql.Format; +import com.databricks.sdk.service.sql.StatementExecutionAPI; +import com.databricks.sdk.service.sql.StatementResponse; +import com.databricks.sdk.service.sql.StatementState; + +import org.apache.xtable.catalog.CatalogUtils; +import org.apache.xtable.conversion.ExternalCatalogConfig; +import org.apache.xtable.exception.CatalogSyncException; +import org.apache.xtable.model.InternalTable; +import org.apache.xtable.model.catalog.CatalogTableIdentifier; +import org.apache.xtable.model.catalog.HierarchicalTableIdentifier; +import org.apache.xtable.model.schema.InternalField; +import org.apache.xtable.model.schema.InternalSchema; +import org.apache.xtable.model.storage.CatalogType; +import org.apache.xtable.model.storage.TableFormat; +import org.apache.xtable.spi.sync.CatalogSyncClient; + +/** + * Databricks Unity Catalog implementation skeleton for CatalogSyncClient. + * + * <p>This is a placeholder to wire Databricks UC as a catalog target via the SQL Statement + * Execution API. Actual DDL execution and schema diffing should be implemented in a follow-up + * change. + */ +@Log4j2 +public class DatabricksUnityCatalogSyncClient implements CatalogSyncClient<TableInfo> { + + private ExternalCatalogConfig catalogConfig; + private DatabricksUnityCatalogConfig databricksConfig; + private Configuration hadoopConf; + private String tableFormat; + private WorkspaceClient workspaceClient; + private StatementExecutionAPI statementExecution; + private TablesAPI tablesApi; + private SchemasAPI schemasApi; + + // For loading the instance using ServiceLoader + public DatabricksUnityCatalogSyncClient() {} + + public DatabricksUnityCatalogSyncClient( + ExternalCatalogConfig catalogConfig, String tableFormat, Configuration configuration) { + init(catalogConfig, tableFormat, configuration); + } + + DatabricksUnityCatalogSyncClient( + ExternalCatalogConfig catalogConfig, + String tableFormat, + Configuration configuration, + StatementExecutionAPI statementExecution, + TablesAPI tablesApi, + SchemasAPI schemasApi) { + this.statementExecution = statementExecution; + this.tablesApi = tablesApi; + this.schemasApi = schemasApi; + init(catalogConfig, tableFormat, configuration); + } + + @Override + public String getCatalogId() { + return catalogConfig.getCatalogId(); + } + + @Override + public String getCatalogType() { + return CatalogType.DATABRICKS_UC; + } + + @Override + public String getStorageLocation(TableInfo table) { + if (table == null) { + return null; + } + return table.getStorageLocation(); + } + + @Override + public boolean hasDatabase(CatalogTableIdentifier tableIdentifier) { + HierarchicalTableIdentifier hierarchical = + CatalogUtils.toHierarchicalTableIdentifier(tableIdentifier); + String catalog = hierarchical.getCatalogName(); + if (StringUtils.isBlank(catalog)) { + throw new CatalogSyncException( + "Databricks UC requires a catalog name (expected catalog.schema.table)"); Review Comment: `createOrReplaceTable` drops then recreates the table. This is not atomic — if `createTable` fails after `dropTable` succeeds, the table is gone. Consider using `CREATE OR REPLACE TABLE` DDL instead (if supported for external tables), or at minimum catch the create failure and log a clear error that the table was dropped but not recreated. ########## xtable-databricks/src/main/java/org/apache/xtable/databricks/DatabricksUnityCatalogSyncClient.java: ########## @@ -0,0 +1,506 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.xtable.databricks; + +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; + +import lombok.extern.log4j.Log4j2; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; + +import com.databricks.sdk.WorkspaceClient; +import com.databricks.sdk.core.DatabricksConfig; +import com.databricks.sdk.core.error.platform.NotFound; +import com.databricks.sdk.service.catalog.ColumnInfo; +import com.databricks.sdk.service.catalog.CreateSchema; +import com.databricks.sdk.service.catalog.SchemaInfo; +import com.databricks.sdk.service.catalog.SchemasAPI; +import com.databricks.sdk.service.catalog.TableInfo; +import com.databricks.sdk.service.catalog.TablesAPI; +import com.databricks.sdk.service.sql.Disposition; +import com.databricks.sdk.service.sql.ExecuteStatementRequest; +import com.databricks.sdk.service.sql.ExecuteStatementRequestOnWaitTimeout; +import com.databricks.sdk.service.sql.Format; +import com.databricks.sdk.service.sql.StatementExecutionAPI; +import com.databricks.sdk.service.sql.StatementResponse; +import com.databricks.sdk.service.sql.StatementState; + +import org.apache.xtable.catalog.CatalogUtils; +import org.apache.xtable.conversion.ExternalCatalogConfig; +import org.apache.xtable.exception.CatalogSyncException; +import org.apache.xtable.model.InternalTable; +import org.apache.xtable.model.catalog.CatalogTableIdentifier; +import org.apache.xtable.model.catalog.HierarchicalTableIdentifier; +import org.apache.xtable.model.schema.InternalField; +import org.apache.xtable.model.schema.InternalSchema; +import org.apache.xtable.model.storage.CatalogType; +import org.apache.xtable.model.storage.TableFormat; +import org.apache.xtable.spi.sync.CatalogSyncClient; + +/** + * Databricks Unity Catalog implementation skeleton for CatalogSyncClient. + * + * <p>This is a placeholder to wire Databricks UC as a catalog target via the SQL Statement + * Execution API. Actual DDL execution and schema diffing should be implemented in a follow-up + * change. + */ +@Log4j2 +public class DatabricksUnityCatalogSyncClient implements CatalogSyncClient<TableInfo> { + + private ExternalCatalogConfig catalogConfig; + private DatabricksUnityCatalogConfig databricksConfig; + private Configuration hadoopConf; + private String tableFormat; + private WorkspaceClient workspaceClient; + private StatementExecutionAPI statementExecution; + private TablesAPI tablesApi; + private SchemasAPI schemasApi; + + // For loading the instance using ServiceLoader + public DatabricksUnityCatalogSyncClient() {} + + public DatabricksUnityCatalogSyncClient( + ExternalCatalogConfig catalogConfig, String tableFormat, Configuration configuration) { + init(catalogConfig, tableFormat, configuration); + } + + DatabricksUnityCatalogSyncClient( + ExternalCatalogConfig catalogConfig, + String tableFormat, + Configuration configuration, + StatementExecutionAPI statementExecution, + TablesAPI tablesApi, + SchemasAPI schemasApi) { + this.statementExecution = statementExecution; + this.tablesApi = tablesApi; + this.schemasApi = schemasApi; + init(catalogConfig, tableFormat, configuration); + } + + @Override + public String getCatalogId() { + return catalogConfig.getCatalogId(); + } + + @Override + public String getCatalogType() { + return CatalogType.DATABRICKS_UC; + } + + @Override + public String getStorageLocation(TableInfo table) { + if (table == null) { + return null; + } + return table.getStorageLocation(); Review Comment: `MSCK REPAIR TABLE ... SYNC METADATA` is a Hive-specific command. Does Databricks UC's SQL Warehouse support this syntax? The standard Databricks approach for refreshing external table metadata is `ALTER TABLE <table> SET TBLPROPERTIES` or reading from the Delta log directly. If this doesn't actually work on Databricks, schema drift would silently be ignored. Has this been tested against a real SQL Warehouse? ########## xtable-databricks/src/main/java/org/apache/xtable/databricks/DatabricksUnityCatalogSyncClient.java: ########## @@ -0,0 +1,506 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.xtable.databricks; + +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; + +import lombok.extern.log4j.Log4j2; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; + +import com.databricks.sdk.WorkspaceClient; +import com.databricks.sdk.core.DatabricksConfig; +import com.databricks.sdk.core.error.platform.NotFound; +import com.databricks.sdk.service.catalog.ColumnInfo; +import com.databricks.sdk.service.catalog.CreateSchema; +import com.databricks.sdk.service.catalog.SchemaInfo; +import com.databricks.sdk.service.catalog.SchemasAPI; +import com.databricks.sdk.service.catalog.TableInfo; +import com.databricks.sdk.service.catalog.TablesAPI; +import com.databricks.sdk.service.sql.Disposition; +import com.databricks.sdk.service.sql.ExecuteStatementRequest; +import com.databricks.sdk.service.sql.ExecuteStatementRequestOnWaitTimeout; +import com.databricks.sdk.service.sql.Format; +import com.databricks.sdk.service.sql.StatementExecutionAPI; +import com.databricks.sdk.service.sql.StatementResponse; +import com.databricks.sdk.service.sql.StatementState; + +import org.apache.xtable.catalog.CatalogUtils; +import org.apache.xtable.conversion.ExternalCatalogConfig; +import org.apache.xtable.exception.CatalogSyncException; +import org.apache.xtable.model.InternalTable; +import org.apache.xtable.model.catalog.CatalogTableIdentifier; +import org.apache.xtable.model.catalog.HierarchicalTableIdentifier; +import org.apache.xtable.model.schema.InternalField; +import org.apache.xtable.model.schema.InternalSchema; +import org.apache.xtable.model.storage.CatalogType; +import org.apache.xtable.model.storage.TableFormat; +import org.apache.xtable.spi.sync.CatalogSyncClient; + +/** + * Databricks Unity Catalog implementation skeleton for CatalogSyncClient. + * + * <p>This is a placeholder to wire Databricks UC as a catalog target via the SQL Statement + * Execution API. Actual DDL execution and schema diffing should be implemented in a follow-up + * change. + */ +@Log4j2 +public class DatabricksUnityCatalogSyncClient implements CatalogSyncClient<TableInfo> { + + private ExternalCatalogConfig catalogConfig; + private DatabricksUnityCatalogConfig databricksConfig; + private Configuration hadoopConf; Review Comment: All fields (`catalogConfig`, `databricksConfig`, `hadoopConf`, `tableFormat`, `workspaceClient`, etc.) are mutable and set via `init()`. This pattern is fragile — calling any method before `init()` would NPE. Consider making these `final` and removing the no-arg constructor, or at least adding a null-check guard in methods that use these fields. The ServiceLoader no-arg constructor + `init()` pattern should be documented with a warning. ########## xtable-databricks/src/main/java/org/apache/xtable/databricks/DatabricksUnityCatalogSyncClient.java: ########## @@ -0,0 +1,506 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.xtable.databricks; + +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; + +import lombok.extern.log4j.Log4j2; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; + +import com.databricks.sdk.WorkspaceClient; +import com.databricks.sdk.core.DatabricksConfig; +import com.databricks.sdk.core.error.platform.NotFound; +import com.databricks.sdk.service.catalog.ColumnInfo; +import com.databricks.sdk.service.catalog.CreateSchema; +import com.databricks.sdk.service.catalog.SchemaInfo; +import com.databricks.sdk.service.catalog.SchemasAPI; +import com.databricks.sdk.service.catalog.TableInfo; +import com.databricks.sdk.service.catalog.TablesAPI; +import com.databricks.sdk.service.sql.Disposition; +import com.databricks.sdk.service.sql.ExecuteStatementRequest; +import com.databricks.sdk.service.sql.ExecuteStatementRequestOnWaitTimeout; +import com.databricks.sdk.service.sql.Format; +import com.databricks.sdk.service.sql.StatementExecutionAPI; +import com.databricks.sdk.service.sql.StatementResponse; +import com.databricks.sdk.service.sql.StatementState; + +import org.apache.xtable.catalog.CatalogUtils; +import org.apache.xtable.conversion.ExternalCatalogConfig; +import org.apache.xtable.exception.CatalogSyncException; +import org.apache.xtable.model.InternalTable; +import org.apache.xtable.model.catalog.CatalogTableIdentifier; +import org.apache.xtable.model.catalog.HierarchicalTableIdentifier; +import org.apache.xtable.model.schema.InternalField; +import org.apache.xtable.model.schema.InternalSchema; +import org.apache.xtable.model.storage.CatalogType; +import org.apache.xtable.model.storage.TableFormat; +import org.apache.xtable.spi.sync.CatalogSyncClient; + +/** + * Databricks Unity Catalog implementation skeleton for CatalogSyncClient. + * + * <p>This is a placeholder to wire Databricks UC as a catalog target via the SQL Statement + * Execution API. Actual DDL execution and schema diffing should be implemented in a follow-up + * change. + */ +@Log4j2 +public class DatabricksUnityCatalogSyncClient implements CatalogSyncClient<TableInfo> { + + private ExternalCatalogConfig catalogConfig; + private DatabricksUnityCatalogConfig databricksConfig; + private Configuration hadoopConf; + private String tableFormat; + private WorkspaceClient workspaceClient; + private StatementExecutionAPI statementExecution; + private TablesAPI tablesApi; + private SchemasAPI schemasApi; + + // For loading the instance using ServiceLoader + public DatabricksUnityCatalogSyncClient() {} + + public DatabricksUnityCatalogSyncClient( + ExternalCatalogConfig catalogConfig, String tableFormat, Configuration configuration) { + init(catalogConfig, tableFormat, configuration); + } + + DatabricksUnityCatalogSyncClient( + ExternalCatalogConfig catalogConfig, + String tableFormat, Review Comment: SQL injection risk: `fullName` is derived from user-provided `tableIdentifier` and interpolated directly into the SQL string. While `escapeSqlString` is used for `location`, the table name itself is not sanitized. If `hierarchicalId` contains `; DROP TABLE ...`, this would be injected. Consider validating that catalog/schema/table names match `[a-zA-Z0-9_]+` before interpolating them into DDL statements. ########## xtable-databricks/src/main/java/org/apache/xtable/databricks/DatabricksUnityCatalogSyncClient.java: ########## @@ -0,0 +1,506 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.xtable.databricks; + +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; + +import lombok.extern.log4j.Log4j2; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; + +import com.databricks.sdk.WorkspaceClient; +import com.databricks.sdk.core.DatabricksConfig; +import com.databricks.sdk.core.error.platform.NotFound; +import com.databricks.sdk.service.catalog.ColumnInfo; +import com.databricks.sdk.service.catalog.CreateSchema; +import com.databricks.sdk.service.catalog.SchemaInfo; +import com.databricks.sdk.service.catalog.SchemasAPI; +import com.databricks.sdk.service.catalog.TableInfo; +import com.databricks.sdk.service.catalog.TablesAPI; +import com.databricks.sdk.service.sql.Disposition; +import com.databricks.sdk.service.sql.ExecuteStatementRequest; +import com.databricks.sdk.service.sql.ExecuteStatementRequestOnWaitTimeout; +import com.databricks.sdk.service.sql.Format; +import com.databricks.sdk.service.sql.StatementExecutionAPI; +import com.databricks.sdk.service.sql.StatementResponse; +import com.databricks.sdk.service.sql.StatementState; + +import org.apache.xtable.catalog.CatalogUtils; +import org.apache.xtable.conversion.ExternalCatalogConfig; +import org.apache.xtable.exception.CatalogSyncException; +import org.apache.xtable.model.InternalTable; +import org.apache.xtable.model.catalog.CatalogTableIdentifier; +import org.apache.xtable.model.catalog.HierarchicalTableIdentifier; +import org.apache.xtable.model.schema.InternalField; +import org.apache.xtable.model.schema.InternalSchema; +import org.apache.xtable.model.storage.CatalogType; +import org.apache.xtable.model.storage.TableFormat; +import org.apache.xtable.spi.sync.CatalogSyncClient; + +/** + * Databricks Unity Catalog implementation skeleton for CatalogSyncClient. + * + * <p>This is a placeholder to wire Databricks UC as a catalog target via the SQL Statement Review Comment: Class-level Javadoc says "This is a placeholder" and "Actual DDL execution and schema diffing should be implemented in a follow-up change" — but this PR implements both. The Javadoc is stale. ########## xtable-databricks/src/main/java/org/apache/xtable/databricks/DatabricksUnityCatalogConfig.java: ########## @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.xtable.databricks; + +import java.util.Map; + +import lombok.Value; + +import org.apache.xtable.conversion.ExternalCatalogConfig; + +@Value +public class DatabricksUnityCatalogConfig { + public static final String HOST = "externalCatalog.uc.host"; + public static final String WAREHOUSE_ID = "externalCatalog.uc.warehouseId"; + public static final String AUTH_TYPE = "externalCatalog.uc.authType"; + public static final String CLIENT_ID = "externalCatalog.uc.clientId"; + public static final String CLIENT_SECRET = "externalCatalog.uc.clientSecret"; + public static final String TOKEN = "externalCatalog.uc.token"; + + String host; + String warehouseId; + String authType; + String clientId; + String clientSecret; + String token; + + public static DatabricksUnityCatalogConfig from(ExternalCatalogConfig catalogConfig) { + Map<String, String> props = catalogConfig.getCatalogProperties(); + return new DatabricksUnityCatalogConfig( + props.get(HOST), + props.get(WAREHOUSE_ID), + props.get(AUTH_TYPE), + props.get(CLIENT_ID), + props.get(CLIENT_SECRET), + props.get(TOKEN)); + } +} Review Comment: The `token` field is defined and parsed but the docs say PAT auth is "intentionally not wired" and "not supported yet". If it's not supported, don't add the field — it creates confusion. Remove `TOKEN` constant and `token` field until PAT auth is actually implemented. ########## xtable-databricks/src/test/java/org/apache/xtable/databricks/TestDatabricksUnityCatalogSyncClient.java: ########## @@ -0,0 +1,570 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance Review Comment: Tests have massive boilerplate — every test method repeats the same 15-line client construction block. Extract a `createClient(String tableFormat)` helper method to reduce duplication and improve readability. ########## xtable-databricks/pom.xml: ########## @@ -0,0 +1,80 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one or more + ~ contributor license agreements. See the NOTICE file distributed with + ~ this work for additional information regarding copyright ownership. + ~ The ASF licenses this file to You under the Apache License, Version 2.0 + ~ (the "License"); you may not use this file except in compliance with + ~ the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, software + ~ distributed under the License is distributed on an "AS IS" BASIS, + ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ~ See the License for the specific language governing permissions and + ~ limitations under the License. +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.xtable</groupId> + <artifactId>xtable</artifactId> + <version>0.3.0-incubating</version> + </parent> + + <artifactId>xtable-databricks</artifactId> + <name>XTable Databricks</name> + + <dependencies> + <dependency> + <groupId>org.apache.xtable</groupId> Review Comment: Pin the Databricks SDK version to a property in the parent `pom.xml` instead of hardcoding `0.85.0` here. Follow the pattern used for other dependency versions in the project. ########## xtable-databricks/src/main/java/org/apache/xtable/databricks/DatabricksUnityCatalogSyncClient.java: ########## @@ -0,0 +1,506 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.xtable.databricks; + +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; + +import lombok.extern.log4j.Log4j2; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; + +import com.databricks.sdk.WorkspaceClient; +import com.databricks.sdk.core.DatabricksConfig; +import com.databricks.sdk.core.error.platform.NotFound; +import com.databricks.sdk.service.catalog.ColumnInfo; +import com.databricks.sdk.service.catalog.CreateSchema; +import com.databricks.sdk.service.catalog.SchemaInfo; +import com.databricks.sdk.service.catalog.SchemasAPI; +import com.databricks.sdk.service.catalog.TableInfo; +import com.databricks.sdk.service.catalog.TablesAPI; +import com.databricks.sdk.service.sql.Disposition; +import com.databricks.sdk.service.sql.ExecuteStatementRequest; +import com.databricks.sdk.service.sql.ExecuteStatementRequestOnWaitTimeout; +import com.databricks.sdk.service.sql.Format; +import com.databricks.sdk.service.sql.StatementExecutionAPI; +import com.databricks.sdk.service.sql.StatementResponse; +import com.databricks.sdk.service.sql.StatementState; + +import org.apache.xtable.catalog.CatalogUtils; +import org.apache.xtable.conversion.ExternalCatalogConfig; +import org.apache.xtable.exception.CatalogSyncException; +import org.apache.xtable.model.InternalTable; +import org.apache.xtable.model.catalog.CatalogTableIdentifier; +import org.apache.xtable.model.catalog.HierarchicalTableIdentifier; +import org.apache.xtable.model.schema.InternalField; +import org.apache.xtable.model.schema.InternalSchema; +import org.apache.xtable.model.storage.CatalogType; +import org.apache.xtable.model.storage.TableFormat; +import org.apache.xtable.spi.sync.CatalogSyncClient; + +/** + * Databricks Unity Catalog implementation skeleton for CatalogSyncClient. + * + * <p>This is a placeholder to wire Databricks UC as a catalog target via the SQL Statement + * Execution API. Actual DDL execution and schema diffing should be implemented in a follow-up + * change. + */ +@Log4j2 +public class DatabricksUnityCatalogSyncClient implements CatalogSyncClient<TableInfo> { + + private ExternalCatalogConfig catalogConfig; + private DatabricksUnityCatalogConfig databricksConfig; + private Configuration hadoopConf; + private String tableFormat; + private WorkspaceClient workspaceClient; + private StatementExecutionAPI statementExecution; + private TablesAPI tablesApi; + private SchemasAPI schemasApi; + + // For loading the instance using ServiceLoader + public DatabricksUnityCatalogSyncClient() {} + + public DatabricksUnityCatalogSyncClient( + ExternalCatalogConfig catalogConfig, String tableFormat, Configuration configuration) { + init(catalogConfig, tableFormat, configuration); + } + + DatabricksUnityCatalogSyncClient( + ExternalCatalogConfig catalogConfig, + String tableFormat, + Configuration configuration, + StatementExecutionAPI statementExecution, + TablesAPI tablesApi, + SchemasAPI schemasApi) { + this.statementExecution = statementExecution; + this.tablesApi = tablesApi; + this.schemasApi = schemasApi; + init(catalogConfig, tableFormat, configuration); + } + + @Override + public String getCatalogId() { + return catalogConfig.getCatalogId(); + } + + @Override + public String getCatalogType() { + return CatalogType.DATABRICKS_UC; + } + + @Override + public String getStorageLocation(TableInfo table) { + if (table == null) { + return null; + } + return table.getStorageLocation(); + } + + @Override + public boolean hasDatabase(CatalogTableIdentifier tableIdentifier) { + HierarchicalTableIdentifier hierarchical = + CatalogUtils.toHierarchicalTableIdentifier(tableIdentifier); + String catalog = hierarchical.getCatalogName(); + if (StringUtils.isBlank(catalog)) { + throw new CatalogSyncException( + "Databricks UC requires a catalog name (expected catalog.schema.table)"); + } + String schema = hierarchical.getDatabaseName(); + if (StringUtils.isBlank(schema)) { + throw new CatalogSyncException("Databricks UC requires a schema name"); + } + + String fullName = catalog + "." + schema; + try { + SchemaInfo schemaInfo = schemasApi.get(fullName); + return schemaInfo != null; + } catch (NotFound e) { + return false; + } catch (Exception e) { + throw new CatalogSyncException("Failed to get schema: " + fullName, e); + } + } Review Comment: The `init` method can create up to 3 separate `WorkspaceClient` instances (lines 153, 158, 164) if `statementExecution`, `tablesApi`, and `schemasApi` are all null. Create the `WorkspaceClient` once at the top: ```java if (this.workspaceClient == null) { this.workspaceClient = new WorkspaceClient(buildConfig(databricksConfig)); } if (this.statementExecution == null) { this.statementExecution = workspaceClient.statementExecution(); } ... ``` ########## xtable-databricks/src/test/java/org/apache/xtable/databricks/TestDatabricksUnityCatalogSyncClient.java: ########## @@ -0,0 +1,570 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.xtable.databricks; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import com.databricks.sdk.core.error.platform.NotFound; +import com.databricks.sdk.service.catalog.ColumnInfo; +import com.databricks.sdk.service.catalog.CreateSchema; +import com.databricks.sdk.service.catalog.SchemaInfo; +import com.databricks.sdk.service.catalog.SchemasAPI; +import com.databricks.sdk.service.catalog.TableInfo; +import com.databricks.sdk.service.catalog.TablesAPI; +import com.databricks.sdk.service.sql.ExecuteStatementRequest; +import com.databricks.sdk.service.sql.StatementExecutionAPI; +import com.databricks.sdk.service.sql.StatementResponse; +import com.databricks.sdk.service.sql.StatementState; +import com.databricks.sdk.service.sql.StatementStatus; + +import org.apache.xtable.conversion.ExternalCatalogConfig; +import org.apache.xtable.exception.CatalogSyncException; +import org.apache.xtable.model.InternalTable; +import org.apache.xtable.model.catalog.ThreePartHierarchicalTableIdentifier; +import org.apache.xtable.model.schema.InternalField; +import org.apache.xtable.model.schema.InternalSchema; +import org.apache.xtable.model.schema.InternalType; +import org.apache.xtable.model.storage.CatalogType; +import org.apache.xtable.model.storage.TableFormat; + +@ExtendWith(MockitoExtension.class) +public class TestDatabricksUnityCatalogSyncClient { + + @Mock private StatementExecutionAPI mockStatementExecution; + @Mock private TablesAPI mockTablesApi; + @Mock private SchemasAPI mockSchemasApi; + + @Test + void testCreateTableDelta_NoColumns() { + Map<String, String> props = new HashMap<>(); + props.put(DatabricksUnityCatalogConfig.HOST, "https://example.cloud.databricks.com"); + props.put(DatabricksUnityCatalogConfig.WAREHOUSE_ID, "wh-1"); + props.put(DatabricksUnityCatalogConfig.AUTH_TYPE, "oauth-m2m"); + ExternalCatalogConfig config = + ExternalCatalogConfig.builder() + .catalogId("uc") + .catalogType(CatalogType.DATABRICKS_UC) + .catalogProperties(props) + .build(); + + DatabricksUnityCatalogSyncClient client = + new DatabricksUnityCatalogSyncClient( + config, + TableFormat.DELTA, + new Configuration(), + mockStatementExecution, + mockTablesApi, + mockSchemasApi); + + when(mockStatementExecution.executeStatement(any(ExecuteStatementRequest.class))) + .thenReturn( + new StatementResponse() + .setStatus(new StatementStatus().setState(StatementState.SUCCEEDED))); + Review Comment: Missing test: `refreshTable` with matching schema (no-op path). You test the schema-mismatch path but not the case where schemas match and no MSCK REPAIR is issued. Also missing: `refreshTable` with null `catalogTable` and `refreshTable` with null/empty schema. ########## xtable-databricks/src/main/java/org/apache/xtable/databricks/DatabricksUnityCatalogSyncClient.java: ########## @@ -0,0 +1,506 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.xtable.databricks; + +import java.util.HashMap; Review Comment: Repeated validation logic for catalog/schema names in `hasDatabase`, `createDatabase`, and `getFullName`. Extract a shared method like `validateAndGetHierarchicalId(CatalogTableIdentifier)` that throws `CatalogSyncException` if catalog/schema are blank, and returns the `HierarchicalTableIdentifier`. ########## website/sidebars.js: ########## @@ -38,7 +38,7 @@ module.exports = { items: [ 'hms', 'glue-catalog', - 'unity-catalog', + 'databricks-unity-catalog', Review Comment: The sidebar reference was changed from `unity-catalog` to `databricks-unity-catalog`, but the docs file is still named `unity-catalog.md` (not renamed). This will break the sidebar link. Either rename the md file or keep the sidebar reference as `unity-catalog`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
