Hisoka-X commented on code in PR #9743: URL: https://github.com/apache/seatunnel/pull/9743#discussion_r2347339215
########## seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSaveModeHandler.java: ########## @@ -0,0 +1,449 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.hive.sink; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.sink.DataSaveMode; +import org.apache.seatunnel.api.sink.SaveModeHandler; +import org.apache.seatunnel.api.sink.SchemaSaveMode; +import org.apache.seatunnel.api.table.catalog.Catalog; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.connectors.seatunnel.hive.config.HiveOptions; +import org.apache.seatunnel.connectors.seatunnel.hive.exception.HiveConnectorErrorCode; +import org.apache.seatunnel.connectors.seatunnel.hive.exception.HiveConnectorException; +import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveMetaStoreCatalog; +import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveTableTemplateUtils; +import 
org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveTypeConvertor; + +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; + +import lombok.extern.slf4j.Slf4j; + +import java.util.ArrayList; +import java.util.List; + +@Slf4j +public class HiveSaveModeHandler implements SaveModeHandler, AutoCloseable { + + private final ReadonlyConfig readonlyConfig; + private final CatalogTable catalogTable; + private final SchemaSaveMode schemaSaveMode; + private final TablePath tablePath; + private final String dbName; + private final String tableName; + private final TableSchema tableSchema; + private final List<String> partitionFields; + + private HiveMetaStoreCatalog hiveCatalog; + private Catalog optionalCatalog; + + public HiveSaveModeHandler( + ReadonlyConfig readonlyConfig, + CatalogTable catalogTable, + SchemaSaveMode schemaSaveMode) { + this.readonlyConfig = readonlyConfig; + this.catalogTable = catalogTable; + this.schemaSaveMode = schemaSaveMode; + this.tablePath = TablePath.of(readonlyConfig.get(HiveOptions.TABLE_NAME)); + this.dbName = tablePath.getDatabaseName(); + this.tableName = tablePath.getTableName(); + this.tableSchema = catalogTable.getTableSchema(); + + // Initialize partition fields from template if available + this.partitionFields = extractPartitionFieldsFromConfig(); Review Comment: It looks like this field is only used by the test case. We should find another way to verify the partition fields in the test case, rather than keeping this in runtime code. ########## seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSaveModeHandler.java: ########## @@ -0,0 +1,449 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.hive.sink; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.sink.DataSaveMode; +import org.apache.seatunnel.api.sink.SaveModeHandler; +import org.apache.seatunnel.api.sink.SchemaSaveMode; +import org.apache.seatunnel.api.table.catalog.Catalog; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.connectors.seatunnel.hive.config.HiveOptions; +import org.apache.seatunnel.connectors.seatunnel.hive.exception.HiveConnectorErrorCode; +import org.apache.seatunnel.connectors.seatunnel.hive.exception.HiveConnectorException; +import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveMetaStoreCatalog; +import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveTableTemplateUtils; +import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveTypeConvertor; + +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; + +import lombok.extern.slf4j.Slf4j; + +import java.util.ArrayList; +import java.util.List; + +@Slf4j +public class HiveSaveModeHandler implements SaveModeHandler, 
AutoCloseable { + + private final ReadonlyConfig readonlyConfig; + private final CatalogTable catalogTable; + private final SchemaSaveMode schemaSaveMode; + private final TablePath tablePath; + private final String dbName; + private final String tableName; + private final TableSchema tableSchema; + private final List<String> partitionFields; + + private HiveMetaStoreCatalog hiveCatalog; + private Catalog optionalCatalog; + + public HiveSaveModeHandler( + ReadonlyConfig readonlyConfig, + CatalogTable catalogTable, + SchemaSaveMode schemaSaveMode) { + this.readonlyConfig = readonlyConfig; + this.catalogTable = catalogTable; + this.schemaSaveMode = schemaSaveMode; + this.tablePath = TablePath.of(readonlyConfig.get(HiveOptions.TABLE_NAME)); + this.dbName = tablePath.getDatabaseName(); + this.tableName = tablePath.getTableName(); + this.tableSchema = catalogTable.getTableSchema(); + + // Initialize partition fields from template if available + this.partitionFields = extractPartitionFieldsFromConfig(); + } + + public HiveSaveModeHandler( + ReadonlyConfig readonlyConfig, + CatalogTable catalogTable, + SchemaSaveMode schemaSaveMode, + Catalog catalog) { + this(readonlyConfig, catalogTable, schemaSaveMode); + this.optionalCatalog = catalog; + } + + @Override + public void open() { + this.hiveCatalog = HiveMetaStoreCatalog.create(readonlyConfig); + if (this.optionalCatalog == null) { + this.optionalCatalog = this.hiveCatalog; + } + } + + @Override + public void handleSchemaSaveModeWithRestore() { + // For Hive, we use the same logic as handleSchemaSaveMode + handleSchemaSaveMode(); + } + + @Override + public TablePath getHandleTablePath() { + return tablePath; + } + + @Override + public Catalog getHandleCatalog() { + return optionalCatalog; + } + + @Override + public SchemaSaveMode getSchemaSaveMode() { + return schemaSaveMode; + } + + @Override + public DataSaveMode getDataSaveMode() { + // Hive uses OVERWRITE parameter for data handling + return DataSaveMode.APPEND_DATA; + 
} + + @Override + public void close() throws Exception { + if (optionalCatalog != null) { + optionalCatalog.close(); + } + if (hiveCatalog != null && hiveCatalog != optionalCatalog) { + hiveCatalog.close(); + } + } + + @Override + public void handleSchemaSaveMode() { + try { + switch (schemaSaveMode) { + case RECREATE_SCHEMA: + handleRecreateSchema(); + break; + case CREATE_SCHEMA_WHEN_NOT_EXIST: + handleCreateSchemaWhenNotExist(); + break; + case ERROR_WHEN_SCHEMA_NOT_EXIST: + handleErrorWhenSchemaNotExist(); + break; + case IGNORE: + log.info( + "Ignore schema save mode, skip schema handling for table {}.{}", + dbName, + tableName); + break; + default: + throw new HiveConnectorException( + HiveConnectorErrorCode.CREATE_HIVE_TABLE_FAILED, + "Unsupported schema save mode: " + schemaSaveMode); + } + } catch (Exception e) { + throw new HiveConnectorException( + HiveConnectorErrorCode.CREATE_HIVE_TABLE_FAILED, + "Failed to handle schema save mode: " + e.getMessage(), + e); + } + } + + @Override + public void handleDataSaveMode() { + // For Hive, data save mode is handled by the existing OVERWRITE parameter + // No additional data handling is needed here + log.info( + "Data save mode handling is managed by existing OVERWRITE parameter for table {}.{}", + dbName, + tableName); Review Comment: Let's merge the overwrite logic with the data save mode, so that setting either `overwrite=true` or `datasavemode=TRUNCATE` does the same thing. ########## seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkOptions.java: ########## @@ -36,4 +37,26 @@ public class HiveSinkOptions extends HiveOptions { .defaultValue(false) .withDescription( "Flag to decide whether to use overwrite mode when inserting data into Hive. If set to true, for non-partitioned tables, the existing data in the table will be deleted before inserting new data. 
For partitioned tables, the data in the relevant partition will be deleted before inserting new data."); + + // SaveMode related options + public static final Option<SchemaSaveMode> SCHEMA_SAVE_MODE = + Options.key("schema_save_mode") + .enumType(SchemaSaveMode.class) + .defaultValue(SchemaSaveMode.CREATE_SCHEMA_WHEN_NOT_EXIST) + .withDescription( + "Schema save mode for auto table creation. " + + "CREATE_SCHEMA_WHEN_NOT_EXIST: Create table when not exists (default). " + + "RECREATE_SCHEMA: Drop and recreate table. " + + "ERROR_WHEN_SCHEMA_NOT_EXIST: Throw error when table not exists. " + + "IGNORE: Skip table creation."); + + public static final Option<String> SAVE_MODE_CREATE_TEMPLATE = + Options.key("save_mode_create_template") + .stringType() + .noDefaultValue() Review Comment: default value? ########## seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSaveModeHandler.java: ########## @@ -0,0 +1,449 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.hive.sink; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.sink.DataSaveMode; +import org.apache.seatunnel.api.sink.SaveModeHandler; +import org.apache.seatunnel.api.sink.SchemaSaveMode; +import org.apache.seatunnel.api.table.catalog.Catalog; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.connectors.seatunnel.hive.config.HiveOptions; +import org.apache.seatunnel.connectors.seatunnel.hive.exception.HiveConnectorErrorCode; +import org.apache.seatunnel.connectors.seatunnel.hive.exception.HiveConnectorException; +import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveMetaStoreCatalog; +import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveTableTemplateUtils; +import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveTypeConvertor; + +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; + +import lombok.extern.slf4j.Slf4j; + +import java.util.ArrayList; +import java.util.List; + +@Slf4j +public class HiveSaveModeHandler implements SaveModeHandler, AutoCloseable { + + private final ReadonlyConfig readonlyConfig; + private final CatalogTable catalogTable; + private final SchemaSaveMode schemaSaveMode; + private final TablePath tablePath; + private final String dbName; + private final String tableName; + private final TableSchema tableSchema; + private final List<String> partitionFields; + + private HiveMetaStoreCatalog hiveCatalog; + private Catalog optionalCatalog; + + public HiveSaveModeHandler( + ReadonlyConfig readonlyConfig, + CatalogTable catalogTable, + SchemaSaveMode schemaSaveMode) { + this.readonlyConfig = 
readonlyConfig; + this.catalogTable = catalogTable; + this.schemaSaveMode = schemaSaveMode; + this.tablePath = TablePath.of(readonlyConfig.get(HiveOptions.TABLE_NAME)); + this.dbName = tablePath.getDatabaseName(); + this.tableName = tablePath.getTableName(); + this.tableSchema = catalogTable.getTableSchema(); + + // Initialize partition fields from template if available + this.partitionFields = extractPartitionFieldsFromConfig(); + } + + public HiveSaveModeHandler( Review Comment: Is this constructor needed? It looks useless. ########## seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/java/org/apache/seatunnel/e2e/connector/hive/HiveIT.java: ########## @@ -259,4 +256,36 @@ public void testFakeSinkHiveOnOSS(TestContainer container) throws Exception { public void testFakeSinkHiveOnCos(TestContainer container) throws Exception { executeJob(container, "/fake_to_hive_on_cos.conf", "/hive_on_cos_to_assert.conf"); } + + @TestTemplate + public void testAutoTableCreationCreateWhenNotExist(TestContainer container) throws Exception { + executeJob( + container, + "/auto_table_creation/fake_to_hive_create_when_not_exist.conf", + "/auto_table_creation/hive_auto_create_to_assert.conf"); Review Comment: Why use a new SeaTunnel job to verify the table? Maybe we can just verify the new table with the Hive client. ########## seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSaveModeHandler.java: ########## @@ -0,0 +1,449 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.hive.sink; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.sink.DataSaveMode; +import org.apache.seatunnel.api.sink.SaveModeHandler; +import org.apache.seatunnel.api.sink.SchemaSaveMode; +import org.apache.seatunnel.api.table.catalog.Catalog; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.connectors.seatunnel.hive.config.HiveOptions; +import org.apache.seatunnel.connectors.seatunnel.hive.exception.HiveConnectorErrorCode; +import org.apache.seatunnel.connectors.seatunnel.hive.exception.HiveConnectorException; +import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveMetaStoreCatalog; +import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveTableTemplateUtils; +import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveTypeConvertor; + +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; + +import lombok.extern.slf4j.Slf4j; + +import java.util.ArrayList; +import java.util.List; + +@Slf4j +public class HiveSaveModeHandler implements SaveModeHandler, AutoCloseable { + + private final ReadonlyConfig readonlyConfig; + private final CatalogTable catalogTable; + private final SchemaSaveMode schemaSaveMode; + private 
final TablePath tablePath; + private final String dbName; + private final String tableName; + private final TableSchema tableSchema; + private final List<String> partitionFields; + + private HiveMetaStoreCatalog hiveCatalog; + private Catalog optionalCatalog; Review Comment: Why not use `hiveCatalog` directly instead of `optionalCatalog`? ########## seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSaveModeHandler.java: ########## @@ -0,0 +1,449 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.hive.sink; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.sink.DataSaveMode; +import org.apache.seatunnel.api.sink.SaveModeHandler; +import org.apache.seatunnel.api.sink.SchemaSaveMode; +import org.apache.seatunnel.api.table.catalog.Catalog; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.connectors.seatunnel.hive.config.HiveOptions; +import org.apache.seatunnel.connectors.seatunnel.hive.exception.HiveConnectorErrorCode; +import org.apache.seatunnel.connectors.seatunnel.hive.exception.HiveConnectorException; +import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveMetaStoreCatalog; +import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveTableTemplateUtils; +import org.apache.seatunnel.connectors.seatunnel.hive.utils.HiveTypeConvertor; + +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; + +import lombok.extern.slf4j.Slf4j; + +import java.util.ArrayList; +import java.util.List; + +@Slf4j +public class HiveSaveModeHandler implements SaveModeHandler, AutoCloseable { Review Comment: Why not implement `DefaultSaveModeHandler`? A lot of the code is duplicated. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
